/******************************************************************************
 * arch/x86/time.c
 *
 * Per-CPU time calibration and management.
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * Portions from Linux are:
 * Copyright (c) 1991, 1992, 1995 Linus Torvalds
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include /* for early_time_init */
#include

/* opt_clocksource: Force clocksource to one of: pit, hpet, cyclone, acpi. */
static char __initdata opt_clocksource[10];
string_param("clocksource", opt_clocksource);

unsigned long cpu_khz;  /* CPU clock frequency in kHz. */
DEFINE_SPINLOCK(rtc_lock);
unsigned long pit0_ticks;
static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */
static DEFINE_SPINLOCK(wc_lock);

struct cpu_time {
    u64 local_tsc_stamp;
    s_time_t stime_local_stamp;
    s_time_t stime_master_stamp;
    struct time_scale tsc_scale;
};

struct platform_timesource {
    char *id;
    char *name;
    u64 frequency;
    u64 (*read_counter)(void);
    int (*init)(struct platform_timesource *);
    void (*resume)(struct platform_timesource *);
    int counter_bits;
};

static DEFINE_PER_CPU(struct cpu_time, cpu_time);

/* Calibrate all CPUs to platform timer every EPOCH. */
#define EPOCH MILLISECS(1000)
static struct timer calibration_timer;

/*
 * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
 * Otherwise overflow happens too quickly (~50ms) for us to guarantee that
 * softirq handling will happen in time.
 *
 * The pit_lock protects the 16- and 32-bit stamp fields as well as the
 */
static DEFINE_SPINLOCK(pit_lock);
static u16 pit_stamp16;
static u32 pit_stamp32;
static int using_pit;

/*
 * 32-bit division of integer dividend and integer divisor yielding
 * 32-bit fractional quotient.
 */
static inline u32 div_frac(u32 dividend, u32 divisor)
{
    u32 quotient, remainder;

    ASSERT(dividend < divisor);
    asm (
        "divl %4"
        : "=a" (quotient), "=d" (remainder)
        : "0" (0), "1" (dividend), "r" (divisor) );

    return quotient;
}

/*
 * 32-bit multiplication of multiplicand and fractional multiplier
 * yielding 32-bit product (radix point at same position as in multiplicand).
 */
static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
{
    u32 product_int, product_frac;

    asm (
        "mul %3"
        : "=a" (product_frac), "=d" (product_int)
        : "0" (multiplicand), "r" (multiplier) );

    return product_int;
}

/*
 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
 * yielding a 64-bit result.
 */
static inline u64 scale_delta(u64 delta, struct time_scale *scale)
{
    u64 product;
#ifdef CONFIG_X86_32
    u32 tmp1, tmp2;
#endif

    if ( scale->shift < 0 )
        delta >>= -scale->shift;
    else
        delta <<= scale->shift;

#ifdef CONFIG_X86_32
    asm (
        "mul %5       ; "
        "mov %4,%%eax ; "
        "mov %%edx,%4 ; "
        "mul %5       ; "
        "xor %5,%5    ; "
        "add %4,%%eax ; "
        "adc %5,%%edx ; "
        : "=A" (product), "=r" (tmp1), "=r" (tmp2)
        : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (scale->mul_frac) );
#else
    asm (
        "mul %%rdx ; shrd $32,%%rdx,%%rax"
        : "=a" (product) : "0" (delta), "d" ((u64)scale->mul_frac) );
#endif

    return product;
}

#define _TS_MUL_FRAC_IDENTITY 0x80000000UL
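
/*
 * Illustrative example (not part of the original source): a time_scale is a
 * signed power-of-two shift plus a 0.32 fixed-point multiplier.  Converting
 * a 2.4GHz TSC to nanoseconds could use shift = -1 and
 * mul_frac = 0xD5555555 (~0.8333 * 2^32), so that
 *     scale_delta(2400000000ULL, &ts)
 *       = ((2400000000 >> 1) * 0xD5555555) >> 32
 *      ~= 999999999 ns,
 * i.e. one second's worth of TSC ticks maps to ~10^9 ns.  set_time_scale()
 * below derives such a pair from a ticks-per-second value, and
 * scale_reciprocal() computes its inverse.
 */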

/* Compute the reciprocal of the given time_scale. */
static inline struct time_scale scale_reciprocal(struct time_scale scale)
{
    struct time_scale reciprocal;
    u32 dividend;

    ASSERT(scale.mul_frac != 0);
    dividend = _TS_MUL_FRAC_IDENTITY;
    reciprocal.shift = 1 - scale.shift;
    while ( unlikely(dividend >= scale.mul_frac) )
    {
        dividend >>= 1;
        reciprocal.shift++;
    }

    asm (
        "divl %4"
        : "=a" (reciprocal.mul_frac), "=d" (dividend)
        : "0" (0), "1" (dividend), "r" (scale.mul_frac) );

    return reciprocal;
}

/*
 * cpu_mask that denotes the CPUs that need timer interrupts coming in as
 * IPIs in place of local APIC timers
 */
static cpumask_t pit_broadcast_mask;

static void smp_send_timer_broadcast_ipi(void)
{
    int cpu = smp_processor_id();
    cpumask_t mask;

    cpus_and(mask, cpu_online_map, pit_broadcast_mask);

    if ( cpu_isset(cpu, mask) )
    {
        cpu_clear(cpu, mask);
        raise_softirq(TIMER_SOFTIRQ);
    }

    if ( !cpus_empty(mask) )
    {
        cpumask_raise_softirq(mask, TIMER_SOFTIRQ);
    }
}

static void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
{
    ASSERT(local_irq_is_enabled());

    if ( hpet_legacy_irq_tick() )
        return;

    /* Only for start-of-day interrupt tests in io_apic.c. */
    (*(volatile unsigned long *)&pit0_ticks)++;

    /* Rough hack to allow accurate timers to sort-of-work with no APIC. */
    if ( !cpu_has_apic )
        raise_softirq(TIMER_SOFTIRQ);

    if ( xen_cpuidle )
        smp_send_timer_broadcast_ipi();

    /* Emulate a 32-bit PIT counter. */
    if ( using_pit )
    {
        u16 count;

        spin_lock_irq(&pit_lock);
        outb(0x80, PIT_MODE);
        count  = inb(PIT_CH2);
        count |= inb(PIT_CH2) << 8;
        pit_stamp32 += (u16)(pit_stamp16 - count);
        pit_stamp16  = count;
        spin_unlock_irq(&pit_lock);
    }
}

static struct irqaction __read_mostly irq0 = { timer_interrupt, "timer", NULL };

/* ------ Calibrate the TSC -------
 * Return processor ticks per second / CALIBRATE_FRAC.
 */
#define CLOCK_TICK_RATE 1193182 /* system crystal frequency (Hz) */
#define CALIBRATE_FRAC  20      /* calibrate over 50ms */
#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
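
/*
 * Worked example (illustrative, not part of the original source): with
 * CLOCK_TICK_RATE = 1193182 and CALIBRATE_FRAC = 20, CALIBRATE_LATCH is
 * (1193182 + 10) / 20 = 59659 PIT ticks, i.e. ~50ms at 1.193182MHz.  The
 * function below therefore measures elapsed TSC ticks over ~1/20th of a
 * second and multiplies the delta by CALIBRATE_FRAC to estimate TSC ticks
 * per second.
 */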

static u64 init_pit_and_calibrate_tsc(void)
{
    u64 start, end;
    unsigned long count;

    /* Set PIT channel 0 to HZ Hz. */
#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
    outb_p(0x34, PIT_MODE);        /* binary, mode 2, LSB/MSB, ch 0 */
    outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
    outb(LATCH >> 8, PIT_CH0);     /* MSB */

    /* Set the Gate high, disable speaker */
    outb((inb(0x61) & ~0x02) | 0x01, 0x61);

    /*
     * Now let's take care of CTC channel 2
     *
     * Set the Gate high, program CTC channel 2 for mode 0, (interrupt on
     * terminal count mode), binary count, load 5 * LATCH count, (LSB and MSB)
     * to begin countdown.
     */
    outb(0xb0, PIT_MODE);                  /* binary, mode 0, LSB/MSB, Ch 2 */
    outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
    outb(CALIBRATE_LATCH >> 8, PIT_CH2);   /* MSB of count */

    rdtscll(start);
    for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
        continue;
    rdtscll(end);

    /* Error if the CTC doesn't behave itself. */
    if ( count == 0 )
        return 0;

    return ((end - start) * (u64)CALIBRATE_FRAC);
}

static void set_time_scale(struct time_scale *ts, u64 ticks_per_sec)
{
    u64 tps64 = ticks_per_sec;
    u32 tps32;
    int shift = 0;

    ASSERT(tps64 != 0);

    while ( tps64 > (MILLISECS(1000)*2) )
    {
        tps64 >>= 1;
        shift--;
    }

    tps32 = (u32)tps64;
    while ( tps32 <= (u32)MILLISECS(1000) )
    {
        tps32 <<= 1;
        shift++;
    }

    ts->mul_frac = div_frac(MILLISECS(1000), tps32);
    ts->shift    = shift;
}

static char *freq_string(u64 freq)
{
    static char s[20];
    unsigned int x, y;

    y = (unsigned int)do_div(freq, 1000000) / 1000;
    x = (unsigned int)freq;
    snprintf(s, sizeof(s), "%u.%03uMHz", x, y);
    return s;
}

/************************************************************
 * PLATFORM TIMER 1: PROGRAMMABLE INTERVAL TIMER (LEGACY PIT)
 */

static u64 read_pit_count(void)
{
    u16 count16;
    u32 count32;
    unsigned long flags;

    spin_lock_irqsave(&pit_lock, flags);

    outb(0x80, PIT_MODE);
    count16  = inb(PIT_CH2);
    count16 |= inb(PIT_CH2) << 8;

    count32 = pit_stamp32 + (u16)(pit_stamp16 - count16);

    spin_unlock_irqrestore(&pit_lock, flags);

    return count32;
}

static int __init init_pit(struct platform_timesource *pts)
{
    using_pit = 1;
    return 1;
}

static struct platform_timesource __initdata plt_pit =
{
    .id = "pit",
    .name = "PIT",
    .frequency = CLOCK_TICK_RATE,
    .read_counter = read_pit_count,
    .counter_bits = 32,
    .init = init_pit
};

/************************************************************
 * PLATFORM TIMER 2: HIGH PRECISION EVENT TIMER (HPET)
 */

static u64 read_hpet_count(void)
{
    return hpet_read32(HPET_COUNTER);
}

static int __init init_hpet(struct platform_timesource *pts)
{
    u64 hpet_rate = hpet_setup();

    if ( hpet_rate == 0 )
        return 0;

    pts->frequency = hpet_rate;
    return 1;
}

static void resume_hpet(struct platform_timesource *pts)
{
    u64 hpet_rate = hpet_setup();

    BUG_ON(hpet_rate == 0);
    pts->frequency = hpet_rate;
}

static struct platform_timesource __initdata plt_hpet =
{
    .id = "hpet",
    .name = "HPET",
    .read_counter = read_hpet_count,
    .counter_bits = 32,
    .init = init_hpet,
    .resume = resume_hpet
};

/************************************************************
 * PLATFORM TIMER 3: IBM 'CYCLONE' TIMER
 */

int use_cyclone;

/*
 * Although the counter is read via a 64-bit register, I believe it is actually
 * a 40-bit counter.  Since this will wrap, I read only the low 32 bits and
 * periodically fold into a 64-bit software counter, just as for PIT and HPET.
 */
#define CYCLONE_CBAR_ADDR   0xFEB00CD0
#define CYCLONE_PMCC_OFFSET 0x51A0
#define CYCLONE_MPMC_OFFSET 0x51D0
#define CYCLONE_MPCS_OFFSET 0x51A8
#define CYCLONE_TIMER_FREQ  100000000

/* Cyclone MPMC0 register. */
static volatile u32 *cyclone_timer;

static u64 read_cyclone_count(void)
{
    return *cyclone_timer;
}

static volatile u32 *__init map_cyclone_reg(unsigned long regaddr)
{
    unsigned long pageaddr = regaddr & PAGE_MASK;
    unsigned long offset = regadd
##############################################################################
# Build global options
# NOTE: Can be overridden externally.
#

# Compiler options here.
ifeq ($(USE_OPT),)
  USE_OPT = -O2 -ggdb -fomit-frame-pointer -falign-functions=16
endif
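
# Example (illustrative): because the assignment above only provides a
# default, the optimization options can be overridden from the command line
# without editing this file, e.g.:
#   make USE_OPT="-Os -ggdb"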

# C specific options here (added to USE_OPT).
ifeq ($(USE_COPT),)
  USE_COPT = 
endif

# C++ specific options here (added to USE_OPT).
ifeq ($(USE_CPPOPT),)
  USE_CPPOPT = -fno-rtti
endif

# Enable this if you want the linker to remove unused code and data
ifeq ($(USE_LINK_GC),)
  USE_LINK_GC = yes
endif

# Linker extra options here.
ifeq ($(USE_LDOPT),)
  USE_LDOPT = 
endif

# Enable this if you want link time optimizations (LTO)
ifeq ($(USE_LTO),)
  USE_LTO = yes
endif

# If enabled, this option compiles the application in THUMB mode.
ifeq ($(USE_THUMB),)
  USE_THUMB = yes
endif

# Enable this if you want to see the full log while compiling.
ifeq ($(USE_VERBOSE_COMPILE),)
  USE_VERBOSE_COMPILE = no
endif

# If enabled, this option makes the build process faster by not compiling
# modules not used in the current configuration.
ifeq ($(USE_SMART_BUILD),)
  USE_SMART_BUILD = yes
endif

#
# Build global options
##############################################################################

##############################################################################
# Architecture or project specific options
#

# Stack size to be allocated to the Cortex-M process stack. This stack is
# the stack used by the main() thread.
ifeq ($(USE_PROCESS_STACKSIZE),)
  USE_PROCESS_STACKSIZE = 0x400
endif

# Stack size to be allocated to the Cortex-M main/exceptions stack. This
# stack is used for processing interrupts and exceptions.
ifeq ($(USE_EXCEPTIONS_STACKSIZE),)
  USE_EXCEPTIONS_STACKSIZE = 0x400
endif

# Enables the use of FPU (no, softfp, hard).
ifeq ($(USE_FPU),)
  USE_FPU = no
endif
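
# Example (illustrative): the stack sizes and FPU mode above follow the same
# default-only pattern, so a build with larger stacks and the hardware FPU
# could be requested as:
#   make USE_PROCESS_STACKSIZE=0x800 USE_EXCEPTIONS_STACKSIZE=0x800 USE_FPU=hard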

#
# Architecture or project specific options
##############################################################################

##############################################################################
# Project, sources and paths
#

# Define project name here
PROJECT = ch

# Imported source files and paths
CHIBIOS = ../../../..

# Licensing files.
include $(CHIBIOS)/os/license/license.mk
# Startup files.
include $(CHIBIOS)/os/common/startup/ARMCMx/compilers/GCC/mk/startup_stm32f3xx.mk
# HAL-OSAL files (optional).
include $(CHIBIOS)/os/hal/hal.mk
include $(CHIBIOS)/os/hal/ports/STM32/STM32F3xx/platform.mk
include $(CHIBIOS)/os/hal/boards/ST_STM32F3_DISCOVERY/board.mk
include $(CHIBIOS)/os/hal/osal/rt/osal.mk
# RTOS files (optional).
include $(CHIBIOS)/os/rt/rt.mk
include $(CHIBIOS)/os/common/ports/ARMCMx/compilers/GCC/mk/port_v7m.mk
# Other files (optional).
#include $(CHIBIOS)/test/lib/test.mk
#include $(CHIBIOS)/test/rt/rt_test.mk
#include $(CHIBIOS)/test/oslib/oslib_test.mk

# Define linker script file here
LDSCRIPT= $(STARTUPLD)/STM32F303xC.ld

# C sources that can be compiled in ARM or THUMB mode depending on the global
# setting.
CSRC = $(ALLCSRC) \
       $(TESTSRC) \
       main.c
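
# Example (illustrative): additional application sources would simply be
# appended to the list above; the file name below is hypothetical.
#   CSRC += mysensor.c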

# C++ sources that can be compiled in ARM or THUMB mode depending on the global
# setting.
CPPSRC = $(ALLCPPSRC)

# C sources to be compiled in ARM mode regardless of the global setting.
# NOTE: Mixing ARM and THUMB mode enables the -mthumb-interwork compiler
#       option that results in lower performance and larger code size.
ACSRC =

# C++ sources to be compiled in ARM mode regardless of the global setting.
# NOTE: Mixing ARM and THUMB mode enables the -mthumb-interwork compiler
#       option that results in lower performance and larger code size.
ACPPSRC =

# C sources to be compiled in THUMB mode regardless of the global setting.
# NOTE: Mixing ARM and THUMB mode enables the -mthumb-interwork compiler
#       option that results in lower performance and larger code size.
TCSRC =

# C++ sources to be compiled in THUMB mode regardless of the global setting.
# NOTE: Mixing ARM and THUMB mode enables the -mthumb-interwork compiler
#       option that results in lower performance and larger code size.
TCPPSRC =

# List ASM source files here
ASMSRC = $(ALLASMSRC)
ASMXSRC = $(ALLXASMSRC)

INCDIR = $(ALLINC) $(TESTINC)

#
# Project, sources and paths
##############################################################################

##############################################################################
# Compiler settings
#

MCU  = cortex-m4

#TRGT = arm-elf-
TRGT = arm-none-eabi-
CC   = $(TRGT)gcc
CPPC = $(TRGT)g++
# Enable loading with g++ only if you need C++ runtime support.
# NOTE: You can use C++ even without C++ support if you are careful. C++
#       runtime support makes code size explode.
LD   = $(TRGT)gcc
#LD   = $(TRGT)g++
CP   = $(TRGT)objcopy
AS   = $(TRGT)gcc -x assembler-with-cpp
AR   = $(TRGT)ar
OD   = $(TRGT)objdump
SZ   = $(TRGT)size
HEX  = $(CP) -O ihex
BIN  = $(CP) -O binary

# ARM-specific options here
AOPT =

# THUMB-specific options here
TOPT = -mthumb -DTHUMB

# Define C warning options here
CWARN = -Wall -Wextra -Wundef -Wstrict-prototypes

# Define C++ warning options here
CPPWARN = -Wall -Wextra -Wundef

#
# Compiler settings
##############################################################################

##############################################################################
# Start of user section
#

# List all user C define here, like -D_DEBUG=1
UDEFS =

# Define ASM defines here
UADEFS =

# List all user directories here
UINCDIR =

# List the user directory to look for the libraries here
ULIBDIR =

# List all user libraries here
ULIBS =
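
# Example (illustrative): a project pulling in an external driver library
# might fill the user section in as follows (names are hypothetical):
#   UDEFS   = -DUSE_MY_DRIVER=1
#   UINCDIR = ../drivers/include
#   ULIBDIR = ../drivers/lib
#   ULIBS   = -lmydriver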

#
# End of user section
##############################################################################

RULESPATH = $(CHIBIOS)/os/common/startup/ARMCMx/compilers/GCC
include $(RULESPATH)/rules.mk
    prev_master_stime = t->stime_master_stamp;

    /* Disabling IRQs ensures we atomically read cpu_calibration struct. */
    local_irq_disable();
    curr_tsc = c->local_tsc_stamp;
    curr_local_stime = c->stime_local_stamp;
    curr_master_stime = c->stime_master_stamp;
    local_irq_enable();

#if 0
    printk("PRE%d: tsc=%"PRIu64" stime=%"PRIu64" master=%"PRIu64"\n",
           smp_processor_id(), prev_tsc, prev_local_stime, prev_master_stime);
    printk("CUR%d: tsc=%"PRIu64" stime=%"PRIu64" master=%"PRIu64
           " -> %"PRId64"\n",
           smp_processor_id(), curr_tsc, curr_local_stime, curr_master_stime,
           curr_master_stime - curr_local_stime);
#endif

    /* Local time warps forward if it lags behind master time. */
    if ( curr_local_stime < curr_master_stime )
        curr_local_stime = curr_master_stime;

    stime_elapsed64 = curr_master_stime - prev_master_stime;
    tsc_elapsed64   = curr_tsc - prev_tsc;

    /*
     * Weirdness can happen if we lose sync with the platform timer.
     * We could be smarter here: resync platform timer with local timer?
     */
    if ( ((s64)stime_elapsed64 < (EPOCH / 2)) )
        goto out;

    /*
     * Calculate error-correction factor. This only slows down a fast local
     * clock (slow clocks are warped forwards). The scale factor is clamped
     * to >= 0.5.
     */
    if ( curr_local_stime != curr_master_stime )
    {
        local_stime_err = curr_local_stime - curr_master_stime;
        if ( local_stime_err > EPOCH )
            local_stime_err = EPOCH;
        error_factor = div_frac(EPOCH, EPOCH + (u32)local_stime_err);
    }

    /*
     * We require 0 < stime_elapsed < 2^31.
     * This allows us to binary shift a 32-bit tsc_elapsed such that:
     * stime_elapsed < tsc_elapsed <= 2*stime_elapsed
     */
    while ( ((u32)stime_elapsed64 != stime_elapsed64) ||
            ((s32)stime_elapsed64 < 0) )
    {
        stime_elapsed64 >>= 1;
        tsc_elapsed64   >>= 1;
    }

    /* stime_master_diff now fits in a 32-bit word. */
    stime_elapsed32 = (u32)stime_elapsed64;

    /* tsc_elapsed <= 2*stime_elapsed */
    while ( tsc_elapsed64 > (stime_elapsed32 * 2) )
    {
        tsc_elapsed64 >>= 1;
        tsc_shift--;
    }

    /* Local difference must now fit in 32 bits. */
    ASSERT((u32)tsc_elapsed64 == tsc_elapsed64);
    tsc_elapsed32 = (u32)tsc_elapsed64;

    /* tsc_elapsed > stime_elapsed */
    ASSERT(tsc_elapsed32 != 0);
    while ( tsc_elapsed32 <= stime_elapsed32 )
    {
        tsc_elapsed32 <<= 1;
        tsc_shift++;
    }

    calibration_mul_frac = div_frac(stime_elapsed32, tsc_elapsed32);
    if ( error_factor != 0 )
        calibration_mul_frac = mul_frac(calibration_mul_frac, error_factor);

#if 0
    printk("---%d: %08x %08x %d\n", smp_processor_id(),
           error_factor, calibration_mul_frac, tsc_shift);
#endif
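
    /*
     * Illustrative example (not part of the original source): with a 2.4GHz
     * TSC and a full 1s epoch, stime_elapsed32 = 10^9 and tsc_elapsed64 is
     * ~2.4*10^9, so the loops above shift the TSC delta down once
     * (tsc_shift = -1) to satisfy stime < tsc <= 2*stime, giving
     * calibration_mul_frac = div_frac(10^9, 1.2*10^9) ~= 0xD5555555.  If the
     * local clock also ran 1ms ahead of master time, error_factor is
     * div_frac(EPOCH, EPOCH + 1ms) ~= 0.999 in 0.32 fixed point, slowing the
     * local clock by roughly 0.1% over the next epoch.
     */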

    /* Record new timestamp information, atomically w.r.t. interrupts. */
    local_irq_disable();
    t->tsc_scale.mul_frac = calibration_mul_frac;
    t->tsc_scale.shift    = tsc_shift;
    t->local_tsc_stamp    = curr_tsc;
    t->stime_local_stamp  = curr_local_stime;
    t->stime_master_stamp = curr_master_stime;
    local_irq_enable();

    update_vcpu_system_time(current);

 out:
    if ( smp_processor_id() == 0 )
    {
        set_timer(&calibration_timer, NOW() + EPOCH);
        platform_time_calibration();
    }
}

/*
 * TSC Reliability check
 */

/*
 * The Linux original version of this function is
 * Copyright (c) 2006, Red Hat, Inc., Ingo Molnar
 */
void check_tsc_warp(unsigned long tsc_khz, unsigned long *max_warp)
{
#define rdtsc_barrier() mb()
    static DEFINE_SPINLOCK(sync_lock);
    static cycles_t last_tsc;

    cycles_t start, now, prev, end;
    int i;

    rdtsc_barrier();
    start = get_cycles();
    rdtsc_barrier();

    /* The measurement runs for 20 msecs: */
    end = start + tsc_khz * 20ULL;
    now = start;

    for ( i = 0; ; i++ )
    {
        /*
         * We take the global lock, measure TSC, save the
         * previous TSC that was measured (possibly on
         * another CPU) and update the previous TSC timestamp.
         */
        spin_lock(&sync_lock);
        prev = last_tsc;
        rdtsc_barrier();
        now = get_cycles();
        rdtsc_barrier();
        last_tsc = now;
        spin_unlock(&sync_lock);

        /*
         * Be nice every now and then (and also check whether measurement is
         * done [we also insert a 10 million loops safety exit, so we don't
         * lock up in case the TSC readout is totally broken]):
         */
        if ( unlikely(!(i & 7)) )
        {
            if ( (now > end) || (i > 10000000) )
                break;
            cpu_relax();
            /*touch_nmi_watchdog();*/
        }

        /*
         * Outside the critical section we can now see whether we saw a
         * time-warp of the TSC going backwards:
         */
        if ( unlikely(prev > now) )
        {
            spin_lock(&sync_lock);
            if ( *max_warp < prev - now )
                *max_warp = prev - now;
            spin_unlock(&sync_lock);
        }
    }
}

static unsigned long tsc_max_warp, tsc_check_count;
static cpumask_t tsc_check_cpumask = CPU_MASK_NONE;

static void tsc_check_slave(void *unused)
{
    unsigned int cpu = smp_processor_id();
    local_irq_disable();
    while ( !cpu_isset(cpu, tsc_check_cpumask) )
        mb();
    check_tsc_warp(cpu_khz, &tsc_max_warp);
    cpu_clear(cpu, tsc_check_cpumask);
    local_irq_enable();
}

void tsc_check_reliability(void)
{
    unsigned int cpu = smp_processor_id();
    static DEFINE_SPINLOCK(lock);

    spin_lock(&lock);

    tsc_check_count++;
    smp_call_function(tsc_check_slave, NULL, 0);
    tsc_check_cpumask = cpu_online_map;
    local_irq_disable();
    check_tsc_warp(cpu_khz, &tsc_max_warp);
    cpu_clear(cpu, tsc_check_cpumask);
    local_irq_enable();
    while ( !cpus_empty(tsc_check_cpumask) )
        cpu_relax();

    spin_unlock(&lock);
}

/*
 * Rendezvous for all CPUs in IRQ context.
 * Master CPU snapshots the platform timer.
 * All CPUs snapshot their local TSC and extrapolation of system time.
 */
struct calibration_rendezvous {
    cpumask_t cpu_calibration_map;
    atomic_t semaphore;
    s_time_t master_stime;
    u64 master_tsc_stamp;
};
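
/*
 * Added explanatory note (not part of the original source): the semaphore
 * field implements a two-phase barrier used by the TSC rendezvous below.
 * In each pass the slaves increment it towards total_cpus-1; CPU0 then
 * snapshots the platform timer and its own TSC (first pass only, while
 * master_stime is still zero) and increments it to total_cpus, releasing
 * everyone.  A second round of increments lets CPU0 confirm all slaves have
 * finished the pass before it resets the count to zero.  Only the final pass
 * (i == 0) actually writes the master TSC stamp into every CPU's TSC.
 */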

/*
 * Keep TSCs in sync when they run at the same rate, but may stop in
 * deep-sleep C states.
 */
static void time_calibration_tsc_rendezvous(void *_r)
{
    int i;
    struct cpu_calibration *c = &this_cpu(cpu_calibration);
    struct calibration_rendezvous *r = _r;
    unsigned int total_cpus = cpus_weight(r->cpu_calibration_map);

    /* Loop to get rid of cache effects on TSC skew. */
    for ( i = 4; i >= 0; i-- )
    {
        if ( smp_processor_id() == 0 )
        {
            while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
                mb();

            if ( r->master_stime == 0 )
            {
                r->master_stime = read_platform_stime();
                rdtscll(r->master_tsc_stamp);
            }
            atomic_inc(&r->semaphore);

            if ( i == 0 )
                write_tsc(r->master_tsc_stamp);

            while ( atomic_read(&r->semaphore) != (2*total_cpus - 1) )
                mb();
            atomic_set(&r->semaphore, 0);
        }
        else
        {
            atomic_inc(&r->semaphore);
            while ( atomic_read(&r->semaphore) < total_cpus )
                mb();

            if ( i == 0 )
                write_tsc(r->master_tsc_stamp);

            atomic_inc(&r->semaphore);
            while ( atomic_read(&r->semaphore) > total_cpus )
                mb();
        }
    }

    rdtscll(c->local_tsc_stamp);
    c->stime_local_stamp = get_s_time();
    c->stime_master_stamp = r->master_stime;

    raise_softirq(TIME_CALIBRATE_SOFTIRQ);
}

/* Ordinary rendezvous function which does not modify TSC values. */
static void time_calibration_std_rendezvous(void *_r)
{
    struct cpu_calibration *c = &this_cpu(cpu_calibration);
    struct calibration_rendezvous *r = _r;
    unsigned int total_cpus = cpus_weight(r->cpu_calibration_map);

    if ( smp_processor_id() == 0 )
    {
        while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
            cpu_relax();
        r->master_stime = read_platform_stime();
        mb(); /* write r->master_stime /then/ signal */
        atomic_inc(&r->semaphore);
    }
    else
    {
        atomic_inc(&r->semaphore);
        while ( atomic_read(&r->semaphore) != total_cpus )
            cpu_relax();
        mb(); /* receive signal /then/ read r->master_stime */
    }

    rdtscll(c->local_tsc_stamp);
    c->stime_local_stamp = get_s_time();
    c->stime_master_stamp = r->master_stime;

    raise_softirq(TIME_CALIBRATE_SOFTIRQ);
}

static void (*time_calibration_rendezvous_fn)(void *) =
    time_calibration_std_rendezvous;

static void time_calibration(void *unused)
{
    struct calibration_rendezvous r = {
        .cpu_calibration_map = cpu_online_map,
        .semaphore = ATOMIC_INIT(0)
    };

    /* @wait=1 because we must wait for all cpus before freeing @r. */
    on_selected_cpus(&r.cpu_calibration_map,
                     time_calibration_rendezvous_fn,
                     &r, 1);
}

void init_percpu_time(void)
{
    struct cpu_time *t = &this_cpu(cpu_time);
    unsigned long flags;
    s_time_t now;

    /* Initial estimate for TSC rate. */
    this_cpu(cpu_time).tsc_scale = per_cpu(cpu_time, 0).tsc_scale;

    local_irq_save(flags);
    rdtscll(t->local_tsc_stamp);
    now = read_platform_stime();
    local_irq_restore(flags);

    t->stime_master_stamp = now;
    t->stime_local_stamp  = now;

    if ( smp_processor_id() == 0 )
    {
        init_timer(&calibration_timer, time_calibration, NULL, 0);
        set_timer(&calibration_timer, NOW() + EPOCH);
    }
}

/*
 * On certain older Intel CPUs writing the TSC MSR clears the upper 32 bits.
 * Obviously we must not use write_tsc() on such CPUs.
 *
 * Additionally, AMD specifies that being able to write the TSC MSR is not an
 * architectural feature (but, other than their manual says, also cannot be
 * determined from CPUID bits).
 */
static void __init tsc_check_writability(void)
{
    const char *what = NULL;
    uint64_t tsc;

    /*
     * If all CPUs are reported as synchronised and in sync, we never write
     * the TSCs (except unavoidably, when a CPU is physically hot-plugged).
     * Hence testing for writability is pointless and even harmful.
     */
    if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
        return;

    rdtscll(tsc);
    if ( wrmsr_safe(MSR_IA32_TSC, 0) == 0 )
    {
        uint64_t tmp, tmp2;
        rdtscll(tmp2);
        write_tsc(tsc | (1ULL << 32));
        rdtscll(tmp);
        if ( ABS((s64)tmp - (s64)tmp2) < (1LL << 31) )
            what = "only partially";
    }
    else
    {
        what = "not";
    }

    /* Nothing to do if the TSC is fully writable. */
    if ( !what )
    {
        /*
         * Paranoia - write back original TSC value. However, APs get synced
         * with BSP as they are brought up, so this doesn't much matter.
         */
        write_tsc(tsc);
        return;
    }

    printk(XENLOG_WARNING "TSC %s writable\n", what);

    /* time_calibration_tsc_rendezvous() must not be used */
    setup_clear_cpu_cap(X86_FEATURE_CONSTANT_TSC);

    /* cstate_restore_tsc() must not be used (or do nothing) */
    if ( !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
        cpuidle_disable_deep_cstate();

    /* synchronize_tsc_slave() must do nothing */
    disable_tsc_sync = 1;
}

/* Late init function (after all CPUs are booted). */
int __init init_xen_time(void)
{
    if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
    {
        /*
         * Sadly, despite processor vendors' best design guidance efforts, on
         * some systems, cpus may come out of reset improperly synchronized.
         * So we must verify there is no warp and we can't do that until all
         * CPUs are booted.
         */
        tsc_check_reliability();
        if ( tsc_max_warp )
            setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
    }

    tsc_check_writability();

    /* If we have constant-rate TSCs then scale factor can be shared. */
    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
    {
        /* If TSCs are not marked as 'reliable', re-sync during rendezvous. */
        if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
            time_calibration_rendezvous_fn = time_calibration_tsc_rendezvous;
    }

    open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);

    /* System time (get_s_time()) starts ticking from now. */
    rdtscll(this_cpu(cpu_time).local_tsc_stamp);

    /* NB. get_cmos_time() can take over one second to execute. */
    do_settime(get_cmos_time(), 0, NOW());

    init_platform_timer();

    init_percpu_time();

    return 0;
}

/* Early init function. */
void __init early_time_init(void)
{
    u64 tmp = init_pit_and_calibrate_tsc();

    set_time_scale(&this_cpu(cpu_time).tsc_scale, tmp);

    do_div(tmp, 1000);
    cpu_khz = (unsigned long)tmp;

    printk("Detected %lu.%03lu MHz processor.\n",
           cpu_khz / 1000, cpu_khz % 1000);

    setup_irq(0, &irq0);
}

/* keep pit enabled for pit_broadcast working while cpuidle enabled */
static int disable_pit_irq(void)
{
    if ( using_pit || !cpu_has_apic )
        return 0;

    /*
     * If we do not rely on PIT CH0 then we can use HPET for one-shot timer
     * emulation when entering deep C states.
     * XXX dom0 may rely on RTC interrupt delivery, so only enable
     * hpet_broadcast if FSB mode available or if force_hpet_broadcast.
     */
    if ( cpuidle_using_deep_cstate() && !boot_cpu_has(X86_FEATURE_ARAT) )
    {
        hpet_broadcast_init();
        if ( !hpet_broadcast_is_available() )
        {
            if ( xen_cpuidle == -1 )
            {
                xen_cpuidle = 0;
                printk("CPUIDLE: disabled due to no HPET. "
                       "Force enable with 'cpuidle'.\n");
            }
            else
            {
                printk("HPET broadcast init failed, turn to PIT broadcast.\n");
                return 0;
            }
        }
    }

    /* Disable PIT CH0 timer interrupt. */
    outb_p(0x30, PIT_MODE);
    outb_p(0, PIT_CH0);
    outb_p(0, PIT_CH0);

    return 0;
}
__initcall(disable_pit_irq);

void pit_broadcast_enter(void)
{
    cpu_set(smp_processor_id(), pit_broadcast_mask);
}

void pit_broadcast_exit(void)
{
    int cpu = smp_processor_id();

    if ( cpu_test_and_clear(cpu, pit_broadcast_mask) )
        reprogram_timer(this_cpu(timer_deadline));
}

int pit_broadcast_is_available(void)
{
    return xen_cpuidle;
}

void send_timer_event(struct vcpu *v)
{
    send_guest_vcpu_virq(v, VIRQ_TIMER);
}

/* Return secs after 00:00:00 localtime, 1 January, 1970. */
unsigned long get_localtime(struct domain *d)
{
    return wc_sec + (wc_nsec + NOW()) / 1000000000ULL
        + d->time_offset_seconds;
}

unsigned long get_sec(void)
{
    return wc_sec + (wc_nsec + NOW()) / 1000000000ULL;
}
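
/*
 * Illustrative example (not part of the original source): wc_sec/wc_nsec
 * record the UTC wallclock at the point system time reads zero, so with
 * wc_nsec = 0 and 5 seconds of uptime (NOW() = 5 * 10^9 ns) get_sec() above
 * returns wc_sec + 5, and get_localtime() additionally applies the
 * per-domain time offset.
 */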

/* "cmos_utc_offset" is the difference between UTC time and CMOS time. */
static long cmos_utc_offset; /* in seconds */

int time_suspend(void)
{
    if ( smp_processor_id() == 0 )
    {
        cmos_utc_offset = -get_cmos_time();
        cmos_utc_offset += (wc_sec + (wc_nsec + NOW()) / 1000000000ULL);
        kill_timer(&calibration_timer);

        /* Sync platform timer stamps. */
        platform_time_calibration();
    }

    /* Better to cancel calibration timer for accuracy. */
    clear_bit(TIME_CALIBRATE_SOFTIRQ, &softirq_pending(smp_processor_id()));

    return 0;
}

int time_resume(void)
{
    init_pit_and_calibrate_tsc();

    resume_platform_timer();

    disable_pit_irq();

    init_percpu_time();

    do_settime(get_cmos_time() + cmos_utc_offset, 0, NOW());

    update_vcpu_system_time(current);

    update_domain_rtc();

    return 0;
}

int dom0_pit_access(struct ioreq *ioreq)
{
    /* Is Xen using Channel 2? Then disallow direct dom0 access. */
    if ( using_pit )
        return 0;

    switch ( ioreq->addr )
    {
    case PIT_CH2:
        if ( ioreq->dir == IOREQ_READ )
            ioreq->data = inb(PIT_CH2);
        else
            outb(ioreq->data, PIT_CH2);
        return 1;

    case PIT_MODE:
        if ( ioreq->dir == IOREQ_READ )
            return 0; /* urk! */
        switch ( ioreq->data & 0xc0 )
        {
        case 0xc0: /* Read Back */
            if ( ioreq->data & 0x08 )    /* Select Channel 2? */
                outb(ioreq->data & 0xf8, PIT_MODE);
            if ( !(ioreq->data & 0x06) ) /* Select Channel 0/1? */
                return 1; /* no - we're done */
            /* Filter Channel 2 and reserved bit 0. */
            ioreq->data &= ~0x09;
            return 0; /* emulate ch0/1 readback */
        case 0x80: /* Select Counter 2 */
            outb(ioreq->data, PIT_MODE);
            return 1;
        }

    case 0x61:
        if ( ioreq->dir == IOREQ_READ )
            ioreq->data = inb(0x61);
        else
            outb((inb(0x61) & ~3) | (ioreq->data & 3), 0x61);
        return 1;
    }

    return 0;
}

struct tm wallclock_time(void)
{
    uint64_t seconds;

    if ( !wc_sec )
        return (struct tm) { 0 };

    seconds = NOW() + (wc_sec * 1000000000ull) + wc_nsec;
    do_div(seconds, 1000000000);
    return gmtime(seconds);
}

/*
 * PV SoftTSC Emulation.
 */

/*
 * tsc=unstable: Override all tests; assume TSC is unreliable.
 * tsc=skewed: Assume TSCs are individually reliable, but skewed across CPUs.
 */
static void __init tsc_parse(const char *s)
{
    if ( !strcmp(s, "unstable") )
    {
        setup_clear_cpu_cap(X86_FEATURE_CONSTANT_TSC);
        setup_clear_cpu_cap(X86_FEATURE_NONSTOP_TSC);
        setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
    }
    else if ( !strcmp(s, "skewed") )
    {
        setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
    }
}
custom_param("tsc", tsc_parse);

u64 gtime_to_gtsc(struct domain *d, u64 time)
{
    if ( !is_hvm_domain(d) )
        time = max_t(s64, time - d->arch.vtsc_offset, 0);
    return scale_delta(time, &d->arch.ns_to_vtsc);
}

u64 gtsc_to_gtime(struct domain *d, u64 tsc)
{
    u64 time = scale_delta(tsc, &d->arch.vtsc_to_ns);

    if ( !is_hvm_domain(d) )
        time += d->arch.vtsc_offset;

    return time;
}
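
/*
 * Illustrative example (not part of the original source): for a domain with a
 * (hypothetical) guest rate of d->arch.tsc_khz = 1000000, set_time_scale()
 * yields vtsc_to_ns with shift = 1 and mul_frac = 0x80000000, i.e. the
 * identity mapping, and ns_to_vtsc = scale_reciprocal(vtsc_to_ns) is its
 * inverse; gtsc_to_gtime() and gtime_to_gtsc() above are then (up to rounding
 * and the PV vtsc_offset) inverse transformations.
 */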

void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int rdtscp)
{
    s_time_t now = get_s_time();
    struct domain *d = v->domain;

    spin_lock(&d->arch.vtsc_lock);

    if ( guest_kernel_mode(v, regs) )
        d->arch.vtsc_kerncount++;
    else
        d->arch.vtsc_usercount++;

    if ( (int64_t)(now - d->arch.vtsc_last) > 0 )
        d->arch.vtsc_last = now;
    else
        now = ++d->arch.vtsc_last;

    spin_unlock(&d->arch.vtsc_lock);

    now = gtime_to_gtsc(d, now);

    regs->eax = (uint32_t)now;
    regs->edx = (uint32_t)(now >> 32);

    if ( rdtscp )
        regs->ecx =
            (d->arch.tsc_mode == TSC_MODE_PVRDTSCP) ? d->arch.incarnation : 0;
}

int host_tsc_is_safe(void)
{
    return boot_cpu_has(X86_FEATURE_TSC_RELIABLE);
}

void cpuid_time_leaf(uint32_t sub_idx, uint32_t *eax, uint32_t *ebx,
                     uint32_t *ecx, uint32_t *edx)
{
    struct domain *d = current->domain;
    uint64_t offset;

    switch ( sub_idx )
    {
    case 0: /* features */
        *eax = ( ( (!!d->arch.vtsc) << 0 ) |
                 ( (!!host_tsc_is_safe()) << 1 ) |
                 ( (!!boot_cpu_has(X86_FEATURE_RDTSCP)) << 2 ) |
                 0 );
        *ebx = d->arch.tsc_mode;
        *ecx = d->arch.tsc_khz;
        *edx = d->arch.incarnation;
        break;
    case 1: /* scale and offset */
        if ( !d->arch.vtsc )
            offset = d->arch.vtsc_offset;
        else
            /* offset already applied to value returned by virtual rdtscp */
            offset = 0;
        *eax = (uint32_t)offset;
        *ebx = (uint32_t)(offset >> 32);
        *ecx = d->arch.vtsc_to_ns.mul_frac;
        *edx = (s8)d->arch.vtsc_to_ns.shift;
        break;
    case 2: /* physical cpu_khz */
        *eax = cpu_khz;
        *ebx = *ecx = *edx = 0;
        break;
    default:
        *eax = *ebx = *ecx = *edx = 0;
    }
}

/*
 * called to collect tsc-related data only for save file or live
 * migrate; called after last rdtsc is done on this incarnation
 */
void tsc_get_info(struct domain *d, uint32_t *tsc_mode,
                  uint64_t *elapsed_nsec, uint32_t *gtsc_khz,
                  uint32_t *incarnation)
{
    *incarnation = d->arch.incarnation;
    *tsc_mode = d->arch.tsc_mode;

    switch ( *tsc_mode )
    {
    case TSC_MODE_NEVER_EMULATE:
        *elapsed_nsec = *gtsc_khz = 0;
        break;
    case TSC_MODE_ALWAYS_EMULATE:
        *elapsed_nsec = get_s_time() - d->arch.vtsc_offset;
        *gtsc_khz = d->arch.tsc_khz;
        break;
    case TSC_MODE_DEFAULT:
        if ( d->arch.vtsc )
        {
            *elapsed_nsec = get_s_time() - d->arch.vtsc_offset;
            *gtsc_khz = d->arch.tsc_khz;
        }
        else
        {
            uint64_t tsc = 0;
            rdtscll(tsc);
            *elapsed_nsec = scale_delta(tsc, &d->arch.vtsc_to_ns);
            *gtsc_khz = cpu_khz;
        }
        break;
    case TSC_MODE_PVRDTSCP:
        if ( d->arch.vtsc )
        {
            *elapsed_nsec = get_s_time() - d->arch.vtsc_offset;
            *gtsc_khz = cpu_khz;
        }
        else
        {
            uint64_t tsc = 0;
            rdtscll(tsc);
            *elapsed_nsec = (scale_delta(tsc, &d->arch.vtsc_to_ns) -
                             d->arch.vtsc_offset);
            *gtsc_khz = 0; /* ignored by tsc_set_info */
        }
        break;
    }

    if ( (int64_t)*elapsed_nsec < 0 )
        *elapsed_nsec = 0;
}

/*
 * This may be called as many as three times for a domain, once when the
 * hypervisor creates the domain, once when the toolstack creates the
 * domain and, if restoring/migrating, once when saved/migrated values
 * are restored.  Care must be taken that, if multiple calls occur,
 * only the last "sticks" and all are completed before the guest executes
 * an rdtsc instruction
 */
void tsc_set_info(struct domain *d,
                  uint32_t tsc_mode, uint64_t elapsed_nsec,
                  uint32_t gtsc_khz, uint32_t incarnation)
{
    if ( is_idle_domain(d) || (d->domain_id == 0) )
    {
        d->arch.vtsc = 0;
        return;
    }

    switch ( d->arch.tsc_mode = tsc_mode )
    {
    case TSC_MODE_NEVER_EMULATE:
        d->arch.vtsc = 0;
        break;
    case TSC_MODE_ALWAYS_EMULATE:
        d->arch.vtsc = 1;
        d->arch.vtsc_offset = get_s_time() - elapsed_nsec;
        d->arch.tsc_khz = gtsc_khz ? gtsc_khz : cpu_khz;
        set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000 );
        d->arch.ns_to_vtsc = scale_reciprocal(d->arch.vtsc_to_ns);
        break;
    case TSC_MODE_DEFAULT:
        d->arch.vtsc = 1;
        d->arch.vtsc_offset = get_s_time() - elapsed_nsec;
        d->arch.tsc_khz = gtsc_khz ? gtsc_khz : cpu_khz;
        set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000 );
        /* use native TSC if initial host has safe TSC, has not migrated
         * yet and tsc_khz == cpu_khz */
        if ( host_tsc_is_safe() && incarnation == 0 &&
             d->arch.tsc_khz == cpu_khz )
            d->arch.vtsc = 0;
        else
            d->arch.ns_to_vtsc = scale_reciprocal(d->arch.vtsc_to_ns);
        break;
    case TSC_MODE_PVRDTSCP:
        d->arch.vtsc = boot_cpu_has(X86_FEATURE_RDTSCP) &&
                       host_tsc_is_safe() ? 0 : 1;
        d->arch.tsc_khz = cpu_khz;
        set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000 );
        d->arch.ns_to_vtsc = scale_reciprocal(d->arch.vtsc_to_ns);
        if ( d->arch.vtsc )
            d->arch.vtsc_offset = get_s_time() - elapsed_nsec;
        else
        {
            /* when using native TSC, offset is nsec relative to power-on
             * of physical machine */
            uint64_t tsc = 0;
            rdtscll(tsc);
            d->arch.vtsc_offset = scale_delta(tsc, &d->arch.vtsc_to_ns) -
                                  elapsed_nsec;
        }
        break;
    }
    d->arch.incarnation = incarnation + 1;
    if ( is_hvm_domain(d) )
        hvm_set_rdtsc_exiting(d, d->arch.vtsc);
}

/* vtsc may incur measurable performance degradation, diagnose with this */
static void dump_softtsc(unsigned char key)
{
    struct domain *d;
    int domcnt = 0;

    tsc_check_reliability();
    if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
        printk("TSC marked as reliable, "
               "warp = %lu (count=%lu)\n", tsc_max_warp, tsc_check_count);
    else if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC ) )
    {
        printk("TSC has constant rate, ");
        if (max_cstate <= 2 && tsc_max_warp == 0)
            printk("no deep Cstates, passed warp test, deemed reliable, ");
        else
            printk("deep Cstates possible, so not reliable, ");
        printk("warp=%lu (count=%lu)\n", tsc_max_warp, tsc_check_count);
    }
    else
        printk("TSC not marked as either constant or reliable, "
               "warp=%lu (count=%lu)\n", tsc_max_warp, tsc_check_count);
    for_each_domain ( d )
    {
        if ( d->domain_id == 0 && d->arch.tsc_mode == TSC_MODE_DEFAULT )
            continue;
        printk("dom%u%s: mode=%d",d->domain_id,
               is_hvm_domain(d) ? "(hvm)" : "", d->arch.tsc_mode);
        if ( d->arch.vtsc_offset )
            printk(",ofs=0x%"PRIx64"",d->arch.vtsc_offset);
        if ( d->arch.tsc_khz )
            printk(",khz=%"PRIu32"",d->arch.tsc_khz);
        if ( d->arch.incarnation )
            printk(",inc=%"PRIu32"",d->arch.incarnation);
        if ( !(d->arch.vtsc_kerncount | d->arch.vtsc_usercount) )
        {
            printk("\n");
            continue;
        }
        if ( is_hvm_domain(d) )
            printk(",vtsc count: %"PRIu64" total\n",
                   d->arch.vtsc_kerncount);
        else
            printk(",vtsc count: %"PRIu64" kernel, %"PRIu64" user\n",
                   d->arch.vtsc_kerncount, d->arch.vtsc_usercount);
        domcnt++;
    }

    if ( !domcnt )
        printk("No domains have emulated TSC\n");
}

static struct keyhandler dump_softtsc_keyhandler = {
    .diagnostic = 1,
    .u.fn = dump_softtsc,
    .desc = "dump softtsc stats"
};

static int __init setup_dump_softtsc(void)
{
    register_keyhandler('s', &dump_softtsc_keyhandler);
    return 0;
}
__initcall(setup_dump_softtsc);

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */