diff options
author | kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> | 2003-10-27 16:44:00 +0000 |
---|---|---|
committer | kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> | 2003-10-27 16:44:00 +0000 |
commit | e313ab9d890ddc015a53011071ada962b506031f (patch) | |
tree | 200b0bd83d6594a9b2e85f0b1b641d8f59a6df65 | |
parent | 51a83ab036e57d360a7681e70eaf30179f51a5dd (diff) | |
download | xen-e313ab9d890ddc015a53011071ada962b506031f.tar.gz xen-e313ab9d890ddc015a53011071ada962b506031f.tar.bz2 xen-e313ab9d890ddc015a53011071ada962b506031f.zip |
bitkeeper revision 1.530 (3f9d4b50eBCuXVk_ImRaxIb9MV1j3g)
time.c:
new file
Many files:
Major fixes to time in Xen and Xenolinux. DOM0 can now set the wallclock time, perhaps using ntpd, for all other domains.
26 files changed, 1040 insertions, 900 deletions
@@ -639,6 +639,7 @@ 3e5a4e68W_hpMlM3u_-QOKMp3gzcwQ xenolinux-2.4.22-sparse/init/do_mounts.c 3e5a4e68TJJavrunYwTAnLRSBxSYqQ xenolinux-2.4.22-sparse/kernel/panic.c 3f1056a9LXNTgSzITNh1mb-MIKV1Ng xenolinux-2.4.22-sparse/kernel/printk.c +3f9d4b44247udoqWEgFkaHiWv6Uvyg xenolinux-2.4.22-sparse/kernel/time.c 3eba8f878XjouY21EkQBXwYBsPsipQ xenolinux-2.4.22-sparse/lndir-rel 3e6e7c1efbQe93xCvOpOVCnXTMmQ5w xenolinux-2.4.22-sparse/mkbuildtree 3e5a4e68GxCIaFH4sy01v1wjapetaA xenolinux-2.4.22-sparse/mm/memory.c diff --git a/tools/internal/xi_build.c b/tools/internal/xi_build.c index 1ba36a1ef4..8f2cc93136 100644 --- a/tools/internal/xi_build.c +++ b/tools/internal/xi_build.c @@ -1,5 +1,4 @@ -#include "hypervisor-ifs/dom0_ops.h" #include "dom0_defs.h" #include "mem_defs.h" diff --git a/tools/internal/xi_create.c b/tools/internal/xi_create.c index d003979731..b5e718caa9 100644 --- a/tools/internal/xi_create.c +++ b/tools/internal/xi_create.c @@ -4,7 +4,6 @@ * Usage: <executable> <mem_kb> <os image> <num_vifs> */ -#include <hypervisor-ifs/dom0_ops.h> #include "dom0_defs.h" #include "mem_defs.h" diff --git a/tools/internal/xi_destroy.c b/tools/internal/xi_destroy.c index b3a975e73f..bd91c7941a 100644 --- a/tools/internal/xi_destroy.c +++ b/tools/internal/xi_destroy.c @@ -1,5 +1,4 @@ -#include "hypervisor-ifs/dom0_ops.h" #include "dom0_defs.h" #include "mem_defs.h" diff --git a/tools/internal/xi_list.c b/tools/internal/xi_list.c index ad6b5d3ccc..fc8f4ed73c 100644 --- a/tools/internal/xi_list.c +++ b/tools/internal/xi_list.c @@ -21,7 +21,6 @@ */ #define SILENT_ERRORS_FROM_XEN -#include "hypervisor-ifs/dom0_ops.h" #include "dom0_defs.h" #include "mem_defs.h" diff --git a/tools/internal/xi_sched_domain.c b/tools/internal/xi_sched_domain.c index 1408be422b..424bc6d9c4 100644 --- a/tools/internal/xi_sched_domain.c +++ b/tools/internal/xi_sched_domain.c @@ -1,5 +1,4 @@ -#include "hypervisor-ifs/dom0_ops.h" #include "dom0_defs.h" #include "mem_defs.h" diff --git 
a/tools/internal/xi_sched_global.c b/tools/internal/xi_sched_global.c index 0c07455b9c..16e9d72d89 100644 --- a/tools/internal/xi_sched_global.c +++ b/tools/internal/xi_sched_global.c @@ -1,5 +1,4 @@ -#include "hypervisor-ifs/dom0_ops.h" #include "dom0_defs.h" #include "mem_defs.h" diff --git a/tools/internal/xi_start.c b/tools/internal/xi_start.c index e04fc8b502..23b95ea742 100644 --- a/tools/internal/xi_start.c +++ b/tools/internal/xi_start.c @@ -1,5 +1,4 @@ -#include "hypervisor-ifs/dom0_ops.h" #include "dom0_defs.h" #include "mem_defs.h" diff --git a/tools/internal/xi_stop.c b/tools/internal/xi_stop.c index 1c10fe4eb8..8e5677edca 100644 --- a/tools/internal/xi_stop.c +++ b/tools/internal/xi_stop.c @@ -1,4 +1,4 @@ -#include "hypervisor-ifs/dom0_ops.h" + #include "dom0_defs.h" #include "mem_defs.h" diff --git a/tools/internal/xi_usage.c b/tools/internal/xi_usage.c index 3cd61431e5..59d4f7648a 100644 --- a/tools/internal/xi_usage.c +++ b/tools/internal/xi_usage.c @@ -1,5 +1,4 @@ -#include "hypervisor-ifs/dom0_ops.h" #include "dom0_defs.h" #include "mem_defs.h" diff --git a/tools/internal/xi_vif_params.c b/tools/internal/xi_vif_params.c index e954253f04..a525c68058 100644 --- a/tools/internal/xi_vif_params.c +++ b/tools/internal/xi_vif_params.c @@ -1,5 +1,4 @@ -#include "hypervisor-ifs/dom0_ops.h" #include "dom0_defs.h" #include "mem_defs.h" diff --git a/xen/arch/i386/apic.c b/xen/arch/i386/apic.c index f2ea5bd4a0..0f4b9a3538 100644 --- a/xen/arch/i386/apic.c +++ b/xen/arch/i386/apic.c @@ -713,36 +713,10 @@ int reprogram_ac_timer(s_time_t timeout) /* * Local timer interrupt handler. - * here the programmable, accurate timers are executed. - * If we are on CPU0 and we should have updated jiffies, we do this - * as well and and deal with traditional linux timers. Note, that of - * the timer APIC on CPU does not go off every 10ms or so the linux - * timers loose accuracy, but that shouldn't be a problem. + * Here the programmable, accurate timers are executed. 
*/ -static s_time_t last_cpu0_tirq = 0; -inline void smp_local_timer_interrupt(struct pt_regs * regs) +inline void smp_local_timer_interrupt(struct pt_regs *regs) { - int cpu = smp_processor_id(); - s_time_t diff, now; - - /* if CPU 0 do old timer stuff */ - if (cpu == 0) - { - now = NOW(); - diff = now - last_cpu0_tirq; - - if (diff <= 0) { - printk ("System Time went backwards: %lld\n", diff); - return; - } - - while (diff >= MILLISECS(10)) { - do_timer(regs); - diff -= MILLISECS(10); - last_cpu0_tirq += MILLISECS(10); - } - } - /* call accurate timer function */ do_ac_timer(); } diff --git a/xen/arch/i386/i8259.c b/xen/arch/i386/i8259.c index 645b7b0fef..53d83e75fe 100644 --- a/xen/arch/i386/i8259.c +++ b/xen/arch/i386/i8259.c @@ -468,14 +468,3 @@ void __init init_IRQ(void) setup_irq(2, &irq2); } -/* - * we only need the timer interrupt for callibrating the tsc<->time<->bus cycle - * mappings. After this all timeing related functions should be run of the - * APIC timers. This function allows us to disable the - */ -void __init disable_pit(void) -{ - printk("Disable PIT. Not needed anymore\n"); - /* This is not the most elegant way, but hey. */ - disable_irq(0); -} diff --git a/xen/arch/i386/setup.c b/xen/arch/i386/setup.c index cc74655463..90d3b8fca0 100644 --- a/xen/arch/i386/setup.c +++ b/xen/arch/i386/setup.c @@ -310,10 +310,7 @@ void __init start_of_day(void) extern void time_init(void); extern void softirq_init(void); extern void timer_bh(void); - extern void tqueue_bh(void); - extern void immediate_bh(void); extern void init_timervecs(void); - extern void disable_pit(void); extern void ac_timer_init(void); extern int setup_network_devices(void); extern void net_init(void); @@ -366,8 +363,6 @@ void __init start_of_day(void) softirq_init(); init_timervecs(); init_bh(TIMER_BH, timer_bh); - init_bh(TQUEUE_BH, tqueue_bh); - init_bh(IMMEDIATE_BH, immediate_bh); init_apic_mappings(); /* make APICs addressable in our pagetables. 
*/ #ifndef CONFIG_SMP @@ -384,14 +379,15 @@ void __init start_of_day(void) * fall thru to 8259A if we have to (but slower). */ #endif + initialize_keytable(); /* call back handling for key codes */ - if ( cpu_has_apic ) - disable_pit(); - else if ( smp_num_cpus != 1 ) - panic("We really need local APICs on SMP machines!"); - else + if ( !cpu_has_apic ) + { do_timer_lists_from_pit = 1; + if ( smp_num_cpus != 1 ) + panic("We need local APICs on SMP machines!"); + } ac_timer_init(); /* init accurate timers */ init_xeno_time(); /* initialise the time */ diff --git a/xen/arch/i386/time.c b/xen/arch/i386/time.c index ae7ecae1a2..395b15b68f 100644 --- a/xen/arch/i386/time.c +++ b/xen/arch/i386/time.c @@ -43,8 +43,10 @@ #define TRC(_x) #endif -/* GLOBALS */ +extern rwlock_t xtime_lock; +extern unsigned long wall_jiffies; +/* GLOBAL */ unsigned long cpu_khz; /* Detected as we calibrate the TSC */ unsigned long ticks_per_usec; /* TSC ticks per microsecond. */ spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; @@ -52,39 +54,22 @@ int timer_ack = 0; int do_timer_lists_from_pit = 0; /* PRIVATE */ - static unsigned int rdtsc_bitshift; /* Which 32 bits of TSC do we use? 
*/ -static unsigned long init_cmos_time; /* RTC time when system time == 0 */ -static u64 cpu_freqs[3]; /* Slow/correct/fast CPU frequencies */ -static u64 cpu_freq; /* Currently-selected CPU frequency */ +static u64 cpu_freq; /* CPU frequency (Hz) */ static u32 st_scale_f; /* Cycles -> ns, fractional part */ static u32 st_scale_i; /* Cycles -> ns, integer part */ -static struct ac_timer update_timer; /* Periodic 'time update' function */ -static spinlock_t stime_lock; /* Lock for accessing sys & wc time */ -struct timeval wall_clock_time; /* WC time at last 'time update' */ static u32 tsc_irq; /* CPU0's TSC at last 'time update' */ static s_time_t stime_irq; /* System time at last 'time update' */ -/* - * The scale update period is not a whole number of seconds since we want to - * avoid being in sync with the CMOS update-in-progress flag. - */ -#define SCALE_UPDATE_PERIOD MILLISECS(50200) -#define TIME_UPDATE_PERIOD MILLISECS(200) +static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + u64 full_tsc; + write_lock(&xtime_lock); -static inline void do_timer_interrupt( - int irq, void *dev_id, struct pt_regs *regs) -{ #ifdef CONFIG_X86_IO_APIC if ( timer_ack ) { - /* - * Subtle, when I/O APICs are used we have to ack timer IRQ manually - * to reset the IRR bit for do_slow_gettimeoffset(). This will also - * deassert NMI lines for the watchdog if run on an 82489DX-based - * system. - */ extern spinlock_t i8259A_lock; spin_lock(&i8259A_lock); outb(0x0c, 0x20); @@ -93,22 +78,27 @@ static inline void do_timer_interrupt( spin_unlock(&i8259A_lock); } #endif + + /* + * Updates TSC timestamp (used to interpolate passage of time between + * interrupts). + */ + rdtscll(full_tsc); + tsc_irq = (u32)(full_tsc >> rdtsc_bitshift); + /* Updates xtime (wallclock time). */ do_timer(regs); + /* Updates system time (nanoseconds since boot). 
*/ + stime_irq += MILLISECS(1000/HZ); + + write_unlock(&xtime_lock); + + /* Rough hack to allow accurate timers to sort-of-work with no APIC. */ if ( do_timer_lists_from_pit ) do_ac_timer(); } -/* - * This is only temporarily. Once the APIC s up and running this - * timer interrupt is turned off. - */ -static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - do_timer_interrupt(irq, NULL, regs); -} - static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL}; @@ -222,27 +212,6 @@ static unsigned long __get_cmos_time(void) return mktime(year, mon, day, hour, min, sec); } -/* This version is fast: it bails if there's an update in progress. */ -static unsigned long maybe_get_cmos_time(void) -{ - unsigned long ct, retval = 0, flags; - - spin_lock_irqsave(&rtc_lock, flags); - - if ( (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) ) - goto out; - - ct = __get_cmos_time(); - - if ( !(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) ) - retval = ct; - - out: - spin_unlock_irqrestore(&rtc_lock, flags); - return retval; -} - -/* This version spins until it definitely reads a valid time from CMOS RAM. 
*/ static unsigned long get_cmos_time(void) { unsigned long res, flags; @@ -264,17 +233,16 @@ static unsigned long get_cmos_time(void) return res; } - /*************************************************************************** * System Time ***************************************************************************/ -static inline s_time_t __get_s_time(void) +static inline u64 get_time_delta(void) { s32 delta_tsc; u32 low; u64 delta, tsc; - + rdtscll(tsc); low = (u32)(tsc >> rdtsc_bitshift); delta_tsc = (s32)(low - tsc_irq); @@ -283,162 +251,76 @@ static inline s_time_t __get_s_time(void) delta >>= 32; delta += ((u64)delta_tsc * st_scale_i); - return stime_irq + delta; + return delta; } s_time_t get_s_time(void) { s_time_t now; unsigned long flags; - spin_lock_irqsave(&stime_lock, flags); - now = __get_s_time(); - spin_unlock_irqrestore(&stime_lock, flags); + read_lock_irqsave(&xtime_lock, flags); + now = stime_irq + get_time_delta(); + read_unlock_irqrestore(&xtime_lock, flags); return now; } -void do_gettimeofday(struct timeval *tv) +void update_dom_time(shared_info_t *si) { unsigned long flags; - unsigned long usec, sec; - spin_lock_irqsave(&stime_lock, flags); - usec = ((unsigned long)(__get_s_time() - stime_irq))/1000; - sec = wall_clock_time.tv_sec; - usec += wall_clock_time.tv_usec; - spin_unlock_irqrestore(&stime_lock, flags); + read_lock_irqsave(&xtime_lock, flags); - while ( usec >= 1000000 ) - { - usec -= 1000000; - sec++; - } + si->time_version1++; + wmb(); - tv->tv_sec = sec; - tv->tv_usec = usec; -} - -void do_settimeofday(struct timeval *tv) -{ - printk("XXX: do_settimeofday not implemented\n"); -} - - -/*************************************************************************** - * Update times - ***************************************************************************/ - -void update_dom_time(shared_info_t *si) -{ - unsigned long flags; - - spin_lock_irqsave(&stime_lock, flags); + /* NB. These two values don't actually ever change. 
*/ si->cpu_freq = cpu_freq; si->rdtsc_bitshift = rdtsc_bitshift; + si->system_time = stime_irq; - si->st_timestamp = tsc_irq; - si->tv_sec = wall_clock_time.tv_sec; - si->tv_usec = wall_clock_time.tv_usec; - si->wc_timestamp = stime_irq; - si->wc_version++; - spin_unlock_irqrestore(&stime_lock, flags); -} + si->tsc_timestamp = tsc_irq; + si->wc_sec = xtime.tv_sec; + si->wc_usec = xtime.tv_usec; + si->wc_usec += (jiffies - wall_jiffies) * (1000000 / HZ); + while ( si->wc_usec >= 1000000 ) + { + si->wc_usec -= 1000000; + si->wc_sec++; + } -/* - * VERY crude way to keep system time from drfiting. - * Update the scaling factor using the RTC - * This is done periodically of it's own timer - * We maintain an array of cpu frequencies. - * - index 0 -> go slower - * - index 1 -> frequency as determined during calibration - * - index 2 -> go faster - * - * NB2. Note that update_scale is called from update_time with the stime_lock - * still held. This is because we must only slow down cpu_freq at a timebase - * change. If we did it in the middle of an update period then time would - * seem to jump backwards since BASE+OLD_FREQ*DIFF > BASE+NEW_FREQ*DIFF. - */ -static void update_scale(void) -{ - unsigned long cmos_time; - u32 st, ct; - s32 dt; - u64 scale; - int freq_index; - - if ( (cmos_time = maybe_get_cmos_time()) == 0 ) - return; - - ct = (u32)(cmos_time - init_cmos_time); - st = (u32)(stime_irq/SECONDS(1)); - dt = (s32)(ct - st); - - /* Work out adjustment to scaling factor. Allow +/- 1s drift. 
*/ - if ( dt < -1 ) - freq_index = 0; /* go slower */ - else if ( dt > 1 ) - freq_index = 2; /* go faster */ - else - freq_index = 1; /* correct speed */ - - if ( (dt <= -10) || (dt >= 10) ) - printk("Large time drift (cmos time - system time = %ds)\n", dt); - - /* set new frequency */ - cpu_freq = cpu_freqs[freq_index]; - - /* adjust scaling factor */ - scale = 1000000000LL << (32 + rdtsc_bitshift); - scale /= cpu_freq; - st_scale_f = scale & 0xffffffff; - st_scale_i = scale >> 32; + wmb(); + si->time_version2++; + + read_unlock_irqrestore(&xtime_lock, flags); } -static void update_time(unsigned long unused) +/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */ +void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base) { - unsigned long flags; - s_time_t new_st; - unsigned long usec; - u64 full_tsc; - static int calls_since_scale_update = 0; + s64 delta; + long _usecs = (long)usecs; - spin_lock_irqsave(&stime_lock, flags); + write_lock_irq(&xtime_lock); - rdtscll(full_tsc); - new_st = __get_s_time(); + delta = (s64)(stime_irq - system_time_base); - /* Update wall clock time. */ - usec = ((unsigned long)(new_st - stime_irq))/1000; - usec += wall_clock_time.tv_usec; - while ( usec >= 1000000 ) - { - usec -= 1000000; - wall_clock_time.tv_sec++; - } - wall_clock_time.tv_usec = usec; - - /* Update system time. */ - stime_irq = new_st; - tsc_irq = (u32)(full_tsc >> rdtsc_bitshift); + _usecs += (long)(delta/1000); + _usecs -= (jiffies - wall_jiffies) * (1000000 / HZ); - /* Maybe update our rate to be in sync with the RTC. 
*/ - if ( ++calls_since_scale_update >= - (SCALE_UPDATE_PERIOD/TIME_UPDATE_PERIOD) ) + while ( _usecs < 0 ) { - update_scale(); - calls_since_scale_update = 0; - } + _usecs += 1000000; + secs--; + } - spin_unlock_irqrestore(&stime_lock, flags); + xtime.tv_sec = secs; + xtime.tv_usec = _usecs; - TRC(printk("TIME[%02d] update time: stime_irq=%lld now=%lld,wct=%ld:%ld\n", - smp_processor_id(), stime_irq, new_st, wall_clock_time.tv_sec, - wall_clock_time.tv_usec)); + write_unlock_irq(&xtime_lock); - /* Reload the timer. */ - update_timer.expires = new_st + TIME_UPDATE_PERIOD; - add_ac_timer(&update_timer); + update_dom_time(current->shared_info); } @@ -446,21 +328,22 @@ static void update_time(unsigned long unused) int __init init_xeno_time() { u64 scale; - s64 freq_off; u64 full_tsc; unsigned int cpu_ghz; - spin_lock_init(&stime_lock); - cpu_ghz = (unsigned int)(cpu_freq / 1000000000ULL); for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 ) continue; - /* Calculate adjusted frequencies: +/- 0.1% */ - freq_off = cpu_freq/1000; - cpu_freqs[0] = cpu_freq + freq_off; - cpu_freqs[1] = cpu_freq; - cpu_freqs[2] = cpu_freq - freq_off; + /* + * We actually adjust cpu_freq to be 0.001% slower than the real + * frequenecy. This makes time run a little bit slower when interpolating + * the passage of time between periodic interrupts, so we expect a little + * jump in time whenever an interrupt comes in (roughly 100ns every 10ms). + * However, this should avoid us considtently running too fast and jumping + * _backwards_ on each interrupt, which would be much worse! + */ + cpu_freq = cpu_freq - (cpu_freq / 100000ULL); scale = 1000000000LL << (32 + rdtsc_bitshift); scale /= cpu_freq; @@ -473,15 +356,8 @@ int __init init_xeno_time() tsc_irq = (u32)(full_tsc >> rdtsc_bitshift); /* Wallclock time starts as the initial RTC time. 
*/ - wall_clock_time.tv_sec = init_cmos_time = get_cmos_time(); - wall_clock_time.tv_usec = 0; - - /* Start timer to periodically update time and frequency scale. */ - init_ac_timer(&update_timer, 0); - update_timer.data = 1; - update_timer.function = &update_time; - update_time(0); - + xtime.tv_sec = get_cmos_time(); + printk("Time init:\n"); printk(".... System Time: %lldns\n", NOW()); @@ -490,7 +366,7 @@ int __init init_xeno_time() printk(".... scale: %08X:%08X\n", (u32)(scale>>32), (u32)scale); printk(".... Wall Clock: %lds %ldus\n", - wall_clock_time.tv_sec, wall_clock_time.tv_usec); + xtime.tv_sec, xtime.tv_usec); return 0; } diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c index 408f1a763d..d5d5e278f1 100644 --- a/xen/common/dom0_ops.c +++ b/xen/common/dom0_ops.c @@ -284,39 +284,47 @@ long do_dom0_op(dom0_op_t *u_dom0_op) case DOM0_MSR: { - if (op.u.msr.write) + if (op.u.msr.write) { - msr_cpu_mask = op.u.msr.cpu_mask; - msr_addr = op.u.msr.msr; - msr_lo = op.u.msr.in1; - msr_hi = op.u.msr.in2; - smp_call_function(write_msr_for, NULL, 1, 1); - write_msr_for(NULL); + msr_cpu_mask = op.u.msr.cpu_mask; + msr_addr = op.u.msr.msr; + msr_lo = op.u.msr.in1; + msr_hi = op.u.msr.in2; + smp_call_function(write_msr_for, NULL, 1, 1); + write_msr_for(NULL); } - else + else { - msr_cpu_mask = op.u.msr.cpu_mask; - msr_addr = op.u.msr.msr; - smp_call_function(read_msr_for, NULL, 1, 1); - read_msr_for(NULL); - - op.u.msr.out1 = msr_lo; - op.u.msr.out2 = msr_hi; - copy_to_user(u_dom0_op, &op, sizeof(op)); + msr_cpu_mask = op.u.msr.cpu_mask; + msr_addr = op.u.msr.msr; + smp_call_function(read_msr_for, NULL, 1, 1); + read_msr_for(NULL); + + op.u.msr.out1 = msr_lo; + op.u.msr.out2 = msr_hi; + copy_to_user(u_dom0_op, &op, sizeof(op)); } - ret = 0; + ret = 0; } break; case DOM0_DEBUG: { - op.u.debug.out1 = op.u.debug.in2 + 1; - op.u.debug.out2 = op.u.debug.in1 + 1; - copy_to_user(u_dom0_op, &op, sizeof(op)); - ret = 0; + op.u.debug.out1 = op.u.debug.in2 + 1; + 
op.u.debug.out2 = op.u.debug.in1 + 1; + copy_to_user(u_dom0_op, &op, sizeof(op)); + ret = 0; } break; + case DOM0_SETTIME: + { + do_settime(op.u.settime.secs, + op.u.settime.usecs, + op.u.settime.system_time); + ret = 0; + } + break; default: ret = -ENOSYS; diff --git a/xen/common/kernel.c b/xen/common/kernel.c index a410fd1155..299215eb7d 100644 --- a/xen/common/kernel.c +++ b/xen/common/kernel.c @@ -227,7 +227,6 @@ void cmain (unsigned long magic, multiboot_info_t *mbi) (mod[1].mod_end - mod[1].mod_start):0) != 0 ) panic("Could not set up DOM0 guest OS\n"); - update_dom_time(new_dom->shared_info); wake_up(new_dom); startup_cpu_idle_loop(); diff --git a/xen/common/schedule.c b/xen/common/schedule.c index d95a287c93..9cd98b65a5 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -535,6 +535,8 @@ static void virt_timer(unsigned long foo) do { if ( is_idle_task(p) ) continue; cpu_mask |= mark_guest_event(p, _EVENT_TIMER); + if ( p->has_cpu ) + update_dom_time(p->shared_info); } while ( (p = p->next_task) != &idle0_task ); read_unlock(&tasklist_lock); diff --git a/xen/common/timer.c b/xen/common/timer.c index bdcda32d89..c0e7d3a393 100644 --- a/xen/common/timer.c +++ b/xen/common/timer.c @@ -22,58 +22,15 @@ #include <linux/timex.h> #include <linux/tqueue.h> #include <linux/delay.h> -//#include <linux/smp_lock.h> #include <linux/interrupt.h> -//#include <linux/kernel_stat.h> #include <xeno/event.h> #include <asm/uaccess.h> -/* - * Timekeeping variables - */ - -long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */ - -/* The current time */ struct timeval xtime __attribute__ ((aligned (16))); - -/* Don't completely fail for HZ > 500. */ -int tickadj = 500/HZ ? 
: 1; /* microsecs */ - -DECLARE_TASK_QUEUE(tq_timer); -DECLARE_TASK_QUEUE(tq_immediate); - -/* - * phase-lock loop variables - */ -/* TIME_ERROR prevents overwriting the CMOS clock */ -int time_state = TIME_OK; /* clock synchronization status */ -int time_status = STA_UNSYNC; /* clock status bits */ -long time_offset; /* time adjustment (us) */ -long time_constant = 2; /* pll time constant */ -long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ -long time_precision = 1; /* clock precision (us) */ -long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ -long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ -long time_phase; /* phase offset (scaled us) */ -long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC; - /* frequency offset (scaled ppm)*/ -long time_adj; /* tick adjust (scaled 1 / HZ) */ -long time_reftime; /* time at last adjustment (s) */ - -long time_adjust; -long time_adjust_step; - -unsigned long event; - unsigned long volatile jiffies; -unsigned int * prof_buffer; -unsigned long prof_len; -unsigned long prof_shift; - /* * Event timer code */ @@ -85,13 +42,13 @@ unsigned long prof_shift; #define TVR_MASK (TVR_SIZE - 1) struct timer_vec { - int index; - struct list_head vec[TVN_SIZE]; + int index; + struct list_head vec[TVN_SIZE]; }; struct timer_vec_root { - int index; - struct list_head vec[TVR_SIZE]; + int index; + struct list_head vec[TVR_SIZE]; }; static struct timer_vec tv5; @@ -101,65 +58,65 @@ static struct timer_vec tv2; static struct timer_vec_root tv1; static struct timer_vec * const tvecs[] = { - (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5 + (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5 }; #define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0])) void init_timervecs (void) { - int i; - - for (i = 0; i < TVN_SIZE; i++) { - INIT_LIST_HEAD(tv5.vec + i); - INIT_LIST_HEAD(tv4.vec + i); - INIT_LIST_HEAD(tv3.vec + i); - INIT_LIST_HEAD(tv2.vec + i); - } - for (i = 0; i < TVR_SIZE; i++) - INIT_LIST_HEAD(tv1.vec 
+ i); + int i; + + for (i = 0; i < TVN_SIZE; i++) { + INIT_LIST_HEAD(tv5.vec + i); + INIT_LIST_HEAD(tv4.vec + i); + INIT_LIST_HEAD(tv3.vec + i); + INIT_LIST_HEAD(tv2.vec + i); + } + for (i = 0; i < TVR_SIZE; i++) + INIT_LIST_HEAD(tv1.vec + i); } static unsigned long timer_jiffies; static inline void internal_add_timer(struct timer_list *timer) { - /* - * must be cli-ed when calling this - */ - unsigned long expires = timer->expires; - unsigned long idx = expires - timer_jiffies; - struct list_head * vec; - - if (idx < TVR_SIZE) { - int i = expires & TVR_MASK; - vec = tv1.vec + i; - } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { - int i = (expires >> TVR_BITS) & TVN_MASK; - vec = tv2.vec + i; - } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { - int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; - vec = tv3.vec + i; - } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { - int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; - vec = tv4.vec + i; - } else if ((signed long) idx < 0) { - /* can happen if you add a timer with expires == jiffies, + /* + * must be cli-ed when calling this + */ + unsigned long expires = timer->expires; + unsigned long idx = expires - timer_jiffies; + struct list_head * vec; + + if (idx < TVR_SIZE) { + int i = expires & TVR_MASK; + vec = tv1.vec + i; + } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { + int i = (expires >> TVR_BITS) & TVN_MASK; + vec = tv2.vec + i; + } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { + int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; + vec = tv3.vec + i; + } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { + int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; + vec = tv4.vec + i; + } else if ((signed long) idx < 0) { + /* can happen if you add a timer with expires == jiffies, * or you set a timer to go off in the past */ - vec = tv1.vec + tv1.index; - } else if (idx <= 0xffffffffUL) { - int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; - vec = tv5.vec + i; - } else { - /* Can only 
get here on architectures with 64-bit jiffies */ - INIT_LIST_HEAD(&timer->list); - return; - } - /* + vec = tv1.vec + tv1.index; + } else if (idx <= 0xffffffffUL) { + int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; + vec = tv5.vec + i; + } else { + /* Can only get here on architectures with 64-bit jiffies */ + INIT_LIST_HEAD(&timer->list); + return; + } + /* * Timers are FIFO! */ - list_add(&timer->list, vec->prev); + list_add(&timer->list, vec->prev); } /* Initialize both explicitly - let's try to have them in the same cache line */ @@ -178,57 +135,57 @@ volatile struct timer_list * volatile running_timer; void add_timer(struct timer_list *timer) { - unsigned long flags; - - spin_lock_irqsave(&timerlist_lock, flags); - if (timer_pending(timer)) - goto bug; - internal_add_timer(timer); - spin_unlock_irqrestore(&timerlist_lock, flags); - return; -bug: - spin_unlock_irqrestore(&timerlist_lock, flags); - printk("bug: kernel timer added twice at %p.\n", - __builtin_return_address(0)); + unsigned long flags; + + spin_lock_irqsave(&timerlist_lock, flags); + if (timer_pending(timer)) + goto bug; + internal_add_timer(timer); + spin_unlock_irqrestore(&timerlist_lock, flags); + return; + bug: + spin_unlock_irqrestore(&timerlist_lock, flags); + printk("bug: kernel timer added twice at %p.\n", + __builtin_return_address(0)); } static inline int detach_timer (struct timer_list *timer) { - if (!timer_pending(timer)) - return 0; - list_del(&timer->list); - return 1; + if (!timer_pending(timer)) + return 0; + list_del(&timer->list); + return 1; } int mod_timer(struct timer_list *timer, unsigned long expires) { - int ret; - unsigned long flags; - - spin_lock_irqsave(&timerlist_lock, flags); - timer->expires = expires; - ret = detach_timer(timer); - internal_add_timer(timer); - spin_unlock_irqrestore(&timerlist_lock, flags); - return ret; + int ret; + unsigned long flags; + + spin_lock_irqsave(&timerlist_lock, flags); + timer->expires = expires; + ret = 
detach_timer(timer); + internal_add_timer(timer); + spin_unlock_irqrestore(&timerlist_lock, flags); + return ret; } int del_timer(struct timer_list * timer) { - int ret; - unsigned long flags; - - spin_lock_irqsave(&timerlist_lock, flags); - ret = detach_timer(timer); - timer->list.next = timer->list.prev = NULL; - spin_unlock_irqrestore(&timerlist_lock, flags); - return ret; + int ret; + unsigned long flags; + + spin_lock_irqsave(&timerlist_lock, flags); + ret = detach_timer(timer); + timer->list.next = timer->list.prev = NULL; + spin_unlock_irqrestore(&timerlist_lock, flags); + return ret; } #ifdef CONFIG_SMP void sync_timers(void) { - spin_unlock_wait(&global_bh_lock); + spin_unlock_wait(&global_bh_lock); } /* @@ -241,269 +198,104 @@ void sync_timers(void) int del_timer_sync(struct timer_list * timer) { - int ret = 0; + int ret = 0; - for (;;) { - unsigned long flags; - int running; + for (;;) { + unsigned long flags; + int running; - spin_lock_irqsave(&timerlist_lock, flags); - ret += detach_timer(timer); - timer->list.next = timer->list.prev = 0; - running = timer_is_running(timer); - spin_unlock_irqrestore(&timerlist_lock, flags); + spin_lock_irqsave(&timerlist_lock, flags); + ret += detach_timer(timer); + timer->list.next = timer->list.prev = 0; + running = timer_is_running(timer); + spin_unlock_irqrestore(&timerlist_lock, flags); - if (!running) - break; + if (!running) + break; - timer_synchronize(timer); - } + timer_synchronize(timer); + } - return ret; + return ret; } #endif static inline void cascade_timers(struct timer_vec *tv) { - /* cascade all the timers from tv up one level */ - struct list_head *head, *curr, *next; - - head = tv->vec + tv->index; - curr = head->next; - /* - * We are removing _all_ timers from the list, so we don't have to - * detach them individually, just clear the list afterwards. 
+ /* cascade all the timers from tv up one level */ + struct list_head *head, *curr, *next; + + head = tv->vec + tv->index; + curr = head->next; + /* + * We are removing _all_ timers from the list, so we don't have to + * detach them individually, just clear the list afterwards. */ - while (curr != head) { - struct timer_list *tmp; - - tmp = list_entry(curr, struct timer_list, list); - next = curr->next; - list_del(curr); /* not needed */ - internal_add_timer(tmp); - curr = next; - } - INIT_LIST_HEAD(head); - tv->index = (tv->index + 1) & TVN_MASK; + while (curr != head) { + struct timer_list *tmp; + + tmp = list_entry(curr, struct timer_list, list); + next = curr->next; + list_del(curr); /* not needed */ + internal_add_timer(tmp); + curr = next; + } + INIT_LIST_HEAD(head); + tv->index = (tv->index + 1) & TVN_MASK; } static inline void run_timer_list(void) { - spin_lock_irq(&timerlist_lock); - while ((long)(jiffies - timer_jiffies) >= 0) { - struct list_head *head, *curr; - if (!tv1.index) { - int n = 1; - do { - cascade_timers(tvecs[n]); - } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS); - } -repeat: - head = tv1.vec + tv1.index; - curr = head->next; - if (curr != head) { - struct timer_list *timer; - void (*fn)(unsigned long); - unsigned long data; - - timer = list_entry(curr, struct timer_list, list); - fn = timer->function; - data= timer->data; - - detach_timer(timer); - timer->list.next = timer->list.prev = NULL; - timer_enter(timer); - spin_unlock_irq(&timerlist_lock); - fn(data); - spin_lock_irq(&timerlist_lock); - timer_exit(); - goto repeat; - } - ++timer_jiffies; - tv1.index = (tv1.index + 1) & TVR_MASK; - } - spin_unlock_irq(&timerlist_lock); -} - -spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED; - -void tqueue_bh(void) -{ - run_task_queue(&tq_timer); -} - -void immediate_bh(void) -{ - run_task_queue(&tq_immediate); -} - -/* - * this routine handles the overflow of the microsecond field - * - * The tricky bits of code to handle the accurate clock support 
- * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. - * They were originally developed for SUN and DEC kernels. - * All the kudos should go to Dave for this stuff. - * - */ -static void second_overflow(void) -{ - long ltemp; - - /* Bump the maxerror field */ - time_maxerror += time_tolerance >> SHIFT_USEC; - if ( time_maxerror > NTP_PHASE_LIMIT ) { - time_maxerror = NTP_PHASE_LIMIT; - time_status |= STA_UNSYNC; + spin_lock_irq(&timerlist_lock); + while ((long)(jiffies - timer_jiffies) >= 0) { + struct list_head *head, *curr; + if (!tv1.index) { + int n = 1; + do { + cascade_timers(tvecs[n]); + } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS); + } + repeat: + head = tv1.vec + tv1.index; + curr = head->next; + if (curr != head) { + struct timer_list *timer; + void (*fn)(unsigned long); + unsigned long data; + + timer = list_entry(curr, struct timer_list, list); + fn = timer->function; + data= timer->data; + + detach_timer(timer); + timer->list.next = timer->list.prev = NULL; + timer_enter(timer); + spin_unlock_irq(&timerlist_lock); + fn(data); + spin_lock_irq(&timerlist_lock); + timer_exit(); + goto repeat; + } + ++timer_jiffies; + tv1.index = (tv1.index + 1) & TVR_MASK; } - - /* - * Leap second processing. If in leap-insert state at - * the end of the day, the system clock is set back one - * second; if in leap-delete state, the system clock is - * set ahead one second. The microtime() routine or - * external clock driver will insure that reported time - * is always monotonic. The ugly divides should be - * replaced. 
- */ - switch (time_state) { - - case TIME_OK: - if (time_status & STA_INS) - time_state = TIME_INS; - else if (time_status & STA_DEL) - time_state = TIME_DEL; - break; - - case TIME_INS: - if (xtime.tv_sec % 86400 == 0) { - xtime.tv_sec--; - time_state = TIME_OOP; - printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); - } - break; - - case TIME_DEL: - if ((xtime.tv_sec + 1) % 86400 == 0) { - xtime.tv_sec++; - time_state = TIME_WAIT; - printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); - } - break; - - case TIME_OOP: - time_state = TIME_WAIT; - break; - - case TIME_WAIT: - if (!(time_status & (STA_INS | STA_DEL))) - time_state = TIME_OK; - } - - /* - * Compute the phase adjustment for the next second. In - * PLL mode, the offset is reduced by a fixed factor - * times the time constant. In FLL mode the offset is - * used directly. In either mode, the maximum phase - * adjustment for each second is clamped so as to spread - * the adjustment over not more than the number of - * seconds between updates. - */ - if (time_offset < 0) { - ltemp = -time_offset; - if (!(time_status & STA_FLL)) - ltemp >>= SHIFT_KG + time_constant; - if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) - ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; - time_offset += ltemp; - time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); - } else { - ltemp = time_offset; - if (!(time_status & STA_FLL)) - ltemp >>= SHIFT_KG + time_constant; - if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) - ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; - time_offset -= ltemp; - time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); - } - - if (ltemp < 0) - time_adj -= -ltemp >> - (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); - else - time_adj += ltemp >> - (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); - -#if HZ == 100 - /* Compensate for (HZ==100) != (1 << SHIFT_HZ). - * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 
14) - */ - if (time_adj < 0) - time_adj -= (-time_adj >> 2) + (-time_adj >> 5); - else - time_adj += (time_adj >> 2) + (time_adj >> 5); -#endif + spin_unlock_irq(&timerlist_lock); } -/* in the NTP reference this is called "hardclock()" */ -static void update_wall_time_one_tick(void) -{ - if ( (time_adjust_step = time_adjust) != 0 ) { - /* We are doing an adjtime thing. - * - * Prepare time_adjust_step to be within bounds. - * Note that a positive time_adjust means we want the clock - * to run faster. - * - * Limit the amount of the step to be in the range - * -tickadj .. +tickadj - */ - if (time_adjust > tickadj) - time_adjust_step = tickadj; - else if (time_adjust < -tickadj) - time_adjust_step = -tickadj; - - /* Reduce by this step the amount of time left */ - time_adjust -= time_adjust_step; - } - xtime.tv_usec += tick + time_adjust_step; - /* - * Advance the phase, once it gets to one microsecond, then - * advance the tick more. - */ - time_phase += time_adj; - if (time_phase <= -FINEUSEC) { - long ltemp = -time_phase >> SHIFT_SCALE; - time_phase += ltemp << SHIFT_SCALE; - xtime.tv_usec -= ltemp; - } - else if (time_phase >= FINEUSEC) { - long ltemp = time_phase >> SHIFT_SCALE; - time_phase -= ltemp << SHIFT_SCALE; - xtime.tv_usec += ltemp; - } -} +spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED; -/* - * Using a loop looks inefficient, but "ticks" is - * usually just one (we shouldn't be losing ticks, - * we're doing this this way mainly for interrupt - * latency reasons, not because we think we'll - * have lots of lost timer ticks - */ static void update_wall_time(unsigned long ticks) { - do { - ticks--; - update_wall_time_one_tick(); - } while (ticks); - - if (xtime.tv_usec >= 1000000) { - xtime.tv_usec -= 1000000; - xtime.tv_sec++; - second_overflow(); - } + do { + ticks--; + xtime.tv_usec += 1000000/HZ; + } while (ticks); + + if (xtime.tv_usec >= 1000000) { + xtime.tv_usec -= 1000000; + xtime.tv_sec++; + } } /* jiffies at the most recent update of wall time */ 
@@ -516,47 +308,31 @@ rwlock_t xtime_lock = RW_LOCK_UNLOCKED; static inline void update_times(void) { - unsigned long ticks; + unsigned long ticks; - /* - * update_times() is run from the raw timer_bh handler so we - * just know that the irqs are locally enabled and so we don't - * need to save/restore the flags of the local CPU here. -arca - */ - write_lock_irq(&xtime_lock); - - ticks = jiffies - wall_jiffies; - if (ticks) { - wall_jiffies += ticks; - update_wall_time(ticks); - } - write_unlock_irq(&xtime_lock); + /* + * update_times() is run from the raw timer_bh handler so we + * just know that the irqs are locally enabled and so we don't + * need to save/restore the flags of the local CPU here. -arca + */ + write_lock_irq(&xtime_lock); + + ticks = jiffies - wall_jiffies; + if (ticks) { + wall_jiffies += ticks; + update_wall_time(ticks); + } + write_unlock_irq(&xtime_lock); } void timer_bh(void) { - update_times(); - run_timer_list(); + update_times(); + run_timer_list(); } -#include <xeno/errno.h> -#include <xeno/sched.h> -#include <xeno/lib.h> -#include <xeno/config.h> -#include <xeno/smp.h> -#include <xeno/irq.h> -#include <asm/msr.h> - void do_timer(struct pt_regs *regs) { (*(unsigned long *)&jiffies)++; - mark_bh(TIMER_BH); - if (TQ_ACTIVE(tq_timer)) - mark_bh(TQUEUE_BH); -} - -void get_fast_time(struct timeval * tm) -{ - *tm=xtime; } diff --git a/xen/include/hypervisor-ifs/dom0_ops.h b/xen/include/hypervisor-ifs/dom0_ops.h index 4aa0810f44..1e7ab9a489 100644 --- a/xen/include/hypervisor-ifs/dom0_ops.h +++ b/xen/include/hypervisor-ifs/dom0_ops.h @@ -21,7 +21,8 @@ #define DOM0_BUILDDOMAIN 13 #define DOM0_IOPL 14 #define DOM0_MSR 15 -#define DOM0_DEBUG 16 /* pervasive debugger */ +#define DOM0_DEBUG 16 +#define DOM0_SETTIME 17 #define MAX_CMD_LEN 256 #define MAX_DOMAIN_NAME 16 @@ -118,6 +119,17 @@ typedef struct dom0_debug_st } dom0_debug_t; +/* + * Set clock such that it would read <secs,usecs> after 00:00:00 UTC, + * 1 January, 1970 if the current system 
time was <system_time>. + */ +typedef struct dom0_settime_st +{ + /* IN variables. */ + unsigned long secs, usecs; + u64 system_time; +} dom0_settime_t; + typedef struct dom0_op_st { unsigned long cmd; @@ -133,6 +145,7 @@ typedef struct dom0_op_st dom0_iopl_t iopl; dom0_msr_t msr; dom0_debug_t debug; + dom0_settime_t settime; } u; } dom0_op_t; diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h index 4405191a6d..b7fa129afc 100644 --- a/xen/include/hypervisor-ifs/hypervisor-if.h +++ b/xen/include/hypervisor-ifs/hypervisor-if.h @@ -232,32 +232,35 @@ typedef struct shared_info_st { /* * Time: The following abstractions are exposed: System Time, Clock Time, * Domain Virtual Time. Domains can access Cycle counter time directly. - * - * The following values are updated periodically (and atomically, from the - * p.o.v. of the guest OS). Th eguest OS detects this because the wc_version - * is incremented. */ - u32 wc_version; /* a version number for info below */ - unsigned int rdtsc_bitshift; /* use bits N:N+31 of TSC */ - u64 cpu_freq; /* to calculate ticks -> real time */ - /* System Time */ - long long system_time; /* in ns */ - unsigned long st_timestamp; /* cyclecounter at last update */ - /* Wall Clock Time */ - long tv_sec; /* essentially a struct timeval */ - long tv_usec; - long long wc_timestamp; /* system time at last update */ + + unsigned int rdtsc_bitshift; /* tsc_timestamp uses N:N+31 of TSC. */ + u64 cpu_freq; /* CPU frequency (Hz). */ + + /* + * The following values are updated periodically (and not necessarily + * atomically!). The guest OS detects this because 'time_version1' is + * incremented just before updating these values, and 'time_version2' is + * incremented immediately after. See Xenolinux code for an example of how + * to read these values safely (arch/xeno/kernel/time.c). + */ + unsigned long time_version1; /* A version number for info below. 
*/ + unsigned long time_version2; /* A version number for info below. */ + unsigned long tsc_timestamp; /* TSC at last update of time vals. */ + u64 system_time; /* Time, in nanosecs, since boot. */ + unsigned long wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ + unsigned long wc_usec; /* Usecs 00:00:00 UTC, Jan 1, 1970. */ /* Domain Virtual Time */ - unsigned long long domain_time; + u64 domain_time; /* * Timeout values: * Allow a domain to specify a timeout value in system time and * domain virtual time. */ - unsigned long long wall_timeout; - unsigned long long domain_timeout; + u64 wall_timeout; + u64 domain_timeout; /* * The index structures are all stored here for convenience. The rings diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h index ba98b7d1f2..8923a8a56d 100644 --- a/xen/include/xeno/sched.h +++ b/xen/include/xeno/sched.h @@ -23,6 +23,8 @@ extern unsigned long volatile jiffies; extern rwlock_t tasklist_lock; +extern struct timeval xtime; + #include <xeno/spinlock.h> struct mm_struct { diff --git a/xen/include/xeno/time.h b/xen/include/xeno/time.h index c30fe56140..10cd3b7e1b 100644 --- a/xen/include/xeno/time.h +++ b/xen/include/xeno/time.h @@ -50,44 +50,21 @@ extern int init_xeno_time(); s_time_t get_s_time(void); #define NOW() ((s_time_t)get_s_time()) -#define SECONDS(_s) (((s_time_t)(_s)) * 1000000000UL ) -#define TENTHS(_ts) (((s_time_t)(_ts)) * 100000000UL ) -#define HUNDREDTHS(_hs) (((s_time_t)(_hs)) * 10000000UL ) -#define MILLISECS(_ms) (((s_time_t)(_ms)) * 1000000UL ) -#define MICROSECS(_us) (((s_time_t)(_us)) * 1000UL ) +#define SECONDS(_s) (((s_time_t)(_s)) * 1000000000ULL ) +#define MILLISECS(_ms) (((s_time_t)(_ms)) * 1000000ULL ) +#define MICROSECS(_us) (((s_time_t)(_us)) * 1000ULL ) #define Time_Max ((s_time_t) 0x7fffffffffffffffLL) #define FOREVER Time_Max -/* - * Wall Clock Time - */ +/* Wall Clock Time */ struct timeval { long tv_sec; /* seconds */ long tv_usec; /* microseconds */ }; -struct timezone { - int 
tz_minuteswest; /* minutes west of Greenwich */ - int tz_dsttime; /* type of dst correction */ -}; - -#ifdef __KERNEL__ -extern void do_gettimeofday(struct timeval *tv); -extern void do_settimeofday(struct timeval *tv); -extern void get_fast_time(struct timeval *tv); -extern void (*do_get_fast_time)(struct timeval *); -#endif - -/* - * Domain Virtual Time (defined in asm/time.h) - */ -/* XXX Interface for getting and setting still missing */ - - -/* update the per domain time information */ extern void update_dom_time(shared_info_t *si); - -/* XXX move this */ +extern void do_settime(unsigned long secs, unsigned long usecs, + u64 system_time_base); extern void do_timer(struct pt_regs *regs); #endif /* __XENO_TIME_H__ */ diff --git a/xen/include/xeno/tqueue.h b/xen/include/xeno/tqueue.h index 4a730f0ad9..3cf830d3f7 100644 --- a/xen/include/xeno/tqueue.h +++ b/xen/include/xeno/tqueue.h @@ -66,7 +66,7 @@ typedef struct list_head task_queue; #define DECLARE_TASK_QUEUE(q) LIST_HEAD(q) #define TQ_ACTIVE(q) (!list_empty(&q)) -extern task_queue tq_timer, tq_immediate, tq_disk; +extern task_queue tq_disk; /* * To implement your own list of active bottom halfs, use the following diff --git a/xenolinux-2.4.22-sparse/arch/xeno/kernel/time.c b/xenolinux-2.4.22-sparse/arch/xeno/kernel/time.c index 73ac82c9a4..65280df6ce 100644 --- a/xenolinux-2.4.22-sparse/arch/xeno/kernel/time.c +++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/time.c @@ -1,23 +1,13 @@ /* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- **************************************************************************** - * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2002-2003 - Keir Fraser - University of Cambridge **************************************************************************** * - * File: arch.xeno/time.c - * Author: Rolf Neugebauer - * Changes: - * - * Date: Nov 2002 + * File: arch/xeno/kernel/time.c + * Author: Rolf Neugebauer 
and Keir Fraser * - * Environment: XenoLinux - * Description: Interface with Hypervisor to get correct notion of time - * Currently supports Systemtime and WallClock time. - * - * (This has hardly any resemblence with the Linux code but left the - * copyright notice anyway. Ignore the comments in the copyright notice.) - **************************************************************************** - * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ - **************************************************************************** + * Description: Interface with Xen to get correct notion of time */ /* @@ -62,7 +52,9 @@ #include <asm/div64.h> #include <asm/hypervisor.h> +#include <asm/hypervisor-ifs/dom0_ops.h> +#include <linux/mc146818rtc.h> #include <linux/kernel.h> #include <linux/interrupt.h> #include <linux/time.h> @@ -70,214 +62,334 @@ #include <linux/smp.h> #include <linux/irq.h> -#undef XENO_TIME_DEBUG /* adds sanity checks and periodic printouts */ - spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; extern rwlock_t xtime_lock; +extern unsigned long wall_jiffies; unsigned long cpu_khz; /* get this from Xen, used elsewhere */ -static spinlock_t hyp_time_lock = SPIN_LOCK_UNLOCKED; static unsigned int rdtsc_bitshift; -static u32 st_scale_f; -static u32 st_scale_i; -static u32 shadow_st_pcc; -static s64 shadow_st; +static u32 st_scale_f; /* convert ticks -> usecs */ +static u32 st_scale_i; /* convert ticks -> usecs */ + +/* These are peridically updated in shared_info, and then copied here. */ +static u32 shadow_tsc_stamp; +static s64 shadow_system_time; +static u32 shadow_time_version; +static struct timeval shadow_tv; + +#ifdef CONFIG_XENO_PRIV +/* Periodically propagate synchronised time to the RTC and to Xen. 
*/ +static long last_rtc_update, last_xen_update; +#endif +static u64 processed_system_time; + +#define HANDLE_USEC_UNDERFLOW(_tv) \ + do { \ + while ( (_tv).tv_usec < 0 ) \ + { \ + (_tv).tv_usec += 1000000; \ + (_tv).tv_sec--; \ + } \ + } while ( 0 ) +#define HANDLE_USEC_OVERFLOW(_tv) \ + do { \ + while ( (_tv).tv_usec >= 1000000 ) \ + { \ + (_tv).tv_usec -= 1000000; \ + (_tv).tv_sec++; \ + } \ + } while ( 0 ) + + +#ifdef CONFIG_XENO_PRIV /* - * System time. - * Although the rest of the Linux kernel doesn't know about this, we - * we use it to extrapolate passage of wallclock time. - * We need to read the values from the shared info page "atomically" - * and use the cycle counter value as the "version" number. Clashes - * should be very rare. + * In order to set the CMOS clock precisely, set_rtc_mmss has to be + * called 500 ms after the second nowtime has started, because when + * nowtime is written into the registers of the CMOS clock, it will + * jump to the next second precisely 500 ms later. Check the Motorola + * MC146818A or Dallas DS12887 data sheet for details. + * + * BUG: This routine does not handle hour overflow properly; it just + * sets the minutes. Usually you'll only notice that after reboot! 
*/ -static inline s64 __get_s_time(void) +static int set_rtc_mmss(unsigned long nowtime) { - s32 delta_tsc; - u32 low; - u64 delta, tsc; + int retval = 0; + int real_seconds, real_minutes, cmos_minutes; + unsigned char save_control, save_freq_select; - rdtscll(tsc); - low = (u32)(tsc >> rdtsc_bitshift); - delta_tsc = (s32)(low - shadow_st_pcc); - if ( unlikely(delta_tsc < 0) ) delta_tsc = 0; - delta = ((u64)delta_tsc * st_scale_f); - delta >>= 32; - delta += ((u64)delta_tsc * st_scale_i); + /* gets recalled with irq locally disabled */ + spin_lock(&rtc_lock); + save_control = CMOS_READ(RTC_CONTROL); + CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); - return shadow_st + delta; -} + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); -/* - * Wallclock time. - * Based on what the hypervisor tells us, extrapolated using system time. - * Again need to read a number of values from the shared page "atomically". - * this time using a version number. - */ -static u32 shadow_wc_version=0; -static long shadow_tv_sec; -static long shadow_tv_usec; -static long long shadow_wc_timestamp; -void do_gettimeofday(struct timeval *tv) -{ - unsigned long flags; - long usec, sec; - u32 version; - u64 now, cpu_freq, scale; + cmos_minutes = CMOS_READ(RTC_MINUTES); + if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD ) + BCD_TO_BIN(cmos_minutes); - spin_lock_irqsave(&hyp_time_lock, flags); + /* + * since we're only adjusting minutes and seconds, don't interfere with + * hour overflow. 
This avoids messing with unknown time zones but requires + * your RTC not to be off by more than 15 minutes + */ + real_seconds = nowtime % 60; + real_minutes = nowtime / 60; + if ( ((abs(real_minutes - cmos_minutes) + 15)/30) & 1 ) + real_minutes += 30; /* correct for half hour time zone */ + real_minutes %= 60; - while ( (version = HYPERVISOR_shared_info->wc_version) != - shadow_wc_version ) + if ( abs(real_minutes - cmos_minutes) < 30 ) { - barrier(); + if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD ) + { + BIN_TO_BCD(real_seconds); + BIN_TO_BCD(real_minutes); + } + CMOS_WRITE(real_seconds,RTC_SECONDS); + CMOS_WRITE(real_minutes,RTC_MINUTES); + } + else + { + printk(KERN_WARNING + "set_rtc_mmss: can't update from %d to %d\n", + cmos_minutes, real_minutes); + retval = -1; + } - shadow_wc_version = version; - shadow_tv_sec = HYPERVISOR_shared_info->tv_sec; - shadow_tv_usec = HYPERVISOR_shared_info->tv_usec; - shadow_wc_timestamp = HYPERVISOR_shared_info->wc_timestamp; - shadow_st_pcc = HYPERVISOR_shared_info->st_timestamp; - shadow_st = HYPERVISOR_shared_info->system_time; + /* The following flags have to be released exactly in this order, + * otherwise the DS12887 (popular MC146818A clone with integrated + * battery and quartz) will not reset the oscillator and will not + * update precisely 500 ms later. You won't find this mentioned in + * the Dallas Semiconductor data sheets, but who believes data + * sheets anyway ... -- Markus Kuhn + */ + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + spin_unlock(&rtc_lock); - rdtsc_bitshift = HYPERVISOR_shared_info->rdtsc_bitshift; - cpu_freq = HYPERVISOR_shared_info->cpu_freq; + return retval; +} +#endif - /* XXX cpu_freq as u32 limits it to 4.29 GHz. Get a better do_div! 
*/ - scale = 1000000000LL << (32 + rdtsc_bitshift); - do_div(scale,(u32)cpu_freq); - st_scale_f = scale & 0xffffffff; - st_scale_i = scale >> 32; - barrier(); - } +/* Must be called with the xtime_lock held for writing. */ +static void get_time_values_from_xen(void) +{ + do { + shadow_time_version = HYPERVISOR_shared_info->time_version2; + rmb(); + shadow_tv.tv_sec = HYPERVISOR_shared_info->wc_sec; + shadow_tv.tv_usec = HYPERVISOR_shared_info->wc_usec; + shadow_tsc_stamp = HYPERVISOR_shared_info->tsc_timestamp; + shadow_system_time = HYPERVISOR_shared_info->system_time; + rmb(); + } + while ( shadow_time_version != HYPERVISOR_shared_info->time_version1 ); +} - now = __get_s_time(); - usec = ((unsigned long)(now-shadow_wc_timestamp))/1000; - sec = shadow_tv_sec; - usec += shadow_tv_usec; +#define TIME_VALUES_UP_TO_DATE \ + (shadow_time_version == HYPERVISOR_shared_info->time_version2) - while ( usec >= 1000000 ) - { - usec -= 1000000; - sec++; - } - tv->tv_sec = sec; - tv->tv_usec = usec; +static inline unsigned long get_time_delta_usecs(void) +{ + s32 delta_tsc; + u32 low; + u64 delta, tsc; - spin_unlock_irqrestore(&hyp_time_lock, flags); + rdtscll(tsc); + low = (u32)(tsc >> rdtsc_bitshift); + delta_tsc = (s32)(low - shadow_tsc_stamp); + if ( unlikely(delta_tsc < 0) ) delta_tsc = 0; + delta = ((u64)delta_tsc * st_scale_f); + delta >>= 32; + delta += ((u64)delta_tsc * st_scale_i); -#ifdef XENO_TIME_DEBUG - { - static long long old_now=0; - static long long wct=0, old_wct=0; - - /* This debug code checks if time increase over two subsequent calls */ - wct=(((long long)sec) * 1000000) + usec; - /* wall clock time going backwards */ - if ((wct < old_wct) ) { - printk("Urgh1: wc diff=%6ld, usec = %ld (0x%lX)\n", - (long)(wct-old_wct), usec, usec); - printk(" st diff=%lld cur st=0x%016llX old st=0x%016llX\n", - now-old_now, now, old_now); - } + return (unsigned long)delta; +} - /* system time going backwards */ - if (now<=old_now) { - printk("Urgh2: st diff=%lld cur 
st=0x%016llX old st=0x%016llX\n", - now-old_now, now, old_now); - } - old_wct = wct; - old_now = now; + +void do_gettimeofday(struct timeval *tv) +{ + unsigned long flags, lost; + struct timeval _tv; + + again: + read_lock_irqsave(&xtime_lock, flags); + _tv.tv_usec = get_time_delta_usecs(); + if ( (lost = (jiffies - wall_jiffies)) != 0 ) + _tv.tv_usec += lost * (1000000 / HZ); + _tv.tv_sec = xtime.tv_sec; + _tv.tv_usec += xtime.tv_usec; + if ( unlikely(!TIME_VALUES_UP_TO_DATE) ) + { + /* + * We may have blocked for a long time, rendering our calculations + * invalid (e.g. the time delta may have overflowed). Detect that + * and recalculate with fresh values. + */ + read_unlock_irqrestore(&xtime_lock, flags); + write_lock_irqsave(&xtime_lock, flags); + get_time_values_from_xen(); + write_unlock_irqrestore(&xtime_lock, flags); + goto again; } -#endif + read_unlock_irqrestore(&xtime_lock, flags); + + HANDLE_USEC_OVERFLOW(_tv); + + *tv = _tv; } void do_settimeofday(struct timeval *tv) { -/* XXX RN: should do something special here for dom0 */ -#if 0 +#ifdef CONFIG_XENO_PRIV + struct timeval newtv; + dom0_op_t op; + + if ( start_info.dom_id != 0 ) + return; + write_lock_irq(&xtime_lock); + /* - * This is revolting. We need to set "xtime" correctly. However, the - * value in this location is the value at the most recent update of - * wall time. Discover what correction gettimeofday() would have - * made, and then undo it! + * Ensure we don't get blocked for a long time so that our time delta + * overflows. If that were to happen then our shadow time values would + * be stale, so we can retry with fresh ones. 
*/ - tv->tv_usec -= do_gettimeoffset(); - tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ); - - while ( tv->tv_usec < 0 ) + again: + tv->tv_usec -= get_time_delta_usecs(); + if ( unlikely(!TIME_VALUES_UP_TO_DATE) ) { - tv->tv_usec += 1000000; - tv->tv_sec--; + get_time_values_from_xen(); + goto again; } + + HANDLE_USEC_UNDERFLOW(*tv); + + newtv = *tv; + + tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ); + HANDLE_USEC_UNDERFLOW(*tv); xtime = *tv; time_adjust = 0; /* stop active adjtime() */ time_status |= STA_UNSYNC; time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; + + last_rtc_update = last_xen_update = 0; + + op.cmd = DOM0_SETTIME; + op.u.settime.secs = newtv.tv_sec; + op.u.settime.usecs = newtv.tv_usec; + op.u.settime.system_time = shadow_system_time; + write_unlock_irq(&xtime_lock); + + HYPERVISOR_dom0_op(&op); #endif } +asmlinkage long sys_stime(int *tptr) +{ + int value; + struct timeval tv; -/* - * Timer ISR. - * Unlike normal Linux these don't come in at a fixed rate of HZ. - * In here we wrok out how often it should have been called and then call - * the architecture independent part (do_timer()) the appropriate number of - * times. A bit of a nasty hack, to keep the "other" notion of wallclock time - * happy. - */ -static long long us_per_tick=1000000/HZ; -static long long last_irq; + if ( !capable(CAP_SYS_TIME) ) + return -EPERM; + + if ( get_user(value, tptr) ) + return -EFAULT; + + tv.tv_sec = value; + tv.tv_usec = 0; + + do_settimeofday(&tv); + + return 0; +} + +#define NS_PER_TICK (1000000000ULL/HZ) static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - struct timeval tv; - long long time, delta; + s64 delta; - /* - * The next bit really sucks: - * Linux not only uses do_gettimeofday() to keep a notion of - * wallclock time, but also maintains the xtime struct and jiffies. 
- * (Even worse some userland code accesses this via the sys_time() - * system call) - * Unfortunately, xtime is maintain in the architecture independent - * part of the timer ISR (./kernel/timer.c sic!). So, although we have - * perfectly valid notion of wallclock time from the hypervisor we here - * fake missed timer interrupts so that the arch independent part of - * the Timer ISR updates jiffies for us *and* once the bh gets run - * updates xtime accordingly. Yuck! - */ + get_time_values_from_xen(); - /* Work out the number of jiffy intervals passed and update them. */ - do_gettimeofday(&tv); - time = (((long long)tv.tv_sec) * 1000000) + tv.tv_usec; - delta = time - last_irq; - if (delta <= 0) { - printk ("Timer ISR: Time went backwards: %lld\n", delta); + if ( (delta = (s64)(shadow_system_time - processed_system_time)) < 0 ) + { + printk("Timer ISR: Time went backwards: %lld\n", delta); return; } - while (delta >= us_per_tick) { + + while ( delta >= NS_PER_TICK ) + { do_timer(regs); - delta -= us_per_tick; - last_irq += us_per_tick; + delta -= NS_PER_TICK; + processed_system_time += NS_PER_TICK; + } + + if ( (time_status & STA_UNSYNC) != 0 ) + { + /* Adjust shadow timeval for jiffies that haven't updated xtime yet. */ + shadow_tv.tv_usec -= (jiffies - wall_jiffies) * (1000000/HZ); + HANDLE_USEC_UNDERFLOW(shadow_tv); + + /* Update our unsynchronised xtime appropriately. */ + xtime = shadow_tv; } -#if 0 - if (!user_mode(regs)) - x86_do_profile(regs->eip); +#ifdef CONFIG_XENO_PRIV + if ( (start_info.dom_id == 0) && ((time_status & STA_UNSYNC) == 0) ) + { + /* Send synchronised time to Xen approximately every minute. 
*/ + if ( xtime.tv_sec > (last_xen_update + 60) ) + { + dom0_op_t op; + struct timeval tv = xtime; + + tv.tv_usec += (jiffies - wall_jiffies) * (1000000/HZ); + HANDLE_USEC_OVERFLOW(tv); + + op.cmd = DOM0_SETTIME; + op.u.settime.secs = tv.tv_sec; + op.u.settime.usecs = tv.tv_usec; + op.u.settime.system_time = shadow_system_time; + HYPERVISOR_dom0_op(&op); + + last_xen_update = xtime.tv_sec; + } + + /* + * If we have an externally synchronized Linux clock, then update CMOS + * clock accordingly every ~11 minutes. Set_rtc_mmss() has to be called + * as close as possible to 500 ms before the new second starts. + */ + if ( (xtime.tv_sec > (last_rtc_update + 660)) && + (xtime.tv_usec >= (500000 - ((unsigned) tick) / 2)) && + (xtime.tv_usec <= (500000 + ((unsigned) tick) / 2)) ) + { + if ( set_rtc_mmss(xtime.tv_sec) == 0 ) + last_rtc_update = xtime.tv_sec; + else + last_rtc_update = xtime.tv_sec - 600; + } + } #endif } static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { write_lock(&xtime_lock); - do_timer_interrupt(irq, NULL, regs); + while ( !TIME_VALUES_UP_TO_DATE ) + do_timer_interrupt(irq, NULL, regs); write_unlock(&xtime_lock); } @@ -293,7 +405,7 @@ static struct irqaction irq_timer = { void __init time_init(void) { unsigned long long alarm; - u64 __cpu_khz; + u64 __cpu_khz, cpu_freq, scale, scale2; __cpu_khz = HYPERVISOR_shared_info->cpu_freq; do_div(__cpu_khz, 1000); @@ -301,23 +413,29 @@ void __init time_init(void) printk("Xen reported: %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); - do_gettimeofday(&xtime); - last_irq = (((long long)xtime.tv_sec) * 1000000) + xtime.tv_usec; + xtime.tv_sec = HYPERVISOR_shared_info->wc_sec; + xtime.tv_usec = HYPERVISOR_shared_info->wc_usec; + processed_system_time = shadow_system_time; - setup_irq(TIMER_IRQ, &irq_timer); + rdtsc_bitshift = HYPERVISOR_shared_info->rdtsc_bitshift; + cpu_freq = HYPERVISOR_shared_info->cpu_freq; - /* - * Start ticker. 
Note that timing runs of wall clock, not virtual 'domain' - * time. This means that clock sshould run at the correct rate. For things - * like scheduling, it's not clear whether it matters which sort of time - * we use. XXX RN: unimplemented. - */ + scale = 1000000LL << (32 + rdtsc_bitshift); + do_div(scale, (u32)cpu_freq); + + if ( (cpu_freq >> 32) != 0 ) + { + scale2 = 1000000LL << rdtsc_bitshift; + do_div(scale2, (u32)(cpu_freq>>32)); + scale += scale2; + } + + st_scale_f = scale & 0xffffffff; + st_scale_i = scale >> 32; + + setup_irq(TIMER_IRQ, &irq_timer); rdtscll(alarm); -#if 0 - alarm += (1000/HZ)*HYPERVISOR_shared_info->ticks_per_ms; - HYPERVISOR_shared_info->wall_timeout = alarm; - HYPERVISOR_shared_info->domain_timeout = ~0ULL; -#endif + clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events); } diff --git a/xenolinux-2.4.22-sparse/kernel/time.c b/xenolinux-2.4.22-sparse/kernel/time.c new file mode 100644 index 0000000000..fe6ecde6d8 --- /dev/null +++ b/xenolinux-2.4.22-sparse/kernel/time.c @@ -0,0 +1,415 @@ +/* + * linux/kernel/time.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * This file contains the interface functions for the various + * time related system calls: time, stime, gettimeofday, settimeofday, + * adjtime + */ +/* + * Modification history kernel/time.c + * + * 1993-09-02 Philip Gladstone + * Created file with time related functions from sched.c and adjtimex() + * 1993-10-08 Torsten Duwe + * adjtime interface update and CMOS clock write code + * 1995-08-13 Torsten Duwe + * kernel PLL updated to 1994-12-13 specs (rfc-1589) + * 1999-01-16 Ulrich Windl + * Introduced error checking for many cases in adjtimex(). 
 * Updated NTP code according to technical memorandum Jan '96
 * "A Kernel Model for Precision Timekeeping" by Dave Mills
 * Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10)
 * (Even though the technical memorandum forbids it)
 */

#include <linux/mm.h>
#include <linux/timex.h>
#include <linux/smp_lock.h>

#include <asm/uaccess.h>

/*
 * The timezone where the local system is located. Used as a default by some
 * programs who obtain this value by using gettimeofday.
 */
struct timezone sys_tz;

/* The xtime_lock is not only serializing the xtime read/writes but it's also
   serializing all accesses to the global NTP variables now. */
extern rwlock_t xtime_lock;

#if !defined(__alpha__) && !defined(__ia64__)

/*
 * sys_time() can be implemented in user-level using
 * sys_gettimeofday(). Is this for backwards compatibility? If so,
 * why not move it into the appropriate arch directory (for those
 * architectures that need it).
 *
 * XXX This function is NOT 64-bit clean!
 */
asmlinkage long sys_time(int * tloc)
{
    struct timeval now;
    int i;

    do_gettimeofday(&now);
    /* Truncates tv_sec into an int; overflows in 2038 on 32-bit time. */
    i = now.tv_sec;
    if (tloc) {
        /* A faulting tloc turns the return value into -EFAULT; note that a
         * legitimate time value could in principle collide with that errno
         * encoding (long-standing quirk of this syscall). */
        if (put_user(i,tloc))
            i = -EFAULT;
    }
    return i;
}

/* NOTE(review): sys_stime() is compiled out under CONFIG_XENO — presumably
 * guest domains must not set the wallclock directly and instead go through
 * the (do_)settimeofday path; confirm against the Xen time architecture. */
#if !defined(CONFIG_XENO)

/*
 * sys_stime() can be implemented in user-level using
 * sys_settimeofday(). Is this for backwards compatibility? If so,
 * why not move it into the appropriate arch directory (for those
 * architectures that need it).
 */

asmlinkage long sys_stime(int * tptr)
{
    int value;

    if (!capable(CAP_SYS_TIME))
        return -EPERM;
    if (get_user(value, tptr))
        return -EFAULT;
    write_lock_irq(&xtime_lock);
    /* vxtime_lock()/vxtime_unlock() bracket every write to xtime here and
     * below — NOTE(review): appears to be the Xen-specific mechanism for
     * keeping a separately visible time record consistent while xtime is
     * updated; confirm against the vxtime implementation. */
    vxtime_lock();
    xtime.tv_sec = value;
    xtime.tv_usec = 0;
    vxtime_unlock();
    time_adjust = 0;    /* stop active adjtime() */
    /* Setting the clock by hand invalidates NTP sync state. */
    time_status |= STA_UNSYNC;
    time_maxerror = NTP_PHASE_LIMIT;
    time_esterror = NTP_PHASE_LIMIT;
    write_unlock_irq(&xtime_lock);
    return 0;
}

#endif

#endif

/*
 * gettimeofday(2): copy the current time (and, if requested, the cached
 * system timezone) out to user space. Either pointer may be NULL.
 * Returns 0, or -EFAULT on a bad user pointer.
 */
asmlinkage long sys_gettimeofday(struct timeval *tv, struct timezone *tz)
{
    if (tv) {
        struct timeval ktv;
        do_gettimeofday(&ktv);
        if (copy_to_user(tv, &ktv, sizeof(ktv)))
            return -EFAULT;
    }
    if (tz) {
        if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
            return -EFAULT;
    }
    return 0;
}

/*
 * Adjust the time obtained from the CMOS to be UTC time instead of
 * local time.
 *
 * This is ugly, but preferable to the alternatives. Otherwise we
 * would either need to write a program to do it in /etc/rc (and risk
 * confusion if the program gets run more than once; it would also be
 * hard to make the program warp the clock precisely n hours) or
 * compile in the timezone information into the kernel. Bad, bad....
 *
 * - TYT, 1992-01-01
 *
 * The best thing to do is to keep the CMOS clock in universal time (UTC)
 * as real UNIX machines always do it. This avoids all headaches about
 * daylight saving times and warping kernel clocks.
 */
inline static void warp_clock(void)
{
    write_lock_irq(&xtime_lock);
    vxtime_lock();
    /* tz_minuteswest is minutes west of Greenwich; shift xtime from local
     * time to UTC by that amount. */
    xtime.tv_sec += sys_tz.tz_minuteswest * 60;
    vxtime_unlock();
    write_unlock_irq(&xtime_lock);
}

/*
 * In case for some reason the CMOS clock has not already been running
 * in UTC, but in some local time: The first time we set the timezone,
 * we will warp the clock so that it is ticking UTC time instead of
 * local time. Presumably, if someone is setting the timezone then we
 * are running in an environment where the programs understand about
 * timezones. This should be done at boot time in the /etc/rc script,
 * as soon as possible, so that the clock can be set right. Otherwise,
 * various programs will get confused when the clock gets warped.
 */

int do_sys_settimeofday(struct timeval *tv, struct timezone *tz)
{
    /* One-shot latch: only the very first timezone set may warp the clock. */
    static int firsttime = 1;

    if (!capable(CAP_SYS_TIME))
        return -EPERM;

    if (tz) {
        /* SMP safe, global irq locking makes it work. */
        sys_tz = *tz;
        if (firsttime) {
            firsttime = 0;
            /* Warp only if no explicit time was supplied alongside the
             * timezone; an explicit tv already defines the correct UTC. */
            if (!tv)
                warp_clock();
        }
    }
    if (tv)
    {
        /* SMP safe, again the code in arch/foo/time.c should
         * globally block out interrupts when it runs.
         */
        do_settimeofday(tv);
    }
    return 0;
}

/*
 * settimeofday(2): copy tv/tz in from user space (either may be NULL)
 * and hand off to do_sys_settimeofday(), which performs the capability
 * check. Returns 0, -EFAULT on a bad pointer, or -EPERM.
 */
asmlinkage long sys_settimeofday(struct timeval *tv, struct timezone *tz)
{
    struct timeval new_tv;
    struct timezone new_tz;

    if (tv) {
        if (copy_from_user(&new_tv, tv, sizeof(*tv)))
            return -EFAULT;
    }
    if (tz) {
        if (copy_from_user(&new_tz, tz, sizeof(*tz)))
            return -EFAULT;
    }

    return do_sys_settimeofday(tv ? &new_tv : NULL, tz ? &new_tz : NULL);
}

/* PPS (pulse-per-second) discipline state, reported through adjtimex()
 * below. All of these are read/written under xtime_lock. */
long pps_offset;                /* pps time offset (us) */
long pps_jitter = MAXTIME;      /* time dispersion (jitter) (us) */

long pps_freq;                  /* frequency offset (scaled ppm) */
long pps_stabil = MAXFREQ;      /* frequency dispersion (scaled ppm) */

long pps_valid = PPS_VALID;     /* pps signal watchdog counter */

int pps_shift = PPS_SHIFT;      /* interval duration (s) (shift) */

long pps_jitcnt;                /* jitter limit exceeded */
long pps_calcnt;                /* calibration intervals */
long pps_errcnt;                /* calibration errors */
long pps_stbcnt;                /* stability limit exceeded */

/* hook for a loadable hardpps kernel module */
void (*hardpps_ptr)(struct timeval *);

/* adjtimex mainly allows reading (and writing, if superuser) of
 * kernel time-keeping variables. used by xntpd.
 *
 * txc->modes selects what (if anything) to write; on return every field
 * of *txc is filled in with the current kernel NTP state. The return
 * value is the clock state (TIME_OK/TIME_ERROR/...) or a negative errno.
 * The "p. NN" comments refer to pages of the Mills technical memorandum
 * named in the file header.
 */
int do_adjtimex(struct timex *txc)
{
    long ltemp, mtemp, save_adjust;
    int result;

    /* In order to modify anything, you gotta be super-user! */
    if (txc->modes && !capable(CAP_SYS_TIME))
        return -EPERM;

    /* Now we validate the data before disabling interrupts */

    if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
        /* singleshot must not be used with any other mode bits */
        if (txc->modes != ADJ_OFFSET_SINGLESHOT)
            return -EINVAL;

    if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET))
        /* adjustment Offset limited to +- .512 seconds */
        if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE )
            return -EINVAL;

    /* if the quartz is off by more than 10% something is VERY wrong ! */
    if (txc->modes & ADJ_TICK)
        if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ)
            return -EINVAL;

    write_lock_irq(&xtime_lock);
    result = time_state;    /* mostly `TIME_OK' */

    /* Save for later - semantics of adjtime is to return old value */
    save_adjust = time_adjust;

#if 0    /* STA_CLOCKERR is never set yet */
    time_status &= ~STA_CLOCKERR;        /* reset STA_CLOCKERR */
#endif
    /* If there are input parameters, then process them */
    if (txc->modes)
    {
        if (txc->modes & ADJ_STATUS)    /* only set allowed bits */
            time_status = (txc->status & ~STA_RONLY) |
                (time_status & STA_RONLY);

        if (txc->modes & ADJ_FREQUENCY) {    /* p. 22 */
            if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) {
                result = -EINVAL;
                goto leave;
            }
            /* Caller supplies total frequency; the kernel variable holds
             * it net of the PPS-derived component. */
            time_freq = txc->freq - pps_freq;
        }

        if (txc->modes & ADJ_MAXERROR) {
            if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) {
                result = -EINVAL;
                goto leave;
            }
            time_maxerror = txc->maxerror;
        }

        if (txc->modes & ADJ_ESTERROR) {
            if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) {
                result = -EINVAL;
                goto leave;
            }
            time_esterror = txc->esterror;
        }

        if (txc->modes & ADJ_TIMECONST) {    /* p. 24 */
            if (txc->constant < 0) {    /* NTP v4 uses values > 6 */
                result = -EINVAL;
                goto leave;
            }
            time_constant = txc->constant;
        }

        if (txc->modes & ADJ_OFFSET) {    /* values checked earlier */
            if (txc->modes == ADJ_OFFSET_SINGLESHOT) {
                /* adjtime() is independent from ntp_adjtime() */
                time_adjust = txc->offset;
            }
            else if ( time_status & (STA_PLL | STA_PPSTIME) ) {
                /* If a valid PPS signal disciplines the time, prefer its
                 * offset over the caller-supplied one. */
                ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) ==
                    (STA_PPSTIME | STA_PPSSIGNAL) ?
                    pps_offset : txc->offset;

                /*
                 * Scale the phase adjustment and
                 * clamp to the operating range.
                 */
                if (ltemp > MAXPHASE)
                    time_offset = MAXPHASE << SHIFT_UPDATE;
                else if (ltemp < -MAXPHASE)
                    time_offset = -(MAXPHASE << SHIFT_UPDATE);
                else
                    time_offset = ltemp << SHIFT_UPDATE;

                /*
                 * Select whether the frequency is to be controlled
                 * and in which mode (PLL or FLL). Clamp to the operating
                 * range. Ugly multiply/divide should be replaced someday.
                 */

                if (time_status & STA_FREQHOLD || time_reftime == 0)
                    time_reftime = xtime.tv_sec;
                /* mtemp = seconds since the previous update: the
                 * calibration interval for the frequency estimate. */
                mtemp = xtime.tv_sec - time_reftime;
                time_reftime = xtime.tv_sec;
                if (time_status & STA_FLL) {
                    if (mtemp >= MINSEC) {
                        ltemp = (time_offset / mtemp) << (SHIFT_USEC -
                                SHIFT_UPDATE);
                        if (ltemp < 0)
                            time_freq -= -ltemp >> SHIFT_KH;
                        else
                            time_freq += ltemp >> SHIFT_KH;
                    } else /* calibration interval too short (p. 12) */
                        result = TIME_ERROR;
                } else {    /* PLL mode */
                    if (mtemp < MAXSEC) {
                        ltemp *= mtemp;
                        /* Negative operand of >> is avoided by negating
                         * first: >> on negatives is implementation-
                         * defined in C. */
                        if (ltemp < 0)
                            time_freq -= -ltemp >> (time_constant +
                                    time_constant +
                                    SHIFT_KF - SHIFT_USEC);
                        else
                            time_freq += ltemp >> (time_constant +
                                    time_constant +
                                    SHIFT_KF - SHIFT_USEC);
                    } else /* calibration interval too long (p. 12) */
                        result = TIME_ERROR;
                }
                if (time_freq > time_tolerance)
                    time_freq = time_tolerance;
                else if (time_freq < -time_tolerance)
                    time_freq = -time_tolerance;
            } /* STA_PLL || STA_PPSTIME */
        } /* txc->modes & ADJ_OFFSET */
        if (txc->modes & ADJ_TICK) {
            /* if the quartz is off by more than 10% something is
               VERY wrong ! */
            /* NOTE(review): this re-checks the same ADJ_TICK range that was
             * already validated before taking xtime_lock above; the second
             * check is redundant but harmless. */
            if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) {
                result = -EINVAL;
                goto leave;
            }
            tick = txc->tick;
        }
    } /* txc->modes */
/* All exits (including -EINVAL ones) fall through here so the current
 * kernel state is still reported back and the lock is released. */
leave:    if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
        || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0
            && (time_status & STA_PPSSIGNAL) == 0)
        /* p. 24, (b) */
        || ((time_status & (STA_PPSTIME|STA_PPSJITTER))
            == (STA_PPSTIME|STA_PPSJITTER))
        /* p. 24, (c) */
        || ((time_status & STA_PPSFREQ) != 0
            && (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0))
        /* p. 24, (d) */
        result = TIME_ERROR;

    if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
        txc->offset = save_adjust;
    else {
        if (time_offset < 0)
            txc->offset = -(-time_offset >> SHIFT_UPDATE);
        else
            txc->offset = time_offset >> SHIFT_UPDATE;
    }
    /* Report the total frequency (kernel component plus PPS component),
     * mirroring the ADJ_FREQUENCY subtraction above. */
    txc->freq = time_freq + pps_freq;
    txc->maxerror = time_maxerror;
    txc->esterror = time_esterror;
    txc->status = time_status;
    txc->constant = time_constant;
    txc->precision = time_precision;
    txc->tolerance = time_tolerance;
    txc->tick = tick;
    txc->ppsfreq = pps_freq;
    txc->jitter = pps_jitter >> PPS_AVG;
    txc->shift = pps_shift;
    txc->stabil = pps_stabil;
    txc->jitcnt = pps_jitcnt;
    txc->calcnt = pps_calcnt;
    txc->errcnt = pps_errcnt;
    txc->stbcnt = pps_stbcnt;
    write_unlock_irq(&xtime_lock);
    /* Timestamp is taken after dropping the lock; do_gettimeofday takes
     * its own locking internally. */
    do_gettimeofday(&txc->time);
    return(result);
}

/*
 * adjtimex(2): syscall wrapper around do_adjtimex(). Copies the user's
 * struct timex in, runs the adjustment, and copies the (always updated)
 * structure back out. Returns the clock state / errno from do_adjtimex(),
 * or -EFAULT on a bad user pointer.
 */
asmlinkage long sys_adjtimex(struct timex *txc_p)
{
    struct timex txc;        /* Local copy of parameter */
    int ret;

    /* Copy the user data space into the kernel copy
     * structure. But bear in mind that the structures
     * may change
     */
    if(copy_from_user(&txc, txc_p, sizeof(struct timex)))
        return -EFAULT;
    ret = do_adjtimex(&txc);
    return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
}