aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>2003-10-27 16:44:00 +0000
committerkaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>2003-10-27 16:44:00 +0000
commite313ab9d890ddc015a53011071ada962b506031f (patch)
tree200b0bd83d6594a9b2e85f0b1b641d8f59a6df65
parent51a83ab036e57d360a7681e70eaf30179f51a5dd (diff)
downloadxen-e313ab9d890ddc015a53011071ada962b506031f.tar.gz
xen-e313ab9d890ddc015a53011071ada962b506031f.tar.bz2
xen-e313ab9d890ddc015a53011071ada962b506031f.zip
bitkeeper revision 1.530 (3f9d4b50eBCuXVk_ImRaxIb9MV1j3g)
time.c: new file Many files: Major fixes to time in Xen and Xenolinux. DOM0 can now set the wallclock time, perhaps using ntpd, for all other domains.
-rw-r--r--.rootkeys1
-rw-r--r--tools/internal/xi_build.c1
-rw-r--r--tools/internal/xi_create.c1
-rw-r--r--tools/internal/xi_destroy.c1
-rw-r--r--tools/internal/xi_list.c1
-rw-r--r--tools/internal/xi_sched_domain.c1
-rw-r--r--tools/internal/xi_sched_global.c1
-rw-r--r--tools/internal/xi_start.c1
-rw-r--r--tools/internal/xi_stop.c2
-rw-r--r--tools/internal/xi_usage.c1
-rw-r--r--tools/internal/xi_vif_params.c1
-rw-r--r--xen/arch/i386/apic.c30
-rw-r--r--xen/arch/i386/i8259.c11
-rw-r--r--xen/arch/i386/setup.c16
-rw-r--r--xen/arch/i386/time.c274
-rw-r--r--xen/common/dom0_ops.c50
-rw-r--r--xen/common/kernel.c1
-rw-r--r--xen/common/schedule.c2
-rw-r--r--xen/common/timer.c570
-rw-r--r--xen/include/hypervisor-ifs/dom0_ops.h15
-rw-r--r--xen/include/hypervisor-ifs/hypervisor-if.h37
-rw-r--r--xen/include/xeno/sched.h2
-rw-r--r--xen/include/xeno/time.h35
-rw-r--r--xen/include/xeno/tqueue.h2
-rw-r--r--xenolinux-2.4.22-sparse/arch/xeno/kernel/time.c468
-rw-r--r--xenolinux-2.4.22-sparse/kernel/time.c415
26 files changed, 1040 insertions, 900 deletions
diff --git a/.rootkeys b/.rootkeys
index 1d3d6cd413..269d0ef9d3 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -639,6 +639,7 @@
3e5a4e68W_hpMlM3u_-QOKMp3gzcwQ xenolinux-2.4.22-sparse/init/do_mounts.c
3e5a4e68TJJavrunYwTAnLRSBxSYqQ xenolinux-2.4.22-sparse/kernel/panic.c
3f1056a9LXNTgSzITNh1mb-MIKV1Ng xenolinux-2.4.22-sparse/kernel/printk.c
+3f9d4b44247udoqWEgFkaHiWv6Uvyg xenolinux-2.4.22-sparse/kernel/time.c
3eba8f878XjouY21EkQBXwYBsPsipQ xenolinux-2.4.22-sparse/lndir-rel
3e6e7c1efbQe93xCvOpOVCnXTMmQ5w xenolinux-2.4.22-sparse/mkbuildtree
3e5a4e68GxCIaFH4sy01v1wjapetaA xenolinux-2.4.22-sparse/mm/memory.c
diff --git a/tools/internal/xi_build.c b/tools/internal/xi_build.c
index 1ba36a1ef4..8f2cc93136 100644
--- a/tools/internal/xi_build.c
+++ b/tools/internal/xi_build.c
@@ -1,5 +1,4 @@
-#include "hypervisor-ifs/dom0_ops.h"
#include "dom0_defs.h"
#include "mem_defs.h"
diff --git a/tools/internal/xi_create.c b/tools/internal/xi_create.c
index d003979731..b5e718caa9 100644
--- a/tools/internal/xi_create.c
+++ b/tools/internal/xi_create.c
@@ -4,7 +4,6 @@
* Usage: <executable> <mem_kb> <os image> <num_vifs>
*/
-#include <hypervisor-ifs/dom0_ops.h>
#include "dom0_defs.h"
#include "mem_defs.h"
diff --git a/tools/internal/xi_destroy.c b/tools/internal/xi_destroy.c
index b3a975e73f..bd91c7941a 100644
--- a/tools/internal/xi_destroy.c
+++ b/tools/internal/xi_destroy.c
@@ -1,5 +1,4 @@
-#include "hypervisor-ifs/dom0_ops.h"
#include "dom0_defs.h"
#include "mem_defs.h"
diff --git a/tools/internal/xi_list.c b/tools/internal/xi_list.c
index ad6b5d3ccc..fc8f4ed73c 100644
--- a/tools/internal/xi_list.c
+++ b/tools/internal/xi_list.c
@@ -21,7 +21,6 @@
*/
#define SILENT_ERRORS_FROM_XEN
-#include "hypervisor-ifs/dom0_ops.h"
#include "dom0_defs.h"
#include "mem_defs.h"
diff --git a/tools/internal/xi_sched_domain.c b/tools/internal/xi_sched_domain.c
index 1408be422b..424bc6d9c4 100644
--- a/tools/internal/xi_sched_domain.c
+++ b/tools/internal/xi_sched_domain.c
@@ -1,5 +1,4 @@
-#include "hypervisor-ifs/dom0_ops.h"
#include "dom0_defs.h"
#include "mem_defs.h"
diff --git a/tools/internal/xi_sched_global.c b/tools/internal/xi_sched_global.c
index 0c07455b9c..16e9d72d89 100644
--- a/tools/internal/xi_sched_global.c
+++ b/tools/internal/xi_sched_global.c
@@ -1,5 +1,4 @@
-#include "hypervisor-ifs/dom0_ops.h"
#include "dom0_defs.h"
#include "mem_defs.h"
diff --git a/tools/internal/xi_start.c b/tools/internal/xi_start.c
index e04fc8b502..23b95ea742 100644
--- a/tools/internal/xi_start.c
+++ b/tools/internal/xi_start.c
@@ -1,5 +1,4 @@
-#include "hypervisor-ifs/dom0_ops.h"
#include "dom0_defs.h"
#include "mem_defs.h"
diff --git a/tools/internal/xi_stop.c b/tools/internal/xi_stop.c
index 1c10fe4eb8..8e5677edca 100644
--- a/tools/internal/xi_stop.c
+++ b/tools/internal/xi_stop.c
@@ -1,4 +1,4 @@
-#include "hypervisor-ifs/dom0_ops.h"
+
#include "dom0_defs.h"
#include "mem_defs.h"
diff --git a/tools/internal/xi_usage.c b/tools/internal/xi_usage.c
index 3cd61431e5..59d4f7648a 100644
--- a/tools/internal/xi_usage.c
+++ b/tools/internal/xi_usage.c
@@ -1,5 +1,4 @@
-#include "hypervisor-ifs/dom0_ops.h"
#include "dom0_defs.h"
#include "mem_defs.h"
diff --git a/tools/internal/xi_vif_params.c b/tools/internal/xi_vif_params.c
index e954253f04..a525c68058 100644
--- a/tools/internal/xi_vif_params.c
+++ b/tools/internal/xi_vif_params.c
@@ -1,5 +1,4 @@
-#include "hypervisor-ifs/dom0_ops.h"
#include "dom0_defs.h"
#include "mem_defs.h"
diff --git a/xen/arch/i386/apic.c b/xen/arch/i386/apic.c
index f2ea5bd4a0..0f4b9a3538 100644
--- a/xen/arch/i386/apic.c
+++ b/xen/arch/i386/apic.c
@@ -713,36 +713,10 @@ int reprogram_ac_timer(s_time_t timeout)
/*
* Local timer interrupt handler.
- * here the programmable, accurate timers are executed.
- * If we are on CPU0 and we should have updated jiffies, we do this
- * as well and and deal with traditional linux timers. Note, that of
- * the timer APIC on CPU does not go off every 10ms or so the linux
- * timers loose accuracy, but that shouldn't be a problem.
+ * Here the programmable, accurate timers are executed.
*/
-static s_time_t last_cpu0_tirq = 0;
-inline void smp_local_timer_interrupt(struct pt_regs * regs)
+inline void smp_local_timer_interrupt(struct pt_regs *regs)
{
- int cpu = smp_processor_id();
- s_time_t diff, now;
-
- /* if CPU 0 do old timer stuff */
- if (cpu == 0)
- {
- now = NOW();
- diff = now - last_cpu0_tirq;
-
- if (diff <= 0) {
- printk ("System Time went backwards: %lld\n", diff);
- return;
- }
-
- while (diff >= MILLISECS(10)) {
- do_timer(regs);
- diff -= MILLISECS(10);
- last_cpu0_tirq += MILLISECS(10);
- }
- }
- /* call accurate timer function */
do_ac_timer();
}
diff --git a/xen/arch/i386/i8259.c b/xen/arch/i386/i8259.c
index 645b7b0fef..53d83e75fe 100644
--- a/xen/arch/i386/i8259.c
+++ b/xen/arch/i386/i8259.c
@@ -468,14 +468,3 @@ void __init init_IRQ(void)
setup_irq(2, &irq2);
}
-/*
- * we only need the timer interrupt for callibrating the tsc<->time<->bus cycle
- * mappings. After this all timeing related functions should be run of the
- * APIC timers. This function allows us to disable the
- */
-void __init disable_pit(void)
-{
- printk("Disable PIT. Not needed anymore\n");
- /* This is not the most elegant way, but hey. */
- disable_irq(0);
-}
diff --git a/xen/arch/i386/setup.c b/xen/arch/i386/setup.c
index cc74655463..90d3b8fca0 100644
--- a/xen/arch/i386/setup.c
+++ b/xen/arch/i386/setup.c
@@ -310,10 +310,7 @@ void __init start_of_day(void)
extern void time_init(void);
extern void softirq_init(void);
extern void timer_bh(void);
- extern void tqueue_bh(void);
- extern void immediate_bh(void);
extern void init_timervecs(void);
- extern void disable_pit(void);
extern void ac_timer_init(void);
extern int setup_network_devices(void);
extern void net_init(void);
@@ -366,8 +363,6 @@ void __init start_of_day(void)
softirq_init();
init_timervecs();
init_bh(TIMER_BH, timer_bh);
- init_bh(TQUEUE_BH, tqueue_bh);
- init_bh(IMMEDIATE_BH, immediate_bh);
init_apic_mappings(); /* make APICs addressable in our pagetables. */
#ifndef CONFIG_SMP
@@ -384,14 +379,15 @@ void __init start_of_day(void)
* fall thru to 8259A if we have to (but slower).
*/
#endif
+
initialize_keytable(); /* call back handling for key codes */
- if ( cpu_has_apic )
- disable_pit();
- else if ( smp_num_cpus != 1 )
- panic("We really need local APICs on SMP machines!");
- else
+ if ( !cpu_has_apic )
+ {
do_timer_lists_from_pit = 1;
+ if ( smp_num_cpus != 1 )
+ panic("We need local APICs on SMP machines!");
+ }
ac_timer_init(); /* init accurate timers */
init_xeno_time(); /* initialise the time */
diff --git a/xen/arch/i386/time.c b/xen/arch/i386/time.c
index ae7ecae1a2..395b15b68f 100644
--- a/xen/arch/i386/time.c
+++ b/xen/arch/i386/time.c
@@ -43,8 +43,10 @@
#define TRC(_x)
#endif
-/* GLOBALS */
+extern rwlock_t xtime_lock;
+extern unsigned long wall_jiffies;
+/* GLOBAL */
unsigned long cpu_khz; /* Detected as we calibrate the TSC */
unsigned long ticks_per_usec; /* TSC ticks per microsecond. */
spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
@@ -52,39 +54,22 @@ int timer_ack = 0;
int do_timer_lists_from_pit = 0;
/* PRIVATE */
-
static unsigned int rdtsc_bitshift; /* Which 32 bits of TSC do we use? */
-static unsigned long init_cmos_time; /* RTC time when system time == 0 */
-static u64 cpu_freqs[3]; /* Slow/correct/fast CPU frequencies */
-static u64 cpu_freq; /* Currently-selected CPU frequency */
+static u64 cpu_freq; /* CPU frequency (Hz) */
static u32 st_scale_f; /* Cycles -> ns, fractional part */
static u32 st_scale_i; /* Cycles -> ns, integer part */
-static struct ac_timer update_timer; /* Periodic 'time update' function */
-static spinlock_t stime_lock; /* Lock for accessing sys & wc time */
-struct timeval wall_clock_time; /* WC time at last 'time update' */
static u32 tsc_irq; /* CPU0's TSC at last 'time update' */
static s_time_t stime_irq; /* System time at last 'time update' */
-/*
- * The scale update period is not a whole number of seconds since we want to
- * avoid being in sync with the CMOS update-in-progress flag.
- */
-#define SCALE_UPDATE_PERIOD MILLISECS(50200)
-#define TIME_UPDATE_PERIOD MILLISECS(200)
+static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ u64 full_tsc;
+ write_lock(&xtime_lock);
-static inline void do_timer_interrupt(
- int irq, void *dev_id, struct pt_regs *regs)
-{
#ifdef CONFIG_X86_IO_APIC
if ( timer_ack )
{
- /*
- * Subtle, when I/O APICs are used we have to ack timer IRQ manually
- * to reset the IRR bit for do_slow_gettimeoffset(). This will also
- * deassert NMI lines for the watchdog if run on an 82489DX-based
- * system.
- */
extern spinlock_t i8259A_lock;
spin_lock(&i8259A_lock);
outb(0x0c, 0x20);
@@ -93,22 +78,27 @@ static inline void do_timer_interrupt(
spin_unlock(&i8259A_lock);
}
#endif
+
+ /*
+ * Updates TSC timestamp (used to interpolate passage of time between
+ * interrupts).
+ */
+ rdtscll(full_tsc);
+ tsc_irq = (u32)(full_tsc >> rdtsc_bitshift);
+ /* Updates xtime (wallclock time). */
do_timer(regs);
+ /* Updates system time (nanoseconds since boot). */
+ stime_irq += MILLISECS(1000/HZ);
+
+ write_unlock(&xtime_lock);
+
+ /* Rough hack to allow accurate timers to sort-of-work with no APIC. */
if ( do_timer_lists_from_pit )
do_ac_timer();
}
-/*
- * This is only temporarily. Once the APIC s up and running this
- * timer interrupt is turned off.
- */
-static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
- do_timer_interrupt(irq, NULL, regs);
-}
-
static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0,
"timer", NULL, NULL};
@@ -222,27 +212,6 @@ static unsigned long __get_cmos_time(void)
return mktime(year, mon, day, hour, min, sec);
}
-/* This version is fast: it bails if there's an update in progress. */
-static unsigned long maybe_get_cmos_time(void)
-{
- unsigned long ct, retval = 0, flags;
-
- spin_lock_irqsave(&rtc_lock, flags);
-
- if ( (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) )
- goto out;
-
- ct = __get_cmos_time();
-
- if ( !(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) )
- retval = ct;
-
- out:
- spin_unlock_irqrestore(&rtc_lock, flags);
- return retval;
-}
-
-/* This version spins until it definitely reads a valid time from CMOS RAM. */
static unsigned long get_cmos_time(void)
{
unsigned long res, flags;
@@ -264,17 +233,16 @@ static unsigned long get_cmos_time(void)
return res;
}
-
/***************************************************************************
* System Time
***************************************************************************/
-static inline s_time_t __get_s_time(void)
+static inline u64 get_time_delta(void)
{
s32 delta_tsc;
u32 low;
u64 delta, tsc;
-
+
rdtscll(tsc);
low = (u32)(tsc >> rdtsc_bitshift);
delta_tsc = (s32)(low - tsc_irq);
@@ -283,162 +251,76 @@ static inline s_time_t __get_s_time(void)
delta >>= 32;
delta += ((u64)delta_tsc * st_scale_i);
- return stime_irq + delta;
+ return delta;
}
s_time_t get_s_time(void)
{
s_time_t now;
unsigned long flags;
- spin_lock_irqsave(&stime_lock, flags);
- now = __get_s_time();
- spin_unlock_irqrestore(&stime_lock, flags);
+ read_lock_irqsave(&xtime_lock, flags);
+ now = stime_irq + get_time_delta();
+ read_unlock_irqrestore(&xtime_lock, flags);
return now;
}
-void do_gettimeofday(struct timeval *tv)
+void update_dom_time(shared_info_t *si)
{
unsigned long flags;
- unsigned long usec, sec;
- spin_lock_irqsave(&stime_lock, flags);
- usec = ((unsigned long)(__get_s_time() - stime_irq))/1000;
- sec = wall_clock_time.tv_sec;
- usec += wall_clock_time.tv_usec;
- spin_unlock_irqrestore(&stime_lock, flags);
+ read_lock_irqsave(&xtime_lock, flags);
- while ( usec >= 1000000 )
- {
- usec -= 1000000;
- sec++;
- }
+ si->time_version1++;
+ wmb();
- tv->tv_sec = sec;
- tv->tv_usec = usec;
-}
-
-void do_settimeofday(struct timeval *tv)
-{
- printk("XXX: do_settimeofday not implemented\n");
-}
-
-
-/***************************************************************************
- * Update times
- ***************************************************************************/
-
-void update_dom_time(shared_info_t *si)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&stime_lock, flags);
+ /* NB. These two values don't actually ever change. */
si->cpu_freq = cpu_freq;
si->rdtsc_bitshift = rdtsc_bitshift;
+
si->system_time = stime_irq;
- si->st_timestamp = tsc_irq;
- si->tv_sec = wall_clock_time.tv_sec;
- si->tv_usec = wall_clock_time.tv_usec;
- si->wc_timestamp = stime_irq;
- si->wc_version++;
- spin_unlock_irqrestore(&stime_lock, flags);
-}
+ si->tsc_timestamp = tsc_irq;
+ si->wc_sec = xtime.tv_sec;
+ si->wc_usec = xtime.tv_usec;
+ si->wc_usec += (jiffies - wall_jiffies) * (1000000 / HZ);
+ while ( si->wc_usec >= 1000000 )
+ {
+ si->wc_usec -= 1000000;
+ si->wc_sec++;
+ }
-/*
- * VERY crude way to keep system time from drfiting.
- * Update the scaling factor using the RTC
- * This is done periodically of it's own timer
- * We maintain an array of cpu frequencies.
- * - index 0 -> go slower
- * - index 1 -> frequency as determined during calibration
- * - index 2 -> go faster
- *
- * NB2. Note that update_scale is called from update_time with the stime_lock
- * still held. This is because we must only slow down cpu_freq at a timebase
- * change. If we did it in the middle of an update period then time would
- * seem to jump backwards since BASE+OLD_FREQ*DIFF > BASE+NEW_FREQ*DIFF.
- */
-static void update_scale(void)
-{
- unsigned long cmos_time;
- u32 st, ct;
- s32 dt;
- u64 scale;
- int freq_index;
-
- if ( (cmos_time = maybe_get_cmos_time()) == 0 )
- return;
-
- ct = (u32)(cmos_time - init_cmos_time);
- st = (u32)(stime_irq/SECONDS(1));
- dt = (s32)(ct - st);
-
- /* Work out adjustment to scaling factor. Allow +/- 1s drift. */
- if ( dt < -1 )
- freq_index = 0; /* go slower */
- else if ( dt > 1 )
- freq_index = 2; /* go faster */
- else
- freq_index = 1; /* correct speed */
-
- if ( (dt <= -10) || (dt >= 10) )
- printk("Large time drift (cmos time - system time = %ds)\n", dt);
-
- /* set new frequency */
- cpu_freq = cpu_freqs[freq_index];
-
- /* adjust scaling factor */
- scale = 1000000000LL << (32 + rdtsc_bitshift);
- scale /= cpu_freq;
- st_scale_f = scale & 0xffffffff;
- st_scale_i = scale >> 32;
+ wmb();
+ si->time_version2++;
+
+ read_unlock_irqrestore(&xtime_lock, flags);
}
-static void update_time(unsigned long unused)
+/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
+void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
{
- unsigned long flags;
- s_time_t new_st;
- unsigned long usec;
- u64 full_tsc;
- static int calls_since_scale_update = 0;
+ s64 delta;
+ long _usecs = (long)usecs;
- spin_lock_irqsave(&stime_lock, flags);
+ write_lock_irq(&xtime_lock);
- rdtscll(full_tsc);
- new_st = __get_s_time();
+ delta = (s64)(stime_irq - system_time_base);
- /* Update wall clock time. */
- usec = ((unsigned long)(new_st - stime_irq))/1000;
- usec += wall_clock_time.tv_usec;
- while ( usec >= 1000000 )
- {
- usec -= 1000000;
- wall_clock_time.tv_sec++;
- }
- wall_clock_time.tv_usec = usec;
-
- /* Update system time. */
- stime_irq = new_st;
- tsc_irq = (u32)(full_tsc >> rdtsc_bitshift);
+ _usecs += (long)(delta/1000);
+ _usecs -= (jiffies - wall_jiffies) * (1000000 / HZ);
- /* Maybe update our rate to be in sync with the RTC. */
- if ( ++calls_since_scale_update >=
- (SCALE_UPDATE_PERIOD/TIME_UPDATE_PERIOD) )
+ while ( _usecs < 0 )
{
- update_scale();
- calls_since_scale_update = 0;
- }
+ _usecs += 1000000;
+ secs--;
+ }
- spin_unlock_irqrestore(&stime_lock, flags);
+ xtime.tv_sec = secs;
+ xtime.tv_usec = _usecs;
- TRC(printk("TIME[%02d] update time: stime_irq=%lld now=%lld,wct=%ld:%ld\n",
- smp_processor_id(), stime_irq, new_st, wall_clock_time.tv_sec,
- wall_clock_time.tv_usec));
+ write_unlock_irq(&xtime_lock);
- /* Reload the timer. */
- update_timer.expires = new_st + TIME_UPDATE_PERIOD;
- add_ac_timer(&update_timer);
+ update_dom_time(current->shared_info);
}
@@ -446,21 +328,22 @@ static void update_time(unsigned long unused)
int __init init_xeno_time()
{
u64 scale;
- s64 freq_off;
u64 full_tsc;
unsigned int cpu_ghz;
- spin_lock_init(&stime_lock);
-
cpu_ghz = (unsigned int)(cpu_freq / 1000000000ULL);
for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 )
continue;
- /* Calculate adjusted frequencies: +/- 0.1% */
- freq_off = cpu_freq/1000;
- cpu_freqs[0] = cpu_freq + freq_off;
- cpu_freqs[1] = cpu_freq;
- cpu_freqs[2] = cpu_freq - freq_off;
+ /*
+ * We actually adjust cpu_freq to be 0.001% slower than the real
+ * frequency. This makes time run a little bit slower when interpolating
+ * the passage of time between periodic interrupts, so we expect a little
+ * jump in time whenever an interrupt comes in (roughly 100ns every 10ms).
+ * However, this should avoid us consistently running too fast and jumping
+ * _backwards_ on each interrupt, which would be much worse!
+ */
+ cpu_freq = cpu_freq - (cpu_freq / 100000ULL);
scale = 1000000000LL << (32 + rdtsc_bitshift);
scale /= cpu_freq;
@@ -473,15 +356,8 @@ int __init init_xeno_time()
tsc_irq = (u32)(full_tsc >> rdtsc_bitshift);
/* Wallclock time starts as the initial RTC time. */
- wall_clock_time.tv_sec = init_cmos_time = get_cmos_time();
- wall_clock_time.tv_usec = 0;
-
- /* Start timer to periodically update time and frequency scale. */
- init_ac_timer(&update_timer, 0);
- update_timer.data = 1;
- update_timer.function = &update_time;
- update_time(0);
-
+ xtime.tv_sec = get_cmos_time();
+
printk("Time init:\n");
printk(".... System Time: %lldns\n",
NOW());
@@ -490,7 +366,7 @@ int __init init_xeno_time()
printk(".... scale: %08X:%08X\n",
(u32)(scale>>32), (u32)scale);
printk(".... Wall Clock: %lds %ldus\n",
- wall_clock_time.tv_sec, wall_clock_time.tv_usec);
+ xtime.tv_sec, xtime.tv_usec);
return 0;
}
diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c
index 408f1a763d..d5d5e278f1 100644
--- a/xen/common/dom0_ops.c
+++ b/xen/common/dom0_ops.c
@@ -284,39 +284,47 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
case DOM0_MSR:
{
- if (op.u.msr.write)
+ if (op.u.msr.write)
{
- msr_cpu_mask = op.u.msr.cpu_mask;
- msr_addr = op.u.msr.msr;
- msr_lo = op.u.msr.in1;
- msr_hi = op.u.msr.in2;
- smp_call_function(write_msr_for, NULL, 1, 1);
- write_msr_for(NULL);
+ msr_cpu_mask = op.u.msr.cpu_mask;
+ msr_addr = op.u.msr.msr;
+ msr_lo = op.u.msr.in1;
+ msr_hi = op.u.msr.in2;
+ smp_call_function(write_msr_for, NULL, 1, 1);
+ write_msr_for(NULL);
}
- else
+ else
{
- msr_cpu_mask = op.u.msr.cpu_mask;
- msr_addr = op.u.msr.msr;
- smp_call_function(read_msr_for, NULL, 1, 1);
- read_msr_for(NULL);
-
- op.u.msr.out1 = msr_lo;
- op.u.msr.out2 = msr_hi;
- copy_to_user(u_dom0_op, &op, sizeof(op));
+ msr_cpu_mask = op.u.msr.cpu_mask;
+ msr_addr = op.u.msr.msr;
+ smp_call_function(read_msr_for, NULL, 1, 1);
+ read_msr_for(NULL);
+
+ op.u.msr.out1 = msr_lo;
+ op.u.msr.out2 = msr_hi;
+ copy_to_user(u_dom0_op, &op, sizeof(op));
}
- ret = 0;
+ ret = 0;
}
break;
case DOM0_DEBUG:
{
- op.u.debug.out1 = op.u.debug.in2 + 1;
- op.u.debug.out2 = op.u.debug.in1 + 1;
- copy_to_user(u_dom0_op, &op, sizeof(op));
- ret = 0;
+ op.u.debug.out1 = op.u.debug.in2 + 1;
+ op.u.debug.out2 = op.u.debug.in1 + 1;
+ copy_to_user(u_dom0_op, &op, sizeof(op));
+ ret = 0;
}
break;
+ case DOM0_SETTIME:
+ {
+ do_settime(op.u.settime.secs,
+ op.u.settime.usecs,
+ op.u.settime.system_time);
+ ret = 0;
+ }
+ break;
default:
ret = -ENOSYS;
diff --git a/xen/common/kernel.c b/xen/common/kernel.c
index a410fd1155..299215eb7d 100644
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -227,7 +227,6 @@ void cmain (unsigned long magic, multiboot_info_t *mbi)
(mod[1].mod_end - mod[1].mod_start):0)
!= 0 ) panic("Could not set up DOM0 guest OS\n");
- update_dom_time(new_dom->shared_info);
wake_up(new_dom);
startup_cpu_idle_loop();
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index d95a287c93..9cd98b65a5 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -535,6 +535,8 @@ static void virt_timer(unsigned long foo)
do {
if ( is_idle_task(p) ) continue;
cpu_mask |= mark_guest_event(p, _EVENT_TIMER);
+ if ( p->has_cpu )
+ update_dom_time(p->shared_info);
}
while ( (p = p->next_task) != &idle0_task );
read_unlock(&tasklist_lock);
diff --git a/xen/common/timer.c b/xen/common/timer.c
index bdcda32d89..c0e7d3a393 100644
--- a/xen/common/timer.c
+++ b/xen/common/timer.c
@@ -22,58 +22,15 @@
#include <linux/timex.h>
#include <linux/tqueue.h>
#include <linux/delay.h>
-//#include <linux/smp_lock.h>
#include <linux/interrupt.h>
-//#include <linux/kernel_stat.h>
#include <xeno/event.h>
#include <asm/uaccess.h>
-/*
- * Timekeeping variables
- */
-
-long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */
-
-/* The current time */
struct timeval xtime __attribute__ ((aligned (16)));
-
-/* Don't completely fail for HZ > 500. */
-int tickadj = 500/HZ ? : 1; /* microsecs */
-
-DECLARE_TASK_QUEUE(tq_timer);
-DECLARE_TASK_QUEUE(tq_immediate);
-
-/*
- * phase-lock loop variables
- */
-/* TIME_ERROR prevents overwriting the CMOS clock */
-int time_state = TIME_OK; /* clock synchronization status */
-int time_status = STA_UNSYNC; /* clock status bits */
-long time_offset; /* time adjustment (us) */
-long time_constant = 2; /* pll time constant */
-long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */
-long time_precision = 1; /* clock precision (us) */
-long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
-long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
-long time_phase; /* phase offset (scaled us) */
-long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
- /* frequency offset (scaled ppm)*/
-long time_adj; /* tick adjust (scaled 1 / HZ) */
-long time_reftime; /* time at last adjustment (s) */
-
-long time_adjust;
-long time_adjust_step;
-
-unsigned long event;
-
unsigned long volatile jiffies;
-unsigned int * prof_buffer;
-unsigned long prof_len;
-unsigned long prof_shift;
-
/*
* Event timer code
*/
@@ -85,13 +42,13 @@ unsigned long prof_shift;
#define TVR_MASK (TVR_SIZE - 1)
struct timer_vec {
- int index;
- struct list_head vec[TVN_SIZE];
+ int index;
+ struct list_head vec[TVN_SIZE];
};
struct timer_vec_root {
- int index;
- struct list_head vec[TVR_SIZE];
+ int index;
+ struct list_head vec[TVR_SIZE];
};
static struct timer_vec tv5;
@@ -101,65 +58,65 @@ static struct timer_vec tv2;
static struct timer_vec_root tv1;
static struct timer_vec * const tvecs[] = {
- (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
+ (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
};
#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
void init_timervecs (void)
{
- int i;
-
- for (i = 0; i < TVN_SIZE; i++) {
- INIT_LIST_HEAD(tv5.vec + i);
- INIT_LIST_HEAD(tv4.vec + i);
- INIT_LIST_HEAD(tv3.vec + i);
- INIT_LIST_HEAD(tv2.vec + i);
- }
- for (i = 0; i < TVR_SIZE; i++)
- INIT_LIST_HEAD(tv1.vec + i);
+ int i;
+
+ for (i = 0; i < TVN_SIZE; i++) {
+ INIT_LIST_HEAD(tv5.vec + i);
+ INIT_LIST_HEAD(tv4.vec + i);
+ INIT_LIST_HEAD(tv3.vec + i);
+ INIT_LIST_HEAD(tv2.vec + i);
+ }
+ for (i = 0; i < TVR_SIZE; i++)
+ INIT_LIST_HEAD(tv1.vec + i);
}
static unsigned long timer_jiffies;
static inline void internal_add_timer(struct timer_list *timer)
{
- /*
- * must be cli-ed when calling this
- */
- unsigned long expires = timer->expires;
- unsigned long idx = expires - timer_jiffies;
- struct list_head * vec;
-
- if (idx < TVR_SIZE) {
- int i = expires & TVR_MASK;
- vec = tv1.vec + i;
- } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
- int i = (expires >> TVR_BITS) & TVN_MASK;
- vec = tv2.vec + i;
- } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
- int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
- vec = tv3.vec + i;
- } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
- int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
- vec = tv4.vec + i;
- } else if ((signed long) idx < 0) {
- /* can happen if you add a timer with expires == jiffies,
+ /*
+ * must be cli-ed when calling this
+ */
+ unsigned long expires = timer->expires;
+ unsigned long idx = expires - timer_jiffies;
+ struct list_head * vec;
+
+ if (idx < TVR_SIZE) {
+ int i = expires & TVR_MASK;
+ vec = tv1.vec + i;
+ } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
+ int i = (expires >> TVR_BITS) & TVN_MASK;
+ vec = tv2.vec + i;
+ } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
+ vec = tv3.vec + i;
+ } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
+ vec = tv4.vec + i;
+ } else if ((signed long) idx < 0) {
+ /* can happen if you add a timer with expires == jiffies,
* or you set a timer to go off in the past
*/
- vec = tv1.vec + tv1.index;
- } else if (idx <= 0xffffffffUL) {
- int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
- vec = tv5.vec + i;
- } else {
- /* Can only get here on architectures with 64-bit jiffies */
- INIT_LIST_HEAD(&timer->list);
- return;
- }
- /*
+ vec = tv1.vec + tv1.index;
+ } else if (idx <= 0xffffffffUL) {
+ int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
+ vec = tv5.vec + i;
+ } else {
+ /* Can only get here on architectures with 64-bit jiffies */
+ INIT_LIST_HEAD(&timer->list);
+ return;
+ }
+ /*
* Timers are FIFO!
*/
- list_add(&timer->list, vec->prev);
+ list_add(&timer->list, vec->prev);
}
/* Initialize both explicitly - let's try to have them in the same cache line */
@@ -178,57 +135,57 @@ volatile struct timer_list * volatile running_timer;
void add_timer(struct timer_list *timer)
{
- unsigned long flags;
-
- spin_lock_irqsave(&timerlist_lock, flags);
- if (timer_pending(timer))
- goto bug;
- internal_add_timer(timer);
- spin_unlock_irqrestore(&timerlist_lock, flags);
- return;
-bug:
- spin_unlock_irqrestore(&timerlist_lock, flags);
- printk("bug: kernel timer added twice at %p.\n",
- __builtin_return_address(0));
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ if (timer_pending(timer))
+ goto bug;
+ internal_add_timer(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return;
+ bug:
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ printk("bug: kernel timer added twice at %p.\n",
+ __builtin_return_address(0));
}
static inline int detach_timer (struct timer_list *timer)
{
- if (!timer_pending(timer))
- return 0;
- list_del(&timer->list);
- return 1;
+ if (!timer_pending(timer))
+ return 0;
+ list_del(&timer->list);
+ return 1;
}
int mod_timer(struct timer_list *timer, unsigned long expires)
{
- int ret;
- unsigned long flags;
-
- spin_lock_irqsave(&timerlist_lock, flags);
- timer->expires = expires;
- ret = detach_timer(timer);
- internal_add_timer(timer);
- spin_unlock_irqrestore(&timerlist_lock, flags);
- return ret;
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ timer->expires = expires;
+ ret = detach_timer(timer);
+ internal_add_timer(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return ret;
}
int del_timer(struct timer_list * timer)
{
- int ret;
- unsigned long flags;
-
- spin_lock_irqsave(&timerlist_lock, flags);
- ret = detach_timer(timer);
- timer->list.next = timer->list.prev = NULL;
- spin_unlock_irqrestore(&timerlist_lock, flags);
- return ret;
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ ret = detach_timer(timer);
+ timer->list.next = timer->list.prev = NULL;
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return ret;
}
#ifdef CONFIG_SMP
void sync_timers(void)
{
- spin_unlock_wait(&global_bh_lock);
+ spin_unlock_wait(&global_bh_lock);
}
/*
@@ -241,269 +198,104 @@ void sync_timers(void)
int del_timer_sync(struct timer_list * timer)
{
- int ret = 0;
+ int ret = 0;
- for (;;) {
- unsigned long flags;
- int running;
+ for (;;) {
+ unsigned long flags;
+ int running;
- spin_lock_irqsave(&timerlist_lock, flags);
- ret += detach_timer(timer);
- timer->list.next = timer->list.prev = 0;
- running = timer_is_running(timer);
- spin_unlock_irqrestore(&timerlist_lock, flags);
+ spin_lock_irqsave(&timerlist_lock, flags);
+ ret += detach_timer(timer);
+ timer->list.next = timer->list.prev = 0;
+ running = timer_is_running(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
- if (!running)
- break;
+ if (!running)
+ break;
- timer_synchronize(timer);
- }
+ timer_synchronize(timer);
+ }
- return ret;
+ return ret;
}
#endif
static inline void cascade_timers(struct timer_vec *tv)
{
- /* cascade all the timers from tv up one level */
- struct list_head *head, *curr, *next;
-
- head = tv->vec + tv->index;
- curr = head->next;
- /*
- * We are removing _all_ timers from the list, so we don't have to
- * detach them individually, just clear the list afterwards.
+ /* cascade all the timers from tv up one level */
+ struct list_head *head, *curr, *next;
+
+ head = tv->vec + tv->index;
+ curr = head->next;
+ /*
+ * We are removing _all_ timers from the list, so we don't have to
+ * detach them individually, just clear the list afterwards.
*/
- while (curr != head) {
- struct timer_list *tmp;
-
- tmp = list_entry(curr, struct timer_list, list);
- next = curr->next;
- list_del(curr); /* not needed */
- internal_add_timer(tmp);
- curr = next;
- }
- INIT_LIST_HEAD(head);
- tv->index = (tv->index + 1) & TVN_MASK;
+ while (curr != head) {
+ struct timer_list *tmp;
+
+ tmp = list_entry(curr, struct timer_list, list);
+ next = curr->next;
+ list_del(curr); /* not needed */
+ internal_add_timer(tmp);
+ curr = next;
+ }
+ INIT_LIST_HEAD(head);
+ tv->index = (tv->index + 1) & TVN_MASK;
}
static inline void run_timer_list(void)
{
- spin_lock_irq(&timerlist_lock);
- while ((long)(jiffies - timer_jiffies) >= 0) {
- struct list_head *head, *curr;
- if (!tv1.index) {
- int n = 1;
- do {
- cascade_timers(tvecs[n]);
- } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
- }
-repeat:
- head = tv1.vec + tv1.index;
- curr = head->next;
- if (curr != head) {
- struct timer_list *timer;
- void (*fn)(unsigned long);
- unsigned long data;
-
- timer = list_entry(curr, struct timer_list, list);
- fn = timer->function;
- data= timer->data;
-
- detach_timer(timer);
- timer->list.next = timer->list.prev = NULL;
- timer_enter(timer);
- spin_unlock_irq(&timerlist_lock);
- fn(data);
- spin_lock_irq(&timerlist_lock);
- timer_exit();
- goto repeat;
- }
- ++timer_jiffies;
- tv1.index = (tv1.index + 1) & TVR_MASK;
- }
- spin_unlock_irq(&timerlist_lock);
-}
-
-spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;
-
-void tqueue_bh(void)
-{
- run_task_queue(&tq_timer);
-}
-
-void immediate_bh(void)
-{
- run_task_queue(&tq_immediate);
-}
-
-/*
- * this routine handles the overflow of the microsecond field
- *
- * The tricky bits of code to handle the accurate clock support
- * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
- * They were originally developed for SUN and DEC kernels.
- * All the kudos should go to Dave for this stuff.
- *
- */
-static void second_overflow(void)
-{
- long ltemp;
-
- /* Bump the maxerror field */
- time_maxerror += time_tolerance >> SHIFT_USEC;
- if ( time_maxerror > NTP_PHASE_LIMIT ) {
- time_maxerror = NTP_PHASE_LIMIT;
- time_status |= STA_UNSYNC;
+ spin_lock_irq(&timerlist_lock);
+ while ((long)(jiffies - timer_jiffies) >= 0) {
+ struct list_head *head, *curr;
+ if (!tv1.index) {
+ int n = 1;
+ do {
+ cascade_timers(tvecs[n]);
+ } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
+ }
+ repeat:
+ head = tv1.vec + tv1.index;
+ curr = head->next;
+ if (curr != head) {
+ struct timer_list *timer;
+ void (*fn)(unsigned long);
+ unsigned long data;
+
+ timer = list_entry(curr, struct timer_list, list);
+ fn = timer->function;
+ data= timer->data;
+
+ detach_timer(timer);
+ timer->list.next = timer->list.prev = NULL;
+ timer_enter(timer);
+ spin_unlock_irq(&timerlist_lock);
+ fn(data);
+ spin_lock_irq(&timerlist_lock);
+ timer_exit();
+ goto repeat;
+ }
+ ++timer_jiffies;
+ tv1.index = (tv1.index + 1) & TVR_MASK;
}
-
- /*
- * Leap second processing. If in leap-insert state at
- * the end of the day, the system clock is set back one
- * second; if in leap-delete state, the system clock is
- * set ahead one second. The microtime() routine or
- * external clock driver will insure that reported time
- * is always monotonic. The ugly divides should be
- * replaced.
- */
- switch (time_state) {
-
- case TIME_OK:
- if (time_status & STA_INS)
- time_state = TIME_INS;
- else if (time_status & STA_DEL)
- time_state = TIME_DEL;
- break;
-
- case TIME_INS:
- if (xtime.tv_sec % 86400 == 0) {
- xtime.tv_sec--;
- time_state = TIME_OOP;
- printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
- }
- break;
-
- case TIME_DEL:
- if ((xtime.tv_sec + 1) % 86400 == 0) {
- xtime.tv_sec++;
- time_state = TIME_WAIT;
- printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
- }
- break;
-
- case TIME_OOP:
- time_state = TIME_WAIT;
- break;
-
- case TIME_WAIT:
- if (!(time_status & (STA_INS | STA_DEL)))
- time_state = TIME_OK;
- }
-
- /*
- * Compute the phase adjustment for the next second. In
- * PLL mode, the offset is reduced by a fixed factor
- * times the time constant. In FLL mode the offset is
- * used directly. In either mode, the maximum phase
- * adjustment for each second is clamped so as to spread
- * the adjustment over not more than the number of
- * seconds between updates.
- */
- if (time_offset < 0) {
- ltemp = -time_offset;
- if (!(time_status & STA_FLL))
- ltemp >>= SHIFT_KG + time_constant;
- if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
- ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
- time_offset += ltemp;
- time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
- } else {
- ltemp = time_offset;
- if (!(time_status & STA_FLL))
- ltemp >>= SHIFT_KG + time_constant;
- if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
- ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
- time_offset -= ltemp;
- time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
- }
-
- if (ltemp < 0)
- time_adj -= -ltemp >>
- (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
- else
- time_adj += ltemp >>
- (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
-
-#if HZ == 100
- /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
- * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
- */
- if (time_adj < 0)
- time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
- else
- time_adj += (time_adj >> 2) + (time_adj >> 5);
-#endif
+ spin_unlock_irq(&timerlist_lock);
}
-/* in the NTP reference this is called "hardclock()" */
-static void update_wall_time_one_tick(void)
-{
- if ( (time_adjust_step = time_adjust) != 0 ) {
- /* We are doing an adjtime thing.
- *
- * Prepare time_adjust_step to be within bounds.
- * Note that a positive time_adjust means we want the clock
- * to run faster.
- *
- * Limit the amount of the step to be in the range
- * -tickadj .. +tickadj
- */
- if (time_adjust > tickadj)
- time_adjust_step = tickadj;
- else if (time_adjust < -tickadj)
- time_adjust_step = -tickadj;
-
- /* Reduce by this step the amount of time left */
- time_adjust -= time_adjust_step;
- }
- xtime.tv_usec += tick + time_adjust_step;
- /*
- * Advance the phase, once it gets to one microsecond, then
- * advance the tick more.
- */
- time_phase += time_adj;
- if (time_phase <= -FINEUSEC) {
- long ltemp = -time_phase >> SHIFT_SCALE;
- time_phase += ltemp << SHIFT_SCALE;
- xtime.tv_usec -= ltemp;
- }
- else if (time_phase >= FINEUSEC) {
- long ltemp = time_phase >> SHIFT_SCALE;
- time_phase -= ltemp << SHIFT_SCALE;
- xtime.tv_usec += ltemp;
- }
-}
+spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;
-/*
- * Using a loop looks inefficient, but "ticks" is
- * usually just one (we shouldn't be losing ticks,
- * we're doing this this way mainly for interrupt
- * latency reasons, not because we think we'll
- * have lots of lost timer ticks
- */
static void update_wall_time(unsigned long ticks)
{
- do {
- ticks--;
- update_wall_time_one_tick();
- } while (ticks);
-
- if (xtime.tv_usec >= 1000000) {
- xtime.tv_usec -= 1000000;
- xtime.tv_sec++;
- second_overflow();
- }
+ do {
+ ticks--;
+ xtime.tv_usec += 1000000/HZ;
+ } while (ticks);
+
+ if (xtime.tv_usec >= 1000000) {
+ xtime.tv_usec -= 1000000;
+ xtime.tv_sec++;
+ }
}
/* jiffies at the most recent update of wall time */
@@ -516,47 +308,31 @@ rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
static inline void update_times(void)
{
- unsigned long ticks;
+ unsigned long ticks;
- /*
- * update_times() is run from the raw timer_bh handler so we
- * just know that the irqs are locally enabled and so we don't
- * need to save/restore the flags of the local CPU here. -arca
- */
- write_lock_irq(&xtime_lock);
-
- ticks = jiffies - wall_jiffies;
- if (ticks) {
- wall_jiffies += ticks;
- update_wall_time(ticks);
- }
- write_unlock_irq(&xtime_lock);
+ /*
+ * update_times() is run from the raw timer_bh handler so we
+ * just know that the irqs are locally enabled and so we don't
+ * need to save/restore the flags of the local CPU here. -arca
+ */
+ write_lock_irq(&xtime_lock);
+
+ ticks = jiffies - wall_jiffies;
+ if (ticks) {
+ wall_jiffies += ticks;
+ update_wall_time(ticks);
+ }
+ write_unlock_irq(&xtime_lock);
}
void timer_bh(void)
{
- update_times();
- run_timer_list();
+ update_times();
+ run_timer_list();
}
-#include <xeno/errno.h>
-#include <xeno/sched.h>
-#include <xeno/lib.h>
-#include <xeno/config.h>
-#include <xeno/smp.h>
-#include <xeno/irq.h>
-#include <asm/msr.h>
-
void do_timer(struct pt_regs *regs)
{
(*(unsigned long *)&jiffies)++;
-
mark_bh(TIMER_BH);
- if (TQ_ACTIVE(tq_timer))
- mark_bh(TQUEUE_BH);
-}
-
-void get_fast_time(struct timeval * tm)
-{
- *tm=xtime;
}
diff --git a/xen/include/hypervisor-ifs/dom0_ops.h b/xen/include/hypervisor-ifs/dom0_ops.h
index 4aa0810f44..1e7ab9a489 100644
--- a/xen/include/hypervisor-ifs/dom0_ops.h
+++ b/xen/include/hypervisor-ifs/dom0_ops.h
@@ -21,7 +21,8 @@
#define DOM0_BUILDDOMAIN 13
#define DOM0_IOPL 14
#define DOM0_MSR 15
-#define DOM0_DEBUG 16 /* pervasive debugger */
+#define DOM0_DEBUG 16
+#define DOM0_SETTIME 17
#define MAX_CMD_LEN 256
#define MAX_DOMAIN_NAME 16
@@ -118,6 +119,17 @@ typedef struct dom0_debug_st
} dom0_debug_t;
+/*
+ * Set clock such that it would read <secs,usecs> after 00:00:00 UTC,
+ * 1 January, 1970 if the current system time was <system_time>.
+ */
+typedef struct dom0_settime_st
+{
+ /* IN variables. */
+ unsigned long secs, usecs;
+ u64 system_time;
+} dom0_settime_t;
+
typedef struct dom0_op_st
{
unsigned long cmd;
@@ -133,6 +145,7 @@ typedef struct dom0_op_st
dom0_iopl_t iopl;
dom0_msr_t msr;
dom0_debug_t debug;
+ dom0_settime_t settime;
}
u;
} dom0_op_t;
diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h
index 4405191a6d..b7fa129afc 100644
--- a/xen/include/hypervisor-ifs/hypervisor-if.h
+++ b/xen/include/hypervisor-ifs/hypervisor-if.h
@@ -232,32 +232,35 @@ typedef struct shared_info_st {
/*
* Time: The following abstractions are exposed: System Time, Clock Time,
* Domain Virtual Time. Domains can access Cycle counter time directly.
- *
- * The following values are updated periodically (and atomically, from the
- * p.o.v. of the guest OS). Th eguest OS detects this because the wc_version
- * is incremented.
*/
- u32 wc_version; /* a version number for info below */
- unsigned int rdtsc_bitshift; /* use bits N:N+31 of TSC */
- u64 cpu_freq; /* to calculate ticks -> real time */
- /* System Time */
- long long system_time; /* in ns */
- unsigned long st_timestamp; /* cyclecounter at last update */
- /* Wall Clock Time */
- long tv_sec; /* essentially a struct timeval */
- long tv_usec;
- long long wc_timestamp; /* system time at last update */
+
+ unsigned int rdtsc_bitshift; /* tsc_timestamp uses N:N+31 of TSC. */
+ u64 cpu_freq; /* CPU frequency (Hz). */
+
+ /*
+ * The following values are updated periodically (and not necessarily
+ * atomically!). The guest OS detects this because 'time_version1' is
+ * incremented just before updating these values, and 'time_version2' is
+ * incremented immediately after. See Xenolinux code for an example of how
+ * to read these values safely (arch/xeno/kernel/time.c).
+ */
+ unsigned long time_version1; /* A version number for info below. */
+ unsigned long time_version2; /* A version number for info below. */
+ unsigned long tsc_timestamp; /* TSC at last update of time vals. */
+ u64 system_time; /* Time, in nanosecs, since boot. */
+ unsigned long wc_sec; /* Secs since 00:00:00 UTC, Jan 1, 1970. */
+ unsigned long wc_usec; /* Usecs past wc_sec (sub-second part). */
/* Domain Virtual Time */
- unsigned long long domain_time;
+ u64 domain_time;
/*
* Timeout values:
* Allow a domain to specify a timeout value in system time and
* domain virtual time.
*/
- unsigned long long wall_timeout;
- unsigned long long domain_timeout;
+ u64 wall_timeout;
+ u64 domain_timeout;
/*
* The index structures are all stored here for convenience. The rings
diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h
index ba98b7d1f2..8923a8a56d 100644
--- a/xen/include/xeno/sched.h
+++ b/xen/include/xeno/sched.h
@@ -23,6 +23,8 @@
extern unsigned long volatile jiffies;
extern rwlock_t tasklist_lock;
+extern struct timeval xtime;
+
#include <xeno/spinlock.h>
struct mm_struct {
diff --git a/xen/include/xeno/time.h b/xen/include/xeno/time.h
index c30fe56140..10cd3b7e1b 100644
--- a/xen/include/xeno/time.h
+++ b/xen/include/xeno/time.h
@@ -50,44 +50,21 @@ extern int init_xeno_time();
s_time_t get_s_time(void);
#define NOW() ((s_time_t)get_s_time())
-#define SECONDS(_s) (((s_time_t)(_s)) * 1000000000UL )
-#define TENTHS(_ts) (((s_time_t)(_ts)) * 100000000UL )
-#define HUNDREDTHS(_hs) (((s_time_t)(_hs)) * 10000000UL )
-#define MILLISECS(_ms) (((s_time_t)(_ms)) * 1000000UL )
-#define MICROSECS(_us) (((s_time_t)(_us)) * 1000UL )
+#define SECONDS(_s) (((s_time_t)(_s)) * 1000000000ULL )
+#define MILLISECS(_ms) (((s_time_t)(_ms)) * 1000000ULL )
+#define MICROSECS(_us) (((s_time_t)(_us)) * 1000ULL )
#define Time_Max ((s_time_t) 0x7fffffffffffffffLL)
#define FOREVER Time_Max
-/*
- * Wall Clock Time
- */
+/* Wall Clock Time */
struct timeval {
long tv_sec; /* seconds */
long tv_usec; /* microseconds */
};
-struct timezone {
- int tz_minuteswest; /* minutes west of Greenwich */
- int tz_dsttime; /* type of dst correction */
-};
-
-#ifdef __KERNEL__
-extern void do_gettimeofday(struct timeval *tv);
-extern void do_settimeofday(struct timeval *tv);
-extern void get_fast_time(struct timeval *tv);
-extern void (*do_get_fast_time)(struct timeval *);
-#endif
-
-/*
- * Domain Virtual Time (defined in asm/time.h)
- */
-/* XXX Interface for getting and setting still missing */
-
-
-/* update the per domain time information */
extern void update_dom_time(shared_info_t *si);
-
-/* XXX move this */
+extern void do_settime(unsigned long secs, unsigned long usecs,
+ u64 system_time_base);
extern void do_timer(struct pt_regs *regs);
#endif /* __XENO_TIME_H__ */
diff --git a/xen/include/xeno/tqueue.h b/xen/include/xeno/tqueue.h
index 4a730f0ad9..3cf830d3f7 100644
--- a/xen/include/xeno/tqueue.h
+++ b/xen/include/xeno/tqueue.h
@@ -66,7 +66,7 @@ typedef struct list_head task_queue;
#define DECLARE_TASK_QUEUE(q) LIST_HEAD(q)
#define TQ_ACTIVE(q) (!list_empty(&q))
-extern task_queue tq_timer, tq_immediate, tq_disk;
+extern task_queue tq_disk;
/*
* To implement your own list of active bottom halfs, use the following
diff --git a/xenolinux-2.4.22-sparse/arch/xeno/kernel/time.c b/xenolinux-2.4.22-sparse/arch/xeno/kernel/time.c
index 73ac82c9a4..65280df6ce 100644
--- a/xenolinux-2.4.22-sparse/arch/xeno/kernel/time.c
+++ b/xenolinux-2.4.22-sparse/arch/xeno/kernel/time.c
@@ -1,23 +1,13 @@
/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
****************************************************************************
- * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
+ * (C) 2002-2003 - Keir Fraser - University of Cambridge
****************************************************************************
*
- * File: arch.xeno/time.c
- * Author: Rolf Neugebauer
- * Changes:
- *
- * Date: Nov 2002
+ * File: arch/xeno/kernel/time.c
+ * Author: Rolf Neugebauer and Keir Fraser
*
- * Environment: XenoLinux
- * Description: Interface with Hypervisor to get correct notion of time
- * Currently supports Systemtime and WallClock time.
- *
- * (This has hardly any resemblence with the Linux code but left the
- * copyright notice anyway. Ignore the comments in the copyright notice.)
- ****************************************************************************
- * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
- ****************************************************************************
+ * Description: Interface with Xen to get correct notion of time
*/
/*
@@ -62,7 +52,9 @@
#include <asm/div64.h>
#include <asm/hypervisor.h>
+#include <asm/hypervisor-ifs/dom0_ops.h>
+#include <linux/mc146818rtc.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/time.h>
@@ -70,214 +62,334 @@
#include <linux/smp.h>
#include <linux/irq.h>
-#undef XENO_TIME_DEBUG /* adds sanity checks and periodic printouts */
-
spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
extern rwlock_t xtime_lock;
+extern unsigned long wall_jiffies;
unsigned long cpu_khz; /* get this from Xen, used elsewhere */
-static spinlock_t hyp_time_lock = SPIN_LOCK_UNLOCKED;
static unsigned int rdtsc_bitshift;
-static u32 st_scale_f;
-static u32 st_scale_i;
-static u32 shadow_st_pcc;
-static s64 shadow_st;
+static u32 st_scale_f; /* convert ticks -> usecs */
+static u32 st_scale_i; /* convert ticks -> usecs */
+
+/* These are periodically updated in shared_info, and then copied here. */
+static u32 shadow_tsc_stamp;
+static s64 shadow_system_time;
+static u32 shadow_time_version;
+static struct timeval shadow_tv;
+
+#ifdef CONFIG_XENO_PRIV
+/* Periodically propagate synchronised time to the RTC and to Xen. */
+static long last_rtc_update, last_xen_update;
+#endif
+static u64 processed_system_time;
+
+#define HANDLE_USEC_UNDERFLOW(_tv) \
+ do { \
+ while ( (_tv).tv_usec < 0 ) \
+ { \
+ (_tv).tv_usec += 1000000; \
+ (_tv).tv_sec--; \
+ } \
+ } while ( 0 )
+#define HANDLE_USEC_OVERFLOW(_tv) \
+ do { \
+ while ( (_tv).tv_usec >= 1000000 ) \
+ { \
+ (_tv).tv_usec -= 1000000; \
+ (_tv).tv_sec++; \
+ } \
+ } while ( 0 )
+
+
+#ifdef CONFIG_XENO_PRIV
/*
- * System time.
- * Although the rest of the Linux kernel doesn't know about this, we
- * we use it to extrapolate passage of wallclock time.
- * We need to read the values from the shared info page "atomically"
- * and use the cycle counter value as the "version" number. Clashes
- * should be very rare.
+ * In order to set the CMOS clock precisely, set_rtc_mmss has to be
+ * called 500 ms after the second nowtime has started, because when
+ * nowtime is written into the registers of the CMOS clock, it will
+ * jump to the next second precisely 500 ms later. Check the Motorola
+ * MC146818A or Dallas DS12887 data sheet for details.
+ *
+ * BUG: This routine does not handle hour overflow properly; it just
+ * sets the minutes. Usually you'll only notice that after reboot!
*/
-static inline s64 __get_s_time(void)
+static int set_rtc_mmss(unsigned long nowtime)
{
- s32 delta_tsc;
- u32 low;
- u64 delta, tsc;
+ int retval = 0;
+ int real_seconds, real_minutes, cmos_minutes;
+ unsigned char save_control, save_freq_select;
- rdtscll(tsc);
- low = (u32)(tsc >> rdtsc_bitshift);
- delta_tsc = (s32)(low - shadow_st_pcc);
- if ( unlikely(delta_tsc < 0) ) delta_tsc = 0;
- delta = ((u64)delta_tsc * st_scale_f);
- delta >>= 32;
- delta += ((u64)delta_tsc * st_scale_i);
+ /* gets recalled with irq locally disabled */
+ spin_lock(&rtc_lock);
+ save_control = CMOS_READ(RTC_CONTROL);
+ CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
- return shadow_st + delta;
-}
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+ CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
-/*
- * Wallclock time.
- * Based on what the hypervisor tells us, extrapolated using system time.
- * Again need to read a number of values from the shared page "atomically".
- * this time using a version number.
- */
-static u32 shadow_wc_version=0;
-static long shadow_tv_sec;
-static long shadow_tv_usec;
-static long long shadow_wc_timestamp;
-void do_gettimeofday(struct timeval *tv)
-{
- unsigned long flags;
- long usec, sec;
- u32 version;
- u64 now, cpu_freq, scale;
+ cmos_minutes = CMOS_READ(RTC_MINUTES);
+ if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD )
+ BCD_TO_BIN(cmos_minutes);
- spin_lock_irqsave(&hyp_time_lock, flags);
+ /*
+ * since we're only adjusting minutes and seconds, don't interfere with
+ * hour overflow. This avoids messing with unknown time zones but requires
+ * your RTC not to be off by more than 15 minutes
+ */
+ real_seconds = nowtime % 60;
+ real_minutes = nowtime / 60;
+ if ( ((abs(real_minutes - cmos_minutes) + 15)/30) & 1 )
+ real_minutes += 30; /* correct for half hour time zone */
+ real_minutes %= 60;
- while ( (version = HYPERVISOR_shared_info->wc_version) !=
- shadow_wc_version )
+ if ( abs(real_minutes - cmos_minutes) < 30 )
{
- barrier();
+ if ( !(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD )
+ {
+ BIN_TO_BCD(real_seconds);
+ BIN_TO_BCD(real_minutes);
+ }
+ CMOS_WRITE(real_seconds,RTC_SECONDS);
+ CMOS_WRITE(real_minutes,RTC_MINUTES);
+ }
+ else
+ {
+ printk(KERN_WARNING
+ "set_rtc_mmss: can't update from %d to %d\n",
+ cmos_minutes, real_minutes);
+ retval = -1;
+ }
- shadow_wc_version = version;
- shadow_tv_sec = HYPERVISOR_shared_info->tv_sec;
- shadow_tv_usec = HYPERVISOR_shared_info->tv_usec;
- shadow_wc_timestamp = HYPERVISOR_shared_info->wc_timestamp;
- shadow_st_pcc = HYPERVISOR_shared_info->st_timestamp;
- shadow_st = HYPERVISOR_shared_info->system_time;
+ /* The following flags have to be released exactly in this order,
+ * otherwise the DS12887 (popular MC146818A clone with integrated
+ * battery and quartz) will not reset the oscillator and will not
+ * update precisely 500 ms later. You won't find this mentioned in
+ * the Dallas Semiconductor data sheets, but who believes data
+ * sheets anyway ... -- Markus Kuhn
+ */
+ CMOS_WRITE(save_control, RTC_CONTROL);
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ spin_unlock(&rtc_lock);
- rdtsc_bitshift = HYPERVISOR_shared_info->rdtsc_bitshift;
- cpu_freq = HYPERVISOR_shared_info->cpu_freq;
+ return retval;
+}
+#endif
- /* XXX cpu_freq as u32 limits it to 4.29 GHz. Get a better do_div! */
- scale = 1000000000LL << (32 + rdtsc_bitshift);
- do_div(scale,(u32)cpu_freq);
- st_scale_f = scale & 0xffffffff;
- st_scale_i = scale >> 32;
- barrier();
- }
+/* Must be called with the xtime_lock held for writing. */
+static void get_time_values_from_xen(void)
+{
+ do {
+ shadow_time_version = HYPERVISOR_shared_info->time_version2;
+ rmb();
+ shadow_tv.tv_sec = HYPERVISOR_shared_info->wc_sec;
+ shadow_tv.tv_usec = HYPERVISOR_shared_info->wc_usec;
+ shadow_tsc_stamp = HYPERVISOR_shared_info->tsc_timestamp;
+ shadow_system_time = HYPERVISOR_shared_info->system_time;
+ rmb();
+ }
+ while ( shadow_time_version != HYPERVISOR_shared_info->time_version1 );
+}
- now = __get_s_time();
- usec = ((unsigned long)(now-shadow_wc_timestamp))/1000;
- sec = shadow_tv_sec;
- usec += shadow_tv_usec;
+#define TIME_VALUES_UP_TO_DATE \
+ (shadow_time_version == HYPERVISOR_shared_info->time_version2)
- while ( usec >= 1000000 )
- {
- usec -= 1000000;
- sec++;
- }
- tv->tv_sec = sec;
- tv->tv_usec = usec;
+static inline unsigned long get_time_delta_usecs(void)
+{
+ s32 delta_tsc;
+ u32 low;
+ u64 delta, tsc;
- spin_unlock_irqrestore(&hyp_time_lock, flags);
+ rdtscll(tsc);
+ low = (u32)(tsc >> rdtsc_bitshift);
+ delta_tsc = (s32)(low - shadow_tsc_stamp);
+ if ( unlikely(delta_tsc < 0) ) delta_tsc = 0;
+ delta = ((u64)delta_tsc * st_scale_f);
+ delta >>= 32;
+ delta += ((u64)delta_tsc * st_scale_i);
-#ifdef XENO_TIME_DEBUG
- {
- static long long old_now=0;
- static long long wct=0, old_wct=0;
-
- /* This debug code checks if time increase over two subsequent calls */
- wct=(((long long)sec) * 1000000) + usec;
- /* wall clock time going backwards */
- if ((wct < old_wct) ) {
- printk("Urgh1: wc diff=%6ld, usec = %ld (0x%lX)\n",
- (long)(wct-old_wct), usec, usec);
- printk(" st diff=%lld cur st=0x%016llX old st=0x%016llX\n",
- now-old_now, now, old_now);
- }
+ return (unsigned long)delta;
+}
- /* system time going backwards */
- if (now<=old_now) {
- printk("Urgh2: st diff=%lld cur st=0x%016llX old st=0x%016llX\n",
- now-old_now, now, old_now);
- }
- old_wct = wct;
- old_now = now;
+
+void do_gettimeofday(struct timeval *tv)
+{
+ unsigned long flags, lost;
+ struct timeval _tv;
+
+ again:
+ read_lock_irqsave(&xtime_lock, flags);
+ _tv.tv_usec = get_time_delta_usecs();
+ if ( (lost = (jiffies - wall_jiffies)) != 0 )
+ _tv.tv_usec += lost * (1000000 / HZ);
+ _tv.tv_sec = xtime.tv_sec;
+ _tv.tv_usec += xtime.tv_usec;
+ if ( unlikely(!TIME_VALUES_UP_TO_DATE) )
+ {
+ /*
+ * We may have blocked for a long time, rendering our calculations
+ * invalid (e.g. the time delta may have overflowed). Detect that
+ * and recalculate with fresh values.
+ */
+ read_unlock_irqrestore(&xtime_lock, flags);
+ write_lock_irqsave(&xtime_lock, flags);
+ get_time_values_from_xen();
+ write_unlock_irqrestore(&xtime_lock, flags);
+ goto again;
}
-#endif
+ read_unlock_irqrestore(&xtime_lock, flags);
+
+ HANDLE_USEC_OVERFLOW(_tv);
+
+ *tv = _tv;
}
void do_settimeofday(struct timeval *tv)
{
-/* XXX RN: should do something special here for dom0 */
-#if 0
+#ifdef CONFIG_XENO_PRIV
+ struct timeval newtv;
+ dom0_op_t op;
+
+ if ( start_info.dom_id != 0 )
+ return;
+
write_lock_irq(&xtime_lock);
+
/*
- * This is revolting. We need to set "xtime" correctly. However, the
- * value in this location is the value at the most recent update of
- * wall time. Discover what correction gettimeofday() would have
- * made, and then undo it!
+ * Ensure we don't get blocked for a long time so that our time delta
+ * overflows. If that were to happen then our shadow time values would
+ * be stale, so we can retry with fresh ones.
*/
- tv->tv_usec -= do_gettimeoffset();
- tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ);
-
- while ( tv->tv_usec < 0 )
+ again:
+ tv->tv_usec -= get_time_delta_usecs();
+ if ( unlikely(!TIME_VALUES_UP_TO_DATE) )
{
- tv->tv_usec += 1000000;
- tv->tv_sec--;
+ get_time_values_from_xen();
+ goto again;
}
+
+ HANDLE_USEC_UNDERFLOW(*tv);
+
+ newtv = *tv;
+
+ tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ);
+ HANDLE_USEC_UNDERFLOW(*tv);
xtime = *tv;
time_adjust = 0; /* stop active adjtime() */
time_status |= STA_UNSYNC;
time_maxerror = NTP_PHASE_LIMIT;
time_esterror = NTP_PHASE_LIMIT;
+
+ last_rtc_update = last_xen_update = 0;
+
+ op.cmd = DOM0_SETTIME;
+ op.u.settime.secs = newtv.tv_sec;
+ op.u.settime.usecs = newtv.tv_usec;
+ op.u.settime.system_time = shadow_system_time;
+
write_unlock_irq(&xtime_lock);
+
+ HYPERVISOR_dom0_op(&op);
#endif
}
+asmlinkage long sys_stime(int *tptr)
+{
+ int value;
+ struct timeval tv;
-/*
- * Timer ISR.
- * Unlike normal Linux these don't come in at a fixed rate of HZ.
- * In here we wrok out how often it should have been called and then call
- * the architecture independent part (do_timer()) the appropriate number of
- * times. A bit of a nasty hack, to keep the "other" notion of wallclock time
- * happy.
- */
-static long long us_per_tick=1000000/HZ;
-static long long last_irq;
+ if ( !capable(CAP_SYS_TIME) )
+ return -EPERM;
+
+ if ( get_user(value, tptr) )
+ return -EFAULT;
+
+ tv.tv_sec = value;
+ tv.tv_usec = 0;
+
+ do_settimeofday(&tv);
+
+ return 0;
+}
+
+#define NS_PER_TICK (1000000000ULL/HZ)
static inline void do_timer_interrupt(int irq, void *dev_id,
struct pt_regs *regs)
{
- struct timeval tv;
- long long time, delta;
+ s64 delta;
- /*
- * The next bit really sucks:
- * Linux not only uses do_gettimeofday() to keep a notion of
- * wallclock time, but also maintains the xtime struct and jiffies.
- * (Even worse some userland code accesses this via the sys_time()
- * system call)
- * Unfortunately, xtime is maintain in the architecture independent
- * part of the timer ISR (./kernel/timer.c sic!). So, although we have
- * perfectly valid notion of wallclock time from the hypervisor we here
- * fake missed timer interrupts so that the arch independent part of
- * the Timer ISR updates jiffies for us *and* once the bh gets run
- * updates xtime accordingly. Yuck!
- */
+ get_time_values_from_xen();
- /* Work out the number of jiffy intervals passed and update them. */
- do_gettimeofday(&tv);
- time = (((long long)tv.tv_sec) * 1000000) + tv.tv_usec;
- delta = time - last_irq;
- if (delta <= 0) {
- printk ("Timer ISR: Time went backwards: %lld\n", delta);
+ if ( (delta = (s64)(shadow_system_time - processed_system_time)) < 0 )
+ {
+ printk("Timer ISR: Time went backwards: %lld\n", delta);
return;
}
- while (delta >= us_per_tick) {
+
+ while ( delta >= NS_PER_TICK )
+ {
do_timer(regs);
- delta -= us_per_tick;
- last_irq += us_per_tick;
+ delta -= NS_PER_TICK;
+ processed_system_time += NS_PER_TICK;
+ }
+
+ if ( (time_status & STA_UNSYNC) != 0 )
+ {
+ /* Adjust shadow timeval for jiffies that haven't updated xtime yet. */
+ shadow_tv.tv_usec -= (jiffies - wall_jiffies) * (1000000/HZ);
+ HANDLE_USEC_UNDERFLOW(shadow_tv);
+
+ /* Update our unsynchronised xtime appropriately. */
+ xtime = shadow_tv;
}
-#if 0
- if (!user_mode(regs))
- x86_do_profile(regs->eip);
+#ifdef CONFIG_XENO_PRIV
+ if ( (start_info.dom_id == 0) && ((time_status & STA_UNSYNC) == 0) )
+ {
+ /* Send synchronised time to Xen approximately every minute. */
+ if ( xtime.tv_sec > (last_xen_update + 60) )
+ {
+ dom0_op_t op;
+ struct timeval tv = xtime;
+
+ tv.tv_usec += (jiffies - wall_jiffies) * (1000000/HZ);
+ HANDLE_USEC_OVERFLOW(tv);
+
+ op.cmd = DOM0_SETTIME;
+ op.u.settime.secs = tv.tv_sec;
+ op.u.settime.usecs = tv.tv_usec;
+ op.u.settime.system_time = shadow_system_time;
+ HYPERVISOR_dom0_op(&op);
+
+ last_xen_update = xtime.tv_sec;
+ }
+
+ /*
+ * If we have an externally synchronized Linux clock, then update CMOS
+ * clock accordingly every ~11 minutes. Set_rtc_mmss() has to be called
+ * as close as possible to 500 ms before the new second starts.
+ */
+ if ( (xtime.tv_sec > (last_rtc_update + 660)) &&
+ (xtime.tv_usec >= (500000 - ((unsigned) tick) / 2)) &&
+ (xtime.tv_usec <= (500000 + ((unsigned) tick) / 2)) )
+ {
+ if ( set_rtc_mmss(xtime.tv_sec) == 0 )
+ last_rtc_update = xtime.tv_sec;
+ else
+ last_rtc_update = xtime.tv_sec - 600;
+ }
+ }
#endif
}
static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
write_lock(&xtime_lock);
- do_timer_interrupt(irq, NULL, regs);
+ while ( !TIME_VALUES_UP_TO_DATE )
+ do_timer_interrupt(irq, NULL, regs);
write_unlock(&xtime_lock);
}
@@ -293,7 +405,7 @@ static struct irqaction irq_timer = {
void __init time_init(void)
{
unsigned long long alarm;
- u64 __cpu_khz;
+ u64 __cpu_khz, cpu_freq, scale, scale2;
__cpu_khz = HYPERVISOR_shared_info->cpu_freq;
do_div(__cpu_khz, 1000);
@@ -301,23 +413,29 @@ void __init time_init(void)
printk("Xen reported: %lu.%03lu MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
- do_gettimeofday(&xtime);
- last_irq = (((long long)xtime.tv_sec) * 1000000) + xtime.tv_usec;
+ xtime.tv_sec = HYPERVISOR_shared_info->wc_sec;
+ xtime.tv_usec = HYPERVISOR_shared_info->wc_usec;
+ processed_system_time = shadow_system_time;
- setup_irq(TIMER_IRQ, &irq_timer);
+ rdtsc_bitshift = HYPERVISOR_shared_info->rdtsc_bitshift;
+ cpu_freq = HYPERVISOR_shared_info->cpu_freq;
- /*
- * Start ticker. Note that timing runs of wall clock, not virtual 'domain'
- * time. This means that clock sshould run at the correct rate. For things
- * like scheduling, it's not clear whether it matters which sort of time
- * we use. XXX RN: unimplemented.
- */
+ scale = 1000000LL << (32 + rdtsc_bitshift);
+ do_div(scale, (u32)cpu_freq);
+
+ if ( (cpu_freq >> 32) != 0 )
+ {
+ scale2 = 1000000LL << rdtsc_bitshift;
+ do_div(scale2, (u32)(cpu_freq>>32));
+ scale += scale2;
+ }
+
+ st_scale_f = scale & 0xffffffff;
+ st_scale_i = scale >> 32;
+
+ setup_irq(TIMER_IRQ, &irq_timer);
rdtscll(alarm);
-#if 0
- alarm += (1000/HZ)*HYPERVISOR_shared_info->ticks_per_ms;
- HYPERVISOR_shared_info->wall_timeout = alarm;
- HYPERVISOR_shared_info->domain_timeout = ~0ULL;
-#endif
+
clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events);
}
diff --git a/xenolinux-2.4.22-sparse/kernel/time.c b/xenolinux-2.4.22-sparse/kernel/time.c
new file mode 100644
index 0000000000..fe6ecde6d8
--- /dev/null
+++ b/xenolinux-2.4.22-sparse/kernel/time.c
@@ -0,0 +1,415 @@
+/*
+ * linux/kernel/time.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * This file contains the interface functions for the various
+ * time related system calls: time, stime, gettimeofday, settimeofday,
+ * adjtime
+ */
+/*
+ * Modification history kernel/time.c
+ *
+ * 1993-09-02 Philip Gladstone
+ * Created file with time related functions from sched.c and adjtimex()
+ * 1993-10-08 Torsten Duwe
+ * adjtime interface update and CMOS clock write code
+ * 1995-08-13 Torsten Duwe
+ * kernel PLL updated to 1994-12-13 specs (rfc-1589)
+ * 1999-01-16 Ulrich Windl
+ * Introduced error checking for many cases in adjtimex().
+ * Updated NTP code according to technical memorandum Jan '96
+ * "A Kernel Model for Precision Timekeeping" by Dave Mills
+ * Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10)
+ * (Even though the technical memorandum forbids it)
+ */
+
+#include <linux/mm.h>
+#include <linux/timex.h>
+#include <linux/smp_lock.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * The timezone where the local system is located. Used as a default by some
+ * programs that obtain this value by using gettimeofday.
+ */
+struct timezone sys_tz;
+
+/* The xtime_lock is not only serializing the xtime read/writes but it's also
+ serializing all accesses to the global NTP variables now. */
+extern rwlock_t xtime_lock;
+
+#if !defined(__alpha__) && !defined(__ia64__)
+
+/*
+ * sys_time() can be implemented in user-level using
+ * sys_gettimeofday(). Is this for backwards compatibility? If so,
+ * why not move it into the appropriate arch directory (for those
+ * architectures that need it).
+ *
+ * XXX This function is NOT 64-bit clean!
+ */
+asmlinkage long sys_time(int * tloc) /* Returns the current time in seconds; also stores it through tloc when tloc is non-NULL. */
+{
+ struct timeval now;
+ int i;
+
+ do_gettimeofday(&now);
+ i = now.tv_sec; /* only the seconds field is kept, held in an int */
+ if (tloc) {
+ if (put_user(i,tloc))
+ i = -EFAULT; /* store to user space failed: return the error instead of the time */
+ }
+ return i;
+}
+
+#if !defined(CONFIG_XENO)
+
+/*
+ * sys_stime() can be implemented in user-level using
+ * sys_settimeofday(). Is this for backwards compatibility? If so,
+ * why not move it into the appropriate arch directory (for those
+ * architectures that need it).
+ */
+
+asmlinkage long sys_stime(int * tptr) /* Sets xtime to the seconds value read from user pointer tptr; returns 0, -EPERM or -EFAULT. */
+{
+ int value;
+
+ if (!capable(CAP_SYS_TIME))
+ return -EPERM;
+ if (get_user(value, tptr)) /* fetch the new seconds value before taking any lock */
+ return -EFAULT;
+ write_lock_irq(&xtime_lock); /* xtime and the NTP variables below are written while holding xtime_lock */
+ vxtime_lock();
+ xtime.tv_sec = value;
+ xtime.tv_usec = 0; /* sub-second part is reset to zero */
+ vxtime_unlock();
+ time_adjust = 0; /* stop active adjtime() */
+ time_status |= STA_UNSYNC; /* set the STA_UNSYNC status bit */
+ time_maxerror = NTP_PHASE_LIMIT; /* both error estimates are reset to NTP_PHASE_LIMIT */
+ time_esterror = NTP_PHASE_LIMIT;
+ write_unlock_irq(&xtime_lock);
+ return 0;
+}
+
+#endif
+
+#endif
+
+asmlinkage long sys_gettimeofday(struct timeval *tv, struct timezone *tz) /* Copies the current time to tv and sys_tz to tz; each pointer is skipped when NULL. Returns 0 or -EFAULT. */
+{
+ if (tv) {
+ struct timeval ktv; /* kernel-side copy filled by do_gettimeofday() */
+ do_gettimeofday(&ktv);
+ if (copy_to_user(tv, &ktv, sizeof(ktv)))
+ return -EFAULT;
+ }
+ if (tz) {
+ if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) /* timezone comes straight from the global sys_tz */
+ return -EFAULT;
+ }
+ return 0;
+}
+
+/*
+ * Adjust the time obtained from the CMOS to be UTC time instead of
+ * local time.
+ *
+ * This is ugly, but preferable to the alternatives. Otherwise we
+ * would either need to write a program to do it in /etc/rc (and risk
+ * confusion if the program gets run more than once; it would also be
+ * hard to make the program warp the clock precisely n hours) or
+ * compile in the timezone information into the kernel. Bad, bad....
+ *
+ * - TYT, 1992-01-01
+ *
+ * The best thing to do is to keep the CMOS clock in universal time (UTC)
+ * as real UNIX machines always do it. This avoids all headaches about
+ * daylight saving times and warping kernel clocks.
+ */
+inline static void warp_clock(void) /* Shifts xtime.tv_sec by the sys_tz offset, under xtime_lock with interrupts disabled. */
+{
+ write_lock_irq(&xtime_lock);
+ vxtime_lock();
+ xtime.tv_sec += sys_tz.tz_minuteswest * 60; /* tz_minuteswest is in minutes; * 60 converts to seconds */
+ vxtime_unlock();
+ write_unlock_irq(&xtime_lock);
+}
+
+/*
+ * In case for some reason the CMOS clock has not already been running
+ * in UTC, but in some local time: The first time we set the timezone,
+ * we will warp the clock so that it is ticking UTC time instead of
+ * local time. Presumably, if someone is setting the timezone then we
+ * are running in an environment where the programs understand about
+ * timezones. This should be done at boot time in the /etc/rc script,
+ * as soon as possible, so that the clock can be set right. Otherwise,
+ * various programs will get confused when the clock gets warped.
+ */
+
+int do_sys_settimeofday(struct timeval *tv, struct timezone *tz) /* Applies a new timezone and/or time from kernel-space arguments (either may be NULL); returns 0, or -EPERM without CAP_SYS_TIME. */
+{
+ static int firsttime = 1; /* cleared by the first call that supplies a timezone */
+
+ if (!capable(CAP_SYS_TIME))
+ return -EPERM;
+
+ if (tz) {
+ /* SMP safe, global irq locking makes it work. */
+ sys_tz = *tz;
+ if (firsttime) {
+ firsttime = 0;
+ if (!tv) /* warp only when no explicit time accompanies the first timezone */
+ warp_clock();
+ }
+ }
+ if (tv)
+ {
+ /* SMP safe, again the code in arch/foo/time.c should
+ * globally block out interrupts when it runs.
+ */
+ do_settimeofday(tv);
+ }
+ return 0;
+}
+
+asmlinkage long sys_settimeofday(struct timeval *tv, struct timezone *tz) /* Syscall wrapper: copies the user-space arguments into kernel locals and calls do_sys_settimeofday(). */
+{
+ struct timeval new_tv; /* kernel copies; each is filled only when the matching user pointer is non-NULL */
+ struct timezone new_tz;
+
+ if (tv) {
+ if (copy_from_user(&new_tv, tv, sizeof(*tv)))
+ return -EFAULT;
+ }
+ if (tz) {
+ if (copy_from_user(&new_tz, tz, sizeof(*tz)))
+ return -EFAULT;
+ }
+
+ return do_sys_settimeofday(tv ? &new_tv : NULL, tz ? &new_tz : NULL); /* absent arguments are forwarded as NULL */
+}
+
+long pps_offset; /* pps time offset (us) */
+long pps_jitter = MAXTIME; /* time dispersion (jitter) (us) */
+
+long pps_freq; /* frequency offset (scaled ppm) */
+long pps_stabil = MAXFREQ; /* frequency dispersion (scaled ppm) */
+
+long pps_valid = PPS_VALID; /* pps signal watchdog counter */
+
+int pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */
+
+long pps_jitcnt; /* jitter limit exceeded */
+long pps_calcnt; /* calibration intervals */
+long pps_errcnt; /* calibration errors */
+long pps_stbcnt; /* stability limit exceeded */
+
+/* hook for a loadable hardpps kernel module */
+void (*hardpps_ptr)(struct timeval *);
+
+/* adjtimex mainly allows reading (and writing, if superuser) of
+ * kernel time-keeping variables. used by xntpd.
+ */
+int do_adjtimex(struct timex *txc) /* Applies the changes selected by txc->modes (if any), then fills *txc with the current time-keeping values. Returns the clock state, TIME_ERROR, or a negative errno. */
+{
+ long ltemp, mtemp, save_adjust;
+ int result;
+
+ /* In order to modify anything, you gotta be super-user! */
+ if (txc->modes && !capable(CAP_SYS_TIME))
+ return -EPERM;
+
+ /* Now we validate the data before disabling interrupts */
+
+ if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
+ /* singleshot must not be used with any other mode bits */
+ if (txc->modes != ADJ_OFFSET_SINGLESHOT)
+ return -EINVAL;
+
+ if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET))
+ /* adjustment Offset limited to +- .512 seconds */
+ if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE )
+ return -EINVAL;
+
+ /* if the quartz is off by more than 10% something is VERY wrong ! */
+ if (txc->modes & ADJ_TICK)
+ if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ)
+ return -EINVAL;
+
+ write_lock_irq(&xtime_lock); /* everything up to write_unlock_irq() below runs under xtime_lock */
+ result = time_state; /* mostly `TIME_OK' */
+
+ /* Save for later - semantics of adjtime is to return old value */
+ save_adjust = time_adjust;
+
+#if 0 /* STA_CLOCKERR is never set yet */
+ time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */
+#endif
+ /* If there are input parameters, then process them */
+ if (txc->modes) /* NOTE(review): a failed check below jumps to leave with earlier mode changes already applied */
+ {
+ if (txc->modes & ADJ_STATUS) /* only set allowed bits */
+ time_status = (txc->status & ~STA_RONLY) |
+ (time_status & STA_RONLY);
+
+ if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */
+ if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) {
+ result = -EINVAL;
+ goto leave;
+ }
+ time_freq = txc->freq - pps_freq; /* stored relative to pps_freq; added back when reporting below */
+ }
+
+ if (txc->modes & ADJ_MAXERROR) {
+ if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) {
+ result = -EINVAL;
+ goto leave;
+ }
+ time_maxerror = txc->maxerror;
+ }
+
+ if (txc->modes & ADJ_ESTERROR) {
+ if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) {
+ result = -EINVAL;
+ goto leave;
+ }
+ time_esterror = txc->esterror;
+ }
+
+ if (txc->modes & ADJ_TIMECONST) { /* p. 24 */
+ if (txc->constant < 0) { /* NTP v4 uses values > 6 */
+ result = -EINVAL;
+ goto leave;
+ }
+ time_constant = txc->constant;
+ }
+
+ if (txc->modes & ADJ_OFFSET) { /* values checked earlier */
+ if (txc->modes == ADJ_OFFSET_SINGLESHOT) {
+ /* adjtime() is independent from ntp_adjtime() */
+ time_adjust = txc->offset;
+ }
+ else if ( time_status & (STA_PLL | STA_PPSTIME) ) {
+ ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) ==
+ (STA_PPSTIME | STA_PPSSIGNAL) ?
+ pps_offset : txc->offset; /* use pps_offset only when both PPS bits are set, else the caller's offset */
+
+ /*
+ * Scale the phase adjustment and
+ * clamp to the operating range.
+ */
+ if (ltemp > MAXPHASE)
+ time_offset = MAXPHASE << SHIFT_UPDATE;
+ else if (ltemp < -MAXPHASE)
+ time_offset = -(MAXPHASE << SHIFT_UPDATE);
+ else
+ time_offset = ltemp << SHIFT_UPDATE;
+
+ /*
+ * Select whether the frequency is to be controlled
+ * and in which mode (PLL or FLL). Clamp to the operating
+ * range. Ugly multiply/divide should be replaced someday.
+ */
+
+ if (time_status & STA_FREQHOLD || time_reftime == 0)
+ time_reftime = xtime.tv_sec; /* makes mtemp below come out as 0 */
+ mtemp = xtime.tv_sec - time_reftime; /* seconds since the stored reference time */
+ time_reftime = xtime.tv_sec;
+ if (time_status & STA_FLL) {
+ if (mtemp >= MINSEC) {
+ ltemp = (time_offset / mtemp) << (SHIFT_USEC -
+ SHIFT_UPDATE);
+ if (ltemp < 0) /* negate before shifting so the right shift acts on a non-negative value */
+ time_freq -= -ltemp >> SHIFT_KH;
+ else
+ time_freq += ltemp >> SHIFT_KH;
+ } else /* calibration interval too short (p. 12) */
+ result = TIME_ERROR;
+ } else { /* PLL mode */
+ if (mtemp < MAXSEC) {
+ ltemp *= mtemp; /* ltemp is still the unscaled offset selected above */
+ if (ltemp < 0)
+ time_freq -= -ltemp >> (time_constant +
+ time_constant +
+ SHIFT_KF - SHIFT_USEC);
+ else
+ time_freq += ltemp >> (time_constant +
+ time_constant +
+ SHIFT_KF - SHIFT_USEC);
+ } else /* calibration interval too long (p. 12) */
+ result = TIME_ERROR;
+ }
+ if (time_freq > time_tolerance) /* clamp time_freq to +/- time_tolerance */
+ time_freq = time_tolerance;
+ else if (time_freq < -time_tolerance)
+ time_freq = -time_tolerance;
+ } /* STA_PLL || STA_PPSTIME */
+ } /* txc->modes & ADJ_OFFSET */
+ if (txc->modes & ADJ_TICK) {
+ /* if the quartz is off by more than 10% something is
+ VERY wrong ! */
+ if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) { /* same range test as the pre-lock validation above */
+ result = -EINVAL;
+ goto leave;
+ }
+ tick = txc->tick;
+ }
+ } /* txc->modes */
+leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0 /* NOTE(review): also reached with result == -EINVAL, which this test can overwrite with TIME_ERROR */
+ || ((time_status & (STA_PPSFREQ|STA_PPSTIME)) != 0
+ && (time_status & STA_PPSSIGNAL) == 0)
+ /* p. 24, (b) */
+ || ((time_status & (STA_PPSTIME|STA_PPSJITTER))
+ == (STA_PPSTIME|STA_PPSJITTER))
+ /* p. 24, (c) */
+ || ((time_status & STA_PPSFREQ) != 0
+ && (time_status & (STA_PPSWANDER|STA_PPSERROR)) != 0))
+ /* p. 24, (d) */
+ result = TIME_ERROR;
+
+ if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
+ txc->offset = save_adjust; /* report the time_adjust value saved before any change */
+ else {
+ if (time_offset < 0) /* undo the SHIFT_UPDATE scaling, shifting the magnitude for negative values */
+ txc->offset = -(-time_offset >> SHIFT_UPDATE);
+ else
+ txc->offset = time_offset >> SHIFT_UPDATE;
+ }
+ txc->freq = time_freq + pps_freq;
+ txc->maxerror = time_maxerror;
+ txc->esterror = time_esterror;
+ txc->status = time_status;
+ txc->constant = time_constant;
+ txc->precision = time_precision;
+ txc->tolerance = time_tolerance;
+ txc->tick = tick;
+ txc->ppsfreq = pps_freq;
+ txc->jitter = pps_jitter >> PPS_AVG;
+ txc->shift = pps_shift;
+ txc->stabil = pps_stabil;
+ txc->jitcnt = pps_jitcnt;
+ txc->calcnt = pps_calcnt;
+ txc->errcnt = pps_errcnt;
+ txc->stbcnt = pps_stbcnt;
+ write_unlock_irq(&xtime_lock);
+ do_gettimeofday(&txc->time); /* current time is read after xtime_lock has been released */
+ return(result);
+}
+
+asmlinkage long sys_adjtimex(struct timex *txc_p) /* Syscall wrapper: copies *txc_p in, runs do_adjtimex() on the copy, then copies the result back out. */
+{
+ struct timex txc; /* Local copy of parameter */
+ int ret;
+
+ /* Copy the user data space into the kernel copy
+ * structure. But bear in mind that the structures
+ * may change
+ */
+ if(copy_from_user(&txc, txc_p, sizeof(struct timex)))
+ return -EFAULT;
+ ret = do_adjtimex(&txc);
+ return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; /* copy-back happens even when ret is an error; a failed copy-back returns -EFAULT instead of ret */
+}