aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorrn@wyvis.camb.intel-research.net <rn@wyvis.camb.intel-research.net>2003-01-30 19:35:24 +0000
committerrn@wyvis.camb.intel-research.net <rn@wyvis.camb.intel-research.net>2003-01-30 19:35:24 +0000
commit9062553a0dc188eaf5f0fa001dbe92e64a7d64e9 (patch)
tree8a21d6af1b57302389695fc71fa61a01813dda5e
parent22a857bde9b89547d682aa92bb226096119b0223 (diff)
downloadxen-9062553a0dc188eaf5f0fa001dbe92e64a7d64e9.tar.gz
xen-9062553a0dc188eaf5f0fa001dbe92e64a7d64e9.tar.bz2
xen-9062553a0dc188eaf5f0fa001dbe92e64a7d64e9.zip
bitkeeper revision 1.14.1.1 (3e397e7cPGmZK7y5LLOGYa43nTQMUw)
added time and accurate timer support partially there for better scheduler (most of the infrastructure should be there)
-rw-r--r--.rootkeys4
-rw-r--r--BitKeeper/etc/logging_ok1
-rw-r--r--xen-2.4.16/arch/i386/apic.c321
-rw-r--r--xen-2.4.16/arch/i386/entry.S26
-rw-r--r--xen-2.4.16/arch/i386/setup.c5
-rw-r--r--xen-2.4.16/arch/i386/time.c236
-rw-r--r--xen-2.4.16/common/ac_timer.c304
-rw-r--r--xen-2.4.16/common/domain.c205
-rw-r--r--xen-2.4.16/common/lib.c355
-rw-r--r--xen-2.4.16/common/schedule.c345
-rw-r--r--xen-2.4.16/common/timer.c1
-rw-r--r--xen-2.4.16/include/asm-i386/apic.h3
-rw-r--r--xen-2.4.16/include/asm-i386/time.h80
-rw-r--r--xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h57
-rw-r--r--xen-2.4.16/include/xeno/ac_timer.h65
-rw-r--r--xen-2.4.16/include/xeno/sched.h73
-rw-r--r--xen-2.4.16/include/xeno/time.h61
-rw-r--r--xen-2.4.16/include/xeno/timer.h9
-rw-r--r--xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c4
-rw-r--r--xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c2
-rw-r--r--xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c91
-rw-r--r--xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h5
22 files changed, 1756 insertions, 497 deletions
diff --git a/.rootkeys b/.rootkeys
index 64c5e5a0a9..fceb5ebeda 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -35,6 +35,7 @@
3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen-2.4.16/arch/i386/usercopy.c
3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen-2.4.16/arch/i386/xeno.lds
3ddb79bdff-gj-jFGKjOejeHLqL8Lg xen-2.4.16/common/Makefile
+3e397e66AyyD5fYraAySWuwi9uqSXg xen-2.4.16/common/ac_timer.c
3ddb79bddEYJbcURvqqcx99Yl2iAhQ xen-2.4.16/common/block.c
3ddb79bdrqnW93GR9gZk1OJe1qK-iQ xen-2.4.16/common/brlock.c
3ddb79bdLX_P6iB7ILiblRLWvebapg xen-2.4.16/common/dom0_ops.c
@@ -46,6 +47,7 @@
3ddb79bdN51qpRC-6bOH-v5hl_AK6A xen-2.4.16/common/network.c
3ddb79bdD4SLmmdMD7yLW5HcUWucXw xen-2.4.16/common/page_alloc.c
3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen-2.4.16/common/resource.c
+3e397e6619PgAfBbw2XFbXkewvUWgw xen-2.4.16/common/schedule.c
3ddb79bdB9RNMnkQnUyZ5C9hhMSQQw xen-2.4.16/common/slab.c
3ddb79bd0gVQYmL2zvuJnldvD0AGxQ xen-2.4.16/common/softirq.c
3ddb79bdQqFHtHRGEO2dsxGgo6eAhw xen-2.4.16/common/timer.c
@@ -157,6 +159,7 @@
3ddb79c3e9DCEoR-WzNxcOQDzLu7BQ xen-2.4.16/include/asm-i386/softirq.h
3ddb79c3NiyQE2vQnyGiaBnNjBO1rA xen-2.4.16/include/asm-i386/spinlock.h
3ddb79c3ezddh34MdelJpa5tNR00Dw xen-2.4.16/include/asm-i386/system.h
+3e397e66xPNc8eaSqC9pPbyAtRGzHA xen-2.4.16/include/asm-i386/time.h
3ddb79c4HugMq7IYGxcQKFBpKwKhzA xen-2.4.16/include/asm-i386/types.h
3ddb79c3M2n1ROZH6xk3HbyN4CPDqg xen-2.4.16/include/asm-i386/uaccess.h
3ddb79c3uPGcP_l_2xyGgBSWd5aC-Q xen-2.4.16/include/asm-i386/unaligned.h
@@ -167,6 +170,7 @@
3ddb79c4R4iVwqIIeychVQYmIH4FUg xen-2.4.16/include/scsi/scsi_ioctl.h
3ddb79c4yw_mfd4Uikn3v_IOPRpa1Q xen-2.4.16/include/scsi/scsicam.h
3ddb79c4HKPMLvDBP9LxzPi_szVxGA xen-2.4.16/include/scsi/sg.h
+3e397e66m2tO3s-J8Jnr7Ws_tGoPTg xen-2.4.16/include/xeno/ac_timer.h
3ddb79c0nTsjSpVK4ZVTI9WwN24xtQ xen-2.4.16/include/xeno/blk.h
3ddb79c0dVhTHLsv6CPTf4baKix4mA xen-2.4.16/include/xeno/blkdev.h
3ddb79c18ePBgitnOs7GiOCFilODVw xen-2.4.16/include/xeno/blkpg.h
diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok
index bf45a4d7d0..af6fb70b2a 100644
--- a/BitKeeper/etc/logging_ok
+++ b/BitKeeper/etc/logging_ok
@@ -3,5 +3,6 @@ akw27@labyrinth.cl.cam.ac.uk
kaf24@labyrinth.cl.cam.ac.uk
kaf24@plym.cl.cam.ac.uk
kaf24@striker.cl.cam.ac.uk
+rn@wyvis.camb.intel-research.net
smh22@boulderdash.cl.cam.ac.uk
smh22@uridium.cl.cam.ac.uk
diff --git a/xen-2.4.16/arch/i386/apic.c b/xen-2.4.16/arch/i386/apic.c
index a09613bee4..d479a69322 100644
--- a/xen-2.4.16/arch/i386/apic.c
+++ b/xen-2.4.16/arch/i386/apic.c
@@ -1,3 +1,23 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: apic.c
+ * Author:
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: programmable APIC timer interface for accurate timers
+ * modified version of Linux' apic.c
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ */
+
/*
* Local APIC handling, local APIC timers
*
@@ -10,6 +30,7 @@
* for testing these extensively.
*/
+
#include <xeno/config.h>
#include <xeno/init.h>
#include <xeno/sched.h>
@@ -25,6 +46,17 @@
#include <asm/pgalloc.h>
#include <asm/hardirq.h>
+#include <xeno/ac_timer.h>
+
+
+#undef APIC_TIME_TRACE
+#ifdef APIC_TIME_TRACE
+#define TRC(_x) _x
+#else
+#define TRC(_x)
+#endif
+
+
/* Using APIC to generate smp_local_timer_interrupt? */
int using_apic_timer = 0;
@@ -39,7 +71,7 @@ int get_maxlvt(void)
return maxlvt;
}
-void clear_local_APIC(void)
+static void clear_local_APIC(void)
{
int maxlvt;
unsigned long v;
@@ -313,7 +345,6 @@ void __init setup_local_APIC (void)
*/
value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
if (!smp_processor_id()) {
-/* && (pic_mode || !value)) { */
value = APIC_DM_EXTINT;
printk("enabled ExtINT on CPU#%d\n", smp_processor_id());
} else {
@@ -340,11 +371,9 @@ void __init setup_local_APIC (void)
value = apic_read(APIC_ESR);
printk("ESR value before enabling vector: %08lx\n", value);
- value = ERROR_APIC_VECTOR; // enables sending errors
+ value = ERROR_APIC_VECTOR; /* enables sending errors */
apic_write_around(APIC_LVTERR, value);
- /*
- * spec says clear errors after enabling vector.
- */
+ /* spec says clear errors after enabling vector. */
if (maxlvt > 3)
apic_write(APIC_ESR, 0);
value = apic_read(APIC_ESR);
@@ -416,9 +445,7 @@ static int __init detect_init_APIC (void)
boot_cpu_physical_apicid = 0;
printk("Found and enabled local APIC!\n");
-
apic_pm_init1();
-
return 0;
no_apic:
@@ -467,14 +494,24 @@ void __init init_apic_mappings(void)
#endif
}
-/*
- * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts
- * per second. We assume that the caller has already set up the local
- * APIC.
- *
- * The APIC timer is not exactly sync with the external timer chip, it
- * closely follows bus clocks.
- */
+/*****************************************************************************
+ * APIC calibration
+ *
+ * The APIC is programmed in bus cycles.
+ * Timeout values should be specified in real time units.
+ * The "cheapest" time source is the cycle counter.
+ *
+ * Thus, we need a mapping from: bus cycles <- cycle counter <- system time
+ *
+ * The calibration is currently a bit shoddy since it requires the external
+ * timer chip to generate periodic timer interrupts.
+ *****************************************************************************/
+
+/* used for system time scaling */
+static unsigned int bus_freq;
+static u32 bus_cycle; /* length of one bus cycle in pico-seconds */
+static u32 bus_scale; /* scaling factor to convert ns to bus cycles */
+
/*
* The timer chip is already set up at HZ interrupts per second here,
@@ -485,17 +522,12 @@ static unsigned int __init get_8254_timer_count(void)
{
/*extern spinlock_t i8253_lock;*/
/*unsigned long flags;*/
-
unsigned int count;
-
/*spin_lock_irqsave(&i8253_lock, flags);*/
-
outb_p(0x00, 0x43);
count = inb_p(0x40);
count |= inb_p(0x40) << 8;
-
/*spin_unlock_irqrestore(&i8253_lock, flags);*/
-
return count;
}
@@ -503,112 +535,67 @@ void __init wait_8254_wraparound(void)
{
unsigned int curr_count, prev_count=~0;
int delta;
-
curr_count = get_8254_timer_count();
-
do {
prev_count = curr_count;
curr_count = get_8254_timer_count();
delta = curr_count-prev_count;
-
/*
* This limit for delta seems arbitrary, but it isn't, it's
* slightly above the level of error a buggy Mercury/Neptune
* chipset timer can cause.
*/
-
} while (delta < 300);
}
/*
* This function sets up the local APIC timer, with a timeout of
* 'clocks' APIC bus clock. During calibration we actually call
- * this function twice on the boot CPU, once with a bogus timeout
- * value, second time for real. The other (noncalibrating) CPUs
- * call this function only once, with the real, calibrated value.
+ * this function with a very large value and read the current time after
+ * a well defined period of time as expired.
+ *
+ * Calibration is only performed once, for CPU0!
*
* We do reads before writes even if unnecessary, to get around the
* P5 APIC double write bug.
*/
-
-#define APIC_DIVISOR 16
-
-void __setup_APIC_LVTT(unsigned int clocks)
+#define APIC_DIVISOR 1
+static void __setup_APIC_LVTT(unsigned int clocks)
{
unsigned int lvtt1_value, tmp_value;
-
- lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV) |
- APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
+ lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV)|LOCAL_TIMER_VECTOR;
apic_write_around(APIC_LVTT, lvtt1_value);
-
- /*
- * Divide PICLK by 16
- */
tmp_value = apic_read(APIC_TDCR);
- apic_write_around(APIC_TDCR, (tmp_value
- & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
- | APIC_TDR_DIV_16);
-
+ apic_write_around(APIC_TDCR, (tmp_value | APIC_TDR_DIV_1));
apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
}
+/*
+ * this is done for every CPU from setup_APIC_clocks() below.
+ * We setup each local APIC with a zero timeout value for now.
+ * Unlike Linux, we don't have to wait for slices etc.
+ */
void setup_APIC_timer(void * data)
{
- unsigned int clocks = (unsigned int) data, slice, t0, t1;
unsigned long flags;
- int delta;
-
__save_flags(flags);
__sti();
- /*
- * ok, Intel has some smart code in their APIC that knows
- * if a CPU was in 'hlt' lowpower mode, and this increases
- * its APIC arbitration priority. To avoid the external timer
- * IRQ APIC event being in synchron with the APIC clock we
- * introduce an interrupt skew to spread out timer events.
- *
- * The number of slices within a 'big' timeslice is smp_num_cpus+1
- */
-
- slice = clocks / (smp_num_cpus+1);
- printk("cpu: %d, clocks: %d, slice: %d\n",
- smp_processor_id(), clocks, slice);
-
- /*
- * Wait for IRQ0's slice:
- */
- wait_8254_wraparound();
-
- __setup_APIC_LVTT(clocks);
-
- t0 = apic_read(APIC_TMICT)*APIC_DIVISOR;
- /* Wait till TMCCT gets reloaded from TMICT... */
- do {
- t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR;
- delta = (int)(t0 - t1 - slice*(smp_processor_id()+1));
- } while (delta >= 0);
- /* Now wait for our slice for real. */
- do {
- t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR;
- delta = (int)(t0 - t1 - slice*(smp_processor_id()+1));
- } while (delta < 0);
-
- __setup_APIC_LVTT(clocks);
-
- printk("CPU%d<T0:%d,T1:%d,D:%d,S:%d,C:%d>\n",
- smp_processor_id(), t0, t1, delta, slice, clocks);
-
+ printk("cpu: %d: setup timer.", smp_processor_id());
+ __setup_APIC_LVTT(0);
+ printk("done\n");
__restore_flags(flags);
}
/*
* In this function we calibrate APIC bus clocks to the external timer.
*
- * We want to do the calibration only once since we
- * want to have local timer irqs syncron. CPUs connected
- * by the same APIC bus have the very same bus frequency.
- * And we want to have irqs off anyways, no accidental
- * APIC irq that way.
+ * As a result we have the bus speed and CPU speed in Hz.
+ *
+ * We want to do the calibration only once (for CPU0). CPUs connected by the
+ * same APIC bus have the very same bus frequency.
+ *
+ * This is a bit shoddy since we use the very same periodic timer interrupt
+ * we try to eliminate to calibrate the APIC.
*/
int __init calibrate_APIC_clock(void)
@@ -619,95 +606,152 @@ int __init calibrate_APIC_clock(void)
int i;
const int LOOPS = HZ/10;
- printk("calibrating APIC timer ...\n");
+ printk("calibrating APIC timer for CPU%d...\n", smp_processor_id());
- /*
- * Put whatever arbitrary (but long enough) timeout
+ /* Put whatever arbitrary (but long enough) timeout
* value into the APIC clock, we just want to get the
- * counter running for calibration.
- */
+ * counter running for calibration. */
__setup_APIC_LVTT(1000000000);
- /*
- * The timer chip counts down to zero. Let's wait
+ /* The timer chip counts down to zero. Let's wait
* for a wraparound to start exact measurement:
- * (the current tick might have been already half done)
- */
-
+ * (the current tick might have been already half done) */
wait_8254_wraparound();
- /*
- * We wrapped around just now. Let's start:
- */
+ /* We wrapped around just now. Let's start: */
rdtscll(t1);
tt1 = apic_read(APIC_TMCCT);
- /*
- * Let's wait LOOPS wraprounds:
- */
+ /* Let's wait LOOPS wraprounds: */
for (i = 0; i < LOOPS; i++)
wait_8254_wraparound();
tt2 = apic_read(APIC_TMCCT);
rdtscll(t2);
- /*
- * The APIC bus clock counter is 32 bits only, it
+ /* The APIC bus clock counter is 32 bits only, it
* might have overflown, but note that we use signed
* longs, thus no extra care needed.
- *
- * underflown to be exact, as the timer counts down ;)
- */
-
+ * underflown to be exact, as the timer counts down ;) */
result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
- printk("..... CPU clock speed is %ld.%04ld MHz.\n",
+ printk("..... CPU speed is %ld.%04ld MHz.\n",
((long)(t2-t1)/LOOPS)/(1000000/HZ),
((long)(t2-t1)/LOOPS)%(1000000/HZ));
- printk("..... host bus clock speed is %ld.%04ld MHz.\n",
+ printk("..... Bus speed is %ld.%04ld MHz.\n",
result/(1000000/HZ),
result%(1000000/HZ));
+ /* set up multipliers for accurate timer code */
+ bus_freq = result*HZ;
+ bus_cycle = (u32) (1000000000000LL/bus_freq); /* in pico seconds */
+ bus_scale = (1000*262144)/bus_cycle;
+
+ /* print results */
+ printk("..... bus_freq = %u Hz\n", bus_freq);
+ printk("..... bus_cycle = %u ps\n", bus_cycle);
+ printk("..... bus_scale = %u \n", bus_scale);
+ /* reset APIC to zero timeout value */
+ __setup_APIC_LVTT(0);
return result;
}
-static unsigned int calibration_result;
-
+/*
+ * initialise the APIC timers for all CPUs
+ * we start with the first and find out processor frequency and bus speed
+ */
void __init setup_APIC_clocks (void)
{
printk("Using local APIC timer interrupts.\n");
using_apic_timer = 1;
-
__cli();
-
- calibration_result = calibrate_APIC_clock();
- /*
- * Now set up the timer for real.
- */
- setup_APIC_timer((void *)calibration_result);
-
+ /* calibrate CPU0 for CPU speed and BUS speed */
+ bus_freq = calibrate_APIC_clock();
+ /* Now set up the timer for real. */
+ setup_APIC_timer((void *)bus_freq);
__sti();
-
/* and update all other cpus */
- smp_call_function(setup_APIC_timer, (void *)calibration_result, 1, 1);
+ smp_call_function(setup_APIC_timer, (void *)bus_freq, 1, 1);
}
#undef APIC_DIVISOR
+/*
+ * reprogram the APIC timer. Timeout value is in ns from start of boot
+ * returns 1 on success
+ * returns 0 if the timeout value is too small or in the past.
+ */
+
+
+int reprogram_ac_timer(s_time_t timeout)
+{
+ int cpu = smp_processor_id();
+ s_time_t now;
+ s_time_t expire;
+ u64 apic_tmict;
+
+ now = NOW();
+ expire = timeout - now; /* value from now */
+
+
+ if (expire <= 0) {
+ printk("APICT[%02d] Timeout value in the past %lld > %lld\n",
+ cpu, now, timeout);
+ return 0; /* timeout value in the past */
+ }
+
+ /* conversion to bus units */
+ apic_tmict = (((u64)bus_scale) * expire)>>18;
+
+ if (apic_tmict >= 0xffffffff) {
+ printk("APICT[%02d] Timeout value too large\n", cpu);
+ apic_tmict = 0xffffffff;
+ }
+ if (apic_tmict == 0) {
+ printk("APICT[%02d] timeout value too small\n", cpu);
+ return 0;
+ }
+
+ /* program timer */
+ apic_write(APIC_TMICT, (unsigned long)apic_tmict);
+
+ TRC(printk("APICT[%02d] reprog(): expire=%lld %u\n",
+ cpu, expire, apic_tmict));
+ return 1;
+}
/*
- * Local timer interrupt handler. It does both profiling and
- * process statistics/rescheduling.
- *
- * We do profiling in every local tick, statistics/rescheduling
- * happen only every 'profiling multiplier' ticks. The default
- * multiplier is 1 and it can be changed by writing the new multiplier
- * value into /proc/profile.
+ * Local timer interrupt handler.
+ * here the programmable, accurate timers are executed.
+ * If we are on CPU0 and we should have updated jiffies, we do this
+ * as well and deal with traditional linux timers. Note that if
+ * the timer APIC on a CPU does not go off every 10ms or so the linux
+ * timers lose accuracy, but that shouldn't be a problem.
*/
+static s_time_t last_cpu0_tirq = 0;
inline void smp_local_timer_interrupt(struct pt_regs * regs)
{
- update_process_times(user_mode(regs));
+ int cpu = smp_processor_id();
+ s_time_t diff, now;
+
+ /* if CPU 0 do old timer stuff */
+ if (cpu == 0) {
+ update_time();
+ now = NOW();
+ diff = now - last_cpu0_tirq;
+ /* this uses three 64bit divisions which should be avoided!! */
+ if (diff >= MILLISECS(10)) {
+ /* update jiffies */
+ (*(unsigned long *)&jiffies) += diff / MILLISECS(10);
+
+ /* do traditional linux timers */
+ do_timer(regs);
+ last_cpu0_tirq = now;
+ }
+ }
+ /* call timer function */
+ do_ac_timer();
}
/*
@@ -732,13 +776,11 @@ void smp_apic_timer_interrupt(struct pt_regs * regs)
/*
* NOTE! We'd better ACK the irq immediately,
* because timer handling can be slow.
+ * XXX is this safe?
*/
ack_APIC_irq();
- /*
- * update_process_times() expects us to have done irq_enter().
- * Besides, if we don't timer interrupts ignore the global
- * interrupt lock, which is the WrongThing (tm) to do.
- */
+
+ /* call the local handler */
irq_enter(cpu, 0);
smp_local_timer_interrupt(regs);
irq_exit(cpu, 0);
@@ -809,7 +851,8 @@ int __init APIC_init_uniprocessor (void)
/*
* Complain if the BIOS pretends there is one.
*/
- if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+ if (!cpu_has_apic&&APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]))
+ {
printk("BIOS bug, local APIC #%d not detected!...\n",
boot_cpu_physical_apicid);
return -1;
diff --git a/xen-2.4.16/arch/i386/entry.S b/xen-2.4.16/arch/i386/entry.S
index ea0a58f75f..3c336a21c9 100644
--- a/xen-2.4.16/arch/i386/entry.S
+++ b/xen-2.4.16/arch/i386/entry.S
@@ -513,19 +513,19 @@ ENTRY(spurious_interrupt_bug)
.data
ENTRY(hypervisor_call_table)
- .long SYMBOL_NAME(do_set_trap_table)
- .long SYMBOL_NAME(do_process_page_updates)
- .long SYMBOL_NAME(do_console_write)
- .long SYMBOL_NAME(sys_ni_syscall)
- .long SYMBOL_NAME(do_set_guest_stack)
- .long SYMBOL_NAME(do_net_update)
- .long SYMBOL_NAME(do_fpu_taskswitch)
- .long SYMBOL_NAME(do_yield)
- .long SYMBOL_NAME(kill_domain)
- .long SYMBOL_NAME(do_dom0_op)
- .long SYMBOL_NAME(do_network_op)
- .long SYMBOL_NAME(do_set_debugreg)
- .long SYMBOL_NAME(do_get_debugreg)
+ .long SYMBOL_NAME(do_set_trap_table) /* 0 */
+ .long SYMBOL_NAME(do_process_page_updates) /* 1 */
+ .long SYMBOL_NAME(do_console_write) /* 2 */
+ .long SYMBOL_NAME(sys_ni_syscall) /* 3 */
+ .long SYMBOL_NAME(do_set_guest_stack) /* 4 */
+ .long SYMBOL_NAME(do_net_update) /* 5 */
+ .long SYMBOL_NAME(do_fpu_taskswitch) /* 6 */
+ .long SYMBOL_NAME(do_sched_op) /* 7 */
+ .long SYMBOL_NAME(kill_domain) /* 10 */
+ .long SYMBOL_NAME(do_dom0_op) /* 11 */
+ .long SYMBOL_NAME(do_network_op) /* 12 */
+ .long SYMBOL_NAME(do_set_debugreg) /* 13 */
+ .long SYMBOL_NAME(do_get_debugreg) /* 14 */
.rept NR_syscalls-(.-hypervisor_call_table)/4
.long SYMBOL_NAME(sys_ni_syscall)
.endr
diff --git a/xen-2.4.16/arch/i386/setup.c b/xen-2.4.16/arch/i386/setup.c
index e23ca04ea4..b7e6a9d550 100644
--- a/xen-2.4.16/arch/i386/setup.c
+++ b/xen-2.4.16/arch/i386/setup.c
@@ -277,6 +277,7 @@ void __init start_of_day(void)
extern void tqueue_bh(void);
extern void immediate_bh(void);
extern void init_timervecs(void);
+ extern void ac_timer_init(void);
extern int setup_network_devices(void);
extern void net_init(void);
@@ -300,6 +301,7 @@ void __init start_of_day(void)
paging_init(); /* not much here now, but sets up fixmap */
if ( smp_found_config ) get_smp_config();
domain_init();
+ scheduler_init();
trap_init();
init_IRQ(); /* installs simple interrupt wrappers. Starts HZ clock. */
time_init(); /* installs software handler for HZ clock. */
@@ -320,6 +322,9 @@ void __init start_of_day(void)
* fall thru to 8259A if we have to (but slower).
*/
#endif
+ init_xeno_time(); /* initialise the time */
+ ac_timer_init(); /* init accurate timers */
+ schedulers_start(); /* start scheduler for each CPU */
sti();
diff --git a/xen-2.4.16/arch/i386/time.c b/xen-2.4.16/arch/i386/time.c
index 0b7d3ead4e..4b4ac515cb 100644
--- a/xen-2.4.16/arch/i386/time.c
+++ b/xen-2.4.16/arch/i386/time.c
@@ -1,3 +1,22 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: i386/time.c
+ * Author:
+ * Changes:
+ *
+ * Date: Jan 2003
+ *
+ * Environment: Xen Hypervisor
+ * Description: modified version of Linux' time.c
+ * implement system and wall clock time.
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ */
/*
* linux/arch/i386/kernel/time.c
*
@@ -19,12 +38,22 @@
#include <asm/mpspec.h>
#include <asm/processor.h>
#include <asm/fixmap.h>
+#include <asm/mc146818rtc.h>
+
+#ifdef TIME_TRACE
+#define TRC(_x) _x
+#else
+#define TRC(_x)
+#endif
+
+
unsigned long cpu_khz; /* Detected as we calibrate the TSC */
unsigned long ticks_per_usec; /* TSC ticks per microsecond. */
spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
+
/*
* timer_interrupt() needs to keep up the real-time clock,
* as well as call the "do_timer()" routine every clocktick
@@ -62,14 +91,15 @@ static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
do_timer_interrupt(irq, NULL, regs);
}
-static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL};
+static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0,
+ "timer", NULL, NULL};
/* ------ Calibrate the TSC -------
* Return processor ticks per second / CALIBRATE_FRAC.
*/
#define CLOCK_TICK_RATE 1193180 /* system crystal frequency (Hz) */
-#define CALIBRATE_FRAC 20 /* calibrate over 50ms */
+#define CALIBRATE_FRAC 20 /* calibrate over 50ms */
#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
static unsigned long __init calibrate_tsc(void)
@@ -126,6 +156,207 @@ static unsigned long __init calibrate_tsc(void)
return 0;
}
+/***************************************************************************
+ * CMOS Timer functions
+ ***************************************************************************/
+
+/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
+ * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
+ * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
+ *
+ * [For the Julian calendar (which was used in Russia before 1917,
+ * Britain & colonies before 1752, anywhere else before 1582,
+ * and is still in use by some communities) leave out the
+ * -year/100+year/400 terms, and add 10.]
+ *
+ * This algorithm was first published by Gauss (I think).
+ *
+ * WARNING: this function will overflow on 2106-02-07 06:28:16 on
+ * machines were long is 32-bit! (However, as time_t is signed, we
+ * will already get problems at other places on 2038-01-19 03:14:08)
+ */
+static inline unsigned long
+mktime (unsigned int year, unsigned int mon,
+ unsigned int day, unsigned int hour,
+ unsigned int min, unsigned int sec)
+{
+ if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */
+ mon += 12; /* Puts Feb last since it has leap day */
+ year -= 1;
+ }
+ return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day)+
+ year*365 - 719499
+ )*24 + hour /* now have hours */
+ )*60 + min /* now have minutes */
+ )*60 + sec; /* finally seconds */
+}
+
+static unsigned long get_cmos_time(void)
+{
+ unsigned int year, mon, day, hour, min, sec;
+ int i;
+
+ spin_lock(&rtc_lock);
+ /* The Linux interpretation of the CMOS clock register contents:
+ * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
+ * RTC registers show the second which has precisely just started.
+ * Let's hope other operating systems interpret the RTC the same way.
+ */
+ /* read RTC exactly on falling edge of update flag */
+ for (i = 0 ; i < 1000000 ; i++) /* may take up to 1 second... */
+ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
+ break;
+ for (i = 0 ; i < 1000000 ; i++) /* must try at least 2.228 ms */
+ if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
+ break;
+ do { /* Isn't this overkill ? UIP above should guarantee consistency */
+ sec = CMOS_READ(RTC_SECONDS);
+ min = CMOS_READ(RTC_MINUTES);
+ hour = CMOS_READ(RTC_HOURS);
+ day = CMOS_READ(RTC_DAY_OF_MONTH);
+ mon = CMOS_READ(RTC_MONTH);
+ year = CMOS_READ(RTC_YEAR);
+ } while (sec != CMOS_READ(RTC_SECONDS));
+ if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+ {
+ BCD_TO_BIN(sec);
+ BCD_TO_BIN(min);
+ BCD_TO_BIN(hour);
+ BCD_TO_BIN(day);
+ BCD_TO_BIN(mon);
+ BCD_TO_BIN(year);
+ }
+ spin_unlock(&rtc_lock);
+ if ((year += 1900) < 1970)
+ year += 100;
+ printk(".... CMOS Clock: %02d/%02d/%04d %02d:%02d:%02d\n",
+ day, mon, year, hour, min, sec);
+ return mktime(year, mon, day, hour, min, sec);
+}
+
+/***************************************************************************
+ * System time
+ ***************************************************************************/
+u32 stime_pcc; /* cycle counter value at last timer irq */
+u32 stime_scale; /* scale factor for converting cc to ns */
+s_time_t stime_now; /* time in ns at last timer IRQ */
+
+/***************************************************************************
+ * Wall Clock time
+ ***************************************************************************/
+static rwlock_t wctime_lock = RW_LOCK_UNLOCKED;
+struct timeval wall_clock_time; /* wall clock time at last update */
+s_time_t wctime_st; /* system time at last update */
+
+void do_gettimeofday(struct timeval *tv)
+{
+ unsigned long flags;
+ unsigned long usec, sec;
+
+ read_lock_irqsave(&wctime_lock, flags);
+
+ usec = ((unsigned long)(NOW() - wctime_st))/1000;
+ sec = wall_clock_time.tv_sec;
+ usec += wall_clock_time.tv_usec;
+ read_unlock_irqrestore(&wctime_lock, flags);
+
+ while (usec >= 1000000) {
+ usec -= 1000000;
+ sec++;
+ }
+ tv->tv_sec = sec;
+ tv->tv_usec = usec;
+}
+
+void do_settimeofday(struct timeval *tv)
+{
+ printk("XXX: do_settimeofday not implemented\n");
+}
+
+/***************************************************************************
+ * Update times
+ ***************************************************************************/
+
+/* update hypervisors notion of time */
+void update_time(void) {
+ u32 new_pcc;
+ s_time_t new_st;
+ unsigned long usec;
+
+ /* update system time */
+ rdtscl(new_pcc);
+ stime_now = stime_now+((((s_time_t)stime_scale)*
+ (new_pcc-stime_pcc))>>10);
+ stime_pcc=new_pcc;
+
+ /* update wall clock time */
+ write_lock(&wctime_lock);
+ new_st = NOW();
+ usec = ((unsigned long)(new_st - wctime_st))/1000;
+ usec += wall_clock_time.tv_usec;
+ while (usec >= 1000000) {
+ usec -= 1000000;
+ wall_clock_time.tv_sec++;
+ }
+ wall_clock_time.tv_usec = usec;
+ wctime_st = new_st;
+ write_unlock(&wctime_lock);
+
+ TRC(printk("TIME[%02d] update time: stime_now=%lld now=%lld, wct=%ld:%ld\n",
+ smp_processor_id(), stime_now, new_st, wall_clock_time.tv_sec,
+ wall_clock_time.tv_usec));
+}
+
+/* update a domains notion of time */
+void update_dom_time(shared_info_t *si)
+{
+ unsigned long flags;
+ read_lock_irqsave(&wctime_lock, flags);
+ si->system_time = stime_now;
+ si->st_timestamp = stime_pcc;
+ si->tv_sec = wall_clock_time.tv_sec;
+ si->tv_usec = wall_clock_time.tv_usec;
+ si->wc_timestamp = wctime_st;
+ read_unlock_irqrestore(&wctime_lock, flags);
+}
+
+/***************************************************************************
+ * Init Xeno Time
+ * This has to be done after all CPUs have been booted
+ ***************************************************************************/
+int __init init_xeno_time()
+{
+ int cpu = smp_processor_id();
+ u32 cpu_cycle; /* time of one cpu cycle in pico-seconds */
+
+ /* System Time */
+ cpu_cycle = (u32) (1000000000LL/cpu_khz); /* in pico seconds */
+ stime_scale = (cpu_cycle * 1024) / 1000;
+
+ stime_now = (s_time_t)0;
+ rdtscl(stime_pcc);
+
+ printk("Init Time[%02d]:\n", cpu);
+ printk(".... System Time: %lldns\n", NOW());
+ printk(".... stime_scale: %u\n", stime_scale);
+ printk(".... stime_pcc: %u\n", stime_pcc);
+
+ /* Wall Clock time */
+ wall_clock_time.tv_sec = get_cmos_time();
+ wall_clock_time.tv_usec = 0;
+ wctime_st = NOW();
+
+ printk(".... Wall Clock: %lds %ldus\n", wall_clock_time.tv_sec,
+ wall_clock_time.tv_usec);
+ printk(".... wctime_st: %lld\n", wctime_st);
+ return 0;
+}
+
+
+/***************************************************************************
+ * Init
+ ***************************************************************************/
+
void __init time_init(void)
{
unsigned long ticks_per_frac = calibrate_tsc();
@@ -136,6 +367,7 @@ void __init time_init(void)
ticks_per_usec = ticks_per_frac / (1000000/CALIBRATE_FRAC);
cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC);
+
printk("Detected %lu.%03lu MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
diff --git a/xen-2.4.16/common/ac_timer.c b/xen-2.4.16/common/ac_timer.c
new file mode 100644
index 0000000000..dec3f34646
--- /dev/null
+++ b/xen-2.4.16/common/ac_timer.c
@@ -0,0 +1,304 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: ac_timer.c
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: Accurate timer for the Hypervisor
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/lib.h>
+#include <xeno/config.h>
+#include <xeno/smp.h>
+#include <xeno/init.h>
+
+#include <xeno/time.h>
+#include <xeno/ac_timer.h>
+
+#include <asm/system.h>
+#include <asm/desc.h>
+
+
+#undef AC_TIMER_TRACE
+#undef AC_TIMER_STATS
+
+#ifdef AC_TIMER_TRACE
+#define TRC(_x) _x
+#else
+#define TRC(_x)
+#endif
+
+/* A timer list per CPU */
+typedef struct ac_timers_st
+{
+ spinlock_t lock;
+ struct list_head timers;
+ struct ac_timer *prev, *curr;
+} __cacheline_aligned ac_timers_t;
+static ac_timers_t ac_timers[NR_CPUS];
+
+#ifdef AC_TIMER_STATS
+#define BUCKETS 1000
+#define MAX_STATS
+typedef struct act_stats_st
+{
+ u32 count;
+ u32 times[2*(BUCKETS)];
+} __cacheline_aligned act_stats_t;
+static act_stats_t act_stats[NR_CPUS];
+
+#endif
+
+/* local prototypes */
+static int detach_ac_timer(struct ac_timer *timer);
+static void ac_timer_debug(unsigned long);
+
+/*
+ * add a timer.
+ * return value:
+ * 0: success
+ * 1: failure, timer in the past or timeout value too small
+ * -1: failure, timer uninitialised
+ * fail
+ */
+int add_ac_timer(struct ac_timer *timer)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ struct list_head *tmp, *prev;
+ struct ac_timer *t;
+ s_time_t now;
+
+ /* sanity checks */
+
+ /* make sure timeout value is in the future */
+ now = NOW();
+ TRC(printk("ACT [%02d] add(): now=%lld timo=%lld\n",
+ cpu, now, timer->expires));
+ if (timer->expires <= now) {
+ printk("ACT[%02d] add_ac_timer(): timeout value in the past\n", cpu);
+ return 1;
+ }
+
+ local_irq_save(flags);
+
+ /* check if timer would be inserted at start of list */
+ if ((list_empty(&ac_timers[cpu].timers)) ||
+ (timer->expires <
+ (list_entry(&ac_timers[cpu].timers, struct ac_timer, timer_list))->expires)) {
+
+ TRC(printk("ACT [%02d] add(): add at head\n", cpu));
+ /* Reprogram and add to head of list */
+ if (!reprogram_ac_timer(timer->expires)) {
+ /* failed */
+ TRC(printk("ACT [%02d] add(): add at head failed\n", cpu));
+ local_irq_restore(flags);
+ return 1;
+ }
+ list_add(&timer->timer_list, &ac_timers[cpu].timers);
+
+ } else {
+ /* find correct entry and add timer */
+ prev = &ac_timers[cpu].timers;
+ list_for_each(tmp, &ac_timers[cpu].timers) {
+ t = list_entry(tmp, struct ac_timer, timer_list);
+ if (t->expires < timer->expires) {
+ list_add(&timer->timer_list, prev);
+ TRC(printk("ACT [%02d] add(): added between %lld and %lld\n",
+ cpu,
+ list_entry(prev,struct ac_timer,timer_list)->expires,
+ list_entry(tmp,struct ac_timer,timer_list)->expires));
+ break;
+ }
+ prev = tmp;
+ }
+ }
+ local_irq_restore(flags);
+ return 0;
+}
+
+/*
+ * remove a timer
+ * return values:
+ * 0: success
+ * -1: bogus timer
+ */
+static int detach_ac_timer(struct ac_timer *timer)
+{
+ TRC(int cpu = smp_processor_id());
+ TRC(printk("ACT [%02d] detach(): \n", cpu));
+ list_del(&timer->timer_list);
+ timer->timer_list.next = NULL;
+ return 0;
+}
+
+/*
+ * remove a timer
+ * return values:
+ * 0: success
+ * -1: bogus timer
+ */
+int rem_ac_timer(struct ac_timer *timer)
+{
+ int res;
+ unsigned long flags;
+ TRC(int cpu = smp_processor_id());
+
+ TRC(printk("ACT [%02d] remove(): timo=%lld \n", cpu, timer->expires));
+ /* sanity checks */
+
+ local_irq_save(flags);
+ res = detach_ac_timer(timer);
+ local_irq_restore(flags);
+ return res;
+}
+
+/*
+ * modify a timer, i.e., set a new timeout value
+ * return value:
+ * 0: success
+ * -1: error
+ */
+int mod_ac_timer(struct ac_timer *timer, s_time_t new_time)
+{
+ if (rem_ac_timer(timer) != 0)
+ return -1;
+ timer->expires = new_time;
+ if (add_ac_timer(timer) != 0)
+ return -1;
+ return 0;
+}
+
+/*
+ * do_ac_timer
+ * deal with timeouts and run the handlers
+ */
+void do_ac_timer(void)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ s_time_t now;
+ struct ac_timer *t;
+ struct list_head *tmp;
+
+ local_irq_save(flags);
+
+ do_timer_again:
+
+ now = NOW();
+ TRC(printk("ACT [%02d] do(): now=%lld\n", cpu, now));
+
+ /* Sanity checks */
+ /* empty time list */
+ if (list_empty(&ac_timers[cpu].timers)) {
+ printk("ACT[%02d] do_ac_timer(): timer irq without timer\n", cpu);
+ local_irq_restore(flags);
+ return;
+ }
+
+
+ /* execute the head of timer queue */
+ t = list_entry(ac_timers[cpu].timers.next, struct ac_timer, timer_list);
+ detach_ac_timer(t);
+
+
+#ifdef AC_TIMER_STATS
+ {
+ s32 diff;
+ u32 i;
+ diff = ((s32)(now - t->expires)) / 1000; /* delta in us */
+ if (diff < -BUCKETS)
+ diff = -BUCKETS;
+ else if (diff > BUCKETS)
+ diff = BUCKETS;
+ act_stats[cpu].times[diff+BUCKETS]++;
+ act_stats[cpu].count++;
+
+ if (act_stats[cpu].count >= 5000) {
+ printk("ACT Stats\n");
+ for (i=0; i < 2*BUCKETS; i++) {
+ if (act_stats[cpu].times[i] != 0)
+ printk("ACT [%02d]: %3dus: %5d\n",
+ cpu,i-BUCKETS, act_stats[cpu].times[i]);
+ act_stats[cpu].times[i]=0;
+ }
+ act_stats[cpu].count = 0;
+ printk("\n");
+ }
+ }
+#endif
+
+
+
+ if (t->expires > now) {
+ //printk("ACT [%02d] do(): irq too early (%lld ns)\n",
+ // cpu, now - t->expires );
+ }
+ if (t->function != NULL)
+ t->function(t->data);
+
+
+ /* check if there are other timer functions on the list */
+ now = NOW();
+ if (!list_empty(&ac_timers[cpu].timers)) {
+ list_for_each(tmp, &ac_timers[cpu].timers) {
+ t = list_entry(tmp, struct ac_timer, timer_list);
+ TRC(printk("ACT [%02d] do(): now=%lld timo=%lld\n",
+ cpu, now, t->expires));
+ if (t->expires <= now) {
+ detach_ac_timer(t);
+ if (t->function != NULL)
+ t->function(t->data);
+ now = NOW();
+ } else {
+ TRC(printk("ACT [%02d] do(): break1\n", cpu));
+ break;
+ }
+ }
+ }
+
+ /* If the list is not empty, reprogram the timer to the new head of the list */
+ if (!list_empty(&ac_timers[cpu].timers)) {
+ t = list_entry(ac_timers[cpu].timers.next,struct ac_timer,timer_list);
+ if (t->expires > 0) {
+ TRC(printk("ACT [%02d] do(): reprog timo=%lld\n",cpu,t->expires));
+ if (!reprogram_ac_timer(t->expires)) {
+ TRC(printk("ACT [%02d] do(): again\n", cpu));
+ goto do_timer_again;
+ }
+ }
+ }
+ local_irq_restore(flags);
+}
+
+/*
+ * init
+ */
+void __init ac_timer_init(void)
+{
+ int i;
+
+ printk ("ACT: Initialising Accurate timers\n");
+
+ for (i = 0; i < NR_CPUS; i++)
+ {
+ INIT_LIST_HEAD(&ac_timers[i].timers);
+ spin_lock_init(&ac_timers[i].lock);
+ }
+ /* ac_timer_debug(0); */
+}
diff --git a/xen-2.4.16/common/domain.c b/xen-2.4.16/common/domain.c
index 90c1a0a779..7e95845ad1 100644
--- a/xen-2.4.16/common/domain.c
+++ b/xen-2.4.16/common/domain.c
@@ -14,24 +14,9 @@
rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;
-schedule_data_t schedule_data[NR_CPUS];
-
-int wake_up(struct task_struct *p)
-{
- unsigned long flags;
- int ret = 0;
- spin_lock_irqsave(&schedule_data[p->processor].lock, flags);
- if ( __task_on_runqueue(p) ) goto out;
- p->state = TASK_RUNNING;
- __add_to_runqueue(p);
- ret = 1;
-
- out:
- spin_unlock_irqrestore(&schedule_data[p->processor].lock, flags);
- return ret;
-}
-
-
+/*
+ * create a new domain
+ */
struct task_struct *do_newdomain(void)
{
int retval;
@@ -66,97 +51,6 @@ struct task_struct *do_newdomain(void)
return(p);
}
-
-void reschedule(struct task_struct *p)
-{
- int cpu = p->processor;
- struct task_struct *curr;
- unsigned long flags;
-
- if ( p->has_cpu ) return;
-
- spin_lock_irqsave(&schedule_data[cpu].lock, flags);
- curr = schedule_data[cpu].curr;
- if ( is_idle_task(curr) )
- {
- set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
- spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
-#ifdef CONFIG_SMP
- if ( cpu != smp_processor_id() ) smp_send_event_check_cpu(cpu);
-#endif
- }
- else
- {
- spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
- }
-}
-
-
-static void process_timeout(unsigned long __data)
-{
- struct task_struct * p = (struct task_struct *) __data;
- wake_up(p);
-}
-
-long schedule_timeout(long timeout)
-{
- struct timer_list timer;
- unsigned long expire;
-
- switch (timeout)
- {
- case MAX_SCHEDULE_TIMEOUT:
- /*
- * These two special cases are useful to be comfortable in the caller.
- * Nothing more. We could take MAX_SCHEDULE_TIMEOUT from one of the
- * negative value but I' d like to return a valid offset (>=0) to allow
- * the caller to do everything it want with the retval.
- */
- schedule();
- goto out;
- default:
- /*
- * Another bit of PARANOID. Note that the retval will be 0 since no
- * piece of kernel is supposed to do a check for a negative retval of
- * schedule_timeout() (since it should never happens anyway). You just
- * have the printk() that will tell you if something is gone wrong and
- * where.
- */
- if (timeout < 0)
- {
- printk(KERN_ERR "schedule_timeout: wrong timeout "
- "value %lx from %p\n", timeout,
- __builtin_return_address(0));
- current->state = TASK_RUNNING;
- goto out;
- }
- }
-
- expire = timeout + jiffies;
-
- init_timer(&timer);
- timer.expires = expire;
- timer.data = (unsigned long) current;
- timer.function = process_timeout;
-
- add_timer(&timer);
- schedule();
- del_timer_sync(&timer);
-
- timeout = expire - jiffies;
-
- out:
- return timeout < 0 ? 0 : timeout;
-}
-
-
-long do_yield(void)
-{
- current->state = TASK_INTERRUPTIBLE;
- schedule();
- return 0;
-}
-
/* Get a pointer to the specified domain. Consider replacing this
* with a hash lookup later.
*
@@ -231,7 +125,7 @@ long kill_other_domain(unsigned int dom)
/* Release resources belonging to task @p. */
void release_task(struct task_struct *p)
{
- ASSERT(!__task_on_runqueue(p));
+ //ASSERT(!__task_on_runqueue(p));
ASSERT(p->state == TASK_DYING);
ASSERT(!p->has_cpu);
write_lock_irq(&tasklist_lock);
@@ -252,77 +146,6 @@ void release_task(struct task_struct *p)
}
-asmlinkage void schedule(void)
-{
- struct task_struct *prev, *next;
- struct list_head *tmp;
- int this_cpu;
-
- need_resched_back:
- prev = current;
- this_cpu = prev->processor;
-
- spin_lock_irq(&schedule_data[this_cpu].lock);
-
- ASSERT(!in_interrupt());
- ASSERT(__task_on_runqueue(prev));
-
- if ( !prev->counter )
- {
- prev->counter = 2;
- __move_last_runqueue(prev);
- }
-
- switch ( prev->state )
- {
- case TASK_INTERRUPTIBLE:
- if ( signal_pending(prev) )
- {
- prev->state = TASK_RUNNING;
- break;
- }
- default:
- __del_from_runqueue(prev);
- case TASK_RUNNING:;
- }
- clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events);
-
- /* Round-robin, skipping idle where possible. */
- next = NULL;
- list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) {
- next = list_entry(tmp, struct task_struct, run_list);
- if ( next->domain != IDLE_DOMAIN_ID ) break;
- }
-
- prev->has_cpu = 0;
- next->has_cpu = 1;
-
- schedule_data[this_cpu].prev = prev;
- schedule_data[this_cpu].curr = next;
-
- spin_unlock_irq(&schedule_data[this_cpu].lock);
-
- if ( unlikely(prev == next) )
- {
- /* We won't go through the normal tail, so do this by hand */
- prev->policy &= ~SCHED_YIELD;
- goto same_process;
- }
-
- prepare_to_switch();
- switch_to(prev, next);
- prev = schedule_data[this_cpu].prev;
-
- prev->policy &= ~SCHED_YIELD;
- if ( prev->state == TASK_DYING ) release_task(prev);
-
- same_process:
- if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) )
- goto need_resched_back;
- return;
-}
-
-
static unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes)
{
struct list_head *temp;
@@ -404,7 +227,6 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params)
unsigned long ft_mapping = (unsigned long)frame_table;
unsigned int ft_size = 0;
start_info_t *virt_startinfo_address;
- unsigned long long time;
l2_pgentry_t *l2tab;
l1_pgentry_t *l1tab = NULL;
struct pfn_info *page = NULL;
@@ -543,11 +365,12 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params)
l1tab = map_domain_mem(phys_l1tab);
*l1tab = mk_l1_pgentry(__pa(p->shared_info)|L1_PROT);
- /* Set up shared info area. */
- rdtscll(time);
- p->shared_info->wall_time = time;
- p->shared_info->domain_time = time;
+ /*
+ * Set up time
+ */
+ update_dom_time(p->shared_info);
p->shared_info->ticks_per_ms = ticks_per_usec * 1000;
+ p->shared_info->domain_time = 0;
/* for DOM0, setup mapping of frame table */
if ( dom == 0 )
@@ -666,14 +489,8 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params)
void __init domain_init(void)
{
- int i;
- for ( i = 0; i < NR_CPUS; i++ )
- {
- INIT_LIST_HEAD(&schedule_data[i].runqueue);
- spin_lock_init(&schedule_data[i].lock);
- schedule_data[i].prev = &idle0_task;
- schedule_data[i].curr = &idle0_task;
- }
+ printk("Initialising domains\n");
+// scheduler_init();
}
diff --git a/xen-2.4.16/common/lib.c b/xen-2.4.16/common/lib.c
index 2a55b992e8..ae75196ffd 100644
--- a/xen-2.4.16/common/lib.c
+++ b/xen-2.4.16/common/lib.c
@@ -169,3 +169,358 @@ unsigned char *quad_to_str(unsigned long q, unsigned char *s)
}
+/* a couple of 64 bit operations ported from freebsd */
+
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
+ */
+
+#include <asm/types.h>
+
+
+/*
+ * Depending on the desired operation, we view a `long long' (aka quad_t) in
+ * one or more of the following formats.
+ */
+union uu {
+ s64 q; /* as a (signed) quad */
+ s64 uq; /* as an unsigned quad */
+ long sl[2]; /* as two signed longs */
+ unsigned long ul[2]; /* as two unsigned longs */
+};
+/* XXX RN: Yuck hardcoded endianness :) */
+#define _QUAD_HIGHWORD 1
+#define _QUAD_LOWWORD 0
+/*
+ * Define high and low longwords.
+ */
+#define H _QUAD_HIGHWORD
+#define L _QUAD_LOWWORD
+
+/*
+ * Total number of bits in a quad_t and in the pieces that make it up.
+ * These are used for shifting, and also below for halfword extraction
+ * and assembly.
+ */
+#define CHAR_BIT 8 /* number of bits in a char */
+#define QUAD_BITS (sizeof(s64) * CHAR_BIT)
+#define LONG_BITS (sizeof(long) * CHAR_BIT)
+#define HALF_BITS (sizeof(long) * CHAR_BIT / 2)
+
+/*
+ * Extract high and low shortwords from longword, and move low shortword of
+ * longword to upper half of long, i.e., produce the upper longword of
+ * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be u_long.)
+ *
+ * These are used in the multiply code, to split a longword into upper
+ * and lower halves, and to reassemble a product as a quad_t, shifted left
+ * (sizeof(long)*CHAR_BIT/2).
+ */
+#define HHALF(x) ((x) >> HALF_BITS)
+#define LHALF(x) ((x) & ((1 << HALF_BITS) - 1))
+#define LHUP(x) ((x) << HALF_BITS)
+
+/*
+ * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed),
+ * section 4.3.1, pp. 257--259.
+ */
+#define B (1 << HALF_BITS) /* digit base */
+
+/* Combine two `digits' to make a single two-digit number. */
+#define COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b))
+
+/* select a type for digits in base B: use unsigned short if they fit */
+#if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff
+typedef unsigned short digit;
+#else
+typedef u_long digit;
+#endif
+
+/*
+ * Shift p[0]..p[len] left `sh' bits, ignoring any bits that
+ * `fall out' the left (there never will be any such anyway).
+ * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS.
+ */
+static void
+shl(register digit *p, register int len, register int sh)
+{
+ register int i;
+
+ for (i = 0; i < len; i++)
+ p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh));
+ p[i] = LHALF(p[i] << sh);
+}
+
+/*
+ * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.
+ *
+ * We do this in base 2-sup-HALF_BITS, so that all intermediate products
+ * fit within u_long. As a consequence, the maximum length dividend and
+ * divisor are 4 `digits' in this base (they are shorter if they have
+ * leading zeros).
+ */
+u64
+__qdivrem(uq, vq, arq)
+ u64 uq, vq, *arq;
+{
+ union uu tmp;
+ digit *u, *v, *q;
+ register digit v1, v2;
+ u_long qhat, rhat, t;
+ int m, n, d, j, i;
+ digit uspace[5], vspace[5], qspace[5];
+
+ /*
+ * Take care of special cases: divide by zero, and u < v.
+ */
+ if (vq == 0) {
+ /* divide by zero. */
+ static volatile const unsigned int zero = 0;
+
+ tmp.ul[H] = tmp.ul[L] = 1 / zero;
+ if (arq)
+ *arq = uq;
+ return (tmp.q);
+ }
+ if (uq < vq) {
+ if (arq)
+ *arq = uq;
+ return (0);
+ }
+ u = &uspace[0];
+ v = &vspace[0];
+ q = &qspace[0];
+
+ /*
+ * Break dividend and divisor into digits in base B, then
+ * count leading zeros to determine m and n. When done, we
+ * will have:
+ * u = (u[1]u[2]...u[m+n]) sub B
+ * v = (v[1]v[2]...v[n]) sub B
+ * v[1] != 0
+ * 1 < n <= 4 (if n = 1, we use a different division algorithm)
+ * m >= 0 (otherwise u < v, which we already checked)
+ * m + n = 4
+ * and thus
+ * m = 4 - n <= 2
+ */
+ tmp.uq = uq;
+ u[0] = 0;
+ u[1] = HHALF(tmp.ul[H]);
+ u[2] = LHALF(tmp.ul[H]);
+ u[3] = HHALF(tmp.ul[L]);
+ u[4] = LHALF(tmp.ul[L]);
+ tmp.uq = vq;
+ v[1] = HHALF(tmp.ul[H]);
+ v[2] = LHALF(tmp.ul[H]);
+ v[3] = HHALF(tmp.ul[L]);
+ v[4] = LHALF(tmp.ul[L]);
+ for (n = 4; v[1] == 0; v++) {
+ if (--n == 1) {
+ u_long rbj; /* r*B+u[j] (not root boy jim) */
+ digit q1, q2, q3, q4;
+
+ /*
+ * Change of plan, per exercise 16.
+ * r = 0;
+ * for j = 1..4:
+ * q[j] = floor((r*B + u[j]) / v),
+ * r = (r*B + u[j]) % v;
+ * We unroll this completely here.
+ */
+ t = v[2]; /* nonzero, by definition */
+ q1 = u[1] / t;
+ rbj = COMBINE(u[1] % t, u[2]);
+ q2 = rbj / t;
+ rbj = COMBINE(rbj % t, u[3]);
+ q3 = rbj / t;
+ rbj = COMBINE(rbj % t, u[4]);
+ q4 = rbj / t;
+ if (arq)
+ *arq = rbj % t;
+ tmp.ul[H] = COMBINE(q1, q2);
+ tmp.ul[L] = COMBINE(q3, q4);
+ return (tmp.q);
+ }
+ }
+
+ /*
+ * By adjusting q once we determine m, we can guarantee that
+ * there is a complete four-digit quotient at &qspace[1] when
+ * we finally stop.
+ */
+ for (m = 4 - n; u[1] == 0; u++)
+ m--;
+ for (i = 4 - m; --i >= 0;)
+ q[i] = 0;
+ q += 4 - m;
+
+ /*
+ * Here we run Program D, translated from MIX to C and acquiring
+ * a few minor changes.
+ *
+ * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
+ */
+ d = 0;
+ for (t = v[1]; t < B / 2; t <<= 1)
+ d++;
+ if (d > 0) {
+ shl(&u[0], m + n, d); /* u <<= d */
+ shl(&v[1], n - 1, d); /* v <<= d */
+ }
+ /*
+ * D2: j = 0.
+ */
+ j = 0;
+ v1 = v[1]; /* for D3 -- note that v[1..n] are constant */
+ v2 = v[2]; /* for D3 */
+ do {
+ register digit uj0, uj1, uj2;
+
+ /*
+ * D3: Calculate qhat (\^q, in TeX notation).
+ * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
+ * let rhat = (u[j]*B + u[j+1]) mod v[1].
+ * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
+ * decrement qhat and increase rhat correspondingly.
+ * Note that if rhat >= B, v[2]*qhat < rhat*B.
+ */
+ uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */
+ uj1 = u[j + 1]; /* for D3 only */
+ uj2 = u[j + 2]; /* for D3 only */
+ if (uj0 == v1) {
+ qhat = B;
+ rhat = uj1;
+ goto qhat_too_big;
+ } else {
+ u_long nn = COMBINE(uj0, uj1);
+ qhat = nn / v1;
+ rhat = nn % v1;
+ }
+ while (v2 * qhat > COMBINE(rhat, uj2)) {
+ qhat_too_big:
+ qhat--;
+ if ((rhat += v1) >= B)
+ break;
+ }
+ /*
+ * D4: Multiply and subtract.
+ * The variable `t' holds any borrows across the loop.
+ * We split this up so that we do not require v[0] = 0,
+ * and to eliminate a final special case.
+ */
+ for (t = 0, i = n; i > 0; i--) {
+ t = u[i + j] - v[i] * qhat - t;
+ u[i + j] = LHALF(t);
+ t = (B - HHALF(t)) & (B - 1);
+ }
+ t = u[j] - t;
+ u[j] = LHALF(t);
+ /*
+ * D5: test remainder.
+ * There is a borrow if and only if HHALF(t) is nonzero;
+ * in that (rare) case, qhat was too large (by exactly 1).
+ * Fix it by adding v[1..n] to u[j..j+n].
+ */
+ if (HHALF(t)) {
+ qhat--;
+ for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
+ t += u[i + j] + v[i];
+ u[i + j] = LHALF(t);
+ t = HHALF(t);
+ }
+ u[j] = LHALF(u[j] + t);
+ }
+ q[j] = qhat;
+ } while (++j <= m); /* D7: loop on j. */
+
+ /*
+ * If caller wants the remainder, we have to calculate it as
+ * u[m..m+n] >> d (this is at most n digits and thus fits in
+ * u[m+1..m+n], but we may need more source digits).
+ */
+ if (arq) {
+ if (d) {
+ for (i = m + n; i > m; --i)
+ u[i] = (u[i] >> d) |
+ LHALF(u[i - 1] << (HALF_BITS - d));
+ u[i] = 0;
+ }
+ tmp.ul[H] = COMBINE(uspace[1], uspace[2]);
+ tmp.ul[L] = COMBINE(uspace[3], uspace[4]);
+ *arq = tmp.q;
+ }
+
+ tmp.ul[H] = COMBINE(qspace[1], qspace[2]);
+ tmp.ul[L] = COMBINE(qspace[3], qspace[4]);
+ return (tmp.q);
+}
+
+
+/*
+ * Divide two signed quads.
+ * ??? if -1/2 should produce -1 on this machine, this code is wrong
+ */
+s64
+__divdi3(s64 a, s64 b)
+{
+ u64 ua, ub, uq;
+ int neg;
+
+ if (a < 0)
+ ua = -(u64)a, neg = 1;
+ else
+ ua = a, neg = 0;
+ if (b < 0)
+ ub = -(u64)b, neg ^= 1;
+ else
+ ub = b;
+ uq = __qdivrem(ua, ub, (u64 *)0);
+ return (neg ? -uq : uq);
+}
+
+/*
+ * Divide two unsigned quads.
+ */
+u64
+__udivdi3(a, b)
+ u64 a, b;
+{
+
+ return (__qdivrem(a, b, (u64 *)0));
+}
diff --git a/xen-2.4.16/common/schedule.c b/xen-2.4.16/common/schedule.c
new file mode 100644
index 0000000000..a835151cc5
--- /dev/null
+++ b/xen-2.4.16/common/schedule.c
@@ -0,0 +1,345 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: schedule.c
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: CPU scheduling
+ * partially moved from domain.c
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/lib.h>
+#include <xeno/sched.h>
+#include <xeno/delay.h>
+#include <xeno/event.h>
+#include <xeno/time.h>
+#include <xeno/ac_timer.h>
+
+#undef SCHEDULER_TRACE
+#ifdef SCHEDULER_TRACE
+#define TRC(_x) _x
+#else
+#define TRC(_x)
+#endif
+
+/*
+ * per CPU data for the scheduler.
+ */
+typedef struct schedule_data_st
+{
+ spinlock_t lock;
+ struct list_head runqueue;
+ struct task_struct *prev, *curr;
+} __cacheline_aligned schedule_data_t;
+schedule_data_t schedule_data[NR_CPUS];
+
+static __cacheline_aligned struct ac_timer s_timer[NR_CPUS];
+
+/*
+ * Some convenience functions
+ */
+
+static inline void __add_to_runqueue(struct task_struct * p)
+{
+ list_add(&p->run_list, &schedule_data[p->processor].runqueue);
+}
+
+static inline void __move_last_runqueue(struct task_struct * p)
+{
+ list_del(&p->run_list);
+ list_add_tail(&p->run_list, &schedule_data[p->processor].runqueue);
+}
+
+static inline void __move_first_runqueue(struct task_struct * p)
+{
+ list_del(&p->run_list);
+ list_add(&p->run_list, &schedule_data[p->processor].runqueue);
+}
+
+static inline void __del_from_runqueue(struct task_struct * p)
+{
+ list_del(&p->run_list);
+ p->run_list.next = NULL;
+}
+
+static inline int __task_on_runqueue(struct task_struct *p)
+{
+ return (p->run_list.next != NULL);
+}
+
+
+/*
+ * Add a new domain to the scheduler
+ */
+void sched_add_domain(struct task_struct *p)
+{
+ p->state = TASK_UNINTERRUPTIBLE;
+}
+
+/*
+ * Remove a domain from the scheduler
+ */
+void sched_rem_domain(struct task_struct *p)
+{
+ p->state = TASK_DYING;
+}
+
+
+/*
+ * wake up a domain which had been sleeping
+ */
+int wake_up(struct task_struct *p)
+{
+ unsigned long flags;
+ int ret = 0;
+ spin_lock_irqsave(&schedule_data[p->processor].lock, flags);
+ if ( __task_on_runqueue(p) ) goto out;
+ p->state = TASK_RUNNING;
+ __add_to_runqueue(p);
+ ret = 1;
+
+ out:
+ spin_unlock_irqrestore(&schedule_data[p->processor].lock, flags);
+ return ret;
+}
+
+static void process_timeout(unsigned long __data)
+{
+ struct task_struct * p = (struct task_struct *) __data;
+ wake_up(p);
+}
+
+long schedule_timeout(long timeout)
+{
+ struct timer_list timer;
+ unsigned long expire;
+
+ switch (timeout)
+ {
+ case MAX_SCHEDULE_TIMEOUT:
+ /*
+ * These two special cases are useful to be comfortable in the caller.
+ * Nothing more. We could take MAX_SCHEDULE_TIMEOUT from one of the
+ * negative value but I' d like to return a valid offset (>=0) to allow
+ * the caller to do everything it want with the retval.
+ */
+ schedule();
+ goto out;
+ default:
+ /*
+ * Another bit of PARANOID. Note that the retval will be 0 since no
+ * piece of kernel is supposed to do a check for a negative retval of
+ * schedule_timeout() (since it should never happens anyway). You just
+ * have the printk() that will tell you if something is gone wrong and
+ * where.
+ */
+ if (timeout < 0)
+ {
+ printk(KERN_ERR "schedule_timeout: wrong timeout "
+ "value %lx from %p\n", timeout,
+ __builtin_return_address(0));
+ current->state = TASK_RUNNING;
+ goto out;
+ }
+ }
+
+ expire = timeout + jiffies;
+
+ init_timer(&timer);
+ timer.expires = expire;
+ timer.data = (unsigned long) current;
+ timer.function = process_timeout;
+
+ add_timer(&timer);
+ schedule();
+ del_timer_sync(&timer);
+
+ timeout = expire - jiffies;
+
+ out:
+ return timeout < 0 ? 0 : timeout;
+}
+
+/* RN: XXX turn this into do_halt() */
+/*
+ * yield the current process
+ */
+long do_sched_op(void)
+{
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ return 0;
+}
+
+/*
+ *
+ */
+void reschedule(struct task_struct *p)
+{
+ int cpu = p->processor;
+ struct task_struct *curr;
+ unsigned long flags;
+
+ if (p->has_cpu)
+ return;
+
+ spin_lock_irqsave(&schedule_data[cpu].lock, flags);
+ curr = schedule_data[cpu].curr;
+ if (is_idle_task(curr)) {
+ set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
+ spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
+#ifdef CONFIG_SMP
+ if (cpu != smp_processor_id())
+ smp_send_event_check_cpu(cpu);
+#endif
+ } else {
+ spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
+ }
+}
+
+
+/*
+ * Pick the next domain to run
+ */
+
+asmlinkage void schedule(void)
+{
+ struct task_struct *prev, *next, *p;
+ struct list_head *tmp;
+ int this_cpu;
+
+ need_resched_back:
+ prev = current;
+ this_cpu = prev->processor;
+
+ spin_lock_irq(&schedule_data[this_cpu].lock);
+
+ //ASSERT(!in_interrupt());
+ ASSERT(__task_on_runqueue(prev));
+
+ __move_last_runqueue(prev);
+
+ switch ( prev->state )
+ {
+ case TASK_INTERRUPTIBLE:
+ if ( signal_pending(prev) )
+ {
+ prev->state = TASK_RUNNING;
+ break;
+ }
+ default:
+ __del_from_runqueue(prev);
+ case TASK_RUNNING:;
+ }
+ clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events);
+
+ next = NULL;
+ list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) {
+ p = list_entry(tmp, struct task_struct, run_list);
+ next = p;
+ break;
+ }
+
+ prev->has_cpu = 0;
+ next->has_cpu = 1;
+
+ schedule_data[this_cpu].prev = prev;
+ schedule_data[this_cpu].curr = next;
+
+ spin_unlock_irq(&schedule_data[this_cpu].lock);
+
+ if ( unlikely(prev == next) )
+ {
+ /* We won't go through the normal tail, so do this by hand */
+ prev->policy &= ~SCHED_YIELD;
+ goto same_process;
+ }
+
+ prepare_to_switch();
+ switch_to(prev, next);
+ prev = schedule_data[this_cpu].prev;
+
+ prev->policy &= ~SCHED_YIELD;
+ if ( prev->state == TASK_DYING ) release_task(prev);
+
+ same_process:
+
+ update_dom_time(next->shared_info);
+
+ if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) )
+ goto need_resched_back;
+ return;
+}
+
+static __cacheline_aligned int count[NR_CPUS];
+static void sched_timer(unsigned long foo)
+{
+ int cpu = smp_processor_id();
+ struct task_struct *curr = schedule_data[cpu].curr;
+ s_time_t now;
+ int res;
+
+ if (count[cpu] >= 5) {
+ set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
+ count[cpu] = 0;
+ }
+ count[cpu]++;
+
+ again:
+ now = NOW();
+ s_timer[cpu].expires = now + MILLISECS(10);
+
+ TRC(printk("SCHED[%02d] timer(): now=0x%08X%08X timo=0x%08X%08X\n",
+ cpu, (u32)(now>>32), (u32)now,
+ (u32)(s_timer[cpu].expires>>32), (u32)s_timer[cpu].expires));
+ res=add_ac_timer(&s_timer[cpu]);
+ if (res==1) {
+ goto again;
+ }
+}
+/*
+ * Initialise the data structures
+ */
+void __init scheduler_init(void)
+{
+ int i;
+
+ printk("Initialising schedulers\n");
+
+ for ( i = 0; i < NR_CPUS; i++ )
+ {
+ INIT_LIST_HEAD(&schedule_data[i].runqueue);
+ spin_lock_init(&schedule_data[i].lock);
+ schedule_data[i].prev = &idle0_task;
+ schedule_data[i].curr = &idle0_task;
+
+ /* a timer for each CPU */
+ init_ac_timer(&s_timer[i]);
+ s_timer[i].function = &sched_timer;
+ }
+}
+
+/*
+ * Start a scheduler for each CPU
+ * This has to be done *after* the timers, e.g., APICs, have been initialised
+ */
+void schedulers_start(void) {
+
+ printk("Start schedulers\n");
+ __cli();
+ sched_timer(0);
+ smp_call_function(sched_timer, (void*)0, 1, 1);
+ __sti();
+}
diff --git a/xen-2.4.16/common/timer.c b/xen-2.4.16/common/timer.c
index 388275307a..77e511de34 100644
--- a/xen-2.4.16/common/timer.c
+++ b/xen-2.4.16/common/timer.c
@@ -602,7 +602,6 @@ void do_timer(struct pt_regs *regs)
p = &idle0_task;
do {
s = p->shared_info;
- s->wall_time = s->domain_time = wall;
cpu_mask |= mark_guest_event(p, _EVENT_TIMER);
}
while ( (p = p->next_task) != &idle0_task );
diff --git a/xen-2.4.16/include/asm-i386/apic.h b/xen-2.4.16/include/asm-i386/apic.h
index 86dd0fbc66..1f5670943b 100644
--- a/xen-2.4.16/include/asm-i386/apic.h
+++ b/xen-2.4.16/include/asm-i386/apic.h
@@ -55,17 +55,14 @@ static inline void ack_APIC_irq(void)
}
extern int get_maxlvt(void);
-extern void clear_local_APIC(void);
extern void connect_bsp_APIC (void);
extern void disconnect_bsp_APIC (void);
extern void disable_local_APIC (void);
extern int verify_local_APIC (void);
-extern void cache_APIC_registers (void);
extern void sync_Arb_IDs (void);
extern void init_bsp_APIC (void);
extern void setup_local_APIC (void);
extern void init_apic_mappings (void);
-extern void smp_local_timer_interrupt (struct pt_regs * regs);
extern void setup_APIC_clocks (void);
extern int APIC_init_uniprocessor (void);
diff --git a/xen-2.4.16/include/asm-i386/time.h b/xen-2.4.16/include/asm-i386/time.h
new file mode 100644
index 0000000000..9e2f77727d
--- /dev/null
+++ b/xen-2.4.16/include/asm-i386/time.h
@@ -0,0 +1,80 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: time.h
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: Architecture dependent definition of time variables
+ *
+ ****************************************************************************
+ * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
+ ****************************************************************************
+ */
+
+#ifndef _ASM_TIME_H_
+#define _ASM_TIME_H_
+
+#include <asm/types.h>
+#include <asm/msr.h>
+
+/*
+ * Cycle Counter Time
+ */
+typedef u64 cc_time_t;
+static inline cc_time_t get_cc_time()
+{
+ u64 ret;
+ rdtscll(ret);
+ return ret;
+}
+
+/*
+ * System Time
+ */
+typedef s64 s_time_t; /* System time */
+extern u32 stime_pcc; /* cycle counter value at last timer irq */
+extern s_time_t stime_now; /* time in ns at last timer IRQ */
+extern u32 stime_scale;      /* scale factor for converting cc to ns */
+
+
+/*
+ * This is the Nemesis implementation.
+ * The variables are all set in apic.c
+ * At every timer IRQ, stime_now and stime_pcc are set to the current values
+ * At calibration time, stime_scale is set
+ */
+static s_time_t get_s_time(void)
+{
+ u32 delta, low, pcc;
+ s_time_t now;
+ s_time_t incr;
+
+ /* read two values (pcc, now) "atomically" */
+again:
+ pcc = stime_pcc;
+ now = stime_now;
+ if (stime_pcc != pcc) goto again;
+
+ /* only use bottom 32bits of TSC. This should be sufficient */
+ rdtscl(low);
+ delta = low - pcc;
+
+ incr = ((s_time_t)(stime_scale) * delta) >> 10;
+ return now + incr;
+}
+
+/* update time variables once in a while */
+extern void update_time(void);
+
+/*
+ * Domain Virtual Time
+ */
+typedef u64 dv_time_t;
+
+#endif /* _ASM_TIME_H_ */
diff --git a/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h b/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h
index 1e3705e9ac..82a4d9eb2d 100644
--- a/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h
+++ b/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h
@@ -60,12 +60,12 @@ typedef struct
#define __HYPERVISOR_set_guest_stack 4
#define __HYPERVISOR_net_update 5
#define __HYPERVISOR_fpu_taskswitch 6
-#define __HYPERVISOR_yield 7
-#define __HYPERVISOR_exit 8
-#define __HYPERVISOR_dom0_op 9
-#define __HYPERVISOR_network_op 10
-#define __HYPERVISOR_set_debugreg 11
-#define __HYPERVISOR_get_debugreg 12
+#define __HYPERVISOR_sched_op 7
+#define __HYPERVISOR_exit 10
+#define __HYPERVISOR_dom0_op 11
+#define __HYPERVISOR_network_op 12
+#define __HYPERVISOR_set_debugreg 13
+#define __HYPERVISOR_get_debugreg 14
#define TRAP_INSTR "int $0x82"
@@ -142,36 +142,33 @@ typedef struct shared_info_st {
*/
unsigned long failsafe_address;
- /*
- * CPU ticks since start of day.
- * `wall_time' counts CPU ticks in real time.
- * `domain_time' counts CPU ticks during which this domain has run.
- */
- unsigned long ticks_per_ms; /* CPU ticks per millisecond */
- /*
- * Current wall_time can be found by rdtsc. Only possible use of
- * variable below is that it provides a timestamp for last update
- * of domain_time.
+ /*
+ * Time:
+ * The following abstractions are exposed: System Time, Wall Clock
+ * Time, Domain Virtual Time. Domains can access Cycle counter time
+ * directly.
*/
- unsigned long long wall_time;
- unsigned long long domain_time;
- /*
- * Timeouts for points at which guest OS would like a callback.
- * This will probably be backed up by a timer heap in the guest OS.
- * In Linux we use timeouts to update 'jiffies'.
+ /* System Time */
+ long long system_time; /* in ns */
+ unsigned long st_timestamp; /* cyclecounter at last update */
+ unsigned long ticks_per_ms; /* CPU ticks per millisecond */
+ /* Wall Clock Time */
+ long tv_sec; /* essentially a struct timeval */
+ long tv_usec;
+ long long wc_timestamp; /* system time at last update */
+
+ /* Domain Virtual Time */
+ unsigned long long domain_time;
+
+ /*
+ * Timeout values:
+ * Allow a domain to specify a timeout value in system time and
+ * domain virtual time.
*/
unsigned long long wall_timeout;
unsigned long long domain_timeout;
- /*
- * Real-Time Clock. This shows time, in seconds, since 1.1.1980.
- * The timestamp shows the CPU 'wall time' when RTC was last read.
- * Thus it allows a mapping between 'real time' and 'wall time'.
- */
- unsigned long rtc_time;
- unsigned long long rtc_timestamp;
-
} shared_info_t;
/*
diff --git a/xen-2.4.16/include/xeno/ac_timer.h b/xen-2.4.16/include/xeno/ac_timer.h
new file mode 100644
index 0000000000..f78082cb6b
--- /dev/null
+++ b/xen-2.4.16/include/xeno/ac_timer.h
@@ -0,0 +1,65 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: ac_timer.h
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: Accurate timer for the Hypervisor
+ *
+ ****************************************************************************
+ * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
+ ****************************************************************************
+ */
+
+#ifndef _AC_TIMER_H_
+#define _AC_TIMER_H_
+
+#include <time.h> /* include notion of time */
+
+/*
+ * The Xen Hypervisor provides two types of timers:
+ *
+ * - Linux style, jiffy based timers for legacy code and coarse grain timeouts
+ * These are defined in ./include/xeno/timer.h and implemented in
+ * ./common/timer.c. Unlike in Linux they are executed not on a periodic
+ * timer interrupt but "occasionally" with somewhat lesser accuracy.
+ *
+ * - accurate timers defined in this file and implemented in
+ * ./common/ac_timer.c. These are implemented using a programmable timer
+ * interrupt and are thus as accurate as the hardware allows. Where possible
+ * we use the local APIC for this purpose. However, this fact is hidden
+ *   behind an architecture-independent layer.
+ * accurate timers are programmed using system time.
+ *
+ * The interface to accurate timers is very similar to Linux timers with the
+ * exception that the expires value is not expressed in jiffies but in ns from
+ * boot time. Its implementation however, is entirely different.
+ */
+
+struct ac_timer {
+ struct list_head timer_list;
+    s_time_t         expires;   /* system-time timeout value */
+ unsigned long data;
+ void (*function)(unsigned long);
+};
+
+/* interface for "clients" */
+extern int add_ac_timer(struct ac_timer *timer);
+extern int rem_ac_timer(struct ac_timer *timer);
+extern int mod_ac_timer(struct ac_timer *timer, s_time_t new_time);
+static inline void init_ac_timer(struct ac_timer *timer)
+{
+ //timer->next = NULL;
+}
+
+/* interface used by programmable timer, implemented hardware dependent */
+extern int reprogram_ac_timer(s_time_t timeout);
+extern void do_ac_timer(void);
+
+#endif /* _AC_TIMER_H_ */
diff --git a/xen-2.4.16/include/xeno/sched.h b/xen-2.4.16/include/xeno/sched.h
index 295335bd16..02cf6fa5e1 100644
--- a/xen-2.4.16/include/xeno/sched.h
+++ b/xen-2.4.16/include/xeno/sched.h
@@ -42,7 +42,8 @@ extern struct mm_struct init_mm;
struct task_struct {
int processor;
- int state, hyp_events;
+ int state;
+ int hyp_events;
unsigned int domain;
/* An unsafe pointer into a shared data area. */
@@ -85,11 +86,24 @@ struct task_struct {
unsigned long flags;
};
+/*
+ * domain states
+ * TASK_RUNNING: Domain is runable and should be on a run queue
+ * TASK_INTERRUPTIBLE:   Domain is blocked but may be woken up by an event
+ * or expiring timer
+ * TASK_UNINTERRUPTIBLE: Domain is blocked but may not be woken up by an
+ * arbitrary event or timer.
+ * TASK_WAIT:            Domain's CPU allocation has expired.
+ * TASK_STOPPED: not really used in Xen
+ * TASK_DYING: Domain is about to cross over to the land of the dead.
+ */
+
#define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1
#define TASK_UNINTERRUPTIBLE 2
-#define TASK_STOPPED 4
-#define TASK_DYING 8
+#define TASK_WAIT 4
+#define TASK_DYING 16
+/* #define TASK_STOPPED 8 not really used */
#define SCHED_YIELD 0x10
@@ -150,52 +164,23 @@ extern void free_irq(unsigned int, void *);
extern unsigned long wait_init_idle;
#define init_idle() clear_bit(smp_processor_id(), &wait_init_idle);
+
+
+/*
+ * Scheduler functions (in schedule.c)
+ */
#define set_current_state(_s) do { current->state = (_s); } while (0)
#define MAX_SCHEDULE_TIMEOUT LONG_MAX
+void scheduler_init(void);
+void start_scheduler(void);
+void sched_add_domain(struct task_struct *p);
+void sched_rem_domain(struct task_struct *p);
+int wake_up(struct task_struct *p);
long schedule_timeout(long timeout);
-asmlinkage void schedule(void);
-
+long do_yield(void);
void reschedule(struct task_struct *p);
+asmlinkage void schedule(void);
-typedef struct schedule_data_st
-{
- spinlock_t lock;
- struct list_head runqueue;
- struct task_struct *prev, *curr;
-} __cacheline_aligned schedule_data_t;
-extern schedule_data_t schedule_data[NR_CPUS];
-
-static inline void __add_to_runqueue(struct task_struct * p)
-{
- list_add(&p->run_list, &schedule_data[p->processor].runqueue);
-}
-
-
-static inline void __move_last_runqueue(struct task_struct * p)
-{
- list_del(&p->run_list);
- list_add_tail(&p->run_list, &schedule_data[p->processor].runqueue);
-}
-
-
-static inline void __move_first_runqueue(struct task_struct * p)
-{
- list_del(&p->run_list);
- list_add(&p->run_list, &schedule_data[p->processor].runqueue);
-}
-
-static inline void __del_from_runqueue(struct task_struct * p)
-{
- list_del(&p->run_list);
- p->run_list.next = NULL;
-}
-
-static inline int __task_on_runqueue(struct task_struct *p)
-{
- return (p->run_list.next != NULL);
-}
-
-int wake_up(struct task_struct *p);
#define signal_pending(_p) ((_p)->hyp_events || \
(_p)->shared_info->events)
diff --git a/xen-2.4.16/include/xeno/time.h b/xen-2.4.16/include/xeno/time.h
index 33837c5009..5bb717fb2d 100644
--- a/xen-2.4.16/include/xeno/time.h
+++ b/xen-2.4.16/include/xeno/time.h
@@ -1,13 +1,61 @@
-/******************************************************************************
- * time.h
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: time.h
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Nov 2002
+ *
+ * Environment: Xen Hypervisor
+ * Description: This file provides a one stop shop for all time related
+ * issues within the hypervisor.
+ *
+ * The Hypervisor provides the following notions of time:
+ * Cycle Counter Time, System Time, Wall Clock Time, and
+ * Domain Virtual Time.
+ *
+ ****************************************************************************
+ * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
+ ****************************************************************************
*/
+
+
#ifndef __XENO_TIME_H__
#define __XENO_TIME_H__
+#include <asm/ptrace.h> /* XXX Only used for do_timer which should be moved */
+#include <asm/time.h> /* pull in architecture specific time definition */
#include <xeno/types.h>
-#include <asm/ptrace.h>
+/*
+ * Cycle Counter Time (defined in asm/time.h)
+ */
+
+
+/*
+ * System Time
+ * 64 bit value containing the nanoseconds elapsed since boot time.
+ * This value is adjusted by frequency drift.
+ * NOW() returns the current time.
+ * The other macros are for convenience to approximate short intervals
+ * of real time into system time
+ */
+#define NOW() ((s_time_t)get_s_time())
+#define SECONDS(_s) (((s_time_t)(_s)) * 1000000000UL )
+#define TENTHS(_ts) (((s_time_t)(_ts)) * 100000000UL )
+#define HUNDREDTHS(_hs) (((s_time_t)(_hs)) * 10000000UL )
+#define MILLISECS(_ms) (((s_time_t)(_ms)) * 1000000UL )
+#define MICROSECS(_us) (((s_time_t)(_us)) * 1000UL )
+#define Time_Max ((s_time_t) 0x7fffffffffffffffLL)
+#define FOREVER Time_Max
+
+/*
+ * Wall Clock Time
+ */
struct timeval {
long tv_sec; /* seconds */
long tv_usec; /* microseconds */
@@ -25,6 +73,13 @@ extern void get_fast_time(struct timeval *tv);
extern void (*do_get_fast_time)(struct timeval *);
#endif
+/*
+ * Domain Virtual Time (defined in asm/time.h)
+ */
+/* XXX Interface for getting and setting still missing */
+
+
+/* XXX move this */
extern void do_timer(struct pt_regs *regs);
#endif /* __XENO_TIME_H__ */
diff --git a/xen-2.4.16/include/xeno/timer.h b/xen-2.4.16/include/xeno/timer.h
index c4f01ada59..dcde75b182 100644
--- a/xen-2.4.16/include/xeno/timer.h
+++ b/xen-2.4.16/include/xeno/timer.h
@@ -12,10 +12,17 @@
* The "data" field enables use of a common timeout function for several
* timeouts. You can use this field to distinguish between the different
* invocations.
+ *
+ * RN: Unlike the Linux timers, which are executed at the periodic timer
+ * interrupt, in Xen, the timer list is only checked "occasionally", thus
+ * its accuracy might be somewhat worse than under Linux. However, the
+ * hypervisor should be purely event-driven and, in fact, in the current
+ * implementation, timers are only used for watchdog purpose at a very
+ * coarse granularity anyway. Thus this is not a problem.
*/
struct timer_list {
struct list_head list;
- unsigned long expires;
+ unsigned long expires; /* jiffies */
unsigned long data;
void (*function)(unsigned long);
};
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c
index 12db77164b..a35ef1cc8a 100644
--- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c
+++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c
@@ -42,7 +42,7 @@ extern struct drive_info_struct drive_info;
EXPORT_SYMBOL(drive_info);
#endif
-extern unsigned long get_cmos_time(void);
+//extern unsigned long get_cmos_time(void);
/* platform dependent support */
EXPORT_SYMBOL(boot_cpu_data);
@@ -58,7 +58,7 @@ EXPORT_SYMBOL(probe_irq_mask);
EXPORT_SYMBOL(kernel_thread);
EXPORT_SYMBOL(pm_idle);
EXPORT_SYMBOL(pm_power_off);
-EXPORT_SYMBOL(get_cmos_time);
+//EXPORT_SYMBOL(get_cmos_time);
EXPORT_SYMBOL(apm_info);
#ifdef CONFIG_DEBUG_IOVIRT
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c
index 9e26f3d65a..608049ece0 100644
--- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c
+++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c
@@ -86,7 +86,7 @@ void cpu_idle (void)
while (1) {
while (!current->need_resched)
- HYPERVISOR_yield();
+ HYPERVISOR_do_sched_op(NULL);
schedule();
check_pgt_cache();
}
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c
index c728eb15e6..2557918c6d 100644
--- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c
+++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c
@@ -102,47 +102,26 @@ static inline unsigned long ticks_to_us(unsigned long ticks)
return(hi);
}
-static inline unsigned long do_gettimeoffset(void)
+static long long get_s_time(void)
{
-#if 0
- register unsigned long eax, edx;
-
- /* Read the Time Stamp Counter */
-
- rdtsc(eax,edx);
-
- /* .. relative to previous jiffy (32 bits is enough) */
- eax -= last_tsc_low; /* tsc_low delta */
-
- /*
- * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
- * = (tsc_low delta) * (usecs_per_clock)
- * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
- *
- * Using a mull instead of a divl saves up to 31 clock cycles
- * in the critical path.
- */
-
- edx = ticks_to_us(eax);
-
- /* our adjusted time offset in microseconds */
- return delay_at_last_interrupt + edx;
-#else
- /*
- * We should keep a 'last_tsc_low' thing which incorporates
- * delay_at_last_interrupt, adjusted in timer_interrupt after
- * do_timer_interrupt. It would look at change in xtime, and
- * make appropriate adjustment to a last_tsc variable.
- *
- * We'd be affected by rounding error in ticks_per_usec, and by
- * processor clock drift (which should be no more than in an
- * external interrupt source anyhow).
- *
- * Perhaps a bit rough and ready, but never mind!
- */
- return 0;
-#endif
+ u32 delta, low, pcc;
+ long long now;
+ long long incr;
+
+ /* read two values (pcc, now) "atomically" */
+again:
+ pcc = HYPERVISOR_shared_info->st_timestamp;
+ now = HYPERVISOR_shared_info->system_time;
+ if (HYPERVISOR_shared_info->st_timestamp != pcc) goto again;
+
+ /* only use bottom 32bits of TSC. This should be sufficient */
+ rdtscl(low);
+ delta = low - pcc;
+
+ incr = ((long long)(ticks_to_us(delta)*1000));
+ return now + incr;
}
+#define NOW() ((long long)get_s_time())
/*
* This version of gettimeofday has microsecond resolution
@@ -151,15 +130,15 @@ static inline unsigned long do_gettimeoffset(void)
void do_gettimeofday(struct timeval *tv)
{
unsigned long flags;
- unsigned long usec, sec, lost;
+ unsigned long usec, sec;
read_lock_irqsave(&xtime_lock, flags);
- usec = do_gettimeoffset();
- lost = jiffies - wall_jiffies;
- if ( lost != 0 ) usec += lost * (1000000 / HZ);
- sec = xtime.tv_sec;
- usec += xtime.tv_usec;
- read_unlock_irqrestore(&xtime_lock, flags);
+
+ usec = ((unsigned long)(NOW()-HYPERVISOR_shared_info->wc_timestamp))/1000;
+ sec = HYPERVISOR_shared_info->tv_sec;
+ usec += HYPERVISOR_shared_info->tv_usec;
+
+ read_unlock_irqrestore(&xtime_lock, flags);
while ( usec >= 1000000 )
{
@@ -173,6 +152,8 @@ void do_gettimeofday(struct timeval *tv)
void do_settimeofday(struct timeval *tv)
{
+/* XXX RN: should do something special here for dom0 */
+#if 0
write_lock_irq(&xtime_lock);
/*
* This is revolting. We need to set "xtime" correctly. However, the
@@ -195,6 +176,7 @@ void do_settimeofday(struct timeval *tv)
time_maxerror = NTP_PHASE_LIMIT;
time_esterror = NTP_PHASE_LIMIT;
write_unlock_irq(&xtime_lock);
+#endif
}
@@ -235,19 +217,6 @@ static struct irqaction irq_timer = {
};
-unsigned long get_cmos_time(void)
-{
- unsigned long secs = HYPERVISOR_shared_info->rtc_time;
- unsigned long diff;
-
- rdtscl(diff);
- diff -= (unsigned long)HYPERVISOR_shared_info->rtc_timestamp;
-
- secs += ticks_to_us(diff);
-
- return(secs + ticks_to_secs(diff));
-}
-
/* Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). */
static unsigned long __init calibrate_tsc(void)
@@ -268,7 +237,6 @@ void __init time_init(void)
unsigned long long alarm;
fast_gettimeoffset_quotient = calibrate_tsc();
- do_get_fast_time = do_gettimeofday;
/* report CPU clock rate in Hz.
* The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
@@ -299,6 +267,5 @@ void __init time_init(void)
HYPERVISOR_shared_info->domain_timeout = ~0ULL;
clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events);
- xtime.tv_sec = get_cmos_time();
- xtime.tv_usec = 0;
+ do_gettimeofday(&xtime);
}
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h
index 839feed153..0d0940c9bb 100644
--- a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h
+++ b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h
@@ -203,12 +203,13 @@ static inline int HYPERVISOR_fpu_taskswitch(void)
return ret;
}
-static inline int HYPERVISOR_yield(void)
+static inline int HYPERVISOR_do_sched_op(void *sched_op)
{
int ret;
__asm__ __volatile__ (
TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_yield) );
+ : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
+ "b" (sched_op) );
return ret;
}