diff options
-rw-r--r-- | .rootkeys | 4 | ||||
-rw-r--r-- | BitKeeper/etc/logging_ok | 1 | ||||
-rw-r--r-- | xen-2.4.16/arch/i386/apic.c | 321 | ||||
-rw-r--r-- | xen-2.4.16/arch/i386/setup.c | 5 | ||||
-rw-r--r-- | xen-2.4.16/arch/i386/time.c | 236 | ||||
-rw-r--r-- | xen-2.4.16/common/ac_timer.c | 304 | ||||
-rw-r--r-- | xen-2.4.16/common/domain.c | 202 | ||||
-rw-r--r-- | xen-2.4.16/common/lib.c | 355 | ||||
-rw-r--r-- | xen-2.4.16/common/schedule.c | 345 | ||||
-rw-r--r-- | xen-2.4.16/common/timer.c | 1 | ||||
-rw-r--r-- | xen-2.4.16/include/asm-i386/apic.h | 3 | ||||
-rw-r--r-- | xen-2.4.16/include/asm-i386/time.h | 80 | ||||
-rw-r--r-- | xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h | 45 | ||||
-rw-r--r-- | xen-2.4.16/include/xeno/ac_timer.h | 65 | ||||
-rw-r--r-- | xen-2.4.16/include/xeno/sched.h | 73 | ||||
-rw-r--r-- | xen-2.4.16/include/xeno/time.h | 61 | ||||
-rw-r--r-- | xen-2.4.16/include/xeno/timer.h | 9 | ||||
-rw-r--r-- | xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c | 4 | ||||
-rw-r--r-- | xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c | 2 | ||||
-rw-r--r-- | xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c | 91 | ||||
-rw-r--r-- | xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h | 5 |
21 files changed, 1739 insertions, 473 deletions
@@ -35,6 +35,7 @@ 3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen-2.4.16/arch/i386/usercopy.c 3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen-2.4.16/arch/i386/xeno.lds 3ddb79bdff-gj-jFGKjOejeHLqL8Lg xen-2.4.16/common/Makefile +3e397e66AyyD5fYraAySWuwi9uqSXg xen-2.4.16/common/ac_timer.c 3ddb79bddEYJbcURvqqcx99Yl2iAhQ xen-2.4.16/common/block.c 3ddb79bdrqnW93GR9gZk1OJe1qK-iQ xen-2.4.16/common/brlock.c 3ddb79bdLX_P6iB7ILiblRLWvebapg xen-2.4.16/common/dom0_ops.c @@ -47,6 +48,7 @@ 3ddb79bdN51qpRC-6bOH-v5hl_AK6A xen-2.4.16/common/network.c 3ddb79bdD4SLmmdMD7yLW5HcUWucXw xen-2.4.16/common/page_alloc.c 3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen-2.4.16/common/resource.c +3e397e6619PgAfBbw2XFbXkewvUWgw xen-2.4.16/common/schedule.c 3ddb79bdB9RNMnkQnUyZ5C9hhMSQQw xen-2.4.16/common/slab.c 3ddb79bd0gVQYmL2zvuJnldvD0AGxQ xen-2.4.16/common/softirq.c 3ddb79bdQqFHtHRGEO2dsxGgo6eAhw xen-2.4.16/common/timer.c @@ -158,6 +160,7 @@ 3ddb79c3e9DCEoR-WzNxcOQDzLu7BQ xen-2.4.16/include/asm-i386/softirq.h 3ddb79c3NiyQE2vQnyGiaBnNjBO1rA xen-2.4.16/include/asm-i386/spinlock.h 3ddb79c3ezddh34MdelJpa5tNR00Dw xen-2.4.16/include/asm-i386/system.h +3e397e66xPNc8eaSqC9pPbyAtRGzHA xen-2.4.16/include/asm-i386/time.h 3ddb79c4HugMq7IYGxcQKFBpKwKhzA xen-2.4.16/include/asm-i386/types.h 3ddb79c3M2n1ROZH6xk3HbyN4CPDqg xen-2.4.16/include/asm-i386/uaccess.h 3ddb79c3uPGcP_l_2xyGgBSWd5aC-Q xen-2.4.16/include/asm-i386/unaligned.h @@ -168,6 +171,7 @@ 3ddb79c4R4iVwqIIeychVQYmIH4FUg xen-2.4.16/include/scsi/scsi_ioctl.h 3ddb79c4yw_mfd4Uikn3v_IOPRpa1Q xen-2.4.16/include/scsi/scsicam.h 3ddb79c4HKPMLvDBP9LxzPi_szVxGA xen-2.4.16/include/scsi/sg.h +3e397e66m2tO3s-J8Jnr7Ws_tGoPTg xen-2.4.16/include/xeno/ac_timer.h 3ddb79c0nTsjSpVK4ZVTI9WwN24xtQ xen-2.4.16/include/xeno/blk.h 3ddb79c0dVhTHLsv6CPTf4baKix4mA xen-2.4.16/include/xeno/blkdev.h 3ddb79c18ePBgitnOs7GiOCFilODVw xen-2.4.16/include/xeno/blkpg.h diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok index 2fd8b201d7..cca59e5d2c 100644 --- a/BitKeeper/etc/logging_ok +++ b/BitKeeper/etc/logging_ok 
@@ -6,5 +6,6 @@ kaf24@labyrinth.cl.cam.ac.uk kaf24@plym.cl.cam.ac.uk kaf24@striker.cl.cam.ac.uk lynx@idefix.cl.cam.ac.uk +rn@wyvis.camb.intel-research.net smh22@boulderdash.cl.cam.ac.uk smh22@uridium.cl.cam.ac.uk diff --git a/xen-2.4.16/arch/i386/apic.c b/xen-2.4.16/arch/i386/apic.c index a09613bee4..d479a69322 100644 --- a/xen-2.4.16/arch/i386/apic.c +++ b/xen-2.4.16/arch/i386/apic.c @@ -1,3 +1,23 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: apic.c + * Author: + * Changes: + * + * Date: Nov 2002 + * + * Environment: Xen Hypervisor + * Description: programmable APIC timer interface for accurate timers + * modified version of Linux' apic.c + * + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + */ + /* * Local APIC handling, local APIC timers * @@ -10,6 +30,7 @@ * for testing these extensively. */ + #include <xeno/config.h> #include <xeno/init.h> #include <xeno/sched.h> @@ -25,6 +46,17 @@ #include <asm/pgalloc.h> #include <asm/hardirq.h> +#include <xeno/ac_timer.h> + + +#undef APIC_TIME_TRACE +#ifdef APIC_TIME_TRACE +#define TRC(_x) _x +#else +#define TRC(_x) +#endif + + /* Using APIC to generate smp_local_timer_interrupt? 
*/ int using_apic_timer = 0; @@ -39,7 +71,7 @@ int get_maxlvt(void) return maxlvt; } -void clear_local_APIC(void) +static void clear_local_APIC(void) { int maxlvt; unsigned long v; @@ -313,7 +345,6 @@ void __init setup_local_APIC (void) */ value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; if (!smp_processor_id()) { -/* && (pic_mode || !value)) { */ value = APIC_DM_EXTINT; printk("enabled ExtINT on CPU#%d\n", smp_processor_id()); } else { @@ -340,11 +371,9 @@ void __init setup_local_APIC (void) value = apic_read(APIC_ESR); printk("ESR value before enabling vector: %08lx\n", value); - value = ERROR_APIC_VECTOR; // enables sending errors + value = ERROR_APIC_VECTOR; /* enables sending errors */ apic_write_around(APIC_LVTERR, value); - /* - * spec says clear errors after enabling vector. - */ + /* spec says clear errors after enabling vector. */ if (maxlvt > 3) apic_write(APIC_ESR, 0); value = apic_read(APIC_ESR); @@ -416,9 +445,7 @@ static int __init detect_init_APIC (void) boot_cpu_physical_apicid = 0; printk("Found and enabled local APIC!\n"); - apic_pm_init1(); - return 0; no_apic: @@ -467,14 +494,24 @@ void __init init_apic_mappings(void) #endif } -/* - * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts - * per second. We assume that the caller has already set up the local - * APIC. - * - * The APIC timer is not exactly sync with the external timer chip, it - * closely follows bus clocks. - */ +/***************************************************************************** + * APIC calibration + * + * The APIC is programmed in bus cycles. + * Timeout values should specified in real time units. + * The "cheapest" time source is the cyclecounter. + * + * Thus, we need a mappings from: bus cycles <- cycle counter <- system time + * + * The calibration is currently a bit shoddy since it requires the external + * timer chip to generate periodic timer interupts. 
+ *****************************************************************************/ + +/* used for system time scaling */ +static unsigned int bus_freq; +static u32 bus_cycle; /* length of one bus cycle in pico-seconds */ +static u32 bus_scale; /* scaling factor convert ns to bus cycles */ + /* * The timer chip is already set up at HZ interrupts per second here, @@ -485,17 +522,12 @@ static unsigned int __init get_8254_timer_count(void) { /*extern spinlock_t i8253_lock;*/ /*unsigned long flags;*/ - unsigned int count; - /*spin_lock_irqsave(&i8253_lock, flags);*/ - outb_p(0x00, 0x43); count = inb_p(0x40); count |= inb_p(0x40) << 8; - /*spin_unlock_irqrestore(&i8253_lock, flags);*/ - return count; } @@ -503,112 +535,67 @@ void __init wait_8254_wraparound(void) { unsigned int curr_count, prev_count=~0; int delta; - curr_count = get_8254_timer_count(); - do { prev_count = curr_count; curr_count = get_8254_timer_count(); delta = curr_count-prev_count; - /* * This limit for delta seems arbitrary, but it isn't, it's * slightly above the level of error a buggy Mercury/Neptune * chipset timer can cause. */ - } while (delta < 300); } /* * This function sets up the local APIC timer, with a timeout of * 'clocks' APIC bus clock. During calibration we actually call - * this function twice on the boot CPU, once with a bogus timeout - * value, second time for real. The other (noncalibrating) CPUs - * call this function only once, with the real, calibrated value. + * this function with a very large value and read the current time after + * a well defined period of time as expired. + * + * Calibration is only performed once, for CPU0! * * We do reads before writes even if unnecessary, to get around the * P5 APIC double write bug. 
*/ - -#define APIC_DIVISOR 16 - -void __setup_APIC_LVTT(unsigned int clocks) +#define APIC_DIVISOR 1 +static void __setup_APIC_LVTT(unsigned int clocks) { unsigned int lvtt1_value, tmp_value; - - lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV) | - APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; + lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV)|LOCAL_TIMER_VECTOR; apic_write_around(APIC_LVTT, lvtt1_value); - - /* - * Divide PICLK by 16 - */ tmp_value = apic_read(APIC_TDCR); - apic_write_around(APIC_TDCR, (tmp_value - & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) - | APIC_TDR_DIV_16); - + apic_write_around(APIC_TDCR, (tmp_value | APIC_TDR_DIV_1)); apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); } +/* + * this is done for every CPU from setup_APIC_clocks() below. + * We setup each local APIC with a zero timeout value for now. + * Unlike Linux, we don't have to wait for slices etc. + */ void setup_APIC_timer(void * data) { - unsigned int clocks = (unsigned int) data, slice, t0, t1; unsigned long flags; - int delta; - __save_flags(flags); __sti(); - /* - * ok, Intel has some smart code in their APIC that knows - * if a CPU was in 'hlt' lowpower mode, and this increases - * its APIC arbitration priority. To avoid the external timer - * IRQ APIC event being in synchron with the APIC clock we - * introduce an interrupt skew to spread out timer events. - * - * The number of slices within a 'big' timeslice is smp_num_cpus+1 - */ - - slice = clocks / (smp_num_cpus+1); - printk("cpu: %d, clocks: %d, slice: %d\n", - smp_processor_id(), clocks, slice); - - /* - * Wait for IRQ0's slice: - */ - wait_8254_wraparound(); - - __setup_APIC_LVTT(clocks); - - t0 = apic_read(APIC_TMICT)*APIC_DIVISOR; - /* Wait till TMCCT gets reloaded from TMICT... */ - do { - t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR; - delta = (int)(t0 - t1 - slice*(smp_processor_id()+1)); - } while (delta >= 0); - /* Now wait for our slice for real. 
*/ - do { - t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR; - delta = (int)(t0 - t1 - slice*(smp_processor_id()+1)); - } while (delta < 0); - - __setup_APIC_LVTT(clocks); - - printk("CPU%d<T0:%d,T1:%d,D:%d,S:%d,C:%d>\n", - smp_processor_id(), t0, t1, delta, slice, clocks); - + printk("cpu: %d: setup timer.", smp_processor_id()); + __setup_APIC_LVTT(0); + printk("done\n"); __restore_flags(flags); } /* * In this function we calibrate APIC bus clocks to the external timer. * - * We want to do the calibration only once since we - * want to have local timer irqs syncron. CPUs connected - * by the same APIC bus have the very same bus frequency. - * And we want to have irqs off anyways, no accidental - * APIC irq that way. + * As a result we have the Bys Speed and CPU speed in Hz. + * + * We want to do the calibration only once (for CPU0). CPUs connected by the + * same APIC bus have the very same bus frequency. + * + * This bit is a bit shoddy since we use the very same periodic timer interrupt + * we try to eliminate to calibrate the APIC. */ int __init calibrate_APIC_clock(void) @@ -619,95 +606,152 @@ int __init calibrate_APIC_clock(void) int i; const int LOOPS = HZ/10; - printk("calibrating APIC timer ...\n"); + printk("calibrating APIC timer for CPU%d...\n", smp_processor_id()); - /* - * Put whatever arbitrary (but long enough) timeout + /* Put whatever arbitrary (but long enough) timeout * value into the APIC clock, we just want to get the - * counter running for calibration. - */ + * counter running for calibration. */ __setup_APIC_LVTT(1000000000); - /* - * The timer chip counts down to zero. Let's wait + /* The timer chip counts down to zero. Let's wait * for a wraparound to start exact measurement: - * (the current tick might have been already half done) - */ - + * (the current tick might have been already half done) */ wait_8254_wraparound(); - /* - * We wrapped around just now. Let's start: - */ + /* We wrapped around just now. 
Let's start: */ rdtscll(t1); tt1 = apic_read(APIC_TMCCT); - /* - * Let's wait LOOPS wraprounds: - */ + /* Let's wait LOOPS wraprounds: */ for (i = 0; i < LOOPS; i++) wait_8254_wraparound(); tt2 = apic_read(APIC_TMCCT); rdtscll(t2); - /* - * The APIC bus clock counter is 32 bits only, it + /* The APIC bus clock counter is 32 bits only, it * might have overflown, but note that we use signed * longs, thus no extra care needed. - * - * underflown to be exact, as the timer counts down ;) - */ - + * underflown to be exact, as the timer counts down ;) */ result = (tt1-tt2)*APIC_DIVISOR/LOOPS; - printk("..... CPU clock speed is %ld.%04ld MHz.\n", + printk("..... CPU speed is %ld.%04ld MHz.\n", ((long)(t2-t1)/LOOPS)/(1000000/HZ), ((long)(t2-t1)/LOOPS)%(1000000/HZ)); - printk("..... host bus clock speed is %ld.%04ld MHz.\n", + printk("..... Bus speed is %ld.%04ld MHz.\n", result/(1000000/HZ), result%(1000000/HZ)); + /* set up multipliers for accurate timer code */ + bus_freq = result*HZ; + bus_cycle = (u32) (1000000000000LL/bus_freq); /* in pico seconds */ + bus_scale = (1000*262144)/bus_cycle; + + /* print results */ + printk("..... bus_freq = %u Hz\n", bus_freq); + printk("..... bus_cycle = %u ps\n", bus_cycle); + printk("..... bus_scale = %u \n", bus_scale); + /* reset APIC to zero timeout value */ + __setup_APIC_LVTT(0); return result; } -static unsigned int calibration_result; - +/* + * initialise the APIC timers for all CPUs + * we start with the first and find out processor frequency and bus speed + */ void __init setup_APIC_clocks (void) { printk("Using local APIC timer interrupts.\n"); using_apic_timer = 1; - __cli(); - - calibration_result = calibrate_APIC_clock(); - /* - * Now set up the timer for real. - */ - setup_APIC_timer((void *)calibration_result); - + /* calibrate CPU0 for CPU speed and BUS speed */ + bus_freq = calibrate_APIC_clock(); + /* Now set up the timer for real. 
*/ + setup_APIC_timer((void *)bus_freq); __sti(); - /* and update all other cpus */ - smp_call_function(setup_APIC_timer, (void *)calibration_result, 1, 1); + smp_call_function(setup_APIC_timer, (void *)bus_freq, 1, 1); } #undef APIC_DIVISOR +/* + * reprogram the APIC timer. Timeoutvalue is in ns from start of boot + * returns 1 on success + * returns 0 if the timeout value is too small or in the past. + */ + + +int reprogram_ac_timer(s_time_t timeout) +{ + int cpu = smp_processor_id(); + s_time_t now; + s_time_t expire; + u64 apic_tmict; + + now = NOW(); + expire = timeout - now; /* value from now */ + + + if (expire <= 0) { + printk("APICT[%02d] Timeout value in the past %lld > %lld\n", + cpu, now, timeout); + return 0; /* timeout value in the past */ + } + + /* conversion to bus units */ + apic_tmict = (((u64)bus_scale) * expire)>>18; + + if (apic_tmict >= 0xffffffff) { + printk("APICT[%02d] Timeout value too large\n", cpu); + apic_tmict = 0xffffffff; + } + if (apic_tmict == 0) { + printk("APICT[%02d] timeout value too small\n", cpu); + return 0; + } + + /* programm timer */ + apic_write(APIC_TMICT, (unsigned long)apic_tmict); + + TRC(printk("APICT[%02d] reprog(): expire=%lld %u\n", + cpu, expire, apic_tmict)); + return 1; +} /* - * Local timer interrupt handler. It does both profiling and - * process statistics/rescheduling. - * - * We do profiling in every local tick, statistics/rescheduling - * happen only every 'profiling multiplier' ticks. The default - * multiplier is 1 and it can be changed by writing the new multiplier - * value into /proc/profile. + * Local timer interrupt handler. + * here the programmable, accurate timers are executed. + * If we are on CPU0 and we should have updated jiffies, we do this + * as well and and deal with traditional linux timers. Note, that of + * the timer APIC on CPU does not go off every 10ms or so the linux + * timers loose accuracy, but that shouldn't be a problem. 
*/ +static s_time_t last_cpu0_tirq = 0; inline void smp_local_timer_interrupt(struct pt_regs * regs) { - update_process_times(user_mode(regs)); + int cpu = smp_processor_id(); + s_time_t diff, now; + + /* if CPU 0 do old timer stuff */ + if (cpu == 0) { + update_time(); + now = NOW(); + diff = now - last_cpu0_tirq; + /* this uses three 64bit divisions which should be avoided!! */ + if (diff >= MILLISECS(10)) { + /* update jiffies */ + (*(unsigned long *)&jiffies) += diff / MILLISECS(10); + + /* do traditional linux timers */ + do_timer(regs); + last_cpu0_tirq = now; + } + } + /* call timer function */ + do_ac_timer(); } /* @@ -732,13 +776,11 @@ void smp_apic_timer_interrupt(struct pt_regs * regs) /* * NOTE! We'd better ACK the irq immediately, * because timer handling can be slow. + * XXX is this save? */ ack_APIC_irq(); - /* - * update_process_times() expects us to have done irq_enter(). - * Besides, if we don't timer interrupts ignore the global - * interrupt lock, which is the WrongThing (tm) to do. - */ + + /* call the local handler */ irq_enter(cpu, 0); smp_local_timer_interrupt(regs); irq_exit(cpu, 0); @@ -809,7 +851,8 @@ int __init APIC_init_uniprocessor (void) /* * Complain if the BIOS pretends there is one. 
*/ - if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { + if (!cpu_has_apic&&APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) + { printk("BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); return -1; diff --git a/xen-2.4.16/arch/i386/setup.c b/xen-2.4.16/arch/i386/setup.c index a03c4bac4d..924d0ce2a1 100644 --- a/xen-2.4.16/arch/i386/setup.c +++ b/xen-2.4.16/arch/i386/setup.c @@ -280,6 +280,7 @@ void __init start_of_day(void) extern void tqueue_bh(void); extern void immediate_bh(void); extern void init_timervecs(void); + extern void ac_timer_init(void); extern int setup_network_devices(void); extern void net_init(void); @@ -303,6 +304,7 @@ void __init start_of_day(void) paging_init(); /* not much here now, but sets up fixmap */ if ( smp_found_config ) get_smp_config(); domain_init(); + scheduler_init(); trap_init(); init_IRQ(); /* installs simple interrupt wrappers. Starts HZ clock. */ time_init(); /* installs software handler for HZ clock. */ @@ -323,6 +325,9 @@ void __init start_of_day(void) * fall thru to 8259A if we have to (but slower). */ #endif + init_xeno_time(); /* initialise the time */ + ac_timer_init(); /* init accurate timers */ + schedulers_start(); /* start scheduler for each CPU */ sti(); diff --git a/xen-2.4.16/arch/i386/time.c b/xen-2.4.16/arch/i386/time.c index 0b7d3ead4e..4b4ac515cb 100644 --- a/xen-2.4.16/arch/i386/time.c +++ b/xen-2.4.16/arch/i386/time.c @@ -1,3 +1,22 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: i386/time.c + * Author: + * Changes: + * + * Date: Jan 2003 + * + * Environment: Xen Hypervisor + * Description: modified version of Linux' time.c + * implement system and wall clock time. 
+ * + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + */ /* * linux/arch/i386/kernel/time.c * @@ -19,12 +38,22 @@ #include <asm/mpspec.h> #include <asm/processor.h> #include <asm/fixmap.h> +#include <asm/mc146818rtc.h> + +#ifdef TIME_TRACE +#define TRC(_x) _x +#else +#define TRC(_x) +#endif + + unsigned long cpu_khz; /* Detected as we calibrate the TSC */ unsigned long ticks_per_usec; /* TSC ticks per microsecond. */ spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; + /* * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick @@ -62,14 +91,15 @@ static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) do_timer_interrupt(irq, NULL, regs); } -static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL}; +static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0, + "timer", NULL, NULL}; /* ------ Calibrate the TSC ------- * Return processor ticks per second / CALIBRATE_FRAC. */ #define CLOCK_TICK_RATE 1193180 /* system crystal frequency (Hz) */ -#define CALIBRATE_FRAC 20 /* calibrate over 50ms */ +#define CALIBRATE_FRAC 20 /* calibrate over 50ms */ #define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC) static unsigned long __init calibrate_tsc(void) @@ -126,6 +156,207 @@ static unsigned long __init calibrate_tsc(void) return 0; } +/*************************************************************************** + * CMOS Timer functions + ***************************************************************************/ + +/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. + * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 + * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. 
+ * + * [For the Julian calendar (which was used in Russia before 1917, + * Britain & colonies before 1752, anywhere else before 1582, + * and is still in use by some communities) leave out the + * -year/100+year/400 terms, and add 10.] + * + * This algorithm was first published by Gauss (I think). + * + * WARNING: this function will overflow on 2106-02-07 06:28:16 on + * machines were long is 32-bit! (However, as time_t is signed, we + * will already get problems at other places on 2038-01-19 03:14:08) + */ +static inline unsigned long +mktime (unsigned int year, unsigned int mon, + unsigned int day, unsigned int hour, + unsigned int min, unsigned int sec) +{ + if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */ + mon += 12; /* Puts Feb last since it has leap day */ + year -= 1; + } + return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day)+ + year*365 - 719499 + )*24 + hour /* now have hours */ + )*60 + min /* now have minutes */ + )*60 + sec; /* finally seconds */ +} + +static unsigned long get_cmos_time(void) +{ + unsigned int year, mon, day, hour, min, sec; + int i; + + spin_lock(&rtc_lock); + /* The Linux interpretation of the CMOS clock register contents: + * When the Update-In-Progress (UIP) flag goes from 1 to 0, the + * RTC registers show the second which has precisely just started. + * Let's hope other operating systems interpret the RTC the same way. + */ + /* read RTC exactly on falling edge of update flag */ + for (i = 0 ; i < 1000000 ; i++) /* may take up to 1 second... */ + if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) + break; + for (i = 0 ; i < 1000000 ; i++) /* must try at least 2.228 ms */ + if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)) + break; + do { /* Isn't this overkill ? 
UIP above should guarantee consistency */ + sec = CMOS_READ(RTC_SECONDS); + min = CMOS_READ(RTC_MINUTES); + hour = CMOS_READ(RTC_HOURS); + day = CMOS_READ(RTC_DAY_OF_MONTH); + mon = CMOS_READ(RTC_MONTH); + year = CMOS_READ(RTC_YEAR); + } while (sec != CMOS_READ(RTC_SECONDS)); + if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + { + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year); + } + spin_unlock(&rtc_lock); + if ((year += 1900) < 1970) + year += 100; + printk(".... CMOS Clock: %02d/%02d/%04d %02d:%02d:%02d\n", + day, mon, year, hour, min, sec); + return mktime(year, mon, day, hour, min, sec); +} + +/*************************************************************************** + * System time + ***************************************************************************/ +u32 stime_pcc; /* cycle counter value at last timer irq */ +u32 stime_scale; /* scale factor for converting cc to ns */ +s_time_t stime_now; /* time in ns at last timer IRQ */ + +/*************************************************************************** + * Wall Clock time + ***************************************************************************/ +static rwlock_t wctime_lock = RW_LOCK_UNLOCKED; +struct timeval wall_clock_time; /* wall clock time at last update */ +s_time_t wctime_st; /* system time at last update */ + +void do_gettimeofday(struct timeval *tv) +{ + unsigned long flags; + unsigned long usec, sec; + + read_lock_irqsave(&wctime_lock, flags); + + usec = ((unsigned long)(NOW() - wctime_st))/1000; + sec = wall_clock_time.tv_sec; + usec += wall_clock_time.tv_usec; + read_unlock_irqrestore(&wctime_lock, flags); + + while (usec >= 1000000) { + usec -= 1000000; + sec++; + } + tv->tv_sec = sec; + tv->tv_usec = usec; +} + +void do_settimeofday(struct timeval *tv) +{ + printk("XXX: do_settimeofday not implemented\n"); +} + +/*************************************************************************** + * Update 
times + ***************************************************************************/ + +/* update the hypervisor's notion of time (system time and wall clock) */ +void update_time(void) { + u32 new_pcc; + s_time_t new_st; + unsigned long usec; + + /* update system time: TSC delta scaled to ns (stime_scale is ns*1024 per cycle, hence >>10) */ + rdtscl(new_pcc); + stime_now = stime_now+((((s_time_t)stime_scale)* + (new_pcc-stime_pcc))>>10); + stime_pcc=new_pcc; + + /* update wall clock time: add the ns elapsed since the last update, carried into tv_sec */ + write_lock(&wctime_lock); + new_st = NOW(); + usec = ((unsigned long)(new_st - wctime_st))/1000; + usec += wall_clock_time.tv_usec; + while (usec >= 1000000) { + usec -= 1000000; + wall_clock_time.tv_sec++; + } + wall_clock_time.tv_usec = usec; + wctime_st = new_st; + write_unlock(&wctime_lock); + + TRC(printk("TIME[%02d] update time: stime_now=%lld now=%lld, wct=%ld:%ld\n", + smp_processor_id(), stime_now, new_st, wall_clock_time.tv_sec, + wall_clock_time.tv_usec)); +} + +/* update a domain's notion of time: copy the current snapshot into its shared info page */ +void update_dom_time(shared_info_t *si) +{ + unsigned long flags; + read_lock_irqsave(&wctime_lock, flags); + si->system_time = stime_now; + si->st_timestamp = stime_pcc; + si->tv_sec = wall_clock_time.tv_sec; + si->tv_usec = wall_clock_time.tv_usec; + si->wc_timestamp = wctime_st; + read_unlock_irqrestore(&wctime_lock, flags); +} + +/*************************************************************************** + * Init Xeno Time + * This has to be done after all CPUs have been booted + ***************************************************************************/ +int __init init_xeno_time(void) +{ + int cpu = smp_processor_id(); + u32 cpu_cycle; /* time of one cpu cycle in pico-seconds */ + + /* System Time */ + cpu_cycle = (u32) (1000000000LL/cpu_khz); /* in pico seconds */ + stime_scale = (cpu_cycle * 1024) / 1000; + + stime_now = (s_time_t)0; + rdtscl(stime_pcc); + + printk("Init Time[%02d]:\n", cpu); + printk(".... System Time: %lldns\n", NOW()); + printk(".... stime_scale: %u\n", stime_scale); + printk(".... 
stime_pcc: %u\n", stime_pcc); + + /* Wall Clock time */ + wall_clock_time.tv_sec = get_cmos_time(); + wall_clock_time.tv_usec = 0; + wctime_st = NOW(); + + printk(".... Wall Clock: %lds %ldus\n", wall_clock_time.tv_sec, + wall_clock_time.tv_usec); + printk(".... wctime_st: %lld\n", wctime_st); + return 0; +} + + +/*************************************************************************** + * Init + ***************************************************************************/ + void __init time_init(void) { unsigned long ticks_per_frac = calibrate_tsc(); @@ -136,6 +367,7 @@ void __init time_init(void) ticks_per_usec = ticks_per_frac / (1000000/CALIBRATE_FRAC); cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC); + printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); diff --git a/xen-2.4.16/common/ac_timer.c b/xen-2.4.16/common/ac_timer.c new file mode 100644 index 0000000000..dec3f34646 --- /dev/null +++ b/xen-2.4.16/common/ac_timer.c @@ -0,0 +1,304 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: ac_timer.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Nov 2002 + * + * Environment: Xen Hypervisor + * Description: Accurate timer for the Hypervisor + * + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + */ + +#include <xeno/config.h> +#include <xeno/init.h> +#include <xeno/types.h> +#include <xeno/errno.h> +#include <xeno/sched.h> +#include <xeno/lib.h> +#include <xeno/config.h> +#include <xeno/smp.h> +#include <xeno/init.h> + +#include <xeno/time.h> +#include <xeno/ac_timer.h> + +#include <asm/system.h> +#include 
<asm/desc.h> + + +#undef AC_TIMER_TRACE +#undef AC_TIMER_STATS + +#ifdef AC_TIMER_TRACE +#define TRC(_x) _x +#else +#define TRC(_x) +#endif + +/* A timer list per CPU */ +typedef struct ac_timers_st +{ + spinlock_t lock; + struct list_head timers; + struct ac_timer *prev, *curr; +} __cacheline_aligned ac_timers_t; +static ac_timers_t ac_timers[NR_CPUS]; + +#ifdef AC_TIMER_STATS +#define BUCKETS 1000 +#define MAX_STATS +typedef struct act_stats_st +{ + u32 count; + u32 times[2*(BUCKETS)]; +} __cacheline_aligned act_stats_t; +static act_stats_t act_stats[NR_CPUS]; + +#endif + +/* local prototypes */ +static int detach_ac_timer(struct ac_timer *timer); +static void ac_timer_debug(unsigned long); + +/* + * add a timer to the current CPU's list, which is kept sorted by expiry. + * return value: + * 0: success + * 1: failure, timer in the past or timeout value too small + * -1: failure, timer uninitialised + * fail + */ +int add_ac_timer(struct ac_timer *timer) +{ + int cpu = smp_processor_id(); + unsigned long flags; + struct list_head *tmp, *prev; + struct ac_timer *t; + s_time_t now; + + /* sanity checks */ + + /* make sure timeout value is in the future */ + now = NOW(); + TRC(printk("ACT [%02d] add(): now=%lld timo=%lld\n", + cpu, now, timer->expires)); + if (timer->expires <= now) { + printk("ACT[%02d] add_ac_timer(): timeout value in the past\n", cpu); + return 1; + } + + local_irq_save(flags); + + /* check if timer would be inserted at start of list (compare with first entry, not the list head) */ + if ((list_empty(&ac_timers[cpu].timers)) || + (timer->expires < + (list_entry(ac_timers[cpu].timers.next, struct ac_timer, timer_list))->expires)) { + + TRC(printk("ACT [%02d] add(): add at head\n", cpu)); + /* Reprogramm and add to head of list */ + if (!reprogram_ac_timer(timer->expires)) { + /* failed */ + TRC(printk("ACT [%02d] add(): add at head failed\n", cpu)); + local_irq_restore(flags); + return 1; + } + list_add(&timer->timer_list, &ac_timers[cpu].timers); + + } else { + /* list is sorted by expiry: insert after the last entry not later than us */ + prev = &ac_timers[cpu].timers; + list_for_each(tmp, 
&ac_timers[cpu].timers) { + t = list_entry(tmp, struct ac_timer, timer_list); + if (t->expires > timer->expires) { + TRC(printk("ACT [%02d] add(): added between %lld and %lld\n", + cpu, + list_entry(prev,struct ac_timer,timer_list)->expires, + list_entry(tmp,struct ac_timer,timer_list)->expires)); + break; + } + prev = tmp; + } + list_add(&timer->timer_list, prev); /* prev is the tail if no later entry exists */ + } + local_irq_restore(flags); + return 0; +} + +/* + * remove a timer + * return values: + * 0: success + * -1: bogus timer + */ +static int detach_ac_timer(struct ac_timer *timer) +{ + TRC(int cpu = smp_processor_id()); + TRC(printk("ACT [%02d] detach(): \n", cpu)); + list_del(&timer->timer_list); + timer->timer_list.next = NULL; + return 0; +} + +/* + * remove a timer + * return values: + * 0: success + * -1: bogus timer + */ +int rem_ac_timer(struct ac_timer *timer) +{ + int res; + unsigned long flags; + TRC(int cpu = smp_processor_id()); + + TRC(printk("ACT [%02d] remove(): timo=%lld \n", cpu, timer->expires)); + /* sanity checks */ + + local_irq_save(flags); + res = detach_ac_timer(timer); + local_irq_restore(flags); + return res; +} + +/* + * modify a timer, i.e., set a new timeout value + * return value: + * 0: sucess + * -1: error + */ +int mod_ac_timer(struct ac_timer *timer, s_time_t new_time) +{ + if (rem_ac_timer(timer) != 0) + return -1; + timer->expires = new_time; + if (add_ac_timer(timer) != 0) + return -1; + return 0; +} + +/* + * do_ac_timer + * deal with timeouts and run the handlers + */ +void do_ac_timer(void) +{ + int cpu = smp_processor_id(); + unsigned long flags; + s_time_t now; + struct ac_timer *t; + struct list_head *tmp; + + local_irq_save(flags); + + do_timer_again: + + now = NOW(); + TRC(printk("ACT [%02d] do(): now=%lld\n", cpu, now)); + + /* Sanity checks */ + /* empty time list */ + if (list_empty(&ac_timers[cpu].timers)) { + printk("ACT[%02d] do_ac_timer(): timer irq without timer\n", cpu); + local_irq_restore(flags); + return; + } + + + /* execute the head of timer 
queue */ + t = list_entry(ac_timers[cpu].timers.next, struct ac_timer, timer_list); + detach_ac_timer(t); + + +#ifdef AC_TIMER_STATS + { + s32 diff; + u32 i; + diff = ((s32)(now - t->expires)) / 1000; /* delta in us */ + if (diff < -BUCKETS) + diff = -BUCKETS; + else if (diff > BUCKETS) + diff = BUCKETS; + act_stats[cpu].times[diff+BUCKETS]++; + act_stats[cpu].count++; + + if (act_stats[cpu].count >= 5000) { + printk("ACT Stats\n"); + for (i=0; i < 2*BUCKETS; i++) { + if (act_stats[cpu].times[i] != 0) + printk("ACT [%02d]: %3dus: %5d\n", + cpu,i-BUCKETS, act_stats[cpu].times[i]); + act_stats[cpu].times[i]=0; + } + act_stats[cpu].count = 0; + printk("\n"); + } + } +#endif + + + + if (t->expires > now) { + //printk("ACT [%02d] do(): irq too early (%lld ns)\n", + // cpu, now - t->expires ); + } + if (t->function != NULL) + t->function(t->data); + + + /* check if there are other timer functions on the list */ + now = NOW(); + if (!list_empty(&ac_timers[cpu].timers)) { + list_for_each(tmp, &ac_timers[cpu].timers) { + t = list_entry(tmp, struct ac_timer, timer_list); + TRC(printk("ACT [%02d] do(): now=%lld timo=%lld\n", + cpu, now, t->expires)); + if (t->expires <= now) { + detach_ac_timer(t); + if (t->function != NULL) + t->function(t->data); + now = NOW(); + } else { + TRC(printk("ACT [%02d] do(): break1\n", cpu)); + break; + } + } + } + + /* If list not empty reprogramm timer to new head of list */ + if (!list_empty(&ac_timers[cpu].timers)) { + t = list_entry(ac_timers[cpu].timers.next,struct ac_timer,timer_list); + if (t->expires > 0) { + TRC(printk("ACT [%02d] do(): reprog timo=%lld\n",cpu,t->expires)); + if (!reprogram_ac_timer(t->expires)) { + TRC(printk("ACT [%02d] do(): again\n", cpu)); + goto do_timer_again; + } + } + } + local_irq_restore(flags); +} + +/* + * init + */ +void __init ac_timer_init(void) +{ + int i; + + printk ("ACT: Initialising Accurate timers\n"); + + for (i = 0; i < NR_CPUS; i++) + { + INIT_LIST_HEAD(&ac_timers[i].timers); + 
spin_lock_init(&ac_timers[i].lock); + } + /* ac_timer_debug(0); */ +} diff --git a/xen-2.4.16/common/domain.c b/xen-2.4.16/common/domain.c index 9608894832..19a2f8119b 100644 --- a/xen-2.4.16/common/domain.c +++ b/xen-2.4.16/common/domain.c @@ -25,24 +25,9 @@ extern unsigned char *cmdline; rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; -schedule_data_t schedule_data[NR_CPUS]; - -int wake_up(struct task_struct *p) -{ - unsigned long flags; - int ret = 0; - spin_lock_irqsave(&schedule_data[p->processor].lock, flags); - if ( __task_on_runqueue(p) ) goto out; - p->state = TASK_RUNNING; - __add_to_runqueue(p); - ret = 1; - - out: - spin_unlock_irqrestore(&schedule_data[p->processor].lock, flags); - return ret; -} - - +/* + * create a new domain + */ struct task_struct *do_newdomain(void) { int retval; @@ -80,97 +65,6 @@ struct task_struct *do_newdomain(void) return(p); } - -void reschedule(struct task_struct *p) -{ - int cpu = p->processor; - struct task_struct *curr; - unsigned long flags; - - if ( p->has_cpu ) return; - - spin_lock_irqsave(&schedule_data[cpu].lock, flags); - curr = schedule_data[cpu].curr; - if ( is_idle_task(curr) ) - { - set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events); - spin_unlock_irqrestore(&schedule_data[cpu].lock, flags); -#ifdef CONFIG_SMP - if ( cpu != smp_processor_id() ) smp_send_event_check_cpu(cpu); -#endif - } - else - { - spin_unlock_irqrestore(&schedule_data[cpu].lock, flags); - } -} - - -static void process_timeout(unsigned long __data) -{ - struct task_struct * p = (struct task_struct *) __data; - wake_up(p); -} - -long schedule_timeout(long timeout) -{ - struct timer_list timer; - unsigned long expire; - - switch (timeout) - { - case MAX_SCHEDULE_TIMEOUT: - /* - * These two special cases are useful to be comfortable in the caller. - * Nothing more. 
We could take MAX_SCHEDULE_TIMEOUT from one of the - * negative value but I' d like to return a valid offset (>=0) to allow - * the caller to do everything it want with the retval. - */ - schedule(); - goto out; - default: - /* - * Another bit of PARANOID. Note that the retval will be 0 since no - * piece of kernel is supposed to do a check for a negative retval of - * schedule_timeout() (since it should never happens anyway). You just - * have the printk() that will tell you if something is gone wrong and - * where. - */ - if (timeout < 0) - { - printk(KERN_ERR "schedule_timeout: wrong timeout " - "value %lx from %p\n", timeout, - __builtin_return_address(0)); - current->state = TASK_RUNNING; - goto out; - } - } - - expire = timeout + jiffies; - - init_timer(&timer); - timer.expires = expire; - timer.data = (unsigned long) current; - timer.function = process_timeout; - - add_timer(&timer); - schedule(); - del_timer_sync(&timer); - - timeout = expire - jiffies; - - out: - return timeout < 0 ? 0 : timeout; -} - - -long do_yield(void) -{ - current->state = TASK_INTERRUPTIBLE; - schedule(); - return 0; -} - /* Get a pointer to the specified domain. Consider replacing this * with a hash lookup later. * @@ -245,7 +139,7 @@ long kill_other_domain(unsigned int dom) /* Release resources belonging to task @p. 
*/ void release_task(struct task_struct *p) { - ASSERT(!__task_on_runqueue(p)); + //ASSERT(!__task_on_runqueue(p)); ASSERT(p->state == TASK_DYING); ASSERT(!p->has_cpu); write_lock_irq(&tasklist_lock); @@ -267,78 +161,7 @@ void release_task(struct task_struct *p) } -asmlinkage void schedule(void) -{ - struct task_struct *prev, *next; - struct list_head *tmp; - int this_cpu; - - need_resched_back: - prev = current; - this_cpu = prev->processor; - - spin_lock_irq(&schedule_data[this_cpu].lock); - - ASSERT(!in_interrupt()); - ASSERT(__task_on_runqueue(prev)); - - if ( !prev->counter ) - { - prev->counter = 2; - __move_last_runqueue(prev); - } - - switch ( prev->state ) - { - case TASK_INTERRUPTIBLE: - if ( signal_pending(prev) ) - { - prev->state = TASK_RUNNING; - break; - } - default: - __del_from_runqueue(prev); - case TASK_RUNNING:; - } - clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events); - - /* Round-robin, skipping idle where possible. */ - next = NULL; - list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) { - next = list_entry(tmp, struct task_struct, run_list); - if ( next->domain != IDLE_DOMAIN_ID ) break; - } - - prev->has_cpu = 0; - next->has_cpu = 1; - - schedule_data[this_cpu].prev = prev; - schedule_data[this_cpu].curr = next; - - spin_unlock_irq(&schedule_data[this_cpu].lock); - - if ( unlikely(prev == next) ) - { - /* We won't go through the normal tail, so do this by hand */ - prev->policy &= ~SCHED_YIELD; - goto same_process; - } - - prepare_to_switch(); - switch_to(prev, next); - prev = schedule_data[this_cpu].prev; - - prev->policy &= ~SCHED_YIELD; - if ( prev->state == TASK_DYING ) release_task(prev); - - same_process: - if ( test_bit(_HYP_EVENT_NEED_RESCHED, ¤t->hyp_events) ) - goto need_resched_back; - return; -} - - -unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes) +static unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes) { struct list_head *temp; struct pfn_info *pf, *pf_head; @@ 
-813,14 +636,15 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params) return 0; } + void __init domain_init(void) { - int i; - for ( i = 0; i < NR_CPUS; i++ ) - { - INIT_LIST_HEAD(&schedule_data[i].runqueue); - spin_lock_init(&schedule_data[i].lock); - schedule_data[i].prev = &idle0_task; - schedule_data[i].curr = &idle0_task; + printk("Initialising domains\n"); +// scheduler_init(); +} + + + +#if 0 } } diff --git a/xen-2.4.16/common/lib.c b/xen-2.4.16/common/lib.c index 2a55b992e8..ae75196ffd 100644 --- a/xen-2.4.16/common/lib.c +++ b/xen-2.4.16/common/lib.c @@ -169,3 +169,358 @@ unsigned char *quad_to_str(unsigned long q, unsigned char *s) } +/* a couple of 64 bit operations ported from freebsd */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $ + */ + +#include <asm/types.h> + + +/* + * Depending on the desired operation, we view a `long long' (aka quad_t) in + * one or more of the following formats. + */ +union uu { + s64 q; /* as a (signed) quad */ + s64 uq; /* as an unsigned quad */ + long sl[2]; /* as two signed longs */ + unsigned long ul[2]; /* as two unsigned longs */ +}; +/* XXX RN: Yuck hardcoded endianess :) */ +#define _QUAD_HIGHWORD 1 +#define _QUAD_LOWWORD 0 +/* + * Define high and low longwords. + */ +#define H _QUAD_HIGHWORD +#define L _QUAD_LOWWORD + +/* + * Total number of bits in a quad_t and in the pieces that make it up. + * These are used for shifting, and also below for halfword extraction + * and assembly. + */ +#define CHAR_BIT 8 /* number of bits in a char */ +#define QUAD_BITS (sizeof(s64) * CHAR_BIT) +#define LONG_BITS (sizeof(long) * CHAR_BIT) +#define HALF_BITS (sizeof(long) * CHAR_BIT / 2) + +/* + * Extract high and low shortwords from longword, and move low shortword of + * longword to upper half of long, i.e., produce the upper longword of + * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be u_long.) 
+ * + * These are used in the multiply code, to split a longword into upper + * and lower halves, and to reassemble a product as a quad_t, shifted left + * (sizeof(long)*CHAR_BIT/2). + */ +#define HHALF(x) ((x) >> HALF_BITS) +#define LHALF(x) ((x) & ((1 << HALF_BITS) - 1)) +#define LHUP(x) ((x) << HALF_BITS) + +/* + * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), + * section 4.3.1, pp. 257--259. + */ +#define B (1 << HALF_BITS) /* digit base */ + +/* Combine two `digits' to make a single two-digit number. */ +#define COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b)) + +/* select a type for digits in base B: use unsigned short if they fit */ +#if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff +typedef unsigned short digit; +#else +typedef u_long digit; +#endif + +/* + * Shift p[0]..p[len] left `sh' bits, ignoring any bits that + * `fall out' the left (there never will be any such anyway). + * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. + */ +static void +shl(register digit *p, register int len, register int sh) +{ + register int i; + + for (i = 0; i < len; i++) + p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh)); + p[i] = LHALF(p[i] << sh); +} + +/* + * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. + * + * We do this in base 2-sup-HALF_BITS, so that all intermediate products + * fit within u_long. As a consequence, the maximum length dividend and + * divisor are 4 `digits' in this base (they are shorter if they have + * leading zeros). + */ +u64 +__qdivrem(uq, vq, arq) + u64 uq, vq, *arq; +{ + union uu tmp; + digit *u, *v, *q; + register digit v1, v2; + u_long qhat, rhat, t; + int m, n, d, j, i; + digit uspace[5], vspace[5], qspace[5]; + + /* + * Take care of special cases: divide by zero, and u < v. + */ + if (vq == 0) { + /* divide by zero. 
*/ + static volatile const unsigned int zero = 0; + + tmp.ul[H] = tmp.ul[L] = 1 / zero; + if (arq) + *arq = uq; + return (tmp.q); + } + if (uq < vq) { + if (arq) + *arq = uq; + return (0); + } + u = &uspace[0]; + v = &vspace[0]; + q = &qspace[0]; + + /* + * Break dividend and divisor into digits in base B, then + * count leading zeros to determine m and n. When done, we + * will have: + * u = (u[1]u[2]...u[m+n]) sub B + * v = (v[1]v[2]...v[n]) sub B + * v[1] != 0 + * 1 < n <= 4 (if n = 1, we use a different division algorithm) + * m >= 0 (otherwise u < v, which we already checked) + * m + n = 4 + * and thus + * m = 4 - n <= 2 + */ + tmp.uq = uq; + u[0] = 0; + u[1] = HHALF(tmp.ul[H]); + u[2] = LHALF(tmp.ul[H]); + u[3] = HHALF(tmp.ul[L]); + u[4] = LHALF(tmp.ul[L]); + tmp.uq = vq; + v[1] = HHALF(tmp.ul[H]); + v[2] = LHALF(tmp.ul[H]); + v[3] = HHALF(tmp.ul[L]); + v[4] = LHALF(tmp.ul[L]); + for (n = 4; v[1] == 0; v++) { + if (--n == 1) { + u_long rbj; /* r*B+u[j] (not root boy jim) */ + digit q1, q2, q3, q4; + + /* + * Change of plan, per exercise 16. + * r = 0; + * for j = 1..4: + * q[j] = floor((r*B + u[j]) / v), + * r = (r*B + u[j]) % v; + * We unroll this completely here. + */ + t = v[2]; /* nonzero, by definition */ + q1 = u[1] / t; + rbj = COMBINE(u[1] % t, u[2]); + q2 = rbj / t; + rbj = COMBINE(rbj % t, u[3]); + q3 = rbj / t; + rbj = COMBINE(rbj % t, u[4]); + q4 = rbj / t; + if (arq) + *arq = rbj % t; + tmp.ul[H] = COMBINE(q1, q2); + tmp.ul[L] = COMBINE(q3, q4); + return (tmp.q); + } + } + + /* + * By adjusting q once we determine m, we can guarantee that + * there is a complete four-digit quotient at &qspace[1] when + * we finally stop. + */ + for (m = 4 - n; u[1] == 0; u++) + m--; + for (i = 4 - m; --i >= 0;) + q[i] = 0; + q += 4 - m; + + /* + * Here we run Program D, translated from MIX to C and acquiring + * a few minor changes. + * + * D1: choose multiplier 1 << d to ensure v[1] >= B/2. 
+ */ + d = 0; + for (t = v[1]; t < B / 2; t <<= 1) + d++; + if (d > 0) { + shl(&u[0], m + n, d); /* u <<= d */ + shl(&v[1], n - 1, d); /* v <<= d */ + } + /* + * D2: j = 0. + */ + j = 0; + v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ + v2 = v[2]; /* for D3 */ + do { + register digit uj0, uj1, uj2; + + /* + * D3: Calculate qhat (\^q, in TeX notation). + * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and + * let rhat = (u[j]*B + u[j+1]) mod v[1]. + * While rhat < B and v[2]*qhat > rhat*B+u[j+2], + * decrement qhat and increase rhat correspondingly. + * Note that if rhat >= B, v[2]*qhat < rhat*B. + */ + uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ + uj1 = u[j + 1]; /* for D3 only */ + uj2 = u[j + 2]; /* for D3 only */ + if (uj0 == v1) { + qhat = B; + rhat = uj1; + goto qhat_too_big; + } else { + u_long nn = COMBINE(uj0, uj1); + qhat = nn / v1; + rhat = nn % v1; + } + while (v2 * qhat > COMBINE(rhat, uj2)) { + qhat_too_big: + qhat--; + if ((rhat += v1) >= B) + break; + } + /* + * D4: Multiply and subtract. + * The variable `t' holds any borrows across the loop. + * We split this up so that we do not require v[0] = 0, + * and to eliminate a final special case. + */ + for (t = 0, i = n; i > 0; i--) { + t = u[i + j] - v[i] * qhat - t; + u[i + j] = LHALF(t); + t = (B - HHALF(t)) & (B - 1); + } + t = u[j] - t; + u[j] = LHALF(t); + /* + * D5: test remainder. + * There is a borrow if and only if HHALF(t) is nonzero; + * in that (rare) case, qhat was too large (by exactly 1). + * Fix it by adding v[1..n] to u[j..j+n]. + */ + if (HHALF(t)) { + qhat--; + for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ + t += u[i + j] + v[i]; + u[i + j] = LHALF(t); + t = HHALF(t); + } + u[j] = LHALF(u[j] + t); + } + q[j] = qhat; + } while (++j <= m); /* D7: loop on j. */ + + /* + * If caller wants the remainder, we have to calculate it as + * u[m..m+n] >> d (this is at most n digits and thus fits in + * u[m+1..m+n], but we may need more source digits). 
+ */ + if (arq) { + if (d) { + for (i = m + n; i > m; --i) + u[i] = (u[i] >> d) | + LHALF(u[i - 1] << (HALF_BITS - d)); + u[i] = 0; + } + tmp.ul[H] = COMBINE(uspace[1], uspace[2]); + tmp.ul[L] = COMBINE(uspace[3], uspace[4]); + *arq = tmp.q; + } + + tmp.ul[H] = COMBINE(qspace[1], qspace[2]); + tmp.ul[L] = COMBINE(qspace[3], qspace[4]); + return (tmp.q); +} + + +/* + * Divide two signed quads. + * ??? if -1/2 should produce -1 on this machine, this code is wrong + */ +s64 +__divdi3(s64 a, s64 b) +{ + u64 ua, ub, uq; + int neg; + + if (a < 0) + ua = -(u64)a, neg = 1; + else + ua = a, neg = 0; + if (b < 0) + ub = -(u64)b, neg ^= 1; + else + ub = b; + uq = __qdivrem(ua, ub, (u64 *)0); + return (neg ? -uq : uq); +} + +/* + * Divide two unsigned quads. + */ +u64 +__udivdi3(a, b) + u64 a, b; +{ + + return (__qdivrem(a, b, (u64 *)0)); +} diff --git a/xen-2.4.16/common/schedule.c b/xen-2.4.16/common/schedule.c new file mode 100644 index 0000000000..a835151cc5 --- /dev/null +++ b/xen-2.4.16/common/schedule.c @@ -0,0 +1,345 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: schedule.c + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Nov 2002 + * + * Environment: Xen Hypervisor + * Description: CPU scheduling + * partially moved from domain.c + * + **************************************************************************** + * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ + **************************************************************************** + */ + +#include <xeno/config.h> +#include <xeno/init.h> +#include <xeno/lib.h> +#include <xeno/sched.h> +#include <xeno/delay.h> +#include <xeno/event.h> +#include <xeno/time.h> +#include <xeno/ac_timer.h> + +#undef SCHEDULER_TRACE +#ifdef SCHEDULER_TRACE +#define 
TRC(_x) _x +#else +#define TRC(_x) +#endif + +/* + * per CPU data for the scheduler. + */ +typedef struct schedule_data_st +{ + spinlock_t lock; + struct list_head runqueue; + struct task_struct *prev, *curr; +} __cacheline_aligned schedule_data_t; +schedule_data_t schedule_data[NR_CPUS]; + +static __cacheline_aligned struct ac_timer s_timer[NR_CPUS]; + +/* + * Some convenience functions + */ + +static inline void __add_to_runqueue(struct task_struct * p) +{ + list_add(&p->run_list, &schedule_data[p->processor].runqueue); +} + +static inline void __move_last_runqueue(struct task_struct * p) +{ + list_del(&p->run_list); + list_add_tail(&p->run_list, &schedule_data[p->processor].runqueue); +} + +static inline void __move_first_runqueue(struct task_struct * p) +{ + list_del(&p->run_list); + list_add(&p->run_list, &schedule_data[p->processor].runqueue); +} + +static inline void __del_from_runqueue(struct task_struct * p) +{ + list_del(&p->run_list); + p->run_list.next = NULL; +} + +static inline int __task_on_runqueue(struct task_struct *p) +{ + return (p->run_list.next != NULL); +} + + +/* + * Add a new domain to the scheduler + */ +void sched_add_domain(struct task_struct *p) +{ + p->state = TASK_UNINTERRUPTIBLE; +} + +/* + * Remove domain to the scheduler + */ +void sched_rem_domain(struct task_struct *p) +{ + p->state = TASK_DYING; +} + + +/* + * wake up a domain which had been sleeping + */ +int wake_up(struct task_struct *p) +{ + unsigned long flags; + int ret = 0; + spin_lock_irqsave(&schedule_data[p->processor].lock, flags); + if ( __task_on_runqueue(p) ) goto out; + p->state = TASK_RUNNING; + __add_to_runqueue(p); + ret = 1; + + out: + spin_unlock_irqrestore(&schedule_data[p->processor].lock, flags); + return ret; +} + +static void process_timeout(unsigned long __data) +{ + struct task_struct * p = (struct task_struct *) __data; + wake_up(p); +} + +long schedule_timeout(long timeout) +{ + struct timer_list timer; + unsigned long expire; + + switch (timeout) + 
{ + case MAX_SCHEDULE_TIMEOUT: + /* + * These two special cases are useful to be comfortable in the caller. + * Nothing more. We could take MAX_SCHEDULE_TIMEOUT from one of the + * negative value but I' d like to return a valid offset (>=0) to allow + * the caller to do everything it want with the retval. + */ + schedule(); + goto out; + default: + /* + * Another bit of PARANOID. Note that the retval will be 0 since no + * piece of kernel is supposed to do a check for a negative retval of + * schedule_timeout() (since it should never happens anyway). You just + * have the printk() that will tell you if something is gone wrong and + * where. + */ + if (timeout < 0) + { + printk(KERN_ERR "schedule_timeout: wrong timeout " + "value %lx from %p\n", timeout, + __builtin_return_address(0)); + current->state = TASK_RUNNING; + goto out; + } + } + + expire = timeout + jiffies; + + init_timer(&timer); + timer.expires = expire; + timer.data = (unsigned long) current; + timer.function = process_timeout; + + add_timer(&timer); + schedule(); + del_timer_sync(&timer); + + timeout = expire - jiffies; + + out: + return timeout < 0 ? 
0 : timeout; +} + +/* RN: XXX turn this into do_halt() */ +/* + * yield the current process + */ +long do_sched_op(void) +{ + current->state = TASK_INTERRUPTIBLE; + schedule(); + return 0; +} + +/* + * + */ +void reschedule(struct task_struct *p) +{ + int cpu = p->processor; + struct task_struct *curr; + unsigned long flags; + + if (p->has_cpu) + return; + + spin_lock_irqsave(&schedule_data[cpu].lock, flags); + curr = schedule_data[cpu].curr; + if (is_idle_task(curr)) { + set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events); + spin_unlock_irqrestore(&schedule_data[cpu].lock, flags); +#ifdef CONFIG_SMP + if (cpu != smp_processor_id()) + smp_send_event_check_cpu(cpu); +#endif + } else { + spin_unlock_irqrestore(&schedule_data[cpu].lock, flags); + } +} + + +/* + * Pick the next domain to run + */ + +asmlinkage void schedule(void) +{ + struct task_struct *prev, *next, *p; + struct list_head *tmp; + int this_cpu; + + need_resched_back: + prev = current; + this_cpu = prev->processor; + + spin_lock_irq(&schedule_data[this_cpu].lock); + + //ASSERT(!in_interrupt()); + ASSERT(__task_on_runqueue(prev)); + + __move_last_runqueue(prev); + + switch ( prev->state ) + { + case TASK_INTERRUPTIBLE: + if ( signal_pending(prev) ) + { + prev->state = TASK_RUNNING; + break; + } + default: + __del_from_runqueue(prev); + case TASK_RUNNING:; + } + clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events); + + next = NULL; + list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) { + p = list_entry(tmp, struct task_struct, run_list); + next = p; + break; + } + + prev->has_cpu = 0; + next->has_cpu = 1; + + schedule_data[this_cpu].prev = prev; + schedule_data[this_cpu].curr = next; + + spin_unlock_irq(&schedule_data[this_cpu].lock); + + if ( unlikely(prev == next) ) + { + /* We won't go through the normal tail, so do this by hand */ + prev->policy &= ~SCHED_YIELD; + goto same_process; + } + + prepare_to_switch(); + switch_to(prev, next); + prev = schedule_data[this_cpu].prev; + + 
prev->policy &= ~SCHED_YIELD; + if ( prev->state == TASK_DYING ) release_task(prev); + + same_process: + + update_dom_time(next->shared_info); + + if ( test_bit(_HYP_EVENT_NEED_RESCHED, ¤t->hyp_events) ) + goto need_resched_back; + return; +} + +static __cacheline_aligned int count[NR_CPUS]; +static void sched_timer(unsigned long foo) +{ + int cpu = smp_processor_id(); + struct task_struct *curr = schedule_data[cpu].curr; + s_time_t now; + int res; + + if (count[cpu] >= 5) { + set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events); + count[cpu] = 0; + } + count[cpu]++; + + again: + now = NOW(); + s_timer[cpu].expires = now + MILLISECS(10); + + TRC(printk("SCHED[%02d] timer(): now=0x%08X%08X timo=0x%08X%08X\n", + cpu, (u32)(now>>32), (u32)now, + (u32)(s_timer[cpu].expires>>32), (u32)s_timer[cpu].expires)); + res=add_ac_timer(&s_timer[cpu]); + if (res==1) { + goto again; + } +} +/* + * Initialise the data structures + */ +void __init scheduler_init(void) +{ + int i; + + printk("Initialising schedulers\n"); + + for ( i = 0; i < NR_CPUS; i++ ) + { + INIT_LIST_HEAD(&schedule_data[i].runqueue); + spin_lock_init(&schedule_data[i].lock); + schedule_data[i].prev = &idle0_task; + schedule_data[i].curr = &idle0_task; + + /* a timer for each CPU */ + init_ac_timer(&s_timer[i]); + s_timer[i].function = &sched_timer; + } +} + +/* + * Start a scheduler for each CPU + * This has to be done *after* the timers, e.g., APICs, have been initialised + */ +void schedulers_start(void) { + + printk("Start schedulers\n"); + __cli(); + sched_timer(0); + smp_call_function(sched_timer, (void*)0, 1, 1); + __sti(); +} diff --git a/xen-2.4.16/common/timer.c b/xen-2.4.16/common/timer.c index 388275307a..77e511de34 100644 --- a/xen-2.4.16/common/timer.c +++ b/xen-2.4.16/common/timer.c @@ -602,7 +602,6 @@ void do_timer(struct pt_regs *regs) p = &idle0_task; do { s = p->shared_info; - s->wall_time = s->domain_time = wall; cpu_mask |= mark_guest_event(p, _EVENT_TIMER); } while ( (p = p->next_task) != 
&idle0_task ); diff --git a/xen-2.4.16/include/asm-i386/apic.h b/xen-2.4.16/include/asm-i386/apic.h index 86dd0fbc66..1f5670943b 100644 --- a/xen-2.4.16/include/asm-i386/apic.h +++ b/xen-2.4.16/include/asm-i386/apic.h @@ -55,17 +55,14 @@ static inline void ack_APIC_irq(void) } extern int get_maxlvt(void); -extern void clear_local_APIC(void); extern void connect_bsp_APIC (void); extern void disconnect_bsp_APIC (void); extern void disable_local_APIC (void); extern int verify_local_APIC (void); -extern void cache_APIC_registers (void); extern void sync_Arb_IDs (void); extern void init_bsp_APIC (void); extern void setup_local_APIC (void); extern void init_apic_mappings (void); -extern void smp_local_timer_interrupt (struct pt_regs * regs); extern void setup_APIC_clocks (void); extern int APIC_init_uniprocessor (void); diff --git a/xen-2.4.16/include/asm-i386/time.h b/xen-2.4.16/include/asm-i386/time.h new file mode 100644 index 0000000000..9e2f77727d --- /dev/null +++ b/xen-2.4.16/include/asm-i386/time.h @@ -0,0 +1,80 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: time.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Nov 2002 + * + * Environment: Xen Hypervisor + * Description: Architecture dependent definition of time variables + * + **************************************************************************** + * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $ + **************************************************************************** + */ + +#ifndef _ASM_TIME_H_ +#define _ASM_TIME_H_ + +#include <asm/types.h> +#include <asm/msr.h> + +/* + * Cycle Counter Time + */ +typedef u64 cc_time_t; +static inline cc_time_t get_cc_time() +{ + u64 ret; + rdtscll(ret); + return ret; +} + +/* + * System Time + 
*/ +typedef s64 s_time_t; /* System time */ +extern u32 stime_pcc; /* cycle counter value at last timer irq */ +extern s_time_t stime_now; /* time in ns at last timer IRQ */ +extern u32 stime_scale; /* scale factur for converting cc to ns */ + + +/* + * This is the Nemesis implementation. + * The variables are all set in apic.c + * Every timer IRQ time_now and time_pcc is set to the current values + * At callibration time_scale is set + */ +static s_time_t get_s_time(void) +{ + u32 delta, low, pcc; + s_time_t now; + s_time_t incr; + + /* read two values (pcc, now) "atomically" */ +again: + pcc = stime_pcc; + now = stime_now; + if (stime_pcc != pcc) goto again; + + /* only use bottom 32bits of TSC. This should be sufficient */ + rdtscl(low); + delta = low - pcc; + + incr = ((s_time_t)(stime_scale) * delta) >> 10; + return now + incr; +} + +/* update time variables once in a while */ +extern void update_time(void); + +/* + * Domain Virtual Time + */ +typedef u64 dv_time_t; + +#endif /* _ASM_TIME_H_ */ diff --git a/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h b/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h index 9038d51d11..5f86f4fd58 100644 --- a/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h +++ b/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h @@ -155,36 +155,33 @@ typedef struct shared_info_st { */ unsigned long failsafe_address; - /* - * CPU ticks since start of day. - * `wall_time' counts CPU ticks in real time. - * `domain_time' counts CPU ticks during which this domain has run. - */ - unsigned long ticks_per_ms; /* CPU ticks per millisecond */ - /* - * Current wall_time can be found by rdtsc. Only possible use of - * variable below is that it provides a timestamp for last update - * of domain_time. + /* + * Time: + * The following abstractions are exposed: System Time, Wall Clock + * Time, Domain Virtual Time. Domains can access Cycle counter time + * directly. 
*/ - unsigned long long wall_time; - unsigned long long domain_time; - /* - * Timeouts for points at which guest OS would like a callback. - * This will probably be backed up by a timer heap in the guest OS. - * In Linux we use timeouts to update 'jiffies'. + /* System Time */ + long long system_time; /* in ns */ + unsigned long st_timestamp; /* cyclecounter at last update */ + unsigned long ticks_per_ms; /* CPU ticks per millisecond */ + /* Wall Clock Time */ + long tv_sec; /* essentially a struct timeval */ + long tv_usec; + long long wc_timestamp; /* system time at last update */ + + /* Domain Virtual Time */ + unsigned long long domain_time; + + /* + * Timeout values: + * Allow a domain to specify a timeout value in system time and + * domain virtual time. */ unsigned long long wall_timeout; unsigned long long domain_timeout; - /* - * Real-Time Clock. This shows time, in seconds, since 1.1.1980. - * The timestamp shows the CPU 'wall time' when RTC was last read. - * Thus it allows a mapping between 'real time' and 'wall time'. 
- */ - unsigned long rtc_time; - unsigned long long rtc_timestamp; - } shared_info_t; /* diff --git a/xen-2.4.16/include/xeno/ac_timer.h b/xen-2.4.16/include/xeno/ac_timer.h new file mode 100644 index 0000000000..f78082cb6b --- /dev/null +++ b/xen-2.4.16/include/xeno/ac_timer.h @@ -0,0 +1,65 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: ac_timer.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Nov 2002 + * + * Environment: Xen Hypervisor + * Description: Accurate timer for the Hypervisor + * + **************************************************************************** + * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $ + **************************************************************************** + */ + +#ifndef _AC_TIMER_H_ +#define _AC_TIMER_H_ + +#include <time.h> /* include notion of time */ + +/* + * The Xen Hypervisor provides two types of timers: + * + * - Linux style, jiffy based timers for legacy code and coarse grain timeouts + * These are defined in ./include/xeno/timer.h and implemented in + * ./common/timer.c. Unlike in Linux they are executed not on a periodic + * timer interrupt but "occasionally" with somewhat lesser accuracy. + * + * - accurate timers defined in this file and implemented in + * ./common/ac_timer.c. These are implemented using a programmable timer + * interrupt and are thus as accurate as the hardware allows. Where possible + * we use the local APIC for this purpose. However, this fact is hidden + * behind a architecture independent layer. + * accurate timers are programmed using system time. 
+ * + * The interface to accurate timers is very similar to Linux timers with the + * exception that the expires value is not expressed in jiffies but in ns from + * boot time. Its implementation however, is entirely different. + */ + +struct ac_timer { + struct list_head timer_list; + s_time_t expires; /* system time time out value */ + unsigned long data; + void (*function)(unsigned long); +}; + +/* interface for "clients" */ +extern int add_ac_timer(struct ac_timer *timer); +extern int rem_ac_timer(struct ac_timer *timer); +extern int mod_ac_timer(struct ac_timer *timer, s_time_t new_time); +static inline void init_ac_timer(struct ac_timer *timer) +{ + //timer->next = NULL; +} + +/* interface used by programmable timer, implemented hardware dependent */ +extern int reprogram_ac_timer(s_time_t timeout); +extern void do_ac_timer(void); + +#endif /* _AC_TIMER_H_ */ diff --git a/xen-2.4.16/include/xeno/sched.h b/xen-2.4.16/include/xeno/sched.h index 7afc6d9e1d..d0fdab4948 100644 --- a/xen-2.4.16/include/xeno/sched.h +++ b/xen-2.4.16/include/xeno/sched.h @@ -60,7 +60,8 @@ extern struct mm_struct init_mm; struct task_struct { int processor; - int state, hyp_events; + int state; + int hyp_events; unsigned int domain; /* index into frame_table threading pages belonging to this @@ -105,11 +106,24 @@ struct task_struct { unsigned long flags; }; +/* + * domain states + * TASK_RUNNING: Domain is runnable and should be on a run queue + * TASK_INTERRUPTIBLE: Domain is blocked but may be woken up by an event + * or expiring timer + * TASK_UNINTERRUPTIBLE: Domain is blocked but may not be woken up by an + * arbitrary event or timer. + * TASK_WAIT: Domain's CPU allocation expired. + * TASK_STOPPED: not really used in Xen + * TASK_DYING: Domain is about to cross over to the land of the dead. 
+ */ + #define TASK_RUNNING 0 #define TASK_INTERRUPTIBLE 1 #define TASK_UNINTERRUPTIBLE 2 -#define TASK_STOPPED 4 -#define TASK_DYING 8 +#define TASK_WAIT 4 +#define TASK_DYING 16 +/* #define TASK_STOPPED 8 not really used */ #define SCHED_YIELD 0x10 @@ -171,52 +185,23 @@ extern void free_irq(unsigned int, void *); extern unsigned long wait_init_idle; #define init_idle() clear_bit(smp_processor_id(), &wait_init_idle); + + +/* + * Scheduler functions (in schedule.c) + */ #define set_current_state(_s) do { current->state = (_s); } while (0) #define MAX_SCHEDULE_TIMEOUT LONG_MAX +void scheduler_init(void); +void start_scheduler(void); +void sched_add_domain(struct task_struct *p); +void sched_rem_domain(struct task_struct *p); +int wake_up(struct task_struct *p); long schedule_timeout(long timeout); -asmlinkage void schedule(void); - +long do_yield(void); void reschedule(struct task_struct *p); +asmlinkage void schedule(void); -typedef struct schedule_data_st -{ - spinlock_t lock; - struct list_head runqueue; - struct task_struct *prev, *curr; -} __cacheline_aligned schedule_data_t; -extern schedule_data_t schedule_data[NR_CPUS]; - -static inline void __add_to_runqueue(struct task_struct * p) -{ - list_add(&p->run_list, &schedule_data[p->processor].runqueue); -} - - -static inline void __move_last_runqueue(struct task_struct * p) -{ - list_del(&p->run_list); - list_add_tail(&p->run_list, &schedule_data[p->processor].runqueue); -} - - -static inline void __move_first_runqueue(struct task_struct * p) -{ - list_del(&p->run_list); - list_add(&p->run_list, &schedule_data[p->processor].runqueue); -} - -static inline void __del_from_runqueue(struct task_struct * p) -{ - list_del(&p->run_list); - p->run_list.next = NULL; -} - -static inline int __task_on_runqueue(struct task_struct *p) -{ - return (p->run_list.next != NULL); -} - -int wake_up(struct task_struct *p); #define signal_pending(_p) ((_p)->hyp_events || \ (_p)->shared_info->events) diff --git 
a/xen-2.4.16/include/xeno/time.h b/xen-2.4.16/include/xeno/time.h index 33837c5009..5bb717fb2d 100644 --- a/xen-2.4.16/include/xeno/time.h +++ b/xen-2.4.16/include/xeno/time.h @@ -1,13 +1,61 @@ -/****************************************************************************** - * time.h +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge + **************************************************************************** + * + * File: time.h + * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) + * Changes: + * + * Date: Nov 2002 + * + * Environment: Xen Hypervisor + * Description: This file provides a one stop shop for all time related + * issues within the hypervisor. + * + * The Hypervisor provides the following notions of time: + * Cycle Counter Time, System Time, Wall Clock Time, and + * Domain Virtual Time. + * + **************************************************************************** + * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $ + **************************************************************************** */ + + #ifndef __XENO_TIME_H__ #define __XENO_TIME_H__ +#include <asm/ptrace.h> /* XXX Only used for do_timer which should be moved */ +#include <asm/time.h> /* pull in architecture specific time definition */ #include <xeno/types.h> -#include <asm/ptrace.h> +/* + * Cycle Counter Time (defined in asm/time.h) + */ + + +/* + * System Time + * 64 bit value containing the nanoseconds elapsed since boot time. + * This value is adjusted by frequency drift. + * NOW() returns the current time. 
+ * The other macros are for convenience to approximate short intervals + * of real time into system time + */ +#define NOW() ((s_time_t)get_s_time()) +#define SECONDS(_s) (((s_time_t)(_s)) * 1000000000UL ) +#define TENTHS(_ts) (((s_time_t)(_ts)) * 100000000UL ) +#define HUNDREDTHS(_hs) (((s_time_t)(_hs)) * 10000000UL ) +#define MILLISECS(_ms) (((s_time_t)(_ms)) * 1000000UL ) +#define MICROSECS(_us) (((s_time_t)(_us)) * 1000UL ) +#define Time_Max ((s_time_t) 0x7fffffffffffffffLL) +#define FOREVER Time_Max + +/* + * Wall Clock Time + */ struct timeval { long tv_sec; /* seconds */ long tv_usec; /* microseconds */ @@ -25,6 +73,13 @@ extern void get_fast_time(struct timeval *tv); extern void (*do_get_fast_time)(struct timeval *); #endif +/* + * Domain Virtual Time (defined in asm/time.h) + */ +/* XXX Interface for getting and setting still missing */ + + +/* XXX move this */ extern void do_timer(struct pt_regs *regs); #endif /* __XENO_TIME_H__ */ diff --git a/xen-2.4.16/include/xeno/timer.h b/xen-2.4.16/include/xeno/timer.h index c4f01ada59..dcde75b182 100644 --- a/xen-2.4.16/include/xeno/timer.h +++ b/xen-2.4.16/include/xeno/timer.h @@ -12,10 +12,17 @@ * The "data" field enables use of a common timeout function for several * timeouts. You can use this field to distinguish between the different * invocations. + * + * RN: Unlike the Linux timers, which are executed at the periodic timer + * interrupt, in Xen, the timer list is only checked "occasionally", thus + * its accuracy might be somewhat worse than under Linux. However, the + * hypervisor should be purely event-driven and, in fact, in the current + * implementation, timers are only used for watchdog purpose at a very + * coarse granularity anyway. Thus this is not a problem. 
*/ struct timer_list { struct list_head list; - unsigned long expires; + unsigned long expires; /* jiffies */ unsigned long data; void (*function)(unsigned long); }; diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c index 12db77164b..a35ef1cc8a 100644 --- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c @@ -42,7 +42,7 @@ extern struct drive_info_struct drive_info; EXPORT_SYMBOL(drive_info); #endif -extern unsigned long get_cmos_time(void); +//extern unsigned long get_cmos_time(void); /* platform dependent support */ EXPORT_SYMBOL(boot_cpu_data); @@ -58,7 +58,7 @@ EXPORT_SYMBOL(probe_irq_mask); EXPORT_SYMBOL(kernel_thread); EXPORT_SYMBOL(pm_idle); EXPORT_SYMBOL(pm_power_off); -EXPORT_SYMBOL(get_cmos_time); +//EXPORT_SYMBOL(get_cmos_time); EXPORT_SYMBOL(apm_info); #ifdef CONFIG_DEBUG_IOVIRT diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c index 1c7f27176d..87c52056f6 100644 --- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c @@ -86,7 +86,7 @@ void cpu_idle (void) while (1) { while (!current->need_resched) - HYPERVISOR_yield(); + HYPERVISOR_do_sched_op(NULL); schedule(); check_pgt_cache(); } diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c index c728eb15e6..2557918c6d 100644 --- a/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c @@ -102,47 +102,26 @@ static inline unsigned long ticks_to_us(unsigned long ticks) return(hi); } -static inline unsigned long do_gettimeoffset(void) +static long long get_s_time(void) { -#if 0 - register unsigned long eax, edx; - - /* Read the Time Stamp Counter */ - - rdtsc(eax,edx); - - /* .. 
relative to previous jiffy (32 bits is enough) */ - eax -= last_tsc_low; /* tsc_low delta */ - - /* - * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient - * = (tsc_low delta) * (usecs_per_clock) - * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy) - * - * Using a mull instead of a divl saves up to 31 clock cycles - * in the critical path. - */ - - edx = ticks_to_us(eax); - - /* our adjusted time offset in microseconds */ - return delay_at_last_interrupt + edx; -#else - /* - * We should keep a 'last_tsc_low' thing which incorporates - * delay_at_last_interrupt, adjusted in timer_interrupt after - * do_timer_interrupt. It would look at change in xtime, and - * make appropriate adjustment to a last_tsc variable. - * - * We'd be affected by rounding error in ticks_per_usec, and by - * processor clock drift (which should be no more than in an - * external interrupt source anyhow). - * - * Perhaps a bit rough and ready, but never mind! - */ - return 0; -#endif + u32 delta, low, pcc; + long long now; + long long incr; + + /* read two values (pcc, now) "atomically" */ +again: + pcc = HYPERVISOR_shared_info->st_timestamp; + now = HYPERVISOR_shared_info->system_time; + if (HYPERVISOR_shared_info->st_timestamp != pcc) goto again; + + /* only use bottom 32bits of TSC. 
This should be sufficient */ + rdtscl(low); + delta = low - pcc; + + incr = ((long long)(ticks_to_us(delta)*1000)); + return now + incr; } +#define NOW() ((long long)get_s_time()) /* * This version of gettimeofday has microsecond resolution @@ -151,15 +130,15 @@ static inline unsigned long do_gettimeoffset(void) void do_gettimeofday(struct timeval *tv) { unsigned long flags; - unsigned long usec, sec, lost; + unsigned long usec, sec; read_lock_irqsave(&xtime_lock, flags); - usec = do_gettimeoffset(); - lost = jiffies - wall_jiffies; - if ( lost != 0 ) usec += lost * (1000000 / HZ); - sec = xtime.tv_sec; - usec += xtime.tv_usec; - read_unlock_irqrestore(&xtime_lock, flags); + + usec = ((unsigned long)(NOW()-HYPERVISOR_shared_info->wc_timestamp))/1000; + sec = HYPERVISOR_shared_info->tv_sec; + usec += HYPERVISOR_shared_info->tv_usec; + + read_unlock_irqrestore(&xtime_lock, flags); while ( usec >= 1000000 ) { @@ -173,6 +152,8 @@ void do_gettimeofday(struct timeval *tv) void do_settimeofday(struct timeval *tv) { +/* XXX RN: should do something special here for dom0 */ +#if 0 write_lock_irq(&xtime_lock); /* * This is revolting. We need to set "xtime" correctly. However, the @@ -195,6 +176,7 @@ void do_settimeofday(struct timeval *tv) time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; write_unlock_irq(&xtime_lock); +#endif } @@ -235,19 +217,6 @@ static struct irqaction irq_timer = { }; -unsigned long get_cmos_time(void) -{ - unsigned long secs = HYPERVISOR_shared_info->rtc_time; - unsigned long diff; - - rdtscl(diff); - diff -= (unsigned long)HYPERVISOR_shared_info->rtc_timestamp; - - secs += ticks_to_us(diff); - - return(secs + ticks_to_secs(diff)); -} - /* Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). 
*/ static unsigned long __init calibrate_tsc(void) @@ -268,7 +237,6 @@ void __init time_init(void) unsigned long long alarm; fast_gettimeoffset_quotient = calibrate_tsc(); - do_get_fast_time = do_gettimeofday; /* report CPU clock rate in Hz. * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = @@ -299,6 +267,5 @@ void __init time_init(void) HYPERVISOR_shared_info->domain_timeout = ~0ULL; clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events); - xtime.tv_sec = get_cmos_time(); - xtime.tv_usec = 0; + do_gettimeofday(&xtime); } diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h index 640c3a958e..c120294bf7 100644 --- a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h @@ -215,12 +215,13 @@ static inline int HYPERVISOR_fpu_taskswitch(void) return ret; } -static inline int HYPERVISOR_yield(void) +static inline int HYPERVISOR_do_sched_op(void *sched_op) { int ret; __asm__ __volatile__ ( TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_yield) ); + : "=a" (ret) : "0" (__HYPERVISOR_sched_op), + "b" (sched_op) ); return ret; } |