author     kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>   2004-01-31 19:45:13 +0000
committer  kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>   2004-01-31 19:45:13 +0000
commit     dea8eefd6214e3ad5b54795fa958ab721d58710c
tree       f210aa8b10d18345ea90980eef13ea0c6a2665ee /xenolinux-2.4.24-sparse
parent     f21e4de207c89ca1f1d18122c43db30b2ff41e7b
bitkeeper revision 1.699 (401c05c9TV2zsaZ_e3zpy-zaKxCetw)
timer.c, timer.h, sched.h: new file
Many files: Rolf's new timer interface, plus various cleanups.
Diffstat (limited to 'xenolinux-2.4.24-sparse')
-rw-r--r--  xenolinux-2.4.24-sparse/arch/xeno/config.in                  |   4
-rw-r--r--  xenolinux-2.4.24-sparse/arch/xeno/defconfig                  |   5
-rw-r--r--  xenolinux-2.4.24-sparse/arch/xeno/drivers/network/network.c  |  13
-rw-r--r--  xenolinux-2.4.24-sparse/arch/xeno/kernel/process.c           |  30
-rw-r--r--  xenolinux-2.4.24-sparse/arch/xeno/kernel/time.c              | 155
-rw-r--r--  xenolinux-2.4.24-sparse/include/asm-xeno/hypervisor.h        |  24
-rw-r--r--  xenolinux-2.4.24-sparse/include/linux/sched.h                | 966
-rw-r--r--  xenolinux-2.4.24-sparse/include/linux/timer.h                |  77
-rw-r--r--  xenolinux-2.4.24-sparse/kernel/panic.c                       |   3
-rw-r--r--  xenolinux-2.4.24-sparse/kernel/timer.c                       | 968
10 files changed, 2216 insertions(+), 29 deletions(-)
diff --git a/xenolinux-2.4.24-sparse/arch/xeno/config.in b/xenolinux-2.4.24-sparse/arch/xeno/config.in
index 445b574a71..3f4736fd1f 100644
--- a/xenolinux-2.4.24-sparse/arch/xeno/config.in
+++ b/xenolinux-2.4.24-sparse/arch/xeno/config.in
@@ -13,9 +13,11 @@ define_bool CONFIG_SBUS n
define_bool CONFIG_UID16 y
mainmenu_option next_comment
-comment 'Privileged guest OS'
+comment 'XenoLinux'
bool 'Support for privileged operations (domain 0)' CONFIG_XENO_PRIV
endmenu
+# the IBM S/390 patch needs this.
+define_bool CONFIG_NO_IDLE_HZ y
mainmenu_option next_comment
comment 'Code maturity level options'
diff --git a/xenolinux-2.4.24-sparse/arch/xeno/defconfig b/xenolinux-2.4.24-sparse/arch/xeno/defconfig
index abef573aa7..3ba185a19b 100644
--- a/xenolinux-2.4.24-sparse/arch/xeno/defconfig
+++ b/xenolinux-2.4.24-sparse/arch/xeno/defconfig
@@ -8,9 +8,12 @@ CONFIG_ISA=y
CONFIG_UID16=y
#
-# Privileged guest OS
+# XenoLinux Options
#
+# support for privileged domains
CONFIG_XENO_PRIV=y
+# On demand timer setting (taken from s390 patch set)
+CONFIG_NO_IDLE_HZ=y
#
# Code maturity level options
diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.24-sparse/arch/xeno/drivers/network/network.c
index ac557a3c11..075acdf5af 100644
--- a/xenolinux-2.4.24-sparse/arch/xeno/drivers/network/network.c
+++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/network/network.c
@@ -81,15 +81,15 @@ static void _dbg_network_int(struct net_device *dev)
if ( np->state == STATE_CLOSED )
return;
- printk(KERN_ALERT "tx_full = %d, tx_resp_cons = 0x%08x,"
- " tx_req_prod = 0x%08x, tx_resp_prod = 0x%08x,"
- " tx_event = 0x%08x, state=%d\n",
+ printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x,"
+ " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x,"
+ " tx_event=0x%08x, state=%d\n",
np->tx_full, np->tx_resp_cons,
np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod,
np->net_idx->tx_event,
test_bit(__LINK_STATE_XOFF, &dev->state));
- printk(KERN_ALERT "rx_resp_cons = 0x%08x,"
- " rx_req_prod = 0x%08x, rx_resp_prod = 0x%08x, rx_event = 0x%08x\n",
+ printk(KERN_ALERT "net: rx_resp_cons=0x%08x,"
+ " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n",
np->rx_resp_cons, np->net_idx->rx_req_prod,
np->net_idx->rx_resp_prod, np->net_idx->rx_event);
}
@@ -550,7 +550,8 @@ int __init init_module(void)
goto fail;
}
- err = request_irq(_EVENT_DEBUG, dbg_network_int, 0, "debug", NULL);
+ err = request_irq(_EVENT_DEBUG, dbg_network_int, SA_SHIRQ, "net_dbg",
+ &dbg_network_int);
if ( err )
printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
diff --git a/xenolinux-2.4.24-sparse/arch/xeno/kernel/process.c b/xenolinux-2.4.24-sparse/arch/xeno/kernel/process.c
index 3b17c7326c..ff64bccd4c 100644
--- a/xenolinux-2.4.24-sparse/arch/xeno/kernel/process.c
+++ b/xenolinux-2.4.24-sparse/arch/xeno/kernel/process.c
@@ -80,14 +80,36 @@ void enable_hlt(void)
*/
void cpu_idle (void)
{
- /* endless idle loop with no priority at all */
+ extern int set_timeout_timer(void);
+
+ /* Endless idle loop with no priority at all. */
init_idle();
current->nice = 20;
current->counter = -100;
- while (1) {
- while (!current->need_resched)
- HYPERVISOR_yield();
+ for ( ; ; )
+ {
+ while ( !current->need_resched )
+ {
+ __cli();
+ if ( current->need_resched )
+ {
+ /* The race-free check for events failed. */
+ __sti();
+ break;
+ }
+ else if ( set_timeout_timer() == 0 )
+ {
+                /* NB. Blocking re-enables events in a race-free manner. */
+ HYPERVISOR_block();
+ }
+ else
+ {
+ /* No race here: yielding will get us the CPU again anyway. */
+ __sti();
+ HYPERVISOR_yield();
+ }
+ }
schedule();
check_pgt_cache();
}
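
As an aside on the idle-loop change above: the ordering matters. Events are disabled, need_resched is re-checked, and only then does the domain either block or, if arming the one-shot timer failed, fall back to yielding. A wakeup can therefore only arrive before the re-check (where it is seen) or while blocked (where it ends the block). The sketch below shows the same check-then-block discipline in stand-alone C; disable_events(), enable_events() and block_until_event() are hypothetical stand-ins for __cli(), __sti() and HYPERVISOR_block(), not real kernel or Xen interfaces.

#include <stdio.h>

static volatile int need_resched;     /* set asynchronously on wakeup */

/* Hypothetical stand-ins for __cli()/__sti()/HYPERVISOR_block(). */
static void disable_events(void) { puts("events off"); }
static void enable_events(void)  { puts("events on"); }
static void block_until_event(void)
{
    /* Like HYPERVISOR_block(), blocking re-enables event delivery
     * atomically, so a wakeup posted after the re-check is never lost. */
    puts("blocked until next event");
}

static void idle_once(void)
{
    disable_events();
    if (need_resched) {
        /* The race-free re-check failed: work arrived after the outer
         * test, so back out and reschedule instead of blocking. */
        enable_events();
        return;
    }
    block_until_event();
}

int main(void)
{
    idle_once();          /* nothing pending: the domain would block */
    need_resched = 1;
    idle_once();          /* work pending: returns without blocking  */
    return 0;
}
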
diff --git a/xenolinux-2.4.24-sparse/arch/xeno/kernel/time.c b/xenolinux-2.4.24-sparse/arch/xeno/kernel/time.c
index 1944e63c1c..bf43b6a99b 100644
--- a/xenolinux-2.4.24-sparse/arch/xeno/kernel/time.c
+++ b/xenolinux-2.4.24-sparse/arch/xeno/kernel/time.c
@@ -75,7 +75,7 @@ static u32 st_scale_i; /* convert ticks -> usecs */
/* These are periodically updated in shared_info, and then copied here. */
static u32 shadow_tsc_stamp;
-static s64 shadow_system_time;
+static u64 shadow_system_time;
static u32 shadow_time_version;
static struct timeval shadow_tv;
@@ -91,9 +91,12 @@ static long last_update_to_rtc, last_update_to_xen;
#endif
/* Periodically take synchronised time base from Xen, if we need it. */
-static long last_update_from_xen;
+static long last_update_from_xen; /* UTC seconds when last read Xen clock. */
-static u64 processed_system_time;
+/* Keep track of last time we did processing/updating of jiffies and xtime. */
+static u64 processed_system_time; /* System time (ns) at last processing. */
+
+#define NS_PER_TICK (1000000000ULL/HZ)
#define HANDLE_USEC_UNDERFLOW(_tv) \
do { \
@@ -197,8 +200,11 @@ static int set_rtc_mmss(unsigned long nowtime)
#endif
-/* Must be called with the xtime_lock held for writing. */
-static void get_time_values_from_xen(void)
+/*
+ * Reads a consistent set of time-base values from Xen, into a shadow data
+ * area. Must be called with the xtime_lock held for writing.
+ */
+static void __get_time_values_from_xen(void)
{
do {
shadow_time_version = HYPERVISOR_shared_info->time_version2;
@@ -216,7 +222,11 @@ static void get_time_values_from_xen(void)
(shadow_time_version == HYPERVISOR_shared_info->time_version2)
-static inline unsigned long get_time_delta_usecs(void)
+/*
+ * Returns the system time elapsed, in ns, since the current shadow_timestamp
+ * was calculated. Must be called with the xtime_lock held for reading.
+ */
+static inline unsigned long __get_time_delta_usecs(void)
{
s32 delta_tsc;
u32 low;
@@ -234,6 +244,9 @@ static inline unsigned long get_time_delta_usecs(void)
}
+/*
+ * Returns the current time-of-day in UTC timeval format.
+ */
void do_gettimeofday(struct timeval *tv)
{
unsigned long flags, lost;
@@ -242,7 +255,7 @@ void do_gettimeofday(struct timeval *tv)
again:
read_lock_irqsave(&xtime_lock, flags);
- _tv.tv_usec = get_time_delta_usecs();
+ _tv.tv_usec = __get_time_delta_usecs();
if ( (lost = (jiffies - wall_jiffies)) != 0 )
_tv.tv_usec += lost * (1000000 / HZ);
_tv.tv_sec = xtime.tv_sec;
@@ -257,7 +270,7 @@ void do_gettimeofday(struct timeval *tv)
*/
read_unlock_irqrestore(&xtime_lock, flags);
write_lock_irqsave(&xtime_lock, flags);
- get_time_values_from_xen();
+ __get_time_values_from_xen();
write_unlock_irqrestore(&xtime_lock, flags);
goto again;
}
@@ -276,6 +289,10 @@ void do_gettimeofday(struct timeval *tv)
*tv = _tv;
}
+
+/*
+ * Sets the current time-of-day based on passed-in UTC timeval parameter.
+ */
void do_settimeofday(struct timeval *tv)
{
struct timeval newtv;
@@ -291,10 +308,10 @@ void do_settimeofday(struct timeval *tv)
* be stale, so we can retry with fresh ones.
*/
again:
- tv->tv_usec -= get_time_delta_usecs();
+ tv->tv_usec -= __get_time_delta_usecs();
if ( unlikely(!TIME_VALUES_UP_TO_DATE) )
{
- get_time_values_from_xen();
+ __get_time_values_from_xen();
goto again;
}
@@ -334,6 +351,7 @@ void do_settimeofday(struct timeval *tv)
}
}
+
asmlinkage long sys_stime(int *tptr)
{
int value;
@@ -353,14 +371,22 @@ asmlinkage long sys_stime(int *tptr)
return 0;
}
-#define NS_PER_TICK (1000000000ULL/HZ)
+
+/* Convert jiffies to system time. Call with xtime_lock held for reading. */
+static inline u64 __jiffies_to_st(unsigned long j)
+{
+ return processed_system_time + ((j - jiffies) * NS_PER_TICK);
+}
+
+
static inline void do_timer_interrupt(int irq, void *dev_id,
struct pt_regs *regs)
{
s64 delta;
+ unsigned long ticks = 0;
long sec_diff;
- get_time_values_from_xen();
+ __get_time_values_from_xen();
if ( (delta = (s64)(shadow_system_time - processed_system_time)) < 0 )
{
@@ -368,13 +394,24 @@ static inline void do_timer_interrupt(int irq, void *dev_id,
return;
}
+ /* Process elapsed jiffies since last call. */
while ( delta >= NS_PER_TICK )
{
- do_timer(regs);
+ ticks++;
delta -= NS_PER_TICK;
processed_system_time += NS_PER_TICK;
}
-
+
+ if ( ticks != 0 )
+ {
+ do_timer_ticks(ticks);
+
+ if ( user_mode(regs) )
+ update_process_times_us(ticks, 0);
+ else
+ update_process_times_us(0, ticks);
+ }
+
/*
* Take synchronised time from Xen once a minute if we're not
* synchronised ourselves, and we haven't chosen to keep an independent
@@ -446,6 +483,7 @@ static inline void do_timer_interrupt(int irq, void *dev_id,
#endif
}
+
static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
write_lock(&xtime_lock);
@@ -463,6 +501,89 @@ static struct irqaction irq_timer = {
NULL
};
+
+/*
+ * This function works out when the next timer function has to be
+ * executed (by looking at the timer list) and sets the Xen one-shot
+ * domain timer to the appropriate value. This is typically called in
+ * cpu_idle() before the domain blocks.
+ *
+ * The function returns a non-0 value on error conditions.
+ *
+ * It must be called with interrupts disabled.
+ */
+extern spinlock_t timerlist_lock;
+int set_timeout_timer(void)
+{
+ struct timer_list *timer;
+ u64 alarm = 0;
+ int ret = 0;
+
+ spin_lock(&timerlist_lock);
+
+ /*
+ * This is safe against long blocking (since calculations are not based on
+ * TSC deltas). It is also safe against warped system time since
+ * suspend-resume is cooperative and we would first get locked out. It is
+ * safe against normal updates of jiffies since interrupts are off.
+ */
+ if ( (timer = next_timer_event()) != NULL )
+ alarm = __jiffies_to_st(timer->expires);
+
+ /* Failure is pretty bad, but we'd best soldier on. */
+ if ( HYPERVISOR_set_dom_timer(alarm) != 0 )
+ ret = -1;
+
+ spin_unlock(&timerlist_lock);
+
+ return ret;
+}
+
+
+/* Time debugging. */
+static void dbg_time_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+ unsigned long flags, j;
+ u64 s_now, j_st;
+ struct timeval s_tv, tv;
+
+ struct timer_list *timer;
+ u64 t_st;
+
+ read_lock_irqsave(&xtime_lock, flags);
+ s_tv.tv_sec = shadow_tv.tv_sec;
+ s_tv.tv_usec = shadow_tv.tv_usec;
+ s_now = shadow_system_time;
+ read_unlock_irqrestore(&xtime_lock, flags);
+
+ do_gettimeofday(&tv);
+
+ j = jiffies;
+ j_st = __jiffies_to_st(j);
+
+ timer = next_timer_event();
+ t_st = __jiffies_to_st(timer->expires);
+
+ printk(KERN_ALERT "time: shadow_st=0x%X:%08X\n",
+ (u32)(s_now>>32), (u32)s_now);
+ printk(KERN_ALERT "time: wct=%lds %ldus shadow_wct=%lds %ldus\n",
+ tv.tv_sec, tv.tv_usec, s_tv.tv_sec, s_tv.tv_usec);
+ printk(KERN_ALERT "time: jiffies=%lu(0x%X:%08X) timeout=%lu(0x%X:%08X)\n",
+ jiffies,(u32)(j_st>>32), (u32)j_st,
+ timer->expires,(u32)(t_st>>32), (u32)t_st);
+ printk(KERN_ALERT "time: processed_system_time=0x%X:%08X\n",
+ (u32)(processed_system_time>>32), (u32)processed_system_time);
+}
+
+static struct irqaction dbg_time = {
+ dbg_time_int,
+ SA_SHIRQ,
+ 0,
+ "timer_dbg",
+ &dbg_time_int,
+ NULL
+};
+
void __init time_init(void)
{
unsigned long long alarm;
@@ -494,10 +615,12 @@ void __init time_init(void)
st_scale_f = scale & 0xffffffff;
st_scale_i = scale >> 32;
- get_time_values_from_xen();
+ __get_time_values_from_xen();
processed_system_time = shadow_system_time;
- setup_irq(TIMER_IRQ, &irq_timer);
+ (void)setup_irq(TIMER_IRQ, &irq_timer);
+
+ (void)setup_irq(_EVENT_DEBUG, &dbg_time);
rdtscll(alarm);
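
For reference, set_timeout_timer() above converts the soonest timer-list expiry from jiffies into a Xen system-time alarm via __jiffies_to_st(), i.e. alarm = processed_system_time + (expires - jiffies) * NS_PER_TICK; when no timer is pending it simply passes alarm = 0. A minimal stand-alone check of that arithmetic; HZ and the sample values are assumptions chosen only for illustration.

#include <stdint.h>
#include <stdio.h>

#define HZ          100
#define NS_PER_TICK (1000000000ULL / HZ)     /* 10,000,000 ns per tick */

/* Mirrors __jiffies_to_st(): system time (ns) at which jiffy j falls due. */
static uint64_t jiffies_to_st(uint64_t processed_system_time,
                              unsigned long jiffies_now, unsigned long j)
{
    return processed_system_time + (uint64_t)(j - jiffies_now) * NS_PER_TICK;
}

int main(void)
{
    uint64_t st_now = 5000000000ULL;     /* 5 s of system time processed  */
    unsigned long jiffies_now = 500;
    unsigned long expires = 550;         /* next timer due 50 ticks ahead */

    /* Expect 5,000,000,000 + 50 * 10,000,000 = 5,500,000,000 ns. */
    printf("alarm = %llu ns\n",
           (unsigned long long)jiffies_to_st(st_now, jiffies_now, expires));
    return 0;
}

With HZ=100 each tick is 10 ms, so a timer 50 jiffies away maps to an alarm 500 ms past the last processed system time.
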
diff --git a/xenolinux-2.4.24-sparse/include/asm-xeno/hypervisor.h b/xenolinux-2.4.24-sparse/include/asm-xeno/hypervisor.h
index 064088ff6f..34272a624f 100644
--- a/xenolinux-2.4.24-sparse/include/asm-xeno/hypervisor.h
+++ b/xenolinux-2.4.24-sparse/include/asm-xeno/hypervisor.h
@@ -256,6 +256,17 @@ static inline int HYPERVISOR_yield(void)
return ret;
}
+static inline int HYPERVISOR_block(void)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
+ "b" (SCHEDOP_block) );
+
+ return ret;
+}
+
static inline int HYPERVISOR_exit(void)
{
int ret;
@@ -279,6 +290,19 @@ static inline int HYPERVISOR_stop(unsigned long srec)
return ret;
}
+static inline long HYPERVISOR_set_dom_timer(u64 timeout)
+{
+ int ret;
+ unsigned long timeout_hi = (unsigned long)(timeout>>32);
+ unsigned long timeout_lo = (unsigned long)timeout;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_set_dom_timer),
+ "b" (timeout_hi), "c" (timeout_lo) : "memory" );
+
+ return ret;
+}
+
static inline int HYPERVISOR_dom0_op(dom0_op_t *dom0_op)
{
int ret;
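
Note on the hypercall stub above: the 64-bit nanosecond timeout is passed to Xen in two 32-bit registers, the high word in EBX and the low word in ECX. A quick stand-alone check that this split round-trips correctly:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t timeout = 5500000000ULL;      /* some ns value above 2^32 */

    /* Split exactly as HYPERVISOR_set_dom_timer() does before the trap. */
    uint32_t timeout_hi = (uint32_t)(timeout >> 32);
    uint32_t timeout_lo = (uint32_t)timeout;

    /* What the receiving side would reassemble from the two registers. */
    uint64_t rebuilt = ((uint64_t)timeout_hi << 32) | timeout_lo;

    assert(rebuilt == timeout);
    printf("hi=0x%08x lo=0x%08x rebuilt=%llu ns\n",
           (unsigned)timeout_hi, (unsigned)timeout_lo,
           (unsigned long long)rebuilt);
    return 0;
}
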
diff --git a/xenolinux-2.4.24-sparse/include/linux/sched.h b/xenolinux-2.4.24-sparse/include/linux/sched.h
new file mode 100644
index 0000000000..ed42340517
--- /dev/null
+++ b/xenolinux-2.4.24-sparse/include/linux/sched.h
@@ -0,0 +1,966 @@
+#ifndef _LINUX_SCHED_H
+#define _LINUX_SCHED_H
+
+#include <asm/param.h> /* for HZ */
+
+extern unsigned long event;
+
+#include <linux/config.h>
+#include <linux/binfmts.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/times.h>
+#include <linux/timex.h>
+#include <linux/rbtree.h>
+
+#include <asm/system.h>
+#include <asm/semaphore.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/mmu.h>
+
+#include <linux/smp.h>
+#include <linux/tty.h>
+#include <linux/sem.h>
+#include <linux/signal.h>
+#include <linux/securebits.h>
+#include <linux/fs_struct.h>
+
+struct exec_domain;
+
+/*
+ * cloning flags:
+ */
+#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */
+#define CLONE_VM 0x00000100 /* set if VM shared between processes */
+#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
+#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
+#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
+#define CLONE_PID 0x00001000 /* set if pid shared */
+#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
+#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
+#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
+#define CLONE_THREAD 0x00010000 /* Same thread group? */
+#define CLONE_NEWNS 0x00020000 /* New namespace group? */
+
+#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD)
+
+/*
+ * These are the constant used to fake the fixed-point load-average
+ * counting. Some notes:
+ * - 11 bit fractions expand to 22 bits by the multiplies: this gives
+ * a load-average precision of 10 bits integer + 11 bits fractional
+ * - if you want to count load-averages more often, you need more
+ * precision, or rounding will get you. With 2-second counting freq,
+ * the EXP_n values would be 1981, 2034 and 2043 if still using only
+ * 11 bit fractions.
+ */
+extern unsigned long avenrun[]; /* Load averages */
+
+#define FSHIFT 11 /* nr of bits of precision */
+#define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */
+#define LOAD_FREQ (5*HZ) /* 5 sec intervals */
+#define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */
+#define EXP_5 2014 /* 1/exp(5sec/5min) */
+#define EXP_15 2037 /* 1/exp(5sec/15min) */
+
+#define CALC_LOAD(load,exp,n) \
+ load *= exp; \
+ load += n*(FIXED_1-exp); \
+ load >>= FSHIFT;
+
+#define CT_TO_SECS(x) ((x) / HZ)
+#define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ)
+
+extern int nr_running, nr_threads;
+extern int last_pid;
+
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/param.h>
+#include <linux/resource.h>
+#ifdef __KERNEL__
+#include <linux/timer.h>
+#endif
+
+#include <asm/processor.h>
+
+#define TASK_RUNNING 0
+#define TASK_INTERRUPTIBLE 1
+#define TASK_UNINTERRUPTIBLE 2
+#define TASK_ZOMBIE 4
+#define TASK_STOPPED 8
+
+#define __set_task_state(tsk, state_value) \
+ do { (tsk)->state = (state_value); } while (0)
+#define set_task_state(tsk, state_value) \
+ set_mb((tsk)->state, (state_value))
+
+#define __set_current_state(state_value) \
+ do { current->state = (state_value); } while (0)
+#define set_current_state(state_value) \
+ set_mb(current->state, (state_value))
+
+/*
+ * Scheduling policies
+ */
+#define SCHED_OTHER 0
+#define SCHED_FIFO 1
+#define SCHED_RR 2
+
+/*
+ * This is an additional bit set when we want to
+ * yield the CPU for one re-schedule..
+ */
+#define SCHED_YIELD 0x10
+
+struct sched_param {
+ int sched_priority;
+};
+
+struct completion;
+
+#ifdef __KERNEL__
+
+#include <linux/spinlock.h>
+
+/*
+ * This serializes "schedule()" and also protects
+ * the run-queue from deletions/modifications (but
+ * _adding_ to the beginning of the run-queue has
+ * a separate lock).
+ */
+extern rwlock_t tasklist_lock;
+extern spinlock_t runqueue_lock;
+extern spinlock_t mmlist_lock;
+
+extern void sched_init(void);
+extern void init_idle(void);
+extern void show_state(void);
+extern void cpu_init (void);
+extern void trap_init(void);
+extern void update_process_times(int user);
+#ifdef CONFIG_NO_IDLE_HZ
+extern void update_process_times_us(int user, int system);
+#endif
+extern void update_one_process(struct task_struct *p, unsigned long user,
+ unsigned long system, int cpu);
+
+#define MAX_SCHEDULE_TIMEOUT LONG_MAX
+extern signed long FASTCALL(schedule_timeout(signed long timeout));
+asmlinkage void schedule(void);
+
+extern int schedule_task(struct tq_struct *task);
+extern void flush_scheduled_tasks(void);
+extern int start_context_thread(void);
+extern int current_is_keventd(void);
+
+#if CONFIG_SMP
+extern void set_cpus_allowed(struct task_struct *p, unsigned long new_mask);
+#else
+# define set_cpus_allowed(p, new_mask) do { } while (0)
+#endif
+
+/*
+ * The default fd array needs to be at least BITS_PER_LONG,
+ * as this is the granularity returned by copy_fdset().
+ */
+#define NR_OPEN_DEFAULT BITS_PER_LONG
+
+struct namespace;
+/*
+ * Open file table structure
+ */
+struct files_struct {
+ atomic_t count;
+ rwlock_t file_lock; /* Protects all the below members. Nests inside tsk->alloc_lock */
+ int max_fds;
+ int max_fdset;
+ int next_fd;
+ struct file ** fd; /* current fd array */
+ fd_set *close_on_exec;
+ fd_set *open_fds;
+ fd_set close_on_exec_init;
+ fd_set open_fds_init;
+ struct file * fd_array[NR_OPEN_DEFAULT];
+};
+
+#define INIT_FILES \
+{ \
+ count: ATOMIC_INIT(1), \
+ file_lock: RW_LOCK_UNLOCKED, \
+ max_fds: NR_OPEN_DEFAULT, \
+ max_fdset: __FD_SETSIZE, \
+ next_fd: 0, \
+ fd: &init_files.fd_array[0], \
+ close_on_exec: &init_files.close_on_exec_init, \
+ open_fds: &init_files.open_fds_init, \
+ close_on_exec_init: { { 0, } }, \
+ open_fds_init: { { 0, } }, \
+ fd_array: { NULL, } \
+}
+
+/* Maximum number of active map areas.. This is a random (large) number */
+#define DEFAULT_MAX_MAP_COUNT (65536)
+
+extern int max_map_count;
+
+struct mm_struct {
+ struct vm_area_struct * mmap; /* list of VMAs */
+ rb_root_t mm_rb;
+ struct vm_area_struct * mmap_cache; /* last find_vma result */
+ pgd_t * pgd;
+ atomic_t mm_users; /* How many users with user space? */
+ atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */
+ int map_count; /* number of VMAs */
+ struct rw_semaphore mmap_sem;
+ spinlock_t page_table_lock; /* Protects task page tables and mm->rss */
+
+ struct list_head mmlist; /* List of all active mm's. These are globally strung
+ * together off init_mm.mmlist, and are protected
+ * by mmlist_lock
+ */
+
+ unsigned long start_code, end_code, start_data, end_data;
+ unsigned long start_brk, brk, start_stack;
+ unsigned long arg_start, arg_end, env_start, env_end;
+ unsigned long rss, total_vm, locked_vm;
+ unsigned long def_flags;
+ unsigned long cpu_vm_mask;
+ unsigned long swap_address;
+
+ unsigned dumpable:1;
+
+ /* Architecture-specific MM context */
+ mm_context_t context;
+};
+
+extern int mmlist_nr;
+
+#define INIT_MM(name) \
+{ \
+ mm_rb: RB_ROOT, \
+ pgd: swapper_pg_dir, \
+ mm_users: ATOMIC_INIT(2), \
+ mm_count: ATOMIC_INIT(1), \
+ mmap_sem: __RWSEM_INITIALIZER(name.mmap_sem), \
+ page_table_lock: SPIN_LOCK_UNLOCKED, \
+ mmlist: LIST_HEAD_INIT(name.mmlist), \
+}
+
+struct signal_struct {
+ atomic_t count;
+ struct k_sigaction action[_NSIG];
+ spinlock_t siglock;
+};
+
+
+#define INIT_SIGNALS { \
+ count: ATOMIC_INIT(1), \
+ action: { {{0,}}, }, \
+ siglock: SPIN_LOCK_UNLOCKED \
+}
+
+/*
+ * Some day this will be a full-fledged user tracking system..
+ */
+struct user_struct {
+ atomic_t __count; /* reference count */
+ atomic_t processes; /* How many processes does this user have? */
+ atomic_t files; /* How many open files does this user have? */
+
+ /* Hash table maintenance information */
+ struct user_struct *next, **pprev;
+ uid_t uid;
+};
+
+#define get_current_user() ({ \
+ struct user_struct *__user = current->user; \
+ atomic_inc(&__user->__count); \
+ __user; })
+
+extern struct user_struct root_user;
+#define INIT_USER (&root_user)
+
+struct task_struct {
+ /*
+ * offsets of these are hardcoded elsewhere - touch with care
+ */
+ volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
+ unsigned long flags; /* per process flags, defined below */
+ int sigpending;
+ mm_segment_t addr_limit; /* thread address space:
+ 0-0xBFFFFFFF for user-thead
+ 0-0xFFFFFFFF for kernel-thread
+ */
+ struct exec_domain *exec_domain;
+ volatile long need_resched;
+ unsigned long ptrace;
+
+ int lock_depth; /* Lock depth */
+
+/*
+ * offset 32 begins here on 32-bit platforms. We keep
+ * all fields in a single cacheline that are needed for
+ * the goodness() loop in schedule().
+ */
+ long counter;
+ long nice;
+ unsigned long policy;
+ struct mm_struct *mm;
+ int processor;
+ /*
+ * cpus_runnable is ~0 if the process is not running on any
+ * CPU. It's (1 << cpu) if it's running on a CPU. This mask
+ * is updated under the runqueue lock.
+ *
+ * To determine whether a process might run on a CPU, this
+ * mask is AND-ed with cpus_allowed.
+ */
+ unsigned long cpus_runnable, cpus_allowed;
+ /*
+ * (only the 'next' pointer fits into the cacheline, but
+ * that's just fine.)
+ */
+ struct list_head run_list;
+ unsigned long sleep_time;
+
+ struct task_struct *next_task, *prev_task;
+ struct mm_struct *active_mm;
+ struct list_head local_pages;
+ unsigned int allocation_order, nr_local_pages;
+
+/* task state */
+ struct linux_binfmt *binfmt;
+ int exit_code, exit_signal;
+ int pdeath_signal; /* The signal sent when the parent dies */
+ /* ??? */
+ unsigned long personality;
+ int did_exec:1;
+ unsigned task_dumpable:1;
+ pid_t pid;
+ pid_t pgrp;
+ pid_t tty_old_pgrp;
+ pid_t session;
+ pid_t tgid;
+ /* boolean value for session group leader */
+ int leader;
+ /*
+ * pointers to (original) parent process, youngest child, younger sibling,
+ * older sibling, respectively. (p->father can be replaced with
+ * p->p_pptr->pid)
+ */
+ struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
+ struct list_head thread_group;
+
+ /* PID hash table linkage. */
+ struct task_struct *pidhash_next;
+ struct task_struct **pidhash_pprev;
+
+ wait_queue_head_t wait_chldexit; /* for wait4() */
+ struct completion *vfork_done; /* for vfork() */
+ unsigned long rt_priority;
+ unsigned long it_real_value, it_prof_value, it_virt_value;
+ unsigned long it_real_incr, it_prof_incr, it_virt_incr;
+ struct timer_list real_timer;
+ struct tms times;
+ unsigned long start_time;
+ long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS];
+/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
+ unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
+ int swappable:1;
+/* process credentials */
+ uid_t uid,euid,suid,fsuid;
+ gid_t gid,egid,sgid,fsgid;
+ int ngroups;
+ gid_t groups[NGROUPS];
+ kernel_cap_t cap_effective, cap_inheritable, cap_permitted;
+ int keep_capabilities:1;
+ struct user_struct *user;
+/* limits */
+ struct rlimit rlim[RLIM_NLIMITS];
+ unsigned short used_math;
+ char comm[16];
+/* file system info */
+ int link_count, total_link_count;
+ struct tty_struct *tty; /* NULL if no tty */
+ unsigned int locks; /* How many file locks are being held */
+/* ipc stuff */
+ struct sem_undo *semundo;
+ struct sem_queue *semsleeping;
+/* CPU-specific state of this task */
+ struct thread_struct thread;
+/* filesystem information */
+ struct fs_struct *fs;
+/* open file information */
+ struct files_struct *files;
+/* namespace */
+ struct namespace *namespace;
+/* signal handlers */
+ spinlock_t sigmask_lock; /* Protects signal and blocked */
+ struct signal_struct *sig;
+
+ sigset_t blocked;
+ struct sigpending pending;
+
+ unsigned long sas_ss_sp;
+ size_t sas_ss_size;
+ int (*notifier)(void *priv);
+ void *notifier_data;
+ sigset_t *notifier_mask;
+
+/* Thread group tracking */
+ u32 parent_exec_id;
+ u32 self_exec_id;
+/* Protection of (de-)allocation: mm, files, fs, tty */
+ spinlock_t alloc_lock;
+
+/* journalling filesystem info */
+ void *journal_info;
+};
+
+/*
+ * Per process flags
+ */
+#define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */
+ /* Not implemented yet, only for 486*/
+#define PF_STARTING 0x00000002 /* being created */
+#define PF_EXITING 0x00000004 /* getting shut down */
+#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
+#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
+#define PF_DUMPCORE 0x00000200 /* dumped core */
+#define PF_SIGNALED 0x00000400 /* killed by a signal */
+#define PF_MEMALLOC 0x00000800 /* Allocating memory */
+#define PF_FREE_PAGES 0x00002000 /* per process page freeing */
+#define PF_NOIO 0x00004000 /* avoid generating further I/O */
+
+#define PF_USEDFPU 0x00100000 /* task used FPU this quantum (SMP) */
+
+/*
+ * Ptrace flags
+ */
+
+#define PT_PTRACED 0x00000001
+#define PT_TRACESYS 0x00000002
+#define PT_DTRACE 0x00000004 /* delayed trace (used on m68k, i386) */
+#define PT_TRACESYSGOOD 0x00000008
+#define PT_PTRACE_CAP 0x00000010 /* ptracer can follow suid-exec */
+
+#define is_dumpable(tsk) ((tsk)->task_dumpable && (tsk)->mm && (tsk)->mm->dumpable)
+
+/*
+ * Limit the stack by to some sane default: root can always
+ * increase this limit if needed.. 8MB seems reasonable.
+ */
+#define _STK_LIM (8*1024*1024)
+
+#define DEF_COUNTER (10*HZ/100) /* 100 ms time slice */
+#define MAX_COUNTER (20*HZ/100)
+#define DEF_NICE (0)
+
+extern void yield(void);
+
+/*
+ * The default (Linux) execution domain.
+ */
+extern struct exec_domain default_exec_domain;
+
+/*
+ * INIT_TASK is used to set up the first task table, touch at
+ * your own risk!. Base=0, limit=0x1fffff (=2MB)
+ */
+#define INIT_TASK(tsk) \
+{ \
+ state: 0, \
+ flags: 0, \
+ sigpending: 0, \
+ addr_limit: KERNEL_DS, \
+ exec_domain: &default_exec_domain, \
+ lock_depth: -1, \
+ counter: DEF_COUNTER, \
+ nice: DEF_NICE, \
+ policy: SCHED_OTHER, \
+ mm: NULL, \
+ active_mm: &init_mm, \
+ cpus_runnable: ~0UL, \
+ cpus_allowed: ~0UL, \
+ run_list: LIST_HEAD_INIT(tsk.run_list), \
+ next_task: &tsk, \
+ prev_task: &tsk, \
+ p_opptr: &tsk, \
+ p_pptr: &tsk, \
+ thread_group: LIST_HEAD_INIT(tsk.thread_group), \
+ wait_chldexit: __WAIT_QUEUE_HEAD_INITIALIZER(tsk.wait_chldexit),\
+ real_timer: { \
+ function: it_real_fn \
+ }, \
+ cap_effective: CAP_INIT_EFF_SET, \
+ cap_inheritable: CAP_INIT_INH_SET, \
+ cap_permitted: CAP_FULL_SET, \
+ keep_capabilities: 0, \
+ rlim: INIT_RLIMITS, \
+ user: INIT_USER, \
+ comm: "swapper", \
+ thread: INIT_THREAD, \
+ fs: &init_fs, \
+ files: &init_files, \
+ sigmask_lock: SPIN_LOCK_UNLOCKED, \
+ sig: &init_signals, \
+ pending: { NULL, &tsk.pending.head, {{0}}}, \
+ blocked: {{0}}, \
+ alloc_lock: SPIN_LOCK_UNLOCKED, \
+ journal_info: NULL, \
+}
+
+
+#ifndef INIT_TASK_SIZE
+# define INIT_TASK_SIZE 2048*sizeof(long)
+#endif
+
+union task_union {
+ struct task_struct task;
+ unsigned long stack[INIT_TASK_SIZE/sizeof(long)];
+};
+
+extern union task_union init_task_union;
+
+extern struct mm_struct init_mm;
+extern struct task_struct *init_tasks[NR_CPUS];
+
+/* PID hashing. (shouldnt this be dynamic?) */
+#define PIDHASH_SZ (4096 >> 2)
+extern struct task_struct *pidhash[PIDHASH_SZ];
+
+#define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))
+
+static inline void hash_pid(struct task_struct *p)
+{
+ struct task_struct **htable = &pidhash[pid_hashfn(p->pid)];
+
+ if((p->pidhash_next = *htable) != NULL)
+ (*htable)->pidhash_pprev = &p->pidhash_next;
+ *htable = p;
+ p->pidhash_pprev = htable;
+}
+
+static inline void unhash_pid(struct task_struct *p)
+{
+ if(p->pidhash_next)
+ p->pidhash_next->pidhash_pprev = p->pidhash_pprev;
+ *p->pidhash_pprev = p->pidhash_next;
+}
+
+static inline struct task_struct *find_task_by_pid(int pid)
+{
+ struct task_struct *p, **htable = &pidhash[pid_hashfn(pid)];
+
+ for(p = *htable; p && p->pid != pid; p = p->pidhash_next)
+ ;
+
+ return p;
+}
+
+#define task_has_cpu(tsk) ((tsk)->cpus_runnable != ~0UL)
+
+static inline void task_set_cpu(struct task_struct *tsk, unsigned int cpu)
+{
+ tsk->processor = cpu;
+ tsk->cpus_runnable = 1UL << cpu;
+}
+
+static inline void task_release_cpu(struct task_struct *tsk)
+{
+ tsk->cpus_runnable = ~0UL;
+}
+
+/* per-UID process charging. */
+extern struct user_struct * alloc_uid(uid_t);
+extern void free_uid(struct user_struct *);
+extern void switch_uid(struct user_struct *);
+
+#include <asm/current.h>
+
+extern unsigned long volatile jiffies;
+extern unsigned long itimer_ticks;
+extern unsigned long itimer_next;
+extern struct timeval xtime;
+extern void do_timer(struct pt_regs *);
+#ifdef CONFIG_NO_IDLE_HZ
+extern void do_timer_ticks(int ticks);
+#endif
+
+extern unsigned int * prof_buffer;
+extern unsigned long prof_len;
+extern unsigned long prof_shift;
+
+#define CURRENT_TIME (xtime.tv_sec)
+
+extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr));
+extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
+extern void FASTCALL(sleep_on(wait_queue_head_t *q));
+extern long FASTCALL(sleep_on_timeout(wait_queue_head_t *q,
+ signed long timeout));
+extern void FASTCALL(interruptible_sleep_on(wait_queue_head_t *q));
+extern long FASTCALL(interruptible_sleep_on_timeout(wait_queue_head_t *q,
+ signed long timeout));
+extern int FASTCALL(wake_up_process(struct task_struct * tsk));
+
+#define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
+#define wake_up_nr(x, nr) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
+#define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0)
+#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
+#define wake_up_sync_nr(x, nr) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
+#define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE, 1)
+#define wake_up_interruptible_nr(x, nr) __wake_up((x),TASK_INTERRUPTIBLE, nr)
+#define wake_up_interruptible_all(x) __wake_up((x),TASK_INTERRUPTIBLE, 0)
+#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
+#define wake_up_interruptible_sync_nr(x, nr) __wake_up_sync((x),TASK_INTERRUPTIBLE, nr)
+asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru);
+
+extern int in_group_p(gid_t);
+extern int in_egroup_p(gid_t);
+
+extern void proc_caches_init(void);
+extern void flush_signals(struct task_struct *);
+extern void flush_signal_handlers(struct task_struct *);
+extern void sig_exit(int, int, struct siginfo *);
+extern int dequeue_signal(sigset_t *, siginfo_t *);
+extern void block_all_signals(int (*notifier)(void *priv), void *priv,
+ sigset_t *mask);
+extern void unblock_all_signals(void);
+extern int send_sig_info(int, struct siginfo *, struct task_struct *);
+extern int force_sig_info(int, struct siginfo *, struct task_struct *);
+extern int kill_pg_info(int, struct siginfo *, pid_t);
+extern int kill_sl_info(int, struct siginfo *, pid_t);
+extern int kill_proc_info(int, struct siginfo *, pid_t);
+extern void notify_parent(struct task_struct *, int);
+extern void do_notify_parent(struct task_struct *, int);
+extern void force_sig(int, struct task_struct *);
+extern int send_sig(int, struct task_struct *, int);
+extern int kill_pg(pid_t, int, int);
+extern int kill_sl(pid_t, int, int);
+extern int kill_proc(pid_t, int, int);
+extern int do_sigaction(int, const struct k_sigaction *, struct k_sigaction *);
+extern int do_sigaltstack(const stack_t *, stack_t *, unsigned long);
+
+static inline int signal_pending(struct task_struct *p)
+{
+ return (p->sigpending != 0);
+}
+
+/*
+ * Re-calculate pending state from the set of locally pending
+ * signals, globally pending signals, and blocked signals.
+ */
+static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
+{
+ unsigned long ready;
+ long i;
+
+ switch (_NSIG_WORDS) {
+ default:
+ for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;)
+ ready |= signal->sig[i] &~ blocked->sig[i];
+ break;
+
+ case 4: ready = signal->sig[3] &~ blocked->sig[3];
+ ready |= signal->sig[2] &~ blocked->sig[2];
+ ready |= signal->sig[1] &~ blocked->sig[1];
+ ready |= signal->sig[0] &~ blocked->sig[0];
+ break;
+
+ case 2: ready = signal->sig[1] &~ blocked->sig[1];
+ ready |= signal->sig[0] &~ blocked->sig[0];
+ break;
+
+ case 1: ready = signal->sig[0] &~ blocked->sig[0];
+ }
+ return ready != 0;
+}
+
+/* Reevaluate whether the task has signals pending delivery.
+ This is required every time the blocked sigset_t changes.
+ All callers should have t->sigmask_lock. */
+
+static inline void recalc_sigpending(struct task_struct *t)
+{
+ t->sigpending = has_pending_signals(&t->pending.signal, &t->blocked);
+}
+
+/* True if we are on the alternate signal stack. */
+
+static inline int on_sig_stack(unsigned long sp)
+{
+ return (sp - current->sas_ss_sp < current->sas_ss_size);
+}
+
+static inline int sas_ss_flags(unsigned long sp)
+{
+ return (current->sas_ss_size == 0 ? SS_DISABLE
+ : on_sig_stack(sp) ? SS_ONSTACK : 0);
+}
+
+extern int request_irq(unsigned int,
+ void (*handler)(int, void *, struct pt_regs *),
+ unsigned long, const char *, void *);
+extern void free_irq(unsigned int, void *);
+
+/*
+ * This has now become a routine instead of a macro, it sets a flag if
+ * it returns true (to do BSD-style accounting where the process is flagged
+ * if it uses root privs). The implication of this is that you should do
+ * normal permissions checks first, and check suser() last.
+ *
+ * [Dec 1997 -- Chris Evans]
+ * For correctness, the above considerations need to be extended to
+ * fsuser(). This is done, along with moving fsuser() checks to be
+ * last.
+ *
+ * These will be removed, but in the mean time, when the SECURE_NOROOT
+ * flag is set, uids don't grant privilege.
+ */
+static inline int suser(void)
+{
+ if (!issecure(SECURE_NOROOT) && current->euid == 0) {
+ current->flags |= PF_SUPERPRIV;
+ return 1;
+ }
+ return 0;
+}
+
+static inline int fsuser(void)
+{
+ if (!issecure(SECURE_NOROOT) && current->fsuid == 0) {
+ current->flags |= PF_SUPERPRIV;
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * capable() checks for a particular capability.
+ * New privilege checks should use this interface, rather than suser() or
+ * fsuser(). See include/linux/capability.h for defined capabilities.
+ */
+
+static inline int capable(int cap)
+{
+#if 1 /* ok now */
+ if (cap_raised(current->cap_effective, cap))
+#else
+ if (cap_is_fs_cap(cap) ? current->fsuid == 0 : current->euid == 0)
+#endif
+ {
+ current->flags |= PF_SUPERPRIV;
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Routines for handling mm_structs
+ */
+extern struct mm_struct * mm_alloc(void);
+
+extern struct mm_struct * start_lazy_tlb(void);
+extern void end_lazy_tlb(struct mm_struct *mm);
+
+/* mmdrop drops the mm and the page tables */
+extern inline void FASTCALL(__mmdrop(struct mm_struct *));
+static inline void mmdrop(struct mm_struct * mm)
+{
+ if (atomic_dec_and_test(&mm->mm_count))
+ __mmdrop(mm);
+}
+
+/* mmput gets rid of the mappings and all user-space */
+extern void mmput(struct mm_struct *);
+/* Remove the current tasks stale references to the old mm_struct */
+extern void mm_release(void);
+
+/*
+ * Routines for handling the fd arrays
+ */
+extern struct file ** alloc_fd_array(int);
+extern int expand_fd_array(struct files_struct *, int nr);
+extern void free_fd_array(struct file **, int);
+
+extern fd_set *alloc_fdset(int);
+extern int expand_fdset(struct files_struct *, int nr);
+extern void free_fdset(fd_set *, int);
+
+extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
+extern void flush_thread(void);
+extern void exit_thread(void);
+
+extern void exit_mm(struct task_struct *);
+extern void exit_files(struct task_struct *);
+extern void exit_sighand(struct task_struct *);
+
+extern void reparent_to_init(void);
+extern void daemonize(void);
+
+extern int do_execve(char *, char **, char **, struct pt_regs *);
+extern int do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long);
+
+extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait));
+extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait));
+extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait));
+
+extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+
+#define __wait_event(wq, condition) \
+do { \
+ wait_queue_t __wait; \
+ init_waitqueue_entry(&__wait, current); \
+ \
+ add_wait_queue(&wq, &__wait); \
+ for (;;) { \
+ set_current_state(TASK_UNINTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ schedule(); \
+ } \
+ current->state = TASK_RUNNING; \
+ remove_wait_queue(&wq, &__wait); \
+} while (0)
+
+#define wait_event(wq, condition) \
+do { \
+ if (condition) \
+ break; \
+ __wait_event(wq, condition); \
+} while (0)
+
+#define __wait_event_interruptible(wq, condition, ret) \
+do { \
+ wait_queue_t __wait; \
+ init_waitqueue_entry(&__wait, current); \
+ \
+ add_wait_queue(&wq, &__wait); \
+ for (;;) { \
+ set_current_state(TASK_INTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ if (!signal_pending(current)) { \
+ schedule(); \
+ continue; \
+ } \
+ ret = -ERESTARTSYS; \
+ break; \
+ } \
+ current->state = TASK_RUNNING; \
+ remove_wait_queue(&wq, &__wait); \
+} while (0)
+
+#define wait_event_interruptible(wq, condition) \
+({ \
+ int __ret = 0; \
+ if (!(condition)) \
+ __wait_event_interruptible(wq, condition, __ret); \
+ __ret; \
+})
+
+#define REMOVE_LINKS(p) do { \
+ (p)->next_task->prev_task = (p)->prev_task; \
+ (p)->prev_task->next_task = (p)->next_task; \
+ if ((p)->p_osptr) \
+ (p)->p_osptr->p_ysptr = (p)->p_ysptr; \
+ if ((p)->p_ysptr) \
+ (p)->p_ysptr->p_osptr = (p)->p_osptr; \
+ else \
+ (p)->p_pptr->p_cptr = (p)->p_osptr; \
+ } while (0)
+
+#define SET_LINKS(p) do { \
+ (p)->next_task = &init_task; \
+ (p)->prev_task = init_task.prev_task; \
+ init_task.prev_task->next_task = (p); \
+ init_task.prev_task = (p); \
+ (p)->p_ysptr = NULL; \
+ if (((p)->p_osptr = (p)->p_pptr->p_cptr) != NULL) \
+ (p)->p_osptr->p_ysptr = p; \
+ (p)->p_pptr->p_cptr = p; \
+ } while (0)
+
+#define for_each_task(p) \
+ for (p = &init_task ; (p = p->next_task) != &init_task ; )
+
+#define for_each_thread(task) \
+ for (task = next_thread(current) ; task != current ; task = next_thread(task))
+
+#define next_thread(p) \
+ list_entry((p)->thread_group.next, struct task_struct, thread_group)
+
+#define thread_group_leader(p) (p->pid == p->tgid)
+
+static inline void del_from_runqueue(struct task_struct * p)
+{
+ nr_running--;
+ p->sleep_time = jiffies;
+ list_del(&p->run_list);
+ p->run_list.next = NULL;
+}
+
+static inline int task_on_runqueue(struct task_struct *p)
+{
+ return (p->run_list.next != NULL);
+}
+
+static inline void unhash_process(struct task_struct *p)
+{
+ if (task_on_runqueue(p))
+ out_of_line_bug();
+ write_lock_irq(&tasklist_lock);
+ nr_threads--;
+ unhash_pid(p);
+ REMOVE_LINKS(p);
+ list_del(&p->thread_group);
+ write_unlock_irq(&tasklist_lock);
+}
+
+/* Protects ->fs, ->files, ->mm, and synchronises with wait4(). Nests inside tasklist_lock */
+static inline void task_lock(struct task_struct *p)
+{
+ spin_lock(&p->alloc_lock);
+}
+
+static inline void task_unlock(struct task_struct *p)
+{
+ spin_unlock(&p->alloc_lock);
+}
+
+/* write full pathname into buffer and return start of pathname */
+static inline char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
+ char *buf, int buflen)
+{
+ char *res;
+ struct vfsmount *rootmnt;
+ struct dentry *root;
+ read_lock(&current->fs->lock);
+ rootmnt = mntget(current->fs->rootmnt);
+ root = dget(current->fs->root);
+ read_unlock(&current->fs->lock);
+ spin_lock(&dcache_lock);
+ res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen);
+ spin_unlock(&dcache_lock);
+ dput(root);
+ mntput(rootmnt);
+ return res;
+}
+
+static inline int need_resched(void)
+{
+ return (unlikely(current->need_resched));
+}
+
+extern void __cond_resched(void);
+static inline void cond_resched(void)
+{
+ if (need_resched())
+ __cond_resched();
+}
+
+#endif /* __KERNEL__ */
+#endif
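
One piece of this header worth unpacking is the fixed-point load-average machinery: FIXED_1 (1 << 11 = 2048) represents 1.0, and CALC_LOAD() folds the current count of active tasks (pre-scaled by FIXED_1) into an exponentially decayed average every LOAD_FREQ ticks. Below is a small stand-alone version of one update step; the sample load and run-queue values are made up for illustration.

#include <stdio.h>

#define FSHIFT   11
#define FIXED_1  (1 << FSHIFT)          /* 2048 == 1.0 in fixed point      */
#define EXP_1    1884                   /* 1/exp(5sec/1min) in fixed point */

/* Same arithmetic as the CALC_LOAD() macro, written as a function. */
static unsigned long calc_load(unsigned long load, unsigned long exp,
                               unsigned long n)
{
    load *= exp;
    load += n * (FIXED_1 - exp);
    load >>= FSHIFT;
    return load;
}

int main(void)
{
    unsigned long avenrun = 2 * FIXED_1;    /* current 1-min average: 2.00 */
    unsigned long nr_active = 5;            /* 5 runnable tasks right now  */

    avenrun = calc_load(avenrun, EXP_1, nr_active * FIXED_1);

    /* Pulls 2.00 toward 5.00: roughly 2.24 after one 5-second sample. */
    printf("1-min load ~ %lu.%02lu\n",
           avenrun >> FSHIFT, ((avenrun & (FIXED_1 - 1)) * 100) >> FSHIFT);
    return 0;
}

At a steady run-queue length of five the average converges on 5.00; the 1-minute series moves fastest because EXP_1 is the smallest of the three decay constants.
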
diff --git a/xenolinux-2.4.24-sparse/include/linux/timer.h b/xenolinux-2.4.24-sparse/include/linux/timer.h
new file mode 100644
index 0000000000..238083218f
--- /dev/null
+++ b/xenolinux-2.4.24-sparse/include/linux/timer.h
@@ -0,0 +1,77 @@
+#ifndef _LINUX_TIMER_H
+#define _LINUX_TIMER_H
+
+#include <linux/config.h>
+#include <linux/list.h>
+
+/*
+ * In Linux 2.4, static timers have been removed from the kernel.
+ * Timers may be dynamically created and destroyed, and should be initialized
+ * by a call to init_timer() upon creation.
+ *
+ * The "data" field enables use of a common timeout function for several
+ * timeouts. You can use this field to distinguish between the different
+ * invocations.
+ */
+struct timer_list {
+ struct list_head list;
+ unsigned long expires;
+ unsigned long data;
+ void (*function)(unsigned long);
+};
+
+extern void add_timer(struct timer_list * timer);
+extern int del_timer(struct timer_list * timer);
+#ifdef CONFIG_NO_IDLE_HZ
+extern struct timer_list *next_timer_event(void);
+#endif
+
+#ifdef CONFIG_SMP
+extern int del_timer_sync(struct timer_list * timer);
+extern void sync_timers(void);
+#else
+#define del_timer_sync(t) del_timer(t)
+#define sync_timers() do { } while (0)
+#endif
+
+/*
+ * mod_timer is a more efficient way to update the expire field of an
+ * active timer (if the timer is inactive it will be activated)
+ * mod_timer(a,b) is equivalent to del_timer(a); a->expires = b; add_timer(a).
+ * If the timer is known to be not pending (ie, in the handler), mod_timer
+ * is less efficient than a->expires = b; add_timer(a).
+ */
+int mod_timer(struct timer_list *timer, unsigned long expires);
+
+extern void it_real_fn(unsigned long);
+
+static inline void init_timer(struct timer_list * timer)
+{
+ timer->list.next = timer->list.prev = NULL;
+}
+
+static inline int timer_pending (const struct timer_list * timer)
+{
+ return timer->list.next != NULL;
+}
+
+/*
+ * These inlines deal with timer wrapping correctly. You are
+ * strongly encouraged to use them
+ * 1. Because people otherwise forget
+ * 2. Because if the timer wrap changes in future you wont have to
+ * alter your driver code.
+ *
+ * time_after(a,b) returns true if the time a is after time b.
+ *
+ * Do this with "<0" and ">=0" to only test the sign of the result. A
+ * good compiler would generate better code (and a really good compiler
+ * wouldn't care). Gcc is currently neither.
+ */
+#define time_after(a,b) ((long)(b) - (long)(a) < 0)
+#define time_before(a,b) time_after(b,a)
+
+#define time_after_eq(a,b) ((long)(a) - (long)(b) >= 0)
+#define time_before_eq(a,b) time_after_eq(b,a)
+
+#endif
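
The time_after()/time_before() macros at the end of this header are the wraparound-safe jiffies comparisons: the difference is computed in unsigned arithmetic and then interpreted as signed, so the ordering stays correct even when the tick counter wraps. A short demonstration using an explicit 32-bit tick type; time_after32 is a local name for this example, not a kernel macro.

#include <stdint.h>
#include <stdio.h>

/* Same trick as time_after(a,b), written for an explicit 32-bit counter so
 * the wrap case can be shown portably. */
#define time_after32(a, b)  ((int32_t)((b) - (a)) < 0)

int main(void)
{
    uint32_t before_wrap = 0xfffffff0u;    /* ticks just before the wrap */
    uint32_t after_wrap  = 0x00000010u;    /* 32 ticks later, post-wrap  */

    /* A naive unsigned comparison gets the ordering backwards; the signed
     * difference correctly reports that after_wrap is the later time.   */
    printf("naive  says later: %d\n", after_wrap > before_wrap);              /* 0 */
    printf("signed says later: %d\n", time_after32(after_wrap, before_wrap)); /* 1 */
    return 0;
}
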
diff --git a/xenolinux-2.4.24-sparse/kernel/panic.c b/xenolinux-2.4.24-sparse/kernel/panic.c
index 871ea67fee..6ab619a607 100644
--- a/xenolinux-2.4.24-sparse/kernel/panic.c
+++ b/xenolinux-2.4.24-sparse/kernel/panic.c
@@ -110,7 +110,8 @@ NORET_TYPE void panic(const char * fmt, ...)
#endif
CHECK_EMERGENCY_SYNC
#if defined(CONFIG_XENO)
- HYPERVISOR_exit();
+ HYPERVISOR_console_write(buf, strlen(buf));
+ HYPERVISOR_exit();
#endif
}
}
diff --git a/xenolinux-2.4.24-sparse/kernel/timer.c b/xenolinux-2.4.24-sparse/kernel/timer.c
new file mode 100644
index 0000000000..567794ab26
--- /dev/null
+++ b/xenolinux-2.4.24-sparse/kernel/timer.c
@@ -0,0 +1,968 @@
+/*
+ * linux/kernel/timer.c
+ *
+ * Kernel internal timers, kernel timekeeping, basic process system calls
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
+ *
+ * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
+ * "A Kernel Model for Precision Timekeeping" by Dave Mills
+ * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
+ * serialize accesses to xtime/lost_ticks).
+ * Copyright (C) 1998 Andrea Arcangeli
+ * 1999-03-10 Improved NTP compatibility by Ulrich Windl
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/timex.h>
+#include <linux/delay.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * Timekeeping variables
+ */
+
+long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */
+
+/* The current time */
+struct timeval xtime __attribute__ ((aligned (16)));
+
+/* Don't completely fail for HZ > 500. */
+int tickadj = 500/HZ ? : 1; /* microsecs */
+
+DECLARE_TASK_QUEUE(tq_timer);
+DECLARE_TASK_QUEUE(tq_immediate);
+
+/*
+ * phase-lock loop variables
+ */
+/* TIME_ERROR prevents overwriting the CMOS clock */
+int time_state = TIME_OK; /* clock synchronization status */
+int time_status = STA_UNSYNC; /* clock status bits */
+long time_offset; /* time adjustment (us) */
+long time_constant = 2; /* pll time constant */
+long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */
+long time_precision = 1; /* clock precision (us) */
+long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
+long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
+long time_phase; /* phase offset (scaled us) */
+long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
+ /* frequency offset (scaled ppm)*/
+long time_adj; /* tick adjust (scaled 1 / HZ) */
+long time_reftime; /* time at last adjustment (s) */
+
+long time_adjust;
+long time_adjust_step;
+
+unsigned long event;
+
+extern int do_setitimer(int, struct itimerval *, struct itimerval *);
+
+unsigned long volatile jiffies;
+
+unsigned int * prof_buffer;
+unsigned long prof_len;
+unsigned long prof_shift;
+
+/*
+ * Event timer code
+ */
+#define TVN_BITS 6
+#define TVR_BITS 8
+#define TVN_SIZE (1 << TVN_BITS)
+#define TVR_SIZE (1 << TVR_BITS)
+#define TVN_MASK (TVN_SIZE - 1)
+#define TVR_MASK (TVR_SIZE - 1)
+
+struct timer_vec {
+ int index;
+ struct list_head vec[TVN_SIZE];
+};
+
+struct timer_vec_root {
+ int index;
+ struct list_head vec[TVR_SIZE];
+};
+
+static struct timer_vec tv5;
+static struct timer_vec tv4;
+static struct timer_vec tv3;
+static struct timer_vec tv2;
+static struct timer_vec_root tv1;
+
+static struct timer_vec * const tvecs[] = {
+ (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
+};
+
+static struct list_head * run_timer_list_running;
+
+#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
+
+void init_timervecs (void)
+{
+ int i;
+
+ for (i = 0; i < TVN_SIZE; i++) {
+ INIT_LIST_HEAD(tv5.vec + i);
+ INIT_LIST_HEAD(tv4.vec + i);
+ INIT_LIST_HEAD(tv3.vec + i);
+ INIT_LIST_HEAD(tv2.vec + i);
+ }
+ for (i = 0; i < TVR_SIZE; i++)
+ INIT_LIST_HEAD(tv1.vec + i);
+}
+
+static unsigned long timer_jiffies;
+
+static inline void internal_add_timer(struct timer_list *timer)
+{
+ /*
+ * must be cli-ed when calling this
+ */
+ unsigned long expires = timer->expires;
+ unsigned long idx = expires - timer_jiffies;
+ struct list_head * vec;
+
+ if (run_timer_list_running)
+ vec = run_timer_list_running;
+ else if (idx < TVR_SIZE) {
+ int i = expires & TVR_MASK;
+ vec = tv1.vec + i;
+ } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
+ int i = (expires >> TVR_BITS) & TVN_MASK;
+ vec = tv2.vec + i;
+ } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
+ vec = tv3.vec + i;
+ } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
+ vec = tv4.vec + i;
+ } else if ((signed long) idx < 0) {
+ /* can happen if you add a timer with expires == jiffies,
+ * or you set a timer to go off in the past
+ */
+ vec = tv1.vec + tv1.index;
+ } else if (idx <= 0xffffffffUL) {
+ int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
+ vec = tv5.vec + i;
+ } else {
+ /* Can only get here on architectures with 64-bit jiffies */
+ INIT_LIST_HEAD(&timer->list);
+ return;
+ }
+ /*
+ * Timers are FIFO!
+ */
+ list_add(&timer->list, vec->prev);
+}
+
+/* Initialize both explicitly - let's try to have them in the same cache line */
+spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
+
+#ifdef CONFIG_SMP
+volatile struct timer_list * volatile running_timer;
+#define timer_enter(t) do { running_timer = t; mb(); } while (0)
+#define timer_exit() do { running_timer = NULL; } while (0)
+#define timer_is_running(t) (running_timer == t)
+#define timer_synchronize(t) while (timer_is_running(t)) barrier()
+#else
+#define timer_enter(t) do { } while (0)
+#define timer_exit() do { } while (0)
+#endif
+
+void add_timer(struct timer_list *timer)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ if (timer_pending(timer))
+ goto bug;
+ internal_add_timer(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return;
+bug:
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ printk("bug: kernel timer added twice at %p.\n",
+ __builtin_return_address(0));
+}
+
+static inline int detach_timer (struct timer_list *timer)
+{
+ if (!timer_pending(timer))
+ return 0;
+ list_del(&timer->list);
+ return 1;
+}
+
+int mod_timer(struct timer_list *timer, unsigned long expires)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ timer->expires = expires;
+ ret = detach_timer(timer);
+ internal_add_timer(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return ret;
+}
+
+int del_timer(struct timer_list * timer)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ ret = detach_timer(timer);
+ timer->list.next = timer->list.prev = NULL;
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return ret;
+}
+
+#ifdef CONFIG_SMP
+void sync_timers(void)
+{
+ spin_unlock_wait(&global_bh_lock);
+}
+
+/*
+ * SMP specific function to delete periodic timer.
+ * Caller must disable by some means restarting the timer
+ * for new. Upon exit the timer is not queued and handler is not running
+ * on any CPU. It returns number of times, which timer was deleted
+ * (for reference counting).
+ */
+
+int del_timer_sync(struct timer_list * timer)
+{
+ int ret = 0;
+
+ for (;;) {
+ unsigned long flags;
+ int running;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ ret += detach_timer(timer);
+ timer->list.next = timer->list.prev = 0;
+ running = timer_is_running(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+
+ if (!running)
+ break;
+
+ timer_synchronize(timer);
+ }
+
+ return ret;
+}
+#endif
+
+
+static inline void cascade_timers(struct timer_vec *tv)
+{
+ /* cascade all the timers from tv up one level */
+ struct list_head *head, *curr, *next;
+
+ head = tv->vec + tv->index;
+ curr = head->next;
+ /*
+ * We are removing _all_ timers from the list, so we don't have to
+ * detach them individually, just clear the list afterwards.
+ */
+ while (curr != head) {
+ struct timer_list *tmp;
+
+ tmp = list_entry(curr, struct timer_list, list);
+ next = curr->next;
+ list_del(curr); // not needed
+ internal_add_timer(tmp);
+ curr = next;
+ }
+ INIT_LIST_HEAD(head);
+ tv->index = (tv->index + 1) & TVN_MASK;
+}
+
+static inline void run_timer_list(void)
+{
+ spin_lock_irq(&timerlist_lock);
+ while ((long)(jiffies - timer_jiffies) >= 0) {
+ LIST_HEAD(queued);
+ struct list_head *head, *curr;
+ if (!tv1.index) {
+ int n = 1;
+ do {
+ cascade_timers(tvecs[n]);
+ } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
+ }
+ run_timer_list_running = &queued;
+repeat:
+ head = tv1.vec + tv1.index;
+ curr = head->next;
+ if (curr != head) {
+ struct timer_list *timer;
+ void (*fn)(unsigned long);
+ unsigned long data;
+
+ timer = list_entry(curr, struct timer_list, list);
+ fn = timer->function;
+ data= timer->data;
+
+ detach_timer(timer);
+ timer->list.next = timer->list.prev = NULL;
+ timer_enter(timer);
+ spin_unlock_irq(&timerlist_lock);
+ fn(data);
+ spin_lock_irq(&timerlist_lock);
+ timer_exit();
+ goto repeat;
+ }
+ run_timer_list_running = NULL;
+ ++timer_jiffies;
+ tv1.index = (tv1.index + 1) & TVR_MASK;
+
+ curr = queued.next;
+ while (curr != &queued) {
+ struct timer_list *timer;
+
+ timer = list_entry(curr, struct timer_list, list);
+ curr = curr->next;
+ internal_add_timer(timer);
+ }
+ }
+ spin_unlock_irq(&timerlist_lock);
+}
+
+#ifdef CONFIG_NO_IDLE_HZ
+/*
+ * Find out when the next timer event is due to happen. This
+ * is used on S/390 to stop all activity when all cpus are idle.
+ * And in XenoLinux to achieve the same.
+ * The timerlist_lock must be acquired before calling this function.
+ */
+struct timer_list *next_timer_event(void)
+{
+ struct timer_list *nte, *tmp;
+ struct list_head *lst;
+ int i, j;
+
+ /* Look for the next timer event in tv1. */
+ i = 0;
+ j = tvecs[0]->index;
+ do {
+ struct list_head *head = tvecs[0]->vec + j;
+ if (!list_empty(head)) {
+ nte = list_entry(head->next, struct timer_list, list);
+ goto found;
+ }
+ j = (j + 1) & TVR_MASK;
+ } while (j != tv1.index);
+
+ /* No event found in tv1. Check tv2-tv5. */
+ for (i = 1; i < NOOF_TVECS; i++) {
+ j = tvecs[i]->index;
+ do {
+ nte = NULL;
+ list_for_each(lst, tvecs[i]->vec + j) {
+ tmp = list_entry(lst, struct timer_list, list);
+ if (nte == NULL ||
+ time_before(tmp->expires, nte->expires))
+ nte = tmp;
+ }
+ if (nte)
+ goto found;
+ j = (j + 1) & TVN_MASK;
+ } while (j != tvecs[i]->index);
+ }
+ return NULL;
+found:
+ /* Found timer event in tvecs[i]->vec[j] */
+ if (j < tvecs[i]->index && i < NOOF_TVECS-1) {
+ /*
+ * The search wrapped. We need to look at the next list
+ * from tvecs[i+1] that would cascade into tvecs[i].
+ */
+ list_for_each(lst, tvecs[i+1]->vec+tvecs[i+1]->index) {
+ tmp = list_entry(lst, struct timer_list, list);
+ if (time_before(tmp->expires, nte->expires))
+ nte = tmp;
+ }
+ }
+ return nte;
+}
+#endif
+
+spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;
+
+void tqueue_bh(void)
+{
+ run_task_queue(&tq_timer);
+}
+
+void immediate_bh(void)
+{
+ run_task_queue(&tq_immediate);
+}
+
+/*
+ * this routine handles the overflow of the microsecond field
+ *
+ * The tricky bits of code to handle the accurate clock support
+ * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
+ * They were originally developed for SUN and DEC kernels.
+ * All the kudos should go to Dave for this stuff.
+ *
+ */
+static void second_overflow(void)
+{
+ long ltemp;
+
+ /* Bump the maxerror field */
+ time_maxerror += time_tolerance >> SHIFT_USEC;
+ if ( time_maxerror > NTP_PHASE_LIMIT ) {
+ time_maxerror = NTP_PHASE_LIMIT;
+ time_status |= STA_UNSYNC;
+ }
+
+ /*
+ * Leap second processing. If in leap-insert state at
+ * the end of the day, the system clock is set back one
+ * second; if in leap-delete state, the system clock is
+ * set ahead one second. The microtime() routine or
+ * external clock driver will insure that reported time
+ * is always monotonic. The ugly divides should be
+ * replaced.
+ */
+ switch (time_state) {
+
+ case TIME_OK:
+ if (time_status & STA_INS)
+ time_state = TIME_INS;
+ else if (time_status & STA_DEL)
+ time_state = TIME_DEL;
+ break;
+
+ case TIME_INS:
+ if (xtime.tv_sec % 86400 == 0) {
+ xtime.tv_sec--;
+ time_state = TIME_OOP;
+ printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
+ }
+ break;
+
+ case TIME_DEL:
+ if ((xtime.tv_sec + 1) % 86400 == 0) {
+ xtime.tv_sec++;
+ time_state = TIME_WAIT;
+ printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
+ }
+ break;
+
+ case TIME_OOP:
+ time_state = TIME_WAIT;
+ break;
+
+ case TIME_WAIT:
+ if (!(time_status & (STA_INS | STA_DEL)))
+ time_state = TIME_OK;
+ }
+
+ /*
+ * Compute the phase adjustment for the next second. In
+ * PLL mode, the offset is reduced by a fixed factor
+ * times the time constant. In FLL mode the offset is
+ * used directly. In either mode, the maximum phase
+ * adjustment for each second is clamped so as to spread
+ * the adjustment over not more than the number of
+ * seconds between updates.
+ */
+ if (time_offset < 0) {
+ ltemp = -time_offset;
+ if (!(time_status & STA_FLL))
+ ltemp >>= SHIFT_KG + time_constant;
+ if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
+ ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
+ time_offset += ltemp;
+ time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+ } else {
+ ltemp = time_offset;
+ if (!(time_status & STA_FLL))
+ ltemp >>= SHIFT_KG + time_constant;
+ if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
+ ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
+ time_offset -= ltemp;
+ time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+ }
+
+ /*
+ * Compute the frequency estimate and additional phase
+ * adjustment due to frequency error for the next
+ * second. When the PPS signal is engaged, gnaw on the
+ * watchdog counter and update the frequency computed by
+ * the pll and the PPS signal.
+ */
+ pps_valid++;
+ if (pps_valid == PPS_VALID) { /* PPS signal lost */
+ pps_jitter = MAXTIME;
+ pps_stabil = MAXFREQ;
+ time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
+ STA_PPSWANDER | STA_PPSERROR);
+ }
+ ltemp = time_freq + pps_freq;
+ if (ltemp < 0)
+ time_adj -= -ltemp >>
+ (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+ else
+ time_adj += ltemp >>
+ (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+
+#if HZ == 100
+ /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
+ * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
+ */
+ if (time_adj < 0)
+ time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
+ else
+ time_adj += (time_adj >> 2) + (time_adj >> 5);
+#endif
+}
+
+/* in the NTP reference this is called "hardclock()" */
+static void update_wall_time_one_tick(void)
+{
+ if ( (time_adjust_step = time_adjust) != 0 ) {
+ /* We are doing an adjtime thing.
+ *
+ * Prepare time_adjust_step to be within bounds.
+ * Note that a positive time_adjust means we want the clock
+ * to run faster.
+ *
+ * Limit the amount of the step to be in the range
+ * -tickadj .. +tickadj
+ */
+ if (time_adjust > tickadj)
+ time_adjust_step = tickadj;
+ else if (time_adjust < -tickadj)
+ time_adjust_step = -tickadj;
+
+ /* Reduce by this step the amount of time left */
+ time_adjust -= time_adjust_step;
+ }
+ xtime.tv_usec += tick + time_adjust_step;
+ /*
+ * Advance the phase, once it gets to one microsecond, then
+ * advance the tick more.
+ */
+ time_phase += time_adj;
+ if (time_phase <= -FINEUSEC) {
+ long ltemp = -time_phase >> SHIFT_SCALE;
+ time_phase += ltemp << SHIFT_SCALE;
+ xtime.tv_usec -= ltemp;
+ }
+ else if (time_phase >= FINEUSEC) {
+ long ltemp = time_phase >> SHIFT_SCALE;
+ time_phase -= ltemp << SHIFT_SCALE;
+ xtime.tv_usec += ltemp;
+ }
+}
+
+/*
+ * Using a loop looks inefficient, but "ticks" is
+ * usually just one (we shouldn't be losing ticks;
+ * we're doing it this way mainly for interrupt
+ * latency reasons, not because we expect to
+ * have lots of lost timer ticks).
+ */
+static void update_wall_time(unsigned long ticks)
+{
+ do {
+ ticks--;
+ update_wall_time_one_tick();
+ } while (ticks);
+
+ while (xtime.tv_usec >= 1000000) {
+ xtime.tv_usec -= 1000000;
+ xtime.tv_sec++;
+ second_overflow();
+ }
+}
+
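+/*
+ * Accumulate 'user' and 'system' ticks into the process times and
+ * enforce the RLIMIT_CPU soft/hard limits (SIGXCPU, then SIGKILL).
+ */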
+static inline void do_process_times(struct task_struct *p,
+ unsigned long user, unsigned long system)
+{
+ unsigned long psecs;
+
+ psecs = (p->times.tms_utime += user);
+ psecs += (p->times.tms_stime += system);
+ if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
+ /* Send SIGXCPU every second.. */
+ if (!(psecs % HZ))
+ send_sig(SIGXCPU, p, 1);
+ /* and SIGKILL when we go over max.. */
+ if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
+ send_sig(SIGKILL, p, 1);
+ }
+}
+
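+/*
+ * Tick the ITIMER_VIRTUAL countdown down by the user ticks consumed;
+ * on expiry, reload it from it_virt_incr and deliver SIGVTALRM.
+ */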
+static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
+{
+ unsigned long it_virt = p->it_virt_value;
+
+ if (it_virt) {
+ it_virt -= ticks;
+ if (!it_virt) {
+ it_virt = p->it_virt_incr;
+ send_sig(SIGVTALRM, p, 1);
+ }
+ p->it_virt_value = it_virt;
+ }
+}
+
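+/*
+ * Likewise for ITIMER_PROF: one tick per call; on expiry, reload from
+ * it_prof_incr and deliver SIGPROF.
+ */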
+static inline void do_it_prof(struct task_struct *p)
+{
+ unsigned long it_prof = p->it_prof_value;
+
+ if (it_prof) {
+ if (--it_prof == 0) {
+ it_prof = p->it_prof_incr;
+ send_sig(SIGPROF, p, 1);
+ }
+ p->it_prof_value = it_prof;
+ }
+}
+
+void update_one_process(struct task_struct *p, unsigned long user,
+ unsigned long system, int cpu)
+{
+ p->per_cpu_utime[cpu] += user;
+ p->per_cpu_stime[cpu] += system;
+ do_process_times(p, user, system);
+ do_it_virt(p, user);
+ do_it_prof(p);
+}
+
+/*
+ * Called from the timer interrupt handler to charge one tick to the current
+ * process. user_tick is 1 if the tick is user time, 0 for system.
+ */
+void update_process_times(int user_tick)
+{
+ struct task_struct *p = current;
+ int cpu = smp_processor_id(), system = user_tick ^ 1;
+
+ update_one_process(p, user_tick, system, cpu);
+ if (p->pid) {
+ if (--p->counter <= 0) {
+ p->counter = 0;
+ /*
+ * SCHED_FIFO is priority preemption, so this is
+ * not the place to decide whether to reschedule a
+ * SCHED_FIFO task or not - Bhavesh Davda
+ */
+ if (p->policy != SCHED_FIFO) {
+ p->need_resched = 1;
+ }
+ }
+ if (p->nice > 0)
+ kstat.per_cpu_nice[cpu] += user_tick;
+ else
+ kstat.per_cpu_user[cpu] += user_tick;
+ kstat.per_cpu_system[cpu] += system;
+ } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
+ kstat.per_cpu_system[cpu] += system;
+}
+
+/*
+ * Called from the timer interrupt handler to charge several ticks of
+ * user and system time to the current process in one go.
+ */
+void update_process_times_us(int user_ticks, int system_ticks)
+{
+ struct task_struct *p = current;
+ int cpu = smp_processor_id();
+
+ update_one_process(p, user_ticks, system_ticks, cpu);
+ if (p->pid) {
+ p->counter -= user_ticks + system_ticks;
+ if (p->counter <= 0) {
+ p->counter = 0;
+ p->need_resched = 1;
+ }
+ if (p->nice > 0)
+ kstat.per_cpu_nice[cpu] += user_ticks;
+ else
+ kstat.per_cpu_user[cpu] += user_ticks;
+ kstat.per_cpu_system[cpu] += system_ticks;
+ } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
+ kstat.per_cpu_system[cpu] += system_ticks;
+}
+
+/*
+ * Nr of active tasks - counted in fixed-point numbers
+ */
+static unsigned long count_active_tasks(void)
+{
+ struct task_struct *p;
+ unsigned long nr = 0;
+
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if ((p->state == TASK_RUNNING ||
+ (p->state & TASK_UNINTERRUPTIBLE)))
+ nr += FIXED_1;
+ }
+ read_unlock(&tasklist_lock);
+ return nr;
+}
+
+/*
+ * Hmm.. Changed this, as the GNU make sources (load.c) seem to
+ * imply that avenrun[] is the standard name for this kind of thing.
+ * Nothing else seems to be standardized: the fractional size etc.
+ * all seem to differ on different machines.
+ */
+unsigned long avenrun[3];
+
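+/*
+ * Recompute the 1, 5 and 15 minute load averages once every LOAD_FREQ
+ * ticks using the CALC_LOAD exponential-decay macros; the while loop
+ * catches up if more than LOAD_FREQ ticks have elapsed.
+ */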
+static inline void calc_load(unsigned long ticks)
+{
+ unsigned long active_tasks; /* fixed-point */
+ static int count = LOAD_FREQ;
+
+ count -= ticks;
+ while (count < 0) {
+ count += LOAD_FREQ;
+ active_tasks = count_active_tasks();
+ CALC_LOAD(avenrun[0], EXP_1, active_tasks);
+ CALC_LOAD(avenrun[1], EXP_5, active_tasks);
+ CALC_LOAD(avenrun[2], EXP_15, active_tasks);
+ }
+}
+
+/* jiffies at the most recent update of wall time */
+unsigned long wall_jiffies;
+
+/*
+ * This spinlock protects us from races in SMP while playing with xtime. -arca
+ */
+rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
+
+static inline void update_times(void)
+{
+ unsigned long ticks;
+
+	/*
+	 * update_times() is run from the raw timer_bh handler, so we
+	 * know that local irqs are enabled and we don't need to
+	 * save/restore the flags of the local CPU here. -arca
+	 */
+ write_lock_irq(&xtime_lock);
+ vxtime_lock();
+
+ ticks = jiffies - wall_jiffies;
+ if (ticks) {
+ wall_jiffies += ticks;
+ update_wall_time(ticks);
+ }
+ vxtime_unlock();
+ write_unlock_irq(&xtime_lock);
+ calc_load(ticks);
+}
+
+void timer_bh(void)
+{
+ update_times();
+ run_timer_list();
+}
+
+void do_timer(struct pt_regs *regs)
+{
+ (*(unsigned long *)&jiffies)++;
+#ifndef CONFIG_SMP
+ /* SMP process accounting uses the local APIC timer */
+
+ update_process_times(user_mode(regs));
+#endif
+ mark_bh(TIMER_BH);
+ if (TQ_ACTIVE(tq_timer))
+ mark_bh(TQUEUE_BH);
+}
+
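+/*
+ * Advance jiffies by 'ticks' in one go and mark the timer bottom halves.
+ * Unlike do_timer(), this leaves process time accounting to the caller.
+ */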
+void do_timer_ticks(int ticks)
+{
+ (*(unsigned long *)&jiffies) += ticks;
+ mark_bh(TIMER_BH);
+ if (TQ_ACTIVE(tq_timer))
+ mark_bh(TQUEUE_BH);
+}
+
+#if !defined(__alpha__) && !defined(__ia64__)
+
+/*
+ * For backwards compatibility? This can be done in libc so Alpha
+ * and all newer ports shouldn't need it.
+ */
+asmlinkage unsigned long sys_alarm(unsigned int seconds)
+{
+ struct itimerval it_new, it_old;
+ unsigned int oldalarm;
+
+ it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
+ it_new.it_value.tv_sec = seconds;
+ it_new.it_value.tv_usec = 0;
+ do_setitimer(ITIMER_REAL, &it_new, &it_old);
+ oldalarm = it_old.it_value.tv_sec;
+ /* ehhh.. We can't return 0 if we have an alarm pending.. */
+ /* And we'd better return too much than too little anyway */
+ if (it_old.it_value.tv_usec)
+ oldalarm++;
+ return oldalarm;
+}
+
+#endif
+
+#ifndef __alpha__
+
+/*
+ * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
+ * should be moved into arch/i386?
+ */
+
+/**
+ * sys_getpid - return the thread group id of the current process
+ *
+ * Note, despite the name, this returns the tgid not the pid. The tgid and
+ * the pid are identical unless CLONE_THREAD was specified on clone() in
+ * which case the tgid is the same in all threads of the same group.
+ *
+ * This is SMP safe as current->tgid does not change.
+ */
+asmlinkage long sys_getpid(void)
+{
+ return current->tgid;
+}
+
+/*
+ * This is not strictly SMP safe: p_opptr could change
+ * from under us. However, rather than getting any lock
+ * we can use an optimistic algorithm: get the parent
+ * pid, and go back and check that the parent is still
+ * the same. If it has changed (which is extremely unlikely
+ * indeed), we just try again..
+ *
+ * NOTE! This depends on the fact that even if we _do_
+ * get an old value of "parent", we can happily dereference
+ * the pointer: we just can't necessarily trust the result
+ * until we know that the parent pointer is valid.
+ *
+ * The "mb()" macro is a memory barrier - a synchronizing
+ * event. It also makes sure that gcc doesn't optimize
+ * away the necessary memory references.. The barrier doesn't
+ * have to have all that strong semantics: on x86 we don't
+ * really require a synchronizing instruction, for example.
+ * The barrier is more important for code generation than
+ * for any real memory ordering semantics (even if there is
+ * a small window for a race, using the old pointer is
+ * harmless for a while).
+ */
+asmlinkage long sys_getppid(void)
+{
+ int pid;
+ struct task_struct * me = current;
+ struct task_struct * parent;
+
+ parent = me->p_opptr;
+ for (;;) {
+ pid = parent->pid;
+#if CONFIG_SMP
+{
+ struct task_struct *old = parent;
+ mb();
+ parent = me->p_opptr;
+ if (old != parent)
+ continue;
+}
+#endif
+ break;
+ }
+ return pid;
+}
+
+asmlinkage long sys_getuid(void)
+{
+ /* Only we change this so SMP safe */
+ return current->uid;
+}
+
+asmlinkage long sys_geteuid(void)
+{
+ /* Only we change this so SMP safe */
+ return current->euid;
+}
+
+asmlinkage long sys_getgid(void)
+{
+ /* Only we change this so SMP safe */
+ return current->gid;
+}
+
+asmlinkage long sys_getegid(void)
+{
+ /* Only we change this so SMP safe */
+ return current->egid;
+}
+
+#endif
+
+/* Thread ID - the internal kernel "pid" */
+asmlinkage long sys_gettid(void)
+{
+ return current->pid;
+}
+
+asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
+{
+ struct timespec t;
+ unsigned long expire;
+
+ if(copy_from_user(&t, rqtp, sizeof(struct timespec)))
+ return -EFAULT;
+
+ if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
+ return -EINVAL;
+
+
+ if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
+ current->policy != SCHED_OTHER)
+ {
+ /*
+ * Short delay requests up to 2 ms will be handled with
+ * high precision by a busy wait for all real-time processes.
+ *
+		 * It's important on SMP not to do this while holding locks.
+ */
+ udelay((t.tv_nsec + 999) / 1000);
+ return 0;
+ }
+
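+	/*
+	 * Round up by one jiffy for any non-zero request: the current
+	 * tick is already partly gone, so this guarantees we sleep at
+	 * least as long as asked for.
+	 */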
+ expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
+
+ current->state = TASK_INTERRUPTIBLE;
+ expire = schedule_timeout(expire);
+
+ if (expire) {
+ if (rmtp) {
+ jiffies_to_timespec(expire, &t);
+ if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
+ return -EFAULT;
+ }
+ return -EINTR;
+ }
+ return 0;
+}
+