/* * linux/kernel/timer.c * * Kernel internal timers, kernel timekeeping, basic process system calls * * Copyright (C) 1991, 1992 Linus Torvalds * * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better. * * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 * "A Kernel Model for Precision Timekeeping" by Dave Mills * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to * serialize accesses to xtime/lost_ticks). * Copyright (C) 1998 Andrea Arcangeli * 1999-03-10 Improved NTP compatibility by Ulrich Windl */ #include #include #include #include #include #include #include //#include #include //#include #include #include /* * Timekeeping variables */ long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */ /* The current time */ struct timeval xtime __attribute__ ((aligned (16))); /* Don't completely fail for HZ > 500. */ int tickadj = 500/HZ ? : 1; /* microsecs */ DECLARE_TASK_QUEUE(tq_timer); DECLARE_TASK_QUEUE(tq_immediate); /* * phase-lock loop variables */ /* TIME_ERROR prevents overwriting the CMOS clock */ int time_state = TIME_OK; /* clock synchronization status */ int time_status = STA_UNSYNC; /* clock status bits */ long time_offset; /* time adjustment (us) */ long time_constant = 2; /* pll time constant */ long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ long time_precision = 1; /* clock precision (us) */ long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ long time_phase; /* phase offset (scaled us) */ long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC; /* frequency offset (scaled ppm)*/ long time_adj; /* tick adjust (scaled 1 / HZ) */ long time_reftime; /* time at last adjustment (s) */ long time_adjust; long time_adjust_step; unsigned long event; unsigned long volatile jiffies; unsigned int * prof_buffer; unsigned long prof_len; unsigned long prof_shift; /* * Event timer code */ #define TVN_BITS 6 #define TVR_BITS 8 #define TVN_SIZE (1 << TVN_BITS) #define TVR_SIZE (1 << TVR_BITS) #define TVN_MASK (TVN_SIZE - 1) #define TVR_MASK (TVR_SIZE - 1) struct timer_vec { int index; struct list_head vec[TVN_SIZE]; }; struct timer_vec_root { int index; struct list_head vec[TVR_SIZE]; }; static struct timer_vec tv5; static struct timer_vec tv4; static struct timer_vec tv3; static struct timer_vec tv2; static struct timer_vec_root tv1; static struct timer_vec * const tvecs[] = { (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5 }; #define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0])) void init_timervecs (void) { int i; for (i = 0; i < TVN_SIZE; i++) { INIT_LIST_HEAD(tv5.vec + i); INIT_LIST_HEAD(tv4.vec + i); INIT_LIST_HEAD(tv3.vec + i); INIT_LIST_HEAD(tv2.vec + i); } for (i = 0; i < TVR_SIZE; i++) INIT_LIST_HEAD(tv1.vec + i); } static unsigned long timer_jiffies; static inline void internal_add_timer(struct timer_list *timer) { /* * must be cli-ed when calling this */ unsigned long expires = timer->expires; unsigned long idx = expires - timer_jiffies; struct list_head * vec; if (idx < TVR_SIZE) { int i = expires & TVR_MASK; vec = tv1.vec + i; } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { int i = (expires >> TVR_BITS) & TVN_MASK; vec = tv2.vec + i; } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; vec = tv3.vec + i; } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; vec = tv4.vec + i; } else if ((signed long) idx < 0) { /* can happen if you add a timer with expires == jiffies, * or you set a timer to go off in the past */ vec = tv1.vec + tv1.index; } else if (idx <= 0xffffffffUL) { int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; vec = tv5.vec + i; } else { /* Can only get here on architectures with 64-bit jiffies */ INIT_LIST_HEAD(&timer->list); return; } /* * Timers are FIFO! */ list_add(&timer->list, vec->prev); } /* Initialize both explicitly - let's try to have them in the same cache line */ spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED; #ifdef CONFIG_SMP volatile struct timer_list * volatile running_timer; #define timer_enter(t) do { running_timer = t; mb(); } while (0) #define timer_exit() do { running_timer = NULL; } while (0) #define timer_is_running(t) (running_timer == t) #define timer_synchronize(t) while (timer_is_running(t)) barrier() #else #define timer_enter(t) do { } while (0) #define timer_exit() do { } while (0) #endif void add_timer(struct timer_list *timer) { unsigned long flags; spin_lock_irqsave(&timerlist_lock, flags); if (timer_pending(timer)) goto bug; internal_add_timer(timer); spin_unlock_irqrestore(&timerlist_lock, flags); return; bug: spin_unlock_irqrestore(&timerlist_lock, flags); printk("bug: kernel timer added twice at %p.\n", __builtin_return_address(0)); } static inline int detach_timer (struct timer_list *timer) { if (!timer_pending(timer)) return 0; list_del(&timer->list); return 1; } int mod_timer(struct timer_list *timer, unsigned long expires) { int ret; unsigned long flags; spin_lock_irqsave(&timerlist_lock, flags); timer->expires = expires; ret = detach_timer(timer); internal_add_timer(timer); spin_unlock_irqrestore(&timerlist_lock, flags); return ret; } int del_timer(struct timer_list * timer) { int ret; unsigned long flags; spin_lock_irqsave(&timerlist_lock, flags); ret = detach_timer(timer); timer->list.next = timer->list.prev = NULL; spin_unlock_irqrestore(&timerlist_lock, flags); return ret; } #ifdef CONFIG_SMP void sync_timers(void) { spin_unlock_wait(&global_bh_lock); } /* * SMP specific function to delete periodic timer. * Caller must disable by some means restarting the timer * for new. Upon exit the timer is not queued and handler is not running * on any CPU. It returns number of times, which timer was deleted * (for reference counting). */ int del_timer_sync(struct timer_list * timer) { int ret = 0; for (;;) { unsigned long flags; int running; spin_lock_irqsave(&timerlist_lock, flags); ret += detach_timer(timer); timer->list.next = timer->list.prev = 0; running = timer_is_running(timer); spin_unlock_irqrestore(&timerlist_lock, flags); if (!running) break; timer_synchronize(timer); } return ret; } #endif static inline void cascade_timers(struct timer_vec *tv) { /* cascade all the timers from tv up one level */ struct list_head *head, *curr, *next; head = tv->vec + tv->index; curr = head->next; /* * We are removing _all_ timers from the list, so we don't have to * detach them individually, just clear the list afterwards. */ while (curr != head) { struct timer_list *tmp; tmp = list_entry(curr, struct timer_list, list); next = curr->next; list_del(curr); // not needed internal_add_timer(tmp); curr = next; } INIT_LIST_HEAD(head); tv->index = (tv->index + 1) & TVN_MASK; } static inline void run_timer_list(void) { spin_lock_irq(&timerlist_lock); while ((long)(jiffies - timer_jiffies) >= 0) { struct list_head *head, *curr; if (!tv1.index) { int n = 1; do { cascade_timers(tvecs[n]); } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS); } repeat: head = tv1.vec + tv1.index; curr = head->next; if (curr != head) { struct timer_list *timer; void (*fn)(unsigned long); unsigned long data; timer = list_entry(curr, struct timer_list, list); fn = timer->function; data= timer->data; detach_timer(timer); timer->list.next = timer->list.prev = NULL; timer_enter(timer); spin_unlock_irq(&timerlist_lock); fn(data); spin_lock_irq(&timerlist_lock); timer_exit(); goto repeat; } ++timer_jiffies; tv1.index = (tv1.index + 1) & TVR_MASK; } spin_unlock_irq(&timerlist_lock); } spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED; void tqueue_bh(void) { run_task_queue(&tq_timer); } void immediate_bh(void) { run_task_queue(&tq_immediate); } /* * this routine handles the overflow of the microsecond field * * The tricky bits of code to handle the accurate clock support * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. * They were originally developed for SUN and DEC kernels. * All the kudos should go to Dave for this stuff. * */ static void second_overflow(void) { long ltemp; /* Bump the maxerror field */ time_maxerror += time_tolerance >> SHIFT_USEC; if ( time_maxerror > NTP_PHASE_LIMIT ) { time_maxerror = NTP_PHASE_LIMIT; time_status |= STA_UNSYNC; } /* * Leap second processing. If in leap-insert state at * the end of the day, the system clock is set back one * second; if in leap-delete state, the system clock is * set ahead one second. The microtime() routine or * external clock driver will insure that reported time * is always monotonic. The ugly divides should be * replaced. */ switch (time_state) { case TIME_OK: if (time_status & STA_INS) time_state = TIME_INS; else if (time_status & STA_DEL) time_state = TIME_DEL; break; case TIME_INS: if (xtime.tv_sec % 86400 == 0) { xtime.tv_sec--; time_state = TIME_OOP; printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); } break; case TIME_DEL: if ((xtime.tv_sec + 1) % 86400 == 0) { xtime.tv_sec++; time_state = TIME_WAIT; printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); } break; case TIME_OOP: time_state = TIME_WAIT; break; case TIME_WAIT: if (!(time_status & (STA_INS | STA_DEL))) time_state = TIME_OK; } /* * Compute the phase adjustment for the next second. In * PLL mode, the offset is reduced by a fixed factor * times the time constant. In FLL mode the offset is * used directly. In either mode, the maximum phase * adjustment for each second is clamped so as to spread * the adjustment over not more than the number of * seconds between updates. */ if (time_offset < 0) { ltemp = -time_offset; if (!(time_status & STA_FLL)) ltemp >>= SHIFT_KG + time_constant; if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; time_offset += ltemp; time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); } else { ltemp = time_offset; if (!(time_status & STA_FLL)) ltemp >>= SHIFT_KG + time_constant; if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; time_offset -= ltemp; time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); } if (ltemp < 0) time_adj -= -ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); else time_adj += ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); #if HZ == 100 /* Compensate for (HZ==100) != (1 << SHIFT_HZ). * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14) */ if (time_adj < 0) time_adj -= (-time_adj >> 2) + (-time_adj >> 5); else time_adj += (time_adj >> 2) + (time_adj >> 5); #endif } /* in the NTP reference this is called "hardclock()" */ static void update_wall_time_one_tick(void) { if ( (time_adjust_step = time_adjust) != 0 ) { /* We are doing an adjtime thing. * * Prepare time_adjust_step to be within bounds. * Note that a positive time_adjust means we want the clock * to run faster. * * Limit the amount of the step to be in the range * -tickadj .. +tickadj */ if (time_adjust > tickadj) time_adjust_step = tickadj; else if (time_adjust < -tickadj) time_adjust_step = -tickadj; /* Reduce by this step the amount of time left */ time_adjust -= time_adjust_step; } xtime.tv_usec += tick + time_adjust_step; /* * Advance the phase, once it gets to one microsecond, then * advance the tick more. */ time_phase += time_adj; if (time_phase <= -FINEUSEC) { long ltemp = -time_phase >> SHIFT_SCALE; time_phase += ltemp << SHIFT_SCALE; xtime.tv_usec -= ltemp; } else if (time_phase >= FINEUSEC) { long ltemp = time_phase >> SHIFT_SCALE; time_phase -= ltemp << SHIFT_SCALE; xtime.tv_usec += ltemp; } } /* * Using a loop looks inefficient, but "ticks" is * usually just one (we shouldn't be losing ticks, * we're doing this this way mainly for interrupt * latency reasons, not because we think we'll * have lots of lost timer ticks */ static void update_wall_time(unsigned long ticks) { do { ticks--; update_wall_time_one_tick(); } while (ticks); if (xtime.tv_usec >= 1000000) { xtime.tv_usec -= 1000000; xtime.tv_sec++; second_overflow(); } } static inline void do_process_times(struct task_struct *p, unsigned long user, unsigned long system) { //unsigned long psecs; // psecs = (p->times.tms_utime += user); //psecs += (p->times.tms_stime += system); } void update_one_process(struct task_struct *p, unsigned long user, unsigned long system, int cpu) { // p->per_cpu_utime[cpu] += user; // p->per_cpu_stime[cpu] += system; do_process_times(p, user, system); } /* * Called from the timer interrupt handler to charge one tick to the current * process. user_tick is 1 if the tick is user time, 0 for system. */ void update_process_times(int user_tick) { struct task_struct *p = current; int cpu = smp_processor_id(), system = user_tick ^ 1; update_one_process(p, user_tick, system, cpu); if ( --p->counter <= 0 ) { p->counter = 0; set_bit(_HYP_EVENT_NEED_RESCHED, &p->hyp_events); } } /* jiffies at the most recent update of wall time */ unsigned long wall_jiffies; /* * This spinlock protect us from races in SMP while playing with xtime. -arca */ rwlock_t xtime_lock = RW_LOCK_UNLOCKED; static inline void update_times(void) { unsigned long ticks; /* * update_times() is run from the raw timer_bh handler so we * just know that the irqs are locally enabled and so we don't * need to save/restore the flags of the local CPU here. -arca */ write_lock_irq(&xtime_lock); ticks = jiffies - wall_jiffies; if (ticks) { wall_jiffies += ticks; update_wall_time(ticks); } write_unlock_irq(&xtime_lock); } void timer_bh(void) { update_times(); run_timer_list(); } #include #include #include #include #include #include #include void do_timer(struct pt_regs *regs) { (*(unsigned long *)&jiffies)++; if ( !using_apic_timer ) update_process_times(user_mode(regs)); mark_bh(TIMER_BH); if (TQ_ACTIVE(tq_timer)) mark_bh(TQUEUE_BH); } void get_fast_time(struct timeval * tm) { *tm=xtime; }