direct-io.hg

changeset 13576:8331aca2f29c

[linux] Disable GENERIC_TIME until we have a xen clocksource.

Signed-off-by: Christian Limpach <Christian.Limpach@xensource.com>
author Christian Limpach <Christian.Limpach@xensource.com>
date Wed Jan 24 12:02:56 2007 +0000 (2007-01-24)
parents c3b2443408f4
children 4f5772324e67
files linux-2.6-xen-sparse/arch/i386/Kconfig
      linux-2.6-xen-sparse/kernel/timer.c
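For reference, the GENERIC_TIME entry as it reads with the first hunk below applied (reconstructed from that hunk) is:

	config GENERIC_TIME
		bool
		depends on !X86_XEN
		default y

GENERIC_TIME is a promptless bool, so its value comes from its default; with the new dependency unmet on X86_XEN builds, CONFIG_GENERIC_TIME stays unset there, and the clocksource-based timekeeping guarded by CONFIG_GENERIC_TIME is compiled out until a Xen clocksource exists. The second file in the diff drops the sparse tree's modified copy of kernel/timer.c, presumably so that the unmodified upstream file is built instead.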
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/i386/Kconfig	Wed Jan 24 11:04:22 2007 +0000
     1.2 +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig	Wed Jan 24 12:02:56 2007 +0000
     1.3 @@ -16,6 +16,7 @@ config X86_32
     1.4  
     1.5  config GENERIC_TIME
     1.6  	bool
     1.7 +	depends on !X86_XEN
     1.8  	default y
     1.9  
    1.10  config LOCKDEP_SUPPORT
     2.1 --- a/linux-2.6-xen-sparse/kernel/timer.c	Wed Jan 24 11:04:22 2007 +0000
     2.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.3 @@ -1,1914 +0,0 @@
     2.4 -/*
     2.5 - *  linux/kernel/timer.c
     2.6 - *
     2.7 - *  Kernel internal timers, kernel timekeeping, basic process system calls
     2.8 - *
     2.9 - *  Copyright (C) 1991, 1992  Linus Torvalds
    2.10 - *
    2.11 - *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
    2.12 - *
    2.13 - *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
    2.14 - *              "A Kernel Model for Precision Timekeeping" by Dave Mills
    2.15 - *  1998-12-24  Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
    2.16 - *              serialize accesses to xtime/lost_ticks).
    2.17 - *                              Copyright (C) 1998  Andrea Arcangeli
    2.18 - *  1999-03-10  Improved NTP compatibility by Ulrich Windl
    2.19 - *  2002-05-31	Move sys_sysinfo here and make its locking sane, Robert Love
    2.20 - *  2000-10-05  Implemented scalable SMP per-CPU timer handling.
    2.21 - *                              Copyright (C) 2000, 2001, 2002  Ingo Molnar
    2.22 - *              Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
    2.23 - */
    2.24 -
    2.25 -#include <linux/kernel_stat.h>
    2.26 -#include <linux/module.h>
    2.27 -#include <linux/interrupt.h>
    2.28 -#include <linux/percpu.h>
    2.29 -#include <linux/init.h>
    2.30 -#include <linux/mm.h>
    2.31 -#include <linux/swap.h>
    2.32 -#include <linux/notifier.h>
    2.33 -#include <linux/thread_info.h>
    2.34 -#include <linux/time.h>
    2.35 -#include <linux/jiffies.h>
    2.36 -#include <linux/posix-timers.h>
    2.37 -#include <linux/cpu.h>
    2.38 -#include <linux/syscalls.h>
    2.39 -#include <linux/delay.h>
    2.40 -
    2.41 -#include <asm/uaccess.h>
    2.42 -#include <asm/unistd.h>
    2.43 -#include <asm/div64.h>
    2.44 -#include <asm/timex.h>
    2.45 -#include <asm/io.h>
    2.46 -
    2.47 -#ifdef CONFIG_TIME_INTERPOLATION
    2.48 -static void time_interpolator_update(long delta_nsec);
    2.49 -#else
    2.50 -#define time_interpolator_update(x)
    2.51 -#endif
    2.52 -
    2.53 -u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
    2.54 -
    2.55 -EXPORT_SYMBOL(jiffies_64);
    2.56 -
    2.57 -/*
    2.58 - * per-CPU timer vector definitions:
    2.59 - */
    2.60 -#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
    2.61 -#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
    2.62 -#define TVN_SIZE (1 << TVN_BITS)
    2.63 -#define TVR_SIZE (1 << TVR_BITS)
    2.64 -#define TVN_MASK (TVN_SIZE - 1)
    2.65 -#define TVR_MASK (TVR_SIZE - 1)
    2.66 -
    2.67 -typedef struct tvec_s {
    2.68 -	struct list_head vec[TVN_SIZE];
    2.69 -} tvec_t;
    2.70 -
    2.71 -typedef struct tvec_root_s {
    2.72 -	struct list_head vec[TVR_SIZE];
    2.73 -} tvec_root_t;
    2.74 -
    2.75 -struct tvec_t_base_s {
    2.76 -	spinlock_t lock;
    2.77 -	struct timer_list *running_timer;
    2.78 -	unsigned long timer_jiffies;
    2.79 -	tvec_root_t tv1;
    2.80 -	tvec_t tv2;
    2.81 -	tvec_t tv3;
    2.82 -	tvec_t tv4;
    2.83 -	tvec_t tv5;
    2.84 -} ____cacheline_aligned_in_smp;
    2.85 -
    2.86 -typedef struct tvec_t_base_s tvec_base_t;
    2.87 -
    2.88 -tvec_base_t boot_tvec_bases;
    2.89 -EXPORT_SYMBOL(boot_tvec_bases);
    2.90 -static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
    2.91 -
    2.92 -static inline void set_running_timer(tvec_base_t *base,
    2.93 -					struct timer_list *timer)
    2.94 -{
    2.95 -#ifdef CONFIG_SMP
    2.96 -	base->running_timer = timer;
    2.97 -#endif
    2.98 -}
    2.99 -
   2.100 -static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
   2.101 -{
   2.102 -	unsigned long expires = timer->expires;
   2.103 -	unsigned long idx = expires - base->timer_jiffies;
   2.104 -	struct list_head *vec;
   2.105 -
   2.106 -	if (idx < TVR_SIZE) {
   2.107 -		int i = expires & TVR_MASK;
   2.108 -		vec = base->tv1.vec + i;
   2.109 -	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
   2.110 -		int i = (expires >> TVR_BITS) & TVN_MASK;
   2.111 -		vec = base->tv2.vec + i;
   2.112 -	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
   2.113 -		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
   2.114 -		vec = base->tv3.vec + i;
   2.115 -	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
   2.116 -		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
   2.117 -		vec = base->tv4.vec + i;
   2.118 -	} else if ((signed long) idx < 0) {
   2.119 -		/*
   2.120 -		 * Can happen if you add a timer with expires == jiffies,
   2.121 -		 * or you set a timer to go off in the past
   2.122 -		 */
   2.123 -		vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
   2.124 -	} else {
   2.125 -		int i;
   2.126 -		/* If the timeout is larger than 0xffffffff on 64-bit
   2.127 -		 * architectures then we use the maximum timeout:
   2.128 -		 */
   2.129 -		if (idx > 0xffffffffUL) {
   2.130 -			idx = 0xffffffffUL;
   2.131 -			expires = idx + base->timer_jiffies;
   2.132 -		}
   2.133 -		i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
   2.134 -		vec = base->tv5.vec + i;
   2.135 -	}
   2.136 -	/*
   2.137 -	 * Timers are FIFO:
   2.138 -	 */
   2.139 -	list_add_tail(&timer->entry, vec);
   2.140 -}
   2.141 -
   2.142 -/***
   2.143 - * init_timer - initialize a timer.
   2.144 - * @timer: the timer to be initialized
   2.145 - *
   2.146 - * init_timer() must be done to a timer prior calling *any* of the
   2.147 - * other timer functions.
   2.148 - */
   2.149 -void fastcall init_timer(struct timer_list *timer)
   2.150 -{
   2.151 -	timer->entry.next = NULL;
   2.152 -	timer->base = __raw_get_cpu_var(tvec_bases);
   2.153 -}
   2.154 -EXPORT_SYMBOL(init_timer);
   2.155 -
   2.156 -static inline void detach_timer(struct timer_list *timer,
   2.157 -					int clear_pending)
   2.158 -{
   2.159 -	struct list_head *entry = &timer->entry;
   2.160 -
   2.161 -	__list_del(entry->prev, entry->next);
   2.162 -	if (clear_pending)
   2.163 -		entry->next = NULL;
   2.164 -	entry->prev = LIST_POISON2;
   2.165 -}
   2.166 -
   2.167 -/*
   2.168 - * We are using hashed locking: holding per_cpu(tvec_bases).lock
   2.169 - * means that all timers which are tied to this base via timer->base are
   2.170 - * locked, and the base itself is locked too.
   2.171 - *
   2.172 - * So __run_timers/migrate_timers can safely modify all timers which could
   2.173 - * be found on ->tvX lists.
   2.174 - *
   2.175 - * When the timer's base is locked, and the timer removed from list, it is
   2.176 - * possible to set timer->base = NULL and drop the lock: the timer remains
   2.177 - * locked.
   2.178 - */
   2.179 -static tvec_base_t *lock_timer_base(struct timer_list *timer,
   2.180 -					unsigned long *flags)
   2.181 -{
   2.182 -	tvec_base_t *base;
   2.183 -
   2.184 -	for (;;) {
   2.185 -		base = timer->base;
   2.186 -		if (likely(base != NULL)) {
   2.187 -			spin_lock_irqsave(&base->lock, *flags);
   2.188 -			if (likely(base == timer->base))
   2.189 -				return base;
   2.190 -			/* The timer has migrated to another CPU */
   2.191 -			spin_unlock_irqrestore(&base->lock, *flags);
   2.192 -		}
   2.193 -		cpu_relax();
   2.194 -	}
   2.195 -}
   2.196 -
   2.197 -int __mod_timer(struct timer_list *timer, unsigned long expires)
   2.198 -{
   2.199 -	tvec_base_t *base, *new_base;
   2.200 -	unsigned long flags;
   2.201 -	int ret = 0;
   2.202 -
   2.203 -	BUG_ON(!timer->function);
   2.204 -
   2.205 -	base = lock_timer_base(timer, &flags);
   2.206 -
   2.207 -	if (timer_pending(timer)) {
   2.208 -		detach_timer(timer, 0);
   2.209 -		ret = 1;
   2.210 -	}
   2.211 -
   2.212 -	new_base = __get_cpu_var(tvec_bases);
   2.213 -
   2.214 -	if (base != new_base) {
   2.215 -		/*
   2.216 -		 * We are trying to schedule the timer on the local CPU.
   2.217 -		 * However we can't change timer's base while it is running,
   2.218 -		 * otherwise del_timer_sync() can't detect that the timer's
   2.219 -		 * handler yet has not finished. This also guarantees that
   2.220 -		 * the timer is serialized wrt itself.
   2.221 -		 */
   2.222 -		if (likely(base->running_timer != timer)) {
   2.223 -			/* See the comment in lock_timer_base() */
   2.224 -			timer->base = NULL;
   2.225 -			spin_unlock(&base->lock);
   2.226 -			base = new_base;
   2.227 -			spin_lock(&base->lock);
   2.228 -			timer->base = base;
   2.229 -		}
   2.230 -	}
   2.231 -
   2.232 -	timer->expires = expires;
   2.233 -	internal_add_timer(base, timer);
   2.234 -	spin_unlock_irqrestore(&base->lock, flags);
   2.235 -
   2.236 -	return ret;
   2.237 -}
   2.238 -
   2.239 -EXPORT_SYMBOL(__mod_timer);
   2.240 -
   2.241 -/***
   2.242 - * add_timer_on - start a timer on a particular CPU
   2.243 - * @timer: the timer to be added
   2.244 - * @cpu: the CPU to start it on
   2.245 - *
   2.246 - * This is not very scalable on SMP. Double adds are not possible.
   2.247 - */
   2.248 -void add_timer_on(struct timer_list *timer, int cpu)
   2.249 -{
   2.250 -	tvec_base_t *base = per_cpu(tvec_bases, cpu);
   2.251 -  	unsigned long flags;
   2.252 -
   2.253 -  	BUG_ON(timer_pending(timer) || !timer->function);
   2.254 -	spin_lock_irqsave(&base->lock, flags);
   2.255 -	timer->base = base;
   2.256 -	internal_add_timer(base, timer);
   2.257 -	spin_unlock_irqrestore(&base->lock, flags);
   2.258 -}
   2.259 -
   2.260 -
   2.261 -/***
   2.262 - * mod_timer - modify a timer's timeout
   2.263 - * @timer: the timer to be modified
   2.264 - *
   2.265 - * mod_timer is a more efficient way to update the expire field of an
   2.266 - * active timer (if the timer is inactive it will be activated)
   2.267 - *
   2.268 - * mod_timer(timer, expires) is equivalent to:
   2.269 - *
   2.270 - *     del_timer(timer); timer->expires = expires; add_timer(timer);
   2.271 - *
   2.272 - * Note that if there are multiple unserialized concurrent users of the
   2.273 - * same timer, then mod_timer() is the only safe way to modify the timeout,
   2.274 - * since add_timer() cannot modify an already running timer.
   2.275 - *
   2.276 - * The function returns whether it has modified a pending timer or not.
   2.277 - * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
   2.278 - * active timer returns 1.)
   2.279 - */
   2.280 -int mod_timer(struct timer_list *timer, unsigned long expires)
   2.281 -{
   2.282 -	BUG_ON(!timer->function);
   2.283 -
   2.284 -	/*
   2.285 -	 * This is a common optimization triggered by the
   2.286 -	 * networking code - if the timer is re-modified
   2.287 -	 * to be the same thing then just return:
   2.288 -	 */
   2.289 -	if (timer->expires == expires && timer_pending(timer))
   2.290 -		return 1;
   2.291 -
   2.292 -	return __mod_timer(timer, expires);
   2.293 -}
   2.294 -
   2.295 -EXPORT_SYMBOL(mod_timer);
   2.296 -
   2.297 -/***
   2.298 - * del_timer - deactive a timer.
   2.299 - * @timer: the timer to be deactivated
   2.300 - *
   2.301 - * del_timer() deactivates a timer - this works on both active and inactive
   2.302 - * timers.
   2.303 - *
   2.304 - * The function returns whether it has deactivated a pending timer or not.
   2.305 - * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
   2.306 - * active timer returns 1.)
   2.307 - */
   2.308 -int del_timer(struct timer_list *timer)
   2.309 -{
   2.310 -	tvec_base_t *base;
   2.311 -	unsigned long flags;
   2.312 -	int ret = 0;
   2.313 -
   2.314 -	if (timer_pending(timer)) {
   2.315 -		base = lock_timer_base(timer, &flags);
   2.316 -		if (timer_pending(timer)) {
   2.317 -			detach_timer(timer, 1);
   2.318 -			ret = 1;
   2.319 -		}
   2.320 -		spin_unlock_irqrestore(&base->lock, flags);
   2.321 -	}
   2.322 -
   2.323 -	return ret;
   2.324 -}
   2.325 -
   2.326 -EXPORT_SYMBOL(del_timer);
   2.327 -
   2.328 -#ifdef CONFIG_SMP
   2.329 -/*
   2.330 - * This function tries to deactivate a timer. Upon successful (ret >= 0)
   2.331 - * exit the timer is not queued and the handler is not running on any CPU.
   2.332 - *
   2.333 - * It must not be called from interrupt contexts.
   2.334 - */
   2.335 -int try_to_del_timer_sync(struct timer_list *timer)
   2.336 -{
   2.337 -	tvec_base_t *base;
   2.338 -	unsigned long flags;
   2.339 -	int ret = -1;
   2.340 -
   2.341 -	base = lock_timer_base(timer, &flags);
   2.342 -
   2.343 -	if (base->running_timer == timer)
   2.344 -		goto out;
   2.345 -
   2.346 -	ret = 0;
   2.347 -	if (timer_pending(timer)) {
   2.348 -		detach_timer(timer, 1);
   2.349 -		ret = 1;
   2.350 -	}
   2.351 -out:
   2.352 -	spin_unlock_irqrestore(&base->lock, flags);
   2.353 -
   2.354 -	return ret;
   2.355 -}
   2.356 -
   2.357 -/***
   2.358 - * del_timer_sync - deactivate a timer and wait for the handler to finish.
   2.359 - * @timer: the timer to be deactivated
   2.360 - *
   2.361 - * This function only differs from del_timer() on SMP: besides deactivating
   2.362 - * the timer it also makes sure the handler has finished executing on other
   2.363 - * CPUs.
   2.364 - *
   2.365 - * Synchronization rules: callers must prevent restarting of the timer,
   2.366 - * otherwise this function is meaningless. It must not be called from
   2.367 - * interrupt contexts. The caller must not hold locks which would prevent
   2.368 - * completion of the timer's handler. The timer's handler must not call
   2.369 - * add_timer_on(). Upon exit the timer is not queued and the handler is
   2.370 - * not running on any CPU.
   2.371 - *
   2.372 - * The function returns whether it has deactivated a pending timer or not.
   2.373 - */
   2.374 -int del_timer_sync(struct timer_list *timer)
   2.375 -{
   2.376 -	for (;;) {
   2.377 -		int ret = try_to_del_timer_sync(timer);
   2.378 -		if (ret >= 0)
   2.379 -			return ret;
   2.380 -		cpu_relax();
   2.381 -	}
   2.382 -}
   2.383 -
   2.384 -EXPORT_SYMBOL(del_timer_sync);
   2.385 -#endif
   2.386 -
   2.387 -static int cascade(tvec_base_t *base, tvec_t *tv, int index)
   2.388 -{
   2.389 -	/* cascade all the timers from tv up one level */
   2.390 -	struct timer_list *timer, *tmp;
   2.391 -	struct list_head tv_list;
   2.392 -
   2.393 -	list_replace_init(tv->vec + index, &tv_list);
   2.394 -
   2.395 -	/*
   2.396 -	 * We are removing _all_ timers from the list, so we
   2.397 -	 * don't have to detach them individually.
   2.398 -	 */
   2.399 -	list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
   2.400 -		BUG_ON(timer->base != base);
   2.401 -		internal_add_timer(base, timer);
   2.402 -	}
   2.403 -
   2.404 -	return index;
   2.405 -}
   2.406 -
   2.407 -/***
   2.408 - * __run_timers - run all expired timers (if any) on this CPU.
   2.409 - * @base: the timer vector to be processed.
   2.410 - *
   2.411 - * This function cascades all vectors and executes all expired timer
   2.412 - * vectors.
   2.413 - */
   2.414 -#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
   2.415 -
   2.416 -static inline void __run_timers(tvec_base_t *base)
   2.417 -{
   2.418 -	struct timer_list *timer;
   2.419 -
   2.420 -	spin_lock_irq(&base->lock);
   2.421 -	while (time_after_eq(jiffies, base->timer_jiffies)) {
   2.422 -		struct list_head work_list;
   2.423 -		struct list_head *head = &work_list;
   2.424 - 		int index = base->timer_jiffies & TVR_MASK;
   2.425 -
   2.426 -		/*
   2.427 -		 * Cascade timers:
   2.428 -		 */
   2.429 -		if (!index &&
   2.430 -			(!cascade(base, &base->tv2, INDEX(0))) &&
   2.431 -				(!cascade(base, &base->tv3, INDEX(1))) &&
   2.432 -					!cascade(base, &base->tv4, INDEX(2)))
   2.433 -			cascade(base, &base->tv5, INDEX(3));
   2.434 -		++base->timer_jiffies;
   2.435 -		list_replace_init(base->tv1.vec + index, &work_list);
   2.436 -		while (!list_empty(head)) {
   2.437 -			void (*fn)(unsigned long);
   2.438 -			unsigned long data;
   2.439 -
   2.440 -			timer = list_entry(head->next,struct timer_list,entry);
   2.441 - 			fn = timer->function;
   2.442 - 			data = timer->data;
   2.443 -
   2.444 -			set_running_timer(base, timer);
   2.445 -			detach_timer(timer, 1);
   2.446 -			spin_unlock_irq(&base->lock);
   2.447 -			{
   2.448 -				int preempt_count = preempt_count();
   2.449 -				fn(data);
   2.450 -				if (preempt_count != preempt_count()) {
   2.451 -					printk(KERN_WARNING "huh, entered %p "
   2.452 -					       "with preempt_count %08x, exited"
   2.453 -					       " with %08x?\n",
   2.454 -					       fn, preempt_count,
   2.455 -					       preempt_count());
   2.456 -					BUG();
   2.457 -				}
   2.458 -			}
   2.459 -			spin_lock_irq(&base->lock);
   2.460 -		}
   2.461 -	}
   2.462 -	set_running_timer(base, NULL);
   2.463 -	spin_unlock_irq(&base->lock);
   2.464 -}
   2.465 -
   2.466 -#ifdef CONFIG_NO_IDLE_HZ
   2.467 -/*
   2.468 - * Find out when the next timer event is due to happen. This
   2.469 - * is used on S/390 to stop all activity when a cpus is idle.
   2.470 - * This functions needs to be called disabled.
   2.471 - */
   2.472 -unsigned long next_timer_interrupt(void)
   2.473 -{
   2.474 -	tvec_base_t *base;
   2.475 -	struct list_head *list;
   2.476 -	struct timer_list *nte;
   2.477 -	unsigned long expires;
   2.478 -	unsigned long hr_expires = MAX_JIFFY_OFFSET;
   2.479 -	ktime_t hr_delta;
   2.480 -	tvec_t *varray[4];
   2.481 -	int i, j;
   2.482 -
   2.483 -	hr_delta = hrtimer_get_next_event();
   2.484 -	if (hr_delta.tv64 != KTIME_MAX) {
   2.485 -		struct timespec tsdelta;
   2.486 -		tsdelta = ktime_to_timespec(hr_delta);
   2.487 -		hr_expires = timespec_to_jiffies(&tsdelta);
   2.488 -		if (hr_expires < 3)
   2.489 -			return hr_expires + jiffies;
   2.490 -	}
   2.491 -	hr_expires += jiffies;
   2.492 -
   2.493 -	base = __get_cpu_var(tvec_bases);
   2.494 -	spin_lock(&base->lock);
   2.495 -	expires = base->timer_jiffies + (LONG_MAX >> 1);
   2.496 -	list = NULL;
   2.497 -
   2.498 -	/* Look for timer events in tv1. */
   2.499 -	j = base->timer_jiffies & TVR_MASK;
   2.500 -	do {
   2.501 -		list_for_each_entry(nte, base->tv1.vec + j, entry) {
   2.502 -			expires = nte->expires;
   2.503 -			if (j < (base->timer_jiffies & TVR_MASK))
   2.504 -				list = base->tv2.vec + (INDEX(0));
   2.505 -			goto found;
   2.506 -		}
   2.507 -		j = (j + 1) & TVR_MASK;
   2.508 -	} while (j != (base->timer_jiffies & TVR_MASK));
   2.509 -
   2.510 -	/* Check tv2-tv5. */
   2.511 -	varray[0] = &base->tv2;
   2.512 -	varray[1] = &base->tv3;
   2.513 -	varray[2] = &base->tv4;
   2.514 -	varray[3] = &base->tv5;
   2.515 -	for (i = 0; i < 4; i++) {
   2.516 -		j = INDEX(i);
   2.517 -		do {
   2.518 -			if (list_empty(varray[i]->vec + j)) {
   2.519 -				j = (j + 1) & TVN_MASK;
   2.520 -				continue;
   2.521 -			}
   2.522 -			list_for_each_entry(nte, varray[i]->vec + j, entry)
   2.523 -				if (time_before(nte->expires, expires))
   2.524 -					expires = nte->expires;
   2.525 -			if (j < (INDEX(i)) && i < 3)
   2.526 -				list = varray[i + 1]->vec + (INDEX(i + 1));
   2.527 -			goto found;
   2.528 -		} while (j != (INDEX(i)));
   2.529 -	}
   2.530 -found:
   2.531 -	if (list) {
   2.532 -		/*
   2.533 -		 * The search wrapped. We need to look at the next list
   2.534 -		 * from next tv element that would cascade into tv element
   2.535 -		 * where we found the timer element.
   2.536 -		 */
   2.537 -		list_for_each_entry(nte, list, entry) {
   2.538 -			if (time_before(nte->expires, expires))
   2.539 -				expires = nte->expires;
   2.540 -		}
   2.541 -	}
   2.542 -	spin_unlock(&base->lock);
   2.543 -
   2.544 -	/*
   2.545 -	 * It can happen that other CPUs service timer IRQs and increment
   2.546 -	 * jiffies, but we have not yet got a local timer tick to process
   2.547 -	 * the timer wheels.  In that case, the expiry time can be before
   2.548 -	 * jiffies, but since the high-resolution timer here is relative to
   2.549 -	 * jiffies, the default expression when high-resolution timers are
   2.550 -	 * not active,
   2.551 -	 *
   2.552 -	 *   time_before(MAX_JIFFY_OFFSET + jiffies, expires)
   2.553 -	 *
   2.554 -	 * would falsely evaluate to true.  If that is the case, just
   2.555 -	 * return jiffies so that we can immediately fire the local timer
   2.556 -	 */
   2.557 -	if (time_before(expires, jiffies))
   2.558 -		return jiffies;
   2.559 -
   2.560 -	if (time_before(hr_expires, expires))
   2.561 -		return hr_expires;
   2.562 -
   2.563 -	return expires;
   2.564 -}
   2.565 -#endif
   2.566 -
   2.567 -/******************************************************************/
   2.568 -
   2.569 -/*
   2.570 - * Timekeeping variables
   2.571 - */
   2.572 -unsigned long tick_usec = TICK_USEC; 		/* USER_HZ period (usec) */
   2.573 -unsigned long tick_nsec = TICK_NSEC;		/* ACTHZ period (nsec) */
   2.574 -
   2.575 -/* 
   2.576 - * The current time 
   2.577 - * wall_to_monotonic is what we need to add to xtime (or xtime corrected 
   2.578 - * for sub jiffie times) to get to monotonic time.  Monotonic is pegged
   2.579 - * at zero at system boot time, so wall_to_monotonic will be negative,
   2.580 - * however, we will ALWAYS keep the tv_nsec part positive so we can use
   2.581 - * the usual normalization.
   2.582 - */
   2.583 -struct timespec xtime __attribute__ ((aligned (16)));
   2.584 -struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
   2.585 -
   2.586 -EXPORT_SYMBOL(xtime);
   2.587 -
   2.588 -/* Don't completely fail for HZ > 500.  */
   2.589 -int tickadj = 500/HZ ? : 1;		/* microsecs */
   2.590 -
   2.591 -
   2.592 -/*
   2.593 - * phase-lock loop variables
   2.594 - */
   2.595 -/* TIME_ERROR prevents overwriting the CMOS clock */
   2.596 -int time_state = TIME_OK;		/* clock synchronization status	*/
   2.597 -int time_status = STA_UNSYNC;		/* clock status bits		*/
   2.598 -long time_offset;			/* time adjustment (us)		*/
   2.599 -long time_constant = 2;			/* pll time constant		*/
   2.600 -long time_tolerance = MAXFREQ;		/* frequency tolerance (ppm)	*/
   2.601 -long time_precision = 1;		/* clock precision (us)		*/
   2.602 -long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us)		*/
   2.603 -long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us)		*/
   2.604 -long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC;
   2.605 -					/* frequency offset (scaled ppm)*/
   2.606 -static long time_adj;			/* tick adjust (scaled 1 / HZ)	*/
   2.607 -long time_reftime;			/* time at last adjustment (s)	*/
   2.608 -long time_adjust;
   2.609 -long time_next_adjust;
   2.610 -
   2.611 -/*
   2.612 - * this routine handles the overflow of the microsecond field
   2.613 - *
   2.614 - * The tricky bits of code to handle the accurate clock support
   2.615 - * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
   2.616 - * They were originally developed for SUN and DEC kernels.
   2.617 - * All the kudos should go to Dave for this stuff.
   2.618 - *
   2.619 - */
   2.620 -static void second_overflow(void)
   2.621 -{
   2.622 -	long ltemp;
   2.623 -
   2.624 -	/* Bump the maxerror field */
   2.625 -	time_maxerror += time_tolerance >> SHIFT_USEC;
   2.626 -	if (time_maxerror > NTP_PHASE_LIMIT) {
   2.627 -		time_maxerror = NTP_PHASE_LIMIT;
   2.628 -		time_status |= STA_UNSYNC;
   2.629 -	}
   2.630 -
   2.631 -	/*
   2.632 -	 * Leap second processing. If in leap-insert state at the end of the
   2.633 -	 * day, the system clock is set back one second; if in leap-delete
   2.634 -	 * state, the system clock is set ahead one second. The microtime()
   2.635 -	 * routine or external clock driver will insure that reported time is
   2.636 -	 * always monotonic. The ugly divides should be replaced.
   2.637 -	 */
   2.638 -	switch (time_state) {
   2.639 -	case TIME_OK:
   2.640 -		if (time_status & STA_INS)
   2.641 -			time_state = TIME_INS;
   2.642 -		else if (time_status & STA_DEL)
   2.643 -			time_state = TIME_DEL;
   2.644 -		break;
   2.645 -	case TIME_INS:
   2.646 -		if (xtime.tv_sec % 86400 == 0) {
   2.647 -			xtime.tv_sec--;
   2.648 -			wall_to_monotonic.tv_sec++;
   2.649 -			/*
   2.650 -			 * The timer interpolator will make time change
   2.651 -			 * gradually instead of an immediate jump by one second
   2.652 -			 */
   2.653 -			time_interpolator_update(-NSEC_PER_SEC);
   2.654 -			time_state = TIME_OOP;
   2.655 -			clock_was_set();
   2.656 -			printk(KERN_NOTICE "Clock: inserting leap second "
   2.657 -					"23:59:60 UTC\n");
   2.658 -		}
   2.659 -		break;
   2.660 -	case TIME_DEL:
   2.661 -		if ((xtime.tv_sec + 1) % 86400 == 0) {
   2.662 -			xtime.tv_sec++;
   2.663 -			wall_to_monotonic.tv_sec--;
   2.664 -			/*
   2.665 -			 * Use of time interpolator for a gradual change of
   2.666 -			 * time
   2.667 -			 */
   2.668 -			time_interpolator_update(NSEC_PER_SEC);
   2.669 -			time_state = TIME_WAIT;
   2.670 -			clock_was_set();
   2.671 -			printk(KERN_NOTICE "Clock: deleting leap second "
   2.672 -					"23:59:59 UTC\n");
   2.673 -		}
   2.674 -		break;
   2.675 -	case TIME_OOP:
   2.676 -		time_state = TIME_WAIT;
   2.677 -		break;
   2.678 -	case TIME_WAIT:
   2.679 -		if (!(time_status & (STA_INS | STA_DEL)))
   2.680 -		time_state = TIME_OK;
   2.681 -	}
   2.682 -
   2.683 -	/*
   2.684 -	 * Compute the phase adjustment for the next second. In PLL mode, the
   2.685 -	 * offset is reduced by a fixed factor times the time constant. In FLL
   2.686 -	 * mode the offset is used directly. In either mode, the maximum phase
   2.687 -	 * adjustment for each second is clamped so as to spread the adjustment
   2.688 -	 * over not more than the number of seconds between updates.
   2.689 -	 */
   2.690 -	ltemp = time_offset;
   2.691 -	if (!(time_status & STA_FLL))
   2.692 -		ltemp = shift_right(ltemp, SHIFT_KG + time_constant);
   2.693 -	ltemp = min(ltemp, (MAXPHASE / MINSEC) << SHIFT_UPDATE);
   2.694 -	ltemp = max(ltemp, -(MAXPHASE / MINSEC) << SHIFT_UPDATE);
   2.695 -	time_offset -= ltemp;
   2.696 -	time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
   2.697 -
   2.698 -	/*
   2.699 -	 * Compute the frequency estimate and additional phase adjustment due
   2.700 -	 * to frequency error for the next second.
   2.701 -	 */
   2.702 -	ltemp = time_freq;
   2.703 -	time_adj += shift_right(ltemp,(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE));
   2.704 -
   2.705 -#if HZ == 100
   2.706 -	/*
   2.707 -	 * Compensate for (HZ==100) != (1 << SHIFT_HZ).  Add 25% and 3.125% to
   2.708 -	 * get 128.125; => only 0.125% error (p. 14)
   2.709 -	 */
   2.710 -	time_adj += shift_right(time_adj, 2) + shift_right(time_adj, 5);
   2.711 -#endif
   2.712 -#if HZ == 250
   2.713 -	/*
   2.714 -	 * Compensate for (HZ==250) != (1 << SHIFT_HZ).  Add 1.5625% and
   2.715 -	 * 0.78125% to get 255.85938; => only 0.05% error (p. 14)
   2.716 -	 */
   2.717 -	time_adj += shift_right(time_adj, 6) + shift_right(time_adj, 7);
   2.718 -#endif
   2.719 -#if HZ == 1000
   2.720 -	/*
   2.721 -	 * Compensate for (HZ==1000) != (1 << SHIFT_HZ).  Add 1.5625% and
   2.722 -	 * 0.78125% to get 1023.4375; => only 0.05% error (p. 14)
   2.723 -	 */
   2.724 -	time_adj += shift_right(time_adj, 6) + shift_right(time_adj, 7);
   2.725 -#endif
   2.726 -}
   2.727 -
   2.728 -/*
   2.729 - * Returns how many microseconds we need to add to xtime this tick
   2.730 - * in doing an adjustment requested with adjtime.
   2.731 - */
   2.732 -static long adjtime_adjustment(void)
   2.733 -{
   2.734 -	long time_adjust_step;
   2.735 -
   2.736 -	time_adjust_step = time_adjust;
   2.737 -	if (time_adjust_step) {
   2.738 -		/*
   2.739 -		 * We are doing an adjtime thing.  Prepare time_adjust_step to
   2.740 -		 * be within bounds.  Note that a positive time_adjust means we
   2.741 -		 * want the clock to run faster.
   2.742 -		 *
   2.743 -		 * Limit the amount of the step to be in the range
   2.744 -		 * -tickadj .. +tickadj
   2.745 -		 */
   2.746 -		time_adjust_step = min(time_adjust_step, (long)tickadj);
   2.747 -		time_adjust_step = max(time_adjust_step, (long)-tickadj);
   2.748 -	}
   2.749 -	return time_adjust_step;
   2.750 -}
   2.751 -
   2.752 -/* in the NTP reference this is called "hardclock()" */
   2.753 -static void update_ntp_one_tick(void)
   2.754 -{
   2.755 -	long time_adjust_step;
   2.756 -
   2.757 -	time_adjust_step = adjtime_adjustment();
   2.758 -	if (time_adjust_step)
   2.759 -		/* Reduce by this step the amount of time left  */
   2.760 -		time_adjust -= time_adjust_step;
   2.761 -
   2.762 -	/* Changes by adjtime() do not take effect till next tick. */
   2.763 -	if (time_next_adjust != 0) {
   2.764 -		time_adjust = time_next_adjust;
   2.765 -		time_next_adjust = 0;
   2.766 -	}
   2.767 -}
   2.768 -
   2.769 -/*
   2.770 - * Return how long ticks are at the moment, that is, how much time
   2.771 - * update_wall_time_one_tick will add to xtime next time we call it
   2.772 - * (assuming no calls to do_adjtimex in the meantime).
   2.773 - * The return value is in fixed-point nanoseconds shifted by the
   2.774 - * specified number of bits to the right of the binary point.
   2.775 - * This function has no side-effects.
   2.776 - */
   2.777 -u64 current_tick_length(void)
   2.778 -{
   2.779 -	long delta_nsec;
   2.780 -	u64 ret;
   2.781 -
   2.782 -	/* calculate the finest interval NTP will allow.
   2.783 -	 *    ie: nanosecond value shifted by (SHIFT_SCALE - 10)
   2.784 -	 */
   2.785 -	delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
   2.786 -	ret = (u64)delta_nsec << TICK_LENGTH_SHIFT;
   2.787 -	ret += (s64)time_adj << (TICK_LENGTH_SHIFT - (SHIFT_SCALE - 10));
   2.788 -
   2.789 -	return ret;
   2.790 -}
   2.791 -
   2.792 -/* XXX - all of this timekeeping code should be later moved to time.c */
   2.793 -#include <linux/clocksource.h>
   2.794 -static struct clocksource *clock; /* pointer to current clocksource */
   2.795 -
   2.796 -#ifdef CONFIG_GENERIC_TIME
   2.797 -/**
   2.798 - * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook
   2.799 - *
   2.800 - * private function, must hold xtime_lock lock when being
   2.801 - * called. Returns the number of nanoseconds since the
   2.802 - * last call to update_wall_time() (adjusted by NTP scaling)
   2.803 - */
   2.804 -static inline s64 __get_nsec_offset(void)
   2.805 -{
   2.806 -	cycle_t cycle_now, cycle_delta;
   2.807 -	s64 ns_offset;
   2.808 -
   2.809 -	/* read clocksource: */
   2.810 -	cycle_now = clocksource_read(clock);
   2.811 -
   2.812 -	/* calculate the delta since the last update_wall_time: */
   2.813 -	cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
   2.814 -
   2.815 -	/* convert to nanoseconds: */
   2.816 -	ns_offset = cyc2ns(clock, cycle_delta);
   2.817 -
   2.818 -	return ns_offset;
   2.819 -}
   2.820 -
   2.821 -/**
   2.822 - * __get_realtime_clock_ts - Returns the time of day in a timespec
   2.823 - * @ts:		pointer to the timespec to be set
   2.824 - *
   2.825 - * Returns the time of day in a timespec. Used by
   2.826 - * do_gettimeofday() and get_realtime_clock_ts().
   2.827 - */
   2.828 -static inline void __get_realtime_clock_ts(struct timespec *ts)
   2.829 -{
   2.830 -	unsigned long seq;
   2.831 -	s64 nsecs;
   2.832 -
   2.833 -	do {
   2.834 -		seq = read_seqbegin(&xtime_lock);
   2.835 -
   2.836 -		*ts = xtime;
   2.837 -		nsecs = __get_nsec_offset();
   2.838 -
   2.839 -	} while (read_seqretry(&xtime_lock, seq));
   2.840 -
   2.841 -	timespec_add_ns(ts, nsecs);
   2.842 -}
   2.843 -
   2.844 -/**
   2.845 - * getnstimeofday - Returns the time of day in a timespec
   2.846 - * @ts:		pointer to the timespec to be set
   2.847 - *
   2.848 - * Returns the time of day in a timespec.
   2.849 - */
   2.850 -void getnstimeofday(struct timespec *ts)
   2.851 -{
   2.852 -	__get_realtime_clock_ts(ts);
   2.853 -}
   2.854 -
   2.855 -EXPORT_SYMBOL(getnstimeofday);
   2.856 -
   2.857 -#ifndef CONFIG_XEN
   2.858 -/**
   2.859 - * do_gettimeofday - Returns the time of day in a timeval
   2.860 - * @tv:		pointer to the timeval to be set
   2.861 - *
   2.862 - * NOTE: Users should be converted to using get_realtime_clock_ts()
   2.863 - */
   2.864 -void do_gettimeofday(struct timeval *tv)
   2.865 -{
   2.866 -	struct timespec now;
   2.867 -
   2.868 -	__get_realtime_clock_ts(&now);
   2.869 -	tv->tv_sec = now.tv_sec;
   2.870 -	tv->tv_usec = now.tv_nsec/1000;
   2.871 -}
   2.872 -
   2.873 -EXPORT_SYMBOL(do_gettimeofday);
   2.874 -/**
   2.875 - * do_settimeofday - Sets the time of day
   2.876 - * @tv:		pointer to the timespec variable containing the new time
   2.877 - *
   2.878 - * Sets the time of day to the new time and update NTP and notify hrtimers
   2.879 - */
   2.880 -int do_settimeofday(struct timespec *tv)
   2.881 -{
   2.882 -	unsigned long flags;
   2.883 -	time_t wtm_sec, sec = tv->tv_sec;
   2.884 -	long wtm_nsec, nsec = tv->tv_nsec;
   2.885 -
   2.886 -	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
   2.887 -		return -EINVAL;
   2.888 -
   2.889 -	write_seqlock_irqsave(&xtime_lock, flags);
   2.890 -
   2.891 -	nsec -= __get_nsec_offset();
   2.892 -
   2.893 -	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
   2.894 -	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
   2.895 -
   2.896 -	set_normalized_timespec(&xtime, sec, nsec);
   2.897 -	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
   2.898 -
   2.899 -	clock->error = 0;
   2.900 -	ntp_clear();
   2.901 -
   2.902 -	write_sequnlock_irqrestore(&xtime_lock, flags);
   2.903 -
   2.904 -	/* signal hrtimers about time change */
   2.905 -	clock_was_set();
   2.906 -
   2.907 -	return 0;
   2.908 -}
   2.909 -
   2.910 -EXPORT_SYMBOL(do_settimeofday);
   2.911 -#endif
   2.912 -
   2.913 -/**
   2.914 - * change_clocksource - Swaps clocksources if a new one is available
   2.915 - *
   2.916 - * Accumulates current time interval and initializes new clocksource
   2.917 - */
   2.918 -static int change_clocksource(void)
   2.919 -{
   2.920 -	struct clocksource *new;
   2.921 -	cycle_t now;
   2.922 -	u64 nsec;
   2.923 -	new = clocksource_get_next();
   2.924 -	if (clock != new) {
   2.925 -		now = clocksource_read(new);
   2.926 -		nsec =  __get_nsec_offset();
   2.927 -		timespec_add_ns(&xtime, nsec);
   2.928 -
   2.929 -		clock = new;
   2.930 -		clock->cycle_last = now;
   2.931 -		printk(KERN_INFO "Time: %s clocksource has been installed.\n",
   2.932 -					clock->name);
   2.933 -		return 1;
   2.934 -	} else if (clock->update_callback) {
   2.935 -		return clock->update_callback();
   2.936 -	}
   2.937 -	return 0;
   2.938 -}
   2.939 -#else
   2.940 -#define change_clocksource() (0)
   2.941 -#endif
   2.942 -
   2.943 -/**
   2.944 - * timeofday_is_continuous - check to see if timekeeping is free running
   2.945 - */
   2.946 -int timekeeping_is_continuous(void)
   2.947 -{
   2.948 -	unsigned long seq;
   2.949 -	int ret;
   2.950 -
   2.951 -	do {
   2.952 -		seq = read_seqbegin(&xtime_lock);
   2.953 -
   2.954 -		ret = clock->is_continuous;
   2.955 -
   2.956 -	} while (read_seqretry(&xtime_lock, seq));
   2.957 -
   2.958 -	return ret;
   2.959 -}
   2.960 -
   2.961 -/*
   2.962 - * timekeeping_init - Initializes the clocksource and common timekeeping values
   2.963 - */
   2.964 -void __init timekeeping_init(void)
   2.965 -{
   2.966 -	unsigned long flags;
   2.967 -
   2.968 -	write_seqlock_irqsave(&xtime_lock, flags);
   2.969 -	clock = clocksource_get_next();
   2.970 -	clocksource_calculate_interval(clock, tick_nsec);
   2.971 -	clock->cycle_last = clocksource_read(clock);
   2.972 -	ntp_clear();
   2.973 -	write_sequnlock_irqrestore(&xtime_lock, flags);
   2.974 -}
   2.975 -
   2.976 -
   2.977 -static int timekeeping_suspended;
   2.978 -/*
   2.979 - * timekeeping_resume - Resumes the generic timekeeping subsystem.
   2.980 - * @dev:	unused
   2.981 - *
   2.982 - * This is for the generic clocksource timekeeping.
   2.983 - * xtime/wall_to_monotonic/jiffies/wall_jiffies/etc are
   2.984 - * still managed by arch specific suspend/resume code.
   2.985 - */
   2.986 -static int timekeeping_resume(struct sys_device *dev)
   2.987 -{
   2.988 -	unsigned long flags;
   2.989 -
   2.990 -	write_seqlock_irqsave(&xtime_lock, flags);
   2.991 -	/* restart the last cycle value */
   2.992 -	clock->cycle_last = clocksource_read(clock);
   2.993 -	clock->error = 0;
   2.994 -	timekeeping_suspended = 0;
   2.995 -	write_sequnlock_irqrestore(&xtime_lock, flags);
   2.996 -	return 0;
   2.997 -}
   2.998 -
   2.999 -static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
  2.1000 -{
  2.1001 -	unsigned long flags;
  2.1002 -
  2.1003 -	write_seqlock_irqsave(&xtime_lock, flags);
  2.1004 -	timekeeping_suspended = 1;
  2.1005 -	write_sequnlock_irqrestore(&xtime_lock, flags);
  2.1006 -	return 0;
  2.1007 -}
  2.1008 -
  2.1009 -/* sysfs resume/suspend bits for timekeeping */
  2.1010 -static struct sysdev_class timekeeping_sysclass = {
  2.1011 -	.resume		= timekeeping_resume,
  2.1012 -	.suspend	= timekeeping_suspend,
  2.1013 -	set_kset_name("timekeeping"),
  2.1014 -};
  2.1015 -
  2.1016 -static struct sys_device device_timer = {
  2.1017 -	.id		= 0,
  2.1018 -	.cls		= &timekeeping_sysclass,
  2.1019 -};
  2.1020 -
  2.1021 -static int __init timekeeping_init_device(void)
  2.1022 -{
  2.1023 -	int error = sysdev_class_register(&timekeeping_sysclass);
  2.1024 -	if (!error)
  2.1025 -		error = sysdev_register(&device_timer);
  2.1026 -	return error;
  2.1027 -}
  2.1028 -
  2.1029 -device_initcall(timekeeping_init_device);
  2.1030 -
  2.1031 -/*
  2.1032 - * If the error is already larger, we look ahead even further
  2.1033 - * to compensate for late or lost adjustments.
  2.1034 - */
  2.1035 -static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, s64 *offset)
  2.1036 -{
  2.1037 -	s64 tick_error, i;
  2.1038 -	u32 look_ahead, adj;
  2.1039 -	s32 error2, mult;
  2.1040 -
  2.1041 -	/*
  2.1042 -	 * Use the current error value to determine how much to look ahead.
  2.1043 -	 * The larger the error the slower we adjust for it to avoid problems
  2.1044 -	 * with losing too many ticks, otherwise we would overadjust and
  2.1045 -	 * produce an even larger error.  The smaller the adjustment the
  2.1046 -	 * faster we try to adjust for it, as lost ticks can do less harm
  2.1047 -	 * here.  This is tuned so that an error of about 1 msec is adusted
  2.1048 -	 * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
  2.1049 -	 */
  2.1050 -	error2 = clock->error >> (TICK_LENGTH_SHIFT + 22 - 2 * SHIFT_HZ);
  2.1051 -	error2 = abs(error2);
  2.1052 -	for (look_ahead = 0; error2 > 0; look_ahead++)
  2.1053 -		error2 >>= 2;
  2.1054 -
  2.1055 -	/*
  2.1056 -	 * Now calculate the error in (1 << look_ahead) ticks, but first
  2.1057 -	 * remove the single look ahead already included in the error.
  2.1058 -	 */
  2.1059 -	tick_error = current_tick_length() >> (TICK_LENGTH_SHIFT - clock->shift + 1);
  2.1060 -	tick_error -= clock->xtime_interval >> 1;
  2.1061 -	error = ((error - tick_error) >> look_ahead) + tick_error;
  2.1062 -
  2.1063 -	/* Finally calculate the adjustment shift value.  */
  2.1064 -	i = *interval;
  2.1065 -	mult = 1;
  2.1066 -	if (error < 0) {
  2.1067 -		error = -error;
  2.1068 -		*interval = -*interval;
  2.1069 -		*offset = -*offset;
  2.1070 -		mult = -1;
  2.1071 -	}
  2.1072 -	for (adj = 0; error > i; adj++)
  2.1073 -		error >>= 1;
  2.1074 -
  2.1075 -	*interval <<= adj;
  2.1076 -	*offset <<= adj;
  2.1077 -	return mult << adj;
  2.1078 -}
  2.1079 -
  2.1080 -/*
  2.1081 - * Adjust the multiplier to reduce the error value,
  2.1082 - * this is optimized for the most common adjustments of -1,0,1,
  2.1083 - * for other values we can do a bit more work.
  2.1084 - */
  2.1085 -static void clocksource_adjust(struct clocksource *clock, s64 offset)
  2.1086 -{
  2.1087 -	s64 error, interval = clock->cycle_interval;
  2.1088 -	int adj;
  2.1089 -
  2.1090 -	error = clock->error >> (TICK_LENGTH_SHIFT - clock->shift - 1);
  2.1091 -	if (error > interval) {
  2.1092 -		error >>= 2;
  2.1093 -		if (likely(error <= interval))
  2.1094 -			adj = 1;
  2.1095 -		else
  2.1096 -			adj = clocksource_bigadjust(error, &interval, &offset);
  2.1097 -	} else if (error < -interval) {
  2.1098 -		error >>= 2;
  2.1099 -		if (likely(error >= -interval)) {
  2.1100 -			adj = -1;
  2.1101 -			interval = -interval;
  2.1102 -			offset = -offset;
  2.1103 -		} else
  2.1104 -			adj = clocksource_bigadjust(error, &interval, &offset);
  2.1105 -	} else
  2.1106 -		return;
  2.1107 -
  2.1108 -	clock->mult += adj;
  2.1109 -	clock->xtime_interval += interval;
  2.1110 -	clock->xtime_nsec -= offset;
  2.1111 -	clock->error -= (interval - offset) << (TICK_LENGTH_SHIFT - clock->shift);
  2.1112 -}
  2.1113 -
  2.1114 -/*
  2.1115 - * update_wall_time - Uses the current clocksource to increment the wall time
  2.1116 - *
  2.1117 - * Called from the timer interrupt, must hold a write on xtime_lock.
  2.1118 - */
  2.1119 -static void update_wall_time(void)
  2.1120 -{
  2.1121 -	cycle_t offset;
  2.1122 -
  2.1123 -	/* Make sure we're fully resumed: */
  2.1124 -	if (unlikely(timekeeping_suspended))
  2.1125 -		return;
  2.1126 -
  2.1127 -#ifdef CONFIG_GENERIC_TIME
  2.1128 -	offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask;
  2.1129 -#else
  2.1130 -	offset = clock->cycle_interval;
  2.1131 -#endif
  2.1132 -	clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;
  2.1133 -
  2.1134 -	/* normally this loop will run just once, however in the
  2.1135 -	 * case of lost or late ticks, it will accumulate correctly.
  2.1136 -	 */
  2.1137 -	while (offset >= clock->cycle_interval) {
  2.1138 -		/* accumulate one interval */
  2.1139 -		clock->xtime_nsec += clock->xtime_interval;
  2.1140 -		clock->cycle_last += clock->cycle_interval;
  2.1141 -		offset -= clock->cycle_interval;
  2.1142 -
  2.1143 -		if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
  2.1144 -			clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
  2.1145 -			xtime.tv_sec++;
  2.1146 -			second_overflow();
  2.1147 -		}
  2.1148 -
  2.1149 -		/* interpolator bits */
  2.1150 -		time_interpolator_update(clock->xtime_interval
  2.1151 -						>> clock->shift);
  2.1152 -		/* increment the NTP state machine */
  2.1153 -		update_ntp_one_tick();
  2.1154 -
  2.1155 -		/* accumulate error between NTP and clock interval */
  2.1156 -		clock->error += current_tick_length();
  2.1157 -		clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift);
  2.1158 -	}
  2.1159 -
  2.1160 -	/* correct the clock when NTP error is too big */
  2.1161 -	clocksource_adjust(clock, offset);
  2.1162 -
  2.1163 -	/* store full nanoseconds into xtime */
  2.1164 -	xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift;
  2.1165 -	clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
  2.1166 -
  2.1167 -	/* check to see if there is a new clocksource to use */
  2.1168 -	if (change_clocksource()) {
  2.1169 -		clock->error = 0;
  2.1170 -		clock->xtime_nsec = 0;
  2.1171 -		clocksource_calculate_interval(clock, tick_nsec);
  2.1172 -	}
  2.1173 -}
  2.1174 -
  2.1175 -/*
  2.1176 - * Called from the timer interrupt handler to charge one tick to the current 
  2.1177 - * process.  user_tick is 1 if the tick is user time, 0 for system.
  2.1178 - */
  2.1179 -void update_process_times(int user_tick)
  2.1180 -{
  2.1181 -	struct task_struct *p = current;
  2.1182 -	int cpu = smp_processor_id();
  2.1183 -
  2.1184 -	/* Note: this timer irq context must be accounted for as well. */
  2.1185 -	if (user_tick)
  2.1186 -		account_user_time(p, jiffies_to_cputime(1));
  2.1187 -	else
  2.1188 -		account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1));
  2.1189 -	run_local_timers();
  2.1190 -	if (rcu_pending(cpu))
  2.1191 -		rcu_check_callbacks(cpu, user_tick);
  2.1192 -	scheduler_tick();
  2.1193 - 	run_posix_cpu_timers(p);
  2.1194 -}
  2.1195 -
  2.1196 -/*
  2.1197 - * Nr of active tasks - counted in fixed-point numbers
  2.1198 - */
  2.1199 -static unsigned long count_active_tasks(void)
  2.1200 -{
  2.1201 -	return nr_active() * FIXED_1;
  2.1202 -}
  2.1203 -
  2.1204 -/*
  2.1205 - * Hmm.. Changed this, as the GNU make sources (load.c) seems to
  2.1206 - * imply that avenrun[] is the standard name for this kind of thing.
  2.1207 - * Nothing else seems to be standardized: the fractional size etc
  2.1208 - * all seem to differ on different machines.
  2.1209 - *
  2.1210 - * Requires xtime_lock to access.
  2.1211 - */
  2.1212 -unsigned long avenrun[3];
  2.1213 -
  2.1214 -EXPORT_SYMBOL(avenrun);
  2.1215 -
  2.1216 -/*
  2.1217 - * calc_load - given tick count, update the avenrun load estimates.
  2.1218 - * This is called while holding a write_lock on xtime_lock.
  2.1219 - */
  2.1220 -static inline void calc_load(unsigned long ticks)
  2.1221 -{
  2.1222 -	unsigned long active_tasks; /* fixed-point */
  2.1223 -	static int count = LOAD_FREQ;
  2.1224 -
  2.1225 -	count -= ticks;
  2.1226 -	if (count < 0) {
  2.1227 -		count += LOAD_FREQ;
  2.1228 -		active_tasks = count_active_tasks();
  2.1229 -		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
  2.1230 -		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
  2.1231 -		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
  2.1232 -	}
  2.1233 -}
  2.1234 -
  2.1235 -/* jiffies at the most recent update of wall time */
  2.1236 -unsigned long wall_jiffies = INITIAL_JIFFIES;
  2.1237 -
  2.1238 -/*
  2.1239 - * This read-write spinlock protects us from races in SMP while
  2.1240 - * playing with xtime and avenrun.
  2.1241 - */
  2.1242 -#ifndef ARCH_HAVE_XTIME_LOCK
  2.1243 -__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
  2.1244 -
  2.1245 -EXPORT_SYMBOL(xtime_lock);
  2.1246 -#endif
  2.1247 -
  2.1248 -/*
  2.1249 - * This function runs timers and the timer-tq in bottom half context.
  2.1250 - */
  2.1251 -static void run_timer_softirq(struct softirq_action *h)
  2.1252 -{
  2.1253 -	tvec_base_t *base = __get_cpu_var(tvec_bases);
  2.1254 -
  2.1255 - 	hrtimer_run_queues();
  2.1256 -	if (time_after_eq(jiffies, base->timer_jiffies))
  2.1257 -		__run_timers(base);
  2.1258 -}
  2.1259 -
  2.1260 -/*
  2.1261 - * Called by the local, per-CPU timer interrupt on SMP.
  2.1262 - */
  2.1263 -void run_local_timers(void)
  2.1264 -{
  2.1265 -	raise_softirq(TIMER_SOFTIRQ);
  2.1266 -	softlockup_tick();
  2.1267 -}
  2.1268 -
  2.1269 -/*
  2.1270 - * Called by the timer interrupt. xtime_lock must already be taken
  2.1271 - * by the timer IRQ!
  2.1272 - */
  2.1273 -static inline void update_times(void)
  2.1274 -{
  2.1275 -	unsigned long ticks;
  2.1276 -
  2.1277 -	ticks = jiffies - wall_jiffies;
  2.1278 -	wall_jiffies += ticks;
  2.1279 -	update_wall_time();
  2.1280 -	calc_load(ticks);
  2.1281 -}
  2.1282 -  
  2.1283 -/*
  2.1284 - * The 64-bit jiffies value is not atomic - you MUST NOT read it
  2.1285 - * without sampling the sequence number in xtime_lock.
  2.1286 - * jiffies is defined in the linker script...
  2.1287 - */
  2.1288 -
  2.1289 -void do_timer(struct pt_regs *regs)
  2.1290 -{
  2.1291 -	jiffies_64++;
  2.1292 -	/* prevent loading jiffies before storing new jiffies_64 value. */
  2.1293 -	barrier();
  2.1294 -	update_times();
  2.1295 -}
  2.1296 -
  2.1297 -#ifdef __ARCH_WANT_SYS_ALARM
  2.1298 -
  2.1299 -/*
  2.1300 - * For backwards compatibility?  This can be done in libc so Alpha
  2.1301 - * and all newer ports shouldn't need it.
  2.1302 - */
  2.1303 -asmlinkage unsigned long sys_alarm(unsigned int seconds)
  2.1304 -{
  2.1305 -	return alarm_setitimer(seconds);
  2.1306 -}
  2.1307 -
  2.1308 -#endif
  2.1309 -
  2.1310 -#ifndef __alpha__
  2.1311 -
  2.1312 -/*
  2.1313 - * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
  2.1314 - * should be moved into arch/i386 instead?
  2.1315 - */
  2.1316 -
  2.1317 -/**
  2.1318 - * sys_getpid - return the thread group id of the current process
  2.1319 - *
  2.1320 - * Note, despite the name, this returns the tgid not the pid.  The tgid and
  2.1321 - * the pid are identical unless CLONE_THREAD was specified on clone() in
  2.1322 - * which case the tgid is the same in all threads of the same group.
  2.1323 - *
  2.1324 - * This is SMP safe as current->tgid does not change.
  2.1325 - */
  2.1326 -asmlinkage long sys_getpid(void)
  2.1327 -{
  2.1328 -	return current->tgid;
  2.1329 -}
  2.1330 -
  2.1331 -/*
  2.1332 - * Accessing ->real_parent is not SMP-safe, it could
  2.1333 - * change from under us. However, we can use a stale
  2.1334 - * value of ->real_parent under rcu_read_lock(), see
  2.1335 - * release_task()->call_rcu(delayed_put_task_struct).
  2.1336 - */
  2.1337 -asmlinkage long sys_getppid(void)
  2.1338 -{
  2.1339 -	int pid;
  2.1340 -
  2.1341 -	rcu_read_lock();
  2.1342 -	pid = rcu_dereference(current->real_parent)->tgid;
  2.1343 -	rcu_read_unlock();
  2.1344 -
  2.1345 -	return pid;
  2.1346 -}
  2.1347 -
  2.1348 -asmlinkage long sys_getuid(void)
  2.1349 -{
  2.1350 -	/* Only we change this so SMP safe */
  2.1351 -	return current->uid;
  2.1352 -}
  2.1353 -
  2.1354 -asmlinkage long sys_geteuid(void)
  2.1355 -{
  2.1356 -	/* Only we change this so SMP safe */
  2.1357 -	return current->euid;
  2.1358 -}
  2.1359 -
  2.1360 -asmlinkage long sys_getgid(void)
  2.1361 -{
  2.1362 -	/* Only we change this so SMP safe */
  2.1363 -	return current->gid;
  2.1364 -}
  2.1365 -
  2.1366 -asmlinkage long sys_getegid(void)
  2.1367 -{
  2.1368 -	/* Only we change this so SMP safe */
  2.1369 -	return  current->egid;
  2.1370 -}
  2.1371 -
  2.1372 -#endif
  2.1373 -
  2.1374 -static void process_timeout(unsigned long __data)
  2.1375 -{
  2.1376 -	wake_up_process((struct task_struct *)__data);
  2.1377 -}
  2.1378 -
  2.1379 -/**
  2.1380 - * schedule_timeout - sleep until timeout
  2.1381 - * @timeout: timeout value in jiffies
  2.1382 - *
  2.1383 - * Make the current task sleep until @timeout jiffies have
  2.1384 - * elapsed. The routine will return immediately unless
  2.1385 - * the current task state has been set (see set_current_state()).
  2.1386 - *
  2.1387 - * You can set the task state as follows -
  2.1388 - *
  2.1389 - * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
  2.1390 - * pass before the routine returns. The routine will return 0
  2.1391 - *
  2.1392 - * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
  2.1393 - * delivered to the current task. In this case the remaining time
  2.1394 - * in jiffies will be returned, or 0 if the timer expired in time
  2.1395 - *
  2.1396 - * The current task state is guaranteed to be TASK_RUNNING when this
  2.1397 - * routine returns.
  2.1398 - *
  2.1399 - * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
  2.1400 - * the CPU away without a bound on the timeout. In this case the return
  2.1401 - * value will be %MAX_SCHEDULE_TIMEOUT.
  2.1402 - *
  2.1403 - * In all cases the return value is guaranteed to be non-negative.
  2.1404 - */
  2.1405 -fastcall signed long __sched schedule_timeout(signed long timeout)
  2.1406 -{
  2.1407 -	struct timer_list timer;
  2.1408 -	unsigned long expire;
  2.1409 -
  2.1410 -	switch (timeout)
  2.1411 -	{
  2.1412 -	case MAX_SCHEDULE_TIMEOUT:
  2.1413 -		/*
  2.1414 -		 * These two special cases are useful to be comfortable
  2.1415 -		 * in the caller. Nothing more. We could take
  2.1416 -		 * MAX_SCHEDULE_TIMEOUT from one of the negative value
  2.1417 -		 * but I' d like to return a valid offset (>=0) to allow
  2.1418 -		 * the caller to do everything it want with the retval.
  2.1419 -		 */
  2.1420 -		schedule();
  2.1421 -		goto out;
  2.1422 -	default:
  2.1423 -		/*
  2.1424 -		 * Another bit of PARANOID. Note that the retval will be
  2.1425 -		 * 0 since no piece of kernel is supposed to do a check
  2.1426 -		 * for a negative retval of schedule_timeout() (since it
  2.1427 -		 * should never happens anyway). You just have the printk()
  2.1428 -		 * that will tell you if something is gone wrong and where.
  2.1429 -		 */
  2.1430 -		if (timeout < 0)
  2.1431 -		{
  2.1432 -			printk(KERN_ERR "schedule_timeout: wrong timeout "
  2.1433 -				"value %lx from %p\n", timeout,
  2.1434 -				__builtin_return_address(0));
  2.1435 -			current->state = TASK_RUNNING;
  2.1436 -			goto out;
  2.1437 -		}
  2.1438 -	}
  2.1439 -
  2.1440 -	expire = timeout + jiffies;
  2.1441 -
  2.1442 -	setup_timer(&timer, process_timeout, (unsigned long)current);
  2.1443 -	__mod_timer(&timer, expire);
  2.1444 -	schedule();
  2.1445 -	del_singleshot_timer_sync(&timer);
  2.1446 -
  2.1447 -	timeout = expire - jiffies;
  2.1448 -
  2.1449 - out:
  2.1450 -	return timeout < 0 ? 0 : timeout;
  2.1451 -}
  2.1452 -EXPORT_SYMBOL(schedule_timeout);
  2.1453 -
  2.1454 -/*
  2.1455 - * We can use __set_current_state() here because schedule_timeout() calls
  2.1456 - * schedule() unconditionally.
  2.1457 - */
  2.1458 -signed long __sched schedule_timeout_interruptible(signed long timeout)
  2.1459 -{
  2.1460 -	__set_current_state(TASK_INTERRUPTIBLE);
  2.1461 -	return schedule_timeout(timeout);
  2.1462 -}
  2.1463 -EXPORT_SYMBOL(schedule_timeout_interruptible);
  2.1464 -
  2.1465 -signed long __sched schedule_timeout_uninterruptible(signed long timeout)
  2.1466 -{
  2.1467 -	__set_current_state(TASK_UNINTERRUPTIBLE);
  2.1468 -	return schedule_timeout(timeout);
  2.1469 -}
  2.1470 -EXPORT_SYMBOL(schedule_timeout_uninterruptible);
  2.1471 -
  2.1472 -/* Thread ID - the internal kernel "pid" */
  2.1473 -asmlinkage long sys_gettid(void)
  2.1474 -{
  2.1475 -	return current->pid;
  2.1476 -}
  2.1477 -
  2.1478 -/*
  2.1479 - * sys_sysinfo - fill in sysinfo struct
  2.1480 - */ 
  2.1481 -asmlinkage long sys_sysinfo(struct sysinfo __user *info)
  2.1482 -{
  2.1483 -	struct sysinfo val;
  2.1484 -	unsigned long mem_total, sav_total;
  2.1485 -	unsigned int mem_unit, bitcount;
  2.1486 -	unsigned long seq;
  2.1487 -
  2.1488 -	memset((char *)&val, 0, sizeof(struct sysinfo));
  2.1489 -
  2.1490 -	do {
  2.1491 -		struct timespec tp;
  2.1492 -		seq = read_seqbegin(&xtime_lock);
  2.1493 -
  2.1494 -		/*
  2.1495 -		 * This is annoying.  The below is the same thing
  2.1496 -		 * posix_get_clock_monotonic() does, but it wants to
  2.1497 -		 * take the lock which we want to cover the loads stuff
  2.1498 -		 * too.
  2.1499 -		 */
  2.1500 -
  2.1501 -		getnstimeofday(&tp);
  2.1502 -		tp.tv_sec += wall_to_monotonic.tv_sec;
  2.1503 -		tp.tv_nsec += wall_to_monotonic.tv_nsec;
  2.1504 -		if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
  2.1505 -			tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
  2.1506 -			tp.tv_sec++;
  2.1507 -		}
  2.1508 -		val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
  2.1509 -
  2.1510 -		val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
  2.1511 -		val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
  2.1512 -		val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
  2.1513 -
  2.1514 -		val.procs = nr_threads;
  2.1515 -	} while (read_seqretry(&xtime_lock, seq));
  2.1516 -
  2.1517 -	si_meminfo(&val);
  2.1518 -	si_swapinfo(&val);
  2.1519 -
  2.1520 -	/*
  2.1521 -	 * If the sum of all the available memory (i.e. ram + swap)
  2.1522 -	 * is less than can be stored in a 32 bit unsigned long then
  2.1523 -	 * we can be binary compatible with 2.2.x kernels.  If not,
  2.1524 -	 * well, in that case 2.2.x was broken anyways...
  2.1525 -	 *
  2.1526 -	 *  -Erik Andersen <andersee@debian.org>
  2.1527 -	 */
  2.1528 -
  2.1529 -	mem_total = val.totalram + val.totalswap;
  2.1530 -	if (mem_total < val.totalram || mem_total < val.totalswap)
  2.1531 -		goto out;
  2.1532 -	bitcount = 0;
  2.1533 -	mem_unit = val.mem_unit;
  2.1534 -	while (mem_unit > 1) {
  2.1535 -		bitcount++;
  2.1536 -		mem_unit >>= 1;
  2.1537 -		sav_total = mem_total;
  2.1538 -		mem_total <<= 1;
  2.1539 -		if (mem_total < sav_total)
  2.1540 -			goto out;
  2.1541 -	}
  2.1542 -
  2.1543 -	/*
  2.1544 -	 * If mem_total did not overflow, multiply all memory values by
  2.1545 -	 * val.mem_unit and set it to 1.  This leaves things compatible
  2.1546 -	 * with 2.2.x, and also retains compatibility with earlier 2.4.x
  2.1547 -	 * kernels...
  2.1548 -	 */
  2.1549 -
  2.1550 -	val.mem_unit = 1;
  2.1551 -	val.totalram <<= bitcount;
  2.1552 -	val.freeram <<= bitcount;
  2.1553 -	val.sharedram <<= bitcount;
  2.1554 -	val.bufferram <<= bitcount;
  2.1555 -	val.totalswap <<= bitcount;
  2.1556 -	val.freeswap <<= bitcount;
  2.1557 -	val.totalhigh <<= bitcount;
  2.1558 -	val.freehigh <<= bitcount;
  2.1559 -
  2.1560 - out:
  2.1561 -	if (copy_to_user(info, &val, sizeof(struct sysinfo)))
  2.1562 -		return -EFAULT;
  2.1563 -
  2.1564 -	return 0;
  2.1565 -}
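
The bitcount loop above is a guarded unit conversion: it checks that the combined RAM-plus-swap total, rescaled from mem_unit-sized units to bytes, still fits in an unsigned long (which bounds every individual field), and only then rescales. A userspace sketch of the same arithmetic with made-up numbers (4 KiB pages, 512 MiB of RAM, 1 GiB of swap):

	#include <stdio.h>

	int main(void)
	{
		unsigned long totalram = 131072;	/* 512 MiB in 4 KiB units */
		unsigned long totalswap = 262144;	/* 1 GiB in 4 KiB units */
		unsigned int mem_unit = 4096;		/* bytes per unit */
		unsigned long mem_total = totalram + totalswap;
		unsigned long sav_total;
		unsigned int bitcount = 0;

		while (mem_unit > 1) {
			bitcount++;
			mem_unit >>= 1;
			sav_total = mem_total;
			mem_total <<= 1;
			if (mem_total < sav_total) {
				/* Would overflow: keep the original mem_unit. */
				printf("left in original units\n");
				return 0;
			}
		}
		/* No overflow: report everything in bytes, mem_unit becomes 1. */
		printf("totalram  = %lu bytes\n", totalram << bitcount);
		printf("totalswap = %lu bytes\n", totalswap << bitcount);
		return 0;
	}
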
  2.1566 -
  2.1567 -/*
  2.1568 - * lockdep: we want to track each per-CPU base as a separate lock-class,
  2.1569 - * but timer-bases are kmalloc()-ed, so we need to attach separate
  2.1570 - * keys to them:
  2.1571 - */
  2.1572 -static struct lock_class_key base_lock_keys[NR_CPUS];
  2.1573 -
  2.1574 -static int __devinit init_timers_cpu(int cpu)
  2.1575 -{
  2.1576 -	int j;
  2.1577 -	tvec_base_t *base;
  2.1578 -	static char __devinitdata tvec_base_done[NR_CPUS];
  2.1579 -
  2.1580 -	if (!tvec_base_done[cpu]) {
  2.1581 -		static char boot_done;
  2.1582 -
  2.1583 -		if (boot_done) {
  2.1584 -			/*
  2.1585 -			 * The APs use this path later in boot
  2.1586 -			 */
  2.1587 -			base = kmalloc_node(sizeof(*base), GFP_KERNEL,
  2.1588 -						cpu_to_node(cpu));
  2.1589 -			if (!base)
  2.1590 -				return -ENOMEM;
  2.1591 -			memset(base, 0, sizeof(*base));
  2.1592 -			per_cpu(tvec_bases, cpu) = base;
  2.1593 -		} else {
  2.1594 -			/*
  2.1595 -			 * This is for the boot CPU - we use compile-time
  2.1596 -			 * static initialisation because per-cpu memory isn't
  2.1597 -			 * ready yet and because the memory allocators are not
  2.1598 -			 * initialised either.
  2.1599 -			 */
  2.1600 -			boot_done = 1;
  2.1601 -			base = &boot_tvec_bases;
  2.1602 -		}
  2.1603 -		tvec_base_done[cpu] = 1;
  2.1604 -	} else {
  2.1605 -		base = per_cpu(tvec_bases, cpu);
  2.1606 -	}
  2.1607 -
  2.1608 -	spin_lock_init(&base->lock);
  2.1609 -	lockdep_set_class(&base->lock, base_lock_keys + cpu);
  2.1610 -
  2.1611 -	for (j = 0; j < TVN_SIZE; j++) {
  2.1612 -		INIT_LIST_HEAD(base->tv5.vec + j);
  2.1613 -		INIT_LIST_HEAD(base->tv4.vec + j);
  2.1614 -		INIT_LIST_HEAD(base->tv3.vec + j);
  2.1615 -		INIT_LIST_HEAD(base->tv2.vec + j);
  2.1616 -	}
  2.1617 -	for (j = 0; j < TVR_SIZE; j++)
  2.1618 -		INIT_LIST_HEAD(base->tv1.vec + j);
  2.1619 -
  2.1620 -	base->timer_jiffies = jiffies;
  2.1621 -	return 0;
  2.1622 -}
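
The base_lock_keys trick above is the standard way to keep lockdep precise for dynamically allocated locks: without an explicit key, every kmalloc()-ed base would fall into one anonymous class keyed on the allocation site. A hedged sketch of the same pattern for a hypothetical per-device structure (here all instances deliberately share a single class, whereas the timer code hands each CPU's base its own key):

	#include <linux/spinlock.h>
	#include <linux/lockdep.h>
	#include <linux/slab.h>

	static struct lock_class_key mydev_lock_key;	/* one key per lock class */

	struct mydev {
		spinlock_t lock;
	};

	static struct mydev *mydev_alloc(void)
	{
		struct mydev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

		if (!dev)
			return NULL;
		spin_lock_init(&dev->lock);
		/* Attach the static key so lockdep tracks all mydev locks
		 * as one class instead of one class per allocation site. */
		lockdep_set_class(&dev->lock, &mydev_lock_key);
		return dev;
	}
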
  2.1623 -
  2.1624 -#ifdef CONFIG_HOTPLUG_CPU
  2.1625 -static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
  2.1626 -{
  2.1627 -	struct timer_list *timer;
  2.1628 -
  2.1629 -	while (!list_empty(head)) {
  2.1630 -		timer = list_entry(head->next, struct timer_list, entry);
  2.1631 -		detach_timer(timer, 0);
  2.1632 -		timer->base = new_base;
  2.1633 -		internal_add_timer(new_base, timer);
  2.1634 -	}
  2.1635 -}
  2.1636 -
  2.1637 -static void __devinit migrate_timers(int cpu)
  2.1638 -{
  2.1639 -	tvec_base_t *old_base;
  2.1640 -	tvec_base_t *new_base;
  2.1641 -	int i;
  2.1642 -
  2.1643 -	BUG_ON(cpu_online(cpu));
  2.1644 -	old_base = per_cpu(tvec_bases, cpu);
  2.1645 -	new_base = get_cpu_var(tvec_bases);
  2.1646 -
  2.1647 -	local_irq_disable();
  2.1648 -	spin_lock(&new_base->lock);
  2.1649 -	spin_lock(&old_base->lock);
  2.1650 -
  2.1651 -	BUG_ON(old_base->running_timer);
  2.1652 -
  2.1653 -	for (i = 0; i < TVR_SIZE; i++)
  2.1654 -		migrate_timer_list(new_base, old_base->tv1.vec + i);
  2.1655 -	for (i = 0; i < TVN_SIZE; i++) {
  2.1656 -		migrate_timer_list(new_base, old_base->tv2.vec + i);
  2.1657 -		migrate_timer_list(new_base, old_base->tv3.vec + i);
  2.1658 -		migrate_timer_list(new_base, old_base->tv4.vec + i);
  2.1659 -		migrate_timer_list(new_base, old_base->tv5.vec + i);
  2.1660 -	}
  2.1661 -
  2.1662 -	spin_unlock(&old_base->lock);
  2.1663 -	spin_unlock(&new_base->lock);
  2.1664 -	local_irq_enable();
  2.1665 -	put_cpu_var(tvec_bases);
  2.1666 -}
  2.1667 -#endif /* CONFIG_HOTPLUG_CPU */
  2.1668 -
  2.1669 -static int __cpuinit timer_cpu_notify(struct notifier_block *self,
  2.1670 -				unsigned long action, void *hcpu)
  2.1671 -{
  2.1672 -	long cpu = (long)hcpu;
  2.1673 -	switch(action) {
  2.1674 -	case CPU_UP_PREPARE:
  2.1675 -		if (init_timers_cpu(cpu) < 0)
  2.1676 -			return NOTIFY_BAD;
  2.1677 -		break;
  2.1678 -#ifdef CONFIG_HOTPLUG_CPU
  2.1679 -	case CPU_DEAD:
  2.1680 -		migrate_timers(cpu);
  2.1681 -		break;
  2.1682 -#endif
  2.1683 -	default:
  2.1684 -		break;
  2.1685 -	}
  2.1686 -	return NOTIFY_OK;
  2.1687 -}
  2.1688 -
  2.1689 -static struct notifier_block __cpuinitdata timers_nb = {
  2.1690 -	.notifier_call	= timer_cpu_notify,
  2.1691 -};
  2.1692 -
  2.1693 -
  2.1694 -void __init init_timers(void)
  2.1695 -{
  2.1696 -	timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
  2.1697 -				(void *)(long)smp_processor_id());
  2.1698 -	register_cpu_notifier(&timers_nb);
  2.1699 -	open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);
  2.1700 -}
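
Per-CPU subsystems of this kernel generation follow the same shape as init_timers(): run the setup for the boot CPU directly, then register a notifier so later CPUs are prepared on CPU_UP_PREPARE (and, with hotplug, torn down on CPU_DEAD). A minimal hypothetical user of the same API:

	#include <linux/cpu.h>
	#include <linux/init.h>
	#include <linux/notifier.h>
	#include <linux/percpu.h>
	#include <linux/smp.h>

	static DEFINE_PER_CPU(unsigned long, my_counter);

	static int __cpuinit my_cpu_notify(struct notifier_block *self,
					   unsigned long action, void *hcpu)
	{
		long cpu = (long)hcpu;

		if (action == CPU_UP_PREPARE)
			per_cpu(my_counter, cpu) = 0;	/* set up before the CPU runs */
		return NOTIFY_OK;
	}

	static struct notifier_block __cpuinitdata my_cpu_nb = {
		.notifier_call = my_cpu_notify,
	};

	static int __init my_subsys_init(void)
	{
		/* Cover the boot CPU by hand, then catch the rest via the notifier. */
		my_cpu_notify(&my_cpu_nb, CPU_UP_PREPARE,
			      (void *)(long)smp_processor_id());
		register_cpu_notifier(&my_cpu_nb);
		return 0;
	}
	__initcall(my_subsys_init);
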
  2.1701 -
  2.1702 -#ifdef CONFIG_TIME_INTERPOLATION
  2.1703 -
  2.1704 -struct time_interpolator *time_interpolator __read_mostly;
  2.1705 -static struct time_interpolator *time_interpolator_list __read_mostly;
  2.1706 -static DEFINE_SPINLOCK(time_interpolator_lock);
  2.1707 -
  2.1708 -static inline u64 time_interpolator_get_cycles(unsigned int src)
  2.1709 -{
  2.1710 -	unsigned long (*x)(void);
  2.1711 -
  2.1712 -	switch (src)
  2.1713 -	{
  2.1714 -		case TIME_SOURCE_FUNCTION:
  2.1715 -			x = time_interpolator->addr;
  2.1716 -			return x();
  2.1717 -
   2.1718 -		case TIME_SOURCE_MMIO64:
  2.1719 -			return readq_relaxed((void __iomem *)time_interpolator->addr);
  2.1720 -
   2.1721 -		case TIME_SOURCE_MMIO32:
  2.1722 -			return readl_relaxed((void __iomem *)time_interpolator->addr);
  2.1723 -
  2.1724 -		default: return get_cycles();
  2.1725 -	}
  2.1726 -}
  2.1727 -
  2.1728 -static inline u64 time_interpolator_get_counter(int writelock)
  2.1729 -{
  2.1730 -	unsigned int src = time_interpolator->source;
  2.1731 -
  2.1732 -	if (time_interpolator->jitter)
  2.1733 -	{
  2.1734 -		u64 lcycle;
  2.1735 -		u64 now;
  2.1736 -
  2.1737 -		do {
  2.1738 -			lcycle = time_interpolator->last_cycle;
  2.1739 -			now = time_interpolator_get_cycles(src);
  2.1740 -			if (lcycle && time_after(lcycle, now))
  2.1741 -				return lcycle;
  2.1742 -
  2.1743 -			/* When holding the xtime write lock, there's no need
  2.1744 -			 * to add the overhead of the cmpxchg.  Readers are
   2.1745 -			 * forced to retry until the write lock is released.
  2.1746 -			 */
  2.1747 -			if (writelock) {
  2.1748 -				time_interpolator->last_cycle = now;
  2.1749 -				return now;
  2.1750 -			}
  2.1751 -			/* Keep track of the last timer value returned. The use of cmpxchg here
  2.1752 -			 * will cause contention in an SMP environment.
  2.1753 -			 */
  2.1754 -		} while (unlikely(cmpxchg(&time_interpolator->last_cycle, lcycle, now) != lcycle));
  2.1755 -		return now;
  2.1756 -	}
  2.1757 -	else
  2.1758 -		return time_interpolator_get_cycles(src);
  2.1759 -}
  2.1760 -
  2.1761 -void time_interpolator_reset(void)
  2.1762 -{
  2.1763 -	time_interpolator->offset = 0;
  2.1764 -	time_interpolator->last_counter = time_interpolator_get_counter(1);
  2.1765 -}
  2.1766 -
  2.1767 -#define GET_TI_NSECS(count,i) (((((count) - i->last_counter) & (i)->mask) * (i)->nsec_per_cyc) >> (i)->shift)
  2.1768 -
  2.1769 -unsigned long time_interpolator_get_offset(void)
  2.1770 -{
  2.1771 -	/* If we do not have a time interpolator set up then just return zero */
  2.1772 -	if (!time_interpolator)
  2.1773 -		return 0;
  2.1774 -
  2.1775 -	return time_interpolator->offset +
  2.1776 -		GET_TI_NSECS(time_interpolator_get_counter(0), time_interpolator);
  2.1777 -}
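
GET_TI_NSECS() is plain fixed-point arithmetic: register_time_interpolator() precomputes nsec_per_cyc = (NSEC_PER_SEC << shift) / frequency, so multiplying the masked cycle delta by nsec_per_cyc and shifting back down yields nanoseconds. A worked example with illustrative numbers (10 MHz counter, shift of 16, 2500 elapsed cycles):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t freq = 10000000;	/* 10 MHz => 100 ns per cycle */
		unsigned int shift = 16;
		uint64_t nsec_per_cyc = (1000000000ULL << shift) / freq;
		uint64_t delta_cycles = 2500;

		/* Same multiply/shift as GET_TI_NSECS(). */
		uint64_t nsecs = (delta_cycles * nsec_per_cyc) >> shift;

		/* Prints: 2500 cycles -> 250000 ns */
		printf("%llu cycles -> %llu ns\n",
		       (unsigned long long)delta_cycles,
		       (unsigned long long)nsecs);
		return 0;
	}
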
  2.1778 -
  2.1779 -#define INTERPOLATOR_ADJUST 65536
  2.1780 -#define INTERPOLATOR_MAX_SKIP 10*INTERPOLATOR_ADJUST
  2.1781 -
  2.1782 -static void time_interpolator_update(long delta_nsec)
  2.1783 -{
  2.1784 -	u64 counter;
  2.1785 -	unsigned long offset;
  2.1786 -
  2.1787 -	/* If there is no time interpolator set up then do nothing */
  2.1788 -	if (!time_interpolator)
  2.1789 -		return;
  2.1790 -
  2.1791 -	/*
  2.1792 -	 * The interpolator compensates for late ticks by accumulating the late
  2.1793 -	 * time in time_interpolator->offset. A tick earlier than expected will
  2.1794 -	 * lead to a reset of the offset and a corresponding jump of the clock
  2.1795 -	 * forward. Again this only works if the interpolator clock is running
   2.1796 -	 * slightly slower than the regular clock and the tuning logic ensures
  2.1797 -	 * that.
  2.1798 -	 */
  2.1799 -
  2.1800 -	counter = time_interpolator_get_counter(1);
  2.1801 -	offset = time_interpolator->offset +
  2.1802 -			GET_TI_NSECS(counter, time_interpolator);
  2.1803 -
  2.1804 -	if (delta_nsec < 0 || (unsigned long) delta_nsec < offset)
  2.1805 -		time_interpolator->offset = offset - delta_nsec;
  2.1806 -	else {
  2.1807 -		time_interpolator->skips++;
  2.1808 -		time_interpolator->ns_skipped += delta_nsec - offset;
  2.1809 -		time_interpolator->offset = 0;
  2.1810 -	}
  2.1811 -	time_interpolator->last_counter = counter;
  2.1812 -
  2.1813 -	/* Tuning logic for time interpolator invoked every minute or so.
  2.1814 -	 * Decrease interpolator clock speed if no skips occurred and an offset is carried.
  2.1815 -	 * Increase interpolator clock speed if we skip too much time.
  2.1816 -	 */
  2.1817 -	if (jiffies % INTERPOLATOR_ADJUST == 0)
  2.1818 -	{
  2.1819 -		if (time_interpolator->skips == 0 && time_interpolator->offset > tick_nsec)
  2.1820 -			time_interpolator->nsec_per_cyc--;
  2.1821 -		if (time_interpolator->ns_skipped > INTERPOLATOR_MAX_SKIP && time_interpolator->offset == 0)
  2.1822 -			time_interpolator->nsec_per_cyc++;
  2.1823 -		time_interpolator->skips = 0;
  2.1824 -		time_interpolator->ns_skipped = 0;
  2.1825 -	}
  2.1826 -}
  2.1827 -
  2.1828 -static inline int
  2.1829 -is_better_time_interpolator(struct time_interpolator *new)
  2.1830 -{
  2.1831 -	if (!time_interpolator)
  2.1832 -		return 1;
  2.1833 -	return new->frequency > 2*time_interpolator->frequency ||
  2.1834 -	    (unsigned long)new->drift < (unsigned long)time_interpolator->drift;
  2.1835 -}
  2.1836 -
  2.1837 -void
  2.1838 -register_time_interpolator(struct time_interpolator *ti)
  2.1839 -{
  2.1840 -	unsigned long flags;
  2.1841 -
  2.1842 -	/* Sanity check */
  2.1843 -	BUG_ON(ti->frequency == 0 || ti->mask == 0);
  2.1844 -
  2.1845 -	ti->nsec_per_cyc = ((u64)NSEC_PER_SEC << ti->shift) / ti->frequency;
  2.1846 -	spin_lock(&time_interpolator_lock);
  2.1847 -	write_seqlock_irqsave(&xtime_lock, flags);
  2.1848 -	if (is_better_time_interpolator(ti)) {
  2.1849 -		time_interpolator = ti;
  2.1850 -		time_interpolator_reset();
  2.1851 -	}
  2.1852 -	write_sequnlock_irqrestore(&xtime_lock, flags);
  2.1853 -
  2.1854 -	ti->next = time_interpolator_list;
  2.1855 -	time_interpolator_list = ti;
  2.1856 -	spin_unlock(&time_interpolator_lock);
  2.1857 -}
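
Platform code registers an interpolator by filling in a struct time_interpolator and handing it to register_time_interpolator(); the fields used below are the ones referenced in this file, but the concrete values (and the 4 MHz memory-mapped counter they describe) are purely illustrative:

	#include <linux/timex.h>
	#include <linux/init.h>

	static struct time_interpolator my_interpolator = {
		.source		= TIME_SOURCE_MMIO32,
		.shift		= 16,
		.frequency	= 4000000,	/* illustrative 4 MHz counter */
		.drift		= -1,		/* drift unknown */
		.mask		= 0xffffffffULL, /* 32-bit counter wrap mask */
	};

	static int __init my_time_init(void)
	{
		/* Real code would first point .addr at the ioremap()-ed
		 * counter register; that platform-specific step is omitted. */
		register_time_interpolator(&my_interpolator);
		return 0;
	}
	__initcall(my_time_init);
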
  2.1858 -
  2.1859 -void
  2.1860 -unregister_time_interpolator(struct time_interpolator *ti)
  2.1861 -{
  2.1862 -	struct time_interpolator *curr, **prev;
  2.1863 -	unsigned long flags;
  2.1864 -
  2.1865 -	spin_lock(&time_interpolator_lock);
  2.1866 -	prev = &time_interpolator_list;
  2.1867 -	for (curr = *prev; curr; curr = curr->next) {
  2.1868 -		if (curr == ti) {
  2.1869 -			*prev = curr->next;
  2.1870 -			break;
  2.1871 -		}
  2.1872 -		prev = &curr->next;
  2.1873 -	}
  2.1874 -
  2.1875 -	write_seqlock_irqsave(&xtime_lock, flags);
  2.1876 -	if (ti == time_interpolator) {
  2.1877 -		/* we lost the best time-interpolator: */
  2.1878 -		time_interpolator = NULL;
  2.1879 -		/* find the next-best interpolator */
  2.1880 -		for (curr = time_interpolator_list; curr; curr = curr->next)
  2.1881 -			if (is_better_time_interpolator(curr))
  2.1882 -				time_interpolator = curr;
  2.1883 -		time_interpolator_reset();
  2.1884 -	}
  2.1885 -	write_sequnlock_irqrestore(&xtime_lock, flags);
  2.1886 -	spin_unlock(&time_interpolator_lock);
  2.1887 -}
  2.1888 -#endif /* CONFIG_TIME_INTERPOLATION */
  2.1889 -
  2.1890 -/**
  2.1891 - * msleep - sleep safely even with waitqueue interruptions
  2.1892 - * @msecs: Time in milliseconds to sleep for
  2.1893 - */
  2.1894 -void msleep(unsigned int msecs)
  2.1895 -{
  2.1896 -	unsigned long timeout = msecs_to_jiffies(msecs) + 1;
  2.1897 -
  2.1898 -	while (timeout)
  2.1899 -		timeout = schedule_timeout_uninterruptible(timeout);
  2.1900 -}
  2.1901 -
  2.1902 -EXPORT_SYMBOL(msleep);
  2.1903 -
  2.1904 -/**
  2.1905 - * msleep_interruptible - sleep waiting for signals
  2.1906 - * @msecs: Time in milliseconds to sleep for
  2.1907 - */
  2.1908 -unsigned long msleep_interruptible(unsigned int msecs)
  2.1909 -{
  2.1910 -	unsigned long timeout = msecs_to_jiffies(msecs) + 1;
  2.1911 -
  2.1912 -	while (timeout && !signal_pending(current))
  2.1913 -		timeout = schedule_timeout_interruptible(timeout);
  2.1914 -	return jiffies_to_msecs(timeout);
  2.1915 -}
  2.1916 -
  2.1917 -EXPORT_SYMBOL(msleep_interruptible);
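
Callers typically reach for msleep() when the delay must run to completion and msleep_interruptible() when a pending signal should cut it short; the return value is the time remaining, in milliseconds, when the sleep ended early. A hedged sketch of a hypothetical helper built on both:

	#include <linux/delay.h>
	#include <linux/errno.h>

	/* Wait up to 500 ms for hardware to settle, but bail out early if
	 * the calling process is being signalled. */
	static int wait_for_settle(void)
	{
		if (msleep_interruptible(500))	/* nonzero => woken by a signal */
			return -ERESTARTSYS;

		msleep(10);	/* short unconditional settling delay */
		return 0;
	}
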