direct-io.hg

view linux-2.4.30-xen-sparse/kernel/timer.c @ 5517:10e9028c8e3d

bitkeeper revision 1.1718.1.10 (42b7b19aqOS_1M8I4pIOFjiTPYWV-g)

Merge bk://xenbits.xensource.com/xen-unstable.bk
into spot.cl.cam.ac.uk:C:/Documents and Settings/iap10/xen-unstable.bk
author iap10@spot.cl.cam.ac.uk
date Tue Jun 21 06:20:10 2005 +0000 (2005-06-21)
parents 85fcf3b1b7a5
children 56a63f9f378f
line source
1 /*
2 * linux/kernel/timer.c
3 *
4 * Kernel internal timers, kernel timekeeping, basic process system calls
5 *
6 * Copyright (C) 1991, 1992 Linus Torvalds
7 *
8 * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
9 *
10 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
11 * "A Kernel Model for Precision Timekeeping" by Dave Mills
12 * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
13 * serialize accesses to xtime/lost_ticks).
14 * Copyright (C) 1998 Andrea Arcangeli
15 * 1999-03-10 Improved NTP compatibility by Ulrich Windl
16 */
18 #include <linux/config.h>
19 #include <linux/mm.h>
20 #include <linux/timex.h>
21 #include <linux/delay.h>
22 #include <linux/smp_lock.h>
23 #include <linux/interrupt.h>
24 #include <linux/kernel_stat.h>
26 #include <asm/uaccess.h>
28 /*
29 * Timekeeping variables
30 */
32 long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */
34 /* The current time */
35 struct timeval xtime __attribute__ ((aligned (16)));
37 /* Don't completely fail for HZ > 500. */
38 int tickadj = 500/HZ ? : 1; /* microsecs */
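For concreteness, with the common HZ == 100 configuration these initializers work out as follows (worked numbers for illustration only):

/*
 * HZ == 100 (the usual i386 configuration of this era):
 *   tick    = (1000000 + 50) / 100 = 10000 us between timer interrupts
 *   tickadj = 500 / 100            = 5 us of adjtime() slew per tick
 * The "?:" fallback only matters for HZ > 500, where 500/HZ would be 0.
 */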
40 DECLARE_TASK_QUEUE(tq_timer);
41 DECLARE_TASK_QUEUE(tq_immediate);
43 /*
44 * phase-lock loop variables
45 */
46 /* TIME_ERROR prevents overwriting the CMOS clock */
47 int time_state = TIME_OK; /* clock synchronization status */
48 int time_status = STA_UNSYNC; /* clock status bits */
49 long time_offset; /* time adjustment (us) */
50 long time_constant = 2; /* pll time constant */
51 long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */
52 long time_precision = 1; /* clock precision (us) */
53 long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
54 long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
55 long time_phase; /* phase offset (scaled us) */
56 long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
57 /* frequency offset (scaled ppm)*/
58 long time_adj; /* tick adjust (scaled 1 / HZ) */
59 long time_reftime; /* time at last adjustment (s) */
61 long time_adjust;
62 long time_adjust_step;
64 unsigned long event;
66 extern int do_setitimer(int, struct itimerval *, struct itimerval *);
68 unsigned long volatile jiffies;
70 unsigned int * prof_buffer;
71 unsigned long prof_len;
72 unsigned long prof_shift;
74 /*
75 * Event timer code
76 */
77 #define TVN_BITS 6
78 #define TVR_BITS 8
79 #define TVN_SIZE (1 << TVN_BITS)
80 #define TVR_SIZE (1 << TVR_BITS)
81 #define TVN_MASK (TVN_SIZE - 1)
82 #define TVR_MASK (TVR_SIZE - 1)
84 struct timer_vec {
85 int index;
86 struct list_head vec[TVN_SIZE];
87 };
89 struct timer_vec_root {
90 int index;
91 struct list_head vec[TVR_SIZE];
92 };
94 static struct timer_vec tv5;
95 static struct timer_vec tv4;
96 static struct timer_vec tv3;
97 static struct timer_vec tv2;
98 static struct timer_vec_root tv1;
100 static struct timer_vec * const tvecs[] = {
101 (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
102 };
104 static struct list_head * run_timer_list_running;
106 #define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
108 void init_timervecs (void)
109 {
110 int i;
112 for (i = 0; i < TVN_SIZE; i++) {
113 INIT_LIST_HEAD(tv5.vec + i);
114 INIT_LIST_HEAD(tv4.vec + i);
115 INIT_LIST_HEAD(tv3.vec + i);
116 INIT_LIST_HEAD(tv2.vec + i);
117 }
118 for (i = 0; i < TVR_SIZE; i++)
119 INIT_LIST_HEAD(tv1.vec + i);
120 }
122 static unsigned long timer_jiffies;
124 static inline void internal_add_timer(struct timer_list *timer)
125 {
126 /*
127 * must be cli-ed when calling this
128 */
129 unsigned long expires = timer->expires;
130 unsigned long idx = expires - timer_jiffies;
131 struct list_head * vec;
133 if (run_timer_list_running)
134 vec = run_timer_list_running;
135 else if (idx < TVR_SIZE) {
136 int i = expires & TVR_MASK;
137 vec = tv1.vec + i;
138 } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
139 int i = (expires >> TVR_BITS) & TVN_MASK;
140 vec = tv2.vec + i;
141 } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
142 int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
143 vec = tv3.vec + i;
144 } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
145 int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
146 vec = tv4.vec + i;
147 } else if ((signed long) idx < 0) {
148 /* can happen if you add a timer with expires == jiffies,
149 * or you set a timer to go off in the past
150 */
151 vec = tv1.vec + tv1.index;
152 } else if (idx <= 0xffffffffUL) {
153 int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
154 vec = tv5.vec + i;
155 } else {
156 /* Can only get here on architectures with 64-bit jiffies */
157 INIT_LIST_HEAD(&timer->list);
158 return;
159 }
160 /*
161 * Timers are FIFO!
162 */
163 list_add(&timer->list, vec->prev);
164 }
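To make the bucket selection concrete, a worked example with the TVR_BITS == 8, TVN_BITS == 6 geometry defined above (illustrative values only):

/*
 * Suppose timer_jiffies == 1000:
 *   expires 1003   -> idx 3      -> tv1.vec[1003 & 255]            (vec[235])
 *   expires 1500   -> idx 500    -> tv2.vec[(1500 >> 8) & 63]      (vec[5])
 *   expires 200000 -> idx 199000 -> tv3.vec[(200000 >> 14) & 63]   (vec[12])
 *   expires 998    -> idx < 0    -> tv1.vec[tv1.index], fires next tick
 * run_timer_list() drains tv1.vec[tv1.index] once per tick; when tv1.index
 * wraps to 0, cascade_timers() refills tv1 from tv2, and so on up the chain.
 */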
166 /* Initialize both explicitly - let's try to have them in the same cache line */
167 spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
169 #ifdef CONFIG_SMP
170 volatile struct timer_list * volatile running_timer;
171 #define timer_enter(t) do { running_timer = t; mb(); } while (0)
172 #define timer_exit() do { running_timer = NULL; } while (0)
173 #define timer_is_running(t) (running_timer == t)
174 #define timer_synchronize(t) while (timer_is_running(t)) barrier()
175 #else
176 #define timer_enter(t) do { } while (0)
177 #define timer_exit() do { } while (0)
178 #endif
180 void add_timer(struct timer_list *timer)
181 {
182 unsigned long flags;
184 spin_lock_irqsave(&timerlist_lock, flags);
185 if (timer_pending(timer))
186 goto bug;
187 internal_add_timer(timer);
188 spin_unlock_irqrestore(&timerlist_lock, flags);
189 return;
190 bug:
191 spin_unlock_irqrestore(&timerlist_lock, flags);
192 printk("bug: kernel timer added twice at %p.\n",
193 __builtin_return_address(0));
194 }
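A minimal sketch of the usual 2.4-era calling pattern; my_timer, my_timeout and start_my_timer are placeholder names, not part of this file:

#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/sched.h>            /* jiffies, HZ */

static struct timer_list my_timer;  /* hypothetical example timer */

static void my_timeout(unsigned long data)
{
        /* Runs from the timer bottom half with interrupts enabled. */
        printk(KERN_DEBUG "timer fired, data=%lu\n", data);
}

static void start_my_timer(void)
{
        init_timer(&my_timer);             /* clears list.next/list.prev */
        my_timer.function = my_timeout;
        my_timer.data     = 42;
        my_timer.expires  = jiffies + HZ;  /* roughly one second from now */
        add_timer(&my_timer);
}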
196 static inline int detach_timer (struct timer_list *timer)
197 {
198 if (!timer_pending(timer))
199 return 0;
200 list_del(&timer->list);
201 return 1;
202 }
204 int mod_timer(struct timer_list *timer, unsigned long expires)
205 {
206 int ret;
207 unsigned long flags;
209 spin_lock_irqsave(&timerlist_lock, flags);
210 timer->expires = expires;
211 ret = detach_timer(timer);
212 internal_add_timer(timer);
213 spin_unlock_irqrestore(&timerlist_lock, flags);
214 return ret;
215 }
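mod_timer() behaves like del_timer() followed by add_timer(), but atomically with respect to the timer list; a common way to get a periodic timer is to re-arm it from its own handler (sketch, placeholder names):

static void my_periodic(unsigned long data)
{
        do_periodic_work(data);                   /* hypothetical helper */
        mod_timer(&my_timer, jiffies + HZ / 10);  /* run again in ~100 ms */
}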
217 int del_timer(struct timer_list * timer)
218 {
219 int ret;
220 unsigned long flags;
222 spin_lock_irqsave(&timerlist_lock, flags);
223 ret = detach_timer(timer);
224 timer->list.next = timer->list.prev = NULL;
225 spin_unlock_irqrestore(&timerlist_lock, flags);
226 return ret;
227 }
229 #ifdef CONFIG_SMP
230 void sync_timers(void)
231 {
232 spin_unlock_wait(&global_bh_lock);
233 }
235 /*
236 * SMP-specific function to delete a periodic timer. The caller must
237 * prevent the timer from being re-armed by other means. On return the
238 * timer is not queued and its handler is not running on any CPU. The
239 * return value is the number of times the timer was deleted
240 * (for reference counting).
241 */
243 int del_timer_sync(struct timer_list * timer)
244 {
245 int ret = 0;
247 for (;;) {
248 unsigned long flags;
249 int running;
251 spin_lock_irqsave(&timerlist_lock, flags);
252 ret += detach_timer(timer);
253 timer->list.next = timer->list.prev = NULL;
254 running = timer_is_running(timer);
255 spin_unlock_irqrestore(&timerlist_lock, flags);
257 if (!running)
258 break;
260 timer_synchronize(timer);
261 }
263 return ret;
264 }
265 #endif
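On uniprocessor builds del_timer_sync() is expected to reduce to plain del_timer() in <linux/timer.h>; either way the shutdown pattern is the same (sketch, placeholder names):

static void stop_my_timer(void)
{
        /* On return the timer is not queued and its handler has finished. */
        del_timer_sync(&my_timer);
        /* The caller must also ensure nothing re-arms the timer after this. */
}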
268 static inline void cascade_timers(struct timer_vec *tv)
269 {
270 /* cascade all the timers from tv up one level */
271 struct list_head *head, *curr, *next;
273 head = tv->vec + tv->index;
274 curr = head->next;
275 /*
276 * We are removing _all_ timers from the list, so we don't have to
277 * detach them individually, just clear the list afterwards.
278 */
279 while (curr != head) {
280 struct timer_list *tmp;
282 tmp = list_entry(curr, struct timer_list, list);
283 next = curr->next;
284 list_del(curr); // not needed
285 internal_add_timer(tmp);
286 curr = next;
287 }
288 INIT_LIST_HEAD(head);
289 tv->index = (tv->index + 1) & TVN_MASK;
290 }
292 static inline void run_timer_list(void)
293 {
294 spin_lock_irq(&timerlist_lock);
295 while ((long)(jiffies - timer_jiffies) >= 0) {
296 LIST_HEAD(queued);
297 struct list_head *head, *curr;
298 if (!tv1.index) {
299 int n = 1;
300 do {
301 cascade_timers(tvecs[n]);
302 } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
303 }
304 run_timer_list_running = &queued;
305 repeat:
306 head = tv1.vec + tv1.index;
307 curr = head->next;
308 if (curr != head) {
309 struct timer_list *timer;
310 void (*fn)(unsigned long);
311 unsigned long data;
313 timer = list_entry(curr, struct timer_list, list);
314 fn = timer->function;
315 data = timer->data;
317 detach_timer(timer);
318 timer->list.next = timer->list.prev = NULL;
319 timer_enter(timer);
320 spin_unlock_irq(&timerlist_lock);
321 fn(data);
322 spin_lock_irq(&timerlist_lock);
323 timer_exit();
324 goto repeat;
325 }
326 run_timer_list_running = NULL;
327 ++timer_jiffies;
328 tv1.index = (tv1.index + 1) & TVR_MASK;
330 curr = queued.next;
331 while (curr != &queued) {
332 struct timer_list *timer;
334 timer = list_entry(curr, struct timer_list, list);
335 curr = curr->next;
336 internal_add_timer(timer);
337 }
338 }
339 spin_unlock_irq(&timerlist_lock);
340 }
342 #ifdef CONFIG_NO_IDLE_HZ
343 /*
344 * Find out when the next timer event is due to happen. This
345 * is used on S/390, and likewise in XenoLinux, to stop all
346 * activity when all CPUs are idle.
347 * The timerlist_lock must be acquired before calling this function.
348 */
349 struct timer_list *next_timer_event(void)
350 {
351 struct timer_list *nte, *tmp;
352 struct list_head *lst;
353 int i, j;
355 /* Look for the next timer event in tv1. */
356 i = 0;
357 j = tvecs[0]->index;
358 do {
359 struct list_head *head = tvecs[0]->vec + j;
360 if (!list_empty(head)) {
361 nte = list_entry(head->next, struct timer_list, list);
362 goto found;
363 }
364 j = (j + 1) & TVR_MASK;
365 } while (j != tv1.index);
367 /* No event found in tv1. Check tv2-tv5. */
368 for (i = 1; i < NOOF_TVECS; i++) {
369 j = tvecs[i]->index;
370 do {
371 nte = NULL;
372 list_for_each(lst, tvecs[i]->vec + j) {
373 tmp = list_entry(lst, struct timer_list, list);
374 if (nte == NULL ||
375 time_before(tmp->expires, nte->expires))
376 nte = tmp;
377 }
378 if (nte)
379 goto found;
380 j = (j + 1) & TVN_MASK;
381 } while (j != tvecs[i]->index);
382 }
383 return NULL;
384 found:
385 /* Found timer event in tvecs[i]->vec[j] */
386 if (j < tvecs[i]->index && i < NOOF_TVECS-1) {
387 /*
388 * The search wrapped. We need to look at the next list
389 * from tvecs[i+1] that would cascade into tvecs[i].
390 */
391 list_for_each(lst, tvecs[i+1]->vec+tvecs[i+1]->index) {
392 tmp = list_entry(lst, struct timer_list, list);
393 if (time_before(tmp->expires, nte->expires))
394 nte = tmp;
395 }
396 }
397 return nte;
398 }
399 #endif
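A hedged sketch of how a dynamic-tick idle path might consume next_timer_event(); the only firm requirement stated above is that timerlist_lock is held, the rest is assumption:

static unsigned long ticks_until_next_timer(void)
{
        struct timer_list *t;
        unsigned long delta = ~0UL;           /* "no timer pending" */
        unsigned long flags;

        spin_lock_irqsave(&timerlist_lock, flags);
        t = next_timer_event();
        if (t)
                delta = t->expires - jiffies; /* may be 0 if already due */
        spin_unlock_irqrestore(&timerlist_lock, flags);
        return delta;
}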
401 spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;
403 void tqueue_bh(void)
404 {
405 run_task_queue(&tq_timer);
406 }
408 void immediate_bh(void)
409 {
410 run_task_queue(&tq_immediate);
411 }
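These bottom halves simply drain the two task queues declared near the top of the file; the classic way to put work on them is queue_task() followed by mark_bh() (sketch, placeholder names):

#include <linux/tqueue.h>
#include <linux/interrupt.h>

static void my_deferred(void *data)
{
        /* Runs in bottom-half context with interrupts enabled. */
}

static struct tq_struct my_task;

static void kick_deferred_work(void *data)
{
        my_task.routine = my_deferred;
        my_task.data    = data;
        queue_task(&my_task, &tq_immediate);   /* picked up by immediate_bh() */
        mark_bh(IMMEDIATE_BH);
}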
413 /*
414 * this routine handles the overflow of the microsecond field
415 *
416 * The tricky bits of code to handle the accurate clock support
417 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
418 * They were originally developed for SUN and DEC kernels.
419 * All the kudos should go to Dave for this stuff.
420 *
421 */
422 static void second_overflow(void)
423 {
424 long ltemp;
426 /* Bump the maxerror field */
427 time_maxerror += time_tolerance >> SHIFT_USEC;
428 if ( time_maxerror > NTP_PHASE_LIMIT ) {
429 time_maxerror = NTP_PHASE_LIMIT;
430 time_status |= STA_UNSYNC;
431 }
433 /*
434 * Leap second processing. If in leap-insert state at
435 * the end of the day, the system clock is set back one
436 * second; if in leap-delete state, the system clock is
437 * set ahead one second. The microtime() routine or
438 * external clock driver will insure that reported time
439 * is always monotonic. The ugly divides should be
440 * replaced.
441 */
442 switch (time_state) {
444 case TIME_OK:
445 if (time_status & STA_INS)
446 time_state = TIME_INS;
447 else if (time_status & STA_DEL)
448 time_state = TIME_DEL;
449 break;
451 case TIME_INS:
452 if (xtime.tv_sec % 86400 == 0) {
453 xtime.tv_sec--;
454 time_state = TIME_OOP;
455 printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
456 }
457 break;
459 case TIME_DEL:
460 if ((xtime.tv_sec + 1) % 86400 == 0) {
461 xtime.tv_sec++;
462 time_state = TIME_WAIT;
463 printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
464 }
465 break;
467 case TIME_OOP:
468 time_state = TIME_WAIT;
469 break;
471 case TIME_WAIT:
472 if (!(time_status & (STA_INS | STA_DEL)))
473 time_state = TIME_OK;
474 }
476 /*
477 * Compute the phase adjustment for the next second. In
478 * PLL mode, the offset is reduced by a fixed factor
479 * times the time constant. In FLL mode the offset is
480 * used directly. In either mode, the maximum phase
481 * adjustment for each second is clamped so as to spread
482 * the adjustment over not more than the number of
483 * seconds between updates.
484 */
485 if (time_offset < 0) {
486 ltemp = -time_offset;
487 if (!(time_status & STA_FLL))
488 ltemp >>= SHIFT_KG + time_constant;
489 if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
490 ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
491 time_offset += ltemp;
492 time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
493 } else {
494 ltemp = time_offset;
495 if (!(time_status & STA_FLL))
496 ltemp >>= SHIFT_KG + time_constant;
497 if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
498 ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
499 time_offset -= ltemp;
500 time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
501 }
503 /*
504 * Compute the frequency estimate and additional phase
505 * adjustment due to frequency error for the next
506 * second. When the PPS signal is engaged, gnaw on the
507 * watchdog counter and update the frequency computed by
508 * the pll and the PPS signal.
509 */
510 pps_valid++;
511 if (pps_valid == PPS_VALID) { /* PPS signal lost */
512 pps_jitter = MAXTIME;
513 pps_stabil = MAXFREQ;
514 time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
515 STA_PPSWANDER | STA_PPSERROR);
516 }
517 ltemp = time_freq + pps_freq;
518 if (ltemp < 0)
519 time_adj -= -ltemp >>
520 (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
521 else
522 time_adj += ltemp >>
523 (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
525 #if HZ == 100
526 /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
527 * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
528 */
529 if (time_adj < 0)
530 time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
531 else
532 time_adj += (time_adj >> 2) + (time_adj >> 5);
533 #endif
534 }
536 /* in the NTP reference this is called "hardclock()" */
537 static void update_wall_time_one_tick(void)
538 {
539 if ( (time_adjust_step = time_adjust) != 0 ) {
540 /* We are doing an adjtime thing.
541 *
542 * Prepare time_adjust_step to be within bounds.
543 * Note that a positive time_adjust means we want the clock
544 * to run faster.
545 *
546 * Limit the amount of the step to be in the range
547 * -tickadj .. +tickadj
548 */
549 if (time_adjust > tickadj)
550 time_adjust_step = tickadj;
551 else if (time_adjust < -tickadj)
552 time_adjust_step = -tickadj;
554 /* Reduce by this step the amount of time left */
555 time_adjust -= time_adjust_step;
556 }
557 xtime.tv_usec += tick + time_adjust_step;
558 /*
559 * Advance the phase, once it gets to one microsecond, then
560 * advance the tick more.
561 */
562 time_phase += time_adj;
563 if (time_phase <= -FINEUSEC) {
564 long ltemp = -time_phase >> SHIFT_SCALE;
565 time_phase += ltemp << SHIFT_SCALE;
566 xtime.tv_usec -= ltemp;
567 }
568 else if (time_phase >= FINEUSEC) {
569 long ltemp = time_phase >> SHIFT_SCALE;
570 time_phase -= ltemp << SHIFT_SCALE;
571 xtime.tv_usec += ltemp;
572 }
573 }
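A worked example of the slewing above, assuming HZ == 100 so that tick == 10000 and tickadj == 5:

/*
 * An adjtime() call that sets time_adjust = +3000 us is applied at most
 * tickadj us per tick: each tick adds 10000 + 5 us to xtime.tv_usec, so
 * the 3000 us correction is spread over 3000 / 5 = 600 ticks (about six
 * seconds) and the clock never jumps or runs backwards while slewing.
 */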
575 /*
576 * Using a loop looks inefficient, but "ticks" is
577 * usually just one (we shouldn't be losing ticks,
578 * we're doing it this way mainly for interrupt
579 * latency reasons, not because we expect
580 * lots of lost timer ticks).
581 */
582 static void update_wall_time(unsigned long ticks)
583 {
584 do {
585 ticks--;
586 update_wall_time_one_tick();
587 } while (ticks);
589 while (xtime.tv_usec >= 1000000) {
590 xtime.tv_usec -= 1000000;
591 xtime.tv_sec++;
592 second_overflow();
593 }
594 }
596 static inline void do_process_times(struct task_struct *p,
597 unsigned long user, unsigned long system)
598 {
599 unsigned long psecs;
601 psecs = (p->times.tms_utime += user);
602 psecs += (p->times.tms_stime += system);
603 if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
604 /* Send SIGXCPU every second.. */
605 if (!(psecs % HZ))
606 send_sig(SIGXCPU, p, 1);
607 /* and SIGKILL when we go over max.. */
608 if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
609 send_sig(SIGKILL, p, 1);
610 }
611 }
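Seen from user space, the accounting above is what drives RLIMIT_CPU; a minimal illustration using only standard POSIX calls (nothing here is specific to this file):

#include <sys/resource.h>
#include <signal.h>

static void on_xcpu(int sig)
{
        (void)sig;                     /* soft limit hit; tidy up or ignore */
}

int main(void)
{
        struct rlimit rl = { 2, 5 };   /* soft 2 s, hard 5 s of CPU time */

        signal(SIGXCPU, on_xcpu);      /* sent once per second past the soft limit */
        setrlimit(RLIMIT_CPU, &rl);    /* SIGKILL once usage passes the hard limit */
        for (;;)
                ;                      /* burn CPU */
}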
613 static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
614 {
615 unsigned long it_virt = p->it_virt_value;
617 if (it_virt) {
618 it_virt -= ticks;
619 if (!it_virt) {
620 it_virt = p->it_virt_incr;
621 send_sig(SIGVTALRM, p, 1);
622 }
623 p->it_virt_value = it_virt;
624 }
625 }
627 static inline void do_it_prof(struct task_struct *p)
628 {
629 unsigned long it_prof = p->it_prof_value;
631 if (it_prof) {
632 if (--it_prof == 0) {
633 it_prof = p->it_prof_incr;
634 send_sig(SIGPROF, p, 1);
635 }
636 p->it_prof_value = it_prof;
637 }
638 }
640 void update_one_process(struct task_struct *p, unsigned long user,
641 unsigned long system, int cpu)
642 {
643 p->per_cpu_utime[cpu] += user;
644 p->per_cpu_stime[cpu] += system;
645 do_process_times(p, user, system);
646 do_it_virt(p, user);
647 do_it_prof(p);
648 }
650 /*
651 * Called from the timer interrupt handler to charge one tick to the current
652 * process. user_tick is 1 if the tick is user time, 0 for system.
653 */
654 void update_process_times(int user_tick)
655 {
656 struct task_struct *p = current;
657 int cpu = smp_processor_id(), system = user_tick ^ 1;
659 update_one_process(p, user_tick, system, cpu);
660 if (p->pid) {
661 if (--p->counter <= 0) {
662 p->counter = 0;
663 /*
664 * SCHED_FIFO is priority preemption, so this is
665 * not the place to decide whether to reschedule a
666 * SCHED_FIFO task or not - Bhavesh Davda
667 */
668 if (p->policy != SCHED_FIFO) {
669 p->need_resched = 1;
670 }
671 }
672 if (p->nice > 0)
673 kstat.per_cpu_nice[cpu] += user_tick;
674 else
675 kstat.per_cpu_user[cpu] += user_tick;
676 kstat.per_cpu_system[cpu] += system;
677 } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
678 kstat.per_cpu_system[cpu] += system;
679 }
681 /*
682 * Called from the timer interrupt handler to charge a batch of user
683 * and system ticks to the current process.
684 */
685 void update_process_times_us(int user_ticks, int system_ticks)
686 {
687 struct task_struct *p = current;
688 int cpu = smp_processor_id();
690 update_one_process(p, user_ticks, system_ticks, cpu);
691 if (p->pid) {
692 p->counter -= user_ticks + system_ticks;
693 if (p->counter <= 0) {
694 p->counter = 0;
695 p->need_resched = 1;
696 }
697 if (p->nice > 0)
698 kstat.per_cpu_nice[cpu] += user_ticks;
699 else
700 kstat.per_cpu_user[cpu] += user_ticks;
701 kstat.per_cpu_system[cpu] += system_ticks;
702 } else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
703 kstat.per_cpu_system[cpu] += system_ticks;
704 }
706 /*
707 * Nr of active tasks - counted in fixed-point numbers
708 */
709 static unsigned long count_active_tasks(void)
710 {
711 struct task_struct *p;
712 unsigned long nr = 0;
714 read_lock(&tasklist_lock);
715 for_each_task(p) {
716 if ((p->state == TASK_RUNNING ||
717 (p->state & TASK_UNINTERRUPTIBLE)))
718 nr += FIXED_1;
719 }
720 read_unlock(&tasklist_lock);
721 return nr;
722 }
724 /*
725 * Hmm.. Changed this, as the GNU make sources (load.c) seem to
726 * imply that avenrun[] is the standard name for this kind of thing.
727 * Nothing else seems to be standardized: the fractional size etc
728 * all seem to differ on different machines.
729 */
730 unsigned long avenrun[3];
732 static inline void calc_load(unsigned long ticks)
733 {
734 unsigned long active_tasks; /* fixed-point */
735 static int count = LOAD_FREQ;
737 count -= ticks;
738 while (count < 0) {
739 count += LOAD_FREQ;
740 active_tasks = count_active_tasks();
741 CALC_LOAD(avenrun[0], EXP_1, active_tasks);
742 CALC_LOAD(avenrun[1], EXP_5, active_tasks);
743 CALC_LOAD(avenrun[2], EXP_15, active_tasks);
744 }
745 }
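The load average is kept in 11-bit fixed point; CALC_LOAD and its constants come from <linux/sched.h>. A standalone user-space illustration of the same decay arithmetic, with the 1-minute constants copied in:

#include <stdio.h>

#define FSHIFT  11
#define FIXED_1 (1UL << FSHIFT)             /* 1.0 in fixed point == 2048 */
#define EXP_1   1884                        /* exp(-5s/1min) in fixed point */

int main(void)
{
        unsigned long load   = 0;           /* avenrun[0] equivalent */
        unsigned long active = 3 * FIXED_1; /* pretend 3 runnable tasks */
        int i;

        for (i = 0; i < 24; i++) {          /* 24 updates == 2 minutes */
                load *= EXP_1;
                load += active * (FIXED_1 - EXP_1);
                load >>= FSHIFT;
                printf("%2d: %lu.%02lu\n", i, load >> FSHIFT,
                       (load & (FIXED_1 - 1)) * 100 / FIXED_1);
        }
        return 0;
}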
747 /* jiffies at the most recent update of wall time */
748 unsigned long wall_jiffies;
750 /*
751 * This spinlock protects us from SMP races while playing with xtime. -arca
752 */
753 rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
755 static inline void update_times(void)
756 {
757 unsigned long ticks;
759 /*
760 * update_times() is run from the raw timer_bh handler so we
761 * just know that the irqs are locally enabled and so we don't
762 * need to save/restore the flags of the local CPU here. -arca
763 */
764 write_lock_irq(&xtime_lock);
765 vxtime_lock();
767 ticks = jiffies - wall_jiffies;
768 if (ticks) {
769 wall_jiffies += ticks;
770 update_wall_time(ticks);
771 }
772 vxtime_unlock();
773 write_unlock_irq(&xtime_lock);
774 calc_load(ticks);
775 }
777 void timer_bh(void)
778 {
779 update_times();
780 run_timer_list();
781 }
783 void do_timer(struct pt_regs *regs)
784 {
785 (*(unsigned long *)&jiffies)++;
786 #ifndef CONFIG_SMP
787 /* SMP process accounting uses the local APIC timer */
789 update_process_times(user_mode(regs));
790 #endif
791 mark_bh(TIMER_BH);
792 if (TQ_ACTIVE(tq_timer))
793 mark_bh(TQUEUE_BH);
794 }
796 void do_timer_ticks(int ticks)
797 {
798 (*(unsigned long *)&jiffies) += ticks;
799 mark_bh(TIMER_BH);
800 if (TQ_ACTIVE(tq_timer))
801 mark_bh(TQUEUE_BH);
802 }
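jiffies wraps after roughly 497 days at HZ == 100; the wheel code above only ever works with differences, and callers comparing raw jiffies values should do the same via the time_before()/time_after() macros from <linux/timer.h> (already used by next_timer_event() above). Sketch with placeholder names:

unsigned long timeout = jiffies + 5 * HZ;   /* five seconds from now */

/* ... later, possibly after jiffies has wrapped ... */
if (time_after(jiffies, timeout))
        handle_timeout();                   /* hypothetical helper */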
804 #if !defined(__alpha__) && !defined(__ia64__)
806 /*
807 * For backwards compatibility? This can be done in libc so Alpha
808 * and all newer ports shouldn't need it.
809 */
810 asmlinkage unsigned long sys_alarm(unsigned int seconds)
811 {
812 struct itimerval it_new, it_old;
813 unsigned int oldalarm;
815 it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
816 it_new.it_value.tv_sec = seconds;
817 it_new.it_value.tv_usec = 0;
818 do_setitimer(ITIMER_REAL, &it_new, &it_old);
819 oldalarm = it_old.it_value.tv_sec;
820 /* ehhh.. We can't return 0 if we have an alarm pending.. */
821 /* And we'd better return too much than too little anyway */
822 if (it_old.it_value.tv_usec)
823 oldalarm++;
824 return oldalarm;
825 }
827 #endif
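The same rounding can be reproduced from user space with setitimer(); a sketch in which my_alarm is a placeholder, not a libc routine:

#include <sys/time.h>

unsigned int my_alarm(unsigned int seconds)
{
        struct itimerval newv, oldv;

        newv.it_interval.tv_sec  = 0;
        newv.it_interval.tv_usec = 0;
        newv.it_value.tv_sec     = seconds;
        newv.it_value.tv_usec    = 0;
        setitimer(ITIMER_REAL, &newv, &oldv);

        /* Round a partially elapsed previous alarm up, as sys_alarm() does. */
        return oldv.it_value.tv_sec + (oldv.it_value.tv_usec != 0);
}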
829 #ifndef __alpha__
831 /*
832 * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
833 * should be moved into arch/i386 instead?
834 */
836 /**
837 * sys_getpid - return the thread group id of the current process
838 *
839 * Note, despite the name, this returns the tgid not the pid. The tgid and
840 * the pid are identical unless CLONE_THREAD was specified on clone() in
841 * which case the tgid is the same in all threads of the same group.
842 *
843 * This is SMP safe as current->tgid does not change.
844 */
845 asmlinkage long sys_getpid(void)
846 {
847 return current->tgid;
848 }
850 /*
851 * This is not strictly SMP safe: p_opptr could change
852 * from under us. However, rather than getting any lock
853 * we can use an optimistic algorithm: get the parent
854 * pid, and go back and check that the parent is still
855 * the same. If it has changed (which is extremely unlikely
856 * indeed), we just try again..
857 *
858 * NOTE! This depends on the fact that even if we _do_
859 * get an old value of "parent", we can happily dereference
860 * the pointer: we just can't necessarily trust the result
861 * until we know that the parent pointer is valid.
862 *
863 * The "mb()" macro is a memory barrier - a synchronizing
864 * event. It also makes sure that gcc doesn't optimize
865 * away the necessary memory references.. The barrier doesn't
866 * have to have all that strong semantics: on x86 we don't
867 * really require a synchronizing instruction, for example.
868 * The barrier is more important for code generation than
869 * for any real memory ordering semantics (even if there is
870 * a small window for a race, using the old pointer is
871 * harmless for a while).
872 */
873 asmlinkage long sys_getppid(void)
874 {
875 int pid;
876 struct task_struct * me = current;
877 struct task_struct * parent;
879 parent = me->p_opptr;
880 for (;;) {
881 pid = parent->pid;
882 #if CONFIG_SMP
883 {
884 struct task_struct *old = parent;
885 mb();
886 parent = me->p_opptr;
887 if (old != parent)
888 continue;
889 }
890 #endif
891 break;
892 }
893 return pid;
894 }
896 asmlinkage long sys_getuid(void)
897 {
898 /* Only we change this so SMP safe */
899 return current->uid;
900 }
902 asmlinkage long sys_geteuid(void)
903 {
904 /* Only we change this so SMP safe */
905 return current->euid;
906 }
908 asmlinkage long sys_getgid(void)
909 {
910 /* Only we change this so SMP safe */
911 return current->gid;
912 }
914 asmlinkage long sys_getegid(void)
915 {
916 /* Only we change this so SMP safe */
917 return current->egid;
918 }
920 #endif
922 /* Thread ID - the internal kernel "pid" */
923 asmlinkage long sys_gettid(void)
924 {
925 return current->pid;
926 }
928 asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
929 {
930 struct timespec t;
931 unsigned long expire;
933 if(copy_from_user(&t, rqtp, sizeof(struct timespec)))
934 return -EFAULT;
936 if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
937 return -EINVAL;
940 if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
941 current->policy != SCHED_OTHER)
942 {
943 /*
944 * Short delay requests up to 2 ms will be handled with
945 * high precision by a busy wait for all real-time processes.
946 *
947 * It's important on SMP not to do this while holding locks.
948 */
949 udelay((t.tv_nsec + 999) / 1000);
950 return 0;
951 }
953 expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
955 current->state = TASK_INTERRUPTIBLE;
956 expire = schedule_timeout(expire);
958 if (expire) {
959 if (rmtp) {
960 jiffies_to_timespec(expire, &t);
961 if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
962 return -EFAULT;
963 }
964 return -EINTR;
965 }
966 return 0;
967 }
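The rmtp value written above is what lets user space resume an interrupted sleep; the standard pattern, with sleep_full as a placeholder name:

#include <time.h>
#include <errno.h>

static void sleep_full(struct timespec req)
{
        struct timespec rem;

        while (nanosleep(&req, &rem) == -1 && errno == EINTR)
                req = rem;              /* carry on with the unslept remainder */
}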