aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author Thomas Gleixner <tglx@linutronix.de> 2007-02-16 04:28:11 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-02-16 11:13:59 -0500
commit 54cdfdb47f73b5af3d1ebb0f1e383efbe70fde9e (patch)
tree e2f76277f6b7546e53c3a1d025e31bceb10bbff5 /kernel
parent d40891e75fc1f646dce57d5d3bd1349a6aaf7a0e (diff)
[PATCH] hrtimers: add high resolution timer support
Implement high resolution timers on top of the hrtimers infrastructure and the clockevents / tick-management framework. This provides accurate timers for all hrtimer subsystem users. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu> Cc: john stultz <johnstul@us.ibm.com> Cc: Roman Zippel <zippel@linux-m68k.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/hrtimer.c 568
-rw-r--r-- kernel/itimer.c 2
-rw-r--r-- kernel/posix-timers.c 2
-rw-r--r-- kernel/time/Kconfig 10
4 files changed, 532 insertions, 50 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index e04ef38ea3be..62aad8e1a383 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> 4 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
5 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar 5 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
6 * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner <tglx@timesys.com> 6 * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
7 * 7 *
8 * High-resolution kernel timers 8 * High-resolution kernel timers
9 * 9 *
@@ -32,13 +32,17 @@
32 */ 32 */
33 33
34#include <linux/cpu.h> 34#include <linux/cpu.h>
35#include <linux/irq.h>
35#include <linux/module.h> 36#include <linux/module.h>
36#include <linux/percpu.h> 37#include <linux/percpu.h>
37#include <linux/hrtimer.h> 38#include <linux/hrtimer.h>
38#include <linux/notifier.h> 39#include <linux/notifier.h>
39#include <linux/syscalls.h> 40#include <linux/syscalls.h>
41#include <linux/kallsyms.h>
40#include <linux/interrupt.h> 42#include <linux/interrupt.h>
41#include <linux/tick.h> 43#include <linux/tick.h>
44#include <linux/seq_file.h>
45#include <linux/err.h>
42 46
43#include <asm/uaccess.h> 47#include <asm/uaccess.h>
44 48
@@ -81,7 +85,7 @@ EXPORT_SYMBOL_GPL(ktime_get_real);
81 * This ensures that we capture erroneous accesses to these clock ids 85 * This ensures that we capture erroneous accesses to these clock ids
82 * rather than moving them into the range of valid clock id's. 86 * rather than moving them into the range of valid clock id's.
83 */ 87 */
84static DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = 88DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
85{ 89{
86 90
87 .clock_base = 91 .clock_base =
@@ -89,12 +93,12 @@ static DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
89 { 93 {
90 .index = CLOCK_REALTIME, 94 .index = CLOCK_REALTIME,
91 .get_time = &ktime_get_real, 95 .get_time = &ktime_get_real,
92 .resolution = KTIME_REALTIME_RES, 96 .resolution = KTIME_LOW_RES,
93 }, 97 },
94 { 98 {
95 .index = CLOCK_MONOTONIC, 99 .index = CLOCK_MONOTONIC,
96 .get_time = &ktime_get, 100 .get_time = &ktime_get,
97 .resolution = KTIME_MONOTONIC_RES, 101 .resolution = KTIME_LOW_RES,
98 }, 102 },
99 } 103 }
100}; 104};
@@ -151,14 +155,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
151} 155}
152 156
153/* 157/*
154 * Helper function to check, whether the timer is on one of the queues
155 */
156static inline int hrtimer_is_queued(struct hrtimer *timer)
157{
158 return timer->state & HRTIMER_STATE_ENQUEUED;
159}
160
161/*
162 * Helper function to check, whether the timer is running the callback 158 * Helper function to check, whether the timer is running the callback
163 * function 159 * function
164 */ 160 */
@@ -226,7 +222,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
226 * completed. There is no conflict as we hold the lock until 222 * completed. There is no conflict as we hold the lock until
227 * the timer is enqueued. 223 * the timer is enqueued.
228 */ 224 */
229 if (unlikely(timer->state & HRTIMER_STATE_CALLBACK)) 225 if (unlikely(hrtimer_callback_running(timer)))
230 return base; 226 return base;
231 227
232 /* See the comment in lock_timer_base() */ 228 /* See the comment in lock_timer_base() */
@@ -250,7 +246,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
250 return base; 246 return base;
251} 247}
252 248
253#define switch_hrtimer_base(t, b) (b) 249# define switch_hrtimer_base(t, b) (b)
254 250
255#endif /* !CONFIG_SMP */ 251#endif /* !CONFIG_SMP */
256 252
@@ -281,9 +277,6 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
281 277
282 return ktime_add(kt, tmp); 278 return ktime_add(kt, tmp);
283} 279}
284
285#else /* CONFIG_KTIME_SCALAR */
286
287# endif /* !CONFIG_KTIME_SCALAR */ 280# endif /* !CONFIG_KTIME_SCALAR */
288 281
289/* 282/*
@@ -308,6 +301,290 @@ unsigned long ktime_divns(const ktime_t kt, s64 div)
308} 301}
309#endif /* BITS_PER_LONG >= 64 */ 302#endif /* BITS_PER_LONG >= 64 */
310 303
304/* High resolution timer related functions */
305#ifdef CONFIG_HIGH_RES_TIMERS
306
307/*
308 * High resolution timer enabled ?
309 */
310static int hrtimer_hres_enabled __read_mostly = 1;
311
312/*
313 * Enable / Disable high resolution mode
314 */
315static int __init setup_hrtimer_hres(char *str)
316{
317 if (!strcmp(str, "off"))
318 hrtimer_hres_enabled = 0;
319 else if (!strcmp(str, "on"))
320 hrtimer_hres_enabled = 1;
321 else
322 return 0;
323 return 1;
324}
325
326__setup("highres=", setup_hrtimer_hres);
327
328/*
329 * hrtimer_is_hres_enabled - query, if the highres mode is enabled
330 */
331static inline int hrtimer_is_hres_enabled(void)
332{
333 return hrtimer_hres_enabled;
334}
335
336/*
337 * Is the high resolution mode active ?
338 */
339static inline int hrtimer_hres_active(void)
340{
341 return __get_cpu_var(hrtimer_bases).hres_active;
342}
343
344/*
345 * Reprogram the event source, checking both queues for the
346 * next event
347 * Called with interrupts disabled and base->lock held
348 */
349static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
350{
351 int i;
352 struct hrtimer_clock_base *base = cpu_base->clock_base;
353 ktime_t expires;
354
355 cpu_base->expires_next.tv64 = KTIME_MAX;
356
357 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
358 struct hrtimer *timer;
359
360 if (!base->first)
361 continue;
362 timer = rb_entry(base->first, struct hrtimer, node);
363 expires = ktime_sub(timer->expires, base->offset);
364 if (expires.tv64 < cpu_base->expires_next.tv64)
365 cpu_base->expires_next = expires;
366 }
367
368 if (cpu_base->expires_next.tv64 != KTIME_MAX)
369 tick_program_event(cpu_base->expires_next, 1);
370}
371
372/*
373 * Shared reprogramming for clock_realtime and clock_monotonic
374 *
375 * When a timer is enqueued and expires earlier than the already enqueued
376 * timers, we have to check, whether it expires earlier than the timer for
377 * which the clock event device was armed.
378 *
379 * Called with interrupts disabled and base->cpu_base.lock held
380 */
381static int hrtimer_reprogram(struct hrtimer *timer,
382 struct hrtimer_clock_base *base)
383{
384 ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
385 ktime_t expires = ktime_sub(timer->expires, base->offset);
386 int res;
387
388 /*
389 * When the callback is running, we do not reprogram the clock event
390 * device. The timer callback is either running on a different CPU or
391 * the callback is executed in the hrtimer_interrupt context. The
392 * reprogramming is handled either by the softirq, which called the
393 * callback or at the end of the hrtimer_interrupt.
394 */
395 if (hrtimer_callback_running(timer))
396 return 0;
397
398 if (expires.tv64 >= expires_next->tv64)
399 return 0;
400
401 /*
402 * Clockevents returns -ETIME, when the event was in the past.
403 */
404 res = tick_program_event(expires, 0);
405 if (!IS_ERR_VALUE(res))
406 *expires_next = expires;
407 return res;
408}
409
410
411/*
412 * Retrigger next event is called after clock was set
413 *
414 * Called with interrupts disabled via on_each_cpu()
415 */
416static void retrigger_next_event(void *arg)
417{
418 struct hrtimer_cpu_base *base;
419 struct timespec realtime_offset;
420 unsigned long seq;
421
422 if (!hrtimer_hres_active())
423 return;
424
425 do {
426 seq = read_seqbegin(&xtime_lock);
427 set_normalized_timespec(&realtime_offset,
428 -wall_to_monotonic.tv_sec,
429 -wall_to_monotonic.tv_nsec);
430 } while (read_seqretry(&xtime_lock, seq));
431
432 base = &__get_cpu_var(hrtimer_bases);
433
434 /* Adjust CLOCK_REALTIME offset */
435 spin_lock(&base->lock);
436 base->clock_base[CLOCK_REALTIME].offset =
437 timespec_to_ktime(realtime_offset);
438
439 hrtimer_force_reprogram(base);
440 spin_unlock(&base->lock);
441}
442
443/*
444 * Clock realtime was set
445 *
446 * Change the offset of the realtime clock vs. the monotonic
447 * clock.
448 *
449 * We might have to reprogram the high resolution timer interrupt. On
450 * SMP we call the architecture specific code to retrigger _all_ high
451 * resolution timer interrupts. On UP we just disable interrupts and
452 * call the high resolution interrupt code.
453 */
454void clock_was_set(void)
455{
456 /* Retrigger the CPU local events everywhere */
457 on_each_cpu(retrigger_next_event, NULL, 0, 1);
458}
459
460/*
461 * Check, whether the timer is on the callback pending list
462 */
463static inline int hrtimer_cb_pending(const struct hrtimer *timer)
464{
465 return timer->state & HRTIMER_STATE_PENDING;
466}
467
468/*
469 * Remove a timer from the callback pending list
470 */
471static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
472{
473 list_del_init(&timer->cb_entry);
474}
475
476/*
477 * Initialize the high resolution related parts of cpu_base
478 */
479static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
480{
481 base->expires_next.tv64 = KTIME_MAX;
482 base->hres_active = 0;
483 INIT_LIST_HEAD(&base->cb_pending);
484}
485
486/*
487 * Initialize the high resolution related parts of a hrtimer
488 */
489static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
490{
491 INIT_LIST_HEAD(&timer->cb_entry);
492}
493
494/*
495 * When High resolution timers are active, try to reprogram. Note, that in case
496 * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
497 * check happens. The timer gets enqueued into the rbtree. The reprogramming
498 * and expiry check is done in the hrtimer_interrupt or in the softirq.
499 */
500static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
501 struct hrtimer_clock_base *base)
502{
503 if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
504
505 /* Timer is expired, act upon the callback mode */
506 switch(timer->cb_mode) {
507 case HRTIMER_CB_IRQSAFE_NO_RESTART:
508 /*
509 * We can call the callback from here. No restart
510 * happens, so no danger of recursion
511 */
512 BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
513 return 1;
514 case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:
515 /*
516 * This is solely for the sched tick emulation with
517 * dynamic tick support to ensure that we do not
518 * restart the tick right on the edge and end up with
519 * the tick timer in the softirq ! The calling site
520 * takes care of this.
521 */
522 return 1;
523 case HRTIMER_CB_IRQSAFE:
524 case HRTIMER_CB_SOFTIRQ:
525 /*
526 * Move everything else into the softirq pending list !
527 */
528 list_add_tail(&timer->cb_entry,
529 &base->cpu_base->cb_pending);
530 timer->state = HRTIMER_STATE_PENDING;
531 raise_softirq(HRTIMER_SOFTIRQ);
532 return 1;
533 default:
534 BUG();
535 }
536 }
537 return 0;
538}
539
540/*
541 * Switch to high resolution mode
542 */
543static void hrtimer_switch_to_hres(void)
544{
545 struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
546 unsigned long flags;
547
548 if (base->hres_active)
549 return;
550
551 local_irq_save(flags);
552
553 if (tick_init_highres()) {
554 local_irq_restore(flags);
555 return;
556 }
557 base->hres_active = 1;
558 base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES;
559 base->clock_base[CLOCK_MONOTONIC].resolution = KTIME_HIGH_RES;
560
561 tick_setup_sched_timer();
562
563 /* "Retrigger" the interrupt to get things going */
564 retrigger_next_event(NULL);
565 local_irq_restore(flags);
566 printk(KERN_INFO "Switched to high resolution mode on CPU %d\n",
567 smp_processor_id());
568}
569
570#else
571
572static inline int hrtimer_hres_active(void) { return 0; }
573static inline int hrtimer_is_hres_enabled(void) { return 0; }
574static inline void hrtimer_switch_to_hres(void) { }
575static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { }
576static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
577 struct hrtimer_clock_base *base)
578{
579 return 0;
580}
581static inline int hrtimer_cb_pending(struct hrtimer *timer) { return 0; }
582static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) { }
583static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
584static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
585
586#endif /* CONFIG_HIGH_RES_TIMERS */
587
311/* 588/*
312 * Counterpart to lock_timer_base above: 589 * Counterpart to lock_timer_base above:
313 */ 590 */
@@ -365,7 +642,7 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
365 * red black tree is O(log(n)). Must hold the base lock. 642 * red black tree is O(log(n)). Must hold the base lock.
366 */ 643 */
367static void enqueue_hrtimer(struct hrtimer *timer, 644static void enqueue_hrtimer(struct hrtimer *timer,
368 struct hrtimer_clock_base *base) 645 struct hrtimer_clock_base *base, int reprogram)
369{ 646{
370 struct rb_node **link = &base->active.rb_node; 647 struct rb_node **link = &base->active.rb_node;
371 struct rb_node *parent = NULL; 648 struct rb_node *parent = NULL;
@@ -391,6 +668,22 @@ static void enqueue_hrtimer(struct hrtimer *timer,
391 * Insert the timer to the rbtree and check whether it 668 * Insert the timer to the rbtree and check whether it
392 * replaces the first pending timer 669 * replaces the first pending timer
393 */ 670 */
671 if (!base->first || timer->expires.tv64 <
672 rb_entry(base->first, struct hrtimer, node)->expires.tv64) {
673 /*
674 * Reprogram the clock event device. When the timer is already
675 * expired hrtimer_enqueue_reprogram has either called the
676 * callback or added it to the pending list and raised the
677 * softirq.
678 *
679 * This is a NOP for !HIGHRES
680 */
681 if (reprogram && hrtimer_enqueue_reprogram(timer, base))
682 return;
683
684 base->first = &timer->node;
685 }
686
394 rb_link_node(&timer->node, parent, link); 687 rb_link_node(&timer->node, parent, link);
395 rb_insert_color(&timer->node, &base->active); 688 rb_insert_color(&timer->node, &base->active);
396 /* 689 /*
@@ -398,28 +691,38 @@ static void enqueue_hrtimer(struct hrtimer *timer,
398 * state of a possibly running callback. 691 * state of a possibly running callback.
399 */ 692 */
400 timer->state |= HRTIMER_STATE_ENQUEUED; 693 timer->state |= HRTIMER_STATE_ENQUEUED;
401
402 if (!base->first || timer->expires.tv64 <
403 rb_entry(base->first, struct hrtimer, node)->expires.tv64)
404 base->first = &timer->node;
405} 694}
406 695
407/* 696/*
408 * __remove_hrtimer - internal function to remove a timer 697 * __remove_hrtimer - internal function to remove a timer
409 * 698 *
410 * Caller must hold the base lock. 699 * Caller must hold the base lock.
700 *
701 * High resolution timer mode reprograms the clock event device when the
702 * timer is the one which expires next. The caller can disable this by setting
703 * reprogram to zero. This is useful, when the context does a reprogramming
704 * anyway (e.g. timer interrupt)
411 */ 705 */
412static void __remove_hrtimer(struct hrtimer *timer, 706static void __remove_hrtimer(struct hrtimer *timer,
413 struct hrtimer_clock_base *base, 707 struct hrtimer_clock_base *base,
414 unsigned long newstate) 708 unsigned long newstate, int reprogram)
415{ 709{
416 /* 710 /* High res. callback list. NOP for !HIGHRES */
417 * Remove the timer from the rbtree and replace the 711 if (hrtimer_cb_pending(timer))
418 * first entry pointer if necessary. 712 hrtimer_remove_cb_pending(timer);
419 */ 713 else {
420 if (base->first == &timer->node) 714 /*
421 base->first = rb_next(&timer->node); 715 * Remove the timer from the rbtree and replace the
422 rb_erase(&timer->node, &base->active); 716 * first entry pointer if necessary.
717 */
718 if (base->first == &timer->node) {
719 base->first = rb_next(&timer->node);
720 /* Reprogram the clock event device, if enabled */
721 if (reprogram && hrtimer_hres_active())
722 hrtimer_force_reprogram(base->cpu_base);
723 }
724 rb_erase(&timer->node, &base->active);
725 }
423 timer->state = newstate; 726 timer->state = newstate;
424} 727}
425 728
@@ -430,7 +733,19 @@ static inline int
430remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) 733remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
431{ 734{
432 if (hrtimer_is_queued(timer)) { 735 if (hrtimer_is_queued(timer)) {
433 __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE); 736 int reprogram;
737
738 /*
739 * Remove the timer and force reprogramming when high
740 * resolution mode is active and the timer is on the current
741 * CPU. If we remove a timer on another CPU, reprogramming is
742 * skipped. The interrupt event on this CPU is fired and
743 * reprogramming happens in the interrupt handler. This is a
744 * rare case and less expensive than a smp call.
745 */
746 reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
747 __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE,
748 reprogram);
434 return 1; 749 return 1;
435 } 750 }
436 return 0; 751 return 0;
@@ -476,7 +791,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
476 } 791 }
477 timer->expires = tim; 792 timer->expires = tim;
478 793
479 enqueue_hrtimer(timer, new_base); 794 enqueue_hrtimer(timer, new_base, base == new_base);
480 795
481 unlock_hrtimer_base(timer, &flags); 796 unlock_hrtimer_base(timer, &flags);
482 797
@@ -567,17 +882,19 @@ ktime_t hrtimer_get_next_event(void)
567 882
568 spin_lock_irqsave(&cpu_base->lock, flags); 883 spin_lock_irqsave(&cpu_base->lock, flags);
569 884
570 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { 885 if (!hrtimer_hres_active()) {
571 struct hrtimer *timer; 886 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
887 struct hrtimer *timer;
572 888
573 if (!base->first) 889 if (!base->first)
574 continue; 890 continue;
575 891
576 timer = rb_entry(base->first, struct hrtimer, node); 892 timer = rb_entry(base->first, struct hrtimer, node);
577 delta.tv64 = timer->expires.tv64; 893 delta.tv64 = timer->expires.tv64;
578 delta = ktime_sub(delta, base->get_time()); 894 delta = ktime_sub(delta, base->get_time());
579 if (delta.tv64 < mindelta.tv64) 895 if (delta.tv64 < mindelta.tv64)
580 mindelta.tv64 = delta.tv64; 896 mindelta.tv64 = delta.tv64;
897 }
581 } 898 }
582 899
583 spin_unlock_irqrestore(&cpu_base->lock, flags); 900 spin_unlock_irqrestore(&cpu_base->lock, flags);
@@ -607,6 +924,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
607 clock_id = CLOCK_MONOTONIC; 924 clock_id = CLOCK_MONOTONIC;
608 925
609 timer->base = &cpu_base->clock_base[clock_id]; 926 timer->base = &cpu_base->clock_base[clock_id];
927 hrtimer_init_timer_hres(timer);
610} 928}
611EXPORT_SYMBOL_GPL(hrtimer_init); 929EXPORT_SYMBOL_GPL(hrtimer_init);
612 930
@@ -629,6 +947,139 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
629} 947}
630EXPORT_SYMBOL_GPL(hrtimer_get_res); 948EXPORT_SYMBOL_GPL(hrtimer_get_res);
631 949
950#ifdef CONFIG_HIGH_RES_TIMERS
951
952/*
953 * High resolution timer interrupt
954 * Called with interrupts disabled
955 */
956void hrtimer_interrupt(struct clock_event_device *dev)
957{
958 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
959 struct hrtimer_clock_base *base;
960 ktime_t expires_next, now;
961 int i, raise = 0;
962
963 BUG_ON(!cpu_base->hres_active);
964 cpu_base->nr_events++;
965 dev->next_event.tv64 = KTIME_MAX;
966
967 retry:
968 now = ktime_get();
969
970 expires_next.tv64 = KTIME_MAX;
971
972 base = cpu_base->clock_base;
973
974 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
975 ktime_t basenow;
976 struct rb_node *node;
977
978 spin_lock(&cpu_base->lock);
979
980 basenow = ktime_add(now, base->offset);
981
982 while ((node = base->first)) {
983 struct hrtimer *timer;
984
985 timer = rb_entry(node, struct hrtimer, node);
986
987 if (basenow.tv64 < timer->expires.tv64) {
988 ktime_t expires;
989
990 expires = ktime_sub(timer->expires,
991 base->offset);
992 if (expires.tv64 < expires_next.tv64)
993 expires_next = expires;
994 break;
995 }
996
997 /* Move softirq callbacks to the pending list */
998 if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
999 __remove_hrtimer(timer, base,
1000 HRTIMER_STATE_PENDING, 0);
1001 list_add_tail(&timer->cb_entry,
1002 &base->cpu_base->cb_pending);
1003 raise = 1;
1004 continue;
1005 }
1006
1007 __remove_hrtimer(timer, base,
1008 HRTIMER_STATE_CALLBACK, 0);
1009
1010 /*
1011 * Note: We clear the CALLBACK bit after
1012 * enqueue_hrtimer to avoid reprogramming of
1013 * the event hardware. This happens at the end
1014 * of this function anyway.
1015 */
1016 if (timer->function(timer) != HRTIMER_NORESTART) {
1017 BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
1018 enqueue_hrtimer(timer, base, 0);
1019 }
1020 timer->state &= ~HRTIMER_STATE_CALLBACK;
1021 }
1022 spin_unlock(&cpu_base->lock);
1023 base++;
1024 }
1025
1026 cpu_base->expires_next = expires_next;
1027
1028 /* Reprogramming necessary ? */
1029 if (expires_next.tv64 != KTIME_MAX) {
1030 if (tick_program_event(expires_next, 0))
1031 goto retry;
1032 }
1033
1034 /* Raise softirq ? */
1035 if (raise)
1036 raise_softirq(HRTIMER_SOFTIRQ);
1037}
1038
1039static void run_hrtimer_softirq(struct softirq_action *h)
1040{
1041 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1042
1043 spin_lock_irq(&cpu_base->lock);
1044
1045 while (!list_empty(&cpu_base->cb_pending)) {
1046 enum hrtimer_restart (*fn)(struct hrtimer *);
1047 struct hrtimer *timer;
1048 int restart;
1049
1050 timer = list_entry(cpu_base->cb_pending.next,
1051 struct hrtimer, cb_entry);
1052
1053 fn = timer->function;
1054 __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
1055 spin_unlock_irq(&cpu_base->lock);
1056
1057 restart = fn(timer);
1058
1059 spin_lock_irq(&cpu_base->lock);
1060
1061 timer->state &= ~HRTIMER_STATE_CALLBACK;
1062 if (restart == HRTIMER_RESTART) {
1063 BUG_ON(hrtimer_active(timer));
1064 /*
1065 * Enqueue the timer, allow reprogramming of the event
1066 * device
1067 */
1068 enqueue_hrtimer(timer, timer->base, 1);
1069 } else if (hrtimer_active(timer)) {
1070 /*
1071 * If the timer was rearmed on another CPU, reprogram
1072 * the event device.
1073 */
1074 if (timer->base->first == &timer->node)
1075 hrtimer_reprogram(timer, timer->base);
1076 }
1077 }
1078 spin_unlock_irq(&cpu_base->lock);
1079}
1080
1081#endif /* CONFIG_HIGH_RES_TIMERS */
1082
632/* 1083/*
633 * Expire the per base hrtimer-queue: 1084 * Expire the per base hrtimer-queue:
634 */ 1085 */
@@ -656,7 +1107,7 @@ static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
656 break; 1107 break;
657 1108
658 fn = timer->function; 1109 fn = timer->function;
659 __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK); 1110 __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
660 spin_unlock_irq(&cpu_base->lock); 1111 spin_unlock_irq(&cpu_base->lock);
661 1112
662 restart = fn(timer); 1113 restart = fn(timer);
@@ -666,7 +1117,7 @@ static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
666 timer->state &= ~HRTIMER_STATE_CALLBACK; 1117 timer->state &= ~HRTIMER_STATE_CALLBACK;
667 if (restart != HRTIMER_NORESTART) { 1118 if (restart != HRTIMER_NORESTART) {
668 BUG_ON(hrtimer_active(timer)); 1119 BUG_ON(hrtimer_active(timer));
669 enqueue_hrtimer(timer, base); 1120 enqueue_hrtimer(timer, base, 0);
670 } 1121 }
671 } 1122 }
672 spin_unlock_irq(&cpu_base->lock); 1123 spin_unlock_irq(&cpu_base->lock);
@@ -674,12 +1125,19 @@ static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
674 1125
675/* 1126/*
676 * Called from timer softirq every jiffy, expire hrtimers: 1127 * Called from timer softirq every jiffy, expire hrtimers:
1128 *
1129 * For HRT it's the fallback code to run the softirq in the timer
1130 * softirq context in case the hrtimer initialization failed or has
1131 * not been done yet.
677 */ 1132 */
678void hrtimer_run_queues(void) 1133void hrtimer_run_queues(void)
679{ 1134{
680 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); 1135 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
681 int i; 1136 int i;
682 1137
1138 if (hrtimer_hres_active())
1139 return;
1140
683 /* 1141 /*
684 * This _is_ ugly: We have to check in the softirq context, 1142 * This _is_ ugly: We have to check in the softirq context,
685 * whether we can switch to highres and / or nohz mode. The 1143 * whether we can switch to highres and / or nohz mode. The
@@ -688,7 +1146,8 @@ void hrtimer_run_queues(void)
688 * check bit in the tick_oneshot code, otherwise we might 1146 * check bit in the tick_oneshot code, otherwise we might
689 * deadlock vs. xtime_lock. 1147 * deadlock vs. xtime_lock.
690 */ 1148 */
691 tick_check_oneshot_change(1); 1149 if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
1150 hrtimer_switch_to_hres();
692 1151
693 hrtimer_get_softirq_time(cpu_base); 1152 hrtimer_get_softirq_time(cpu_base);
694 1153
@@ -716,6 +1175,9 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
716{ 1175{
717 sl->timer.function = hrtimer_wakeup; 1176 sl->timer.function = hrtimer_wakeup;
718 sl->task = task; 1177 sl->task = task;
1178#ifdef CONFIG_HIGH_RES_TIMERS
1179 sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_RESTART;
1180#endif
719} 1181}
720 1182
721static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) 1183static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
@@ -726,7 +1188,8 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
726 set_current_state(TASK_INTERRUPTIBLE); 1188 set_current_state(TASK_INTERRUPTIBLE);
727 hrtimer_start(&t->timer, t->timer.expires, mode); 1189 hrtimer_start(&t->timer, t->timer.expires, mode);
728 1190
729 schedule(); 1191 if (likely(t->task))
1192 schedule();
730 1193
731 hrtimer_cancel(&t->timer); 1194 hrtimer_cancel(&t->timer);
732 mode = HRTIMER_MODE_ABS; 1195 mode = HRTIMER_MODE_ABS;
@@ -831,6 +1294,7 @@ static void __devinit init_hrtimers_cpu(int cpu)
831 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) 1294 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
832 cpu_base->clock_base[i].cpu_base = cpu_base; 1295 cpu_base->clock_base[i].cpu_base = cpu_base;
833 1296
1297 hrtimer_init_hres(cpu_base);
834} 1298}
835 1299
836#ifdef CONFIG_HOTPLUG_CPU 1300#ifdef CONFIG_HOTPLUG_CPU
@@ -843,10 +1307,13 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
843 1307
844 while ((node = rb_first(&old_base->active))) { 1308 while ((node = rb_first(&old_base->active))) {
845 timer = rb_entry(node, struct hrtimer, node); 1309 timer = rb_entry(node, struct hrtimer, node);
846 BUG_ON(timer->state & HRTIMER_STATE_CALLBACK); 1310 BUG_ON(hrtimer_callback_running(timer));
847 __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE); 1311 __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0);
848 timer->base = new_base; 1312 timer->base = new_base;
849 enqueue_hrtimer(timer, new_base); 1313 /*
1314 * Enqueue the timer. Allow reprogramming of the event device
1315 */
1316 enqueue_hrtimer(timer, new_base, 1);
850 } 1317 }
851} 1318}
852 1319
@@ -859,6 +1326,8 @@ static void migrate_hrtimers(int cpu)
859 old_base = &per_cpu(hrtimer_bases, cpu); 1326 old_base = &per_cpu(hrtimer_bases, cpu);
860 new_base = &get_cpu_var(hrtimer_bases); 1327 new_base = &get_cpu_var(hrtimer_bases);
861 1328
1329 tick_cancel_sched_timer(cpu);
1330
862 local_irq_disable(); 1331 local_irq_disable();
863 1332
864 spin_lock(&new_base->lock); 1333 spin_lock(&new_base->lock);
@@ -910,5 +1379,8 @@ void __init hrtimers_init(void)
910 hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, 1379 hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
911 (void *)(long)smp_processor_id()); 1380 (void *)(long)smp_processor_id());
912 register_cpu_notifier(&hrtimers_nb); 1381 register_cpu_notifier(&hrtimers_nb);
1382#ifdef CONFIG_HIGH_RES_TIMERS
1383 open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL);
1384#endif
913} 1385}
914 1386
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 9cefe1d1eb13..4fc6c0caf5d4 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -136,7 +136,7 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
136 send_group_sig_info(SIGALRM, SEND_SIG_PRIV, sig->tsk); 136 send_group_sig_info(SIGALRM, SEND_SIG_PRIV, sig->tsk);
137 137
138 if (sig->it_real_incr.tv64 != 0) { 138 if (sig->it_real_incr.tv64 != 0) {
139 hrtimer_forward(timer, timer->base->softirq_time, 139 hrtimer_forward(timer, hrtimer_cb_get_time(timer),
140 sig->it_real_incr); 140 sig->it_real_incr);
141 return HRTIMER_RESTART; 141 return HRTIMER_RESTART;
142 } 142 }
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 210f462c650e..44318ca71978 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -356,7 +356,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
356 if (timr->it.real.interval.tv64 != 0) { 356 if (timr->it.real.interval.tv64 != 0) {
357 timr->it_overrun += 357 timr->it_overrun +=
358 hrtimer_forward(timer, 358 hrtimer_forward(timer,
359 timer->base->softirq_time, 359 hrtimer_cb_get_time(timer),
360 timr->it.real.interval); 360 timr->it.real.interval);
361 ret = HRTIMER_RESTART; 361 ret = HRTIMER_RESTART;
362 ++timr->it_requeue_pending; 362 ++timr->it_requeue_pending;
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 9ec54eb3667f..f66351126544 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -13,3 +13,13 @@ config NO_HZ
13 This option enables a tickless system: timer interrupts will 13 This option enables a tickless system: timer interrupts will
14 only trigger on an as-needed basis both when the system is 14 only trigger on an as-needed basis both when the system is
15 busy and when the system is idle. 15 busy and when the system is idle.
16
17config HIGH_RES_TIMERS
18 bool "High Resolution Timer Support"
19 depends on GENERIC_TIME && GENERIC_CLOCKEVENTS
20 select TICK_ONESHOT
21 help
22 This option enables high resolution timer support. If your
23 hardware is not capable then this option only increases
24 the size of the kernel image.
25