path: root/kernel/timer.c
author     Jeff Garzik <jeff@garzik.org>   2007-02-17 15:11:43 -0500
committer  Jeff Garzik <jeff@garzik.org>   2007-02-17 15:11:43 -0500
commit     f630fe2817601314b2eb7ca5ddc23c7834646731 (patch)
tree       3bfb4939b7bbc3859575ca8b58fa3f929b015941 /kernel/timer.c
parent     48c871c1f6a7c7044dd76774fb469e65c7e2e4e8 (diff)
parent     8a03d9a498eaf02c8a118752050a5154852c13bf (diff)
Merge branch 'master' into upstream
Diffstat (limited to 'kernel/timer.c')
-rw-r--r--  kernel/timer.c | 290 ++++++++++++++++++++++-----------------
1 file changed, 186 insertions(+), 104 deletions(-)
diff --git a/kernel/timer.c b/kernel/timer.c
index 8533c3796082..cb1b86a9c52f 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -34,6 +34,8 @@
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
 #include <linux/delay.h>
+#include <linux/tick.h>
+#include <linux/kallsyms.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -262,6 +264,18 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
 	list_add_tail(&timer->entry, vec);
 }
 
+#ifdef CONFIG_TIMER_STATS
+void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
+{
+	if (timer->start_site)
+		return;
+
+	timer->start_site = addr;
+	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
+	timer->start_pid = current->pid;
+}
+#endif
+
 /**
  * init_timer - initialize a timer.
  * @timer: the timer to be initialized
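The hunk above is the core of the new CONFIG_TIMER_STATS bookkeeping: only the first site to arm a timer is recorded, so later re-arms keep the original attribution. A minimal, self-contained userspace sketch of that record-once pattern (the struct and names below are illustrative stand-ins, not the kernel's types):

#include <stdio.h>
#include <string.h>

#define COMM_LEN 16	/* stands in for TASK_COMM_LEN */

struct toy_timer_stats {
	void *start_site;		/* first code address that armed the timer */
	char start_comm[COMM_LEN];	/* name of the arming task */
	int start_pid;
};

/* Record only the first armer, like __timer_stats_timer_set_start_info(). */
static void set_start_info(struct toy_timer_stats *t, void *addr,
			   const char *comm, int pid)
{
	if (t->start_site)	/* already attributed; keep the first site */
		return;

	t->start_site = addr;
	strncpy(t->start_comm, comm, COMM_LEN - 1);
	t->start_comm[COMM_LEN - 1] = '\0';
	t->start_pid = pid;
}

int main(void)
{
	struct toy_timer_stats t = { 0 };

	set_start_info(&t, (void *)0x1000, "taskA", 42);
	set_start_info(&t, (void *)0x2000, "taskB", 43);	/* no effect */

	printf("site=%p comm=%s pid=%d\n",
	       t.start_site, t.start_comm, t.start_pid);
	return 0;
}

The kernel callers pass the arming code's address as addr; the sketch uses dummy pointers to stay runnable.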
@@ -273,11 +287,16 @@ void fastcall init_timer(struct timer_list *timer)
 {
 	timer->entry.next = NULL;
 	timer->base = __raw_get_cpu_var(tvec_bases);
+#ifdef CONFIG_TIMER_STATS
+	timer->start_site = NULL;
+	timer->start_pid = -1;
+	memset(timer->start_comm, 0, TASK_COMM_LEN);
+#endif
 }
 EXPORT_SYMBOL(init_timer);
 
 static inline void detach_timer(struct timer_list *timer,
 				int clear_pending)
 {
 	struct list_head *entry = &timer->entry;
 
@@ -324,6 +343,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
 	unsigned long flags;
 	int ret = 0;
 
+	timer_stats_timer_set_start_info(timer);
 	BUG_ON(!timer->function);
 
 	base = lock_timer_base(timer, &flags);
@@ -374,6 +394,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
 	tvec_base_t *base = per_cpu(tvec_bases, cpu);
 	unsigned long flags;
 
+	timer_stats_timer_set_start_info(timer);
 	BUG_ON(timer_pending(timer) || !timer->function);
 	spin_lock_irqsave(&base->lock, flags);
 	timer->base = base;
@@ -406,6 +427,7 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
 {
 	BUG_ON(!timer->function);
 
+	timer_stats_timer_set_start_info(timer);
 	/*
 	 * This is a common optimization triggered by the
 	 * networking code - if the timer is re-modified
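The (truncated) comment above refers to mod_timer()'s fast path: re-arming a still-pending timer with an unchanged expiry is a no-op, so the common networking pattern of refreshing a timeout on every packet stays cheap. A toy sketch of that shortcut (locking elided, all names invented):

#include <stdio.h>

struct toy_timer {
	unsigned long expires;
	int pending;
};

/* Sketch of the fast path: same expiry on a pending timer means no work. */
static int toy_mod_timer(struct toy_timer *t, unsigned long expires)
{
	if (t->expires == expires && t->pending)
		return 1;	/* nothing to do — the common case for net timers */

	t->expires = expires;	/* the real code takes the base lock here */
	t->pending = 1;
	return 0;
}

int main(void)
{
	struct toy_timer t = { .expires = 100, .pending = 1 };

	printf("same expiry: fast path hit = %d\n", toy_mod_timer(&t, 100));
	printf("new expiry:  fast path hit = %d\n", toy_mod_timer(&t, 200));
	return 0;
}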
@@ -436,6 +458,7 @@ int del_timer(struct timer_list *timer)
 	unsigned long flags;
 	int ret = 0;
 
+	timer_stats_timer_clear_start_info(timer);
 	if (timer_pending(timer)) {
 		base = lock_timer_base(timer, &flags);
 		if (timer_pending(timer)) {
@@ -569,6 +592,8 @@ static inline void __run_timers(tvec_base_t *base)
 		fn = timer->function;
 		data = timer->data;
 
+		timer_stats_account_timer(timer);
+
 		set_running_timer(base, timer);
 		detach_timer(timer, 1);
 		spin_unlock_irq(&base->lock);
@@ -591,105 +616,124 @@ static inline void __run_timers(tvec_base_t *base)
 	spin_unlock_irq(&base->lock);
 }
 
-#ifdef CONFIG_NO_IDLE_HZ
+#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ)
 /*
  * Find out when the next timer event is due to happen. This
  * is used on S/390 to stop all activity when a CPU is idle.
  * This function needs to be called with interrupts disabled.
  */
-unsigned long next_timer_interrupt(void)
+static unsigned long __next_timer_interrupt(tvec_base_t *base)
 {
-	tvec_base_t *base;
-	struct list_head *list;
+	unsigned long timer_jiffies = base->timer_jiffies;
+	unsigned long expires = timer_jiffies + (LONG_MAX >> 1);
+	int index, slot, array, found = 0;
 	struct timer_list *nte;
-	unsigned long expires;
-	unsigned long hr_expires = MAX_JIFFY_OFFSET;
-	ktime_t hr_delta;
 	tvec_t *varray[4];
-	int i, j;
-
-	hr_delta = hrtimer_get_next_event();
-	if (hr_delta.tv64 != KTIME_MAX) {
-		struct timespec tsdelta;
-		tsdelta = ktime_to_timespec(hr_delta);
-		hr_expires = timespec_to_jiffies(&tsdelta);
-		if (hr_expires < 3)
-			return hr_expires + jiffies;
-	}
-	hr_expires += jiffies;
-
-	base = __get_cpu_var(tvec_bases);
-	spin_lock(&base->lock);
-	expires = base->timer_jiffies + (LONG_MAX >> 1);
-	list = NULL;
 
 	/* Look for timer events in tv1. */
-	j = base->timer_jiffies & TVR_MASK;
+	index = slot = timer_jiffies & TVR_MASK;
 	do {
-		list_for_each_entry(nte, base->tv1.vec + j, entry) {
+		list_for_each_entry(nte, base->tv1.vec + slot, entry) {
+			found = 1;
 			expires = nte->expires;
-			if (j < (base->timer_jiffies & TVR_MASK))
-				list = base->tv2.vec + (INDEX(0));
-			goto found;
+			/* Look at the cascade bucket(s)? */
+			if (!index || slot < index)
+				goto cascade;
+			return expires;
 		}
-		j = (j + 1) & TVR_MASK;
-	} while (j != (base->timer_jiffies & TVR_MASK));
+		slot = (slot + 1) & TVR_MASK;
+	} while (slot != index);
+
+cascade:
+	/* Calculate the next cascade event */
+	if (index)
+		timer_jiffies += TVR_SIZE - index;
+	timer_jiffies >>= TVR_BITS;
 
 	/* Check tv2-tv5. */
 	varray[0] = &base->tv2;
 	varray[1] = &base->tv3;
 	varray[2] = &base->tv4;
 	varray[3] = &base->tv5;
-	for (i = 0; i < 4; i++) {
-		j = INDEX(i);
+
+	for (array = 0; array < 4; array++) {
+		tvec_t *varp = varray[array];
+
+		index = slot = timer_jiffies & TVN_MASK;
 		do {
-			if (list_empty(varray[i]->vec + j)) {
-				j = (j + 1) & TVN_MASK;
-				continue;
-			}
-			list_for_each_entry(nte, varray[i]->vec + j, entry)
+			list_for_each_entry(nte, varp->vec + slot, entry) {
+				found = 1;
 				if (time_before(nte->expires, expires))
 					expires = nte->expires;
-			if (j < (INDEX(i)) && i < 3)
-				list = varray[i + 1]->vec + (INDEX(i + 1));
-			goto found;
-		} while (j != (INDEX(i)));
-	}
-found:
-	if (list) {
-		/*
-		 * The search wrapped. We need to look at the next list
-		 * from the next tv element that would cascade into the
-		 * tv element where we found the timer element.
-		 */
-		list_for_each_entry(nte, list, entry) {
-			if (time_before(nte->expires, expires))
-				expires = nte->expires;
-		}
+			}
+			/*
+			 * Are we still searching for the first timer, or
+			 * already looking at the cascade buckets?
+			 */
+			if (found) {
+				/* Look at the cascade bucket(s)? */
+				if (!index || slot < index)
+					break;
+				return expires;
+			}
+			slot = (slot + 1) & TVN_MASK;
+		} while (slot != index);
+
+		if (index)
+			timer_jiffies += TVN_SIZE - index;
+		timer_jiffies >>= TVN_BITS;
 	}
-	spin_unlock(&base->lock);
+	return expires;
+}
 
-	/*
-	 * It can happen that other CPUs service timer IRQs and increment
-	 * jiffies, but we have not yet got a local timer tick to process
-	 * the timer wheels. In that case, the expiry time can be before
-	 * jiffies, but since the high-resolution timer here is relative to
-	 * jiffies, the default expression when high-resolution timers are
-	 * not active,
-	 *
-	 *	time_before(MAX_JIFFY_OFFSET + jiffies, expires)
-	 *
-	 * would falsely evaluate to true. If that is the case, just
-	 * return jiffies so that we can immediately fire the local timer.
-	 */
-	if (time_before(expires, jiffies))
-		return jiffies;
+/*
+ * Check if the next hrtimer event is before the next timer wheel
+ * event:
+ */
+static unsigned long cmp_next_hrtimer_event(unsigned long now,
+					    unsigned long expires)
+{
+	ktime_t hr_delta = hrtimer_get_next_event();
+	struct timespec tsdelta;
+
+	if (hr_delta.tv64 == KTIME_MAX)
+		return expires;
 
-	if (time_before(hr_expires, expires))
-		return hr_expires;
+	if (hr_delta.tv64 <= TICK_NSEC)
+		return now;
 
+	tsdelta = ktime_to_timespec(hr_delta);
+	now += timespec_to_jiffies(&tsdelta);
+	if (time_before(now, expires))
+		return now;
 	return expires;
 }
+
+/**
+ * get_next_timer_interrupt - return the jiffy of the next pending timer
+ */
+unsigned long get_next_timer_interrupt(unsigned long now)
+{
+	tvec_base_t *base = __get_cpu_var(tvec_bases);
+	unsigned long expires;
+
+	spin_lock(&base->lock);
+	expires = __next_timer_interrupt(base);
+	spin_unlock(&base->lock);
+
+	if (time_before_eq(expires, now))
+		return now;
+
+	return cmp_next_hrtimer_event(now, expires);
+}
+
+#ifdef CONFIG_NO_IDLE_HZ
+unsigned long next_timer_interrupt(void)
+{
+	return get_next_timer_interrupt(jiffies);
+}
+#endif
+
 #endif
 
 /******************************************************************/
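The rewritten search above walks the timer wheel the same way a cascade would: scan the remaining slots at one level, fold the consumed slots into timer_jiffies, then shift to the next, coarser wheel. A standalone sketch of just that index arithmetic, using the wheel geometry from kernel/timer.c with CONFIG_BASE_SMALL=0 (the sample jiffy value is arbitrary):

#include <stdio.h>

/* Timer-wheel geometry as in kernel/timer.c (CONFIG_BASE_SMALL=0). */
#define TVR_BITS 8
#define TVR_SIZE (1 << TVR_BITS)	/* 256 tv1 slots */
#define TVR_MASK (TVR_SIZE - 1)
#define TVN_BITS 6
#define TVN_SIZE (1 << TVN_BITS)	/* 64 slots in each of tv2..tv5 */
#define TVN_MASK (TVN_SIZE - 1)

int main(void)
{
	unsigned long timer_jiffies = 1000123;
	int level;

	/* tv1 is indexed directly by the low TVR_BITS of the jiffy count. */
	printf("tv1 slot: %lu\n", timer_jiffies & TVR_MASK);

	/*
	 * __next_timer_interrupt() advances levels like a cascade: skip the
	 * slots already consumed at this level, then shift to the next one.
	 */
	int index = timer_jiffies & TVR_MASK;
	if (index)
		timer_jiffies += TVR_SIZE - index;
	timer_jiffies >>= TVR_BITS;

	for (level = 0; level < 4; level++) {
		printf("tv%d slot: %lu\n", level + 2, timer_jiffies & TVN_MASK);
		index = timer_jiffies & TVN_MASK;
		if (index)
			timer_jiffies += TVN_SIZE - index;
		timer_jiffies >>= TVN_BITS;
	}
	return 0;
}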
@@ -832,32 +876,35 @@ EXPORT_SYMBOL(do_settimeofday);
  *
  * Accumulates current time interval and initializes new clocksource
  */
-static int change_clocksource(void)
+static void change_clocksource(void)
 {
 	struct clocksource *new;
 	cycle_t now;
 	u64 nsec;
+
 	new = clocksource_get_next();
-	if (clock != new) {
-		now = clocksource_read(new);
-		nsec = __get_nsec_offset();
-		timespec_add_ns(&xtime, nsec);
-
-		clock = new;
-		clock->cycle_last = now;
-		printk(KERN_INFO "Time: %s clocksource has been installed.\n",
-		       clock->name);
-		return 1;
-	} else if (clock->update_callback) {
-		return clock->update_callback();
-	}
-	return 0;
+
+	if (clock == new)
+		return;
+
+	now = clocksource_read(new);
+	nsec = __get_nsec_offset();
+	timespec_add_ns(&xtime, nsec);
+
+	clock = new;
+	clock->cycle_last = now;
+
+	clock->error = 0;
+	clock->xtime_nsec = 0;
+	clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
+
+	tick_clock_notify();
+
+	printk(KERN_INFO "Time: %s clocksource has been installed.\n",
+	       clock->name);
 }
 #else
-static inline int change_clocksource(void)
-{
-	return 0;
-}
+static inline void change_clocksource(void) { }
 #endif
 
 /**
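Before switching, change_clocksource() folds the nanoseconds accumulated on the old clocksource into xtime via __get_nsec_offset(), which is not part of this hunk. Assuming the standard clocksource scheme, that helper scales the cycle delta since cycle_last by the source's mult and shift; a rough self-contained sketch (the struct and numbers are made up for illustration):

#include <stdint.h>
#include <stdio.h>

/* Reduced clocksource: mult/shift scale raw cycles to nanoseconds. */
struct clocksource_lite {
	uint64_t cycle_last;
	uint32_t mult;
	uint32_t shift;
};

/* ns = (cycles_elapsed * mult) >> shift — the usual cyc2ns scaling. */
static uint64_t nsec_offset(const struct clocksource_lite *cs, uint64_t now)
{
	return ((now - cs->cycle_last) * cs->mult) >> cs->shift;
}

int main(void)
{
	/* A pretend 10 MHz counter: 100 ns/cycle encoded as mult=100<<shift. */
	struct clocksource_lite cs = {
		.cycle_last = 0, .mult = 100 << 8, .shift = 8
	};

	printf("offset after 10000 cycles: %llu ns\n",
	       (unsigned long long)nsec_offset(&cs, 10000));
	return 0;
}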
@@ -871,33 +918,56 @@ int timekeeping_is_continuous(void)
 	do {
 		seq = read_seqbegin(&xtime_lock);
 
-		ret = clock->is_continuous;
+		ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
 
 	} while (read_seqretry(&xtime_lock, seq));
 
 	return ret;
 }
 
+/**
+ * read_persistent_clock - Return time in seconds from the persistent clock.
+ *
+ * Weak dummy function for arches that do not yet support it.
+ * Returns seconds from epoch using the battery backed persistent clock.
+ * Returns zero if unsupported.
+ *
+ * XXX - Be sure to remove it once all arches implement it.
+ */
+unsigned long __attribute__((weak)) read_persistent_clock(void)
+{
+	return 0;
+}
+
 /*
  * timekeeping_init - Initializes the clocksource and common timekeeping values
  */
 void __init timekeeping_init(void)
 {
 	unsigned long flags;
+	unsigned long sec = read_persistent_clock();
 
 	write_seqlock_irqsave(&xtime_lock, flags);
 
 	ntp_clear();
 
 	clock = clocksource_get_next();
-	clocksource_calculate_interval(clock, tick_nsec);
+	clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
 	clock->cycle_last = clocksource_read(clock);
 
+	xtime.tv_sec = sec;
+	xtime.tv_nsec = 0;
+	set_normalized_timespec(&wall_to_monotonic,
+		-xtime.tv_sec, -xtime.tv_nsec);
+
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 }
 
-
+/* flag for if timekeeping is suspended */
 static int timekeeping_suspended;
+/* time in seconds when suspend began */
+static unsigned long timekeeping_suspend_time;
+
 /**
  * timekeeping_resume - Resumes the generic timekeeping subsystem.
  * @dev: unused
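read_persistent_clock() above uses a weak symbol as an opt-in hook: the generic code ships a do-nothing fallback, and any architecture can override it simply by providing a strong definition of the same name. A small compilable demo of the mechanism (GCC/Clang on ELF; the function body is this patch's fallback, the caller is invented):

#include <stdio.h>

/*
 * Weak default, as in the hunk above: another translation unit may
 * override it with a strong definition of the same symbol.
 */
unsigned long __attribute__((weak)) read_persistent_clock(void)
{
	return 0;	/* "no persistent clock available" */
}

int main(void)
{
	unsigned long sec = read_persistent_clock();

	if (sec)
		printf("boot time from persistent clock: %lu s\n", sec);
	else
		printf("no persistent clock; wall time starts at 0\n");
	return 0;
}

Linking in another object file that defines a non-weak read_persistent_clock() silently replaces the fallback, which is exactly how an architecture opts in.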
@@ -909,13 +979,26 @@ static int timekeeping_suspended;
 static int timekeeping_resume(struct sys_device *dev)
 {
 	unsigned long flags;
+	unsigned long now = read_persistent_clock();
 
 	write_seqlock_irqsave(&xtime_lock, flags);
-	/* restart the last cycle value */
+
+	if (now && (now > timekeeping_suspend_time)) {
+		unsigned long sleep_length = now - timekeeping_suspend_time;
+
+		xtime.tv_sec += sleep_length;
+		wall_to_monotonic.tv_sec -= sleep_length;
+	}
+	/* re-base the last cycle value */
 	clock->cycle_last = clocksource_read(clock);
 	clock->error = 0;
 	timekeeping_suspended = 0;
 	write_sequnlock_irqrestore(&xtime_lock, flags);
+
+	touch_softlockup_watchdog();
+	/* Resume hrtimers */
+	clock_was_set();
+
 	return 0;
 }
 
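The resume path above injects the sleep interval into the wall clock while subtracting it from wall_to_monotonic, so the monotonic clock does not jump across a suspend. A toy model of that bookkeeping (values are arbitrary; the invariant is monotonic = wall + wall_to_monotonic):

#include <stdio.h>

/* Toy versions of the two timekeeping bases touched on resume. */
static long xtime_sec = 1000000;		/* wall clock, seconds */
static long wall_to_monotonic_sec = -1000000;	/* wall + w2m = monotonic */

static void resume_after(long suspend_time, long now)
{
	if (now && now > suspend_time) {
		long sleep_length = now - suspend_time;

		/* Wall time moves forward by the time spent asleep... */
		xtime_sec += sleep_length;
		/* ...while the monotonic clock must not jump. */
		wall_to_monotonic_sec -= sleep_length;
	}
}

int main(void)
{
	long mono_before = xtime_sec + wall_to_monotonic_sec;

	resume_after(500, 800);	/* slept 300 s by the persistent clock */

	printf("wall advanced to %ld\n", xtime_sec);
	printf("monotonic unchanged: %ld -> %ld\n",
	       mono_before, xtime_sec + wall_to_monotonic_sec);
	return 0;
}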
@@ -925,6 +1008,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
 
 	write_seqlock_irqsave(&xtime_lock, flags);
 	timekeeping_suspended = 1;
+	timekeeping_suspend_time = read_persistent_clock();
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 	return 0;
 }
@@ -1089,11 +1173,8 @@ static void update_wall_time(void)
 	clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
 
 	/* check to see if there is a new clocksource to use */
-	if (change_clocksource()) {
-		clock->error = 0;
-		clock->xtime_nsec = 0;
-		clocksource_calculate_interval(clock, tick_nsec);
-	}
+	change_clocksource();
+	update_vsyscall(&xtime, clock);
 }
 
 /*
@@ -1162,11 +1243,9 @@ static inline void calc_load(unsigned long ticks)
  * This read-write spinlock protects us from races in SMP while
  * playing with xtime and avenrun.
  */
-#ifndef ARCH_HAVE_XTIME_LOCK
-__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
+__attribute__((weak)) __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
 
 EXPORT_SYMBOL(xtime_lock);
-#endif
 
 /*
  * This function runs timers and the timer-tq in bottom half context.
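xtime_lock is the seqlock that timekeeping readers, such as timekeeping_is_continuous() in an earlier hunk, poll with the read_seqbegin()/read_seqretry() pattern. A single-threaded toy of that retry loop (the writer side and the memory barriers a real seqlock needs are deliberately omitted, so this only illustrates the control flow):

#include <stdio.h>

/* A toy sequence counter standing in for xtime_lock's read side. */
static volatile unsigned seq;
static long tv_sec, tv_nsec;

static unsigned toy_read_seqbegin(void)
{
	unsigned s;

	do {
		s = seq;	/* odd value => writer in progress, spin */
	} while (s & 1);
	return s;
}

static int toy_read_seqretry(unsigned start)
{
	return seq != start;	/* counter changed => redo the read */
}

int main(void)
{
	long sec, nsec;
	unsigned start;

	do {
		start = toy_read_seqbegin();
		sec = tv_sec;	/* snapshot both fields... */
		nsec = tv_nsec;	/* ...they must be mutually consistent */
	} while (toy_read_seqretry(start));

	printf("consistent snapshot: %ld.%09ld\n", sec, nsec);
	return 0;
}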
@@ -1175,7 +1254,8 @@ static void run_timer_softirq(struct softirq_action *h)
 {
 	tvec_base_t *base = __get_cpu_var(tvec_bases);
 
 	hrtimer_run_queues();
+
 	if (time_after_eq(jiffies, base->timer_jiffies))
 		__run_timers(base);
 }
@@ -1621,6 +1701,8 @@ void __init init_timers(void)
 	int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
 				(void *)(long)smp_processor_id());
 
+	init_timer_stats();
+
 	BUG_ON(err == NOTIFY_BAD);
 	register_cpu_notifier(&timers_nb);
 	open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);