about summary refs log tree commit diff stats
path: root/kernel/timer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/timer.c')
-rw-r--r--  kernel/timer.c  215
1 files changed, 118 insertions, 97 deletions
diff --git a/kernel/timer.c b/kernel/timer.c
index b4555568b4e4..5db5a8d26811 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,6 +37,8 @@
37#include <linux/delay.h> 37#include <linux/delay.h>
38#include <linux/tick.h> 38#include <linux/tick.h>
39#include <linux/kallsyms.h> 39#include <linux/kallsyms.h>
40#include <linux/perf_event.h>
41#include <linux/sched.h>
40 42
41#include <asm/uaccess.h> 43#include <asm/uaccess.h>
42#include <asm/unistd.h> 44#include <asm/unistd.h>
@@ -44,6 +46,9 @@
44#include <asm/timex.h> 46#include <asm/timex.h>
45#include <asm/io.h> 47#include <asm/io.h>
46 48
49#define CREATE_TRACE_POINTS
50#include <trace/events/timer.h>
51
47u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; 52u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
48 53
49EXPORT_SYMBOL(jiffies_64); 54EXPORT_SYMBOL(jiffies_64);
@@ -70,6 +75,7 @@ struct tvec_base {
70 spinlock_t lock; 75 spinlock_t lock;
71 struct timer_list *running_timer; 76 struct timer_list *running_timer;
72 unsigned long timer_jiffies; 77 unsigned long timer_jiffies;
78 unsigned long next_timer;
73 struct tvec_root tv1; 79 struct tvec_root tv1;
74 struct tvec tv2; 80 struct tvec tv2;
75 struct tvec tv3; 81 struct tvec tv3;
@@ -378,6 +384,8 @@ static void timer_stats_account_timer(struct timer_list *timer)
378{ 384{
379 unsigned int flag = 0; 385 unsigned int flag = 0;
380 386
387 if (likely(!timer->start_site))
388 return;
381 if (unlikely(tbase_get_deferrable(timer->base))) 389 if (unlikely(tbase_get_deferrable(timer->base)))
382 flag |= TIMER_STATS_FLAG_DEFERRABLE; 390 flag |= TIMER_STATS_FLAG_DEFERRABLE;
383 391
@@ -516,6 +524,25 @@ static inline void debug_timer_activate(struct timer_list *timer) { }
516static inline void debug_timer_deactivate(struct timer_list *timer) { } 524static inline void debug_timer_deactivate(struct timer_list *timer) { }
517#endif 525#endif
518 526
527static inline void debug_init(struct timer_list *timer)
528{
529 debug_timer_init(timer);
530 trace_timer_init(timer);
531}
532
533static inline void
534debug_activate(struct timer_list *timer, unsigned long expires)
535{
536 debug_timer_activate(timer);
537 trace_timer_start(timer, expires);
538}
539
540static inline void debug_deactivate(struct timer_list *timer)
541{
542 debug_timer_deactivate(timer);
543 trace_timer_cancel(timer);
544}
545
519static void __init_timer(struct timer_list *timer, 546static void __init_timer(struct timer_list *timer,
520 const char *name, 547 const char *name,
521 struct lock_class_key *key) 548 struct lock_class_key *key)
@@ -531,17 +558,20 @@ static void __init_timer(struct timer_list *timer,
531} 558}
532 559
533/** 560/**
534 * init_timer - initialize a timer. 561 * init_timer_key - initialize a timer
535 * @timer: the timer to be initialized 562 * @timer: the timer to be initialized
563 * @name: name of the timer
564 * @key: lockdep class key of the fake lock used for tracking timer
565 * sync lock dependencies
536 * 566 *
537 * init_timer() must be done to a timer prior calling *any* of the 567 * init_timer_key() must be done to a timer prior calling *any* of the
538 * other timer functions. 568 * other timer functions.
539 */ 569 */
540void init_timer_key(struct timer_list *timer, 570void init_timer_key(struct timer_list *timer,
541 const char *name, 571 const char *name,
542 struct lock_class_key *key) 572 struct lock_class_key *key)
543{ 573{
544 debug_timer_init(timer); 574 debug_init(timer);
545 __init_timer(timer, name, key); 575 __init_timer(timer, name, key);
546} 576}
547EXPORT_SYMBOL(init_timer_key); 577EXPORT_SYMBOL(init_timer_key);
@@ -560,7 +590,7 @@ static inline void detach_timer(struct timer_list *timer,
560{ 590{
561 struct list_head *entry = &timer->entry; 591 struct list_head *entry = &timer->entry;
562 592
563 debug_timer_deactivate(timer); 593 debug_deactivate(timer);
564 594
565 __list_del(entry->prev, entry->next); 595 __list_del(entry->prev, entry->next);
566 if (clear_pending) 596 if (clear_pending)
@@ -601,13 +631,12 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
601} 631}
602 632
603static inline int 633static inline int
604__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) 634__mod_timer(struct timer_list *timer, unsigned long expires,
635 bool pending_only, int pinned)
605{ 636{
606 struct tvec_base *base, *new_base; 637 struct tvec_base *base, *new_base;
607 unsigned long flags; 638 unsigned long flags;
608 int ret; 639 int ret = 0 , cpu;
609
610 ret = 0;
611 640
612 timer_stats_timer_set_start_info(timer); 641 timer_stats_timer_set_start_info(timer);
613 BUG_ON(!timer->function); 642 BUG_ON(!timer->function);
@@ -616,16 +645,31 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
616 645
617 if (timer_pending(timer)) { 646 if (timer_pending(timer)) {
618 detach_timer(timer, 0); 647 detach_timer(timer, 0);
648 if (timer->expires == base->next_timer &&
649 !tbase_get_deferrable(timer->base))
650 base->next_timer = base->timer_jiffies;
619 ret = 1; 651 ret = 1;
620 } else { 652 } else {
621 if (pending_only) 653 if (pending_only)
622 goto out_unlock; 654 goto out_unlock;
623 } 655 }
624 656
625 debug_timer_activate(timer); 657 debug_activate(timer, expires);
626 658
627 new_base = __get_cpu_var(tvec_bases); 659 new_base = __get_cpu_var(tvec_bases);
628 660
661 cpu = smp_processor_id();
662
663#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
664 if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
665 int preferred_cpu = get_nohz_load_balancer();
666
667 if (preferred_cpu >= 0)
668 cpu = preferred_cpu;
669 }
670#endif
671 new_base = per_cpu(tvec_bases, cpu);
672
629 if (base != new_base) { 673 if (base != new_base) {
630 /* 674 /*
631 * We are trying to schedule the timer on the local CPU. 675 * We are trying to schedule the timer on the local CPU.
@@ -645,6 +689,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
645 } 689 }
646 690
647 timer->expires = expires; 691 timer->expires = expires;
692 if (time_before(timer->expires, base->next_timer) &&
693 !tbase_get_deferrable(timer->base))
694 base->next_timer = timer->expires;
648 internal_add_timer(base, timer); 695 internal_add_timer(base, timer);
649 696
650out_unlock: 697out_unlock:
@@ -665,7 +712,7 @@ out_unlock:
665 */ 712 */
666int mod_timer_pending(struct timer_list *timer, unsigned long expires) 713int mod_timer_pending(struct timer_list *timer, unsigned long expires)
667{ 714{
668 return __mod_timer(timer, expires, true); 715 return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
669} 716}
670EXPORT_SYMBOL(mod_timer_pending); 717EXPORT_SYMBOL(mod_timer_pending);
671 718
@@ -696,14 +743,36 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
696 * networking code - if the timer is re-modified 743 * networking code - if the timer is re-modified
697 * to be the same thing then just return: 744 * to be the same thing then just return:
698 */ 745 */
699 if (timer->expires == expires && timer_pending(timer)) 746 if (timer_pending(timer) && timer->expires == expires)
700 return 1; 747 return 1;
701 748
702 return __mod_timer(timer, expires, false); 749 return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
703} 750}
704EXPORT_SYMBOL(mod_timer); 751EXPORT_SYMBOL(mod_timer);
705 752
706/** 753/**
754 * mod_timer_pinned - modify a timer's timeout
755 * @timer: the timer to be modified
756 * @expires: new timeout in jiffies
757 *
758 * mod_timer_pinned() is a way to update the expire field of an
759 * active timer (if the timer is inactive it will be activated)
760 * and not allow the timer to be migrated to a different CPU.
761 *
762 * mod_timer_pinned(timer, expires) is equivalent to:
763 *
764 * del_timer(timer); timer->expires = expires; add_timer(timer);
765 */
766int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
767{
768 if (timer->expires == expires && timer_pending(timer))
769 return 1;
770
771 return __mod_timer(timer, expires, false, TIMER_PINNED);
772}
773EXPORT_SYMBOL(mod_timer_pinned);
774
775/**
707 * add_timer - start a timer 776 * add_timer - start a timer
708 * @timer: the timer to be added 777 * @timer: the timer to be added
709 * 778 *
@@ -740,7 +809,10 @@ void add_timer_on(struct timer_list *timer, int cpu)
740 BUG_ON(timer_pending(timer) || !timer->function); 809 BUG_ON(timer_pending(timer) || !timer->function);
741 spin_lock_irqsave(&base->lock, flags); 810 spin_lock_irqsave(&base->lock, flags);
742 timer_set_base(timer, base); 811 timer_set_base(timer, base);
743 debug_timer_activate(timer); 812 debug_activate(timer, timer->expires);
813 if (time_before(timer->expires, base->next_timer) &&
814 !tbase_get_deferrable(timer->base))
815 base->next_timer = timer->expires;
744 internal_add_timer(base, timer); 816 internal_add_timer(base, timer);
745 /* 817 /*
746 * Check whether the other CPU is idle and needs to be 818 * Check whether the other CPU is idle and needs to be
@@ -753,6 +825,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
753 wake_up_idle_cpu(cpu); 825 wake_up_idle_cpu(cpu);
754 spin_unlock_irqrestore(&base->lock, flags); 826 spin_unlock_irqrestore(&base->lock, flags);
755} 827}
828EXPORT_SYMBOL_GPL(add_timer_on);
756 829
757/** 830/**
758 * del_timer - deactive a timer. 831 * del_timer - deactive a timer.
@@ -776,6 +849,9 @@ int del_timer(struct timer_list *timer)
776 base = lock_timer_base(timer, &flags); 849 base = lock_timer_base(timer, &flags);
777 if (timer_pending(timer)) { 850 if (timer_pending(timer)) {
778 detach_timer(timer, 1); 851 detach_timer(timer, 1);
852 if (timer->expires == base->next_timer &&
853 !tbase_get_deferrable(timer->base))
854 base->next_timer = base->timer_jiffies;
779 ret = 1; 855 ret = 1;
780 } 856 }
781 spin_unlock_irqrestore(&base->lock, flags); 857 spin_unlock_irqrestore(&base->lock, flags);
@@ -809,6 +885,9 @@ int try_to_del_timer_sync(struct timer_list *timer)
809 ret = 0; 885 ret = 0;
810 if (timer_pending(timer)) { 886 if (timer_pending(timer)) {
811 detach_timer(timer, 1); 887 detach_timer(timer, 1);
888 if (timer->expires == base->next_timer &&
889 !tbase_get_deferrable(timer->base))
890 base->next_timer = base->timer_jiffies;
812 ret = 1; 891 ret = 1;
813 } 892 }
814out: 893out:
@@ -943,7 +1022,9 @@ static inline void __run_timers(struct tvec_base *base)
943 */ 1022 */
944 lock_map_acquire(&lockdep_map); 1023 lock_map_acquire(&lockdep_map);
945 1024
1025 trace_timer_expire_entry(timer);
946 fn(data); 1026 fn(data);
1027 trace_timer_expire_exit(timer);
947 1028
948 lock_map_release(&lockdep_map); 1029 lock_map_release(&lockdep_map);
949 1030
@@ -966,8 +1047,8 @@ static inline void __run_timers(struct tvec_base *base)
966#ifdef CONFIG_NO_HZ 1047#ifdef CONFIG_NO_HZ
967/* 1048/*
968 * Find out when the next timer event is due to happen. This 1049 * Find out when the next timer event is due to happen. This
969 * is used on S/390 to stop all activity when a cpus is idle. 1050 * is used on S/390 to stop all activity when a CPU is idle.
970 * This functions needs to be called disabled. 1051 * This function needs to be called with interrupts disabled.
971 */ 1052 */
972static unsigned long __next_timer_interrupt(struct tvec_base *base) 1053static unsigned long __next_timer_interrupt(struct tvec_base *base)
973{ 1054{
@@ -1012,6 +1093,9 @@ cascade:
1012 index = slot = timer_jiffies & TVN_MASK; 1093 index = slot = timer_jiffies & TVN_MASK;
1013 do { 1094 do {
1014 list_for_each_entry(nte, varp->vec + slot, entry) { 1095 list_for_each_entry(nte, varp->vec + slot, entry) {
1096 if (tbase_get_deferrable(nte->base))
1097 continue;
1098
1015 found = 1; 1099 found = 1;
1016 if (time_before(nte->expires, expires)) 1100 if (time_before(nte->expires, expires))
1017 expires = nte->expires; 1101 expires = nte->expires;
@@ -1090,7 +1174,9 @@ unsigned long get_next_timer_interrupt(unsigned long now)
1090 unsigned long expires; 1174 unsigned long expires;
1091 1175
1092 spin_lock(&base->lock); 1176 spin_lock(&base->lock);
1093 expires = __next_timer_interrupt(base); 1177 if (time_before_eq(base->next_timer, base->timer_jiffies))
1178 base->next_timer = __next_timer_interrupt(base);
1179 expires = base->next_timer;
1094 spin_unlock(&base->lock); 1180 spin_unlock(&base->lock);
1095 1181
1096 if (time_before_eq(expires, now)) 1182 if (time_before_eq(expires, now))
@@ -1112,61 +1198,21 @@ void update_process_times(int user_tick)
1112 /* Note: this timer irq context must be accounted for as well. */ 1198 /* Note: this timer irq context must be accounted for as well. */
1113 account_process_tick(p, user_tick); 1199 account_process_tick(p, user_tick);
1114 run_local_timers(); 1200 run_local_timers();
1115 if (rcu_pending(cpu)) 1201 rcu_check_callbacks(cpu, user_tick);
1116 rcu_check_callbacks(cpu, user_tick);
1117 printk_tick(); 1202 printk_tick();
1118 scheduler_tick(); 1203 scheduler_tick();
1119 run_posix_cpu_timers(p); 1204 run_posix_cpu_timers(p);
1120} 1205}
1121 1206
1122/* 1207/*
1123 * Nr of active tasks - counted in fixed-point numbers
1124 */
1125static unsigned long count_active_tasks(void)
1126{
1127 return nr_active() * FIXED_1;
1128}
1129
1130/*
1131 * Hmm.. Changed this, as the GNU make sources (load.c) seems to
1132 * imply that avenrun[] is the standard name for this kind of thing.
1133 * Nothing else seems to be standardized: the fractional size etc
1134 * all seem to differ on different machines.
1135 *
1136 * Requires xtime_lock to access.
1137 */
1138unsigned long avenrun[3];
1139
1140EXPORT_SYMBOL(avenrun);
1141
1142/*
1143 * calc_load - given tick count, update the avenrun load estimates.
1144 * This is called while holding a write_lock on xtime_lock.
1145 */
1146static inline void calc_load(unsigned long ticks)
1147{
1148 unsigned long active_tasks; /* fixed-point */
1149 static int count = LOAD_FREQ;
1150
1151 count -= ticks;
1152 if (unlikely(count < 0)) {
1153 active_tasks = count_active_tasks();
1154 do {
1155 CALC_LOAD(avenrun[0], EXP_1, active_tasks);
1156 CALC_LOAD(avenrun[1], EXP_5, active_tasks);
1157 CALC_LOAD(avenrun[2], EXP_15, active_tasks);
1158 count += LOAD_FREQ;
1159 } while (count < 0);
1160 }
1161}
1162
1163/*
1164 * This function runs timers and the timer-tq in bottom half context. 1208 * This function runs timers and the timer-tq in bottom half context.
1165 */ 1209 */
1166static void run_timer_softirq(struct softirq_action *h) 1210static void run_timer_softirq(struct softirq_action *h)
1167{ 1211{
1168 struct tvec_base *base = __get_cpu_var(tvec_bases); 1212 struct tvec_base *base = __get_cpu_var(tvec_bases);
1169 1213
1214 perf_event_do_pending();
1215
1170 hrtimer_run_pending(); 1216 hrtimer_run_pending();
1171 1217
1172 if (time_after_eq(jiffies, base->timer_jiffies)) 1218 if (time_after_eq(jiffies, base->timer_jiffies))
@@ -1184,16 +1230,6 @@ void run_local_timers(void)
1184} 1230}
1185 1231
1186/* 1232/*
1187 * Called by the timer interrupt. xtime_lock must already be taken
1188 * by the timer IRQ!
1189 */
1190static inline void update_times(unsigned long ticks)
1191{
1192 update_wall_time();
1193 calc_load(ticks);
1194}
1195
1196/*
1197 * The 64-bit jiffies value is not atomic - you MUST NOT read it 1233 * The 64-bit jiffies value is not atomic - you MUST NOT read it
1198 * without sampling the sequence number in xtime_lock. 1234 * without sampling the sequence number in xtime_lock.
1199 * jiffies is defined in the linker script... 1235 * jiffies is defined in the linker script...
@@ -1202,7 +1238,8 @@ static inline void update_times(unsigned long ticks)
1202void do_timer(unsigned long ticks) 1238void do_timer(unsigned long ticks)
1203{ 1239{
1204 jiffies_64 += ticks; 1240 jiffies_64 += ticks;
1205 update_times(ticks); 1241 update_wall_time();
1242 calc_global_load();
1206} 1243}
1207 1244
1208#ifdef __ARCH_WANT_SYS_ALARM 1245#ifdef __ARCH_WANT_SYS_ALARM
@@ -1350,7 +1387,7 @@ signed long __sched schedule_timeout(signed long timeout)
1350 expire = timeout + jiffies; 1387 expire = timeout + jiffies;
1351 1388
1352 setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); 1389 setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
1353 __mod_timer(&timer, expire, false); 1390 __mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
1354 schedule(); 1391 schedule();
1355 del_singleshot_timer_sync(&timer); 1392 del_singleshot_timer_sync(&timer);
1356 1393
@@ -1403,37 +1440,17 @@ int do_sysinfo(struct sysinfo *info)
1403{ 1440{
1404 unsigned long mem_total, sav_total; 1441 unsigned long mem_total, sav_total;
1405 unsigned int mem_unit, bitcount; 1442 unsigned int mem_unit, bitcount;
1406 unsigned long seq; 1443 struct timespec tp;
1407 1444
1408 memset(info, 0, sizeof(struct sysinfo)); 1445 memset(info, 0, sizeof(struct sysinfo));
1409 1446
1410 do { 1447 ktime_get_ts(&tp);
1411 struct timespec tp; 1448 monotonic_to_bootbased(&tp);
1412 seq = read_seqbegin(&xtime_lock); 1449 info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
1413
1414 /*
1415 * This is annoying. The below is the same thing
1416 * posix_get_clock_monotonic() does, but it wants to
1417 * take the lock which we want to cover the loads stuff
1418 * too.
1419 */
1420
1421 getnstimeofday(&tp);
1422 tp.tv_sec += wall_to_monotonic.tv_sec;
1423 tp.tv_nsec += wall_to_monotonic.tv_nsec;
1424 monotonic_to_bootbased(&tp);
1425 if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
1426 tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
1427 tp.tv_sec++;
1428 }
1429 info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
1430 1450
1431 info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); 1451 get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
1432 info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
1433 info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
1434 1452
1435 info->procs = nr_threads; 1453 info->procs = nr_threads;
1436 } while (read_seqretry(&xtime_lock, seq));
1437 1454
1438 si_meminfo(info); 1455 si_meminfo(info);
1439 si_swapinfo(info); 1456 si_swapinfo(info);
@@ -1547,6 +1564,7 @@ static int __cpuinit init_timers_cpu(int cpu)
1547 INIT_LIST_HEAD(base->tv1.vec + j); 1564 INIT_LIST_HEAD(base->tv1.vec + j);
1548 1565
1549 base->timer_jiffies = jiffies; 1566 base->timer_jiffies = jiffies;
1567 base->next_timer = base->timer_jiffies;
1550 return 0; 1568 return 0;
1551} 1569}
1552 1570
@@ -1559,6 +1577,9 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea
1559 timer = list_first_entry(head, struct timer_list, entry); 1577 timer = list_first_entry(head, struct timer_list, entry);
1560 detach_timer(timer, 0); 1578 detach_timer(timer, 0);
1561 timer_set_base(timer, new_base); 1579 timer_set_base(timer, new_base);
1580 if (time_before(timer->expires, new_base->next_timer) &&
1581 !tbase_get_deferrable(timer->base))
1582 new_base->next_timer = timer->expires;
1562 internal_add_timer(new_base, timer); 1583 internal_add_timer(new_base, timer);
1563 } 1584 }
1564} 1585}