Diffstat (limited to 'kernel/timer.c')
-rw-r--r--	kernel/timer.c	| 145
1 file changed, 59 insertions(+), 86 deletions(-)

diff --git a/kernel/timer.c b/kernel/timer.c
index cffffad01c31..a7f07d5a6241 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,6 +37,8 @@
 #include <linux/delay.h>
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
+#include <linux/perf_counter.h>
+#include <linux/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -378,6 +380,8 @@ static void timer_stats_account_timer(struct timer_list *timer)
 {
 	unsigned int flag = 0;
 
+	if (likely(!timer->start_site))
+		return;
 	if (unlikely(tbase_get_deferrable(timer->base)))
 		flag |= TIMER_STATS_FLAG_DEFERRABLE;
 
@@ -604,13 +608,12 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
 }
 
 static inline int
-__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
+__mod_timer(struct timer_list *timer, unsigned long expires,
+	    bool pending_only, int pinned)
 {
 	struct tvec_base *base, *new_base;
 	unsigned long flags;
-	int ret;
-
-	ret = 0;
+	int ret = 0, cpu;
 
 	timer_stats_timer_set_start_info(timer);
 	BUG_ON(!timer->function);
@@ -629,6 +632,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 
 	new_base = __get_cpu_var(tvec_bases);
 
+	cpu = smp_processor_id();
+
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+		int preferred_cpu = get_nohz_load_balancer();
+
+		if (preferred_cpu >= 0)
+			cpu = preferred_cpu;
+	}
+#endif
+	new_base = per_cpu(tvec_bases, cpu);
+
 	if (base != new_base) {
 		/*
 		 * We are trying to schedule the timer on the local CPU.
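
Note: with this hunk, __mod_timer() first picks the local CPU's base, then
lets an unpinned timer be rerouted to the NO_HZ load balancer's CPU when the
local CPU is idle and the timer_migration sysctl is enabled. The two values
callers pass for the new "pinned" argument are plain flags; a minimal sketch,
assuming the definitions match what this series adds to include/linux/timer.h:

	/* Sketch of the pinned-flag values consumed by __mod_timer() above. */
	#define TIMER_NOT_PINNED	0	/* may migrate away while this CPU idles */
	#define TIMER_PINNED		1	/* must stay on the CPU it was armed on */
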
@@ -668,7 +683,7 @@ out_unlock:
  */
 int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 {
-	return __mod_timer(timer, expires, true);
+	return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer_pending);
 
@@ -699,14 +714,36 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
 	 * networking code - if the timer is re-modified
 	 * to be the same thing then just return:
 	 */
-	if (timer->expires == expires && timer_pending(timer))
+	if (timer_pending(timer) && timer->expires == expires)
 		return 1;
 
-	return __mod_timer(timer, expires, false);
+	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer);
 
 /**
+ * mod_timer_pinned - modify a timer's timeout
+ * @timer: the timer to be modified
+ * @expires: new timeout in jiffies
+ *
+ * mod_timer_pinned() is a way to update the expire field of an
+ * active timer (if the timer is inactive it will be activated)
+ * and not allow the timer to be migrated to a different CPU.
+ *
+ * mod_timer_pinned(timer, expires) is equivalent to:
+ *
+ *     del_timer(timer); timer->expires = expires; add_timer(timer);
+ */
+int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
+{
+	if (timer->expires == expires && timer_pending(timer))
+		return 1;
+
+	return __mod_timer(timer, expires, false, TIMER_PINNED);
+}
+EXPORT_SYMBOL(mod_timer_pinned);
+
+/**
  * add_timer - start a timer
  * @timer: the timer to be added
 *
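
A short usage sketch for the new API (hypothetical driver code, not part of
this patch; the timer callback signature with an unsigned long argument is
the one current at the time of this change):

	static struct timer_list my_poll_timer;		/* hypothetical */

	static void my_poll_fn(unsigned long data)
	{
		/* ... work that must keep running on the arming CPU ... */

		/* Re-arm in ~100ms; TIMER_PINNED prevents CPU migration. */
		mod_timer_pinned(&my_poll_timer, jiffies + HZ / 10);
	}
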
@@ -756,6 +793,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
 	wake_up_idle_cpu(cpu);
 	spin_unlock_irqrestore(&base->lock, flags);
 }
+EXPORT_SYMBOL_GPL(add_timer_on);
 
 /**
  * del_timer - deactivate a timer.
@@ -1015,6 +1053,9 @@ cascade:
 	index = slot = timer_jiffies & TVN_MASK;
 	do {
 		list_for_each_entry(nte, varp->vec + slot, entry) {
+			if (tbase_get_deferrable(nte->base))
+				continue;
+
 			found = 1;
 			if (time_before(nte->expires, expires))
 				expires = nte->expires;
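
The added check makes the next-expiry search ignore deferrable timers, so
they no longer shorten how long an idle NO_HZ CPU may sleep. A minimal
sketch of arming such a timer; init_timer_deferrable() is the existing API,
all other names are hypothetical:

	static struct timer_list housekeeping_timer;	/* hypothetical */

	init_timer_deferrable(&housekeeping_timer);
	housekeeping_timer.function = housekeeping_fn;	/* hypothetical */
	housekeeping_timer.data = 0;
	housekeeping_timer.expires = jiffies + 5 * HZ;
	add_timer(&housekeeping_timer);	/* fires only once the CPU wakes anyway */
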
@@ -1123,53 +1164,14 @@ void update_process_times(int user_tick)
 }
 
 /*
- * Nr of active tasks - counted in fixed-point numbers
- */
-static unsigned long count_active_tasks(void)
-{
-	return nr_active() * FIXED_1;
-}
-
-/*
- * Hmm.. Changed this, as the GNU make sources (load.c) seems to
- * imply that avenrun[] is the standard name for this kind of thing.
- * Nothing else seems to be standardized: the fractional size etc
- * all seem to differ on different machines.
- *
- * Requires xtime_lock to access.
- */
-unsigned long avenrun[3];
-
-EXPORT_SYMBOL(avenrun);
-
-/*
- * calc_load - given tick count, update the avenrun load estimates.
- * This is called while holding a write_lock on xtime_lock.
- */
-static inline void calc_load(unsigned long ticks)
-{
-	unsigned long active_tasks; /* fixed-point */
-	static int count = LOAD_FREQ;
-
-	count -= ticks;
-	if (unlikely(count < 0)) {
-		active_tasks = count_active_tasks();
-		do {
-			CALC_LOAD(avenrun[0], EXP_1, active_tasks);
-			CALC_LOAD(avenrun[1], EXP_5, active_tasks);
-			CALC_LOAD(avenrun[2], EXP_15, active_tasks);
-			count += LOAD_FREQ;
-		} while (count < 0);
-	}
-}
-
-/*
  * This function runs timers and the timer-tq in bottom half context.
  */
 static void run_timer_softirq(struct softirq_action *h)
 {
 	struct tvec_base *base = __get_cpu_var(tvec_bases);
 
+	perf_counter_do_pending();
+
 	hrtimer_run_pending();
 
 	if (time_after_eq(jiffies, base->timer_jiffies))
@@ -1187,16 +1189,6 @@ void run_local_timers(void)
 }
 
 /*
- * Called by the timer interrupt. xtime_lock must already be taken
- * by the timer IRQ!
- */
-static inline void update_times(unsigned long ticks)
-{
-	update_wall_time();
-	calc_load(ticks);
-}
-
-/*
  * The 64-bit jiffies value is not atomic - you MUST NOT read it
  * without sampling the sequence number in xtime_lock.
  * jiffies is defined in the linker script...
@@ -1205,7 +1197,8 @@ static inline void update_times(unsigned long ticks)
 void do_timer(unsigned long ticks)
 {
 	jiffies_64 += ticks;
-	update_times(ticks);
+	update_wall_time();
+	calc_global_load();
 }
 
 #ifdef __ARCH_WANT_SYS_ALARM
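
do_timer() now updates the load averages via calc_global_load(), which the
scheduler maintains without holding xtime_lock. A simplified outline of how
such a helper can work, assuming an atomic task count (calc_load_tasks) and
a jiffies deadline (calc_load_update) kept by kernel/sched.c; this is a
sketch, not the actual implementation:

	void calc_global_load(void)
	{
		long active;

		/* Recompute at most once per LOAD_FREQ interval. */
		if (time_before(jiffies, calc_load_update))
			return;

		active = atomic_long_read(&calc_load_tasks);
		active = active > 0 ? active * FIXED_1 : 0;

		/* calc_load() here is sched.c's value-returning helper,
		 * not the in-place CALC_LOAD version removed above. */
		avenrun[0] = calc_load(avenrun[0], EXP_1, active);
		avenrun[1] = calc_load(avenrun[1], EXP_5, active);
		avenrun[2] = calc_load(avenrun[2], EXP_15, active);

		calc_load_update += LOAD_FREQ;
	}
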
@@ -1353,7 +1346,7 @@ signed long __sched schedule_timeout(signed long timeout)
 	expire = timeout + jiffies;
 
 	setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
-	__mod_timer(&timer, expire, false);
+	__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
 	schedule();
 	del_singleshot_timer_sync(&timer);
 
@@ -1406,37 +1399,17 @@ int do_sysinfo(struct sysinfo *info)
 {
 	unsigned long mem_total, sav_total;
 	unsigned int mem_unit, bitcount;
-	unsigned long seq;
+	struct timespec tp;
 
 	memset(info, 0, sizeof(struct sysinfo));
 
-	do {
-		struct timespec tp;
-		seq = read_seqbegin(&xtime_lock);
-
-		/*
-		 * This is annoying. The below is the same thing
-		 * posix_get_clock_monotonic() does, but it wants to
-		 * take the lock which we want to cover the loads stuff
-		 * too.
-		 */
-
-		getnstimeofday(&tp);
-		tp.tv_sec += wall_to_monotonic.tv_sec;
-		tp.tv_nsec += wall_to_monotonic.tv_nsec;
-		monotonic_to_bootbased(&tp);
-		if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
-			tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
-			tp.tv_sec++;
-		}
-		info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
+	ktime_get_ts(&tp);
+	monotonic_to_bootbased(&tp);
+	info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
 
-		info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-		info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-		info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+	get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
 
-		info->procs = nr_threads;
-	} while (read_seqretry(&xtime_lock, seq));
+	info->procs = nr_threads;
 
 	si_meminfo(info);
 	si_swapinfo(info);
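
Load-average consumers now read through get_avenrun() instead of touching
avenrun[] directly. As a usage sketch, mirroring the /proc/loadavg style of
reading (FIXED_1/200 adds the conventional rounding offset; LOAD_INT and
LOAD_FRAC come from linux/sched.h):

	unsigned long avnrun[3];

	get_avenrun(avnrun, FIXED_1/200, 0);
	printk("load: %lu.%02lu %lu.%02lu %lu.%02lu\n",
	       LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
	       LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
	       LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]));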