Diffstat (limited to 'kernel/timer.c')
 kernel/timer.c | 211 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 156 insertions(+), 55 deletions(-)
diff --git a/kernel/timer.c b/kernel/timer.c
index 5db5a8d26811..f1b8afe1ad86 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -39,6 +39,7 @@
 #include <linux/kallsyms.h>
 #include <linux/perf_event.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -89,8 +90,13 @@ static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
 
 /*
  * Note that all tvec_bases are 2 byte aligned and lower bit of
- * base in timer_list is guaranteed to be zero. Use the LSB for
- * the new flag to indicate whether the timer is deferrable
+ * base in timer_list is guaranteed to be zero. Use the LSB to
+ * indicate whether the timer is deferrable.
+ *
+ * A deferrable timer will work normally when the system is busy, but
+ * will not cause a CPU to come out of idle just to service it; instead,
+ * the timer will be serviced when the CPU eventually wakes up with a
+ * subsequent non-deferrable timer.
  */
 #define TBASE_DEFERRABLE_FLAG		(0x1)
 
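A deferrable timer is requested from driver code through the existing init_timer_deferrable() helper in include/linux/timer.h, which ends up setting TBASE_DEFERRABLE_FLAG. A minimal sketch of a housekeeping timer that should not wake an idle CPU (the callback name and one-second period are illustrative, not from this diff):

	static struct timer_list stats_timer;

	static void stats_fn(unsigned long data)
	{
		/* ... housekeeping work ... */
		mod_timer(&stats_timer, jiffies + HZ);	/* re-arm; may be deferred */
	}

	static void stats_start(void)
	{
		init_timer_deferrable(&stats_timer);
		stats_timer.function = stats_fn;
		stats_timer.data = 0;
		/* fires at jiffies + HZ if the CPU is busy; later if it idles */
		mod_timer(&stats_timer, jiffies + HZ);
	}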
@@ -318,6 +324,24 @@ unsigned long round_jiffies_up_relative(unsigned long j)
 }
 EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
 
+/**
+ * set_timer_slack - set the allowed slack for a timer
+ * @slack_hz: the amount of time (in jiffies) allowed for rounding
+ *
+ * Set the amount of time, in jiffies, that a certain timer has
+ * in terms of slack. By setting this value, the timer subsystem
+ * will schedule the actual timer somewhere between
+ * the time mod_timer() asks for, and that time plus the slack.
+ *
+ * By setting the slack to -1, a percentage of the delay is used
+ * instead.
+ */
+void set_timer_slack(struct timer_list *timer, int slack_hz)
+{
+	timer->slack = slack_hz;
+}
+EXPORT_SYMBOL_GPL(set_timer_slack);
+
 
 static inline void set_running_timer(struct tvec_base *base,
 				     struct timer_list *timer)
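A sketch of how a caller might use the new export to trade punctuality for fewer wakeups; the callback, 25ms period, and 10ms slack are illustrative:

	static void poll_fn(unsigned long data) { /* ... */ }
	static struct timer_list poll_timer;

	static void poll_start(void)
	{
		setup_timer(&poll_timer, poll_fn, 0);
		/* firing anywhere in [t, t + 10ms] is acceptable */
		set_timer_slack(&poll_timer, msecs_to_jiffies(10));
		mod_timer(&poll_timer, jiffies + msecs_to_jiffies(25));
	}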
@@ -549,6 +573,7 @@ static void __init_timer(struct timer_list *timer,
 {
 	timer->entry.next = NULL;
 	timer->base = __raw_get_cpu_var(tvec_bases);
+	timer->slack = -1;
 #ifdef CONFIG_TIMER_STATS
 	timer->start_site = NULL;
 	timer->start_pid = -1;
@@ -557,6 +582,19 @@ static void __init_timer(struct timer_list *timer,
 	lockdep_init_map(&timer->lockdep_map, name, key, 0);
 }
 
+void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
+					 const char *name,
+					 struct lock_class_key *key,
+					 void (*function)(unsigned long),
+					 unsigned long data)
+{
+	timer->function = function;
+	timer->data = data;
+	init_timer_on_stack_key(timer, name, key);
+	timer_set_deferrable(timer);
+}
+EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);
+
 /**
  * init_timer_key - initialize a timer
  * @timer: the timer to be initialized
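The _key suffix follows the existing lockdep naming pattern; callers are expected to reach this through a wrapper macro in include/linux/timer.h (assumed here to be setup_deferrable_timer_on_stack(), which lies outside this diff). A sketch of an on-stack deferrable timeout:

	static void expiry_fn(unsigned long data) { /* ... */ }

	static void wait_with_timeout(void)
	{
		struct timer_list t;

		setup_deferrable_timer_on_stack(&t, expiry_fn, 0);
		mod_timer(&t, jiffies + 2 * HZ);
		/* ... do the work the timer guards ... */
		del_timer_sync(&t);
		destroy_timer_on_stack(&t);
	}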
@@ -656,17 +694,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 
 	debug_activate(timer, expires);
 
-	new_base = __get_cpu_var(tvec_bases);
-
 	cpu = smp_processor_id();
 
 #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
-	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
-		int preferred_cpu = get_nohz_load_balancer();
-
-		if (preferred_cpu >= 0)
-			cpu = preferred_cpu;
-	}
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
+		cpu = get_nohz_timer_target();
 #endif
 	new_base = per_cpu(tvec_bases, cpu);
 
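The open-coded get_nohz_load_balancer() lookup is replaced by the scheduler's get_nohz_timer_target(), so the policy for steering unpinned timers away from idle CPUs lives in one place. Timers armed through the pinned path keep their CPU; a sketch of the two call styles (both timer names are assumptions for illustration):

	static void arm_timers(void)
	{
		/* must stay on this CPU, e.g. a per-cpu watchdog */
		mod_timer_pinned(&percpu_watchdog, jiffies + HZ);

		/* may be placed on a busier CPU while this one is idle */
		mod_timer(&global_flush_timer, jiffies + HZ);
	}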
@@ -716,6 +748,46 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 }
 EXPORT_SYMBOL(mod_timer_pending);
 
+/*
+ * Decide where to put the timer while taking the slack into account
+ *
+ * Algorithm:
+ *   1) calculate the maximum (absolute) time
+ *   2) calculate the highest bit where the expires and new max are different
+ *   3) use this bit to make a mask
+ *   4) use the bitmask to round down the maximum time, so that all last
+ *      bits are zeros
+ */
+static inline
+unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
+{
+	unsigned long expires_limit, mask;
+	int bit;
+
+	expires_limit = expires;
+
+	if (timer->slack >= 0) {
+		expires_limit = expires + timer->slack;
+	} else {
+		unsigned long now = jiffies;
+
+		/* No slack, if already expired else auto slack 0.4% */
+		if (time_after(expires, now))
+			expires_limit = expires + (expires - now)/256;
+	}
+	mask = expires ^ expires_limit;
+	if (mask == 0)
+		return expires;
+
+	bit = find_last_bit(&mask, BITS_PER_LONG);
+
+	mask = (1 << bit) - 1;
+
+	expires_limit = expires_limit & ~(mask);
+
+	return expires_limit;
+}
+
 /**
  * mod_timer - modify a timer's timeout
  * @timer: the timer to be modified
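To make the masking concrete, here is a worked example of apply_slack(); the numbers are chosen for illustration:

	/*
	 * expires = 134 = 0b10000110, timer->slack = 5
	 * expires_limit = 139 = 0b10001011
	 * mask = 134 ^ 139 = 0b00001101  ->  find_last_bit() = 3
	 * mask = (1 << 3) - 1 = 7
	 * expires_limit & ~7 = 136
	 *
	 * The timer is queued for jiffy 136: never earlier than requested,
	 * inside the allowed window [134, 139], and with its low bits cleared
	 * so nearby timers tend to collapse onto shared expiry slots.
	 */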
@@ -746,6 +818,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
 	if (timer_pending(timer) && timer->expires == expires)
 		return 1;
 
+	expires = apply_slack(timer, expires);
+
 	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer);
@@ -882,6 +956,7 @@ int try_to_del_timer_sync(struct timer_list *timer)
 	if (base->running_timer == timer)
 		goto out;
 
+	timer_stats_timer_clear_start_info(timer);
 	ret = 0;
 	if (timer_pending(timer)) {
 		detach_timer(timer, 1);
@@ -955,6 +1030,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
 	return index;
 }
 
+static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
+			  unsigned long data)
+{
+	int preempt_count = preempt_count();
+
+#ifdef CONFIG_LOCKDEP
+	/*
+	 * It is permissible to free the timer from inside the
+	 * function that is called from it, this we need to take into
+	 * account for lockdep too. To avoid bogus "held lock freed"
+	 * warnings as well as problems when looking into
+	 * timer->lockdep_map, make a copy and use that here.
+	 */
+	struct lockdep_map lockdep_map = timer->lockdep_map;
+#endif
+	/*
+	 * Couple the lock chain with the lock chain at
+	 * del_timer_sync() by acquiring the lock_map around the fn()
+	 * call here and in del_timer_sync().
+	 */
+	lock_map_acquire(&lockdep_map);
+
+	trace_timer_expire_entry(timer);
+	fn(data);
+	trace_timer_expire_exit(timer);
+
+	lock_map_release(&lockdep_map);
+
+	if (preempt_count != preempt_count()) {
+		WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
+			  fn, preempt_count, preempt_count());
+		/*
+		 * Restore the preempt count. That gives us a decent
+		 * chance to survive and extract information. If the
+		 * callback kept a lock held, bad luck, but not worse
+		 * than the BUG() we had.
+		 */
+		preempt_count() = preempt_count;
+	}
+}
+
 #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
 
 /**
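Besides pulling the expiry path out of __run_timers(), this turns the old BUG() on a preempt-count mismatch into a WARN_ONCE() plus recovery, so a buggy callback produces diagnostics instead of a dead machine. The bug class it catches, sketched with an assumed lock:

	static void bad_timer_fn(unsigned long data)
	{
		spin_lock(&dev_lock);		/* dev_lock is assumed */
		/* ... error path returns without spin_unlock() ... */
	}
	/* preempt_count() is now one higher than at entry, so
	 * call_timer_fn() fires the WARN_ONCE() and restores the count */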
@@ -998,45 +1114,7 @@ static inline void __run_timers(struct tvec_base *base)
 			detach_timer(timer, 1);
 
 			spin_unlock_irq(&base->lock);
-			{
-				int preempt_count = preempt_count();
-
-#ifdef CONFIG_LOCKDEP
-				/*
-				 * It is permissible to free the timer from
-				 * inside the function that is called from
-				 * it, this we need to take into account for
-				 * lockdep too. To avoid bogus "held lock
-				 * freed" warnings as well as problems when
-				 * looking into timer->lockdep_map, make a
-				 * copy and use that here.
-				 */
-				struct lockdep_map lockdep_map =
-					timer->lockdep_map;
-#endif
-				/*
-				 * Couple the lock chain with the lock chain at
-				 * del_timer_sync() by acquiring the lock_map
-				 * around the fn() call here and in
-				 * del_timer_sync().
-				 */
-				lock_map_acquire(&lockdep_map);
-
-				trace_timer_expire_entry(timer);
-				fn(data);
-				trace_timer_expire_exit(timer);
-
-				lock_map_release(&lockdep_map);
-
-				if (preempt_count != preempt_count()) {
-					printk(KERN_ERR "huh, entered %p "
-					       "with preempt_count %08x, exited"
-					       " with %08x?\n",
-					       fn, preempt_count,
-					       preempt_count());
-					BUG();
-				}
-			}
+			call_timer_fn(timer, fn, data);
 			spin_lock_irq(&base->lock);
 		}
 	}
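The lock_map_acquire()/lock_map_release() pair that moved into call_timer_fn() is what lets lockdep connect a callback's locks to del_timer_sync() callers. The deadlock it can then report, as a sketch under an assumed lock:

	static DEFINE_SPINLOCK(mylock);		/* assumed */
	static struct timer_list t;

	static void cb(unsigned long data)
	{
		spin_lock(&mylock);
		/* ... */
		spin_unlock(&mylock);
	}

	static void teardown(void)
	{
		spin_lock(&mylock);
		del_timer_sync(&t);	/* can spin forever if cb() is running
					 * and waiting for mylock elsewhere */
		spin_unlock(&mylock);
	}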
@@ -1200,6 +1278,7 @@ void update_process_times(int user_tick)
 	run_local_timers();
 	rcu_check_callbacks(cpu, user_tick);
 	printk_tick();
+	perf_event_do_pending();
 	scheduler_tick();
 	run_posix_cpu_timers(p);
 }
@@ -1211,8 +1290,6 @@ static void run_timer_softirq(struct softirq_action *h)
 {
 	struct tvec_base *base = __get_cpu_var(tvec_bases);
 
-	perf_event_do_pending();
-
 	hrtimer_run_pending();
 
 	if (time_after_eq(jiffies, base->timer_jiffies))
@@ -1226,7 +1303,6 @@ void run_local_timers(void)
 {
 	hrtimer_run_queues();
 	raise_softirq(TIMER_SOFTIRQ);
-	softlockup_tick();
 }
 
 /*
@@ -1621,11 +1697,14 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self,
 					unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
+	int err;
+
 	switch(action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		if (init_timers_cpu(cpu) < 0)
-			return NOTIFY_BAD;
+		err = init_timers_cpu(cpu);
+		if (err < 0)
+			return notifier_from_errno(err);
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_DEAD:
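notifier_from_errno() carries the real error code through the notifier chain instead of flattening every failure to NOTIFY_BAD; notifier_to_errno() decodes it on the caller's side. A sketch of the round trip:

	static void notifier_errno_roundtrip(void)
	{
		int nret = notifier_from_errno(-ENOMEM);	/* NOTIFY_STOP_MASK | encoded */
		WARN_ON(notifier_to_errno(nret) != -ENOMEM);	/* decodes back */
	}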
@@ -1651,7 +1730,7 @@ void __init init_timers(void)
 
 	init_timer_stats();
 
-	BUG_ON(err == NOTIFY_BAD);
+	BUG_ON(err != NOTIFY_OK);
 	register_cpu_notifier(&timers_nb);
 	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
 }
@@ -1684,3 +1763,25 @@ unsigned long msleep_interruptible(unsigned int msecs)
 }
 
 EXPORT_SYMBOL(msleep_interruptible);
+
+static int __sched do_usleep_range(unsigned long min, unsigned long max)
+{
+	ktime_t kmin;
+	unsigned long delta;
+
+	kmin = ktime_set(0, min * NSEC_PER_USEC);
+	delta = (max - min) * NSEC_PER_USEC;
+	return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
+}
+
+/**
+ * usleep_range - Drop in replacement for udelay where wakeup is flexible
+ * @min: Minimum time in usecs to sleep
+ * @max: Maximum time in usecs to sleep
+ */
+void usleep_range(unsigned long min, unsigned long max)
+{
+	__set_current_state(TASK_UNINTERRUPTIBLE);
+	do_usleep_range(min, max);
+}
+EXPORT_SYMBOL(usleep_range);
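usleep_range() hands the hrtimer a window rather than an instant, so the wakeup can coalesce with other pending timers; in non-atomic context it replaces a busy-waiting udelay(). A hedged usage sketch (the device type and readiness helper are assumptions):

	static int wait_until_ready(struct my_dev *dev)
	{
		int tries = 100;

		while (!my_dev_ready(dev) && --tries)
			usleep_range(100, 200);	/* any wakeup within 100-200us is fine */
		return tries ? 0 : -ETIMEDOUT;
	}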