Diffstat (limited to 'kernel/timer.c')

 kernel/timer.c | 214 ++++++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 161 insertions(+), 53 deletions(-)
diff --git a/kernel/timer.c b/kernel/timer.c
index c61a7949387f..68a9ae7679b7 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,8 +37,9 @@
 #include <linux/delay.h>
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
-#include <linux/perf_event.h>
+#include <linux/irq_work.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -89,8 +90,13 @@ static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
 
 /*
  * Note that all tvec_bases are 2 byte aligned and lower bit of
- * base in timer_list is guaranteed to be zero. Use the LSB for
- * the new flag to indicate whether the timer is deferrable
+ * base in timer_list is guaranteed to be zero. Use the LSB to
+ * indicate whether the timer is deferrable.
+ *
+ * A deferrable timer will work normally when the system is busy, but
+ * will not cause a CPU to come out of idle just to service it; instead,
+ * the timer will be serviced when the CPU eventually wakes up with a
+ * subsequent non-deferrable timer.
  */
 #define TBASE_DEFERRABLE_FLAG           (0x1)
 
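Usage note: a deferrable timer is armed through the normal timer API; only the wakeup policy differs. A minimal sketch of a periodic poller, assuming the long-standing init_timer_deferrable() helper — the my_poll* names are illustrative, not part of this patch:

    #include <linux/timer.h>
    #include <linux/jiffies.h>

    static struct timer_list my_poll_timer;         /* hypothetical poller */

    static void my_poll(unsigned long data)
    {
            /* ... sample whatever needs periodic attention ... */
            mod_timer(&my_poll_timer, jiffies + HZ);        /* re-arm ~1s out */
    }

    static void my_poll_start(void)
    {
            init_timer_deferrable(&my_poll_timer);
            my_poll_timer.function = my_poll;
            my_poll_timer.data = 0;
            mod_timer(&my_poll_timer, jiffies + HZ);
    }

If every CPU is idle when the timeout passes, the poll simply waits for the next natural wakeup instead of forcing one.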
@@ -318,6 +324,25 @@ unsigned long round_jiffies_up_relative(unsigned long j)
 }
 EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
 
+/**
+ * set_timer_slack - set the allowed slack for a timer
+ * @timer: the timer to be modified
+ * @slack_hz: the amount of time (in jiffies) allowed for rounding
+ *
+ * Set the amount of time, in jiffies, that a certain timer has
+ * in terms of slack. By setting this value, the timer subsystem
+ * will schedule the actual timer somewhere between
+ * the time mod_timer() asks for, and that time plus the slack.
+ *
+ * By setting the slack to -1, a percentage of the delay is used
+ * instead.
+ */
+void set_timer_slack(struct timer_list *timer, int slack_hz)
+{
+	timer->slack = slack_hz;
+}
+EXPORT_SYMBOL_GPL(set_timer_slack);
+
 
 static inline void set_running_timer(struct tvec_base *base,
 					struct timer_list *timer)
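Usage note: a hedged sketch of a caller that tolerates a late timeout (my_timer and my_callback are illustrative). With one second of slack, the subsystem may round the expiry onto a wakeup it must take anyway:

    static struct timer_list my_timer;              /* hypothetical */

    static void my_callback(unsigned long data)
    {
            /* handle the deliberately imprecise timeout */
    }

    static void my_arm_timeout(void)
    {
            setup_timer(&my_timer, my_callback, 0);
            set_timer_slack(&my_timer, HZ);          /* allow up to 1s of rounding */
            mod_timer(&my_timer, jiffies + 10 * HZ); /* fires between 10s and ~11s */
    }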
@@ -549,6 +574,7 @@ static void __init_timer(struct timer_list *timer,
 {
 	timer->entry.next = NULL;
 	timer->base = __raw_get_cpu_var(tvec_bases);
+	timer->slack = -1;
 #ifdef CONFIG_TIMER_STATS
 	timer->start_site = NULL;
 	timer->start_pid = -1;
@@ -557,6 +583,19 @@ static void __init_timer(struct timer_list *timer,
 	lockdep_init_map(&timer->lockdep_map, name, key, 0);
 }
 
+void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
+					 const char *name,
+					 struct lock_class_key *key,
+					 void (*function)(unsigned long),
+					 unsigned long data)
+{
+	timer->function = function;
+	timer->data = data;
+	init_timer_on_stack_key(timer, name, key);
+	timer_set_deferrable(timer);
+}
+EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);
+
 /**
  * init_timer_key - initialize a timer
  * @timer: the timer to be initialized
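Usage note: the intended call pattern for the new helper, sketched below. The caller is hypothetical, and the setup_deferrable_timer_on_stack() wrapper macro lives in <linux/timer.h> (added alongside this function, not visible in this file's diff):

    static void my_expired(unsigned long data)
    {
            /* illustrative timeout handler */
    }

    static void my_wait_with_timeout(void)
    {
            struct timer_list t;

            setup_deferrable_timer_on_stack(&t, my_expired, 0);
            mod_timer(&t, jiffies + 5 * HZ);        /* deferrable 5s timeout */
            /* ... wait for the event or the timeout ... */
            del_timer_sync(&t);
            destroy_timer_on_stack(&t);             /* pairs with on-stack init */
    }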
@@ -659,12 +698,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 	cpu = smp_processor_id();
 
 #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
-	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
-		int preferred_cpu = get_nohz_load_balancer();
-
-		if (preferred_cpu >= 0)
-			cpu = preferred_cpu;
-	}
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
+		cpu = get_nohz_timer_target();
 #endif
 	new_base = per_cpu(tvec_bases, cpu);
 
@@ -714,6 +749,46 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 }
 EXPORT_SYMBOL(mod_timer_pending);
 
+/*
+ * Decide where to put the timer while taking the slack into account
+ *
+ * Algorithm:
+ *   1) calculate the maximum (absolute) time
+ *   2) calculate the highest bit where the expires and new max are different
+ *   3) use this bit to make a mask
+ *   4) use the bitmask to round down the maximum time, so that all last
+ *      bits are zeros
+ */
+static inline
+unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
+{
+	unsigned long expires_limit, mask;
+	int bit;
+
+	expires_limit = expires;
+
+	if (timer->slack >= 0) {
+		expires_limit = expires + timer->slack;
+	} else {
+		unsigned long now = jiffies;
+
+		/* No slack if already expired, else automatic slack of 0.4% */
+		if (time_after(expires, now))
+			expires_limit = expires + (expires - now)/256;
+	}
+	mask = expires ^ expires_limit;
+	if (mask == 0)
+		return expires;
+
+	bit = find_last_bit(&mask, BITS_PER_LONG);
+
+	mask = (1 << bit) - 1;
+
+	expires_limit = expires_limit & ~(mask);
+
+	return expires_limit;
+}
+
 /**
  * mod_timer - modify a timer's timeout
  * @timer: the timer to be modified
@@ -744,6 +819,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
 	if (timer_pending(timer) && timer->expires == expires)
 		return 1;
 
+	expires = apply_slack(timer, expires);
+
 	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer);
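Worked example: with the default slack of -1 and a timeout requested 10000 jiffies out, the allowed slack is 10000/256 = 39 jiffies, and the rounding clears the low bits below the highest bit where expires and expires+39 differ. The userspace harness below (illustrative, not kernel code) mirrors that arithmetic:

    #include <stdio.h>

    /* mirrors apply_slack() for the default -1 slack (0.4% of the delay) */
    static unsigned long apply_slack_demo(unsigned long now, unsigned long expires)
    {
            unsigned long limit = expires + (expires - now) / 256;
            unsigned long mask = expires ^ limit;
            int bit;

            if (!mask)
                    return expires;
            /* stand-in for find_last_bit(): highest differing bit */
            for (bit = sizeof(mask) * 8 - 1; bit >= 0; bit--)
                    if (mask & (1UL << bit))
                            break;
            return limit & ~((1UL << bit) - 1);     /* round down */
    }

    int main(void)
    {
            printf("%lu\n", apply_slack_demo(1000000UL, 1010000UL));
            /* prints 1010016: 16 jiffies past the request, within the 39 allowed */
            return 0;
    }

The effect is that timers with similar deadlines collapse onto shared expiry points, reducing the number of distinct wakeups.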
@@ -880,6 +957,7 @@ int try_to_del_timer_sync(struct timer_list *timer)
 	if (base->running_timer == timer)
 		goto out;
 
+	timer_stats_timer_clear_start_info(timer);
 	ret = 0;
 	if (timer_pending(timer)) {
 		detach_timer(timer, 1);
@@ -953,6 +1031,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
 	return index;
 }
 
+static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
+			  unsigned long data)
+{
+	int preempt_count = preempt_count();
+
+#ifdef CONFIG_LOCKDEP
+	/*
+	 * It is permissible to free the timer from inside the
+	 * function that is called from it, this we need to take into
+	 * account for lockdep too. To avoid bogus "held lock freed"
+	 * warnings as well as problems when looking into
+	 * timer->lockdep_map, make a copy and use that here.
+	 */
+	struct lockdep_map lockdep_map = timer->lockdep_map;
+#endif
+	/*
+	 * Couple the lock chain with the lock chain at
+	 * del_timer_sync() by acquiring the lock_map around the fn()
+	 * call here and in del_timer_sync().
+	 */
+	lock_map_acquire(&lockdep_map);
+
+	trace_timer_expire_entry(timer);
+	fn(data);
+	trace_timer_expire_exit(timer);
+
+	lock_map_release(&lockdep_map);
+
+	if (preempt_count != preempt_count()) {
+		WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
+			  fn, preempt_count, preempt_count());
+		/*
+		 * Restore the preempt count. That gives us a decent
+		 * chance to survive and extract information. If the
+		 * callback kept a lock held, bad luck, but not worse
+		 * than the BUG() we had.
+		 */
+		preempt_count() = preempt_count;
+	}
+}
+
 #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
 
 /**
@@ -996,45 +1115,7 @@ static inline void __run_timers(struct tvec_base *base)
 			detach_timer(timer, 1);
 
 			spin_unlock_irq(&base->lock);
-			{
-				int preempt_count = preempt_count();
-
-#ifdef CONFIG_LOCKDEP
-				/*
-				 * It is permissible to free the timer from
-				 * inside the function that is called from
-				 * it, this we need to take into account for
-				 * lockdep too. To avoid bogus "held lock
-				 * freed" warnings as well as problems when
-				 * looking into timer->lockdep_map, make a
-				 * copy and use that here.
-				 */
-				struct lockdep_map lockdep_map =
-					timer->lockdep_map;
-#endif
-				/*
-				 * Couple the lock chain with the lock chain at
-				 * del_timer_sync() by acquiring the lock_map
-				 * around the fn() call here and in
-				 * del_timer_sync().
-				 */
-				lock_map_acquire(&lockdep_map);
-
-				trace_timer_expire_entry(timer);
-				fn(data);
-				trace_timer_expire_exit(timer);
-
-				lock_map_release(&lockdep_map);
-
-				if (preempt_count != preempt_count()) {
-					printk(KERN_ERR "huh, entered %p "
-					       "with preempt_count %08x, exited"
-					       " with %08x?\n",
-					       fn, preempt_count,
-					       preempt_count());
-					BUG();
-				}
-			}
+			call_timer_fn(timer, fn, data);
 			spin_lock_irq(&base->lock);
 		}
 	}
@@ -1198,7 +1279,10 @@ void update_process_times(int user_tick)
 	run_local_timers();
 	rcu_check_callbacks(cpu, user_tick);
 	printk_tick();
-	perf_event_do_pending();
+#ifdef CONFIG_IRQ_WORK
+	if (in_irq())
+		irq_work_run();
+#endif
 	scheduler_tick();
 	run_posix_cpu_timers(p);
 }
@@ -1223,7 +1307,6 @@ void run_local_timers(void)
 {
 	hrtimer_run_queues();
 	raise_softirq(TIMER_SOFTIRQ);
-	softlockup_tick();
 }
 
 /*
@@ -1618,11 +1701,14 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
+	int err;
+
 	switch(action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		if (init_timers_cpu(cpu) < 0)
-			return NOTIFY_BAD;
+		err = init_timers_cpu(cpu);
+		if (err < 0)
+			return notifier_from_errno(err);
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_DEAD:
@@ -1648,7 +1734,7 @@ void __init init_timers(void)
 
 	init_timer_stats();
 
-	BUG_ON(err == NOTIFY_BAD);
+	BUG_ON(err != NOTIFY_OK);
 	register_cpu_notifier(&timers_nb);
 	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
 }
@@ -1681,3 +1767,25 @@ unsigned long msleep_interruptible(unsigned int msecs)
 }
 
 EXPORT_SYMBOL(msleep_interruptible);
+
+static int __sched do_usleep_range(unsigned long min, unsigned long max)
+{
+	ktime_t kmin;
+	unsigned long delta;
+
+	kmin = ktime_set(0, min * NSEC_PER_USEC);
+	delta = (max - min) * NSEC_PER_USEC;
+	return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
+}
+
+/**
+ * usleep_range - Drop in replacement for udelay where wakeup is flexible
+ * @min: Minimum time in usecs to sleep
+ * @max: Maximum time in usecs to sleep
+ */
+void usleep_range(unsigned long min, unsigned long max)
+{
+	__set_current_state(TASK_UNINTERRUPTIBLE);
+	do_usleep_range(min, max);
+}
+EXPORT_SYMBOL(usleep_range);
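Usage note: a hedged sketch of a driver-side caller (the function name and the 100-200us window are illustrative). Unlike udelay(), which busy-waits, usleep_range() sleeps on an hrtimer, and the min/max window lets the wakeup coalesce with other pending events:

    #include <linux/delay.h>

    static void my_wait_for_device(void)
    {
            usleep_range(100, 200); /* at least 100us, at most ~200us */
    }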
