Diffstat (limited to 'kernel/timer.c')
-rw-r--r--  kernel/timer.c  211
1 files changed, 156 insertions, 55 deletions
diff --git a/kernel/timer.c b/kernel/timer.c
index 5db5a8d26811..f1b8afe1ad86 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -39,6 +39,7 @@
 #include <linux/kallsyms.h>
 #include <linux/perf_event.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -89,8 +90,13 @@ static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
 
 /*
  * Note that all tvec_bases are 2 byte aligned and lower bit of
- * base in timer_list is guaranteed to be zero. Use the LSB for
- * the new flag to indicate whether the timer is deferrable
+ * base in timer_list is guaranteed to be zero. Use the LSB to
+ * indicate whether the timer is deferrable.
+ *
+ * A deferrable timer will work normally when the system is busy, but
+ * will not cause a CPU to come out of idle just to service it; instead,
+ * the timer will be serviced when the CPU eventually wakes up with a
+ * subsequent non-deferrable timer.
  */
 #define TBASE_DEFERRABLE_FLAG		(0x1)
 
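
For illustration only (not part of the patch): a driver that wants periodic housekeeping but must not drag an idle CPU out of deep sleep can mark its timer deferrable with the long-standing init_timer_deferrable() helper. The callback name and the 30-second period below are invented.

    static void my_housekeeping_fn(unsigned long data)	/* hypothetical */
    {
            /* trim caches, kick a workqueue, etc. */
    }

    static struct timer_list housekeeping_timer;

    static void my_start_housekeeping(void)
    {
            init_timer_deferrable(&housekeeping_timer);
            housekeeping_timer.function = my_housekeeping_fn;
            housekeeping_timer.data = 0;
            mod_timer(&housekeeping_timer, jiffies + 30 * HZ);
    }
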
@@ -318,6 +324,24 @@ unsigned long round_jiffies_up_relative(unsigned long j)
 }
 EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
 
+/**
+ * set_timer_slack - set the allowed slack for a timer
+ * @slack_hz: the amount of time (in jiffies) allowed for rounding
+ *
+ * Set the amount of time, in jiffies, that a certain timer has
+ * in terms of slack. By setting this value, the timer subsystem
+ * will schedule the actual timer somewhere between
+ * the time mod_timer() asks for, and that time plus the slack.
+ *
+ * By setting the slack to -1, a percentage of the delay is used
+ * instead.
+ */
+void set_timer_slack(struct timer_list *timer, int slack_hz)
+{
+	timer->slack = slack_hz;
+}
+EXPORT_SYMBOL_GPL(set_timer_slack);
+
 
 static inline void set_running_timer(struct tvec_base *base,
 			struct timer_list *timer)
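
For illustration only (not part of the patch): a polling driver that can tolerate its timer firing a little late could opt in like this; the function names and the 100 ms slack are invented.

    static void my_poll_fn(unsigned long data)		/* hypothetical */
    {
            /* poll the hardware, then re-arm as needed */
    }

    static struct timer_list my_poll_timer;

    static void my_start_polling(void)
    {
            setup_timer(&my_poll_timer, my_poll_fn, 0);
            /* allow the expiry to be rounded by up to ~100 ms */
            set_timer_slack(&my_poll_timer, HZ / 10);
            /* fires somewhere between 2 s and 2 s + HZ/10 from now */
            mod_timer(&my_poll_timer, jiffies + 2 * HZ);
    }
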
@@ -549,6 +573,7 @@ static void __init_timer(struct timer_list *timer,
 {
 	timer->entry.next = NULL;
 	timer->base = __raw_get_cpu_var(tvec_bases);
+	timer->slack = -1;
 #ifdef CONFIG_TIMER_STATS
 	timer->start_site = NULL;
 	timer->start_pid = -1;
@@ -557,6 +582,19 @@
 	lockdep_init_map(&timer->lockdep_map, name, key, 0);
 }
 
+void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
+					 const char *name,
+					 struct lock_class_key *key,
+					 void (*function)(unsigned long),
+					 unsigned long data)
+{
+	timer->function = function;
+	timer->data = data;
+	init_timer_on_stack_key(timer, name, key);
+	timer_set_deferrable(timer);
+}
+EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);
+
 /**
  * init_timer_key - initialize a timer
  * @timer: the timer to be initialized
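
For illustration only (not part of the patch): callers would presumably reach the new export through a convenience wrapper. Assuming a setup_deferrable_timer_on_stack() macro in <linux/timer.h> that supplies the name and lock class key, a short-lived on-stack deferrable timer could look like this; every name below is invented.

    static void my_expiry_fn(unsigned long data)	/* hypothetical */
    {
            /* note that the work below took suspiciously long */
    }

    static void my_transient_wait(void)
    {
            struct timer_list t;

            setup_deferrable_timer_on_stack(&t, my_expiry_fn, 0);
            mod_timer(&t, jiffies + 10 * HZ);
            /* ... do the work the timer watches over ... */
            del_timer_sync(&t);
            destroy_timer_on_stack(&t);
    }
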
@@ -656,17 +694,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 
 	debug_activate(timer, expires);
 
-	new_base = __get_cpu_var(tvec_bases);
-
 	cpu = smp_processor_id();
 
 #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
-	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
-		int preferred_cpu = get_nohz_load_balancer();
-
-		if (preferred_cpu >= 0)
-			cpu = preferred_cpu;
-	}
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
+		cpu = get_nohz_timer_target();
 #endif
 	new_base = per_cpu(tvec_bases, cpu);
 
@@ -716,6 +748,46 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 }
 EXPORT_SYMBOL(mod_timer_pending);
 
+/*
+ * Decide where to put the timer while taking the slack into account
+ *
+ * Algorithm:
+ *   1) calculate the maximum (absolute) time
+ *   2) calculate the highest bit where the expires and new max are different
+ *   3) use this bit to make a mask
+ *   4) use the bitmask to round down the maximum time, so that all last
+ *      bits are zeros
+ */
+static inline
+unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
+{
+	unsigned long expires_limit, mask;
+	int bit;
+
+	expires_limit = expires;
+
+	if (timer->slack >= 0) {
+		expires_limit = expires + timer->slack;
+	} else {
+		unsigned long now = jiffies;
+
+		/* No slack, if already expired else auto slack 0.4% */
+		if (time_after(expires, now))
+			expires_limit = expires + (expires - now)/256;
+	}
+	mask = expires ^ expires_limit;
+	if (mask == 0)
+		return expires;
+
+	bit = find_last_bit(&mask, BITS_PER_LONG);
+
+	mask = (1 << bit) - 1;
+
+	expires_limit = expires_limit & ~(mask);
+
+	return expires_limit;
+}
+
 /**
  * mod_timer - modify a timer's timeout
  * @timer: the timer to be modified
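
For illustration only (not part of the patch), the arithmetic worked through with invented numbers, for a timer whose slack was set to 100 jiffies and whose requested expiry is 0x41230:

    expires_limit = 0x41230 + 100        = 0x41294
    mask          = 0x41230 ^ 0x41294    = 0xa4     (highest set bit: 7)
    mask          = (1 << 7) - 1         = 0x7f
    expires_limit = 0x41294 & ~0x7f      = 0x41280

mod_timer() therefore queues the timer for 0x41280, somewhere between the requested expiry and the requested expiry plus the slack, with the low bits cleared so that timers with nearby deadlines tend to land on the same jiffy and can be serviced in a single wakeup. With the default slack of -1, the window is 0.4% of the remaining delay instead.
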
@@ -746,6 +818,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
 	if (timer_pending(timer) && timer->expires == expires)
 		return 1;
 
+	expires = apply_slack(timer, expires);
+
 	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer);
@@ -882,6 +956,7 @@ int try_to_del_timer_sync(struct timer_list *timer)
 	if (base->running_timer == timer)
 		goto out;
 
+	timer_stats_timer_clear_start_info(timer);
 	ret = 0;
 	if (timer_pending(timer)) {
 		detach_timer(timer, 1);
@@ -955,6 +1030,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
 	return index;
 }
 
+static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
+			  unsigned long data)
+{
+	int preempt_count = preempt_count();
+
+#ifdef CONFIG_LOCKDEP
+	/*
+	 * It is permissible to free the timer from inside the
+	 * function that is called from it, this we need to take into
+	 * account for lockdep too. To avoid bogus "held lock freed"
+	 * warnings as well as problems when looking into
+	 * timer->lockdep_map, make a copy and use that here.
+	 */
+	struct lockdep_map lockdep_map = timer->lockdep_map;
+#endif
+	/*
+	 * Couple the lock chain with the lock chain at
+	 * del_timer_sync() by acquiring the lock_map around the fn()
+	 * call here and in del_timer_sync().
+	 */
+	lock_map_acquire(&lockdep_map);
+
+	trace_timer_expire_entry(timer);
+	fn(data);
+	trace_timer_expire_exit(timer);
+
+	lock_map_release(&lockdep_map);
+
+	if (preempt_count != preempt_count()) {
+		WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
+			  fn, preempt_count, preempt_count());
+		/*
+		 * Restore the preempt count. That gives us a decent
+		 * chance to survive and extract information. If the
+		 * callback kept a lock held, bad luck, but not worse
+		 * than the BUG() we had.
+		 */
+		preempt_count() = preempt_count;
+	}
+}
+
 #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
 
 /**
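
For illustration only (not part of the patch): the kind of callback bug this check catches. On a preemptible kernel, returning with a spinlock still held leaves preempt_count() raised, so call_timer_fn() emits the WARN_ONCE() and repairs the count instead of taking the machine down with the old BUG(). All names below are invented.

    static DEFINE_SPINLOCK(example_lock);		/* hypothetical */

    static void leaky_timeout(unsigned long data)
    {
            spin_lock(&example_lock);
            if (my_error_condition(data))		/* hypothetical */
                    return;	/* bug: returns with example_lock held */
            spin_unlock(&example_lock);
    }
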
@@ -998,45 +1114,7 @@ static inline void __run_timers(struct tvec_base *base)
 			detach_timer(timer, 1);
 
 			spin_unlock_irq(&base->lock);
-			{
-				int preempt_count = preempt_count();
-
-#ifdef CONFIG_LOCKDEP
-				/*
-				 * It is permissible to free the timer from
-				 * inside the function that is called from
-				 * it, this we need to take into account for
-				 * lockdep too. To avoid bogus "held lock
-				 * freed" warnings as well as problems when
-				 * looking into timer->lockdep_map, make a
-				 * copy and use that here.
-				 */
-				struct lockdep_map lockdep_map =
-					timer->lockdep_map;
-#endif
-				/*
-				 * Couple the lock chain with the lock chain at
-				 * del_timer_sync() by acquiring the lock_map
-				 * around the fn() call here and in
-				 * del_timer_sync().
-				 */
-				lock_map_acquire(&lockdep_map);
-
-				trace_timer_expire_entry(timer);
-				fn(data);
-				trace_timer_expire_exit(timer);
-
-				lock_map_release(&lockdep_map);
-
-				if (preempt_count != preempt_count()) {
-					printk(KERN_ERR "huh, entered %p "
-					       "with preempt_count %08x, exited"
-					       " with %08x?\n",
-					       fn, preempt_count,
-					       preempt_count());
-					BUG();
-				}
-			}
+			call_timer_fn(timer, fn, data);
 			spin_lock_irq(&base->lock);
 		}
 	}
@@ -1200,6 +1278,7 @@ void update_process_times(int user_tick)
 	run_local_timers();
 	rcu_check_callbacks(cpu, user_tick);
 	printk_tick();
+	perf_event_do_pending();
 	scheduler_tick();
 	run_posix_cpu_timers(p);
 }
@@ -1211,8 +1290,6 @@ static void run_timer_softirq(struct softirq_action *h)
 {
 	struct tvec_base *base = __get_cpu_var(tvec_bases);
 
-	perf_event_do_pending();
-
 	hrtimer_run_pending();
 
 	if (time_after_eq(jiffies, base->timer_jiffies))
@@ -1226,7 +1303,6 @@ void run_local_timers(void)
 {
 	hrtimer_run_queues();
 	raise_softirq(TIMER_SOFTIRQ);
-	softlockup_tick();
 }
 
 /*
@@ -1621,11 +1697,14 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
+	int err;
+
 	switch(action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		if (init_timers_cpu(cpu) < 0)
-			return NOTIFY_BAD;
+		err = init_timers_cpu(cpu);
+		if (err < 0)
+			return notifier_from_errno(err);
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_DEAD:
@@ -1651,7 +1730,7 @@ void __init init_timers(void)
 
 	init_timer_stats();
 
-	BUG_ON(err == NOTIFY_BAD);
+	BUG_ON(err != NOTIFY_OK);
 	register_cpu_notifier(&timers_nb);
 	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
 }
@@ -1684,3 +1763,25 @@ unsigned long msleep_interruptible(unsigned int msecs)
 }
 
 EXPORT_SYMBOL(msleep_interruptible);
+
+static int __sched do_usleep_range(unsigned long min, unsigned long max)
+{
+	ktime_t kmin;
+	unsigned long delta;
+
+	kmin = ktime_set(0, min * NSEC_PER_USEC);
+	delta = (max - min) * NSEC_PER_USEC;
+	return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
+}
+
+/**
+ * usleep_range - Drop in replacement for udelay where wakeup is flexible
+ * @min: Minimum time in usecs to sleep
+ * @max: Maximum time in usecs to sleep
+ */
+void usleep_range(unsigned long min, unsigned long max)
+{
+	__set_current_state(TASK_UNINTERRUPTIBLE);
+	do_usleep_range(min, max);
+}
+EXPORT_SYMBOL(usleep_range);
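
For illustration only (not part of the patch): a process-context delay of 100 to 200 microseconds while a hypothetical device settles. udelay(100) would busy-spin for the whole interval; usleep_range() sleeps on an hrtimer and gives the scheduler a window in which the wakeup can coalesce with others.

    /* somewhere in process context, e.g. a hypothetical reset path */
    my_dev_assert_reset(dev);		/* hypothetical helper */
    usleep_range(100, 200);		/* sleep 100..200 us */
    my_dev_deassert_reset(dev);		/* hypothetical helper */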