Diffstat (limited to 'kernel/timer.c')
-rw-r--r--	kernel/timer.c	214
1 file changed, 161 insertions, 53 deletions
diff --git a/kernel/timer.c b/kernel/timer.c
index c61a7949387f..68a9ae7679b7 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,8 +37,9 @@
 #include <linux/delay.h>
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
-#include <linux/perf_event.h>
+#include <linux/irq_work.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -89,8 +90,13 @@ static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
 
 /*
  * Note that all tvec_bases are 2 byte aligned and lower bit of
- * base in timer_list is guaranteed to be zero. Use the LSB for
- * the new flag to indicate whether the timer is deferrable
+ * base in timer_list is guaranteed to be zero. Use the LSB to
+ * indicate whether the timer is deferrable.
+ *
+ * A deferrable timer will work normally when the system is busy, but
+ * will not cause a CPU to come out of idle just to service it; instead,
+ * the timer will be serviced when the CPU eventually wakes up with a
+ * subsequent non-deferrable timer.
  */
 #define TBASE_DEFERRABLE_FLAG		(0x1)
 
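For context, the flag lives in the pointer itself: since every tvec_base is at least 2-byte aligned, bit 0 of timer->base is always free. A minimal sketch of the idea (paraphrasing the helpers that already exist in this file, not quoting them):

    /* Sketch: read and set the deferrable flag stored in bit 0 of
     * the 2-byte-aligned tvec_base pointer. */
    static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
    {
            return (unsigned int)((unsigned long)base & TBASE_DEFERRABLE_FLAG);
    }

    static inline void timer_set_deferrable(struct timer_list *timer)
    {
            timer->base = (struct tvec_base *)((unsigned long)timer->base |
                                               TBASE_DEFERRABLE_FLAG);
    }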
@@ -318,6 +324,25 @@ unsigned long round_jiffies_up_relative(unsigned long j)
 }
 EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
 
+/**
+ * set_timer_slack - set the allowed slack for a timer
+ * @timer: the timer to be modified
+ * @slack_hz: the amount of time (in jiffies) allowed for rounding
+ *
+ * Set the amount of time, in jiffies, that a certain timer has
+ * in terms of slack. By setting this value, the timer subsystem
+ * will schedule the actual timer somewhere between
+ * the time mod_timer() asks for, and that time plus the slack.
+ *
+ * By setting the slack to -1, a percentage of the delay is used
+ * instead.
+ */
+void set_timer_slack(struct timer_list *timer, int slack_hz)
+{
+	timer->slack = slack_hz;
+}
+EXPORT_SYMBOL_GPL(set_timer_slack);
+
 
 static inline void set_running_timer(struct tvec_base *base,
 					struct timer_list *timer)
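A minimal usage sketch (the my_* names are hypothetical): a caller whose timeout may fire up to a tenth of a second late can announce that tolerance so the expiry can be batched with other timers:

    setup_timer(&my_timer, my_timeout_fn, 0);  /* hypothetical names */
    set_timer_slack(&my_timer, HZ / 10);       /* up to 100ms of slack */
    mod_timer(&my_timer, jiffies + 5 * HZ);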
@@ -549,6 +574,7 @@ static void __init_timer(struct timer_list *timer,
 {
 	timer->entry.next = NULL;
 	timer->base = __raw_get_cpu_var(tvec_bases);
+	timer->slack = -1;
 #ifdef CONFIG_TIMER_STATS
 	timer->start_site = NULL;
 	timer->start_pid = -1;
@@ -557,6 +583,19 @@ static void __init_timer(struct timer_list *timer,
 	lockdep_init_map(&timer->lockdep_map, name, key, 0);
 }
 
+void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
+					 const char *name,
+					 struct lock_class_key *key,
+					 void (*function)(unsigned long),
+					 unsigned long data)
+{
+	timer->function = function;
+	timer->data = data;
+	init_timer_on_stack_key(timer, name, key);
+	timer_set_deferrable(timer);
+}
+EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);
+
 /**
  * init_timer_key - initialize a timer
  * @timer: the timer to be initialized
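Callers are not expected to use the _key variant directly; the usual entry point is a wrapper macro in <linux/timer.h> that supplies the lockdep key. A usage sketch under that assumption, with a hypothetical callback:

    /* An on-stack deferrable timer; my_timeout_fn is hypothetical. */
    void wait_with_deferrable_timer(void)
    {
            struct timer_list t;

            setup_deferrable_timer_on_stack(&t, my_timeout_fn, 0);
            mod_timer(&t, jiffies + 10 * HZ);
            /* ... wait for the timer or the event it guards ... */
            del_timer_sync(&t);
            destroy_timer_on_stack(&t);
    }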
@@ -659,12 +698,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 	cpu = smp_processor_id();
 
 #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
-	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
-		int preferred_cpu = get_nohz_load_balancer();
-
-		if (preferred_cpu >= 0)
-			cpu = preferred_cpu;
-	}
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
+		cpu = get_nohz_timer_target();
 #endif
 	new_base = per_cpu(tvec_bases, cpu);
 
@@ -714,6 +749,46 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 }
 EXPORT_SYMBOL(mod_timer_pending);
 
+/*
+ * Decide where to put the timer while taking the slack into account
+ *
+ * Algorithm:
+ * 1) calculate the maximum (absolute) time
+ * 2) calculate the highest bit where the expires and new max are different
+ * 3) use this bit to make a mask
+ * 4) use the bitmask to round down the maximum time, so that all last
+ *    bits are zeros
+ */
+static inline
+unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
+{
+	unsigned long expires_limit, mask;
+	int bit;
+
+	expires_limit = expires;
+
+	if (timer->slack >= 0) {
+		expires_limit = expires + timer->slack;
+	} else {
+		unsigned long now = jiffies;
+
+		/* No slack, if already expired else auto slack 0.4% */
+		if (time_after(expires, now))
+			expires_limit = expires + (expires - now)/256;
+	}
+	mask = expires ^ expires_limit;
+	if (mask == 0)
+		return expires;
+
+	bit = find_last_bit(&mask, BITS_PER_LONG);
+
+	mask = (1 << bit) - 1;
+
+	expires_limit = expires_limit & ~(mask);
+
+	return expires_limit;
+}
+
 /**
  * mod_timer - modify a timer's timeout
  * @timer: the timer to be modified
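A worked example of the default (slack == -1) path: suppose jiffies is 0 and the caller asks for an expiry 1000 jiffies out.

    /* expires       = 1000                      (0x3e8)
     * expires_limit = 1000 + 1000/256 = 1003    (0x3eb)
     * mask = 0x3e8 ^ 0x3eb = 0x3                (find_last_bit() -> bit 1)
     * mask = (1 << 1) - 1 = 0x1
     * expires_limit = 1003 & ~0x1 = 1002
     */

The timer is queued for jiffy 1002: inside the allowed [1000, 1003] window, but with the low bits cleared, so timers with nearby deadlines tend to land on the same jiffy and are served by a single wakeup.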
@@ -744,6 +819,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
 	if (timer_pending(timer) && timer->expires == expires)
 		return 1;
 
+	expires = apply_slack(timer, expires);
+
 	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer);
@@ -880,6 +957,7 @@ int try_to_del_timer_sync(struct timer_list *timer)
 	if (base->running_timer == timer)
 		goto out;
 
+	timer_stats_timer_clear_start_info(timer);
 	ret = 0;
 	if (timer_pending(timer)) {
 		detach_timer(timer, 1);
@@ -953,6 +1031,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
 	return index;
 }
 
+static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
+			  unsigned long data)
+{
+	int preempt_count = preempt_count();
+
+#ifdef CONFIG_LOCKDEP
+	/*
+	 * It is permissible to free the timer from inside the
+	 * function that is called from it, this we need to take into
+	 * account for lockdep too. To avoid bogus "held lock freed"
+	 * warnings as well as problems when looking into
+	 * timer->lockdep_map, make a copy and use that here.
+	 */
+	struct lockdep_map lockdep_map = timer->lockdep_map;
+#endif
+	/*
+	 * Couple the lock chain with the lock chain at
+	 * del_timer_sync() by acquiring the lock_map around the fn()
+	 * call here and in del_timer_sync().
+	 */
+	lock_map_acquire(&lockdep_map);
+
+	trace_timer_expire_entry(timer);
+	fn(data);
+	trace_timer_expire_exit(timer);
+
+	lock_map_release(&lockdep_map);
+
+	if (preempt_count != preempt_count()) {
+		WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
+			  fn, preempt_count, preempt_count());
+		/*
+		 * Restore the preempt count. That gives us a decent
+		 * chance to survive and extract information. If the
+		 * callback kept a lock held, bad luck, but not worse
+		 * than the BUG() we had.
+		 */
+		preempt_count() = preempt_count;
+	}
+}
+
 #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
 
 /**
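The preempt-count check at the end catches callbacks that return with a lock still held. A hypothetical buggy callback that would now trigger the WARN_ONCE() and count restoration, where the old inline code simply hit BUG():

    /* Hypothetical buggy timer callback: returns with a spinlock held,
     * leaving preempt_count elevated. */
    static void broken_timer_fn(unsigned long data)
    {
            spin_lock(&my_lock);            /* my_lock is hypothetical */
            /* ... bug: forgot spin_unlock(&my_lock) ... */
    }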
@@ -996,45 +1115,7 @@ static inline void __run_timers(struct tvec_base *base)
 			detach_timer(timer, 1);
 
 			spin_unlock_irq(&base->lock);
-			{
-				int preempt_count = preempt_count();
-
-#ifdef CONFIG_LOCKDEP
-				/*
-				 * It is permissible to free the timer from
-				 * inside the function that is called from
-				 * it, this we need to take into account for
-				 * lockdep too. To avoid bogus "held lock
-				 * freed" warnings as well as problems when
-				 * looking into timer->lockdep_map, make a
-				 * copy and use that here.
-				 */
-				struct lockdep_map lockdep_map =
-					timer->lockdep_map;
-#endif
-				/*
-				 * Couple the lock chain with the lock chain at
-				 * del_timer_sync() by acquiring the lock_map
-				 * around the fn() call here and in
-				 * del_timer_sync().
-				 */
-				lock_map_acquire(&lockdep_map);
-
-				trace_timer_expire_entry(timer);
-				fn(data);
-				trace_timer_expire_exit(timer);
-
-				lock_map_release(&lockdep_map);
-
-				if (preempt_count != preempt_count()) {
-					printk(KERN_ERR "huh, entered %p "
-					       "with preempt_count %08x, exited"
-					       " with %08x?\n",
-					       fn, preempt_count,
-					       preempt_count());
-					BUG();
-				}
-			}
+			call_timer_fn(timer, fn, data);
 			spin_lock_irq(&base->lock);
 		}
 	}
@@ -1198,7 +1279,10 @@ void update_process_times(int user_tick)
 	run_local_timers();
 	rcu_check_callbacks(cpu, user_tick);
 	printk_tick();
-	perf_event_do_pending();
+#ifdef CONFIG_IRQ_WORK
+	if (in_irq())
+		irq_work_run();
+#endif
 	scheduler_tick();
 	run_posix_cpu_timers(p);
 }
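perf's tick-time pending work has been generalized into the irq_work facility, and update_process_times() now drains any queued work from the tick. A rough usage sketch; the struct layout and initialization details are an assumption for this era of the API, but irq_work_queue() is the entry point:

    #include <linux/irq_work.h>

    static void my_work_fn(struct irq_work *work)   /* hypothetical */
    {
            /* runs in hard-irq context, e.g. from the tick above */
    }

    static struct irq_work my_work = {
            .func = my_work_fn,
    };

    /* from NMI or other context where almost nothing else is safe: */
    irq_work_queue(&my_work);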
@@ -1223,7 +1307,6 @@ void run_local_timers(void)
 {
 	hrtimer_run_queues();
 	raise_softirq(TIMER_SOFTIRQ);
-	softlockup_tick();
 }
 
 /*
@@ -1618,11 +1701,14 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
+	int err;
+
 	switch(action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		if (init_timers_cpu(cpu) < 0)
-			return NOTIFY_BAD;
+		err = init_timers_cpu(cpu);
+		if (err < 0)
+			return notifier_from_errno(err);
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_DEAD:
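notifier_from_errno() lets the notifier chain carry the real errno instead of collapsing every failure to NOTIFY_BAD. For reference, its definition in <linux/notifier.h> is essentially:

    static inline int notifier_from_errno(int err)
    {
            if (err)
                    return NOTIFY_STOP_MASK | (NOTIFY_BAD - err);
            return NOTIFY_OK;
    }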
@@ -1648,7 +1734,7 @@ void __init init_timers(void)
 
 	init_timer_stats();
 
-	BUG_ON(err == NOTIFY_BAD);
+	BUG_ON(err != NOTIFY_OK);
 	register_cpu_notifier(&timers_nb);
 	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
 }
@@ -1681,3 +1767,25 @@ unsigned long msleep_interruptible(unsigned int msecs)
 }
 
 EXPORT_SYMBOL(msleep_interruptible);
+
+static int __sched do_usleep_range(unsigned long min, unsigned long max)
+{
+	ktime_t kmin;
+	unsigned long delta;
+
+	kmin = ktime_set(0, min * NSEC_PER_USEC);
+	delta = (max - min) * NSEC_PER_USEC;
+	return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
+}
+
+/**
+ * usleep_range - Drop in replacement for udelay where wakeup is flexible
+ * @min: Minimum time in usecs to sleep
+ * @max: Maximum time in usecs to sleep
+ */
+void usleep_range(unsigned long min, unsigned long max)
+{
+	__set_current_state(TASK_UNINTERRUPTIBLE);
+	do_usleep_range(min, max);
+}
+EXPORT_SYMBOL(usleep_range);
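A usage sketch: in process context, where a driver would previously have spun in udelay(), the flexible upper bound lets the wakeup be coalesced with others:

    /* Sleep at least 100us, at most 200us. Unlike udelay(150), this
     * yields the CPU and gives hrtimers a 100us window for batching. */
    usleep_range(100, 200);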