From 06f71b922ce5a05352acd706564ca4ae1f2add0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 11 Mar 2010 14:04:46 -0800 Subject: timer: Print function name for timer callbacks modifying preemption count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A function scheduled with a timer must not exit with a different preempt count than it was entered. To make helping users running into the corresponding BUG() easier also print the name of the bad function not only its address. [ tglx: Sanitized printk ] Signed-off-by: Uwe Kleine-König Cc: johnstul@us.ibm.com Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- kernel/timer.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index c61a7949387f..f82f4bfe2d88 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1027,11 +1027,8 @@ static inline void __run_timers(struct tvec_base *base) lock_map_release(&lockdep_map); if (preempt_count != preempt_count()) { - printk(KERN_ERR "huh, entered %p " - "with preempt_count %08x, exited" - " with %08x?\n", - fn, preempt_count, - preempt_count()); + printk(KERN_ERR "timer: %pF preempt leak: %08x -> %08x\n", + fn, preempt_count, preempt_count()); BUG(); } } -- cgit v1.2.2 From 576da126a6c7364d70dfd58d0bbe43d05cf5859f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 Mar 2010 21:10:29 +0100 Subject: timer: Split out timer function call The ident level is starting to be annoying. More white space than actual code. Split out the timer function call into its own function. Signed-off-by: Thomas Gleixner --- kernel/timer.c | 72 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index f82f4bfe2d88..45229694dc6a 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -953,6 +953,41 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index) return index; } +static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), + unsigned long data) +{ + int preempt_count = preempt_count(); + +#ifdef CONFIG_LOCKDEP + /* + * It is permissible to free the timer from inside the + * function that is called from it, this we need to take into + * account for lockdep too. To avoid bogus "held lock freed" + * warnings as well as problems when looking into + * timer->lockdep_map, make a copy and use that here. + */ + struct lockdep_map lockdep_map = timer->lockdep_map; +#endif + /* + * Couple the lock chain with the lock chain at + * del_timer_sync() by acquiring the lock_map around the fn() + * call here and in del_timer_sync(). + */ + lock_map_acquire(&lockdep_map); + + trace_timer_expire_entry(timer); + fn(data); + trace_timer_expire_exit(timer); + + lock_map_release(&lockdep_map); + + if (preempt_count != preempt_count()) { + printk(KERN_ERR "timer: %pF preempt leak: %08x -> %08x\n", + fn, preempt_count, preempt_count()); + BUG(); + } +} + #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) /** @@ -996,42 +1031,7 @@ static inline void __run_timers(struct tvec_base *base) detach_timer(timer, 1); spin_unlock_irq(&base->lock); - { - int preempt_count = preempt_count(); - -#ifdef CONFIG_LOCKDEP - /* - * It is permissible to free the timer from - * inside the function that is called from - * it, this we need to take into account for - * lockdep too. To avoid bogus "held lock - * freed" warnings as well as problems when - * looking into timer->lockdep_map, make a - * copy and use that here. - */ - struct lockdep_map lockdep_map = - timer->lockdep_map; -#endif - /* - * Couple the lock chain with the lock chain at - * del_timer_sync() by acquiring the lock_map - * around the fn() call here and in - * del_timer_sync(). - */ - lock_map_acquire(&lockdep_map); - - trace_timer_expire_entry(timer); - fn(data); - trace_timer_expire_exit(timer); - - lock_map_release(&lockdep_map); - - if (preempt_count != preempt_count()) { - printk(KERN_ERR "timer: %pF preempt leak: %08x -> %08x\n", - fn, preempt_count, preempt_count()); - BUG(); - } - } + call_timer_fn(timer, fn, data); spin_lock_irq(&base->lock); } } -- cgit v1.2.2 From 802702e0c2618465b813242d4dfee6a233ba0beb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 Mar 2010 20:13:23 +0100 Subject: timer: Try to survive timer callback preempt_count leak If a timer callback leaks preempt_count we currently assert a BUG(). That makes it unnecessarily hard to retrieve information about the problem especially on laptops and headless stations. There is a decent chance to survive the preempt_count leak by restoring the preempt_count to the value before the callback. That allows in many cases to get valuable information about the root cause of the problem. We carried that fixup in preempt-rt for years and were able to decode such wreckage quite a few times. Signed-off-by: Thomas Gleixner Cc: Linux Torvalds Cc: Andrew Morton Cc: Arjan van de Veen --- kernel/timer.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index 45229694dc6a..7e12e7bc7ce6 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -982,9 +982,15 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), lock_map_release(&lockdep_map); if (preempt_count != preempt_count()) { - printk(KERN_ERR "timer: %pF preempt leak: %08x -> %08x\n", - fn, preempt_count, preempt_count()); - BUG(); + WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n", + fn, preempt_count, preempt_count()); + /* + * Restore the preempt count. That gives us a decent + * chance to survive and extract information. If the + * callback kept a lock held, bad luck, but not worse + * than the BUG() we had. + */ + preempt_count() = preempt_count; } } -- cgit v1.2.2 From 3bbb9ec946428b96657126768f65487a48dd090c Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 11 Mar 2010 14:04:36 -0800 Subject: timers: Introduce the concept of timer slack for legacy timers While HR timers have had the concept of timer slack for quite some time now, the legacy timers lacked this concept, and had to make do with round_jiffies() and friends. Timer slack is important for power management; grouping timers reduces the number of wakeups which in turn reduces power consumption. This patch introduces timer slack to the legacy timers using the following pieces: * A slack field in the timer struct * An api (set_timer_slack) that callers can use to set explicit timer slack * A default slack of 0.4% of the requested delay for callers that do not set any explicit slack * Rounding code that is part of mod_timer() that tries to group timers around jiffies values every 'power of two' (so quick timers will group around every 2, but longer timers will group around every 4, 8, 16, 32 etc) Signed-off-by: Arjan van de Ven Cc: johnstul@us.ibm.com Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- kernel/timer.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index 7e12e7bc7ce6..49773f38c9bc 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -318,6 +318,24 @@ unsigned long round_jiffies_up_relative(unsigned long j) } EXPORT_SYMBOL_GPL(round_jiffies_up_relative); +/** + * set_timer_slack - set the allowed slack for a timer + * @slack_hz: the amount of time (in jiffies) allowed for rounding + * + * Set the amount of time, in jiffies, that a certain timer has + * in terms of slack. By setting this value, the timer subsystem + * will schedule the actual timer somewhere between + * the time mod_timer() asks for, and that time plus the slack. + * + * By setting the slack to -1, a percentage of the delay is used + * instead. + */ +void set_timer_slack(struct timer_list *timer, int slack_hz) +{ + timer->slack = slack_hz; +} +EXPORT_SYMBOL_GPL(set_timer_slack); + static inline void set_running_timer(struct tvec_base *base, struct timer_list *timer) @@ -549,6 +567,7 @@ static void __init_timer(struct timer_list *timer, { timer->entry.next = NULL; timer->base = __raw_get_cpu_var(tvec_bases); + timer->slack = -1; #ifdef CONFIG_TIMER_STATS timer->start_site = NULL; timer->start_pid = -1; @@ -714,6 +733,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires) } EXPORT_SYMBOL(mod_timer_pending); +/* + * Decide where to put the timer while taking the slack into account + * + * Algorithm: + * 1) calculate the maximum (absolute) time + * 2) calculate the highest bit where the expires and new max are different + * 3) use this bit to make a mask + * 4) use the bitmask to round down the maximum time, so that all last + * bits are zeros + */ +static inline +unsigned long apply_slack(struct timer_list *timer, unsigned long expires) +{ + unsigned long expires_limit, mask; + int bit; + + expires_limit = expires + timer->slack; + + if (timer->slack < 0) /* auto slack: use 0.4% */ + expires_limit = expires + (expires - jiffies)/256; + + mask = expires ^ expires_limit; + + if (mask == 0) + return expires; + + bit = find_last_bit(&mask, BITS_PER_LONG); + + mask = (1 << bit) - 1; + + expires_limit = expires_limit & ~(mask); + + return expires_limit; +} + /** * mod_timer - modify a timer's timeout * @timer: the timer to be modified @@ -744,6 +798,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires) if (timer_pending(timer) && timer->expires == expires) return 1; + expires = apply_slack(timer, expires); + return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); } EXPORT_SYMBOL(mod_timer); -- cgit v1.2.2