diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2012-05-25 18:08:59 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2012-06-06 07:49:02 -0400 |
commit | e40468a54882ef7411fb178dbf2e465ec2349af7 (patch) | |
tree | 5c7859bb5b325752694b3fd0bccbce7b4997dab2 /kernel | |
parent | 99d5f3aac674fe081ffddd2dbb8946ccbc14c410 (diff) |
timers: Improve get_next_timer_interrupt()
Gilad reported at
http://lkml.kernel.org/r/1336056962-10465-2-git-send-email-gilad@benyossef.com
"Current timer code fails to correctly return a value meaning that
there is no future timer event, with the result that the timer keeps
getting re-armed in HZ one shot mode even when we could turn it off,
generating unneeded interrupts.
What is happening is that when __next_timer_interrupt() wishes
to return a value that signifies "there is no future timer
event", it returns (base->timer_jiffies + NEXT_TIMER_MAX_DELTA).
However, the code in tick_nohz_stop_sched_tick(), which called
__next_timer_interrupt() via get_next_timer_interrupt(),
compares the return value to (last_jiffies + NEXT_TIMER_MAX_DELTA)
to see if the timer needs to be re-armed.
base->timer_jiffies != last_jiffies and so tick_nohz_stop_sched_tick()
interprets the return value as an indication that there is a distant
future event 12 days from now and programs the timer to fire next
after KTIME_MAX nsecs instead of not arming it. This ends up
causing a needless interrupt once every KTIME_MAX nsecs."
Fix this by using the new active timer accounting. This completely
avoids scans when no active timer is enqueued, so we don't have to rely
on base->next_timer and base->timer_jiffies anymore.
Reported-by: Gilad Ben-Yossef <gilad@benyossef.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20120525214819.317535385@linutronix.de
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/timer.c | 13 |
1 files changed, 8 insertions, 5 deletions
diff --git a/kernel/timer.c b/kernel/timer.c index 7fada698bd1a..a61c09374eba 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -1326,18 +1326,21 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now, | |||
1326 | unsigned long get_next_timer_interrupt(unsigned long now) | 1326 | unsigned long get_next_timer_interrupt(unsigned long now) |
1327 | { | 1327 | { |
1328 | struct tvec_base *base = __this_cpu_read(tvec_bases); | 1328 | struct tvec_base *base = __this_cpu_read(tvec_bases); |
1329 | unsigned long expires; | 1329 | unsigned long expires = now + NEXT_TIMER_MAX_DELTA; |
1330 | 1330 | ||
1331 | /* | 1331 | /* |
1332 | * Pretend that there is no timer pending if the cpu is offline. | 1332 | * Pretend that there is no timer pending if the cpu is offline. |
1333 | * Possible pending timers will be migrated later to an active cpu. | 1333 | * Possible pending timers will be migrated later to an active cpu. |
1334 | */ | 1334 | */ |
1335 | if (cpu_is_offline(smp_processor_id())) | 1335 | if (cpu_is_offline(smp_processor_id())) |
1336 | return now + NEXT_TIMER_MAX_DELTA; | 1336 | return expires; |
1337 | |||
1337 | spin_lock(&base->lock); | 1338 | spin_lock(&base->lock); |
1338 | if (time_before_eq(base->next_timer, base->timer_jiffies)) | 1339 | if (base->active_timers) { |
1339 | base->next_timer = __next_timer_interrupt(base); | 1340 | if (time_before_eq(base->next_timer, base->timer_jiffies)) |
1340 | expires = base->next_timer; | 1341 | base->next_timer = __next_timer_interrupt(base); |
1342 | expires = base->next_timer; | ||
1343 | } | ||
1341 | spin_unlock(&base->lock); | 1344 | spin_unlock(&base->lock); |
1342 | 1345 | ||
1343 | if (time_before_eq(expires, now)) | 1346 | if (time_before_eq(expires, now)) |