diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2009-11-12 16:12:06 -0500 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2009-11-13 14:46:24 -0500 |
commit | 27185016b806d5a1181ff501cae120582b2b27dd (patch) | |
tree | 44b0da428fccaea1ad83c537bcc99a80b9251227 | |
parent | 98962465ed9e6ea99c38e0af63fe1dcb5a79dc25 (diff) |
nohz: Track last do_timer() cpu
The previous patch which limits the sleep time to the maximum
deferment time of the time keeping clocksource has some limitations on
SMP machines: if all CPUs are idle then for all CPUs the maximum sleep
time is limited.
Solve this by keeping track of which cpu had the do_timer() duty
assigned last and limiting the sleep time only for this cpu.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Cc: Jon Hunter <jon-hunter@ti.com>
Cc: John Stultz <johnstul@us.ibm.com>
-rw-r--r-- | include/linux/tick.h | 2 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 52 |
2 files changed, 30 insertions, 24 deletions
diff --git a/include/linux/tick.h b/include/linux/tick.h index 8dc082194b22..d2ae79e21be3 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h | |||
@@ -43,6 +43,7 @@ enum tick_nohz_mode { | |||
43 | * @idle_exittime: Time when the idle state was left | 43 | * @idle_exittime: Time when the idle state was left |
44 | * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped | 44 | * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped |
45 | * @sleep_length: Duration of the current idle sleep | 45 | * @sleep_length: Duration of the current idle sleep |
46 | * @do_timer_last: CPU was the last one doing do_timer before going idle | ||
46 | */ | 47 | */ |
47 | struct tick_sched { | 48 | struct tick_sched { |
48 | struct hrtimer sched_timer; | 49 | struct hrtimer sched_timer; |
@@ -64,6 +65,7 @@ struct tick_sched { | |||
64 | unsigned long last_jiffies; | 65 | unsigned long last_jiffies; |
65 | unsigned long next_jiffies; | 66 | unsigned long next_jiffies; |
66 | ktime_t idle_expires; | 67 | ktime_t idle_expires; |
68 | int do_timer_last; | ||
67 | }; | 69 | }; |
68 | 70 | ||
69 | extern void __init tick_init(void); | 71 | extern void __init tick_init(void); |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a80b4644fe6b..df133bc29f89 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -263,17 +263,7 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
263 | seq = read_seqbegin(&xtime_lock); | 263 | seq = read_seqbegin(&xtime_lock); |
264 | last_update = last_jiffies_update; | 264 | last_update = last_jiffies_update; |
265 | last_jiffies = jiffies; | 265 | last_jiffies = jiffies; |
266 | 266 | time_delta = timekeeping_max_deferment(); | |
267 | /* | ||
268 | * On SMP we really should only care for the CPU which | ||
269 | * has the do_timer duty assigned. All other CPUs can | ||
270 | * sleep as long as they want. | ||
271 | */ | ||
272 | if (cpu == tick_do_timer_cpu || | ||
273 | tick_do_timer_cpu == TICK_DO_TIMER_NONE) | ||
274 | time_delta = timekeeping_max_deferment(); | ||
275 | else | ||
276 | time_delta = KTIME_MAX; | ||
277 | } while (read_seqretry(&xtime_lock, seq)); | 267 | } while (read_seqretry(&xtime_lock, seq)); |
278 | 268 | ||
279 | if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || | 269 | if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || |
@@ -296,6 +286,29 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
296 | if ((long)delta_jiffies >= 1) { | 286 | if ((long)delta_jiffies >= 1) { |
297 | 287 | ||
298 | /* | 288 | /* |
289 | * If this cpu is the one which updates jiffies, then | ||
290 | * give up the assignment and let it be taken by the | ||
291 | * cpu which runs the tick timer next, which might be | ||
292 | * this cpu as well. If we don't drop this here the | ||
293 | * jiffies might be stale and do_timer() never | ||
294 | * invoked. Keep track of the fact that it was the one | ||
295 | * which had the do_timer() duty last. If this cpu is | ||
296 | * the one which had the do_timer() duty last, we | ||
297 | * limit the sleep time to the timekeeping | ||
298 | * max_deferment value which we retrieved | ||
299 | * above. Otherwise we can sleep as long as we want. | ||
300 | */ | ||
301 | if (cpu == tick_do_timer_cpu) { | ||
302 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | ||
303 | ts->do_timer_last = 1; | ||
304 | } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { | ||
305 | time_delta = KTIME_MAX; | ||
306 | ts->do_timer_last = 0; | ||
307 | } else if (!ts->do_timer_last) { | ||
308 | time_delta = KTIME_MAX; | ||
309 | } | ||
310 | |||
311 | /* | ||
299 | * calculate the expiry time for the next timer wheel | 312 | * calculate the expiry time for the next timer wheel |
300 | * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals | 313 | * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals |
301 | * that there is no timer pending or at least extremely | 314 | * that there is no timer pending or at least extremely |
@@ -312,21 +325,12 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
312 | */ | 325 | */ |
313 | time_delta = min_t(u64, time_delta, | 326 | time_delta = min_t(u64, time_delta, |
314 | tick_period.tv64 * delta_jiffies); | 327 | tick_period.tv64 * delta_jiffies); |
315 | expires = ktime_add_ns(last_update, time_delta); | ||
316 | } else { | ||
317 | expires.tv64 = KTIME_MAX; | ||
318 | } | 328 | } |
319 | 329 | ||
320 | /* | 330 | if (time_delta < KTIME_MAX) |
321 | * If this cpu is the one which updates jiffies, then | 331 | expires = ktime_add_ns(last_update, time_delta); |
322 | * give up the assignment and let it be taken by the | 332 | else |
323 | * cpu which runs the tick timer next, which might be | 333 | expires.tv64 = KTIME_MAX; |
324 | * this cpu as well. If we don't drop this here the | ||
325 | * jiffies might be stale and do_timer() never | ||
326 | * invoked. | ||
327 | */ | ||
328 | if (cpu == tick_do_timer_cpu) | ||
329 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | ||
330 | 334 | ||
331 | if (delta_jiffies > 1) | 335 | if (delta_jiffies > 1) |
332 | cpumask_set_cpu(cpu, nohz_cpu_mask); | 336 | cpumask_set_cpu(cpu, nohz_cpu_mask); |