author    Jonathan Herman <hermanjl@cs.unc.edu>  2013-01-17 16:15:55 -0500
committer Jonathan Herman <hermanjl@cs.unc.edu>  2013-01-17 16:15:55 -0500
commit    8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree      a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /kernel/time/tick-sched.c
parent    406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'kernel/time/tick-sched.c')
-rw-r--r--  kernel/time/tick-sched.c  519
1 file changed, 216 insertions, 303 deletions
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d58e552d9fd..d5097c44b40 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -31,7 +31,7 @@ | |||
31 | static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); | 31 | static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); |
32 | 32 | ||
33 | /* | 33 | /* |
34 | * The time, when the last jiffy update happened. Protected by jiffies_lock. | 34 | * The time, when the last jiffy update happened. Protected by xtime_lock. |
35 | */ | 35 | */ |
36 | static ktime_t last_jiffies_update; | 36 | static ktime_t last_jiffies_update; |
37 | 37 | ||
@@ -49,14 +49,14 @@ static void tick_do_update_jiffies64(ktime_t now) | |||
49 | ktime_t delta; | 49 | ktime_t delta; |
50 | 50 | ||
51 | /* | 51 | /* |
52 | * Do a quick check without holding jiffies_lock: | 52 | * Do a quick check without holding xtime_lock: |
53 | */ | 53 | */ |
54 | delta = ktime_sub(now, last_jiffies_update); | 54 | delta = ktime_sub(now, last_jiffies_update); |
55 | if (delta.tv64 < tick_period.tv64) | 55 | if (delta.tv64 < tick_period.tv64) |
56 | return; | 56 | return; |
57 | 57 | ||
58 | /* Reevalute with jiffies_lock held */ | 58 | /* Reevalute with xtime_lock held */ |
59 | write_seqlock(&jiffies_lock); | 59 | write_seqlock(&xtime_lock); |
60 | 60 | ||
61 | delta = ktime_sub(now, last_jiffies_update); | 61 | delta = ktime_sub(now, last_jiffies_update); |
62 | if (delta.tv64 >= tick_period.tv64) { | 62 | if (delta.tv64 >= tick_period.tv64) { |
@@ -79,7 +79,7 @@ static void tick_do_update_jiffies64(ktime_t now) | |||
79 | /* Keep the tick_next_period variable up to date */ | 79 | /* Keep the tick_next_period variable up to date */ |
80 | tick_next_period = ktime_add(last_jiffies_update, tick_period); | 80 | tick_next_period = ktime_add(last_jiffies_update, tick_period); |
81 | } | 81 | } |
82 | write_sequnlock(&jiffies_lock); | 82 | write_sequnlock(&xtime_lock); |
83 | } | 83 | } |
84 | 84 | ||
85 | /* | 85 | /* |
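The hunks above preserve the structure of tick_do_update_jiffies64(): a cheap unlocked check of last_jiffies_update, then the seqlock write side and a re-check before advancing jiffies; only the lock name changes from jiffies_lock back to xtime_lock. A minimal userspace sketch of that check/lock/re-check pattern follows, assuming made-up names (sim_lock, last_update_ns, period_ns) and a pthread mutex standing in for the seqlock write side; the real code also advances by as many whole periods as have elapsed, which the sketch omits.

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t sim_lock = PTHREAD_MUTEX_INITIALIZER; /* stands in for the write side of xtime_lock */
static _Atomic int64_t last_update_ns;                       /* analogous to last_jiffies_update */
static const int64_t period_ns = 1000000;                    /* analogous to tick_period (1 ms here) */

static int64_t now_ns(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
}

static void update_if_period_elapsed(void)
{
	int64_t now = now_ns();

	/* Quick check without holding the lock: a stale read only costs a retry. */
	if (now - atomic_load(&last_update_ns) < period_ns)
		return;

	/* Re-evaluate with the lock held, since another thread may have won the race. */
	pthread_mutex_lock(&sim_lock);
	if (now - atomic_load(&last_update_ns) >= period_ns)
		atomic_fetch_add(&last_update_ns, period_ns);   /* advance by one period (simplified) */
	pthread_mutex_unlock(&sim_lock);
}

int main(void)
{
	atomic_store(&last_update_ns, now_ns());
	update_if_period_elapsed();
	printf("last update at %lld ns\n", (long long)atomic_load(&last_update_ns));
	return 0;
}
```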
@@ -89,58 +89,15 @@ static ktime_t tick_init_jiffy_update(void) | |||
89 | { | 89 | { |
90 | ktime_t period; | 90 | ktime_t period; |
91 | 91 | ||
92 | write_seqlock(&jiffies_lock); | 92 | write_seqlock(&xtime_lock); |
93 | /* Did we start the jiffies update yet ? */ | 93 | /* Did we start the jiffies update yet ? */ |
94 | if (last_jiffies_update.tv64 == 0) | 94 | if (last_jiffies_update.tv64 == 0) |
95 | last_jiffies_update = tick_next_period; | 95 | last_jiffies_update = tick_next_period; |
96 | period = last_jiffies_update; | 96 | period = last_jiffies_update; |
97 | write_sequnlock(&jiffies_lock); | 97 | write_sequnlock(&xtime_lock); |
98 | return period; | 98 | return period; |
99 | } | 99 | } |
100 | 100 | ||
101 | |||
102 | static void tick_sched_do_timer(ktime_t now) | ||
103 | { | ||
104 | int cpu = smp_processor_id(); | ||
105 | |||
106 | #ifdef CONFIG_NO_HZ | ||
107 | /* | ||
108 | * Check if the do_timer duty was dropped. We don't care about | ||
109 | * concurrency: This happens only when the cpu in charge went | ||
110 | * into a long sleep. If two cpus happen to assign themself to | ||
111 | * this duty, then the jiffies update is still serialized by | ||
112 | * jiffies_lock. | ||
113 | */ | ||
114 | if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) | ||
115 | tick_do_timer_cpu = cpu; | ||
116 | #endif | ||
117 | |||
118 | /* Check, if the jiffies need an update */ | ||
119 | if (tick_do_timer_cpu == cpu) | ||
120 | tick_do_update_jiffies64(now); | ||
121 | } | ||
122 | |||
123 | static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) | ||
124 | { | ||
125 | #ifdef CONFIG_NO_HZ | ||
126 | /* | ||
127 | * When we are idle and the tick is stopped, we have to touch | ||
128 | * the watchdog as we might not schedule for a really long | ||
129 | * time. This happens on complete idle SMP systems while | ||
130 | * waiting on the login prompt. We also increment the "start of | ||
131 | * idle" jiffy stamp so the idle accounting adjustment we do | ||
132 | * when we go busy again does not account too much ticks. | ||
133 | */ | ||
134 | if (ts->tick_stopped) { | ||
135 | touch_softlockup_watchdog(); | ||
136 | if (is_idle_task(current)) | ||
137 | ts->idle_jiffies++; | ||
138 | } | ||
139 | #endif | ||
140 | update_process_times(user_mode(regs)); | ||
141 | profile_tick(CPU_PROFILING); | ||
142 | } | ||
143 | |||
144 | /* | 101 | /* |
145 | * NOHZ - aka dynamic tick functionality | 102 | * NOHZ - aka dynamic tick functionality |
146 | */ | 103 | */ |
@@ -148,7 +105,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) | |||
148 | /* | 105 | /* |
149 | * NO HZ enabled ? | 106 | * NO HZ enabled ? |
150 | */ | 107 | */ |
151 | int tick_nohz_enabled __read_mostly = 1; | 108 | static int tick_nohz_enabled __read_mostly = 1; |
152 | 109 | ||
153 | /* | 110 | /* |
154 | * Enable / Disable tickless mode | 111 | * Enable / Disable tickless mode |
@@ -182,6 +139,7 @@ static void tick_nohz_update_jiffies(ktime_t now) | |||
182 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 139 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
183 | unsigned long flags; | 140 | unsigned long flags; |
184 | 141 | ||
142 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||
185 | ts->idle_waketime = now; | 143 | ts->idle_waketime = now; |
186 | 144 | ||
187 | local_irq_save(flags); | 145 | local_irq_save(flags); |
@@ -201,10 +159,9 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda | |||
201 | 159 | ||
202 | if (ts->idle_active) { | 160 | if (ts->idle_active) { |
203 | delta = ktime_sub(now, ts->idle_entrytime); | 161 | delta = ktime_sub(now, ts->idle_entrytime); |
162 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | ||
204 | if (nr_iowait_cpu(cpu) > 0) | 163 | if (nr_iowait_cpu(cpu) > 0) |
205 | ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); | 164 | ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); |
206 | else | ||
207 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | ||
208 | ts->idle_entrytime = now; | 165 | ts->idle_entrytime = now; |
209 | } | 166 | } |
210 | 167 | ||
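In the resulting update_ts_time_stats() the elapsed idle interval is always added to idle_sleeptime and additionally to iowait_sleeptime when tasks are blocked on I/O, whereas the removed lines put each interval in exactly one of the two buckets. A hedged standalone sketch of both accounting policies (struct and function names are invented for the example):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct idle_stats {
	uint64_t idle_ns;    /* analogous to ts->idle_sleeptime */
	uint64_t iowait_ns;  /* analogous to ts->iowait_sleeptime */
};

/* Policy of the resulting code: idle time includes iowait time. */
static void account_overlapping(struct idle_stats *s, uint64_t delta_ns, bool in_iowait)
{
	s->idle_ns += delta_ns;
	if (in_iowait)
		s->iowait_ns += delta_ns;
}

/* Policy of the removed lines: each interval lands in exactly one bucket. */
static void account_exclusive(struct idle_stats *s, uint64_t delta_ns, bool in_iowait)
{
	if (in_iowait)
		s->iowait_ns += delta_ns;
	else
		s->idle_ns += delta_ns;
}

int main(void)
{
	struct idle_stats a = {0}, b = {0};

	account_overlapping(&a, 500, true);
	account_exclusive(&b, 500, true);
	printf("overlapping: idle=%llu iowait=%llu\n",
	       (unsigned long long)a.idle_ns, (unsigned long long)a.iowait_ns);
	printf("exclusive:   idle=%llu iowait=%llu\n",
	       (unsigned long long)b.idle_ns, (unsigned long long)b.iowait_ns);
	return 0;
}
```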
@@ -225,7 +182,11 @@ static void tick_nohz_stop_idle(int cpu, ktime_t now) | |||
225 | 182 | ||
226 | static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) | 183 | static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) |
227 | { | 184 | { |
228 | ktime_t now = ktime_get(); | 185 | ktime_t now; |
186 | |||
187 | now = ktime_get(); | ||
188 | |||
189 | update_ts_time_stats(cpu, ts, now, NULL); | ||
229 | 190 | ||
230 | ts->idle_entrytime = now; | 191 | ts->idle_entrytime = now; |
231 | ts->idle_active = 1; | 192 | ts->idle_active = 1; |
@@ -236,11 +197,11 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) | |||
236 | /** | 197 | /** |
237 | * get_cpu_idle_time_us - get the total idle time of a cpu | 198 | * get_cpu_idle_time_us - get the total idle time of a cpu |
238 | * @cpu: CPU number to query | 199 | * @cpu: CPU number to query |
239 | * @last_update_time: variable to store update time in. Do not update | 200 | * @last_update_time: variable to store update time in |
240 | * counters if NULL. | ||
241 | * | 201 | * |
242 | * Return the cummulative idle time (since boot) for a given | 202 | * Return the cummulative idle time (since boot) for a given |
243 | * CPU, in microseconds. | 203 | * CPU, in microseconds. The idle time returned includes |
204 | * the iowait time (unlike what "top" and co report). | ||
244 | * | 205 | * |
245 | * This time is measured via accounting rather than sampling, | 206 | * This time is measured via accounting rather than sampling, |
246 | * and is as accurate as ktime_get() is. | 207 | * and is as accurate as ktime_get() is. |
@@ -250,35 +211,20 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) | |||
250 | u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) | 211 | u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) |
251 | { | 212 | { |
252 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 213 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
253 | ktime_t now, idle; | ||
254 | 214 | ||
255 | if (!tick_nohz_enabled) | 215 | if (!tick_nohz_enabled) |
256 | return -1; | 216 | return -1; |
257 | 217 | ||
258 | now = ktime_get(); | 218 | update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); |
259 | if (last_update_time) { | ||
260 | update_ts_time_stats(cpu, ts, now, last_update_time); | ||
261 | idle = ts->idle_sleeptime; | ||
262 | } else { | ||
263 | if (ts->idle_active && !nr_iowait_cpu(cpu)) { | ||
264 | ktime_t delta = ktime_sub(now, ts->idle_entrytime); | ||
265 | |||
266 | idle = ktime_add(ts->idle_sleeptime, delta); | ||
267 | } else { | ||
268 | idle = ts->idle_sleeptime; | ||
269 | } | ||
270 | } | ||
271 | |||
272 | return ktime_to_us(idle); | ||
273 | 219 | ||
220 | return ktime_to_us(ts->idle_sleeptime); | ||
274 | } | 221 | } |
275 | EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); | 222 | EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); |
276 | 223 | ||
277 | /** | 224 | /* |
278 | * get_cpu_iowait_time_us - get the total iowait time of a cpu | 225 | * get_cpu_iowait_time_us - get the total iowait time of a cpu |
279 | * @cpu: CPU number to query | 226 | * @cpu: CPU number to query |
280 | * @last_update_time: variable to store update time in. Do not update | 227 | * @last_update_time: variable to store update time in |
281 | * counters if NULL. | ||
282 | * | 228 | * |
283 | * Return the cummulative iowait time (since boot) for a given | 229 | * Return the cummulative iowait time (since boot) for a given |
284 | * CPU, in microseconds. | 230 | * CPU, in microseconds. |
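get_cpu_idle_time_us() and get_cpu_iowait_time_us() export cumulative, monotonically growing per-CPU counters; a consumer such as a cpufreq governor typically samples them twice and divides by the wall-clock interval to derive an idle fraction. A sketch of that arithmetic only, with invented example numbers (how the counter is actually read is out of scope here):

```c
#include <stdint.h>
#include <stdio.h>

/*
 * Given two samples of a cumulative per-CPU idle-time counter (microseconds,
 * as returned by an interface like get_cpu_idle_time_us()) and the wall-clock
 * time between the samples, return the idle fraction over that interval.
 */
static double idle_fraction(uint64_t idle_prev_us, uint64_t idle_now_us,
			    uint64_t wall_delta_us)
{
	uint64_t idle_delta = idle_now_us - idle_prev_us;

	if (wall_delta_us == 0)
		return 0.0;
	if (idle_delta > wall_delta_us)     /* clamp against rounding/jitter */
		idle_delta = wall_delta_us;
	return (double)idle_delta / (double)wall_delta_us;
}

int main(void)
{
	/* Example numbers only: 80 ms idle over a 100 ms window -> 0.80. */
	printf("idle fraction = %.2f\n", idle_fraction(1000000, 1080000, 100000));
	return 0;
}
```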
@@ -291,47 +237,93 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); | |||
291 | u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) | 237 | u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) |
292 | { | 238 | { |
293 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 239 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
294 | ktime_t now, iowait; | ||
295 | 240 | ||
296 | if (!tick_nohz_enabled) | 241 | if (!tick_nohz_enabled) |
297 | return -1; | 242 | return -1; |
298 | 243 | ||
299 | now = ktime_get(); | 244 | update_ts_time_stats(cpu, ts, ktime_get(), last_update_time); |
300 | if (last_update_time) { | ||
301 | update_ts_time_stats(cpu, ts, now, last_update_time); | ||
302 | iowait = ts->iowait_sleeptime; | ||
303 | } else { | ||
304 | if (ts->idle_active && nr_iowait_cpu(cpu) > 0) { | ||
305 | ktime_t delta = ktime_sub(now, ts->idle_entrytime); | ||
306 | |||
307 | iowait = ktime_add(ts->iowait_sleeptime, delta); | ||
308 | } else { | ||
309 | iowait = ts->iowait_sleeptime; | ||
310 | } | ||
311 | } | ||
312 | 245 | ||
313 | return ktime_to_us(iowait); | 246 | return ktime_to_us(ts->iowait_sleeptime); |
314 | } | 247 | } |
315 | EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); | 248 | EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); |
316 | 249 | ||
317 | static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | 250 | /** |
318 | ktime_t now, int cpu) | 251 | * tick_nohz_stop_sched_tick - stop the idle tick from the idle task |
252 | * | ||
253 | * When the next event is more than a tick into the future, stop the idle tick | ||
254 | * Called either from the idle loop or from irq_exit() when an idle period was | ||
255 | * just interrupted by an interrupt which did not cause a reschedule. | ||
256 | */ | ||
257 | void tick_nohz_stop_sched_tick(int inidle) | ||
319 | { | 258 | { |
320 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; | 259 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; |
321 | ktime_t last_update, expires, ret = { .tv64 = 0 }; | 260 | struct tick_sched *ts; |
322 | unsigned long rcu_delta_jiffies; | 261 | ktime_t last_update, expires, now; |
323 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | 262 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; |
324 | u64 time_delta; | 263 | u64 time_delta; |
264 | int cpu; | ||
265 | |||
266 | local_irq_save(flags); | ||
267 | |||
268 | cpu = smp_processor_id(); | ||
269 | ts = &per_cpu(tick_cpu_sched, cpu); | ||
270 | |||
271 | /* | ||
272 | * Call to tick_nohz_start_idle stops the last_update_time from being | ||
273 | * updated. Thus, it must not be called in the event we are called from | ||
274 | * irq_exit() with the prior state different than idle. | ||
275 | */ | ||
276 | if (!inidle && !ts->inidle) | ||
277 | goto end; | ||
278 | |||
279 | /* | ||
280 | * Set ts->inidle unconditionally. Even if the system did not | ||
281 | * switch to NOHZ mode the cpu frequency governers rely on the | ||
282 | * update of the idle time accounting in tick_nohz_start_idle(). | ||
283 | */ | ||
284 | ts->inidle = 1; | ||
325 | 285 | ||
286 | now = tick_nohz_start_idle(cpu, ts); | ||
287 | |||
288 | /* | ||
289 | * If this cpu is offline and it is the one which updates | ||
290 | * jiffies, then give up the assignment and let it be taken by | ||
291 | * the cpu which runs the tick timer next. If we don't drop | ||
292 | * this here the jiffies might be stale and do_timer() never | ||
293 | * invoked. | ||
294 | */ | ||
295 | if (unlikely(!cpu_online(cpu))) { | ||
296 | if (cpu == tick_do_timer_cpu) | ||
297 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | ||
298 | } | ||
299 | |||
300 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | ||
301 | goto end; | ||
302 | |||
303 | if (need_resched()) | ||
304 | goto end; | ||
305 | |||
306 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | ||
307 | static int ratelimit; | ||
308 | |||
309 | if (ratelimit < 10) { | ||
310 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | ||
311 | (unsigned int) local_softirq_pending()); | ||
312 | ratelimit++; | ||
313 | } | ||
314 | goto end; | ||
315 | } | ||
316 | |||
317 | ts->idle_calls++; | ||
326 | /* Read jiffies and the time when jiffies were updated last */ | 318 | /* Read jiffies and the time when jiffies were updated last */ |
327 | do { | 319 | do { |
328 | seq = read_seqbegin(&jiffies_lock); | 320 | seq = read_seqbegin(&xtime_lock); |
329 | last_update = last_jiffies_update; | 321 | last_update = last_jiffies_update; |
330 | last_jiffies = jiffies; | 322 | last_jiffies = jiffies; |
331 | time_delta = timekeeping_max_deferment(); | 323 | time_delta = timekeeping_max_deferment(); |
332 | } while (read_seqretry(&jiffies_lock, seq)); | 324 | } while (read_seqretry(&xtime_lock, seq)); |
333 | 325 | ||
334 | if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) || | 326 | if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || |
335 | arch_needs_cpu(cpu)) { | 327 | arch_needs_cpu(cpu)) { |
336 | next_jiffies = last_jiffies + 1; | 328 | next_jiffies = last_jiffies + 1; |
337 | delta_jiffies = 1; | 329 | delta_jiffies = 1; |
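The jiffies snapshot above is taken inside a read_seqbegin()/read_seqretry() loop (against xtime_lock after this patch), so a concurrent writer simply forces the reader to retry rather than block. A hand-rolled userspace illustration of that reader/writer protocol using a sequence counter; this is a sketch of the idea, not a drop-in seqlock (a production version needs stricter memory ordering around the protected data):

```c
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic unsigned int seq;   /* even = stable, odd = write in progress */
static uint64_t snapshot_jiffies;  /* protected data, like jiffies/last_jiffies_update */

static void write_update(uint64_t new_val)
{
	atomic_fetch_add_explicit(&seq, 1, memory_order_acquire); /* begin write: seq becomes odd */
	snapshot_jiffies = new_val;
	atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* end write: seq even again */
}

static uint64_t read_consistent(void)
{
	unsigned int start;
	uint64_t val;

	for (;;) {
		start = atomic_load_explicit(&seq, memory_order_acquire);
		if (start & 1)
			continue;                 /* writer in progress, try again */
		val = snapshot_jiffies;           /* speculative read of the data */
		if (atomic_load_explicit(&seq, memory_order_acquire) == start)
			return val;               /* no writer interleaved: snapshot is consistent */
	}
}

int main(void)
{
	write_update(4242);
	printf("read %llu\n", (unsigned long long)read_consistent());
	return 0;
}
```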
@@ -339,10 +331,6 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
339 | /* Get the next timer wheel timer */ | 331 | /* Get the next timer wheel timer */ |
340 | next_jiffies = get_next_timer_interrupt(last_jiffies); | 332 | next_jiffies = get_next_timer_interrupt(last_jiffies); |
341 | delta_jiffies = next_jiffies - last_jiffies; | 333 | delta_jiffies = next_jiffies - last_jiffies; |
342 | if (rcu_delta_jiffies < delta_jiffies) { | ||
343 | next_jiffies = last_jiffies + rcu_delta_jiffies; | ||
344 | delta_jiffies = rcu_delta_jiffies; | ||
345 | } | ||
346 | } | 334 | } |
347 | /* | 335 | /* |
348 | * Do not stop the tick, if we are only one off | 336 | * Do not stop the tick, if we are only one off |
@@ -401,12 +389,13 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
401 | else | 389 | else |
402 | expires.tv64 = KTIME_MAX; | 390 | expires.tv64 = KTIME_MAX; |
403 | 391 | ||
392 | if (delta_jiffies > 1) | ||
393 | cpumask_set_cpu(cpu, nohz_cpu_mask); | ||
394 | |||
404 | /* Skip reprogram of event if its not changed */ | 395 | /* Skip reprogram of event if its not changed */ |
405 | if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) | 396 | if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) |
406 | goto out; | 397 | goto out; |
407 | 398 | ||
408 | ret = expires; | ||
409 | |||
410 | /* | 399 | /* |
411 | * nohz_stop_sched_tick can be called several times before | 400 | * nohz_stop_sched_tick can be called several times before |
412 | * the nohz_restart_sched_tick is called. This happens when | 401 | * the nohz_restart_sched_tick is called. This happens when |
@@ -415,13 +404,19 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
415 | * the scheduler tick in nohz_restart_sched_tick. | 404 | * the scheduler tick in nohz_restart_sched_tick. |
416 | */ | 405 | */ |
417 | if (!ts->tick_stopped) { | 406 | if (!ts->tick_stopped) { |
418 | nohz_balance_enter_idle(cpu); | 407 | select_nohz_load_balancer(1); |
419 | calc_load_enter_idle(); | ||
420 | 408 | ||
421 | ts->last_tick = hrtimer_get_expires(&ts->sched_timer); | 409 | ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); |
422 | ts->tick_stopped = 1; | 410 | ts->tick_stopped = 1; |
411 | ts->idle_jiffies = last_jiffies; | ||
412 | rcu_enter_nohz(); | ||
423 | } | 413 | } |
424 | 414 | ||
415 | ts->idle_sleeps++; | ||
416 | |||
417 | /* Mark expires */ | ||
418 | ts->idle_expires = expires; | ||
419 | |||
425 | /* | 420 | /* |
426 | * If the expiration time == KTIME_MAX, then | 421 | * If the expiration time == KTIME_MAX, then |
427 | * in this case we simply stop the tick timer. | 422 | * in this case we simply stop the tick timer. |
@@ -446,132 +441,15 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
446 | * softirq. | 441 | * softirq. |
447 | */ | 442 | */ |
448 | tick_do_update_jiffies64(ktime_get()); | 443 | tick_do_update_jiffies64(ktime_get()); |
444 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||
449 | } | 445 | } |
450 | raise_softirq_irqoff(TIMER_SOFTIRQ); | 446 | raise_softirq_irqoff(TIMER_SOFTIRQ); |
451 | out: | 447 | out: |
452 | ts->next_jiffies = next_jiffies; | 448 | ts->next_jiffies = next_jiffies; |
453 | ts->last_jiffies = last_jiffies; | 449 | ts->last_jiffies = last_jiffies; |
454 | ts->sleep_length = ktime_sub(dev->next_event, now); | 450 | ts->sleep_length = ktime_sub(dev->next_event, now); |
455 | 451 | end: | |
456 | return ret; | 452 | local_irq_restore(flags); |
457 | } | ||
458 | |||
459 | static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | ||
460 | { | ||
461 | /* | ||
462 | * If this cpu is offline and it is the one which updates | ||
463 | * jiffies, then give up the assignment and let it be taken by | ||
464 | * the cpu which runs the tick timer next. If we don't drop | ||
465 | * this here the jiffies might be stale and do_timer() never | ||
466 | * invoked. | ||
467 | */ | ||
468 | if (unlikely(!cpu_online(cpu))) { | ||
469 | if (cpu == tick_do_timer_cpu) | ||
470 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | ||
471 | } | ||
472 | |||
473 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | ||
474 | return false; | ||
475 | |||
476 | if (need_resched()) | ||
477 | return false; | ||
478 | |||
479 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | ||
480 | static int ratelimit; | ||
481 | |||
482 | if (ratelimit < 10 && | ||
483 | (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | ||
484 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | ||
485 | (unsigned int) local_softirq_pending()); | ||
486 | ratelimit++; | ||
487 | } | ||
488 | return false; | ||
489 | } | ||
490 | |||
491 | return true; | ||
492 | } | ||
493 | |||
494 | static void __tick_nohz_idle_enter(struct tick_sched *ts) | ||
495 | { | ||
496 | ktime_t now, expires; | ||
497 | int cpu = smp_processor_id(); | ||
498 | |||
499 | now = tick_nohz_start_idle(cpu, ts); | ||
500 | |||
501 | if (can_stop_idle_tick(cpu, ts)) { | ||
502 | int was_stopped = ts->tick_stopped; | ||
503 | |||
504 | ts->idle_calls++; | ||
505 | |||
506 | expires = tick_nohz_stop_sched_tick(ts, now, cpu); | ||
507 | if (expires.tv64 > 0LL) { | ||
508 | ts->idle_sleeps++; | ||
509 | ts->idle_expires = expires; | ||
510 | } | ||
511 | |||
512 | if (!was_stopped && ts->tick_stopped) | ||
513 | ts->idle_jiffies = ts->last_jiffies; | ||
514 | } | ||
515 | } | ||
516 | |||
517 | /** | ||
518 | * tick_nohz_idle_enter - stop the idle tick from the idle task | ||
519 | * | ||
520 | * When the next event is more than a tick into the future, stop the idle tick | ||
521 | * Called when we start the idle loop. | ||
522 | * | ||
523 | * The arch is responsible of calling: | ||
524 | * | ||
525 | * - rcu_idle_enter() after its last use of RCU before the CPU is put | ||
526 | * to sleep. | ||
527 | * - rcu_idle_exit() before the first use of RCU after the CPU is woken up. | ||
528 | */ | ||
529 | void tick_nohz_idle_enter(void) | ||
530 | { | ||
531 | struct tick_sched *ts; | ||
532 | |||
533 | WARN_ON_ONCE(irqs_disabled()); | ||
534 | |||
535 | /* | ||
536 | * Update the idle state in the scheduler domain hierarchy | ||
537 | * when tick_nohz_stop_sched_tick() is called from the idle loop. | ||
538 | * State will be updated to busy during the first busy tick after | ||
539 | * exiting idle. | ||
540 | */ | ||
541 | set_cpu_sd_state_idle(); | ||
542 | |||
543 | local_irq_disable(); | ||
544 | |||
545 | ts = &__get_cpu_var(tick_cpu_sched); | ||
546 | /* | ||
547 | * set ts->inidle unconditionally. even if the system did not | ||
548 | * switch to nohz mode the cpu frequency governers rely on the | ||
549 | * update of the idle time accounting in tick_nohz_start_idle(). | ||
550 | */ | ||
551 | ts->inidle = 1; | ||
552 | __tick_nohz_idle_enter(ts); | ||
553 | |||
554 | local_irq_enable(); | ||
555 | } | ||
556 | |||
557 | /** | ||
558 | * tick_nohz_irq_exit - update next tick event from interrupt exit | ||
559 | * | ||
560 | * When an interrupt fires while we are idle and it doesn't cause | ||
561 | * a reschedule, it may still add, modify or delete a timer, enqueue | ||
562 | * an RCU callback, etc... | ||
563 | * So we need to re-calculate and reprogram the next tick event. | ||
564 | */ | ||
565 | void tick_nohz_irq_exit(void) | ||
566 | { | ||
567 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
568 | |||
569 | if (!ts->inidle) | ||
570 | return; | ||
571 | |||
572 | /* Cancel the timer because CPU already waken up from the C-states*/ | ||
573 | menu_hrtimer_cancel(); | ||
574 | __tick_nohz_idle_enter(ts); | ||
575 | } | 453 | } |
576 | 454 | ||
577 | /** | 455 | /** |
@@ -589,7 +467,7 @@ ktime_t tick_nohz_get_sleep_length(void) | |||
589 | static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | 467 | static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) |
590 | { | 468 | { |
591 | hrtimer_cancel(&ts->sched_timer); | 469 | hrtimer_cancel(&ts->sched_timer); |
592 | hrtimer_set_expires(&ts->sched_timer, ts->last_tick); | 470 | hrtimer_set_expires(&ts->sched_timer, ts->idle_tick); |
593 | 471 | ||
594 | while (1) { | 472 | while (1) { |
595 | /* Forward the time to expire in the future */ | 473 | /* Forward the time to expire in the future */ |
@@ -606,33 +484,49 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | |||
606 | hrtimer_get_expires(&ts->sched_timer), 0)) | 484 | hrtimer_get_expires(&ts->sched_timer), 0)) |
607 | break; | 485 | break; |
608 | } | 486 | } |
609 | /* Reread time and update jiffies */ | 487 | /* Update jiffies and reread time */ |
610 | now = ktime_get(); | ||
611 | tick_do_update_jiffies64(now); | 488 | tick_do_update_jiffies64(now); |
489 | now = ktime_get(); | ||
612 | } | 490 | } |
613 | } | 491 | } |
614 | 492 | ||
615 | static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) | 493 | /** |
494 | * tick_nohz_restart_sched_tick - restart the idle tick from the idle task | ||
495 | * | ||
496 | * Restart the idle tick when the CPU is woken up from idle | ||
497 | */ | ||
498 | void tick_nohz_restart_sched_tick(void) | ||
616 | { | 499 | { |
617 | /* Update jiffies first */ | 500 | int cpu = smp_processor_id(); |
618 | tick_do_update_jiffies64(now); | 501 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
619 | update_cpu_load_nohz(); | 502 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
503 | unsigned long ticks; | ||
504 | #endif | ||
505 | ktime_t now; | ||
620 | 506 | ||
621 | calc_load_exit_idle(); | 507 | local_irq_disable(); |
622 | touch_softlockup_watchdog(); | 508 | if (ts->idle_active || (ts->inidle && ts->tick_stopped)) |
623 | /* | 509 | now = ktime_get(); |
624 | * Cancel the scheduled timer and restore the tick | ||
625 | */ | ||
626 | ts->tick_stopped = 0; | ||
627 | ts->idle_exittime = now; | ||
628 | 510 | ||
629 | tick_nohz_restart(ts, now); | 511 | if (ts->idle_active) |
630 | } | 512 | tick_nohz_stop_idle(cpu, now); |
513 | |||
514 | if (!ts->inidle || !ts->tick_stopped) { | ||
515 | ts->inidle = 0; | ||
516 | local_irq_enable(); | ||
517 | return; | ||
518 | } | ||
519 | |||
520 | ts->inidle = 0; | ||
521 | |||
522 | rcu_exit_nohz(); | ||
523 | |||
524 | /* Update jiffies first */ | ||
525 | select_nohz_load_balancer(0); | ||
526 | tick_do_update_jiffies64(now); | ||
527 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||
631 | 528 | ||
632 | static void tick_nohz_account_idle_ticks(struct tick_sched *ts) | ||
633 | { | ||
634 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 529 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
635 | unsigned long ticks; | ||
636 | /* | 530 | /* |
637 | * We stopped the tick in idle. Update process times would miss the | 531 | * We stopped the tick in idle. Update process times would miss the |
638 | * time we slept as update_process_times does only a 1 tick | 532 | * time we slept as update_process_times does only a 1 tick |
@@ -645,39 +539,15 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts) | |||
645 | if (ticks && ticks < LONG_MAX) | 539 | if (ticks && ticks < LONG_MAX) |
646 | account_idle_ticks(ticks); | 540 | account_idle_ticks(ticks); |
647 | #endif | 541 | #endif |
648 | } | ||
649 | 542 | ||
650 | /** | 543 | touch_softlockup_watchdog(); |
651 | * tick_nohz_idle_exit - restart the idle tick from the idle task | 544 | /* |
652 | * | 545 | * Cancel the scheduled timer and restore the tick |
653 | * Restart the idle tick when the CPU is woken up from idle | 546 | */ |
654 | * This also exit the RCU extended quiescent state. The CPU | 547 | ts->tick_stopped = 0; |
655 | * can use RCU again after this function is called. | 548 | ts->idle_exittime = now; |
656 | */ | ||
657 | void tick_nohz_idle_exit(void) | ||
658 | { | ||
659 | int cpu = smp_processor_id(); | ||
660 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||
661 | ktime_t now; | ||
662 | |||
663 | local_irq_disable(); | ||
664 | |||
665 | WARN_ON_ONCE(!ts->inidle); | ||
666 | |||
667 | ts->inidle = 0; | ||
668 | |||
669 | /* Cancel the timer because CPU already waken up from the C-states*/ | ||
670 | menu_hrtimer_cancel(); | ||
671 | if (ts->idle_active || ts->tick_stopped) | ||
672 | now = ktime_get(); | ||
673 | |||
674 | if (ts->idle_active) | ||
675 | tick_nohz_stop_idle(cpu, now); | ||
676 | 549 | ||
677 | if (ts->tick_stopped) { | 550 | tick_nohz_restart(ts, now); |
678 | tick_nohz_restart_sched_tick(ts, now); | ||
679 | tick_nohz_account_idle_ticks(ts); | ||
680 | } | ||
681 | 551 | ||
682 | local_irq_enable(); | 552 | local_irq_enable(); |
683 | } | 553 | } |
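The catch-up accounting in this hunk is plain jiffies arithmetic: the ticks that never fired while the tick was stopped are the difference between the current jiffies value and the one recorded at idle entry, and they are handed to account_idle_ticks() in one go. A standalone sketch of that computation with invented example values:

```c
#include <limits.h>
#include <stdio.h>

/*
 * Return how many whole ticks elapsed while the tick was stopped, i.e. how
 * many periods update_process_times() never saw and which must be accounted
 * in one batch (cf. account_idle_ticks() in the hunk above). Unsigned
 * subtraction keeps jiffies wrap-around harmless.
 */
static unsigned long missed_idle_ticks(unsigned long jiffies_now,
				       unsigned long idle_jiffies)
{
	unsigned long ticks = jiffies_now - idle_jiffies;

	return (ticks && ticks < LONG_MAX) ? ticks : 0;
}

int main(void)
{
	/* Went idle at jiffies 1000, woke up at 1250: 250 ticks to account. */
	printf("missed ticks: %lu\n", missed_idle_ticks(1250, 1000));
	return 0;
}
```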
@@ -695,12 +565,40 @@ static void tick_nohz_handler(struct clock_event_device *dev) | |||
695 | { | 565 | { |
696 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 566 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
697 | struct pt_regs *regs = get_irq_regs(); | 567 | struct pt_regs *regs = get_irq_regs(); |
568 | int cpu = smp_processor_id(); | ||
698 | ktime_t now = ktime_get(); | 569 | ktime_t now = ktime_get(); |
699 | 570 | ||
700 | dev->next_event.tv64 = KTIME_MAX; | 571 | dev->next_event.tv64 = KTIME_MAX; |
701 | 572 | ||
702 | tick_sched_do_timer(now); | 573 | /* |
703 | tick_sched_handle(ts, regs); | 574 | * Check if the do_timer duty was dropped. We don't care about |
575 | * concurrency: This happens only when the cpu in charge went | ||
576 | * into a long sleep. If two cpus happen to assign themself to | ||
577 | * this duty, then the jiffies update is still serialized by | ||
578 | * xtime_lock. | ||
579 | */ | ||
580 | if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) | ||
581 | tick_do_timer_cpu = cpu; | ||
582 | |||
583 | /* Check, if the jiffies need an update */ | ||
584 | if (tick_do_timer_cpu == cpu) | ||
585 | tick_do_update_jiffies64(now); | ||
586 | |||
587 | /* | ||
588 | * When we are idle and the tick is stopped, we have to touch | ||
589 | * the watchdog as we might not schedule for a really long | ||
590 | * time. This happens on complete idle SMP systems while | ||
591 | * waiting on the login prompt. We also increment the "start | ||
592 | * of idle" jiffy stamp so the idle accounting adjustment we | ||
593 | * do when we go busy again does not account too much ticks. | ||
594 | */ | ||
595 | if (ts->tick_stopped) { | ||
596 | touch_softlockup_watchdog(); | ||
597 | ts->idle_jiffies++; | ||
598 | } | ||
599 | |||
600 | update_process_times(user_mode(regs)); | ||
601 | profile_tick(CPU_PROFILING); | ||
704 | 602 | ||
705 | while (tick_nohz_reprogram(ts, now)) { | 603 | while (tick_nohz_reprogram(ts, now)) { |
706 | now = ktime_get(); | 604 | now = ktime_get(); |
@@ -742,6 +640,8 @@ static void tick_nohz_switch_to_nohz(void) | |||
742 | next = ktime_add(next, tick_period); | 640 | next = ktime_add(next, tick_period); |
743 | } | 641 | } |
744 | local_irq_enable(); | 642 | local_irq_enable(); |
643 | |||
644 | printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); | ||
745 | } | 645 | } |
746 | 646 | ||
747 | /* | 647 | /* |
@@ -813,7 +713,7 @@ void tick_check_idle(int cpu) | |||
813 | #ifdef CONFIG_HIGH_RES_TIMERS | 713 | #ifdef CONFIG_HIGH_RES_TIMERS |
814 | /* | 714 | /* |
815 | * We rearm the timer until we get disabled by the idle code. | 715 | * We rearm the timer until we get disabled by the idle code. |
816 | * Called with interrupts disabled. | 716 | * Called with interrupts disabled and timer->base->cpu_base->lock held. |
817 | */ | 717 | */ |
818 | static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | 718 | static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) |
819 | { | 719 | { |
@@ -821,31 +721,50 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | |||
821 | container_of(timer, struct tick_sched, sched_timer); | 721 | container_of(timer, struct tick_sched, sched_timer); |
822 | struct pt_regs *regs = get_irq_regs(); | 722 | struct pt_regs *regs = get_irq_regs(); |
823 | ktime_t now = ktime_get(); | 723 | ktime_t now = ktime_get(); |
724 | int cpu = smp_processor_id(); | ||
824 | 725 | ||
825 | tick_sched_do_timer(now); | 726 | #ifdef CONFIG_NO_HZ |
727 | /* | ||
728 | * Check if the do_timer duty was dropped. We don't care about | ||
729 | * concurrency: This happens only when the cpu in charge went | ||
730 | * into a long sleep. If two cpus happen to assign themself to | ||
731 | * this duty, then the jiffies update is still serialized by | ||
732 | * xtime_lock. | ||
733 | */ | ||
734 | if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) | ||
735 | tick_do_timer_cpu = cpu; | ||
736 | #endif | ||
737 | |||
738 | /* Check, if the jiffies need an update */ | ||
739 | if (tick_do_timer_cpu == cpu) | ||
740 | tick_do_update_jiffies64(now); | ||
826 | 741 | ||
827 | /* | 742 | /* |
828 | * Do not call, when we are not in irq context and have | 743 | * Do not call, when we are not in irq context and have |
829 | * no valid regs pointer | 744 | * no valid regs pointer |
830 | */ | 745 | */ |
831 | if (regs) | 746 | if (regs) { |
832 | tick_sched_handle(ts, regs); | 747 | /* |
748 | * When we are idle and the tick is stopped, we have to touch | ||
749 | * the watchdog as we might not schedule for a really long | ||
750 | * time. This happens on complete idle SMP systems while | ||
751 | * waiting on the login prompt. We also increment the "start of | ||
752 | * idle" jiffy stamp so the idle accounting adjustment we do | ||
753 | * when we go busy again does not account too much ticks. | ||
754 | */ | ||
755 | if (ts->tick_stopped) { | ||
756 | touch_softlockup_watchdog(); | ||
757 | ts->idle_jiffies++; | ||
758 | } | ||
759 | update_process_times(user_mode(regs)); | ||
760 | profile_tick(CPU_PROFILING); | ||
761 | } | ||
833 | 762 | ||
834 | hrtimer_forward(timer, now, tick_period); | 763 | hrtimer_forward(timer, now, tick_period); |
835 | 764 | ||
836 | return HRTIMER_RESTART; | 765 | return HRTIMER_RESTART; |
837 | } | 766 | } |
838 | 767 | ||
839 | static int sched_skew_tick; | ||
840 | |||
841 | static int __init skew_tick(char *str) | ||
842 | { | ||
843 | get_option(&str, &sched_skew_tick); | ||
844 | |||
845 | return 0; | ||
846 | } | ||
847 | early_param("skew_tick", skew_tick); | ||
848 | |||
849 | /** | 768 | /** |
850 | * tick_setup_sched_timer - setup the tick emulation timer | 769 | * tick_setup_sched_timer - setup the tick emulation timer |
851 | */ | 770 | */ |
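tick_sched_timer() re-arms itself by calling hrtimer_forward(timer, now, tick_period) and returning HRTIMER_RESTART: the next expiry is derived from the previous expiry rather than from "now", so the emulated tick does not drift. A hedged userspace analogue using a Linux timerfd armed with absolute expiries (error handling trimmed; one period is forwarded per iteration, whereas hrtimer_forward() can skip over several missed periods):

```c
#include <stdint.h>
#include <stdio.h>
#include <sys/timerfd.h>
#include <time.h>
#include <unistd.h>

#define PERIOD_NS 100000000LL   /* 100 ms tick for the example */

static void ts_add_ns(struct timespec *t, long long ns)
{
	t->tv_nsec += ns;
	while (t->tv_nsec >= 1000000000L) {
		t->tv_nsec -= 1000000000L;
		t->tv_sec++;
	}
}

int main(void)
{
	int fd = timerfd_create(CLOCK_MONOTONIC, 0);
	struct itimerspec its = { 0 };
	uint64_t expirations;

	clock_gettime(CLOCK_MONOTONIC, &its.it_value);   /* first expiry: one period from now */
	ts_add_ns(&its.it_value, PERIOD_NS);

	for (int i = 0; i < 3; i++) {
		/* Arm for an absolute time, like hrtimer_start_expires() on sched_timer. */
		timerfd_settime(fd, TFD_TIMER_ABSTIME, &its, NULL);
		read(fd, &expirations, sizeof(expirations)); /* block until the timer fires */
		printf("tick %d\n", i);

		/* "hrtimer_forward": advance the expiry one period past the previous one. */
		ts_add_ns(&its.it_value, PERIOD_NS);
	}
	close(fd);
	return 0;
}
```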
@@ -863,14 +782,6 @@ void tick_setup_sched_timer(void) | |||
863 | /* Get the next period (per cpu) */ | 782 | /* Get the next period (per cpu) */ |
864 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); | 783 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); |
865 | 784 | ||
866 | /* Offset the tick to avert jiffies_lock contention. */ | ||
867 | if (sched_skew_tick) { | ||
868 | u64 offset = ktime_to_ns(tick_period) >> 1; | ||
869 | do_div(offset, num_possible_cpus()); | ||
870 | offset *= smp_processor_id(); | ||
871 | hrtimer_add_expires_ns(&ts->sched_timer, offset); | ||
872 | } | ||
873 | |||
874 | for (;;) { | 785 | for (;;) { |
875 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 786 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
876 | hrtimer_start_expires(&ts->sched_timer, | 787 | hrtimer_start_expires(&ts->sched_timer, |
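The skew_tick= handling removed above staggered the per-CPU tick: each CPU delays its tick by cpu_id * (tick_period / 2 / num_possible_cpus()), spreading the ticks across half a period instead of having every CPU contend for the jiffies seqlock at the same instant. The arithmetic, written out as a standalone sketch with example values for the tick period and CPU count:

```c
#include <stdint.h>
#include <stdio.h>

/*
 * Per-CPU tick offset as computed by the removed skew_tick block:
 * spread the CPUs evenly across the first half of one tick period.
 */
static uint64_t tick_skew_ns(uint64_t tick_period_ns, unsigned int nr_cpus,
			     unsigned int cpu)
{
	uint64_t offset = tick_period_ns >> 1;   /* half a period...              */

	offset /= nr_cpus;                       /* ...divided among the CPUs...  */
	return offset * cpu;                     /* ...scaled by this CPU's index */
}

int main(void)
{
	uint64_t period_ns = 4000000;            /* HZ=250 -> 4 ms tick, for example */

	for (unsigned int cpu = 0; cpu < 4; cpu++)
		printf("cpu%u: skew %llu ns\n", cpu,
		       (unsigned long long)tick_skew_ns(period_ns, 4, cpu));
	return 0;
}
```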
@@ -882,8 +793,10 @@ void tick_setup_sched_timer(void) | |||
882 | } | 793 | } |
883 | 794 | ||
884 | #ifdef CONFIG_NO_HZ | 795 | #ifdef CONFIG_NO_HZ |
885 | if (tick_nohz_enabled) | 796 | if (tick_nohz_enabled) { |
886 | ts->nohz_mode = NOHZ_MODE_HIGHRES; | 797 | ts->nohz_mode = NOHZ_MODE_HIGHRES; |
798 | printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); | ||
799 | } | ||
887 | #endif | 800 | #endif |
888 | } | 801 | } |
889 | #endif /* HIGH_RES_TIMERS */ | 802 | #endif /* HIGH_RES_TIMERS */ |