diff options
Diffstat (limited to 'kernel/time/tick-sched.c')
| -rw-r--r-- | kernel/time/tick-sched.c | 141 |
1 files changed, 88 insertions, 53 deletions
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 89aed5933ed4..f992762d7f51 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -134,18 +134,13 @@ __setup("nohz=", setup_tick_nohz); | |||
| 134 | * value. We do this unconditionally on any cpu, as we don't know whether the | 134 | * value. We do this unconditionally on any cpu, as we don't know whether the |
| 135 | * cpu, which has the update task assigned is in a long sleep. | 135 | * cpu, which has the update task assigned is in a long sleep. |
| 136 | */ | 136 | */ |
| 137 | static void tick_nohz_update_jiffies(void) | 137 | static void tick_nohz_update_jiffies(ktime_t now) |
| 138 | { | 138 | { |
| 139 | int cpu = smp_processor_id(); | 139 | int cpu = smp_processor_id(); |
| 140 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 140 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
| 141 | unsigned long flags; | 141 | unsigned long flags; |
| 142 | ktime_t now; | ||
| 143 | |||
| 144 | if (!ts->tick_stopped) | ||
| 145 | return; | ||
| 146 | 142 | ||
| 147 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | 143 | cpumask_clear_cpu(cpu, nohz_cpu_mask); |
| 148 | now = ktime_get(); | ||
| 149 | ts->idle_waketime = now; | 144 | ts->idle_waketime = now; |
| 150 | 145 | ||
| 151 | local_irq_save(flags); | 146 | local_irq_save(flags); |
| @@ -155,20 +150,17 @@ static void tick_nohz_update_jiffies(void) | |||
| 155 | touch_softlockup_watchdog(); | 150 | touch_softlockup_watchdog(); |
| 156 | } | 151 | } |
| 157 | 152 | ||
| 158 | static void tick_nohz_stop_idle(int cpu) | 153 | static void tick_nohz_stop_idle(int cpu, ktime_t now) |
| 159 | { | 154 | { |
| 160 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 155 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
| 156 | ktime_t delta; | ||
| 161 | 157 | ||
| 162 | if (ts->idle_active) { | 158 | delta = ktime_sub(now, ts->idle_entrytime); |
| 163 | ktime_t now, delta; | 159 | ts->idle_lastupdate = now; |
| 164 | now = ktime_get(); | 160 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); |
| 165 | delta = ktime_sub(now, ts->idle_entrytime); | 161 | ts->idle_active = 0; |
| 166 | ts->idle_lastupdate = now; | ||
| 167 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | ||
| 168 | ts->idle_active = 0; | ||
| 169 | 162 | ||
| 170 | sched_clock_idle_wakeup_event(0); | 163 | sched_clock_idle_wakeup_event(0); |
| 171 | } | ||
| 172 | } | 164 | } |
| 173 | 165 | ||
| 174 | static ktime_t tick_nohz_start_idle(struct tick_sched *ts) | 166 | static ktime_t tick_nohz_start_idle(struct tick_sched *ts) |
| @@ -216,6 +208,7 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
| 216 | struct tick_sched *ts; | 208 | struct tick_sched *ts; |
| 217 | ktime_t last_update, expires, now; | 209 | ktime_t last_update, expires, now; |
| 218 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | 210 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; |
| 211 | u64 time_delta; | ||
| 219 | int cpu; | 212 | int cpu; |
| 220 | 213 | ||
| 221 | local_irq_save(flags); | 214 | local_irq_save(flags); |
| @@ -263,7 +256,7 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
| 263 | 256 | ||
| 264 | if (ratelimit < 10) { | 257 | if (ratelimit < 10) { |
| 265 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | 258 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", |
| 266 | local_softirq_pending()); | 259 | (unsigned int) local_softirq_pending()); |
| 267 | ratelimit++; | 260 | ratelimit++; |
| 268 | } | 261 | } |
| 269 | goto end; | 262 | goto end; |
| @@ -275,14 +268,18 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
| 275 | seq = read_seqbegin(&xtime_lock); | 268 | seq = read_seqbegin(&xtime_lock); |
| 276 | last_update = last_jiffies_update; | 269 | last_update = last_jiffies_update; |
| 277 | last_jiffies = jiffies; | 270 | last_jiffies = jiffies; |
| 271 | time_delta = timekeeping_max_deferment(); | ||
| 278 | } while (read_seqretry(&xtime_lock, seq)); | 272 | } while (read_seqretry(&xtime_lock, seq)); |
| 279 | 273 | ||
| 280 | /* Get the next timer wheel timer */ | 274 | if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || |
| 281 | next_jiffies = get_next_timer_interrupt(last_jiffies); | 275 | arch_needs_cpu(cpu)) { |
| 282 | delta_jiffies = next_jiffies - last_jiffies; | 276 | next_jiffies = last_jiffies + 1; |
| 283 | |||
| 284 | if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu)) | ||
| 285 | delta_jiffies = 1; | 277 | delta_jiffies = 1; |
| 278 | } else { | ||
| 279 | /* Get the next timer wheel timer */ | ||
| 280 | next_jiffies = get_next_timer_interrupt(last_jiffies); | ||
| 281 | delta_jiffies = next_jiffies - last_jiffies; | ||
| 282 | } | ||
| 286 | /* | 283 | /* |
| 287 | * Do not stop the tick, if we are only one off | 284 | * Do not stop the tick, if we are only one off |
| 288 | * or if the cpu is required for rcu | 285 | * or if the cpu is required for rcu |
| @@ -294,22 +291,51 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
| 294 | if ((long)delta_jiffies >= 1) { | 291 | if ((long)delta_jiffies >= 1) { |
| 295 | 292 | ||
| 296 | /* | 293 | /* |
| 297 | * calculate the expiry time for the next timer wheel | ||
| 298 | * timer | ||
| 299 | */ | ||
| 300 | expires = ktime_add_ns(last_update, tick_period.tv64 * | ||
| 301 | delta_jiffies); | ||
| 302 | |||
| 303 | /* | ||
| 304 | * If this cpu is the one which updates jiffies, then | 294 | * If this cpu is the one which updates jiffies, then |
| 305 | * give up the assignment and let it be taken by the | 295 | * give up the assignment and let it be taken by the |
| 306 | * cpu which runs the tick timer next, which might be | 296 | * cpu which runs the tick timer next, which might be |
| 307 | * this cpu as well. If we don't drop this here the | 297 | * this cpu as well. If we don't drop this here the |
| 308 | * jiffies might be stale and do_timer() never | 298 | * jiffies might be stale and do_timer() never |
| 309 | * invoked. | 299 | * invoked. Keep track of the fact that it was the one |
| 300 | * which had the do_timer() duty last. If this cpu is | ||
| 301 | * the one which had the do_timer() duty last, we | ||
| 302 | * limit the sleep time to the timekeeping | ||
| 303 | * max_deferement value which we retrieved | ||
| 304 | * above. Otherwise we can sleep as long as we want. | ||
| 310 | */ | 305 | */ |
| 311 | if (cpu == tick_do_timer_cpu) | 306 | if (cpu == tick_do_timer_cpu) { |
| 312 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | 307 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; |
| 308 | ts->do_timer_last = 1; | ||
| 309 | } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { | ||
| 310 | time_delta = KTIME_MAX; | ||
| 311 | ts->do_timer_last = 0; | ||
| 312 | } else if (!ts->do_timer_last) { | ||
| 313 | time_delta = KTIME_MAX; | ||
| 314 | } | ||
| 315 | |||
| 316 | /* | ||
| 317 | * calculate the expiry time for the next timer wheel | ||
| 318 | * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals | ||
| 319 | * that there is no timer pending or at least extremely | ||
| 320 | * far into the future (12 days for HZ=1000). In this | ||
| 321 | * case we set the expiry to the end of time. | ||
| 322 | */ | ||
| 323 | if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) { | ||
| 324 | /* | ||
| 325 | * Calculate the time delta for the next timer event. | ||
| 326 | * If the time delta exceeds the maximum time delta | ||
| 327 | * permitted by the current clocksource then adjust | ||
| 328 | * the time delta accordingly to ensure the | ||
| 329 | * clocksource does not wrap. | ||
| 330 | */ | ||
| 331 | time_delta = min_t(u64, time_delta, | ||
| 332 | tick_period.tv64 * delta_jiffies); | ||
| 333 | } | ||
| 334 | |||
| 335 | if (time_delta < KTIME_MAX) | ||
| 336 | expires = ktime_add_ns(last_update, time_delta); | ||
| 337 | else | ||
| 338 | expires.tv64 = KTIME_MAX; | ||
| 313 | 339 | ||
| 314 | if (delta_jiffies > 1) | 340 | if (delta_jiffies > 1) |
| 315 | cpumask_set_cpu(cpu, nohz_cpu_mask); | 341 | cpumask_set_cpu(cpu, nohz_cpu_mask); |
| @@ -342,22 +368,19 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
| 342 | 368 | ||
| 343 | ts->idle_sleeps++; | 369 | ts->idle_sleeps++; |
| 344 | 370 | ||
| 371 | /* Mark expires */ | ||
| 372 | ts->idle_expires = expires; | ||
| 373 | |||
| 345 | /* | 374 | /* |
| 346 | * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that | 375 | * If the expiration time == KTIME_MAX, then |
| 347 | * there is no timer pending or at least extremly far | 376 | * in this case we simply stop the tick timer. |
| 348 | * into the future (12 days for HZ=1000). In this case | ||
| 349 | * we simply stop the tick timer: | ||
| 350 | */ | 377 | */ |
| 351 | if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) { | 378 | if (unlikely(expires.tv64 == KTIME_MAX)) { |
| 352 | ts->idle_expires.tv64 = KTIME_MAX; | ||
| 353 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) | 379 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) |
| 354 | hrtimer_cancel(&ts->sched_timer); | 380 | hrtimer_cancel(&ts->sched_timer); |
| 355 | goto out; | 381 | goto out; |
| 356 | } | 382 | } |
| 357 | 383 | ||
| 358 | /* Mark expiries */ | ||
| 359 | ts->idle_expires = expires; | ||
| 360 | |||
| 361 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | 384 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
| 362 | hrtimer_start(&ts->sched_timer, expires, | 385 | hrtimer_start(&ts->sched_timer, expires, |
| 363 | HRTIMER_MODE_ABS_PINNED); | 386 | HRTIMER_MODE_ABS_PINNED); |
| @@ -436,7 +459,11 @@ void tick_nohz_restart_sched_tick(void) | |||
| 436 | ktime_t now; | 459 | ktime_t now; |
| 437 | 460 | ||
| 438 | local_irq_disable(); | 461 | local_irq_disable(); |
| 439 | tick_nohz_stop_idle(cpu); | 462 | if (ts->idle_active || (ts->inidle && ts->tick_stopped)) |
| 463 | now = ktime_get(); | ||
| 464 | |||
| 465 | if (ts->idle_active) | ||
| 466 | tick_nohz_stop_idle(cpu, now); | ||
| 440 | 467 | ||
| 441 | if (!ts->inidle || !ts->tick_stopped) { | 468 | if (!ts->inidle || !ts->tick_stopped) { |
| 442 | ts->inidle = 0; | 469 | ts->inidle = 0; |
| @@ -450,7 +477,6 @@ void tick_nohz_restart_sched_tick(void) | |||
| 450 | 477 | ||
| 451 | /* Update jiffies first */ | 478 | /* Update jiffies first */ |
| 452 | select_nohz_load_balancer(0); | 479 | select_nohz_load_balancer(0); |
| 453 | now = ktime_get(); | ||
| 454 | tick_do_update_jiffies64(now); | 480 | tick_do_update_jiffies64(now); |
| 455 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | 481 | cpumask_clear_cpu(cpu, nohz_cpu_mask); |
| 456 | 482 | ||
| @@ -584,22 +610,18 @@ static void tick_nohz_switch_to_nohz(void) | |||
| 584 | * timer and do not touch the other magic bits which need to be done | 610 | * timer and do not touch the other magic bits which need to be done |
| 585 | * when idle is left. | 611 | * when idle is left. |
| 586 | */ | 612 | */ |
| 587 | static void tick_nohz_kick_tick(int cpu) | 613 | static void tick_nohz_kick_tick(int cpu, ktime_t now) |
| 588 | { | 614 | { |
| 589 | #if 0 | 615 | #if 0 |
| 590 | /* Switch back to 2.6.27 behaviour */ | 616 | /* Switch back to 2.6.27 behaviour */ |
| 591 | 617 | ||
| 592 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 618 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
| 593 | ktime_t delta, now; | 619 | ktime_t delta; |
| 594 | |||
| 595 | if (!ts->tick_stopped) | ||
| 596 | return; | ||
| 597 | 620 | ||
| 598 | /* | 621 | /* |
| 599 | * Do not touch the tick device, when the next expiry is either | 622 | * Do not touch the tick device, when the next expiry is either |
| 600 | * already reached or less/equal than the tick period. | 623 | * already reached or less/equal than the tick period. |
| 601 | */ | 624 | */ |
| 602 | now = ktime_get(); | ||
| 603 | delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now); | 625 | delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now); |
| 604 | if (delta.tv64 <= tick_period.tv64) | 626 | if (delta.tv64 <= tick_period.tv64) |
| 605 | return; | 627 | return; |
| @@ -608,9 +630,26 @@ static void tick_nohz_kick_tick(int cpu) | |||
| 608 | #endif | 630 | #endif |
| 609 | } | 631 | } |
| 610 | 632 | ||
| 633 | static inline void tick_check_nohz(int cpu) | ||
| 634 | { | ||
| 635 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||
| 636 | ktime_t now; | ||
| 637 | |||
| 638 | if (!ts->idle_active && !ts->tick_stopped) | ||
| 639 | return; | ||
| 640 | now = ktime_get(); | ||
| 641 | if (ts->idle_active) | ||
| 642 | tick_nohz_stop_idle(cpu, now); | ||
| 643 | if (ts->tick_stopped) { | ||
| 644 | tick_nohz_update_jiffies(now); | ||
| 645 | tick_nohz_kick_tick(cpu, now); | ||
| 646 | } | ||
| 647 | } | ||
| 648 | |||
| 611 | #else | 649 | #else |
| 612 | 650 | ||
| 613 | static inline void tick_nohz_switch_to_nohz(void) { } | 651 | static inline void tick_nohz_switch_to_nohz(void) { } |
| 652 | static inline void tick_check_nohz(int cpu) { } | ||
| 614 | 653 | ||
| 615 | #endif /* NO_HZ */ | 654 | #endif /* NO_HZ */ |
| 616 | 655 | ||
| @@ -620,11 +659,7 @@ static inline void tick_nohz_switch_to_nohz(void) { } | |||
| 620 | void tick_check_idle(int cpu) | 659 | void tick_check_idle(int cpu) |
| 621 | { | 660 | { |
| 622 | tick_check_oneshot_broadcast(cpu); | 661 | tick_check_oneshot_broadcast(cpu); |
| 623 | #ifdef CONFIG_NO_HZ | 662 | tick_check_nohz(cpu); |
| 624 | tick_nohz_stop_idle(cpu); | ||
| 625 | tick_nohz_update_jiffies(); | ||
| 626 | tick_nohz_kick_tick(cpu); | ||
| 627 | #endif | ||
| 628 | } | 663 | } |
| 629 | 664 | ||
| 630 | /* | 665 | /* |
