Diffstat (limited to 'kernel/sched.c')
 -rw-r--r--   kernel/sched.c   145
 1 file changed, 37 insertions, 108 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 49d2fa7b687a..52b7efd27416 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -492,8 +492,11 @@ struct rq {
 #define CPU_LOAD_IDX_MAX 5
        unsigned long cpu_load[CPU_LOAD_IDX_MAX];
 #ifdef CONFIG_NO_HZ
+       u64 nohz_stamp;
        unsigned char in_nohz_recently;
 #endif
+       unsigned int skip_clock_update;
+
        /* capture load from *all* tasks on this cpu: */
        struct load_weight load;
        unsigned long nr_load_updates;
@@ -591,6 +594,13 @@ static inline
 void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 {
        rq->curr->sched_class->check_preempt_curr(rq, p, flags);
+
+       /*
+        * A queue event has occurred, and we're going to schedule. In
+        * this case, we can save a useless back to back clock update.
+        */
+       if (test_tsk_need_resched(p))
+               rq->skip_clock_update = 1;
 }

 static inline int cpu_of(struct rq *rq)
@@ -625,7 +635,8 @@ static inline int cpu_of(struct rq *rq)

 inline void update_rq_clock(struct rq *rq)
 {
-       rq->clock = sched_clock_cpu(cpu_of(rq));
+       if (!rq->skip_clock_update)
+               rq->clock = sched_clock_cpu(cpu_of(rq));
 }

 /*
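The two hunks above wire up the new skip_clock_update hint: check_preempt_curr() sets it when a wakeup is already going to force a reschedule, and update_rq_clock() then skips one redundant sched_clock_cpu() read (a later hunk clears the flag in put_prev_task()). The following is a minimal userspace sketch of that idea, not kernel code; fake_clock() stands in for sched_clock_cpu() and the runqueue is reduced to the two fields involved:

/*
 * Standalone sketch of the skip_clock_update hint: when a wakeup already
 * forces a reschedule, the clock sampled at wakeup time is still fresh
 * when schedule() runs, so the next update can be skipped once.
 */
#include <stdio.h>

struct rq {
        unsigned long long clock;      /* last sampled time, in ns */
        int skip_clock_update;         /* set when a reschedule is imminent */
};

/* stand-in for sched_clock_cpu(); returns a monotonically growing value */
static unsigned long long fake_clock(void)
{
        static unsigned long long now;
        return now += 1000;
}

static void update_rq_clock(struct rq *rq)
{
        if (!rq->skip_clock_update)
                rq->clock = fake_clock();
}

/* wakeup path: the clock was just updated and we will reschedule anyway */
static void check_preempt(struct rq *rq, int need_resched)
{
        if (need_resched)
                rq->skip_clock_update = 1;
}

/* schedule() path: consume the hint, then clear it */
static void schedule_sketch(struct rq *rq)
{
        update_rq_clock(rq);           /* skipped if the hint is set */
        rq->skip_clock_update = 0;
}

int main(void)
{
        struct rq rq = { 0, 0 };

        update_rq_clock(&rq);          /* wakeup samples the clock */
        check_preempt(&rq, 1);         /* preemption pending: set the hint */
        unsigned long long before = rq.clock;
        schedule_sketch(&rq);          /* back-to-back update is skipped */
        printf("clock unchanged: %d\n", rq.clock == before);
        return 0;
}

Because put_prev_task() clears the flag unconditionally, clock updates are only suppressed in the short window between the wakeup and the context switch it triggers.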
@@ -1228,6 +1239,17 @@ void wake_up_idle_cpu(int cpu)
        if (!tsk_is_polling(rq->idle))
                smp_send_reschedule(cpu);
 }
+
+int nohz_ratelimit(int cpu)
+{
+       struct rq *rq = cpu_rq(cpu);
+       u64 diff = rq->clock - rq->nohz_stamp;
+
+       rq->nohz_stamp = rq->clock;
+
+       return diff < (NSEC_PER_SEC / HZ) >> 1;
+}
+
 #endif /* CONFIG_NO_HZ */

 static u64 sched_avg_period(void)
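nohz_ratelimit() above returns true when less than half a scheduler tick has passed since the previous call: (NSEC_PER_SEC / HZ) is one tick in nanoseconds, and the right shift halves it (shift binds tighter than '<' in C, so the comparison really is against the halved value). A standalone sketch of that arithmetic, assuming HZ == 1000 purely for the example:

/*
 * Standalone sketch of the rate-limit arithmetic, not kernel code.
 * One tick is NSEC_PER_SEC / HZ nanoseconds; >> 1 halves it.
 */
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL
#define HZ           1000            /* assumed tick rate for this example */

static unsigned long long nohz_stamp;   /* per-cpu in the real code */

static int nohz_ratelimit_sketch(unsigned long long now)
{
        unsigned long long diff = now - nohz_stamp;

        nohz_stamp = now;
        /* shift binds tighter than '<': compare against half a tick */
        return diff < (NSEC_PER_SEC / HZ) >> 1;
}

int main(void)
{
        printf("%d\n", nohz_ratelimit_sketch(100000)); /* 0.1 ms since stamp: limited (1) */
        printf("%d\n", nohz_ratelimit_sketch(900000)); /* 0.8 ms later: not limited (0) */
        return 0;
}

With HZ == 1000 a tick is 1,000,000 ns, so a CPU asking to stop its tick within 500,000 ns of the previous check is told to keep the tick running for now.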
@@ -1770,8 +1792,6 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
                        raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
                }
        }
-       update_rq_clock(rq1);
-       update_rq_clock(rq2);
 }

 /*
@@ -1868,9 +1888,7 @@ static void update_avg(u64 *avg, u64 sample)
 static void
 enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
 {
-       if (wakeup)
-               p->se.start_runtime = p->se.sum_exec_runtime;
-
+       update_rq_clock(rq);
        sched_info_queued(p);
        p->sched_class->enqueue_task(rq, p, wakeup, head);
        p->se.on_rq = 1;
@@ -1878,17 +1896,7 @@ enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)

 static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
 {
-       if (sleep) {
-               if (p->se.last_wakeup) {
-                       update_avg(&p->se.avg_overlap,
-                               p->se.sum_exec_runtime - p->se.last_wakeup);
-                       p->se.last_wakeup = 0;
-               } else {
-                       update_avg(&p->se.avg_wakeup,
-                               sysctl_sched_wakeup_granularity);
-               }
-       }
-
+       update_rq_clock(rq);
        sched_info_dequeued(p);
        p->sched_class->dequeue_task(rq, p, sleep);
        p->se.on_rq = 0;
@@ -2361,14 +2369,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
        unsigned long flags;
        struct rq *rq;

-       if (!sched_feat(SYNC_WAKEUPS))
-               wake_flags &= ~WF_SYNC;
-
        this_cpu = get_cpu();

        smp_wmb();
        rq = task_rq_lock(p, &flags);
-       update_rq_clock(rq);
        if (!(p->state & state))
                goto out;

@@ -2409,7 +2413,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,

        rq = cpu_rq(cpu);
        raw_spin_lock(&rq->lock);
-       update_rq_clock(rq);

        /*
         * We migrated the task without holding either rq->lock, however
@@ -2437,34 +2440,18 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,

 out_activate:
 #endif /* CONFIG_SMP */
-       schedstat_inc(p, se.nr_wakeups);
+       schedstat_inc(p, se.statistics.nr_wakeups);
        if (wake_flags & WF_SYNC)
-               schedstat_inc(p, se.nr_wakeups_sync);
+               schedstat_inc(p, se.statistics.nr_wakeups_sync);
        if (orig_cpu != cpu)
-               schedstat_inc(p, se.nr_wakeups_migrate);
+               schedstat_inc(p, se.statistics.nr_wakeups_migrate);
        if (cpu == this_cpu)
-               schedstat_inc(p, se.nr_wakeups_local);
+               schedstat_inc(p, se.statistics.nr_wakeups_local);
        else
-               schedstat_inc(p, se.nr_wakeups_remote);
+               schedstat_inc(p, se.statistics.nr_wakeups_remote);
        activate_task(rq, p, 1);
        success = 1;

-       /*
-        * Only attribute actual wakeups done by this task.
-        */
-       if (!in_interrupt()) {
-               struct sched_entity *se = &current->se;
-               u64 sample = se->sum_exec_runtime;
-
-               if (se->last_wakeup)
-                       sample -= se->last_wakeup;
-               else
-                       sample -= se->start_runtime;
-               update_avg(&se->avg_wakeup, sample);
-
-               se->last_wakeup = se->sum_exec_runtime;
-       }
-
 out_running:
        trace_sched_wakeup(rq, p, success);
        check_preempt_curr(rq, p, wake_flags);
@@ -2526,42 +2513,9 @@ static void __sched_fork(struct task_struct *p)
        p->se.sum_exec_runtime = 0;
        p->se.prev_sum_exec_runtime = 0;
        p->se.nr_migrations = 0;
-       p->se.last_wakeup = 0;
-       p->se.avg_overlap = 0;
-       p->se.start_runtime = 0;
-       p->se.avg_wakeup = sysctl_sched_wakeup_granularity;

 #ifdef CONFIG_SCHEDSTATS
-       p->se.wait_start = 0;
-       p->se.wait_max = 0;
-       p->se.wait_count = 0;
-       p->se.wait_sum = 0;
-
-       p->se.sleep_start = 0;
-       p->se.sleep_max = 0;
-       p->se.sum_sleep_runtime = 0;
-
-       p->se.block_start = 0;
-       p->se.block_max = 0;
-       p->se.exec_max = 0;
-       p->se.slice_max = 0;
-
-       p->se.nr_migrations_cold = 0;
-       p->se.nr_failed_migrations_affine = 0;
-       p->se.nr_failed_migrations_running = 0;
-       p->se.nr_failed_migrations_hot = 0;
-       p->se.nr_forced_migrations = 0;
-
-       p->se.nr_wakeups = 0;
-       p->se.nr_wakeups_sync = 0;
-       p->se.nr_wakeups_migrate = 0;
-       p->se.nr_wakeups_local = 0;
-       p->se.nr_wakeups_remote = 0;
-       p->se.nr_wakeups_affine = 0;
-       p->se.nr_wakeups_affine_attempts = 0;
-       p->se.nr_wakeups_passive = 0;
-       p->se.nr_wakeups_idle = 0;
-
+       memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif

        INIT_LIST_HEAD(&p->rt.run_list);
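The hunk above relies on the per-entity schedstat fields having been gathered into a p->se.statistics sub-structure (done elsewhere in this patch series, not shown in this diff), which lets __sched_fork() zero them with a single memset instead of a long list of assignments. A hedged illustration of the pattern, using hypothetical stand-in types rather than the real kernel layout:

#include <stdio.h>
#include <string.h>

/* Hypothetical stand-ins; the real struct carries a few dozen counters. */
struct sched_statistics_sketch {
        unsigned long long wait_start, wait_max, wait_sum;
        unsigned long long nr_wakeups, nr_wakeups_sync;
};

struct sched_entity_sketch {
        unsigned long long sum_exec_runtime;
        struct sched_statistics_sketch statistics;
};

/* Mirrors the __sched_fork() change: one memset replaces ~25 "= 0" lines. */
static void sched_fork_sketch(struct sched_entity_sketch *se)
{
        se->sum_exec_runtime = 0;
        memset(&se->statistics, 0, sizeof(se->statistics));
}

int main(void)
{
        struct sched_entity_sketch se = { 5, { 1, 2, 3, 4, 5 } };

        sched_fork_sketch(&se);
        printf("nr_wakeups after fork reset: %llu\n", se.statistics.nr_wakeups);
        return 0;
}

The same grouping is what lets the schedstat_inc() call sites earlier in the diff switch from se.nr_wakeups* to se.statistics.nr_wakeups*.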
@@ -2675,7 +2629,6 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)

        BUG_ON(p->state != TASK_WAKING);
        p->state = TASK_RUNNING;
-       update_rq_clock(rq);
        activate_task(rq, p, 0);
        trace_sched_wakeup_new(rq, p, 1);
        check_preempt_curr(rq, p, WF_FORK);
@@ -3629,23 +3582,9 @@ static inline void schedule_debug(struct task_struct *prev)

 static void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
-       if (prev->state == TASK_RUNNING) {
-               u64 runtime = prev->se.sum_exec_runtime;
-
-               runtime -= prev->se.prev_sum_exec_runtime;
-               runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
-
-               /*
-                * In order to avoid avg_overlap growing stale when we are
-                * indeed overlapping and hence not getting put to sleep, grow
-                * the avg_overlap on preemption.
-                *
-                * We use the average preemption runtime because that
-                * correlates to the amount of cache footprint a task can
-                * build up.
-                */
-               update_avg(&prev->se.avg_overlap, runtime);
-       }
+       if (prev->se.on_rq)
+               update_rq_clock(rq);
+       rq->skip_clock_update = 0;
        prev->sched_class->put_prev_task(rq, prev);
 }

@@ -3708,7 +3647,6 @@ need_resched_nonpreemptible:
        hrtick_clear(rq);

        raw_spin_lock_irq(&rq->lock);
-       update_rq_clock(rq);
        clear_tsk_need_resched(prev);

        if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
@@ -4265,7 +4203,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
        BUG_ON(prio < 0 || prio > MAX_PRIO);

        rq = task_rq_lock(p, &flags);
-       update_rq_clock(rq);

        oldprio = p->prio;
        prev_class = p->sched_class;
@@ -4308,7 +4245,6 @@ void set_user_nice(struct task_struct *p, long nice)
         * the task might be in the middle of scheduling on another CPU.
         */
        rq = task_rq_lock(p, &flags);
-       update_rq_clock(rq);
        /*
         * The RT priorities are set via sched_setscheduler(), but we still
         * allow the 'normal' nice value to be set - but as expected
@@ -4591,7 +4527,6 @@ recheck:
                raw_spin_unlock_irqrestore(&p->pi_lock, flags);
                goto recheck;
        }
-       update_rq_clock(rq);
        on_rq = p->se.on_rq;
        running = task_current(rq, p);
        if (on_rq)
@@ -5602,7 +5537,6 @@ void sched_idle_next(void)

        __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);

-       update_rq_clock(rq);
        activate_task(rq, p, 0);

        raw_spin_unlock_irqrestore(&rq->lock, flags);
@@ -5657,7 +5591,6 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
        for ( ; ; ) {
                if (!rq->nr_running)
                        break;
-               update_rq_clock(rq);
                next = pick_next_task(rq);
                if (!next)
                        break;
@@ -5941,7 +5874,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
                rq->migration_thread = NULL;
                /* Idle task back to normal (off runqueue, low prio) */
                raw_spin_lock_irq(&rq->lock);
-               update_rq_clock(rq);
                deactivate_task(rq, rq->idle, 0);
                __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
                rq->idle->sched_class = &idle_sched_class;
@@ -7891,7 +7823,6 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
 {
        int on_rq;

-       update_rq_clock(rq);
        on_rq = p->se.on_rq;
        if (on_rq)
                deactivate_task(rq, p, 0);
@@ -7918,9 +7849,9 @@ void normalize_rt_tasks(void)

                p->se.exec_start = 0;
 #ifdef CONFIG_SCHEDSTATS
-               p->se.wait_start = 0;
-               p->se.sleep_start = 0;
-               p->se.block_start = 0;
+               p->se.statistics.wait_start = 0;
+               p->se.statistics.sleep_start = 0;
+               p->se.statistics.block_start = 0;
 #endif

                if (!rt_task(p)) {
@@ -8253,8 +8184,6 @@ void sched_move_task(struct task_struct *tsk)

        rq = task_rq_lock(tsk, &flags);

-       update_rq_clock(rq);
-
        running = task_current(rq, tsk);
        on_rq = tsk->se.on_rq;
