Diffstat (limited to 'kernel/sched.c')
 -rw-r--r--  kernel/sched.c | 150
 1 file changed, 127 insertions, 23 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 410eec404133..dfae1bf6d5b2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -223,7 +223,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 {
 	ktime_t now;
 
-	if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
+	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
 		return;
 
 	if (hrtimer_active(&rt_b->rt_period_timer))
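Note: the inverted guard matters when RT bandwidth enforcement is disabled. A minimal user-space sketch of the two predicates (the stub values below stand in for rt_bandwidth_enabled() and RUNTIME_INF, which are internal to the scheduler):

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define RUNTIME_INF ((uint64_t)~0ULL)	/* stand-in for the kernel constant */

int main(void)
{
	for (int enabled = 0; enabled <= 1; enabled++) {
		for (int inf = 0; inf <= 1; inf++) {
			uint64_t runtime = inf ? RUNTIME_INF : 950000ULL;

			/* old guard: skip the timer only when enforcement is on
			 * but the runtime is infinite */
			bool old_skip = enabled && runtime == RUNTIME_INF;
			/* new guard: also skip when enforcement is off */
			bool new_skip = !enabled || runtime == RUNTIME_INF;

			printf("enabled=%d inf=%d old_skip=%d new_skip=%d\n",
			       enabled, inf, old_skip, new_skip);
		}
	}
	return 0;
}

The case that changes is enabled=0 with a finite runtime: the old guard fell through and armed the period timer even though bandwidth enforcement was off; the new guard returns early.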
@@ -467,11 +467,17 @@ struct rt_rq {
 	struct rt_prio_array active;
 	unsigned long rt_nr_running;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-	int highest_prio; /* highest queued rt task prio */
+	struct {
+		int curr; /* highest queued rt task prio */
+#ifdef CONFIG_SMP
+		int next; /* next highest */
+#endif
+	} highest_prio;
 #endif
 #ifdef CONFIG_SMP
 	unsigned long rt_nr_migratory;
 	int overloaded;
+	struct plist_head pushable_tasks;
 #endif
 	int rt_throttled;
 	u64 rt_time;
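Note: the curr/next split is consumed by the RT push/pull logic in kernel/sched_rt.c, which is not part of this file. Purely as an illustrative sketch of the intent (the helper name below is made up and the real code adds conditions around overload state and locking): lower numeric value means higher RT priority, so a CPU only bothers pulling when the source runqueue's cached second-highest priority beats the local top priority.

/* Illustrative fragment only, assuming the struct rq/rt_rq layout above. */
static int worth_pulling_from(struct rq *this_rq, struct rq *src_rq)
{
	/*
	 * The task currently running on src_rq cannot be pulled, so the
	 * cached "next highest" priority is the relevant bound; no scan
	 * of the remote priority bitmap is needed.
	 */
	return src_rq->rt.highest_prio.next < this_rq->rt.highest_prio.curr;
}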
@@ -549,7 +555,6 @@ struct rq {
 	unsigned long nr_running;
 	#define CPU_LOAD_IDX_MAX 5
 	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
-	unsigned char idle_at_tick;
 #ifdef CONFIG_NO_HZ
 	unsigned long last_tick_seen;
 	unsigned char in_nohz_recently;
@@ -590,6 +595,7 @@ struct rq {
 	struct root_domain *rd;
 	struct sched_domain *sd;
 
+	unsigned char idle_at_tick;
 	/* For active balancing */
 	int active_balance;
 	int push_cpu;
@@ -1610,21 +1616,42 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
 
 #endif
 
+#ifdef CONFIG_PREEMPT
+
 /*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ * fair double_lock_balance: Safely acquires both rq->locks in a fair
+ * way at the expense of forcing extra atomic operations in all
+ * invocations.  This assures that the double_lock is acquired using the
+ * same underlying policy as the spinlock_t on this architecture, which
+ * reduces latency compared to the unfair variant below.  However, it
+ * also adds more overhead and therefore may reduce throughput.
 */
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	spin_unlock(&this_rq->lock);
+	double_rq_lock(this_rq, busiest);
+
+	return 1;
+}
+
+#else
+/*
+ * Unfair double_lock_balance: Optimizes throughput at the expense of
+ * latency by eliminating extra atomic operations when the locks are
+ * already in proper order on entry.  This favors lower cpu-ids and will
+ * grant the double lock to lower cpus over higher ids under contention,
+ * regardless of entry order into the function.
+ */
+static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(this_rq->lock)
 	__acquires(busiest->lock)
 	__acquires(this_rq->lock)
 {
 	int ret = 0;
 
-	if (unlikely(!irqs_disabled())) {
-		/* printk() doesn't work good under rq->lock */
-		spin_unlock(&this_rq->lock);
-		BUG_ON(1);
-	}
 	if (unlikely(!spin_trylock(&busiest->lock))) {
 		if (busiest < this_rq) {
 			spin_unlock(&this_rq->lock);
@@ -1637,6 +1664,22 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	return ret;
 }
 
+#endif /* CONFIG_PREEMPT */
+
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+{
+	if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work good under rq->lock */
+		spin_unlock(&this_rq->lock);
+		BUG_ON(1);
+	}
+
+	return _double_lock_balance(this_rq, busiest);
+}
+
 static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(busiest->lock)
 {
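Note: both variants may drop this_rq->lock before acquiring the pair in order, so a caller has to treat a non-zero return as "the lock was released, revalidate anything decided beforehand". A hedged sketch of the canonical call pattern (the real callers are the RT push/pull and active-balance paths, not shown in this hunk, and the function name below is hypothetical):

/* Sketch only: assumes this_rq->lock is held with IRQs off on entry. */
static void example_balance_one(struct rq *this_rq, struct rq *busiest)
{
	if (double_lock_balance(this_rq, busiest)) {
		/*
		 * this_rq->lock was dropped and re-taken: tasks may have
		 * come or gone in the meantime, so re-check any choice
		 * (e.g. which task to move) made before the call.
		 */
	}

	/* ... migrate tasks between the two runqueues ... */

	double_unlock_balance(this_rq, busiest);
}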
@@ -1705,6 +1748,9 @@ static void update_avg(u64 *avg, u64 sample)
 
 static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 {
+	if (wakeup)
+		p->se.start_runtime = p->se.sum_exec_runtime;
+
 	sched_info_queued(p);
 	p->sched_class->enqueue_task(rq, p, wakeup);
 	p->se.on_rq = 1;
@@ -1712,10 +1758,15 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 
 static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
 {
-	if (sleep && p->se.last_wakeup) {
-		update_avg(&p->se.avg_overlap,
-				p->se.sum_exec_runtime - p->se.last_wakeup);
-		p->se.last_wakeup = 0;
+	if (sleep) {
+		if (p->se.last_wakeup) {
+			update_avg(&p->se.avg_overlap,
+				p->se.sum_exec_runtime - p->se.last_wakeup);
+			p->se.last_wakeup = 0;
+		} else {
+			update_avg(&p->se.avg_wakeup,
+				sysctl_sched_wakeup_granularity);
+		}
 	}
 
 	sched_info_dequeued(p);
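Note: avg_overlap and the new avg_wakeup are both folded through update_avg(), whose body is not part of this diff (only its name appears in a hunk header above); as best recalled it is a 1/8-weight exponential moving average, new = old + (sample - old)/8, so treat the exact shift as an assumption. A self-contained user-space demonstration of how samples converge:

#include <stdio.h>
#include <stdint.h>

/* Assumed mirror of the kernel's update_avg(): avg += (sample - avg) >> 3 */
static void update_avg(uint64_t *avg, uint64_t sample)
{
	int64_t diff = (int64_t)(sample - *avg);

	*avg += diff >> 3;	/* relies on arithmetic right shift, as the kernel does */
}

int main(void)
{
	/* avg_wakeup starts at sysctl_sched_wakeup_granularity; assume ~10 ms in ns */
	uint64_t avg_wakeup = 10000000ULL;
	uint64_t samples[] = { 500000, 600000, 400000, 550000, 450000 };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		update_avg(&avg_wakeup, samples[i]);
		printf("after sample %llu ns: avg_wakeup = %llu ns\n",
		       (unsigned long long)samples[i],
		       (unsigned long long)avg_wakeup);
	}
	return 0;
}

The dequeue path above uses the same helper to decay avg_wakeup back toward sysctl_sched_wakeup_granularity whenever a task goes to sleep without having woken anyone.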
@@ -2345,6 +2396,22 @@ out_activate:
 	activate_task(rq, p, 1);
 	success = 1;
 
+	/*
+	 * Only attribute actual wakeups done by this task.
+	 */
+	if (!in_interrupt()) {
+		struct sched_entity *se = &current->se;
+		u64 sample = se->sum_exec_runtime;
+
+		if (se->last_wakeup)
+			sample -= se->last_wakeup;
+		else
+			sample -= se->start_runtime;
+		update_avg(&se->avg_wakeup, sample);
+
+		se->last_wakeup = se->sum_exec_runtime;
+	}
+
 out_running:
 	trace_sched_wakeup(rq, p, success);
 	check_preempt_curr(rq, p, sync);
@@ -2355,8 +2422,6 @@ out_running:
 		p->sched_class->task_wake_up(rq, p);
 #endif
 out:
-	current->se.last_wakeup = current->se.sum_exec_runtime;
-
 	task_rq_unlock(rq, &flags);
 
 	return success;
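Note: the sample fed to avg_wakeup is the waker's own CPU time since it last woke somebody, falling back to the time since its start_runtime (recorded in enqueue_task() above) for the first wakeup after being enqueued. A small self-contained walk-through with made-up numbers:

#include <stdio.h>
#include <stdint.h>

/* Hypothetical waker state in ns, mirroring the sched_entity fields used above. */
struct waker {
	uint64_t sum_exec_runtime;
	uint64_t last_wakeup;
	uint64_t start_runtime;
};

static uint64_t wakeup_sample(const struct waker *se)
{
	uint64_t sample = se->sum_exec_runtime;

	if (se->last_wakeup)
		sample -= se->last_wakeup;	/* runtime since it last woke a task */
	else
		sample -= se->start_runtime;	/* runtime since it was enqueued */

	return sample;
}

int main(void)
{
	struct waker a = { 12000000, 11300000, 10000000 };
	struct waker b = { 12000000, 0, 10000000 };

	printf("with last_wakeup set:  %llu ns\n", (unsigned long long)wakeup_sample(&a));
	printf("falling back to start: %llu ns\n", (unsigned long long)wakeup_sample(&b));
	return 0;
}

Expected output is 700000 ns and 2000000 ns respectively; the in_interrupt() check above keeps interrupt-driven wakeups from being charged to whatever task happened to be running.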
@@ -2386,6 +2451,8 @@ static void __sched_fork(struct task_struct *p)
 	p->se.prev_sum_exec_runtime	= 0;
 	p->se.last_wakeup		= 0;
 	p->se.avg_overlap		= 0;
+	p->se.start_runtime		= 0;
+	p->se.avg_wakeup		= sysctl_sched_wakeup_granularity;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start		= 0;
@@ -2448,6 +2515,8 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
 #endif
+	plist_node_init(&p->pushable_tasks, MAX_PRIO);
+
 	put_cpu();
 }
 
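Note: plist_node_init() seeds the priority of the task's node for the pushable_tasks list added to struct rt_rq earlier in this patch; a plist keeps entries sorted so the push path can take the highest-priority pushable task without scanning. The real add/pick helpers live in kernel/sched_rt.c and are not shown here, so the fragment below is only a rough sketch of the idea (function names are made up, and it assumes the struct rq from this file with rq->lock held):

#include <linux/plist.h>

static void example_enqueue_pushable(struct rq *rq, struct task_struct *p)
{
	/* re-key the node to the task's current priority, then (re)insert */
	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
	plist_node_init(&p->pushable_tasks, p->prio);
	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
}

static struct task_struct *example_pick_pushable(struct rq *rq)
{
	struct plist_node *node;

	if (plist_head_empty(&rq->rt.pushable_tasks))
		return NULL;

	/* lower prio value sorts first, i.e. the most important task */
	node = plist_first(&rq->rt.pushable_tasks);
	return container_of(node, struct task_struct, pushable_tasks);
}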
@@ -2588,6 +2657,12 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 {
 	struct mm_struct *mm = rq->prev_mm;
 	long prev_state;
+#ifdef CONFIG_SMP
+	int post_schedule = 0;
+
+	if (current->sched_class->needs_post_schedule)
+		post_schedule = current->sched_class->needs_post_schedule(rq);
+#endif
 
 	rq->prev_mm = NULL;
 
@@ -2606,7 +2681,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
 #ifdef CONFIG_SMP
-	if (current->sched_class->post_schedule)
+	if (post_schedule)
 		current->sched_class->post_schedule(rq);
 #endif
 
@@ -2987,6 +3062,16 @@ next:
 	pulled++;
 	rem_load_move -= p->se.load.weight;
 
+#ifdef CONFIG_PREEMPT
+	/*
+	 * NEWIDLE balancing is a source of latency, so preemptible kernels
+	 * will stop after the first task is pulled to minimize the critical
+	 * section.
+	 */
+	if (idle == CPU_NEWLY_IDLE)
+		goto out;
+#endif
+
 	/*
 	 * We only want to steal up to the prescribed amount of weighted load.
 	 */
@@ -3033,9 +3118,15 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 				sd, idle, all_pinned, &this_best_prio);
 		class = class->next;
 
+#ifdef CONFIG_PREEMPT
+		/*
+		 * NEWIDLE balancing is a source of latency, so preemptible
+		 * kernels will stop after the first task is pulled to minimize
+		 * the critical section.
+		 */
 		if (idle == CPU_NEWLY_IDLE && this_rq->nr_running)
 			break;
-
+#endif
 	} while (class && max_load_move > total_load_moved);
 
 	return total_load_moved > 0;
@@ -5145,7 +5236,7 @@ SYSCALL_DEFINE1(nice, int, increment)
 	if (increment > 40)
 		increment = 40;
 
-	nice = PRIO_TO_NICE(current->static_prio) + increment;
+	nice = TASK_NICE(current) + increment;
 	if (nice < -20)
 		nice = -20;
 	if (nice > 19)
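Note: TASK_NICE(p) expands (in include/linux/sched.h) to PRIO_TO_NICE((p)->static_prio), so the new line is equivalent to the old one, just using the canonical helper. The mapping itself is nice = static_prio - MAX_RT_PRIO - 20, i.e. static_prio 100..139 covers nice -20..19. A small self-contained check (macro definitions reproduced from memory, so treat them as an assumption):

#include <stdio.h>

/* Assumed to match include/linux/sched.h of this era. */
#define MAX_RT_PRIO		100
#define NICE_TO_PRIO(nice)	(MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)

int main(void)
{
	for (int nice = -20; nice <= 19; nice++) {
		int prio = NICE_TO_PRIO(nice);

		/* round-trip must be exact for every valid nice level */
		if (PRIO_TO_NICE(prio) != nice)
			printf("mismatch at nice=%d\n", nice);
	}

	printf("nice -20..19 maps to static_prio %d..%d\n",
	       NICE_TO_PRIO(-20), NICE_TO_PRIO(19));
	return 0;
}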
@@ -8218,11 +8309,15 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 	__set_bit(MAX_RT_PRIO, array->bitmap);
 
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-	rt_rq->highest_prio = MAX_RT_PRIO;
+	rt_rq->highest_prio.curr = MAX_RT_PRIO;
+#ifdef CONFIG_SMP
+	rt_rq->highest_prio.next = MAX_RT_PRIO;
+#endif
 #endif
 #ifdef CONFIG_SMP
 	rt_rq->rt_nr_migratory = 0;
 	rt_rq->overloaded = 0;
+	plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
 #endif
 
 	rt_rq->rt_time = 0;
@@ -9224,6 +9319,16 @@ static int sched_rt_global_constraints(void)
 
 	return ret;
 }
+
+int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
+{
+	/* Don't accept realtime tasks when there is no way for them to run */
+	if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
+		return 0;
+
+	return 1;
+}
+
 #else /* !CONFIG_RT_GROUP_SCHED */
 static int sched_rt_global_constraints(void)
 {
@@ -9317,8 +9422,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 		      struct task_struct *tsk)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
-	/* Don't accept realtime tasks when there is no way for them to run */
-	if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0)
+	if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
 		return -EINVAL;
 #else
 	/* We don't support RT-tasks being in separate groups */
@@ -9589,7 +9693,7 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 	struct cpuacct *ca;
 	int cpu;
 
-	if (!cpuacct_subsys.active)
+	if (unlikely(!cpuacct_subsys.active))
 		return;
 
 	cpu = task_cpu(tsk);