Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	150
1 file changed, 127 insertions(+), 23 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 410eec404133..dfae1bf6d5b2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -223,7 +223,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 {
 	ktime_t now;
 
-	if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
+	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
 		return;
 
 	if (hrtimer_active(&rt_b->rt_period_timer))
@@ -467,11 +467,17 @@ struct rt_rq {
 	struct rt_prio_array active;
 	unsigned long rt_nr_running;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-	int highest_prio; /* highest queued rt task prio */
+	struct {
+		int curr; /* highest queued rt task prio */
+#ifdef CONFIG_SMP
+		int next; /* next highest */
+#endif
+	} highest_prio;
 #endif
 #ifdef CONFIG_SMP
 	unsigned long rt_nr_migratory;
 	int overloaded;
+	struct plist_head pushable_tasks;
 #endif
 	int rt_throttled;
 	u64 rt_time;
@@ -549,7 +555,6 @@ struct rq {
 	unsigned long nr_running;
 	#define CPU_LOAD_IDX_MAX 5
 	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
-	unsigned char idle_at_tick;
 #ifdef CONFIG_NO_HZ
 	unsigned long last_tick_seen;
 	unsigned char in_nohz_recently;
@@ -590,6 +595,7 @@ struct rq {
 	struct root_domain *rd;
 	struct sched_domain *sd;
 
+	unsigned char idle_at_tick;
 	/* For active balancing */
 	int active_balance;
 	int push_cpu;
@@ -1610,21 +1616,42 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
 
 #endif
 
+#ifdef CONFIG_PREEMPT
+
 /*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ * fair double_lock_balance: Safely acquires both rq->locks in a fair
+ * way at the expense of forcing extra atomic operations in all
+ * invocations.  This assures that the double_lock is acquired using the
+ * same underlying policy as the spinlock_t on this architecture, which
+ * reduces latency compared to the unfair variant below.  However, it
+ * also adds more overhead and therefore may reduce throughput.
  */
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	spin_unlock(&this_rq->lock);
+	double_rq_lock(this_rq, busiest);
+
+	return 1;
+}
+
+#else
+/*
+ * Unfair double_lock_balance: Optimizes throughput at the expense of
+ * latency by eliminating extra atomic operations when the locks are
+ * already in proper order on entry.  This favors lower cpu-ids and will
+ * grant the double lock to lower cpus over higher ids under contention,
+ * regardless of entry order into the function.
+ */
+static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(this_rq->lock)
 	__acquires(busiest->lock)
 	__acquires(this_rq->lock)
 {
 	int ret = 0;
 
-	if (unlikely(!irqs_disabled())) {
-		/* printk() doesn't work good under rq->lock */
-		spin_unlock(&this_rq->lock);
-		BUG_ON(1);
-	}
 	if (unlikely(!spin_trylock(&busiest->lock))) {
 		if (busiest < this_rq) {
 			spin_unlock(&this_rq->lock);
@@ -1637,6 +1664,22 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	return ret;
 }
 
+#endif /* CONFIG_PREEMPT */
+
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+{
+	if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work good under rq->lock */
+		spin_unlock(&this_rq->lock);
+		BUG_ON(1);
+	}
+
+	return _double_lock_balance(this_rq, busiest);
+}
+
 static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(busiest->lock)
 {
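Aside (not part of this commit): the two _double_lock_balance() variants above differ only in how they resolve lock-order inversion. The deadlock-avoidance idea behind the unfair variant can be sketched in self-contained userspace C: when the opportunistic trylock fails, both contenders fall back to taking the lower-addressed lock first, so they can never hold the pair in opposite orders.

/*
 * Illustrative sketch only -- not from this patch. Shows the
 * address-ordering fallback the unfair _double_lock_balance() relies on.
 */
#include <pthread.h>

static void double_lock_sketch(pthread_mutex_t *held, pthread_mutex_t *other)
{
	if (pthread_mutex_trylock(other) == 0)
		return;				/* no contention: done */

	if (other < held) {
		/* wrong order: drop what we hold, retake in address order */
		pthread_mutex_unlock(held);
		pthread_mutex_lock(other);
		pthread_mutex_lock(held);
	} else {
		pthread_mutex_lock(other);	/* already in address order */
	}
}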
@@ -1705,6 +1748,9 @@ static void update_avg(u64 *avg, u64 sample)
 
 static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 {
+	if (wakeup)
+		p->se.start_runtime = p->se.sum_exec_runtime;
+
 	sched_info_queued(p);
 	p->sched_class->enqueue_task(rq, p, wakeup);
 	p->se.on_rq = 1;
@@ -1712,10 +1758,15 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 
 static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
 {
-	if (sleep && p->se.last_wakeup) {
-		update_avg(&p->se.avg_overlap,
-			   p->se.sum_exec_runtime - p->se.last_wakeup);
-		p->se.last_wakeup = 0;
+	if (sleep) {
+		if (p->se.last_wakeup) {
+			update_avg(&p->se.avg_overlap,
+				   p->se.sum_exec_runtime - p->se.last_wakeup);
+			p->se.last_wakeup = 0;
+		} else {
+			update_avg(&p->se.avg_wakeup,
+				   sysctl_sched_wakeup_granularity);
+		}
 	}
 
 	sched_info_dequeued(p);
@@ -2345,6 +2396,22 @@ out_activate:
 	activate_task(rq, p, 1);
 	success = 1;
 
+	/*
+	 * Only attribute actual wakeups done by this task.
+	 */
+	if (!in_interrupt()) {
+		struct sched_entity *se = &current->se;
+		u64 sample = se->sum_exec_runtime;
+
+		if (se->last_wakeup)
+			sample -= se->last_wakeup;
+		else
+			sample -= se->start_runtime;
+		update_avg(&se->avg_wakeup, sample);
+
+		se->last_wakeup = se->sum_exec_runtime;
+	}
+
 out_running:
 	trace_sched_wakeup(rq, p, success);
 	check_preempt_curr(rq, p, sync);
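Aside (not part of this commit): the block added above samples how long the waker had been running before it issued this wakeup and folds that into se->avg_wakeup via update_avg(). update_avg() itself is outside this hunk; the sketch below assumes it is a simple exponential step that pulls the average one eighth of the way toward each new sample, which makes the convergence behaviour easy to see.

/*
 * Illustrative sketch only -- the 1/8 weight is an assumption about
 * update_avg() in this kernel, which is not shown in the diff.
 */
#include <stdint.h>
#include <stdio.h>

static void update_avg_sketch(uint64_t *avg, uint64_t sample)
{
	int64_t diff = (int64_t)(sample - *avg);

	*avg += diff / 8;
}

int main(void)
{
	uint64_t avg_wakeup = 10000000;		/* arbitrary initial value, in ns */
	uint64_t samples[] = { 200000, 150000, 180000, 120000 };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		update_avg_sketch(&avg_wakeup, samples[i]);
		printf("avg_wakeup = %llu ns\n", (unsigned long long)avg_wakeup);
	}
	return 0;
}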
@@ -2355,8 +2422,6 @@ out_running:
 		p->sched_class->task_wake_up(rq, p);
 #endif
 out:
-	current->se.last_wakeup = current->se.sum_exec_runtime;
-
 	task_rq_unlock(rq, &flags);
 
 	return success;
@@ -2386,6 +2451,8 @@ static void __sched_fork(struct task_struct *p)
 	p->se.prev_sum_exec_runtime	= 0;
 	p->se.last_wakeup		= 0;
 	p->se.avg_overlap		= 0;
+	p->se.start_runtime		= 0;
+	p->se.avg_wakeup		= sysctl_sched_wakeup_granularity;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start		= 0;
@@ -2448,6 +2515,8 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
 #endif
+	plist_node_init(&p->pushable_tasks, MAX_PRIO);
+
 	put_cpu();
 }
 
@@ -2588,6 +2657,12 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 {
 	struct mm_struct *mm = rq->prev_mm;
 	long prev_state;
+#ifdef CONFIG_SMP
+	int post_schedule = 0;
+
+	if (current->sched_class->needs_post_schedule)
+		post_schedule = current->sched_class->needs_post_schedule(rq);
+#endif
 
 	rq->prev_mm = NULL;
 
@@ -2606,7 +2681,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
 #ifdef CONFIG_SMP
-	if (current->sched_class->post_schedule)
+	if (post_schedule)
 		current->sched_class->post_schedule(rq);
 #endif
 
@@ -2987,6 +3062,16 @@ next:
 	pulled++;
 	rem_load_move -= p->se.load.weight;
 
+#ifdef CONFIG_PREEMPT
+	/*
+	 * NEWIDLE balancing is a source of latency, so preemptible kernels
+	 * will stop after the first task is pulled to minimize the critical
+	 * section.
+	 */
+	if (idle == CPU_NEWLY_IDLE)
+		goto out;
+#endif
+
 	/*
 	 * We only want to steal up to the prescribed amount of weighted load.
 	 */
@@ -3033,9 +3118,15 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 				sd, idle, all_pinned, &this_best_prio);
 		class = class->next;
 
+#ifdef CONFIG_PREEMPT
+		/*
+		 * NEWIDLE balancing is a source of latency, so preemptible
+		 * kernels will stop after the first task is pulled to minimize
+		 * the critical section.
+		 */
 		if (idle == CPU_NEWLY_IDLE && this_rq->nr_running)
 			break;
-
+#endif
 	} while (class && max_load_move > total_load_moved);
 
 	return total_load_moved > 0;
@@ -5145,7 +5236,7 @@ SYSCALL_DEFINE1(nice, int, increment)
 	if (increment > 40)
 		increment = 40;
 
-	nice = PRIO_TO_NICE(current->static_prio) + increment;
+	nice = TASK_NICE(current) + increment;
 	if (nice < -20)
 		nice = -20;
 	if (nice > 19)
@@ -8218,11 +8309,15 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 	__set_bit(MAX_RT_PRIO, array->bitmap);
 
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-	rt_rq->highest_prio = MAX_RT_PRIO;
+	rt_rq->highest_prio.curr = MAX_RT_PRIO;
+#ifdef CONFIG_SMP
+	rt_rq->highest_prio.next = MAX_RT_PRIO;
+#endif
 #endif
 #ifdef CONFIG_SMP
 	rt_rq->rt_nr_migratory = 0;
 	rt_rq->overloaded = 0;
+	plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
 #endif
 
 	rt_rq->rt_time = 0;
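Aside (not part of this commit): sched_fork() initializes p->pushable_tasks (earlier hunk) and init_rt_rq() initializes the per-runqueue plist head above; the enqueue/dequeue side lives in kernel/sched_rt.c and is not in this file's diff. Purely to illustrate the plist API involved, a hedged sketch with hypothetical helper names:

/*
 * Illustrative sketch only -- the real helpers are in kernel/sched_rt.c
 * and may differ. plist keeps nodes sorted by the prio given to
 * plist_node_init(), so the head of rq->rt.pushable_tasks is always the
 * highest-priority (lowest prio value) pushable task.
 */
static void enqueue_pushable_task_sketch(struct rq *rq, struct task_struct *p)
{
	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
	plist_node_init(&p->pushable_tasks, p->prio);
	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
}

static struct task_struct *pick_pushable_task_sketch(struct rq *rq)
{
	if (plist_head_empty(&rq->rt.pushable_tasks))
		return NULL;

	return plist_first_entry(&rq->rt.pushable_tasks,
				 struct task_struct, pushable_tasks);
}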
@@ -9224,6 +9319,16 @@ static int sched_rt_global_constraints(void)
 
 	return ret;
 }
+
+int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
+{
+	/* Don't accept realtime tasks when there is no way for them to run */
+	if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
+		return 0;
+
+	return 1;
+}
+
 #else /* !CONFIG_RT_GROUP_SCHED */
 static int sched_rt_global_constraints(void)
 {
@@ -9317,8 +9422,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 		      struct task_struct *tsk)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
-	/* Don't accept realtime tasks when there is no way for them to run */
-	if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0)
+	if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
 		return -EINVAL;
 #else
 	/* We don't support RT-tasks being in separate groups */
@@ -9589,7 +9693,7 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 	struct cpuacct *ca;
 	int cpu;
 
-	if (!cpuacct_subsys.active)
+	if (unlikely(!cpuacct_subsys.active))
 		return;
 
 	cpu = task_cpu(tsk);