Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  146
1 file changed, 123 insertions, 23 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 8ee437a5ec1d..fcc3483e9955 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -467,11 +467,17 @@ struct rt_rq {
         struct rt_prio_array active;
         unsigned long rt_nr_running;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-        int highest_prio; /* highest queued rt task prio */
+        struct {
+                int curr; /* highest queued rt task prio */
+#ifdef CONFIG_SMP
+                int next; /* next highest */
+#endif
+        } highest_prio;
 #endif
 #ifdef CONFIG_SMP
         unsigned long rt_nr_migratory;
         int overloaded;
+        struct plist_head pushable_tasks;
 #endif
         int rt_throttled;
         u64 rt_time;
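The new pushable_tasks list gives each runqueue a priority-ordered view of the RT tasks that could be pushed to another CPU. Purely as an illustrative sketch (not part of this diff, which is limited to kernel/sched.c): the RT scheduling class would be expected to maintain the list with the plist API roughly as below; the helper names are assumptions.

static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
        /* re-key the node at the task's current priority, then reinsert */
        plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
        plist_node_init(&p->pushable_tasks, p->prio);
        plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
}

static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
        plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
}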
@@ -1610,21 +1616,42 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
 
 #endif
 
+#ifdef CONFIG_PREEMPT
+
 /*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ * fair double_lock_balance: Safely acquires both rq->locks in a fair
+ * way at the expense of forcing extra atomic operations in all
+ * invocations. This assures that the double_lock is acquired using the
+ * same underlying policy as the spinlock_t on this architecture, which
+ * reduces latency compared to the unfair variant below. However, it
+ * also adds more overhead and therefore may reduce throughput.
  */
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+        __releases(this_rq->lock)
+        __acquires(busiest->lock)
+        __acquires(this_rq->lock)
+{
+        spin_unlock(&this_rq->lock);
+        double_rq_lock(this_rq, busiest);
+
+        return 1;
+}
+
+#else
+/*
+ * Unfair double_lock_balance: Optimizes throughput at the expense of
+ * latency by eliminating extra atomic operations when the locks are
+ * already in proper order on entry. This favors lower cpu-ids and will
+ * grant the double lock to lower cpus over higher ids under contention,
+ * regardless of entry order into the function.
+ */
+static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
         __releases(this_rq->lock)
         __acquires(busiest->lock)
         __acquires(this_rq->lock)
 {
         int ret = 0;
 
-        if (unlikely(!irqs_disabled())) {
-                /* printk() doesn't work good under rq->lock */
-                spin_unlock(&this_rq->lock);
-                BUG_ON(1);
-        }
         if (unlikely(!spin_trylock(&busiest->lock))) {
                 if (busiest < this_rq) {
                         spin_unlock(&this_rq->lock);
@@ -1637,6 +1664,22 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
         return ret;
 }
 
+#endif /* CONFIG_PREEMPT */
+
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+{
+        if (unlikely(!irqs_disabled())) {
+                /* printk() doesn't work good under rq->lock */
+                spin_unlock(&this_rq->lock);
+                BUG_ON(1);
+        }
+
+        return _double_lock_balance(this_rq, busiest);
+}
+
 static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
         __releases(busiest->lock)
 {
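As a usage sketch (not part of this diff): a balancing path that already holds this_rq->lock and also needs busiest->lock goes through double_lock_balance(). The fair variant always drops this_rq->lock, and the unfair one may drop it on trylock failure, so any state read under that lock has to be revalidated afterwards. The caller name below is hypothetical.

static int pull_one_task(struct rq *this_rq, struct rq *busiest)
{
        /* returns nonzero if this_rq->lock was dropped and re-acquired */
        int dropped = double_lock_balance(this_rq, busiest);

        /* ... revalidate this_rq state if dropped, then migrate a task ... */

        double_unlock_balance(this_rq, busiest);

        return dropped;
}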
@@ -1705,6 +1748,9 @@ static void update_avg(u64 *avg, u64 sample)
 
 static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 {
+        if (wakeup)
+                p->se.start_runtime = p->se.sum_exec_runtime;
+
         sched_info_queued(p);
         p->sched_class->enqueue_task(rq, p, wakeup);
         p->se.on_rq = 1;
@@ -1712,10 +1758,15 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 
 static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
 {
-        if (sleep && p->se.last_wakeup) {
-                update_avg(&p->se.avg_overlap,
-                           p->se.sum_exec_runtime - p->se.last_wakeup);
-                p->se.last_wakeup = 0;
+        if (sleep) {
+                if (p->se.last_wakeup) {
+                        update_avg(&p->se.avg_overlap,
+                                   p->se.sum_exec_runtime - p->se.last_wakeup);
+                        p->se.last_wakeup = 0;
+                } else {
+                        update_avg(&p->se.avg_wakeup,
+                                   sysctl_sched_wakeup_granularity);
+                }
         }
 
         sched_info_dequeued(p);
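Both branches feed update_avg(), whose signature is visible in the hunk header above but whose body is context only. For reference, a sketch consistent with that use, assuming the usual 1/8-weight exponential moving average used elsewhere in kernel/sched.c:

static void update_avg(u64 *avg, u64 sample)
{
        s64 diff = sample - *avg;

        /* fold 1/8 of the error into the running average */
        *avg += diff >> 3;
}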
@@ -2355,6 +2406,22 @@ out_activate:
         activate_task(rq, p, 1);
         success = 1;
 
+        /*
+         * Only attribute actual wakeups done by this task.
+         */
+        if (!in_interrupt()) {
+                struct sched_entity *se = &current->se;
+                u64 sample = se->sum_exec_runtime;
+
+                if (se->last_wakeup)
+                        sample -= se->last_wakeup;
+                else
+                        sample -= se->start_runtime;
+                update_avg(&se->avg_wakeup, sample);
+
+                se->last_wakeup = se->sum_exec_runtime;
+        }
+
 out_running:
         trace_sched_wakeup(rq, p, success);
         check_preempt_curr(rq, p, sync);
@@ -2365,8 +2432,6 @@ out_running:
                 p->sched_class->task_wake_up(rq, p);
 #endif
 out:
-        current->se.last_wakeup = current->se.sum_exec_runtime;
-
         task_rq_unlock(rq, &flags);
 
         return success;
@@ -2396,6 +2461,8 @@ static void __sched_fork(struct task_struct *p)
         p->se.prev_sum_exec_runtime = 0;
         p->se.last_wakeup = 0;
         p->se.avg_overlap = 0;
+        p->se.start_runtime = 0;
+        p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
 
 #ifdef CONFIG_SCHEDSTATS
         p->se.wait_start = 0;
@@ -2458,6 +2525,8 @@ void sched_fork(struct task_struct *p, int clone_flags)
         /* Want to start with kernel preemption disabled. */
         task_thread_info(p)->preempt_count = 1;
 #endif
+        plist_node_init(&p->pushable_tasks, MAX_PRIO);
+
         put_cpu();
 }
 
@@ -2598,6 +2667,12 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 {
         struct mm_struct *mm = rq->prev_mm;
         long prev_state;
+#ifdef CONFIG_SMP
+        int post_schedule = 0;
+
+        if (current->sched_class->needs_post_schedule)
+                post_schedule = current->sched_class->needs_post_schedule(rq);
+#endif
 
         rq->prev_mm = NULL;
 
@@ -2616,7 +2691,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
         finish_arch_switch(prev);
         finish_lock_switch(rq, prev);
 #ifdef CONFIG_SMP
-        if (current->sched_class->post_schedule)
+        if (post_schedule)
                 current->sched_class->post_schedule(rq);
 #endif
 
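Note that needs_post_schedule() is sampled at the top of finish_task_switch(), before finish_lock_switch() releases rq->lock, so the decision is based on state observed under the lock, and post_schedule() then runs only when work is actually pending. As an illustrative sketch (not part of this diff), an RT scheduling class could pair the two hooks roughly like this; the pending-work test and push_rt_tasks() call are assumptions about code outside this file:

static int needs_post_schedule_rt(struct rq *rq)
{
        /* assumed condition: push work pending only if pushable RT tasks are queued */
        return !plist_head_empty(&rq->rt.pushable_tasks);
}

static void post_schedule_rt(struct rq *rq)
{
        /* try to push queued RT tasks to other runqueues */
        push_rt_tasks(rq);
}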
@@ -2997,6 +3072,16 @@ next:
         pulled++;
         rem_load_move -= p->se.load.weight;
 
+#ifdef CONFIG_PREEMPT
+        /*
+         * NEWIDLE balancing is a source of latency, so preemptible kernels
+         * will stop after the first task is pulled to minimize the critical
+         * section.
+         */
+        if (idle == CPU_NEWLY_IDLE)
+                goto out;
+#endif
+
         /*
          * We only want to steal up to the prescribed amount of weighted load.
          */
@@ -3043,9 +3128,15 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
                                 sd, idle, all_pinned, &this_best_prio);
                 class = class->next;
 
+#ifdef CONFIG_PREEMPT
+                /*
+                 * NEWIDLE balancing is a source of latency, so preemptible
+                 * kernels will stop after the first task is pulled to minimize
+                 * the critical section.
+                 */
                 if (idle == CPU_NEWLY_IDLE && this_rq->nr_running)
                         break;
-
+#endif
         } while (class && max_load_move > total_load_moved);
 
         return total_load_moved > 0;
3050 3141
3051 return total_load_moved > 0; 3142 return total_load_moved > 0;
@@ -3890,19 +3981,24 @@ int select_nohz_load_balancer(int stop_tick)
         int cpu = smp_processor_id();
 
         if (stop_tick) {
-                cpumask_set_cpu(cpu, nohz.cpu_mask);
                 cpu_rq(cpu)->in_nohz_recently = 1;
 
-                /*
-                 * If we are going offline and still the leader, give up!
-                 */
-                if (!cpu_active(cpu) &&
-                    atomic_read(&nohz.load_balancer) == cpu) {
+                if (!cpu_active(cpu)) {
+                        if (atomic_read(&nohz.load_balancer) != cpu)
+                                return 0;
+
+                        /*
+                         * If we are going offline and still the leader,
+                         * give up!
+                         */
                         if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
                                 BUG();
+
                         return 0;
                 }
 
+                cpumask_set_cpu(cpu, nohz.cpu_mask);
+
                 /* time for ilb owner also to sleep */
                 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
                         if (atomic_read(&nohz.load_balancer) == cpu)
@@ -8214,11 +8310,15 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
         __set_bit(MAX_RT_PRIO, array->bitmap);
 
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-        rt_rq->highest_prio = MAX_RT_PRIO;
+        rt_rq->highest_prio.curr = MAX_RT_PRIO;
+#ifdef CONFIG_SMP
+        rt_rq->highest_prio.next = MAX_RT_PRIO;
+#endif
 #endif
 #ifdef CONFIG_SMP
         rt_rq->rt_nr_migratory = 0;
         rt_rq->overloaded = 0;
+        plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
 #endif
 
         rt_rq->rt_time = 0;
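With pushable_tasks kept in priority order, the push path can find its best candidate in O(1), and highest_prio.next caches the priority of the next-highest queued task so that remote CPUs can cheaply judge whether a pull is worthwhile. As an illustrative sketch (not part of this diff, helper name assumed), selecting the next task to push might look like:

static struct task_struct *pick_next_pushable_task(struct rq *rq)
{
        struct plist_node *node;

        if (plist_head_empty(&rq->rt.pushable_tasks))
                return NULL;

        /* the plist is sorted by priority, so the first node is the best candidate */
        node = plist_first(&rq->rt.pushable_tasks);

        return container_of(node, struct task_struct, pushable_tasks);
}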