Diffstat (limited to 'kernel/sched.c')
 -rw-r--r--   kernel/sched.c | 144
 1 file changed, 124 insertions(+), 20 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index c1d0ed360088..5faf5d482fcd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -467,11 +467,17 @@ struct rt_rq {
 	struct rt_prio_array active;
 	unsigned long rt_nr_running;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-	int highest_prio; /* highest queued rt task prio */
+	struct {
+		int curr; /* highest queued rt task prio */
+#ifdef CONFIG_SMP
+		int next; /* next highest */
+#endif
+	} highest_prio;
 #endif
 #ifdef CONFIG_SMP
 	unsigned long rt_nr_migratory;
 	int overloaded;
+	struct plist_head pushable_tasks;
 #endif
 	int rt_throttled;
 	u64 rt_time;
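The new pushable_tasks plist_head gives each runqueue a priority-ordered list of RT tasks that are candidates for being pushed to another CPU. For readers unfamiliar with plists, here is a minimal user-space sketch of the idea; the struct and helper names below are simplified stand-ins, not the kernel's <linux/plist.h> API.

/*
 * Minimal sketch of a priority-sorted list ("plist") analogous to
 * rq->rt.pushable_tasks.  Simplified stand-in types; the real kernel
 * implementation lives in <linux/plist.h>.
 */
#include <stdio.h>

struct pnode {
	int prio;		/* lower value = higher priority */
	const char *name;
	struct pnode *next;
};

/* Insert so the list stays sorted by ascending prio (best candidate first). */
static void pnode_add(struct pnode **head, struct pnode *node)
{
	while (*head && (*head)->prio <= node->prio)
		head = &(*head)->next;
	node->next = *head;
	*head = node;
}

/* The head is always the highest-priority entry, i.e. the next task to push. */
static struct pnode *pnode_first(struct pnode *head)
{
	return head;
}

int main(void)
{
	struct pnode a = { .prio = 90, .name = "rt-low"  };
	struct pnode b = { .prio = 10, .name = "rt-high" };
	struct pnode c = { .prio = 50, .name = "rt-mid"  };
	struct pnode *head = NULL;

	pnode_add(&head, &a);
	pnode_add(&head, &b);
	pnode_add(&head, &c);

	printf("next pushable task: %s (prio %d)\n",
	       pnode_first(head)->name, pnode_first(head)->prio);
	return 0;
}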
@@ -1610,21 +1616,42 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
 
 #endif
 
+#ifdef CONFIG_PREEMPT
+
 /*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ * fair double_lock_balance: Safely acquires both rq->locks in a fair
+ * way at the expense of forcing extra atomic operations in all
+ * invocations.  This assures that the double_lock is acquired using the
+ * same underlying policy as the spinlock_t on this architecture, which
+ * reduces latency compared to the unfair variant below.  However, it
+ * also adds more overhead and therefore may reduce throughput.
  */
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	spin_unlock(&this_rq->lock);
+	double_rq_lock(this_rq, busiest);
+
+	return 1;
+}
+
+#else
+/*
+ * Unfair double_lock_balance: Optimizes throughput at the expense of
+ * latency by eliminating extra atomic operations when the locks are
+ * already in proper order on entry.  This favors lower cpu-ids and will
+ * grant the double lock to lower cpus over higher ids under contention,
+ * regardless of entry order into the function.
+ */
+static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(this_rq->lock)
 	__acquires(busiest->lock)
 	__acquires(this_rq->lock)
 {
 	int ret = 0;
 
-	if (unlikely(!irqs_disabled())) {
-		/* printk() doesn't work good under rq->lock */
-		spin_unlock(&this_rq->lock);
-		BUG_ON(1);
-	}
 	if (unlikely(!spin_trylock(&busiest->lock))) {
 		if (busiest < this_rq) {
 			spin_unlock(&this_rq->lock);
@@ -1637,6 +1664,22 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	return ret;
 }
 
+#endif /* CONFIG_PREEMPT */
+
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+{
+	if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work good under rq->lock */
+		spin_unlock(&this_rq->lock);
+		BUG_ON(1);
+	}
+
+	return _double_lock_balance(this_rq, busiest);
+}
+
 static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(busiest->lock)
 {
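The two comments above describe the trade-off: under CONFIG_PREEMPT both runqueue locks are always retaken through double_rq_lock() (fair, but more atomic operations), while the non-preemptible path trylocks and, on contention, falls back to releasing and reacquiring in a fixed order keyed on the lock with the lower address. A minimal sketch of that address-ordering rule, with pthread mutexes standing in for rq->lock (illustration only, not kernel code):

/* Sketch of the "always take the lower address first" rule that the
 * unfair _double_lock_balance() falls back to; pthread mutexes stand
 * in for the two rq->lock spinlocks. */
#include <pthread.h>

static void double_lock(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a == b) {
		pthread_mutex_lock(a);
		return;
	}
	/* A single global order prevents ABBA deadlock between two CPUs. */
	if (a < b) {
		pthread_mutex_lock(a);
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);
		pthread_mutex_lock(a);
	}
}

static void double_unlock(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);
	if (a != b)
		pthread_mutex_unlock(b);
}

int main(void)
{
	pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

	double_lock(&m1, &m2);	/* same acquisition order regardless of argument order */
	double_unlock(&m1, &m2);
	return 0;
}

Because every caller acquires the pair in the same global order, the unfair path stays deadlock-free even when two CPUs call it against each other; the cost is that the lower-addressed runqueue tends to win under contention.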
@@ -1705,6 +1748,9 @@ static void update_avg(u64 *avg, u64 sample)
 
 static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 {
+	if (wakeup)
+		p->se.start_runtime = p->se.sum_exec_runtime;
+
 	sched_info_queued(p);
 	p->sched_class->enqueue_task(rq, p, wakeup);
 	p->se.on_rq = 1;
@@ -1712,10 +1758,15 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 
 static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
 {
-	if (sleep && p->se.last_wakeup) {
-		update_avg(&p->se.avg_overlap,
-			   p->se.sum_exec_runtime - p->se.last_wakeup);
-		p->se.last_wakeup = 0;
+	if (sleep) {
+		if (p->se.last_wakeup) {
+			update_avg(&p->se.avg_overlap,
+				   p->se.sum_exec_runtime - p->se.last_wakeup);
+			p->se.last_wakeup = 0;
+		} else {
+			update_avg(&p->se.avg_wakeup,
+				   sysctl_sched_wakeup_granularity);
+		}
 	}
 
 	sched_info_dequeued(p);
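Both branches feed update_avg(), the helper visible in the hunk header above. The sketch below assumes it keeps an exponential moving average that moves one eighth of the way toward each new sample, which fits how avg_wakeup is seeded with sysctl_sched_wakeup_granularity and then decays toward the observed wakeup intervals; treat the weighting and the numbers as assumptions for illustration, not a quote of kernel/sched.c.

/* Rough user-space rendition of the moving average used for
 * se->avg_overlap and the new se->avg_wakeup.  The 1/8 weighting
 * is an assumption made for this sketch. */
#include <stdio.h>
#include <stdint.h>

static void update_avg(uint64_t *avg, uint64_t sample)
{
	int64_t diff = (int64_t)(sample - *avg);

	*avg += diff >> 3;	/* move 1/8 of the way toward the sample */
}

int main(void)
{
	/* avg_wakeup starts at sysctl_sched_wakeup_granularity (assume 10 ms). */
	uint64_t avg_wakeup = 10000000;
	uint64_t samples[] = { 2000000, 1500000, 1800000, 1600000 };

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		update_avg(&avg_wakeup, samples[i]);
		printf("after sample %u: avg_wakeup = %llu ns\n",
		       i + 1, (unsigned long long)avg_wakeup);
	}
	return 0;
}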
@@ -2345,6 +2396,22 @@ out_activate:
 	activate_task(rq, p, 1);
 	success = 1;
 
+	/*
+	 * Only attribute actual wakeups done by this task.
+	 */
+	if (!in_interrupt()) {
+		struct sched_entity *se = &current->se;
+		u64 sample = se->sum_exec_runtime;
+
+		if (se->last_wakeup)
+			sample -= se->last_wakeup;
+		else
+			sample -= se->start_runtime;
+		update_avg(&se->avg_wakeup, sample);
+
+		se->last_wakeup = se->sum_exec_runtime;
+	}
+
 out_running:
 	trace_sched_wakeup(rq, p, success);
 	check_preempt_curr(rq, p, sync);
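As a worked example with illustrative numbers only: if current last issued a wakeup when its sum_exec_runtime stood at 12 ms and it is now at 15 ms, the sample is 3 ms of CPU time spent between wakeups; if it has not woken anyone since being enqueued, the baseline is its start_runtime instead. Either way the sample is folded into se->avg_wakeup and last_wakeup is advanced, so the next wakeup measures only the new interval, and wakeups performed from interrupt context are never charged to the interrupted task.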
@@ -2355,8 +2422,6 @@ out_running:
 		p->sched_class->task_wake_up(rq, p);
 #endif
 out:
-	current->se.last_wakeup = current->se.sum_exec_runtime;
-
 	task_rq_unlock(rq, &flags);
 
 	return success;
@@ -2386,6 +2451,8 @@ static void __sched_fork(struct task_struct *p)
 	p->se.prev_sum_exec_runtime = 0;
 	p->se.last_wakeup = 0;
 	p->se.avg_overlap = 0;
+	p->se.start_runtime = 0;
+	p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start = 0;
@@ -2448,6 +2515,8 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
 #endif
+	plist_node_init(&p->pushable_tasks, MAX_PRIO);
+
 	put_cpu();
 }
 
@@ -2588,6 +2657,12 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 {
 	struct mm_struct *mm = rq->prev_mm;
 	long prev_state;
+#ifdef CONFIG_SMP
+	int post_schedule = 0;
+
+	if (current->sched_class->needs_post_schedule)
+		post_schedule = current->sched_class->needs_post_schedule(rq);
+#endif
 
 	rq->prev_mm = NULL;
 
@@ -2606,7 +2681,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
 #ifdef CONFIG_SMP
-	if (current->sched_class->post_schedule)
+	if (post_schedule)
 		current->sched_class->post_schedule(rq);
 #endif
 
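The needs_post_schedule() result is sampled before finish_lock_switch() releases the runqueue lock, and only the cached flag is consulted afterwards. A toy model of that "decide under the lock, act after it" pattern, using hypothetical mini_rq/mini_class types in place of the scheduler's structures:

/* Toy model of caching the post_schedule decision while the runqueue
 * state is still stable.  Hypothetical miniature types, not the
 * kernel's sched_class. */
#include <stdbool.h>
#include <stdio.h>

struct mini_rq;

struct mini_class {
	bool (*needs_post_schedule)(struct mini_rq *rq);
	void (*post_schedule)(struct mini_rq *rq);
};

struct mini_rq {
	const struct mini_class *class;
	bool overloaded;
};

static bool rt_needs_post_schedule(struct mini_rq *rq)
{
	return rq->overloaded;	/* e.g. RT tasks waiting to be pushed */
}

static void rt_post_schedule(struct mini_rq *rq)
{
	printf("pushing RT tasks away from this runqueue\n");
	rq->overloaded = false;
}

static const struct mini_class rt_class = {
	.needs_post_schedule	= rt_needs_post_schedule,
	.post_schedule		= rt_post_schedule,
};

static void finish_switch(struct mini_rq *rq)
{
	bool post_schedule = false;

	/* decide while the state is still consistent ("rq->lock held") ... */
	if (rq->class->needs_post_schedule)
		post_schedule = rq->class->needs_post_schedule(rq);

	/* ... the lock is dropped here ... */

	if (post_schedule)
		rq->class->post_schedule(rq);
}

int main(void)
{
	struct mini_rq rq = { .class = &rt_class, .overloaded = true };

	finish_switch(&rq);
	return 0;
}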
@@ -2987,6 +3062,16 @@ next:
 	pulled++;
 	rem_load_move -= p->se.load.weight;
 
+#ifdef CONFIG_PREEMPT
+	/*
+	 * NEWIDLE balancing is a source of latency, so preemptible kernels
+	 * will stop after the first task is pulled to minimize the critical
+	 * section.
+	 */
+	if (idle == CPU_NEWLY_IDLE)
+		goto out;
+#endif
+
 	/*
 	 * We only want to steal up to the prescribed amount of weighted load.
 	 */
@@ -3033,9 +3118,15 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 				sd, idle, all_pinned, &this_best_prio);
 		class = class->next;
 
+#ifdef CONFIG_PREEMPT
+		/*
+		 * NEWIDLE balancing is a source of latency, so preemptible
+		 * kernels will stop after the first task is pulled to minimize
+		 * the critical section.
+		 */
 		if (idle == CPU_NEWLY_IDLE && this_rq->nr_running)
 			break;
-
+#endif
 	} while (class && max_load_move > total_load_moved);
 
 	return total_load_moved > 0;
@@ -6944,20 +7035,26 @@ static void free_rootdomain(struct root_domain *rd)
 
 static void rq_attach_root(struct rq *rq, struct root_domain *rd)
 {
+	struct root_domain *old_rd = NULL;
 	unsigned long flags;
 
 	spin_lock_irqsave(&rq->lock, flags);
 
 	if (rq->rd) {
-		struct root_domain *old_rd = rq->rd;
+		old_rd = rq->rd;
 
 		if (cpumask_test_cpu(rq->cpu, old_rd->online))
 			set_rq_offline(rq);
 
 		cpumask_clear_cpu(rq->cpu, old_rd->span);
 
-		if (atomic_dec_and_test(&old_rd->refcount))
-			free_rootdomain(old_rd);
+		/*
+		 * If we dont want to free the old_rt yet then
+		 * set old_rd to NULL to skip the freeing later
+		 * in this function:
+		 */
+		if (!atomic_dec_and_test(&old_rd->refcount))
+			old_rd = NULL;
 	}
 
 	atomic_inc(&rd->refcount);
@@ -6968,6 +7065,9 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
 		set_rq_online(rq);
 
 	spin_unlock_irqrestore(&rq->lock, flags);
+
+	if (old_rd)
+		free_rootdomain(old_rd);
 }
 
 static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
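The rq_attach_root() rework records the root domain whose last reference was dropped and frees it only after spin_unlock_irqrestore(), keeping free_rootdomain() out of the irq-disabled critical section. A generic user-space sketch of that defer-the-free pattern, with a pthread mutex standing in for rq->lock and a hypothetical refcounted struct in place of root_domain:

/* Generic "remember it now, free it after unlocking" pattern mirroring
 * the rq_attach_root() change above.  Hypothetical refcounted object;
 * a pthread mutex stands in for rq->lock. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct domain {
	int refcount;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct domain *current_rd;

static void attach_domain(struct domain *new_rd)
{
	struct domain *old_rd = NULL;

	pthread_mutex_lock(&lock);

	if (current_rd) {
		old_rd = current_rd;
		/* keep old_rd only if we dropped its last reference */
		if (--old_rd->refcount != 0)
			old_rd = NULL;
	}

	new_rd->refcount++;
	current_rd = new_rd;

	pthread_mutex_unlock(&lock);

	/* the actual free happens outside the critical section */
	if (old_rd) {
		printf("freeing old domain outside the lock\n");
		free(old_rd);
	}
}

int main(void)
{
	struct domain *a = calloc(1, sizeof(*a));
	struct domain *b = calloc(1, sizeof(*b));

	attach_domain(a);
	attach_domain(b);	/* a's last reference drops; freed after unlock */
	free(b);
	return 0;
}

Deferring the free this way keeps the time spent with interrupts disabled short and avoids running teardown work while holding the lock it may itself contend on.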
@@ -8209,11 +8309,15 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 	__set_bit(MAX_RT_PRIO, array->bitmap);
 
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-	rt_rq->highest_prio = MAX_RT_PRIO;
+	rt_rq->highest_prio.curr = MAX_RT_PRIO;
+#ifdef CONFIG_SMP
+	rt_rq->highest_prio.next = MAX_RT_PRIO;
+#endif
 #endif
 #ifdef CONFIG_SMP
 	rt_rq->rt_nr_migratory = 0;
 	rt_rq->overloaded = 0;
+	plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
 #endif
 
 	rt_rq->rt_time = 0;