Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r-- | kernel/sched/fair.c | 118
1 files changed, 50 insertions, 68 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c219bf8d704c..6b800a14b990 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -597,7 +597,7 @@ calc_delta_fair(unsigned long delta, struct sched_entity *se)
 /*
  * The idea is to set a period in which each task runs once.
  *
- * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch
+ * When there are too many tasks (sched_nr_latency) we have to stretch
  * this period because otherwise the slices get too small.
  *
  * p = (nr <= nl) ? l : l*nr/nl
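
As a side note, the formula in the comment reads: keep the period at the target latency l until more than nl tasks are runnable, then grow it linearly so each task still gets l/nl. Below is a minimal userspace sketch of that rule with assumed defaults (l = 6 ms, nl = 8); the function and constants are illustrative and are not the kernel's __sched_period().

#include <stdio.h>

/* p = (nr <= nl) ? l : l*nr/nl, with l = 6ms and nl = 8 assumed here */
static unsigned long sched_period_ns(unsigned long nr_running)
{
	const unsigned long latency_ns  = 6000000UL;                 /* l            */
	const unsigned long nr_latency  = 8;                         /* nl           */
	const unsigned long min_gran_ns = latency_ns / nr_latency;   /* l/nl = 750us */

	if (nr_running <= nr_latency)
		return latency_ns;               /* p = l          */
	return min_gran_ns * nr_running;         /* p = l * nr/nl  */
}

int main(void)
{
	printf("%lu\n", sched_period_ns(4));     /* 6000000: 4 tasks fit in one latency period */
	printf("%lu\n", sched_period_ns(12));    /* 9000000: 12 tasks stretch the period       */
	return 0;
}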
@@ -2052,7 +2052,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	hrtimer_cancel(&cfs_b->slack_timer);
 }
 
-void unthrottle_offline_cfs_rqs(struct rq *rq)
+static void unthrottle_offline_cfs_rqs(struct rq *rq)
 {
 	struct cfs_rq *cfs_rq;
 
@@ -2106,7 +2106,7 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
 	return NULL;
 }
 static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
-void unthrottle_offline_cfs_rqs(struct rq *rq) {}
+static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
 
 #endif /* CONFIG_CFS_BANDWIDTH */
 
@@ -2637,6 +2637,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	int cpu = smp_processor_id();
 	int prev_cpu = task_cpu(p);
 	struct sched_domain *sd;
+	struct sched_group *sg;
+	int i;
 
 	/*
 	 * If the task is going to be woken-up on this cpu and if it is
@@ -2653,17 +2655,29 @@ static int select_idle_sibling(struct task_struct *p, int target)
 		return prev_cpu;
 
 	/*
-	 * Otherwise, check assigned siblings to find an elegible idle cpu.
+	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
 	sd = rcu_dereference(per_cpu(sd_llc, target));
-
 	for_each_lower_domain(sd) {
-		if (!cpumask_test_cpu(sd->idle_buddy, tsk_cpus_allowed(p)))
-			continue;
-		if (idle_cpu(sd->idle_buddy))
-			return sd->idle_buddy;
-	}
+		sg = sd->groups;
+		do {
+			if (!cpumask_intersects(sched_group_cpus(sg),
+						tsk_cpus_allowed(p)))
+				goto next;
 
+			for_each_cpu(i, sched_group_cpus(sg)) {
+				if (!idle_cpu(i))
+					goto next;
+			}
+
+			target = cpumask_first_and(sched_group_cpus(sg),
+					tsk_cpus_allowed(p));
+			goto done;
+next:
+			sg = sg->next;
+		} while (sg != sd->groups);
+	}
+done:
 	return target;
 }
 
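
As a rough illustration of what the restored loop does (scan each sched_group under the LLC domain, skip groups the task may not run on or that are not fully idle, and pick the first permitted CPU of an idle group), here is a small standalone sketch. The arrays and helper names below are invented for the example; they only model the idea, not the kernel's sched_group/cpumask machinery.

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS    8
#define NR_GROUPS  4
#define GROUP_SIZE (NR_CPUS / NR_GROUPS)   /* e.g. SMT sibling pairs */

static bool cpu_idle[NR_CPUS]    = { false, false, true, true, false, true, true, true };
static bool cpu_allowed[NR_CPUS] = { true,  true,  true, true, true,  true, false, true };

static int find_idle_group_cpu(int target)
{
	for (int g = 0; g < NR_GROUPS; g++) {
		bool all_idle = true, any_allowed = false;
		int first_allowed = -1;

		for (int c = g * GROUP_SIZE; c < (g + 1) * GROUP_SIZE; c++) {
			if (!cpu_idle[c])
				all_idle = false;
			if (cpu_allowed[c]) {
				any_allowed = true;
				if (first_allowed < 0)
					first_allowed = c;
			}
		}
		/* skip groups the task cannot run in, or that are not fully idle */
		if (all_idle && any_allowed)
			return first_allowed;
	}
	return target;   /* nothing better found: fall back to the original target */
}

int main(void)
{
	printf("picked cpu %d\n", find_idle_group_cpu(0));   /* group {2,3} is idle -> 2 */
	return 0;
}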
@@ -2686,7 +2700,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 	int prev_cpu = task_cpu(p);
 	int new_cpu = cpu;
 	int want_affine = 0;
-	int want_sd = 1;
 	int sync = wake_flags & WF_SYNC;
 
 	if (p->nr_cpus_allowed == 1)
@@ -2704,48 +2717,21 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 			continue;
 
 		/*
-		 * If power savings logic is enabled for a domain, see if we
-		 * are not overloaded, if so, don't balance wider.
-		 */
-		if (tmp->flags & (SD_PREFER_LOCAL)) {
-			unsigned long power = 0;
-			unsigned long nr_running = 0;
-			unsigned long capacity;
-			int i;
-
-			for_each_cpu(i, sched_domain_span(tmp)) {
-				power += power_of(i);
-				nr_running += cpu_rq(i)->cfs.nr_running;
-			}
-
-			capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE);
-
-			if (nr_running < capacity)
-				want_sd = 0;
-		}
-
-		/*
 		 * If both cpu and prev_cpu are part of this domain,
 		 * cpu is a valid SD_WAKE_AFFINE target.
 		 */
 		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
 		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
 			affine_sd = tmp;
-			want_affine = 0;
-		}
-
-		if (!want_sd && !want_affine)
 			break;
+		}
 
-		if (!(tmp->flags & sd_flag))
-			continue;
-
-		if (want_sd)
+		if (tmp->flags & sd_flag)
 			sd = tmp;
 	}
 
 	if (affine_sd) {
-		if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
+		if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
 			prev_cpu = cpu;
 
 		new_cpu = select_idle_sibling(p, prev_cpu);
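
The behavioural change in this hunk is that the domain walk now stops at the first wake-affine domain, and wake_affine() is only consulted when the waking CPU differs from the task's previous CPU (when they are equal the answer cannot change the placement). Here is a tiny standalone sketch of that decision; the stub below stands in for the real wake_affine() heuristic and all names are illustrative.

#include <stdbool.h>
#include <stdio.h>

static bool wake_affine_stub(int waker_cpu, int prev_cpu)
{
	(void)waker_cpu;
	(void)prev_cpu;
	/* pretend the waker's cache domain is lightly loaded, so pulling is fine */
	return true;
}

static int pick_wake_cpu(int waker_cpu, int prev_cpu)
{
	/* only run the heuristic when the two cpus actually differ */
	if (waker_cpu != prev_cpu && wake_affine_stub(waker_cpu, prev_cpu))
		prev_cpu = waker_cpu;   /* place the task near its waker */
	return prev_cpu;                /* select_idle_sibling() would refine this */
}

int main(void)
{
	printf("%d\n", pick_wake_cpu(3, 3));   /* same cpu: stays at 3, heuristic skipped */
	printf("%d\n", pick_wake_cpu(1, 3));   /* different: heuristic says pull -> 1     */
	return 0;
}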
@@ -3658,7 +3644,6 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
  * @group: sched_group whose statistics are to be updated.
  * @load_idx: Load index of sched_domain of this_cpu for load calc.
  * @local_group: Does group contain this_cpu.
- * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
  * @sgs: variable to hold the statistics for this group.
  */
@@ -3805,7 +3790,6 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 /**
  * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
  * @env: The load balancing environment.
- * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
  * @sds: variable to hold the statistics for this sched_domain.
  */
@@ -4283,7 +4267,7 @@ redo:
 		goto out_balanced;
 	}
 
-	BUG_ON(busiest == this_rq);
+	BUG_ON(busiest == env.dst_rq);
 
 	schedstat_add(sd, lb_imbalance[idle], env.imbalance);
 
@@ -4304,7 +4288,7 @@ redo:
 		update_h_load(env.src_cpu);
 more_balance:
 		local_irq_save(flags);
-		double_rq_lock(this_rq, busiest);
+		double_rq_lock(env.dst_rq, busiest);
 
 		/*
 		 * cur_ld_moved - load moved in current iteration
@@ -4312,7 +4296,7 @@ more_balance:
 		 */
 		cur_ld_moved = move_tasks(&env);
 		ld_moved += cur_ld_moved;
-		double_rq_unlock(this_rq, busiest);
+		double_rq_unlock(env.dst_rq, busiest);
 		local_irq_restore(flags);
 
 		if (env.flags & LBF_NEED_BREAK) {
@@ -4348,8 +4332,7 @@ more_balance:
 		if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 &&
 				lb_iterations++ < max_lb_iterations) {
 
-			this_rq = cpu_rq(env.new_dst_cpu);
-			env.dst_rq = this_rq;
+			env.dst_rq = cpu_rq(env.new_dst_cpu);
 			env.dst_cpu = env.new_dst_cpu;
 			env.flags &= ~LBF_SOME_PINNED;
 			env.loop = 0;
@@ -4634,7 +4617,7 @@ static void nohz_balancer_kick(int cpu)
 	return;
 }
 
-static inline void clear_nohz_tick_stopped(int cpu)
+static inline void nohz_balance_exit_idle(int cpu)
 {
 	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
 		cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
@@ -4674,28 +4657,23 @@ void set_cpu_sd_state_idle(void)
 }
 
 /*
- * This routine will record that this cpu is going idle with tick stopped.
+ * This routine will record that the cpu is going idle with tick stopped.
  * This info will be used in performing idle load balancing in the future.
  */
-void select_nohz_load_balancer(int stop_tick)
+void nohz_balance_enter_idle(int cpu)
 {
-	int cpu = smp_processor_id();
-
 	/*
 	 * If this cpu is going down, then nothing needs to be done.
 	 */
 	if (!cpu_active(cpu))
 		return;
 
-	if (stop_tick) {
-		if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
-			return;
+	if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
+		return;
 
-		cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
-		atomic_inc(&nohz.nr_cpus);
-		set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
-	}
-	return;
+	cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
+	atomic_inc(&nohz.nr_cpus);
+	set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 }
 
 static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
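
For readers following the rename, nohz_balance_enter_idle()/nohz_balance_exit_idle() are essentially idempotent bookkeeping: record the CPU in a shared idle mask, bump a counter, and mark it tick-stopped, then undo all of that on exit. Below is a minimal userspace sketch of that pattern, using plain booleans and an int in place of cpumask/atomic_t; it is illustrative only and ignores the concurrency the kernel versions must handle.

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

static bool idle_cpus_mask[NR_CPUS];   /* stands in for nohz.idle_cpus_mask */
static int  nr_idle_cpus;              /* stands in for nohz.nr_cpus        */
static bool tick_stopped[NR_CPUS];     /* stands in for NOHZ_TICK_STOPPED   */

static void balance_enter_idle(int cpu)
{
	if (tick_stopped[cpu])         /* already recorded: nothing to do */
		return;
	idle_cpus_mask[cpu] = true;
	nr_idle_cpus++;
	tick_stopped[cpu] = true;
}

static void balance_exit_idle(int cpu)
{
	if (!tick_stopped[cpu])        /* was never recorded: nothing to undo */
		return;
	idle_cpus_mask[cpu] = false;
	nr_idle_cpus--;
	tick_stopped[cpu] = false;
}

int main(void)
{
	balance_enter_idle(1);
	balance_enter_idle(1);                       /* second call is a no-op */
	printf("idle cpus: %d\n", nr_idle_cpus);     /* 1 */
	balance_exit_idle(1);
	printf("idle cpus: %d\n", nr_idle_cpus);     /* 0 */
	return 0;
}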
@@ -4703,7 +4681,7 @@ static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
 {
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_DYING:
-		clear_nohz_tick_stopped(smp_processor_id());
+		nohz_balance_exit_idle(smp_processor_id());
 		return NOTIFY_OK;
 	default:
 		return NOTIFY_DONE;
@@ -4825,14 +4803,15 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
 		if (need_resched())
 			break;
 
-		raw_spin_lock_irq(&this_rq->lock);
-		update_rq_clock(this_rq);
-		update_idle_cpu_load(this_rq);
-		raw_spin_unlock_irq(&this_rq->lock);
+		rq = cpu_rq(balance_cpu);
+
+		raw_spin_lock_irq(&rq->lock);
+		update_rq_clock(rq);
+		update_idle_cpu_load(rq);
+		raw_spin_unlock_irq(&rq->lock);
 
 		rebalance_domains(balance_cpu, CPU_IDLE);
 
-		rq = cpu_rq(balance_cpu);
 		if (time_after(this_rq->next_balance, rq->next_balance))
 			this_rq->next_balance = rq->next_balance;
 	}
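
The point of this hunk is to take the lock of the runqueue actually being updated (balance_cpu's) rather than the local one. A small pthread-based sketch of that rule follows; the toy runqueue type and function names are invented for the example and only mirror the shape of the kernel code.

#include <pthread.h>
#include <stdio.h>

#define NR_CPUS 4

struct toy_rq {
	pthread_mutex_t lock;
	unsigned long cpu_load;
};

static struct toy_rq runqueues[NR_CPUS];

static void update_idle_cpu_load_on(int balance_cpu)
{
	struct toy_rq *rq = &runqueues[balance_cpu];   /* the remote cpu's rq */

	pthread_mutex_lock(&rq->lock);    /* lock the rq we are about to touch */
	rq->cpu_load = 0;                 /* stand-in for decaying its idle load */
	pthread_mutex_unlock(&rq->lock);
}

int main(void)
{
	for (int i = 0; i < NR_CPUS; i++)
		pthread_mutex_init(&runqueues[i].lock, NULL);

	runqueues[2].cpu_load = 42;
	update_idle_cpu_load_on(2);        /* updates cpu 2's rq under its own lock */
	printf("cpu2 load: %lu\n", runqueues[2].cpu_load);   /* 0 */
	return 0;
}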
@@ -4863,7 +4842,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 	 * busy tick after returning from idle, we will update the busy stats.
 	 */
 	set_cpu_sd_state_busy();
-	clear_nohz_tick_stopped(cpu);
+	nohz_balance_exit_idle(cpu);
 
 	/*
 	 * None are in tickless mode and hence no need for NOHZ idle load
@@ -4956,6 +4935,9 @@ static void rq_online_fair(struct rq *rq)
 static void rq_offline_fair(struct rq *rq)
 {
 	update_sysctl();
+
+	/* Ensure any throttled groups are reachable by pick_next_task */
+	unthrottle_offline_cfs_rqs(rq);
 }
 
 #endif /* CONFIG_SMP */