Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--	kernel/sched/fair.c | 118
1 file changed, 50 insertions(+), 68 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c219bf8d704c..6b800a14b990 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -597,7 +597,7 @@ calc_delta_fair(unsigned long delta, struct sched_entity *se)
 /*
  * The idea is to set a period in which each task runs once.
  *
- * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch
+ * When there are too many tasks (sched_nr_latency) we have to stretch
  * this period because otherwise the slices get too small.
  *
  * p = (nr <= nl) ? l : l*nr/nl
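For reference, a minimal userspace sketch of the period calculation described in the comment above; the 6 ms latency and nr_latency of 8 used in main() are assumed example values, not figures taken from this diff.

/* sched_period_ns - standalone illustration of p = (nr <= nl) ? l : l*nr/nl */
#include <stdio.h>

static unsigned long sched_period_ns(unsigned long nr_running,   /* nr */
				     unsigned long latency_ns,    /* l  */
				     unsigned long nr_latency)    /* nl */
{
	if (nr_running <= nr_latency)
		return latency_ns;
	/* stretch the period so each task still gets a reasonable slice */
	return latency_ns * nr_running / nr_latency;
}

int main(void)
{
	/* assumed example: 6 ms latency window, nl = 8, 16 runnable tasks */
	printf("%lu\n", sched_period_ns(16, 6000000UL, 8)); /* prints 12000000 */
	return 0;
}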
@@ -2052,7 +2052,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	hrtimer_cancel(&cfs_b->slack_timer);
 }
 
-void unthrottle_offline_cfs_rqs(struct rq *rq)
+static void unthrottle_offline_cfs_rqs(struct rq *rq)
 {
 	struct cfs_rq *cfs_rq;
 
@@ -2106,7 +2106,7 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
 	return NULL;
 }
 static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
-void unthrottle_offline_cfs_rqs(struct rq *rq) {}
+static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
 
 #endif /* CONFIG_CFS_BANDWIDTH */
 
@@ -2637,6 +2637,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	int cpu = smp_processor_id();
 	int prev_cpu = task_cpu(p);
 	struct sched_domain *sd;
+	struct sched_group *sg;
+	int i;
 
 	/*
 	 * If the task is going to be woken-up on this cpu and if it is
@@ -2653,17 +2655,29 @@ static int select_idle_sibling(struct task_struct *p, int target)
 		return prev_cpu;
 
 	/*
-	 * Otherwise, check assigned siblings to find an elegible idle cpu.
+	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
 	sd = rcu_dereference(per_cpu(sd_llc, target));
-
 	for_each_lower_domain(sd) {
-		if (!cpumask_test_cpu(sd->idle_buddy, tsk_cpus_allowed(p)))
-			continue;
-		if (idle_cpu(sd->idle_buddy))
-			return sd->idle_buddy;
-	}
-
+		sg = sd->groups;
+		do {
+			if (!cpumask_intersects(sched_group_cpus(sg),
+						tsk_cpus_allowed(p)))
+				goto next;
+
+			for_each_cpu(i, sched_group_cpus(sg)) {
+				if (!idle_cpu(i))
+					goto next;
+			}
+
+			target = cpumask_first_and(sched_group_cpus(sg),
+					tsk_cpus_allowed(p));
+			goto done;
+next:
+			sg = sg->next;
+		} while (sg != sd->groups);
+	}
+done:
 	return target;
 }
 
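A rough userspace sketch of the group scan restored above, using stand-in types (fixed-size boolean masks instead of cpumask_t, a circular list for the groups of one domain); it illustrates the search order only and is not the kernel API. A group is accepted only if every one of its cpus is idle, in which case the first cpu the task is allowed to run on is picked; otherwise the original target is kept.

#include <stdbool.h>

#define NR_CPUS 8

struct group_stub {
	bool cpus[NR_CPUS];		/* which cpus belong to this group */
	struct group_stub *next;	/* circular list, as in sd->groups */
};

static int pick_idle_group_cpu(struct group_stub *groups,
			       const bool *allowed, const bool *idle,
			       int target)
{
	struct group_stub *sg = groups;

	do {
		bool intersects = false, all_idle = true;
		int i, first_allowed = -1;

		for (i = 0; i < NR_CPUS; i++) {
			if (!sg->cpus[i])
				continue;
			if (allowed[i]) {
				intersects = true;
				if (first_allowed < 0)
					first_allowed = i;
			}
			if (!idle[i])
				all_idle = false;
		}
		/* only take a group the task may use and that is fully idle */
		if (intersects && all_idle)
			return first_allowed;
		sg = sg->next;
	} while (sg != groups);

	return target;			/* no such group: keep the old target */
}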
@@ -2686,7 +2700,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 	int prev_cpu = task_cpu(p);
 	int new_cpu = cpu;
 	int want_affine = 0;
-	int want_sd = 1;
 	int sync = wake_flags & WF_SYNC;
 
 	if (p->nr_cpus_allowed == 1)
@@ -2704,48 +2717,21 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 			continue;
 
 		/*
-		 * If power savings logic is enabled for a domain, see if we
-		 * are not overloaded, if so, don't balance wider.
-		 */
-		if (tmp->flags & (SD_PREFER_LOCAL)) {
-			unsigned long power = 0;
-			unsigned long nr_running = 0;
-			unsigned long capacity;
-			int i;
-
-			for_each_cpu(i, sched_domain_span(tmp)) {
-				power += power_of(i);
-				nr_running += cpu_rq(i)->cfs.nr_running;
-			}
-
-			capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE);
-
-			if (nr_running < capacity)
-				want_sd = 0;
-		}
-
-		/*
 		 * If both cpu and prev_cpu are part of this domain,
 		 * cpu is a valid SD_WAKE_AFFINE target.
 		 */
 		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
 		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
 			affine_sd = tmp;
-			want_affine = 0;
-		}
-
-		if (!want_sd && !want_affine)
 			break;
+		}
 
-		if (!(tmp->flags & sd_flag))
-			continue;
-
-		if (want_sd)
+		if (tmp->flags & sd_flag)
 			sd = tmp;
 	}
 
 	if (affine_sd) {
-		if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
+		if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
 			prev_cpu = cpu;
 
 		new_cpu = select_idle_sibling(p, prev_cpu);
@@ -3658,7 +3644,6 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
  * @group: sched_group whose statistics are to be updated.
  * @load_idx: Load index of sched_domain of this_cpu for load calc.
  * @local_group: Does group contain this_cpu.
- * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
  * @sgs: variable to hold the statistics for this group.
  */
@@ -3805,7 +3790,6 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 /**
  * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
  * @env: The load balancing environment.
- * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
  * @sds: variable to hold the statistics for this sched_domain.
  */
@@ -4283,7 +4267,7 @@ redo:
 		goto out_balanced;
 	}
 
-	BUG_ON(busiest == this_rq);
+	BUG_ON(busiest == env.dst_rq);
 
 	schedstat_add(sd, lb_imbalance[idle], env.imbalance);
 
@@ -4304,7 +4288,7 @@ redo:
 	update_h_load(env.src_cpu);
 more_balance:
 	local_irq_save(flags);
-	double_rq_lock(this_rq, busiest);
+	double_rq_lock(env.dst_rq, busiest);
 
 	/*
 	 * cur_ld_moved - load moved in current iteration
@@ -4312,7 +4296,7 @@ more_balance:
 	 */
 	cur_ld_moved = move_tasks(&env);
 	ld_moved += cur_ld_moved;
-	double_rq_unlock(this_rq, busiest);
+	double_rq_unlock(env.dst_rq, busiest);
 	local_irq_restore(flags);
 
 	if (env.flags & LBF_NEED_BREAK) {
@@ -4348,8 +4332,7 @@ more_balance:
 	if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 &&
 			lb_iterations++ < max_lb_iterations) {
 
-		this_rq = cpu_rq(env.new_dst_cpu);
-		env.dst_rq = this_rq;
+		env.dst_rq = cpu_rq(env.new_dst_cpu);
 		env.dst_cpu = env.new_dst_cpu;
 		env.flags &= ~LBF_SOME_PINNED;
 		env.loop = 0;
@@ -4634,7 +4617,7 @@ static void nohz_balancer_kick(int cpu)
 	return;
 }
 
-static inline void clear_nohz_tick_stopped(int cpu)
+static inline void nohz_balance_exit_idle(int cpu)
 {
 	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
 		cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
@@ -4674,28 +4657,23 @@ void set_cpu_sd_state_idle(void)
 }
 
 /*
- * This routine will record that this cpu is going idle with tick stopped.
+ * This routine will record that the cpu is going idle with tick stopped.
  * This info will be used in performing idle load balancing in the future.
  */
-void select_nohz_load_balancer(int stop_tick)
+void nohz_balance_enter_idle(int cpu)
 {
-	int cpu = smp_processor_id();
-
 	/*
 	 * If this cpu is going down, then nothing needs to be done.
 	 */
 	if (!cpu_active(cpu))
 		return;
 
-	if (stop_tick) {
-		if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
-			return;
+	if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
+		return;
 
-		cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
-		atomic_inc(&nohz.nr_cpus);
-		set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
-	}
-	return;
+	cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
+	atomic_inc(&nohz.nr_cpus);
+	set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 }
 
 static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
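A compact userspace sketch of the bookkeeping performed by nohz_balance_enter_idle()/nohz_balance_exit_idle() above; the boolean array, atomic counter and per-cpu flag are stand-ins for nohz.idle_cpus_mask, nohz.nr_cpus and the NOHZ_TICK_STOPPED bit, and locking and per-cpu details are omitted.

#include <stdatomic.h>
#include <stdbool.h>

#define NR_CPUS 8

static bool idle_cpus_mask[NR_CPUS];	/* stand-in for nohz.idle_cpus_mask */
static atomic_int nr_idle_cpus;		/* stand-in for nohz.nr_cpus */
static bool tick_stopped[NR_CPUS];	/* stand-in for NOHZ_TICK_STOPPED */

static void enter_idle(int cpu, bool cpu_active)
{
	if (!cpu_active)		/* cpu going down: nothing to do */
		return;
	if (tick_stopped[cpu])		/* already recorded as idle */
		return;

	idle_cpus_mask[cpu] = true;
	atomic_fetch_add(&nr_idle_cpus, 1);
	tick_stopped[cpu] = true;
}

static void exit_idle(int cpu)
{
	if (!tick_stopped[cpu])
		return;

	idle_cpus_mask[cpu] = false;
	atomic_fetch_sub(&nr_idle_cpus, 1);
	tick_stopped[cpu] = false;
}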
@@ -4703,7 +4681,7 @@ static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
 {
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_DYING:
-		clear_nohz_tick_stopped(smp_processor_id());
+		nohz_balance_exit_idle(smp_processor_id());
 		return NOTIFY_OK;
 	default:
 		return NOTIFY_DONE;
@@ -4825,14 +4803,15 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
 		if (need_resched())
 			break;
 
-		raw_spin_lock_irq(&this_rq->lock);
-		update_rq_clock(this_rq);
-		update_idle_cpu_load(this_rq);
-		raw_spin_unlock_irq(&this_rq->lock);
+		rq = cpu_rq(balance_cpu);
+
+		raw_spin_lock_irq(&rq->lock);
+		update_rq_clock(rq);
+		update_idle_cpu_load(rq);
+		raw_spin_unlock_irq(&rq->lock);
 
 		rebalance_domains(balance_cpu, CPU_IDLE);
 
-		rq = cpu_rq(balance_cpu);
 		if (time_after(this_rq->next_balance, rq->next_balance))
 			this_rq->next_balance = rq->next_balance;
 	}
@@ -4863,7 +4842,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 	 * busy tick after returning from idle, we will update the busy stats.
 	 */
 	set_cpu_sd_state_busy();
-	clear_nohz_tick_stopped(cpu);
+	nohz_balance_exit_idle(cpu);
 
 	/*
 	 * None are in tickless mode and hence no need for NOHZ idle load
@@ -4956,6 +4935,9 @@ static void rq_online_fair(struct rq *rq)
 static void rq_offline_fair(struct rq *rq)
 {
 	update_sysctl();
+
+	/* Ensure any throttled groups are reachable by pick_next_task */
+	unthrottle_offline_cfs_rqs(rq);
 }
 
 #endif /* CONFIG_SMP */