Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--  kernel/sched/fair.c  77
1 files changed, 49 insertions, 28 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 22321db64952..96e2b18b6283 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2052,7 +2052,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	hrtimer_cancel(&cfs_b->slack_timer);
 }
 
-void unthrottle_offline_cfs_rqs(struct rq *rq)
+static void unthrottle_offline_cfs_rqs(struct rq *rq)
 {
 	struct cfs_rq *cfs_rq;
 
@@ -2106,7 +2106,7 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
 	return NULL;
 }
 static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
-void unthrottle_offline_cfs_rqs(struct rq *rq) {}
+static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
 
 #endif /* CONFIG_CFS_BANDWIDTH */
 
@@ -2637,6 +2637,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	int cpu = smp_processor_id();
 	int prev_cpu = task_cpu(p);
 	struct sched_domain *sd;
+	struct sched_group *sg;
+	int i;
 
 	/*
 	 * If the task is going to be woken-up on this cpu and if it is
@@ -2653,17 +2655,29 @@ static int select_idle_sibling(struct task_struct *p, int target)
 		return prev_cpu;
 
 	/*
-	 * Otherwise, check assigned siblings to find an elegible idle cpu.
+	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
 	sd = rcu_dereference(per_cpu(sd_llc, target));
-
 	for_each_lower_domain(sd) {
-		if (!cpumask_test_cpu(sd->idle_buddy, tsk_cpus_allowed(p)))
-			continue;
-		if (idle_cpu(sd->idle_buddy))
-			return sd->idle_buddy;
-	}
+		sg = sd->groups;
+		do {
+			if (!cpumask_intersects(sched_group_cpus(sg),
+						tsk_cpus_allowed(p)))
+				goto next;
 
+			for_each_cpu(i, sched_group_cpus(sg)) {
+				if (!idle_cpu(i))
+					goto next;
+			}
+
+			target = cpumask_first_and(sched_group_cpus(sg),
+					tsk_cpus_allowed(p));
+			goto done;
+next:
+			sg = sg->next;
+		} while (sg != sd->groups);
+	}
+done:
 	return target;
 }
 
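For readers skimming the hunk above: the rewritten select_idle_sibling() no longer consults a per-domain idle buddy CPU; it walks each lower domain's circular group list and picks the first allowed CPU of a group whose CPUs are all idle, keeping the original target otherwise. Below is a minimal standalone sketch of that selection policy only (plain C with hypothetical names and bitmask stand-ins for cpumasks; it is not the kernel implementation).

/*
 * Standalone sketch (hypothetical names, not kernel code): pick the first
 * allowed CPU of the first fully idle group in a circular group list.
 */
#include <stdio.h>

#define NCPU 8

struct group {
	unsigned int cpus;	/* bitmask of the CPUs in this group */
	struct group *next;	/* circular list, mirroring sched_group->next */
};

static int cpu_idle[NCPU] = { 0, 0, 1, 1, 1, 1, 1, 1 };

static int group_fully_idle(unsigned int cpus)
{
	for (int i = 0; i < NCPU; i++)
		if ((cpus & (1u << i)) && !cpu_idle[i])
			return 0;
	return 1;
}

static int pick_idle_group_cpu(struct group *groups, unsigned int allowed,
			       int fallback)
{
	struct group *sg = groups;

	do {
		unsigned int usable = sg->cpus & allowed;

		/* group must intersect the allowed mask and be entirely idle */
		if (usable && group_fully_idle(sg->cpus)) {
			for (int i = 0; i < NCPU; i++)
				if (usable & (1u << i))
					return i;	/* first allowed CPU */
		}
		sg = sg->next;
	} while (sg != groups);

	return fallback;	/* no fully idle group: keep the original target */
}

int main(void)
{
	struct group g0 = { 0x0f, NULL }, g1 = { 0xf0, NULL };

	g0.next = &g1;
	g1.next = &g0;

	/* CPUs 0-1 are busy, so group {4..7} is the first fully idle group */
	printf("picked CPU %d\n", pick_idle_group_cpu(&g0, 0xff, 0));
	return 0;
}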
@@ -3069,6 +3083,9 @@ struct lb_env {
 	int			new_dst_cpu;
 	enum cpu_idle_type	idle;
 	long			imbalance;
+	/* The set of CPUs under consideration for load-balancing */
+	struct cpumask		*cpus;
+
 	unsigned int		flags;
 
 	unsigned int		loop;
@@ -3384,6 +3401,14 @@ static int tg_load_down(struct task_group *tg, void *data)
 
 static void update_h_load(long cpu)
 {
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long now = jiffies;
+
+	if (rq->h_load_throttle == now)
+		return;
+
+	rq->h_load_throttle = now;
+
 	rcu_read_lock();
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 	rcu_read_unlock();
@@ -3647,14 +3672,12 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
  * @group: sched_group whose statistics are to be updated.
  * @load_idx: Load index of sched_domain of this_cpu for load calc.
  * @local_group: Does group contain this_cpu.
- * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
  * @sgs: variable to hold the statistics for this group.
  */
 static inline void update_sg_lb_stats(struct lb_env *env,
 			struct sched_group *group, int load_idx,
-			int local_group, const struct cpumask *cpus,
-			int *balance, struct sg_lb_stats *sgs)
+			int local_group, int *balance, struct sg_lb_stats *sgs)
 {
 	unsigned long nr_running, max_nr_running, min_nr_running;
 	unsigned long load, max_cpu_load, min_cpu_load;
@@ -3671,7 +3694,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	max_nr_running = 0;
 	min_nr_running = ~0UL;
 
-	for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
 		struct rq *rq = cpu_rq(i);
 
 		nr_running = rq->nr_running;
@@ -3795,13 +3818,11 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 /**
  * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
  * @env: The load balancing environment.
- * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
  * @sds: variable to hold the statistics for this sched_domain.
  */
 static inline void update_sd_lb_stats(struct lb_env *env,
-					const struct cpumask *cpus,
-					int *balance, struct sd_lb_stats *sds)
+					int *balance, struct sd_lb_stats *sds)
 {
 	struct sched_domain *child = env->sd->child;
 	struct sched_group *sg = env->sd->groups;
@@ -3818,8 +3839,7 @@ static inline void update_sd_lb_stats(struct lb_env *env,
 
 		local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg));
 		memset(&sgs, 0, sizeof(sgs));
-		update_sg_lb_stats(env, sg, load_idx, local_group,
-				   cpus, balance, &sgs);
+		update_sg_lb_stats(env, sg, load_idx, local_group, balance, &sgs);
 
 		if (local_group && !(*balance))
 			return;
@@ -4055,7 +4075,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
  * to restore balance.
  *
  * @env: The load balancing environment.
- * @cpus: The set of CPUs under consideration for load-balancing.
  * @balance: Pointer to a variable indicating if this_cpu
  *	is the appropriate cpu to perform load balancing at this_level.
  *
@@ -4065,7 +4084,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
  *	put to idle by rebalancing its tasks onto our group.
  */
 static struct sched_group *
-find_busiest_group(struct lb_env *env, const struct cpumask *cpus, int *balance)
+find_busiest_group(struct lb_env *env, int *balance)
 {
 	struct sd_lb_stats sds;
 
@@ -4075,7 +4094,7 @@ find_busiest_group(struct lb_env *env, const struct cpumask *cpus, int *balance)
 	 * Compute the various statistics relavent for load balancing at
 	 * this level.
 	 */
-	update_sd_lb_stats(env, cpus, balance, &sds);
+	update_sd_lb_stats(env, balance, &sds);
 
 	/*
 	 * this_cpu is not the appropriate cpu to perform load balancing at
@@ -4155,8 +4174,7 @@ ret:
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
 static struct rq *find_busiest_queue(struct lb_env *env,
-				     struct sched_group *group,
-				     const struct cpumask *cpus)
+				     struct sched_group *group)
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long max_load = 0;
@@ -4171,7 +4189,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 		if (!capacity)
 			capacity = fix_small_capacity(env->sd, group);
 
-		if (!cpumask_test_cpu(i, cpus))
+		if (!cpumask_test_cpu(i, env->cpus))
 			continue;
 
 		rq = cpu_rq(i);
@@ -4252,6 +4270,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		.dst_grpmask    = sched_group_cpus(sd->groups),
 		.idle		= idle,
 		.loop_break	= sched_nr_migrate_break,
+		.cpus		= cpus,
 	};
 
 	cpumask_copy(cpus, cpu_active_mask);
@@ -4260,7 +4279,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	schedstat_inc(sd, lb_count[idle]);
 
 redo:
-	group = find_busiest_group(&env, cpus, balance);
+	group = find_busiest_group(&env, balance);
 
 	if (*balance == 0)
 		goto out_balanced;
@@ -4270,7 +4289,7 @@ redo:
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(&env, group, cpus);
+	busiest = find_busiest_queue(&env, group);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
@@ -4294,11 +4313,10 @@ redo:
 	env.src_rq = busiest;
 	env.loop_max = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
+	update_h_load(env.src_cpu);
 more_balance:
 	local_irq_save(flags);
 	double_rq_lock(this_rq, busiest);
-	if (!env.loop)
-		update_h_load(env.src_cpu);
 
 	/*
 	 * cur_ld_moved - load moved in current iteration
@@ -4950,6 +4968,9 @@ static void rq_online_fair(struct rq *rq)
 static void rq_offline_fair(struct rq *rq)
 {
 	update_sysctl();
+
+	/* Ensure any throttled groups are reachable by pick_next_task */
+	unthrottle_offline_cfs_rqs(rq);
 }
 
 #endif /* CONFIG_SMP */
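The update_h_load() hunks above rate-limit the hierarchical load recomputation to once per jiffy per runqueue, and load_balance() now triggers it once before the migration loop instead of under the runqueue locks. Below is a standalone sketch of that once-per-tick throttle pattern only (plain C, hypothetical names; it is not the kernel code).

/*
 * Standalone sketch (hypothetical names, not kernel code) of a once-per-tick
 * throttle: skip the expensive recomputation if it already ran this tick.
 */
#include <stdio.h>

static unsigned long jiffies = 1000;	/* stand-in tick counter (nonzero so the first call runs) */
static unsigned long h_load_throttle;	/* tick of the last recomputation */

static void walk_load_tree(void)
{
	printf("recomputing hierarchical load at tick %lu\n", jiffies);
}

static void update_h_load(void)
{
	unsigned long now = jiffies;

	if (h_load_throttle == now)	/* already refreshed during this tick */
		return;

	h_load_throttle = now;
	walk_load_tree();
}

int main(void)
{
	update_h_load();	/* runs */
	update_h_load();	/* skipped: same tick */
	jiffies++;
	update_h_load();	/* runs again on the next tick */
	return 0;
}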