Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r-- | kernel/sched/fair.c | 77
1 file changed, 49 insertions, 28 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 22321db64952..96e2b18b6283 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2052,7 +2052,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	hrtimer_cancel(&cfs_b->slack_timer);
 }

-void unthrottle_offline_cfs_rqs(struct rq *rq)
+static void unthrottle_offline_cfs_rqs(struct rq *rq)
 {
 	struct cfs_rq *cfs_rq;

@@ -2106,7 +2106,7 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
 	return NULL;
 }
 static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
-void unthrottle_offline_cfs_rqs(struct rq *rq) {}
+static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}

 #endif /* CONFIG_CFS_BANDWIDTH */

@@ -2637,6 +2637,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	int cpu = smp_processor_id();
 	int prev_cpu = task_cpu(p);
 	struct sched_domain *sd;
+	struct sched_group *sg;
+	int i;

 	/*
 	 * If the task is going to be woken-up on this cpu and if it is
@@ -2653,17 +2655,29 @@ static int select_idle_sibling(struct task_struct *p, int target)
 		return prev_cpu;

 	/*
-	 * Otherwise, check assigned siblings to find an elegible idle cpu.
+	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
 	sd = rcu_dereference(per_cpu(sd_llc, target));
-
 	for_each_lower_domain(sd) {
-		if (!cpumask_test_cpu(sd->idle_buddy, tsk_cpus_allowed(p)))
-			continue;
-		if (idle_cpu(sd->idle_buddy))
-			return sd->idle_buddy;
-	}
+		sg = sd->groups;
+		do {
+			if (!cpumask_intersects(sched_group_cpus(sg),
+						tsk_cpus_allowed(p)))
+				goto next;

+			for_each_cpu(i, sched_group_cpus(sg)) {
+				if (!idle_cpu(i))
+					goto next;
+			}
+
+			target = cpumask_first_and(sched_group_cpus(sg),
+					tsk_cpus_allowed(p));
+			goto done;
+next:
+			sg = sg->next;
+		} while (sg != sd->groups);
+	}
+done:
 	return target;
 }

@@ -3069,6 +3083,9 @@ struct lb_env {
 	int			new_dst_cpu;
 	enum cpu_idle_type	idle;
 	long			imbalance;
+	/* The set of CPUs under consideration for load-balancing */
+	struct cpumask		*cpus;
+
 	unsigned int		flags;

 	unsigned int		loop;
@@ -3384,6 +3401,14 @@ static int tg_load_down(struct task_group *tg, void *data)

 static void update_h_load(long cpu)
 {
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long now = jiffies;
+
+	if (rq->h_load_throttle == now)
+		return;
+
+	rq->h_load_throttle = now;
+
 	rcu_read_lock();
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 	rcu_read_unlock();
@@ -3647,14 +3672,12 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
  * @group: sched_group whose statistics are to be updated.
  * @load_idx: Load index of sched_domain of this_cpu for load calc.
  * @local_group: Does group contain this_cpu.
- * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
  * @sgs: variable to hold the statistics for this group.
  */
 static inline void update_sg_lb_stats(struct lb_env *env,
 			struct sched_group *group, int load_idx,
-			int local_group, const struct cpumask *cpus,
-			int *balance, struct sg_lb_stats *sgs)
+			int local_group, int *balance, struct sg_lb_stats *sgs)
 {
 	unsigned long nr_running, max_nr_running, min_nr_running;
 	unsigned long load, max_cpu_load, min_cpu_load;
@@ -3671,7 +3694,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	max_nr_running = 0;
 	min_nr_running = ~0UL;

-	for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
 		struct rq *rq = cpu_rq(i);

 		nr_running = rq->nr_running;
@@ -3795,13 +3818,11 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 /**
  * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
  * @env: The load balancing environment.
- * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
  * @sds: variable to hold the statistics for this sched_domain.
  */
 static inline void update_sd_lb_stats(struct lb_env *env,
-					const struct cpumask *cpus,
-					int *balance, struct sd_lb_stats *sds)
+					int *balance, struct sd_lb_stats *sds)
 {
 	struct sched_domain *child = env->sd->child;
 	struct sched_group *sg = env->sd->groups;
@@ -3818,8 +3839,7 @@ static inline void update_sd_lb_stats(struct lb_env *env,

 		local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg));
 		memset(&sgs, 0, sizeof(sgs));
-		update_sg_lb_stats(env, sg, load_idx, local_group,
-					cpus, balance, &sgs);
+		update_sg_lb_stats(env, sg, load_idx, local_group, balance, &sgs);

 		if (local_group && !(*balance))
 			return;
@@ -4055,7 +4075,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
  * to restore balance.
  *
  * @env: The load balancing environment.
- * @cpus: The set of CPUs under consideration for load-balancing.
  * @balance: Pointer to a variable indicating if this_cpu
  *	is the appropriate cpu to perform load balancing at this_level.
  *
@@ -4065,7 +4084,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
  *		put to idle by rebalancing its tasks onto our group.
  */
 static struct sched_group *
-find_busiest_group(struct lb_env *env, const struct cpumask *cpus, int *balance)
+find_busiest_group(struct lb_env *env, int *balance)
 {
 	struct sd_lb_stats sds;

@@ -4075,7 +4094,7 @@ find_busiest_group(struct lb_env *env, const struct cpumask *cpus, int *balance)
 	 * Compute the various statistics relavent for load balancing at
 	 * this level.
 	 */
-	update_sd_lb_stats(env, cpus, balance, &sds);
+	update_sd_lb_stats(env, balance, &sds);

 	/*
 	 * this_cpu is not the appropriate cpu to perform load balancing at
@@ -4155,8 +4174,7 @@ ret:
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
 static struct rq *find_busiest_queue(struct lb_env *env,
-				     struct sched_group *group,
-				     const struct cpumask *cpus)
+				     struct sched_group *group)
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long max_load = 0;
@@ -4171,7 +4189,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 		if (!capacity)
 			capacity = fix_small_capacity(env->sd, group);

-		if (!cpumask_test_cpu(i, cpus))
+		if (!cpumask_test_cpu(i, env->cpus))
 			continue;

 		rq = cpu_rq(i);
@@ -4252,6 +4270,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		.dst_grpmask	= sched_group_cpus(sd->groups),
 		.idle		= idle,
 		.loop_break	= sched_nr_migrate_break,
+		.cpus		= cpus,
 	};

 	cpumask_copy(cpus, cpu_active_mask);
@@ -4260,7 +4279,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	schedstat_inc(sd, lb_count[idle]);

 redo:
-	group = find_busiest_group(&env, cpus, balance);
+	group = find_busiest_group(&env, balance);

 	if (*balance == 0)
 		goto out_balanced;
@@ -4270,7 +4289,7 @@ redo:
 		goto out_balanced;
 	}

-	busiest = find_busiest_queue(&env, group, cpus);
+	busiest = find_busiest_queue(&env, group);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
@@ -4294,11 +4313,10 @@ redo:
 	env.src_rq = busiest;
 	env.loop_max = min(sysctl_sched_nr_migrate, busiest->nr_running);

+	update_h_load(env.src_cpu);
 more_balance:
 	local_irq_save(flags);
 	double_rq_lock(this_rq, busiest);
-	if (!env.loop)
-		update_h_load(env.src_cpu);

 	/*
 	 * cur_ld_moved - load moved in current iteration
@@ -4950,6 +4968,9 @@ static void rq_online_fair(struct rq *rq)
 static void rq_offline_fair(struct rq *rq)
 {
 	update_sysctl();
+
+	/* Ensure any throttled groups are reachable by pick_next_task */
+	unthrottle_offline_cfs_rqs(rq);
 }

 #endif /* CONFIG_SMP */