author		Rik van Riel <riel@redhat.com>	2014-07-28 14:16:28 -0400
committer	Ingo Molnar <mingo@kernel.org>	2014-08-12 06:48:19 -0400
commit		caeb178c60f4f93f1b45c0bc056b5cf6d217b67f (patch)
tree		e504af44e202c14e2422f492a922af93cda816f7 /kernel
parent		743cb1ff191f00fee653212bdbcee1e56086d6ce (diff)
sched/fair: Make update_sd_pick_busiest() return 'true' on a busier sd
Currently update_sd_pick_busiest only identifies the busiest sd
that is either overloaded, or has a group imbalance. When no
sd is imbalanced or overloaded, the load balancer fails to find
the busiest domain.

This breaks load balancing between domains that are not overloaded,
in the !SD_ASYM_PACKING case. This patch makes update_sd_pick_busiest
return true when the busiest sd yet is encountered.

Groups are ranked in the order overloaded > imbalanced > other,
with higher ranked groups getting priority even when their load
is lower. This is necessary due to the possibility of unequal
capacities and cpumasks between domains within a sched group.

Behaviour for SD_ASYM_PACKING does not seem to match the comment,
but I have no hardware to test that so I have left the behaviour
of that code unchanged.

Enum for group classification suggested by Peter Zijlstra.

Signed-off-by: Rik van Riel <riel@redhat.com>
[peterz: replaced sg_lb_stats::group_imb with the new enum group_type
 in an attempt to avoid endless recalculation]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Vincent Guittot <vincent.guittot@linaro.org>
Acked-by: Michael Neuling <mikey@neuling.org>
Cc: ktkhai@parallels.com
Cc: tim.c.chen@linux.intel.com
Cc: nicolas.pitre@linaro.org
Cc: jhladky@redhat.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20140729152743.GI3935@laptop
Signed-off-by: Ingo Molnar <mingo@kernel.org>
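For reference, the ranking described above boils down to an ordered enum that
is compared before average load is even considered. A minimal standalone
sketch of that decision follows; the struct and function names below are
illustrative stand-ins, not the kernel's own sg_lb_stats or
update_sd_pick_busiest:

#include <stdbool.h>

/* Ordered so that a plain integer comparison expresses priority. */
enum group_type {
	group_other = 0,
	group_imbalanced,
	group_overloaded,
};

struct group_stats {			/* stand-in for struct sg_lb_stats */
	enum group_type group_type;
	unsigned long avg_load;
};

/* Return true if 'candidate' should replace 'busiest' as the busiest group. */
static bool picks_new_busiest(const struct group_stats *candidate,
			      const struct group_stats *busiest)
{
	if (candidate->group_type > busiest->group_type)
		return true;	/* higher class wins even on lower load */
	if (candidate->group_type < busiest->group_type)
		return false;	/* lower class never wins on load alone */
	return candidate->avg_load > busiest->avg_load;	/* same class: heavier load wins */
}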
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched/fair.c	49
1 file changed, 37 insertions, 12 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e9477e6193fc..94377254254e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5559,6 +5559,13 @@ static unsigned long task_h_load(struct task_struct *p)
 #endif
 
 /********** Helpers for find_busiest_group ************************/
+
+enum group_type {
+	group_other = 0,
+	group_imbalanced,
+	group_overloaded,
+};
+
 /*
  * sg_lb_stats - stats of a sched_group required for load_balancing
  */
@@ -5572,7 +5579,7 @@ struct sg_lb_stats {
 	unsigned int group_capacity_factor;
 	unsigned int idle_cpus;
 	unsigned int group_weight;
-	int group_imb; /* Is there an imbalance in the group ? */
+	enum group_type group_type;
 	int group_has_free_capacity;
 #ifdef CONFIG_NUMA_BALANCING
 	unsigned int nr_numa_running;
@@ -5610,6 +5617,8 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
 		.total_capacity = 0UL,
 		.busiest_stat = {
 			.avg_load = 0UL,
+			.sum_nr_running = 0,
+			.group_type = group_other,
 		},
 	};
 }
@@ -5891,6 +5900,18 @@ static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *gro
 	return capacity_factor;
 }
 
+static enum group_type
+group_classify(struct sched_group *group, struct sg_lb_stats *sgs)
+{
+	if (sgs->sum_nr_running > sgs->group_capacity_factor)
+		return group_overloaded;
+
+	if (sg_imbalanced(group))
+		return group_imbalanced;
+
+	return group_other;
+}
+
 /**
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
  * @env: The load balancing environment.
@@ -5942,9 +5963,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 		sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
 
 	sgs->group_weight = group->group_weight;
-
-	sgs->group_imb = sg_imbalanced(group);
 	sgs->group_capacity_factor = sg_capacity_factor(env, group);
+	sgs->group_type = group_classify(group, sgs);
 
 	if (sgs->group_capacity_factor > sgs->sum_nr_running)
 		sgs->group_has_free_capacity = 1;
@@ -5968,13 +5988,19 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 				  struct sched_group *sg,
 				  struct sg_lb_stats *sgs)
 {
-	if (sgs->avg_load <= sds->busiest_stat.avg_load)
-		return false;
+	struct sg_lb_stats *busiest = &sds->busiest_stat;
 
-	if (sgs->sum_nr_running > sgs->group_capacity_factor)
+	if (sgs->group_type > busiest->group_type)
 		return true;
 
-	if (sgs->group_imb)
+	if (sgs->group_type < busiest->group_type)
+		return false;
+
+	if (sgs->avg_load <= busiest->avg_load)
+		return false;
+
+	/* This is the busiest node in its class. */
+	if (!(env->sd->flags & SD_ASYM_PACKING))
 		return true;
 
 	/*
@@ -5982,8 +6008,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 	 * numbered CPUs in the group, therefore mark all groups
 	 * higher than ourself as busy.
 	 */
-	if ((env->sd->flags & SD_ASYM_PACKING) && sgs->sum_nr_running &&
-	    env->dst_cpu < group_first_cpu(sg)) {
+	if (sgs->sum_nr_running && env->dst_cpu < group_first_cpu(sg)) {
 		if (!sds->busiest)
 			return true;
 
@@ -6228,7 +6253,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 	local = &sds->local_stat;
 	busiest = &sds->busiest_stat;
 
-	if (busiest->group_imb) {
+	if (busiest->group_type == group_imbalanced) {
 		/*
 		 * In the group_imb case we cannot rely on group-wide averages
 		 * to ensure cpu-load equilibrium, look at wider averages. XXX
@@ -6248,7 +6273,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 		return fix_small_imbalance(env, sds);
 	}
 
-	if (busiest->sum_nr_running > busiest->group_capacity_factor) {
+	if (busiest->group_type == group_overloaded) {
 		/*
 		 * Don't want to pull so many tasks that a group would go idle.
 		 * Except of course for the group_imb case, since then we might
@@ -6337,7 +6362,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 	 * work because they assume all things are equal, which typically
 	 * isn't true due to cpus_allowed constraints and the like.
 	 */
-	if (busiest->group_imb)
+	if (busiest->group_type == group_imbalanced)
 		goto force_balance;
 
 	/* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */