sched/numa: Use effective_load() to balance NUMA loads

When CONFIG_FAIR_GROUP_SCHED is enabled, the load that a task places on a CPU is determined by the group the task is in. The active groups on the source and destination CPU can be different, resulting in a different load contribution by the same task at its source and at its destination. As a result, the load needs to be calculated separately for each CPU, instead of being estimated once with task_h_load().

Getting this calculation right allows some workloads to converge, where previously the last thread could get stuck on another node, without being able to migrate to its final destination.

Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: mgorman@suse.de
Cc: chegu_vinod@hp.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1403538378-31571-3-git-send-email-riel@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
author: Rik van Riel <riel@redhat.com> 2014-06-23 11:46:14 -0400
committer: Ingo Molnar <mingo@kernel.org> 2014-07-05 05:17:35 -0400
commit: 6dc1a672ab15604947361dcd02e459effa09bad5 (patch)
tree: 1489a94ff9417433749d4adc181ed6952cb807c1 /kernel/sched
parent: 28a21745190a0ca613cab817bfe3dc65373158bf (diff)
1 files changed, 14 insertions, 6 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f287d0b4007a..d6526d2cf173 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1151,6 +1151,7 @@ static void task_numa_compare(struct task_numa_env *env,
        struct rq *src_rq = cpu_rq(env->src_cpu);
        struct rq *dst_rq = cpu_rq(env->dst_cpu);
        struct task_struct *cur;
+        struct task_group *tg;
        long src_load, dst_load;
        long load;
        long imp = (groupimp > 0) ? groupimp : taskimp;
@@ -1225,14 +1226,21 @@ static void task_numa_compare(struct task_numa_env *env,
         * In the overloaded case, try and keep the load balanced.
         */
 balance:
-        load = task_h_load(env->p);
+        src_load = env->src_stats.load;
-        dst_load = env->dst_stats.load + load;
+        dst_load = env->dst_stats.load;
-        src_load = env->src_stats.load - load;
+        /* Calculate the effect of moving env->p from src to dst. */
+        load = env->p->se.load.weight;
+        tg = task_group(env->p);
+        src_load += effective_load(tg, env->src_cpu, -load, -load);
+        dst_load += effective_load(tg, env->dst_cpu, load, load);
        if (cur) {
-                load = task_h_load(cur);
+                /* Cur moves in the opposite direction. */
-                dst_load -= load;
+                load = cur->se.load.weight;
-                src_load += load;
+                tg = task_group(cur);
+                src_load += effective_load(tg, env->src_cpu, load, load);
+                dst_load += effective_load(tg, env->dst_cpu, -load, -load);
        }
        if (load_too_imbalanced(src_load, dst_load, env))
author	Rik van Riel <riel@redhat.com>	2014-06-23 11:46:14 -0400
committer	Ingo Molnar <mingo@kernel.org>	2014-07-05 05:17:35 -0400
commit	6dc1a672ab15604947361dcd02e459effa09bad5 (patch)
tree	1489a94ff9417433749d4adc181ed6952cb807c1 /kernel/sched
parent	28a21745190a0ca613cab817bfe3dc65373158bf (diff)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f287d0b4007a..d6526d2cf173 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1151,6 +1151,7 @@ static void task_numa_compare(struct task_numa_env *env,
1151	struct rq *src_rq = cpu_rq(env->src_cpu);	1151	struct rq *src_rq = cpu_rq(env->src_cpu);
1152	struct rq *dst_rq = cpu_rq(env->dst_cpu);	1152	struct rq *dst_rq = cpu_rq(env->dst_cpu);
1153	struct task_struct *cur;	1153	struct task_struct *cur;
		1154	struct task_group *tg;
1154	long src_load, dst_load;	1155	long src_load, dst_load;
1155	long load;	1156	long load;
1156	long imp = (groupimp > 0) ? groupimp : taskimp;	1157	long imp = (groupimp > 0) ? groupimp : taskimp;
@@ -1225,14 +1226,21 @@ static void task_numa_compare(struct task_numa_env *env,
1225	* In the overloaded case, try and keep the load balanced.	1226	* In the overloaded case, try and keep the load balanced.
1226	*/	1227	*/
1227	balance:	1228	balance:
1228	load = task_h_load(env->p);	1229	src_load = env->src_stats.load;
1229	dst_load = env->dst_stats.load + load;	1230	dst_load = env->dst_stats.load;
1230	src_load = env->src_stats.load - load;	1231
		1232	/* Calculate the effect of moving env->p from src to dst. */
		1233	load = env->p->se.load.weight;
		1234	tg = task_group(env->p);
		1235	src_load += effective_load(tg, env->src_cpu, -load, -load);
		1236	dst_load += effective_load(tg, env->dst_cpu, load, load);
1231		1237
1232	if (cur) {	1238	if (cur) {
1233	load = task_h_load(cur);	1239	/* Cur moves in the opposite direction. */
1234	dst_load -= load;	1240	load = cur->se.load.weight;
1235	src_load += load;	1241	tg = task_group(cur);
		1242	src_load += effective_load(tg, env->src_cpu, load, load);
		1243	dst_load += effective_load(tg, env->dst_cpu, -load, -load);
1236	}	1244	}
1237		1245
1238	if (load_too_imbalanced(src_load, dst_load, env))	1246	if (load_too_imbalanced(src_load, dst_load, env))