diff options
author | Mike Galbraith <efault@gmx.de> | 2009-10-27 10:35:38 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-11-04 12:46:22 -0500 |
commit | a1f84a3ab8e002159498814eaa7e48c33752b04b (patch) | |
tree | 070b6c105c510460b314c20e17de4b5b89eb6a48 | |
parent | acc3f5d7cabbfd6cec71f0c1f9900621fa2d6ae7 (diff) |
sched: Check for an idle shared cache in select_task_rq_fair()
When waking affine, check for an idle shared cache and, if one is
found, wake to that CPU/sibling instead of the waker's CPU.
This improves pgsql+oltp ramp-up by roughly 8%, and possibly more
for other loads, depending on overlap. The trade-off is a
roughly 1% peak downturn if tasks are truly synchronous.
Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: <stable@kernel.org>
LKML-Reference: <1256654138.17752.7.camel@marge.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | kernel/sched_fair.c | 33 |
1 files changed, 29 insertions, 4 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 4e777b47eeda..da87385683cc 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -1372,11 +1372,36 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
1372 | want_sd = 0; | 1372 | want_sd = 0; |
1373 | } | 1373 | } |
1374 | 1374 | ||
1375 | if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && | 1375 | if (want_affine && (tmp->flags & SD_WAKE_AFFINE)) { |
1376 | cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { | 1376 | int candidate = -1, i; |
1377 | 1377 | ||
1378 | affine_sd = tmp; | 1378 | if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) |
1379 | want_affine = 0; | 1379 | candidate = cpu; |
1380 | |||
1381 | /* | ||
1382 | * Check for an idle shared cache. | ||
1383 | */ | ||
1384 | if (tmp->flags & SD_PREFER_SIBLING) { | ||
1385 | if (candidate == cpu) { | ||
1386 | if (!cpu_rq(prev_cpu)->cfs.nr_running) | ||
1387 | candidate = prev_cpu; | ||
1388 | } | ||
1389 | |||
1390 | if (candidate == -1 || candidate == cpu) { | ||
1391 | for_each_cpu(i, sched_domain_span(tmp)) { | ||
1392 | if (!cpu_rq(i)->cfs.nr_running) { | ||
1393 | candidate = i; | ||
1394 | break; | ||
1395 | } | ||
1396 | } | ||
1397 | } | ||
1398 | } | ||
1399 | |||
1400 | if (candidate >= 0) { | ||
1401 | affine_sd = tmp; | ||
1402 | want_affine = 0; | ||
1403 | cpu = candidate; | ||
1404 | } | ||
1380 | } | 1405 | } |
1381 | 1406 | ||
1382 | if (!want_sd && !want_affine) | 1407 | if (!want_sd && !want_affine) |