author     Suresh Siddha <suresh.b.siddha@intel.com>    2010-03-31 19:47:45 -0400
committer  Ingo Molnar <mingo@elte.hu>                  2010-04-23 05:02:02 -0400
commit     99bd5e2f245d8cd17d040c82d40becdb3efd9b69
tree       9dbfd8d1a9148bad45e5c3c067a05f414134083b  /kernel/sched_fair.c
parent     669c55e9f99b90e46eaa0f98a67ec53d46dc969a
sched: Fix select_idle_sibling() logic in select_task_rq_fair()
Issues with the current select_idle_sibling() logic in select_task_rq_fair(),
in the context of a task wake-up:

a) Once we select the idle sibling, we use that domain (spanning the cpu that
   the task is being woken up on and the idle sibling that we found) in our
   wake_affine() decisions. This domain is completely different from the
   domain we are supposed to use: the one spanning the cpu that the task is
   being woken up on and the cpu where the task previously ran.

b) We do the select_idle_sibling() check only for the cpu that the task is
   being woken up on. If select_task_rq_fair() selects the previously-run
   cpu for waking the task, doing a select_idle_sibling() check for that cpu
   would also help, but we don't do this currently.

c) In scenarios where the cpu that the task is being woken up on is busy but
   its HT siblings are idle, we select the task to be woken up on an idle HT
   sibling instead of on the core where it previously ran, even when that
   core is now completely idle. I.e., we are not basing the decision on
   wake_affine() but directly selecting an idle sibling, which can cause an
   imbalance at the SMT/MC level that is only corrected later by the
   periodic load balancer (see the excerpt of the old code right after this
   list).
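For concreteness, issue c) stems from the old domain-iteration path in
select_task_rq_fair(), removed by the third hunk of the patch below. A trimmed
excerpt of that old code (the comments here are ours, not the original's):

        /*
         * Old flow: while iterating the domains, any idle cpu found in a
         * cache-sharing domain is adopted as the wake target right away,
         * before wake_affine() has compared it against prev_cpu ...
         */
        if (!cpu_idle && tmp->flags & SD_SHARE_PKG_RESOURCES)
                target = select_idle_sibling(p, tmp, target);

        if (target >= 0) {
                ...
                cpu = target;   /* ... so an idle HT sibling wins even when
                                 * the previously-used core is fully idle */
        }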
Fix this by first going through the load-imbalance calculations using
wake_affine(), and only once we have decided between the woken-up cpu and the
previously-run cpu, choose a possible idle sibling near that cpu to wake the
task on.
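In code terms, the fixed wake-up path looks as follows (simplified from the
final hunk of the patch below): wake_affine() now decides between the waking
cpu and prev_cpu first, and only then do we search for an idle sibling near
the winner.

        if (affine_sd) {
                if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
                        return select_idle_sibling(p, cpu);
                else
                        return select_idle_sibling(p, prev_cpu);
        }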
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1270079265.7835.8.camel@sbs-t61.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched_fair.c')
 kernel/sched_fair.c | 82 ++++++++++++++++++++---------------------
 1 file changed, 40 insertions(+), 42 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0a413c7e3ab8..cbd8b8a296d1 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1375,29 +1375,48 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 /*
  * Try and locate an idle CPU in the sched_domain.
  */
-static int
-select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target)
+static int select_idle_sibling(struct task_struct *p, int target)
 {
         int cpu = smp_processor_id();
         int prev_cpu = task_cpu(p);
+        struct sched_domain *sd;
         int i;
 
         /*
-         * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE
-         * test in select_task_rq_fair) and the prev_cpu is idle then that's
-         * always a better target than the current cpu.
+         * If the task is going to be woken-up on this cpu and if it is
+         * already idle, then it is the right target.
+         */
+        if (target == cpu && idle_cpu(cpu))
+                return cpu;
+
+        /*
+         * If the task is going to be woken-up on the cpu where it previously
+         * ran and if it is currently idle, then it the right target.
          */
-        if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running)
+        if (target == prev_cpu && idle_cpu(prev_cpu))
                 return prev_cpu;
 
         /*
-         * Otherwise, iterate the domain and find an elegible idle cpu.
+         * Otherwise, iterate the domains and find an elegible idle cpu.
          */
-        for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
-                if (!cpu_rq(i)->cfs.nr_running) {
-                        target = i;
-                        break;
+        for_each_domain(target, sd) {
+                if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
+                        break;
+
+                for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
+                        if (idle_cpu(i)) {
+                                target = i;
+                                break;
+                        }
                 }
+
+                /*
+                 * Lets stop looking for an idle sibling when we reached
+                 * the domain that spans the current cpu and prev_cpu.
+                 */
+                if (cpumask_test_cpu(cpu, sched_domain_span(sd)) &&
+                    cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
+                        break;
         }
 
         return target;
@@ -1421,7 +1440,7 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
         int cpu = smp_processor_id();
         int prev_cpu = task_cpu(p);
         int new_cpu = cpu;
-        int want_affine = 0, cpu_idle = !current->pid;
+        int want_affine = 0;
         int want_sd = 1;
         int sync = wake_flags & WF_SYNC;
 
@@ -1460,36 +1479,13 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
         }
 
         /*
-         * While iterating the domains looking for a spanning
-         * WAKE_AFFINE domain, adjust the affine target to any idle cpu
-         * in cache sharing domains along the way.
+         * If both cpu and prev_cpu are part of this domain,
+         * cpu is a valid SD_WAKE_AFFINE target.
          */
-        if (want_affine) {
-                int target = -1;
-
-                /*
-                 * If both cpu and prev_cpu are part of this domain,
-                 * cpu is a valid SD_WAKE_AFFINE target.
-                 */
-                if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp)))
-                        target = cpu;
-
-                /*
-                 * If there's an idle sibling in this domain, make that
-                 * the wake_affine target instead of the current cpu.
-                 */
-                if (!cpu_idle && tmp->flags & SD_SHARE_PKG_RESOURCES)
-                        target = select_idle_sibling(p, tmp, target);
-
-                if (target >= 0) {
-                        if (tmp->flags & SD_WAKE_AFFINE) {
-                                affine_sd = tmp;
-                                want_affine = 0;
-                                if (target != cpu)
-                                        cpu_idle = 1;
-                        }
-                        cpu = target;
-                }
+        if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
+            cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
+                affine_sd = tmp;
+                want_affine = 0;
         }
 
         if (!want_sd && !want_affine)
@@ -1520,8 +1516,10 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 #endif
 
         if (affine_sd) {
-                if (cpu_idle || cpu == prev_cpu || wake_affine(affine_sd, p, sync))
-                        return cpu;
+                if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
+                        return select_idle_sibling(p, cpu);
+                else
+                        return select_idle_sibling(p, prev_cpu);
         }
 
         while (sd) {
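To illustrate how the patched select_idle_sibling() walks the domain
hierarchy, here is a small stand-alone userspace model. It is a sketch only:
the 8-cpu topology (SMT pairs within 4-cpu MC groups), the idle[] snapshot,
and helpers such as domain_span() are hypothetical, and the kernel's
sched_domain machinery is reduced to fixed cpu ranges.

#include <stdio.h>
#include <stdbool.h>

#define NR_CPUS   8
#define NR_LEVELS 3     /* 0: SMT pair, 1: MC (4 cpus), 2: NODE (all 8) */

/* Hypothetical snapshot: cpu 2 is busy, its HT sibling 3 is idle, and
 * prev_cpu 6 on the other package is idle too. */
static const bool idle[NR_CPUS] = { false, false, false, true,
                                    true,  true,  true,  true };

/* Model of a sched_domain span: cpus [first, last], plus whether this
 * level shares package resources (a stand-in for SD_SHARE_PKG_RESOURCES). */
static void domain_span(int cpu, int level, int *first, int *last,
                        bool *share_pkg)
{
        int width = 2 << level;                 /* 2, 4, 8 cpus wide */

        *first = cpu & ~(width - 1);
        *last = *first + width - 1;
        *share_pkg = (level < 2);               /* NODE shares no cache */
}

static bool spans(int first, int last, int cpu)
{
        return first <= cpu && cpu <= last;
}

/* Mirrors the control flow of the patched select_idle_sibling(). */
static int select_idle_sibling(int cpu, int prev_cpu, int target)
{
        int start = target;

        if (target == cpu && idle[cpu])
                return cpu;
        if (target == prev_cpu && idle[prev_cpu])
                return prev_cpu;

        /* walk the domains of the original target, bottom up */
        for (int level = 0; level < NR_LEVELS; level++) {
                int first, last;
                bool share_pkg;

                domain_span(start, level, &first, &last, &share_pkg);
                if (!share_pkg)
                        break;

                for (int i = first; i <= last; i++) {
                        if (idle[i]) {
                                target = i;
                                break;
                        }
                }

                /* stop at the domain spanning both cpu and prev_cpu */
                if (spans(first, last, cpu) && spans(first, last, prev_cpu))
                        break;
        }
        return target;
}

int main(void)
{
        /* wake_affine() chose the waking cpu: search near busy cpu 2 */
        printf("near waking cpu: %d\n", select_idle_sibling(2, 6, 2));
        /* wake_affine() chose prev_cpu: idle cpu 6 is returned directly */
        printf("near prev cpu:   %d\n", select_idle_sibling(2, 6, 6));
        return 0;
}

Run as written, this hypothetical model prints 3 and 6: when wake_affine()
favours the busy waking cpu 2, the search settles on its idle HT sibling 3;
when it favours the idle prev_cpu 6, that cpu is returned directly, which is
exactly what issues a)-c) above said the old code could not do. The real
kernel walk uses for_each_domain() and cpumasks rather than contiguous
ranges, but the stop conditions match: give up once a domain no longer shares
package resources, and stop widening the search once the domain already spans
both cpu and prev_cpu.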