diff options
| author | Suresh Siddha <suresh.b.siddha@intel.com> | 2010-03-31 19:47:45 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2010-04-23 05:02:02 -0400 |
| commit | 99bd5e2f245d8cd17d040c82d40becdb3efd9b69 (patch) | |
| tree | 9dbfd8d1a9148bad45e5c3c067a05f414134083b | |
| parent | 669c55e9f99b90e46eaa0f98a67ec53d46dc969a (diff) | |
sched: Fix select_idle_sibling() logic in select_task_rq_fair()
Issues in the current select_idle_sibling() logic in select_task_rq_fair()
in the context of a task wake-up:
a) Once we select the idle sibling, we use that domain (spanning the cpu that
the task is currently woken-up on and the idle sibling that we found) in our
wake_affine() decisions. This domain is completely different from the
domain (the one we are supposed to use) that spans the cpu the task is
currently woken-up on and the cpu where the task previously ran.
b) We do select_idle_sibling() check only for the cpu that the task is
currently woken-up on. If select_task_rq_fair() selects the previously run
cpu for waking the task, doing a select_idle_sibling() check
for that cpu also helps and we don't do this currently.
c) In the scenarios where the cpu on which the task is woken-up is busy but
its HT siblings are idle, we select the task to be woken-up
on an idle HT sibling instead of a core on which it previously ran
and which is currently completely idle. i.e., we are not taking decisions based on
wake_affine() but directly selecting an idle sibling, which can cause
an imbalance at the SMT/MC level that will be later corrected by the
periodic load balancer.
Fix this by first going through the load imbalance calculations using
wake_affine() and, once we have decided between the woken-up cpu and the
previously-ran cpu, then choosing a possible idle sibling to wake the task up on.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1270079265.7835.8.camel@sbs-t61.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
| -rw-r--r-- | kernel/sched_fair.c | 82 |
1 files changed, 40 insertions, 42 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 0a413c7e3ab8..cbd8b8a296d1 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
| @@ -1375,29 +1375,48 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | |||
| 1375 | /* | 1375 | /* |
| 1376 | * Try and locate an idle CPU in the sched_domain. | 1376 | * Try and locate an idle CPU in the sched_domain. |
| 1377 | */ | 1377 | */ |
| 1378 | static int | 1378 | static int select_idle_sibling(struct task_struct *p, int target) |
| 1379 | select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target) | ||
| 1380 | { | 1379 | { |
| 1381 | int cpu = smp_processor_id(); | 1380 | int cpu = smp_processor_id(); |
| 1382 | int prev_cpu = task_cpu(p); | 1381 | int prev_cpu = task_cpu(p); |
| 1382 | struct sched_domain *sd; | ||
| 1383 | int i; | 1383 | int i; |
| 1384 | 1384 | ||
| 1385 | /* | 1385 | /* |
| 1386 | * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE | 1386 | * If the task is going to be woken-up on this cpu and if it is |
| 1387 | * test in select_task_rq_fair) and the prev_cpu is idle then that's | 1387 | * already idle, then it is the right target. |
| 1388 | * always a better target than the current cpu. | 1388 | */ |
| 1389 | if (target == cpu && idle_cpu(cpu)) | ||
| 1390 | return cpu; | ||
| 1391 | |||
| 1392 | /* | ||
| 1393 | * If the task is going to be woken-up on the cpu where it previously | ||
| 1394 | * ran and if it is currently idle, then it the right target. | ||
| 1389 | */ | 1395 | */ |
| 1390 | if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running) | 1396 | if (target == prev_cpu && idle_cpu(prev_cpu)) |
| 1391 | return prev_cpu; | 1397 | return prev_cpu; |
| 1392 | 1398 | ||
| 1393 | /* | 1399 | /* |
| 1394 | * Otherwise, iterate the domain and find an elegible idle cpu. | 1400 | * Otherwise, iterate the domains and find an elegible idle cpu. |
| 1395 | */ | 1401 | */ |
| 1396 | for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { | 1402 | for_each_domain(target, sd) { |
| 1397 | if (!cpu_rq(i)->cfs.nr_running) { | 1403 | if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) |
| 1398 | target = i; | ||
| 1399 | break; | 1404 | break; |
| 1405 | |||
| 1406 | for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { | ||
| 1407 | if (idle_cpu(i)) { | ||
| 1408 | target = i; | ||
| 1409 | break; | ||
| 1410 | } | ||
| 1400 | } | 1411 | } |
| 1412 | |||
| 1413 | /* | ||
| 1414 | * Lets stop looking for an idle sibling when we reached | ||
| 1415 | * the domain that spans the current cpu and prev_cpu. | ||
| 1416 | */ | ||
| 1417 | if (cpumask_test_cpu(cpu, sched_domain_span(sd)) && | ||
| 1418 | cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) | ||
| 1419 | break; | ||
| 1401 | } | 1420 | } |
| 1402 | 1421 | ||
| 1403 | return target; | 1422 | return target; |
| @@ -1421,7 +1440,7 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ | |||
| 1421 | int cpu = smp_processor_id(); | 1440 | int cpu = smp_processor_id(); |
| 1422 | int prev_cpu = task_cpu(p); | 1441 | int prev_cpu = task_cpu(p); |
| 1423 | int new_cpu = cpu; | 1442 | int new_cpu = cpu; |
| 1424 | int want_affine = 0, cpu_idle = !current->pid; | 1443 | int want_affine = 0; |
| 1425 | int want_sd = 1; | 1444 | int want_sd = 1; |
| 1426 | int sync = wake_flags & WF_SYNC; | 1445 | int sync = wake_flags & WF_SYNC; |
| 1427 | 1446 | ||
| @@ -1460,36 +1479,13 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ | |||
| 1460 | } | 1479 | } |
| 1461 | 1480 | ||
| 1462 | /* | 1481 | /* |
| 1463 | * While iterating the domains looking for a spanning | 1482 | * If both cpu and prev_cpu are part of this domain, |
| 1464 | * WAKE_AFFINE domain, adjust the affine target to any idle cpu | 1483 | * cpu is a valid SD_WAKE_AFFINE target. |
| 1465 | * in cache sharing domains along the way. | ||
| 1466 | */ | 1484 | */ |
| 1467 | if (want_affine) { | 1485 | if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && |
| 1468 | int target = -1; | 1486 | cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { |
| 1469 | 1487 | affine_sd = tmp; | |
| 1470 | /* | 1488 | want_affine = 0; |
| 1471 | * If both cpu and prev_cpu are part of this domain, | ||
| 1472 | * cpu is a valid SD_WAKE_AFFINE target. | ||
| 1473 | */ | ||
| 1474 | if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) | ||
| 1475 | target = cpu; | ||
| 1476 | |||
| 1477 | /* | ||
| 1478 | * If there's an idle sibling in this domain, make that | ||
| 1479 | * the wake_affine target instead of the current cpu. | ||
| 1480 | */ | ||
| 1481 | if (!cpu_idle && tmp->flags & SD_SHARE_PKG_RESOURCES) | ||
| 1482 | target = select_idle_sibling(p, tmp, target); | ||
| 1483 | |||
| 1484 | if (target >= 0) { | ||
| 1485 | if (tmp->flags & SD_WAKE_AFFINE) { | ||
| 1486 | affine_sd = tmp; | ||
| 1487 | want_affine = 0; | ||
| 1488 | if (target != cpu) | ||
| 1489 | cpu_idle = 1; | ||
| 1490 | } | ||
| 1491 | cpu = target; | ||
| 1492 | } | ||
| 1493 | } | 1489 | } |
| 1494 | 1490 | ||
| 1495 | if (!want_sd && !want_affine) | 1491 | if (!want_sd && !want_affine) |
| @@ -1520,8 +1516,10 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_ | |||
| 1520 | #endif | 1516 | #endif |
| 1521 | 1517 | ||
| 1522 | if (affine_sd) { | 1518 | if (affine_sd) { |
| 1523 | if (cpu_idle || cpu == prev_cpu || wake_affine(affine_sd, p, sync)) | 1519 | if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) |
| 1524 | return cpu; | 1520 | return select_idle_sibling(p, cpu); |
| 1521 | else | ||
| 1522 | return select_idle_sibling(p, prev_cpu); | ||
| 1525 | } | 1523 | } |
| 1526 | 1524 | ||
| 1527 | while (sd) { | 1525 | while (sd) { |
