author	Suresh Siddha <suresh.b.siddha@intel.com>	2010-03-31 19:47:45 -0400
committer	Ingo Molnar <mingo@elte.hu>	2010-04-23 05:02:02 -0400
commit	99bd5e2f245d8cd17d040c82d40becdb3efd9b69 (patch)
tree	9dbfd8d1a9148bad45e5c3c067a05f414134083b /kernel/sched_fair.c
parent	669c55e9f99b90e46eaa0f98a67ec53d46dc969a (diff)
sched: Fix select_idle_sibling() logic in select_task_rq_fair()
Issues with the current select_idle_sibling() logic in select_task_rq_fair(), in the context of a task wake-up:

a) Once we select the idle sibling, we use that domain (spanning the cpu the task is being woken up on and the idle sibling we found) in our wake_affine() decisions. This domain is completely different from the domain we are supposed to use: the one spanning the cpu the task is being woken up on and the cpu where the task previously ran.

b) We do the select_idle_sibling() check only for the cpu the task is being woken up on. If select_task_rq_fair() selects the previously-run cpu for waking the task, a select_idle_sibling() check for that cpu would also help, and we don't do this currently.

c) In scenarios where the cpu the task is being woken up on is busy but its HT siblings are idle, we wake the task on an idle HT sibling instead of the core where it previously ran, even when that core is completely idle. That is, we are not basing the decision on wake_affine() but directly selecting an idle sibling, which can cause an imbalance at the SMT/MC level that is only later corrected by the periodic load balancer.

Fix this by first going through the load-imbalance calculations using wake_affine(), and only once we have decided between the wake-up cpu and the previously-run cpu, choosing a possible idle sibling of that cpu to wake the task on.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1270079265.7835.8.camel@sbs-t61.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
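To make the new ordering concrete, below is a minimal userspace C sketch (not kernel code) of the decision flow after this patch: wake_affine() first arbitrates between the waking cpu and prev_cpu, and only then is an idle sibling of the winner looked for. The cpu_is_idle[] and cache_domain[] arrays and the toy wake_affine() policy are stand-ins invented for illustration; they do not correspond to kernel APIs.

/*
 * Illustrative userspace model only -- all names and the wake_affine()
 * policy below are made up for this sketch; they are not kernel code.
 */
#include <stdio.h>
#include <stdbool.h>

#define NR_CPUS 4

static bool cpu_is_idle[NR_CPUS];		   /* stand-in for idle_cpu() */
static int cache_domain[NR_CPUS] = { 0, 0, 1, 1 }; /* stand-in for SD_SHARE_PKG_RESOURCES spans */

/* Stand-in for the kernel's wake_affine() imbalance check. */
static bool wake_affine(int cpu, int prev_cpu)
{
	return cpu_is_idle[cpu];	/* toy policy: pull only if the waking cpu is idle */
}

/* Prefer target if idle, else any idle cpu sharing target's cache domain. */
static int select_idle_sibling(int target)
{
	if (cpu_is_idle[target])
		return target;
	for (int i = 0; i < NR_CPUS; i++)
		if (cache_domain[i] == cache_domain[target] && cpu_is_idle[i])
			return i;
	return target;
}

/* New ordering: decide cpu vs prev_cpu first, then look for a sibling. */
static int select_task_rq(int cpu, int prev_cpu)
{
	if (cpu == prev_cpu || wake_affine(cpu, prev_cpu))
		return select_idle_sibling(cpu);
	return select_idle_sibling(prev_cpu);
}

int main(void)
{
	cpu_is_idle[0] = true;			/* HT sibling of the busy waking cpu 1 */
	cpu_is_idle[2] = cpu_is_idle[3] = true;	/* previous core fully idle */

	/*
	 * Task woken on busy cpu 1, previously ran on cpu 2: wake_affine()
	 * runs first, so the idle previous core wins over cpu 1's idle HT
	 * sibling -- the case described in (c) above.
	 */
	printf("wake task on cpu %d\n", select_task_rq(1, 2));
	return 0;
}

Run, the sketch wakes the task on cpu 2, the fully idle previous core, rather than on cpu 0, the idle HT sibling of the busy waking cpu; before this patch the idle-sibling scan ran inside the domain iteration and could pick cpu 0 without ever consulting wake_affine().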
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--	kernel/sched_fair.c	82
1 file changed, 40 insertions(+), 42 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0a413c7e3ab8..cbd8b8a296d1 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1375,29 +1375,48 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 /*
  * Try and locate an idle CPU in the sched_domain.
  */
-static int
-select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target)
+static int select_idle_sibling(struct task_struct *p, int target)
 {
 	int cpu = smp_processor_id();
 	int prev_cpu = task_cpu(p);
+	struct sched_domain *sd;
 	int i;
 
 	/*
-	 * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE
-	 * test in select_task_rq_fair) and the prev_cpu is idle then that's
-	 * always a better target than the current cpu.
+	 * If the task is going to be woken-up on this cpu and if it is
+	 * already idle, then it is the right target.
+	 */
+	if (target == cpu && idle_cpu(cpu))
+		return cpu;
+
+	/*
+	 * If the task is going to be woken-up on the cpu where it previously
+	 * ran and if it is currently idle, then it the right target.
 	 */
-	if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running)
+	if (target == prev_cpu && idle_cpu(prev_cpu))
 		return prev_cpu;
 
 	/*
-	 * Otherwise, iterate the domain and find an elegible idle cpu.
+	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
-	for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
-		if (!cpu_rq(i)->cfs.nr_running) {
-			target = i;
+	for_each_domain(target, sd) {
+		if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
 			break;
+
+		for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
+			if (idle_cpu(i)) {
+				target = i;
+				break;
+			}
 		}
+
+		/*
+		 * Lets stop looking for an idle sibling when we reached
+		 * the domain that spans the current cpu and prev_cpu.
+		 */
+		if (cpumask_test_cpu(cpu, sched_domain_span(sd)) &&
+		    cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
+			break;
 	}
 
 	return target;
@@ -1421,7 +1440,7 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 	int cpu = smp_processor_id();
 	int prev_cpu = task_cpu(p);
 	int new_cpu = cpu;
-	int want_affine = 0, cpu_idle = !current->pid;
+	int want_affine = 0;
 	int want_sd = 1;
 	int sync = wake_flags & WF_SYNC;
 
@@ -1460,36 +1479,13 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 		}
 
 		/*
-		 * While iterating the domains looking for a spanning
-		 * WAKE_AFFINE domain, adjust the affine target to any idle cpu
-		 * in cache sharing domains along the way.
+		 * If both cpu and prev_cpu are part of this domain,
+		 * cpu is a valid SD_WAKE_AFFINE target.
 		 */
-		if (want_affine) {
-			int target = -1;
-
-			/*
-			 * If both cpu and prev_cpu are part of this domain,
-			 * cpu is a valid SD_WAKE_AFFINE target.
-			 */
-			if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp)))
-				target = cpu;
-
-			/*
-			 * If there's an idle sibling in this domain, make that
-			 * the wake_affine target instead of the current cpu.
-			 */
-			if (!cpu_idle && tmp->flags & SD_SHARE_PKG_RESOURCES)
-				target = select_idle_sibling(p, tmp, target);
-
-			if (target >= 0) {
-				if (tmp->flags & SD_WAKE_AFFINE) {
-					affine_sd = tmp;
-					want_affine = 0;
-					if (target != cpu)
-						cpu_idle = 1;
-				}
-				cpu = target;
-			}
+		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
+		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
+			affine_sd = tmp;
+			want_affine = 0;
 		}
 
 		if (!want_sd && !want_affine)
@@ -1520,8 +1516,10 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 #endif
 
 	if (affine_sd) {
-		if (cpu_idle || cpu == prev_cpu || wake_affine(affine_sd, p, sync))
-			return cpu;
+		if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
+			return select_idle_sibling(p, cpu);
+		else
+			return select_idle_sibling(p, prev_cpu);
 	}
 
 	while (sd) {