author    Ingo Molnar <mingo@elte.hu>    2009-09-19 05:27:32 -0400
committer Ingo Molnar <mingo@elte.hu>    2009-09-19 05:28:41 -0400
commit    929bf0d0156562ce631728b6fa53d68004d456d2 (patch)
tree      739063990a8077b29ef97e69d73bce94573daae4 /kernel/sched_fair.c
parent    def0a9b2573e00ab0b486cb5382625203ab4c4a6 (diff)
parent    202c4675c55ddf6b443c7e057d2dff6b42ef71aa (diff)
Merge branch 'linus' into perfcounters/core
Merge reason: Bring in tracing changes we depend on.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--    kernel/sched_fair.c    414
1 files changed, 261 insertions, 153 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a097e909e80f..990b188803ce 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -712,7 +712,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 	if (!initial) {
 		/* sleeps upto a single latency don't count. */
-		if (sched_feat(NEW_FAIR_SLEEPERS)) {
+		if (sched_feat(FAIR_SLEEPERS)) {
 			unsigned long thresh = sysctl_sched_latency;
 
 			/*
@@ -726,6 +726,13 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 					 task_of(se)->policy != SCHED_IDLE))
 				thresh = calc_delta_fair(thresh, se);
 
+			/*
+			 * Halve their sleep time's effect, to allow
+			 * for a gentler effect of sleepers:
+			 */
+			if (sched_feat(GENTLE_FAIR_SLEEPERS))
+				thresh >>= 1;
+
 			vruntime -= thresh;
 		}
 	}
@@ -758,10 +765,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 
 static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	if (cfs_rq->last == se)
+	if (!se || cfs_rq->last == se)
 		cfs_rq->last = NULL;
 
-	if (cfs_rq->next == se)
+	if (!se || cfs_rq->next == se)
 		cfs_rq->next = NULL;
 }
 
@@ -1063,83 +1070,6 @@ static void yield_task_fair(struct rq *rq)
 	se->vruntime = rightmost->vruntime + 1;
 }
 
-/*
- * wake_idle() will wake a task on an idle cpu if task->cpu is
- * not idle and an idle cpu is available. The span of cpus to
- * search starts with cpus closest then further out as needed,
- * so we always favor a closer, idle cpu.
- * Domains may include CPUs that are not usable for migration,
- * hence we need to mask them out (rq->rd->online)
- *
- * Returns the CPU we should wake onto.
- */
-#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
-
-#define cpu_rd_active(cpu, rq) cpumask_test_cpu(cpu, rq->rd->online)
-
-static int wake_idle(int cpu, struct task_struct *p)
-{
-	struct sched_domain *sd;
-	int i;
-	unsigned int chosen_wakeup_cpu;
-	int this_cpu;
-	struct rq *task_rq = task_rq(p);
-
-	/*
-	 * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu
-	 * are idle and this is not a kernel thread and this task's affinity
-	 * allows it to be moved to preferred cpu, then just move!
-	 */
-
-	this_cpu = smp_processor_id();
-	chosen_wakeup_cpu =
-		cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu;
-
-	if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP &&
-		idle_cpu(cpu) && idle_cpu(this_cpu) &&
-		p->mm && !(p->flags & PF_KTHREAD) &&
-		cpu_isset(chosen_wakeup_cpu, p->cpus_allowed))
-		return chosen_wakeup_cpu;
-
-	/*
-	 * If it is idle, then it is the best cpu to run this task.
-	 *
-	 * This cpu is also the best, if it has more than one task already.
-	 * Siblings must be also busy(in most cases) as they didn't already
-	 * pickup the extra load from this cpu and hence we need not check
-	 * sibling runqueue info. This will avoid the checks and cache miss
-	 * penalities associated with that.
-	 */
-	if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1)
-		return cpu;
-
-	for_each_domain(cpu, sd) {
-		if ((sd->flags & SD_WAKE_IDLE)
-		    || ((sd->flags & SD_WAKE_IDLE_FAR)
-			&& !task_hot(p, task_rq->clock, sd))) {
-			for_each_cpu_and(i, sched_domain_span(sd),
-					 &p->cpus_allowed) {
-				if (cpu_rd_active(i, task_rq) && idle_cpu(i)) {
-					if (i != task_cpu(p)) {
-						schedstat_inc(p,
-						       se.nr_wakeups_idle);
-					}
-					return i;
-				}
-			}
-		} else {
-			break;
-		}
-	}
-	return cpu;
-}
-#else /* !ARCH_HAS_SCHED_WAKE_IDLE*/
-static inline int wake_idle(int cpu, struct task_struct *p)
-{
-	return cpu;
-}
-#endif
-
 #ifdef CONFIG_SMP
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1226,25 +1156,34 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 
 #endif
 
-static int
-wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
-	    struct task_struct *p, int prev_cpu, int this_cpu, int sync,
-	    int idx, unsigned long load, unsigned long this_load,
-	    unsigned int imbalance)
+static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 {
-	struct task_struct *curr = this_rq->curr;
-	struct task_group *tg;
-	unsigned long tl = this_load;
+	struct task_struct *curr = current;
+	unsigned long this_load, load;
+	int idx, this_cpu, prev_cpu;
 	unsigned long tl_per_task;
+	unsigned int imbalance;
+	struct task_group *tg;
 	unsigned long weight;
 	int balanced;
 
-	if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
-		return 0;
+	idx = sd->wake_idx;
+	this_cpu = smp_processor_id();
+	prev_cpu = task_cpu(p);
+	load = source_load(prev_cpu, idx);
+	this_load = target_load(this_cpu, idx);
 
-	if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
-			p->se.avg_overlap > sysctl_sched_migration_cost))
-		sync = 0;
+	if (sync) {
+		if (sched_feat(SYNC_LESS) &&
+		    (curr->se.avg_overlap > sysctl_sched_migration_cost ||
+		     p->se.avg_overlap > sysctl_sched_migration_cost))
+			sync = 0;
+	} else {
+		if (sched_feat(SYNC_MORE) &&
+		    (curr->se.avg_overlap < sysctl_sched_migration_cost &&
+		     p->se.avg_overlap < sysctl_sched_migration_cost))
+			sync = 1;
+	}
 
 	/*
 	 * If sync wakeup then subtract the (maximum possible)
@@ -1255,24 +1194,26 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 		tg = task_group(current);
 		weight = current->se.load.weight;
 
-		tl += effective_load(tg, this_cpu, -weight, -weight);
+		this_load += effective_load(tg, this_cpu, -weight, -weight);
 		load += effective_load(tg, prev_cpu, 0, -weight);
 	}
 
 	tg = task_group(p);
 	weight = p->se.load.weight;
 
+	imbalance = 100 + (sd->imbalance_pct - 100) / 2;
+
 	/*
 	 * In low-load situations, where prev_cpu is idle and this_cpu is idle
-	 * due to the sync cause above having dropped tl to 0, we'll always have
-	 * an imbalance, but there's really nothing you can do about that, so
-	 * that's good too.
+	 * due to the sync cause above having dropped this_load to 0, we'll
+	 * always have an imbalance, but there's really nothing you can do
+	 * about that, so that's good too.
 	 *
 	 * Otherwise check if either cpus are near enough in load to allow this
 	 * task to be woken on this_cpu.
 	 */
-	balanced = !tl ||
-		100*(tl + effective_load(tg, this_cpu, weight, weight)) <=
+	balanced = !this_load ||
+		100*(this_load + effective_load(tg, this_cpu, weight, weight)) <=
 		imbalance*(load + effective_load(tg, prev_cpu, 0, weight));
 
 	/*
@@ -1286,14 +1227,15 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	schedstat_inc(p, se.nr_wakeups_affine_attempts);
 	tl_per_task = cpu_avg_load_per_task(this_cpu);
 
-	if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <=
-			tl_per_task)) {
+	if (balanced ||
+	    (this_load <= load &&
+	     this_load + target_load(prev_cpu, idx) <= tl_per_task)) {
 		/*
 		 * This domain has SD_WAKE_AFFINE and
 		 * p is cache cold in this domain, and
 		 * there is no bad imbalance.
 		 */
-		schedstat_inc(this_sd, ttwu_move_affine);
+		schedstat_inc(sd, ttwu_move_affine);
 		schedstat_inc(p, se.nr_wakeups_affine);
 
 		return 1;
@@ -1301,65 +1243,215 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	return 0;
 }
 
-static int select_task_rq_fair(struct task_struct *p, int sync)
+/*
+ * find_idlest_group finds and returns the least busy CPU group within the
+ * domain.
+ */
+static struct sched_group *
+find_idlest_group(struct sched_domain *sd, struct task_struct *p,
+		  int this_cpu, int load_idx)
 {
-	struct sched_domain *sd, *this_sd = NULL;
-	int prev_cpu, this_cpu, new_cpu;
-	unsigned long load, this_load;
-	struct rq *this_rq;
-	unsigned int imbalance;
-	int idx;
+	struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups;
+	unsigned long min_load = ULONG_MAX, this_load = 0;
+	int imbalance = 100 + (sd->imbalance_pct-100)/2;
 
-	prev_cpu = task_cpu(p);
-	this_cpu = smp_processor_id();
-	this_rq = cpu_rq(this_cpu);
-	new_cpu = prev_cpu;
-
-	/*
-	 * 'this_sd' is the first domain that both
-	 * this_cpu and prev_cpu are present in:
-	 */
-	for_each_domain(this_cpu, sd) {
-		if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) {
-			this_sd = sd;
-			break;
+	do {
+		unsigned long load, avg_load;
+		int local_group;
+		int i;
+
+		/* Skip over this group if it has no CPUs allowed */
+		if (!cpumask_intersects(sched_group_cpus(group),
+					&p->cpus_allowed))
+			continue;
+
+		local_group = cpumask_test_cpu(this_cpu,
+					       sched_group_cpus(group));
+
+		/* Tally up the load of all CPUs in the group */
+		avg_load = 0;
+
+		for_each_cpu(i, sched_group_cpus(group)) {
+			/* Bias balancing toward cpus of our domain */
+			if (local_group)
+				load = source_load(i, load_idx);
+			else
+				load = target_load(i, load_idx);
+
+			avg_load += load;
+		}
+
+		/* Adjust by relative CPU power of the group */
+		avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
+
+		if (local_group) {
+			this_load = avg_load;
+			this = group;
+		} else if (avg_load < min_load) {
+			min_load = avg_load;
+			idlest = group;
+		}
+	} while (group = group->next, group != sd->groups);
+
+	if (!idlest || 100*this_load < imbalance*min_load)
+		return NULL;
+	return idlest;
+}
+
+/*
+ * find_idlest_cpu - find the idlest cpu among the cpus in group.
+ */
+static int
+find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
+{
+	unsigned long load, min_load = ULONG_MAX;
+	int idlest = -1;
+	int i;
+
+	/* Traverse only the allowed CPUs */
+	for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
+		load = weighted_cpuload(i);
+
+		if (load < min_load || (load == min_load && i == this_cpu)) {
+			min_load = load;
+			idlest = i;
 		}
 	}
 
-	if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed)))
-		goto out;
+	return idlest;
+}
 
-	/*
-	 * Check for affine wakeup and passive balancing possibilities.
-	 */
-	if (!this_sd)
+/*
+ * sched_balance_self: balance the current task (running on cpu) in domains
+ * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
+ * SD_BALANCE_EXEC.
+ *
+ * Balance, ie. select the least loaded group.
+ *
+ * Returns the target CPU number, or the same CPU if no balancing is needed.
+ *
+ * preempt must be disabled.
+ */
+static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
+{
+	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
+	int cpu = smp_processor_id();
+	int prev_cpu = task_cpu(p);
+	int new_cpu = cpu;
+	int want_affine = 0;
+	int want_sd = 1;
+	int sync = wake_flags & WF_SYNC;
+
+	if (sd_flag & SD_BALANCE_WAKE) {
+		if (sched_feat(AFFINE_WAKEUPS))
+			want_affine = 1;
+		new_cpu = prev_cpu;
+	}
+
+	rcu_read_lock();
+	for_each_domain(cpu, tmp) {
+		/*
+		 * If power savings logic is enabled for a domain, see if we
+		 * are not overloaded, if so, don't balance wider.
+		 */
+		if (tmp->flags & (SD_POWERSAVINGS_BALANCE|SD_PREFER_LOCAL)) {
+			unsigned long power = 0;
+			unsigned long nr_running = 0;
+			unsigned long capacity;
+			int i;
+
+			for_each_cpu(i, sched_domain_span(tmp)) {
+				power += power_of(i);
+				nr_running += cpu_rq(i)->cfs.nr_running;
+			}
+
+			capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
+
+			if (tmp->flags & SD_POWERSAVINGS_BALANCE)
+				nr_running /= 2;
+
+			if (nr_running < capacity)
+				want_sd = 0;
+		}
+
+		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
+		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
+
+			affine_sd = tmp;
+			want_affine = 0;
+		}
+
+		if (!want_sd && !want_affine)
+			break;
+
+		if (!(tmp->flags & sd_flag))
+			continue;
+
+		if (want_sd)
+			sd = tmp;
+	}
+
+	if (sched_feat(LB_SHARES_UPDATE)) {
+		/*
+		 * Pick the largest domain to update shares over
+		 */
+		tmp = sd;
+		if (affine_sd && (!tmp ||
+				  cpumask_weight(sched_domain_span(affine_sd)) >
+				  cpumask_weight(sched_domain_span(sd))))
+			tmp = affine_sd;
+
+		if (tmp)
+			update_shares(tmp);
+	}
+
+	if (affine_sd && wake_affine(affine_sd, p, sync)) {
+		new_cpu = cpu;
 		goto out;
+	}
 
-	idx = this_sd->wake_idx;
+	while (sd) {
+		int load_idx = sd->forkexec_idx;
+		struct sched_group *group;
+		int weight;
 
-	imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
+		if (!(sd->flags & sd_flag)) {
+			sd = sd->child;
+			continue;
+		}
 
-	load = source_load(prev_cpu, idx);
-	this_load = target_load(this_cpu, idx);
+		if (sd_flag & SD_BALANCE_WAKE)
+			load_idx = sd->wake_idx;
 
-	if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
-			load, this_load, imbalance))
-		return this_cpu;
+		group = find_idlest_group(sd, p, cpu, load_idx);
+		if (!group) {
+			sd = sd->child;
+			continue;
+		}
 
-	/*
-	 * Start passive balancing when half the imbalance_pct
-	 * limit is reached.
-	 */
-	if (this_sd->flags & SD_WAKE_BALANCE) {
-		if (imbalance*this_load <= 100*load) {
-			schedstat_inc(this_sd, ttwu_move_balance);
-			schedstat_inc(p, se.nr_wakeups_passive);
-			return this_cpu;
+		new_cpu = find_idlest_cpu(group, p, cpu);
+		if (new_cpu == -1 || new_cpu == cpu) {
+			/* Now try balancing at a lower domain level of cpu */
+			sd = sd->child;
+			continue;
 		}
+
+		/* Now try balancing at a lower domain level of new_cpu */
+		cpu = new_cpu;
+		weight = cpumask_weight(sched_domain_span(sd));
+		sd = NULL;
+		for_each_domain(cpu, tmp) {
+			if (weight <= cpumask_weight(sched_domain_span(tmp)))
+				break;
+			if (tmp->flags & sd_flag)
+				sd = tmp;
+		}
+		/* while loop will break here if sd == NULL */
 	}
 
 out:
-	return wake_idle(new_cpu, p);
+	rcu_read_unlock();
+	return new_cpu;
 }
 #endif /* CONFIG_SMP */
 
@@ -1472,11 +1564,12 @@ static void set_next_buddy(struct sched_entity *se)
 /*
  * Preempt the current task with a newly woken task if needed:
  */
-static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
+static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 {
 	struct task_struct *curr = rq->curr;
 	struct sched_entity *se = &curr->se, *pse = &p->se;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+	int sync = wake_flags & WF_SYNC;
 
 	update_curr(cfs_rq);
 
@@ -1502,7 +1595,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 	 */
 	if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
 		set_last_buddy(se);
-	set_next_buddy(pse);
+	if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK))
+		set_next_buddy(pse);
 
 	/*
 	 * We can come here with TIF_NEED_RESCHED already set from new task
@@ -1524,16 +1618,25 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 		return;
 	}
 
-	if (!sched_feat(WAKEUP_PREEMPT))
-		return;
-
-	if (sched_feat(WAKEUP_OVERLAP) && (sync ||
-		(se->avg_overlap < sysctl_sched_migration_cost &&
-		 pse->avg_overlap < sysctl_sched_migration_cost))) {
+	if ((sched_feat(WAKEUP_SYNC) && sync) ||
+	    (sched_feat(WAKEUP_OVERLAP) &&
+	     (se->avg_overlap < sysctl_sched_migration_cost &&
+	      pse->avg_overlap < sysctl_sched_migration_cost))) {
 		resched_task(curr);
 		return;
 	}
 
+	if (sched_feat(WAKEUP_RUNNING)) {
+		if (pse->avg_running < se->avg_running) {
+			set_next_buddy(pse);
+			resched_task(curr);
+			return;
+		}
+	}
+
+	if (!sched_feat(WAKEUP_PREEMPT))
+		return;
+
 	find_matching_se(&se, &pse);
 
 	BUG_ON(!pse);
@@ -1556,8 +1659,13 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
 		/*
 		 * If se was a buddy, clear it so that it will have to earn
 		 * the favour again.
+		 *
+		 * If se was not a buddy, clear the buddies because neither
+		 * was elegible to run, let them earn it again.
+		 *
+		 * IOW. unconditionally clear buddies.
 		 */
-		__clear_buddies(cfs_rq, se);
+		__clear_buddies(cfs_rq, NULL);
 		set_next_entity(cfs_rq, se);
 		cfs_rq = group_cfs_rq(se);
 	} while (cfs_rq);