Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--	kernel/sched_fair.c | 468
1 file changed, 298 insertions, 170 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index aa7f84121016..4e777b47eeda 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -384,10 +384,10 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 
 #ifdef CONFIG_SCHED_DEBUG
 int sched_nr_latency_handler(struct ctl_table *table, int write,
-		struct file *filp, void __user *buffer, size_t *lenp,
+		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
-	int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
 	if (ret || !write)
 		return ret;
@@ -513,6 +513,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	if (entity_is_task(curr)) {
 		struct task_struct *curtask = task_of(curr);
 
+		trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
 		cpuacct_charge(curtask, delta_exec);
 		account_group_exec_runtime(curtask, delta_exec);
 	}
@@ -709,24 +710,28 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	if (initial && sched_feat(START_DEBIT))
 		vruntime += sched_vslice(cfs_rq, se);
 
-	if (!initial) {
-		/* sleeps upto a single latency don't count. */
-		if (sched_feat(NEW_FAIR_SLEEPERS)) {
-			unsigned long thresh = sysctl_sched_latency;
+	/* sleeps up to a single latency don't count. */
+	if (!initial && sched_feat(FAIR_SLEEPERS)) {
+		unsigned long thresh = sysctl_sched_latency;
 
 		/*
 		 * Convert the sleeper threshold into virtual time.
 		 * SCHED_IDLE is a special sub-class. We care about
 		 * fairness only relative to other SCHED_IDLE tasks,
 		 * all of which have the same weight.
 		 */
-		if (sched_feat(NORMALIZED_SLEEPER) &&
-				(!entity_is_task(se) ||
-				 task_of(se)->policy != SCHED_IDLE))
-			thresh = calc_delta_fair(thresh, se);
+		if (sched_feat(NORMALIZED_SLEEPER) && (!entity_is_task(se) ||
+				task_of(se)->policy != SCHED_IDLE))
+			thresh = calc_delta_fair(thresh, se);
 
-		vruntime -= thresh;
-	}
+		/*
+		 * Halve their sleep time's effect, to allow
+		 * for a gentler effect of sleepers:
+		 */
+		if (sched_feat(GENTLE_FAIR_SLEEPERS))
+			thresh >>= 1;
+
+		vruntime -= thresh;
 	}
 
 	/* ensure we never gain time by being placed backwards. */
@@ -757,10 +762,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 
 static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	if (cfs_rq->last == se)
+	if (!se || cfs_rq->last == se)
 		cfs_rq->last = NULL;
 
-	if (cfs_rq->next == se)
+	if (!se || cfs_rq->next == se)
 		cfs_rq->next = NULL;
 }
 
@@ -1062,83 +1067,6 @@ static void yield_task_fair(struct rq *rq)
 	se->vruntime = rightmost->vruntime + 1;
 }
 
-/*
- * wake_idle() will wake a task on an idle cpu if task->cpu is
- * not idle and an idle cpu is available. The span of cpus to
- * search starts with cpus closest then further out as needed,
- * so we always favor a closer, idle cpu.
- * Domains may include CPUs that are not usable for migration,
- * hence we need to mask them out (rq->rd->online)
- *
- * Returns the CPU we should wake onto.
- */
-#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
-
-#define cpu_rd_active(cpu, rq) cpumask_test_cpu(cpu, rq->rd->online)
-
-static int wake_idle(int cpu, struct task_struct *p)
-{
-	struct sched_domain *sd;
-	int i;
-	unsigned int chosen_wakeup_cpu;
-	int this_cpu;
-	struct rq *task_rq = task_rq(p);
-
-	/*
-	 * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu
-	 * are idle and this is not a kernel thread and this task's affinity
-	 * allows it to be moved to preferred cpu, then just move!
-	 */
-
-	this_cpu = smp_processor_id();
-	chosen_wakeup_cpu =
-		cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu;
-
-	if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP &&
-		idle_cpu(cpu) && idle_cpu(this_cpu) &&
-		p->mm && !(p->flags & PF_KTHREAD) &&
-		cpu_isset(chosen_wakeup_cpu, p->cpus_allowed))
-		return chosen_wakeup_cpu;
-
-	/*
-	 * If it is idle, then it is the best cpu to run this task.
-	 *
-	 * This cpu is also the best, if it has more than one task already.
-	 * Siblings must be also busy(in most cases) as they didn't already
-	 * pickup the extra load from this cpu and hence we need not check
-	 * sibling runqueue info. This will avoid the checks and cache miss
-	 * penalities associated with that.
-	 */
-	if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1)
-		return cpu;
-
-	for_each_domain(cpu, sd) {
-		if ((sd->flags & SD_WAKE_IDLE)
-		    || ((sd->flags & SD_WAKE_IDLE_FAR)
-			&& !task_hot(p, task_rq->clock, sd))) {
-			for_each_cpu_and(i, sched_domain_span(sd),
-					 &p->cpus_allowed) {
-				if (cpu_rd_active(i, task_rq) && idle_cpu(i)) {
-					if (i != task_cpu(p)) {
-						schedstat_inc(p,
-						       se.nr_wakeups_idle);
-					}
-					return i;
-				}
-			}
-		} else {
-			break;
-		}
-	}
-	return cpu;
-}
-#else /* !ARCH_HAS_SCHED_WAKE_IDLE*/
-static inline int wake_idle(int cpu, struct task_struct *p)
-{
-	return cpu;
-}
-#endif
-
 #ifdef CONFIG_SMP
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1225,25 +1153,34 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 
 #endif
 
-static int
-wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
-	    struct task_struct *p, int prev_cpu, int this_cpu, int sync,
-	    int idx, unsigned long load, unsigned long this_load,
-	    unsigned int imbalance)
+static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 {
-	struct task_struct *curr = this_rq->curr;
-	struct task_group *tg;
-	unsigned long tl = this_load;
+	struct task_struct *curr = current;
+	unsigned long this_load, load;
+	int idx, this_cpu, prev_cpu;
 	unsigned long tl_per_task;
+	unsigned int imbalance;
+	struct task_group *tg;
 	unsigned long weight;
 	int balanced;
 
-	if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
-		return 0;
+	idx = sd->wake_idx;
+	this_cpu = smp_processor_id();
+	prev_cpu = task_cpu(p);
+	load = source_load(prev_cpu, idx);
+	this_load = target_load(this_cpu, idx);
 
-	if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
-			p->se.avg_overlap > sysctl_sched_migration_cost))
-		sync = 0;
+	if (sync) {
+		if (sched_feat(SYNC_LESS) &&
+		    (curr->se.avg_overlap > sysctl_sched_migration_cost ||
+		     p->se.avg_overlap > sysctl_sched_migration_cost))
+			sync = 0;
+	} else {
+		if (sched_feat(SYNC_MORE) &&
+		    (curr->se.avg_overlap < sysctl_sched_migration_cost &&
+		     p->se.avg_overlap < sysctl_sched_migration_cost))
+			sync = 1;
+	}
 
 	/*
 	 * If sync wakeup then subtract the (maximum possible)
@@ -1254,24 +1191,26 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 		tg = task_group(current);
 		weight = current->se.load.weight;
 
-		tl += effective_load(tg, this_cpu, -weight, -weight);
+		this_load += effective_load(tg, this_cpu, -weight, -weight);
 		load += effective_load(tg, prev_cpu, 0, -weight);
 	}
 
 	tg = task_group(p);
 	weight = p->se.load.weight;
 
+	imbalance = 100 + (sd->imbalance_pct - 100) / 2;
+
 	/*
 	 * In low-load situations, where prev_cpu is idle and this_cpu is idle
-	 * due to the sync cause above having dropped tl to 0, we'll always have
-	 * an imbalance, but there's really nothing you can do about that, so
-	 * that's good too.
+	 * due to the sync cause above having dropped this_load to 0, we'll
+	 * always have an imbalance, but there's really nothing you can do
+	 * about that, so that's good too.
 	 *
 	 * Otherwise check if either cpus are near enough in load to allow this
 	 * task to be woken on this_cpu.
 	 */
-	balanced = !tl ||
-		100*(tl + effective_load(tg, this_cpu, weight, weight)) <=
+	balanced = !this_load ||
+		100*(this_load + effective_load(tg, this_cpu, weight, weight)) <=
 		imbalance*(load + effective_load(tg, prev_cpu, 0, weight));
 
 	/*
@@ -1285,14 +1224,15 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	schedstat_inc(p, se.nr_wakeups_affine_attempts);
 	tl_per_task = cpu_avg_load_per_task(this_cpu);
 
-	if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <=
-			tl_per_task)) {
+	if (balanced ||
+	    (this_load <= load &&
+	     this_load + target_load(prev_cpu, idx) <= tl_per_task)) {
 		/*
 		 * This domain has SD_WAKE_AFFINE and
 		 * p is cache cold in this domain, and
 		 * there is no bad imbalance.
 		 */
-		schedstat_inc(this_sd, ttwu_move_affine);
+		schedstat_inc(sd, ttwu_move_affine);
 		schedstat_inc(p, se.nr_wakeups_affine);
 
 		return 1;
@@ -1300,65 +1240,216 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	return 0;
 }
 
-static int select_task_rq_fair(struct task_struct *p, int sync)
+/*
+ * find_idlest_group finds and returns the least busy CPU group within the
+ * domain.
+ */
+static struct sched_group *
+find_idlest_group(struct sched_domain *sd, struct task_struct *p,
+		  int this_cpu, int load_idx)
 {
-	struct sched_domain *sd, *this_sd = NULL;
-	int prev_cpu, this_cpu, new_cpu;
-	unsigned long load, this_load;
-	struct rq *this_rq;
-	unsigned int imbalance;
-	int idx;
+	struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups;
+	unsigned long min_load = ULONG_MAX, this_load = 0;
+	int imbalance = 100 + (sd->imbalance_pct-100)/2;
 
-	prev_cpu = task_cpu(p);
-	this_cpu = smp_processor_id();
-	this_rq = cpu_rq(this_cpu);
-	new_cpu = prev_cpu;
+	do {
+		unsigned long load, avg_load;
+		int local_group;
+		int i;
 
-	/*
-	 * 'this_sd' is the first domain that both
-	 * this_cpu and prev_cpu are present in:
-	 */
-	for_each_domain(this_cpu, sd) {
-		if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) {
-			this_sd = sd;
-			break;
+		/* Skip over this group if it has no CPUs allowed */
+		if (!cpumask_intersects(sched_group_cpus(group),
+					&p->cpus_allowed))
+			continue;
+
+		local_group = cpumask_test_cpu(this_cpu,
+					       sched_group_cpus(group));
+
+		/* Tally up the load of all CPUs in the group */
+		avg_load = 0;
+
+		for_each_cpu(i, sched_group_cpus(group)) {
+			/* Bias balancing toward cpus of our domain */
+			if (local_group)
+				load = source_load(i, load_idx);
+			else
+				load = target_load(i, load_idx);
+
+			avg_load += load;
+		}
+
+		/* Adjust by relative CPU power of the group */
+		avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
+
+		if (local_group) {
+			this_load = avg_load;
+			this = group;
+		} else if (avg_load < min_load) {
+			min_load = avg_load;
+			idlest = group;
+		}
+	} while (group = group->next, group != sd->groups);
+
+	if (!idlest || 100*this_load < imbalance*min_load)
+		return NULL;
+	return idlest;
+}
+
+/*
+ * find_idlest_cpu - find the idlest cpu among the cpus in group.
+ */
+static int
+find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
+{
+	unsigned long load, min_load = ULONG_MAX;
+	int idlest = -1;
+	int i;
+
+	/* Traverse only the allowed CPUs */
+	for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
+		load = weighted_cpuload(i);
+
+		if (load < min_load || (load == min_load && i == this_cpu)) {
+			min_load = load;
+			idlest = i;
 		}
 	}
 
-	if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed)))
-		goto out;
+	return idlest;
+}
 
-	/*
-	 * Check for affine wakeup and passive balancing possibilities.
-	 */
-	if (!this_sd)
+/*
+ * sched_balance_self: balance the current task (running on cpu) in domains
+ * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
+ * SD_BALANCE_EXEC.
+ *
+ * Balance, ie. select the least loaded group.
+ *
+ * Returns the target CPU number, or the same CPU if no balancing is needed.
+ *
+ * preempt must be disabled.
+ */
+static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
+{
+	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
+	int cpu = smp_processor_id();
+	int prev_cpu = task_cpu(p);
+	int new_cpu = cpu;
+	int want_affine = 0;
+	int want_sd = 1;
+	int sync = wake_flags & WF_SYNC;
+
+	if (sd_flag & SD_BALANCE_WAKE) {
+		if (sched_feat(AFFINE_WAKEUPS) &&
+		    cpumask_test_cpu(cpu, &p->cpus_allowed))
+			want_affine = 1;
+		new_cpu = prev_cpu;
+	}
+
+	rcu_read_lock();
+	for_each_domain(cpu, tmp) {
+		/*
+		 * If power savings logic is enabled for a domain, see if we
+		 * are not overloaded, if so, don't balance wider.
+		 */
+		if (tmp->flags & (SD_POWERSAVINGS_BALANCE|SD_PREFER_LOCAL)) {
+			unsigned long power = 0;
+			unsigned long nr_running = 0;
+			unsigned long capacity;
+			int i;
+
+			for_each_cpu(i, sched_domain_span(tmp)) {
+				power += power_of(i);
+				nr_running += cpu_rq(i)->cfs.nr_running;
+			}
+
+			capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
+
+			if (tmp->flags & SD_POWERSAVINGS_BALANCE)
+				nr_running /= 2;
+
+			if (nr_running < capacity)
+				want_sd = 0;
+		}
+
+		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
+		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
+
+			affine_sd = tmp;
+			want_affine = 0;
+		}
+
+		if (!want_sd && !want_affine)
+			break;
+
+		if (!(tmp->flags & sd_flag))
+			continue;
+
+		if (want_sd)
+			sd = tmp;
+	}
+
+	if (sched_feat(LB_SHARES_UPDATE)) {
+		/*
+		 * Pick the largest domain to update shares over
+		 */
+		tmp = sd;
+		if (affine_sd && (!tmp ||
+				  cpumask_weight(sched_domain_span(affine_sd)) >
+				  cpumask_weight(sched_domain_span(sd))))
+			tmp = affine_sd;
+
+		if (tmp)
+			update_shares(tmp);
+	}
+
+	if (affine_sd && wake_affine(affine_sd, p, sync)) {
+		new_cpu = cpu;
 		goto out;
+	}
 
-	idx = this_sd->wake_idx;
+	while (sd) {
+		int load_idx = sd->forkexec_idx;
+		struct sched_group *group;
+		int weight;
 
-	imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
+		if (!(sd->flags & sd_flag)) {
+			sd = sd->child;
+			continue;
+		}
 
-	load = source_load(prev_cpu, idx);
-	this_load = target_load(this_cpu, idx);
+		if (sd_flag & SD_BALANCE_WAKE)
+			load_idx = sd->wake_idx;
 
-	if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
-				load, this_load, imbalance))
-		return this_cpu;
+		group = find_idlest_group(sd, p, cpu, load_idx);
+		if (!group) {
+			sd = sd->child;
+			continue;
+		}
 
-	/*
-	 * Start passive balancing when half the imbalance_pct
-	 * limit is reached.
-	 */
-	if (this_sd->flags & SD_WAKE_BALANCE) {
-		if (imbalance*this_load <= 100*load) {
-			schedstat_inc(this_sd, ttwu_move_balance);
-			schedstat_inc(p, se.nr_wakeups_passive);
-			return this_cpu;
+		new_cpu = find_idlest_cpu(group, p, cpu);
+		if (new_cpu == -1 || new_cpu == cpu) {
+			/* Now try balancing at a lower domain level of cpu */
+			sd = sd->child;
+			continue;
+		}
+
+		/* Now try balancing at a lower domain level of new_cpu */
+		cpu = new_cpu;
+		weight = cpumask_weight(sched_domain_span(sd));
+		sd = NULL;
+		for_each_domain(cpu, tmp) {
+			if (weight <= cpumask_weight(sched_domain_span(tmp)))
+				break;
+			if (tmp->flags & sd_flag)
+				sd = tmp;
 		}
+		/* while loop will break here if sd == NULL */
 	}
 
 out:
-	return wake_idle(new_cpu, p);
+	rcu_read_unlock();
+	return new_cpu;
 }
 #endif /* CONFIG_SMP */
 
@@ -1471,11 +1562,12 @@ static void set_next_buddy(struct sched_entity *se)
 /*
  * Preempt the current task with a newly woken task if needed:
  */
-static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
+static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 {
 	struct task_struct *curr = rq->curr;
 	struct sched_entity *se = &curr->se, *pse = &p->se;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+	int sync = wake_flags & WF_SYNC;
 
 	update_curr(cfs_rq);
 
@@ -1501,7 +1593,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 	 */
 	if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
 		set_last_buddy(se);
-	set_next_buddy(pse);
+	if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK))
+		set_next_buddy(pse);
 
 	/*
 	 * We can come here with TIF_NEED_RESCHED already set from new task
@@ -1523,16 +1616,25 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 		return;
 	}
 
-	if (!sched_feat(WAKEUP_PREEMPT))
-		return;
-
-	if (sched_feat(WAKEUP_OVERLAP) && (sync ||
-		(se->avg_overlap < sysctl_sched_migration_cost &&
-		 pse->avg_overlap < sysctl_sched_migration_cost))) {
+	if ((sched_feat(WAKEUP_SYNC) && sync) ||
+	    (sched_feat(WAKEUP_OVERLAP) &&
+	     (se->avg_overlap < sysctl_sched_migration_cost &&
+	      pse->avg_overlap < sysctl_sched_migration_cost))) {
 		resched_task(curr);
 		return;
 	}
 
+	if (sched_feat(WAKEUP_RUNNING)) {
+		if (pse->avg_running < se->avg_running) {
+			set_next_buddy(pse);
+			resched_task(curr);
+			return;
+		}
+	}
+
+	if (!sched_feat(WAKEUP_PREEMPT))
+		return;
+
 	find_matching_se(&se, &pse);
 
 	BUG_ON(!pse);
@@ -1555,8 +1657,13 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
 		/*
 		 * If se was a buddy, clear it so that it will have to earn
 		 * the favour again.
+		 *
+		 * If se was not a buddy, clear the buddies because neither
+		 * was eligible to run, let them earn it again.
+		 *
+		 * IOW. unconditionally clear buddies.
 		 */
-		__clear_buddies(cfs_rq, se);
+		__clear_buddies(cfs_rq, NULL);
 		set_next_entity(cfs_rq, se);
 		cfs_rq = group_cfs_rq(se);
 	} while (cfs_rq);
@@ -1832,6 +1939,25 @@ static void moved_group_fair(struct task_struct *p)
 }
 #endif
 
+unsigned int get_rr_interval_fair(struct task_struct *task)
+{
+	struct sched_entity *se = &task->se;
+	unsigned long flags;
+	struct rq *rq;
+	unsigned int rr_interval = 0;
+
+	/*
+	 * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
+	 * idle runqueue:
+	 */
+	rq = task_rq_lock(task, &flags);
+	if (rq->cfs.load.weight)
+		rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
+	task_rq_unlock(rq, &flags);
+
+	return rr_interval;
+}
+
 /*
  * All the scheduling class methods:
  */
@@ -1860,6 +1986,8 @@ static const struct sched_class fair_sched_class = {
 	.prio_changed = prio_changed_fair,
 	.switched_to = switched_to_fair,
 
+	.get_rr_interval = get_rr_interval_fair,
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	.moved_group = moved_group_fair,
 #endif