path: root/kernel/sched_fair.c
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--	kernel/sched_fair.c	235
1 file changed, 59 insertions(+), 176 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index fb8994c6d4bb..f604dae71316 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -409,64 +409,6 @@ static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 }
 
 /*
- * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in
- * that it favours >=0 over <0.
- *
- *   -20         |
- *               |
- *     0 --------+-------
- *             .'
- *    19     .'
- *
- */
-static unsigned long
-calc_delta_asym(unsigned long delta, struct sched_entity *se)
-{
-	struct load_weight lw = {
-		.weight = NICE_0_LOAD,
-		.inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
-	};
-
-	for_each_sched_entity(se) {
-		struct load_weight *se_lw = &se->load;
-		unsigned long rw = cfs_rq_of(se)->load.weight;
-
-#ifdef CONFIG_FAIR_SCHED_GROUP
-		struct cfs_rq *cfs_rq = se->my_q;
-		struct task_group *tg = NULL
-
-		if (cfs_rq)
-			tg = cfs_rq->tg;
-
-		if (tg && tg->shares < NICE_0_LOAD) {
-			/*
-			 * scale shares to what it would have been had
-			 * tg->weight been NICE_0_LOAD:
-			 *
-			 *   weight = 1024 * shares / tg->weight
-			 */
-			lw.weight *= se->load.weight;
-			lw.weight /= tg->shares;
-
-			lw.inv_weight = 0;
-
-			se_lw = &lw;
-			rw += lw.weight - se->load.weight;
-		} else
-#endif
-
-		if (se->load.weight < NICE_0_LOAD) {
-			se_lw = &lw;
-			rw += NICE_0_LOAD - se->load.weight;
-		}
-
-		delta = calc_delta_mine(delta, rw, se_lw);
-	}
-
-	return delta;
-}
-
-/*
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
  */
@@ -507,6 +449,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 		struct task_struct *curtask = task_of(curr);
 
 		cpuacct_charge(curtask, delta_exec);
+		account_group_exec_runtime(curtask, delta_exec);
 	}
 }
 
@@ -586,11 +529,12 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_load_add(&cfs_rq->load, se->load.weight);
 	if (!parent_entity(se))
 		inc_cpu_load(rq_of(cfs_rq), se->load.weight);
-	if (entity_is_task(se))
+	if (entity_is_task(se)) {
 		add_cfs_task_weight(cfs_rq, se->load.weight);
+		list_add(&se->group_node, &cfs_rq->tasks);
+	}
 	cfs_rq->nr_running++;
 	se->on_rq = 1;
-	list_add(&se->group_node, &cfs_rq->tasks);
 }
 
 static void
@@ -599,11 +543,12 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_load_sub(&cfs_rq->load, se->load.weight);
 	if (!parent_entity(se))
 		dec_cpu_load(rq_of(cfs_rq), se->load.weight);
-	if (entity_is_task(se))
+	if (entity_is_task(se)) {
 		add_cfs_task_weight(cfs_rq, -se->load.weight);
+		list_del_init(&se->group_node);
+	}
 	cfs_rq->nr_running--;
 	se->on_rq = 0;
-	list_del_init(&se->group_node);
 }
 
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -1085,7 +1030,6 @@ static long effective_load(struct task_group *tg, int cpu,
 		long wl, long wg)
 {
 	struct sched_entity *se = tg->se[cpu];
-	long more_w;
 
 	if (!tg->parent)
 		return wl;
@@ -1097,18 +1041,17 @@ static long effective_load(struct task_group *tg, int cpu,
 	if (!wl && sched_feat(ASYM_EFF_LOAD))
 		return wl;
 
-	/*
-	 * Instead of using this increment, also add the difference
-	 * between when the shares were last updated and now.
-	 */
-	more_w = se->my_q->load.weight - se->my_q->rq_weight;
-	wl += more_w;
-	wg += more_w;
-
 	for_each_sched_entity(se) {
-#define D(n) (likely(n) ? (n) : 1)
-
 		long S, rw, s, a, b;
+		long more_w;
+
+		/*
+		 * Instead of using this increment, also add the difference
+		 * between when the shares were last updated and now.
+		 */
+		more_w = se->my_q->load.weight - se->my_q->rq_weight;
+		wl += more_w;
+		wg += more_w;
 
 		S = se->my_q->tg->shares;
 		s = se->my_q->shares;
@@ -1117,7 +1060,11 @@ static long effective_load(struct task_group *tg, int cpu,
 		a = S*(rw + wl);
 		b = S*rw + s*wg;
 
-		wl = s*(a-b)/D(b);
+		wl = s*(a-b);
+
+		if (likely(b))
+			wl /= b;
+
 		/*
 		 * Assume the group is already running and will
 		 * thus already be accounted for in the weight.
@@ -1126,7 +1073,6 @@ static long effective_load(struct task_group *tg, int cpu,
 		 * alter the group weight.
 		 */
 		wg = 0;
-#undef D
 	}
 
 	return wl;
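
Note: the two hunks above drop the per-loop D(n) macro in favour of an explicit guard. Behaviour is unchanged: with b == 0 the old code divided by D(b) == 1, the new code simply skips the division, so wl stays s*(a-b) either way. A minimal userspace sketch of one effective_load() step, with plain long arithmetic standing in for the kernel types and the likely() hint dropped (the helper name and the sample numbers below are illustrative, not from the kernel):

#include <stdio.h>

/*
 * Simplified model of one effective_load() loop iteration; S, s, rw,
 * wl, wg mirror the kernel variable names, everything else here is a
 * stand-in.
 */
static long effective_load_step(long S, long s, long rw, long wl, long wg)
{
	long a = S * (rw + wl);
	long b = S * rw + s * wg;

	wl = s * (a - b);
	if (b)			/* the kernel writes likely(b); same guard */
		wl /= b;

	return wl;
}

int main(void)
{
	/* arbitrary example numbers */
	printf("%ld\n", effective_load_step(1024, 512, 2048, 300, 0));
	printf("%ld\n", effective_load_step(1024, 512, 0, 300, 0)); /* b == 0 path */
	return 0;
}

Open-coding the test keeps the zero-denominator handling visible at the use site instead of hiding it behind a one-off macro.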
@@ -1143,7 +1089,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 #endif
 
 static int
-wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
+wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	    struct task_struct *p, int prev_cpu, int this_cpu, int sync,
 	    int idx, unsigned long load, unsigned long this_load,
 	    unsigned int imbalance)
@@ -1158,6 +1104,11 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
 	if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
 		return 0;
 
+	if (!sync && sched_feat(SYNC_WAKEUPS) &&
+	    curr->se.avg_overlap < sysctl_sched_migration_cost &&
+	    p->se.avg_overlap < sysctl_sched_migration_cost)
+		sync = 1;
+
 	/*
 	 * If sync wakeup then subtract the (maximum possible)
 	 * effect of the currently running task from the load
@@ -1182,17 +1133,14 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
 	 * a reasonable amount of time then attract this newly
 	 * woken task:
 	 */
-	if (sync && balanced) {
-		if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
-		    p->se.avg_overlap < sysctl_sched_migration_cost)
-			return 1;
-	}
+	if (sync && balanced)
+		return 1;
 
 	schedstat_inc(p, se.nr_wakeups_affine_attempts);
 	tl_per_task = cpu_avg_load_per_task(this_cpu);
 
-	if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) ||
-			balanced) {
+	if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <=
+			tl_per_task)) {
 		/*
 		 * This domain has SD_WAKE_AFFINE and
 		 * p is cache cold in this domain, and
@@ -1211,16 +1159,17 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 	struct sched_domain *sd, *this_sd = NULL;
 	int prev_cpu, this_cpu, new_cpu;
 	unsigned long load, this_load;
-	struct rq *rq, *this_rq;
+	struct rq *this_rq;
 	unsigned int imbalance;
 	int idx;
 
 	prev_cpu	= task_cpu(p);
-	rq		= task_rq(p);
 	this_cpu	= smp_processor_id();
 	this_rq		= cpu_rq(this_cpu);
 	new_cpu		= prev_cpu;
 
+	if (prev_cpu == this_cpu)
+		goto out;
 	/*
 	 * 'this_sd' is the first domain that both
 	 * this_cpu and prev_cpu are present in:
@@ -1248,13 +1197,10 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 	load = source_load(prev_cpu, idx);
 	this_load = target_load(this_cpu, idx);
 
-	if (wake_affine(rq, this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
+	if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
 			load, this_load, imbalance))
 		return this_cpu;
 
-	if (prev_cpu == this_cpu)
-		goto out;
-
 	/*
 	 * Start passive balancing when half the imbalance_pct
 	 * limit is reached.
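
Note: two related changes are visible in the hunks above: select_task_rq_fair() now bails out (goto out) before any domain examination when the task wakes on the CPU it last ran on, and wake_affine() may promote a non-sync wakeup to sync when both the current task's and the wakee's avg_overlap are below sysctl_sched_migration_cost, after which the plain "sync && balanced" test is enough to pull the task. A rough userspace sketch of that promotion rule (the struct and function names are stand-ins, the SYNC_WAKEUPS feature-flag check is left to the caller, and the 500000 ns threshold mirrors the usual sysctl_sched_migration_cost default):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the avg_overlap field of task_struct. */
struct fake_task {
	unsigned long avg_overlap;	/* ns the task tends to stay runnable alongside its waker */
};

/*
 * Model of the new wake_affine() promotion: a non-sync wakeup is treated
 * as sync when both tasks historically overlap for less than the
 * migration cost before sleeping.
 */
static bool promote_to_sync(bool sync, const struct fake_task *curr,
			    const struct fake_task *p,
			    unsigned long migration_cost)
{
	if (!sync &&
	    curr->avg_overlap < migration_cost &&
	    p->avg_overlap < migration_cost)
		return true;
	return sync;
}

int main(void)
{
	struct fake_task waker = { .avg_overlap = 100000 };
	struct fake_task wakee = { .avg_overlap =  50000 };

	printf("sync=%d\n", promote_to_sync(false, &waker, &wakee, 500000));
	return 0;
}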
@@ -1281,62 +1227,20 @@ static unsigned long wakeup_gran(struct sched_entity *se)
 	 * + nice tasks.
 	 */
 	if (sched_feat(ASYM_GRAN))
-		gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se);
-	else
-		gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
+		gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load);
 
 	return gran;
 }
 
 /*
- * Should 'se' preempt 'curr'.
- *
- *             |s1
- *        |s2
- *   |s3
- *         g
- *      |<--->|c
- *
- *  w(c, s1) = -1
- *  w(c, s2) =  0
- *  w(c, s3) =  1
- *
- */
-static int
-wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
-{
-	s64 gran, vdiff = curr->vruntime - se->vruntime;
-
-	if (vdiff < 0)
-		return -1;
-
-	gran = wakeup_gran(curr);
-	if (vdiff > gran)
-		return 1;
-
-	return 0;
-}
-
-/* return depth at which a sched entity is present in the hierarchy */
-static inline int depth_se(struct sched_entity *se)
-{
-	int depth = 0;
-
-	for_each_sched_entity(se)
-		depth++;
-
-	return depth;
-}
-
-/*
  * Preempt the current task with a newly woken task if needed:
  */
-static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
+static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 {
 	struct task_struct *curr = rq->curr;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	struct sched_entity *se = &curr->se, *pse = &p->se;
-	int se_depth, pse_depth;
+	s64 delta_exec;
 
 	if (unlikely(rt_prio(p->prio))) {
 		update_rq_clock(rq);
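
Note: with calc_delta_asym() removed, the ASYM_GRAN path above scales the wakeup granularity with calc_delta_mine(gran, NICE_0_LOAD, &se->load); gran is presumably already initialised to sysctl_sched_wakeup_granularity outside the hunk. Ignoring the inverse-weight and WMULT_SHIFT tricks, calc_delta_mine(delta, weight, lw) is roughly delta * weight / lw->weight, so a wakee heavier than nice 0 sees a proportionally smaller granularity and is easier to preempt. A hedged first-order model (the helper name and sample values are examples only):

#include <stdio.h>

#define NICE_0_LOAD 1024UL	/* the nice-0 load weight */

/*
 * First-order model of calc_delta_mine(): delta * weight / lw_weight.
 * The kernel version uses a precomputed inverse weight and WMULT_SHIFT
 * to avoid the division; this sketch keeps the plain form.
 */
static unsigned long approx_calc_delta(unsigned long delta,
				       unsigned long weight,
				       unsigned long lw_weight)
{
	return delta * weight / lw_weight;
}

int main(void)
{
	unsigned long gran = 5000000UL;	/* say, a 5ms wakeup granularity */

	/* nice-0 wakee: granularity unchanged */
	printf("%lu\n", approx_calc_delta(gran, NICE_0_LOAD, 1024));
	/* heavier wakee (weight 2048): granularity halves */
	printf("%lu\n", approx_calc_delta(gran, NICE_0_LOAD, 2048));
	return 0;
}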
@@ -1351,6 +1255,13 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 		cfs_rq_of(pse)->next = pse;
 
 	/*
+	 * We can come here with TIF_NEED_RESCHED already set from new task
+	 * wake up path.
+	 */
+	if (test_tsk_need_resched(curr))
+		return;
+
+	/*
 	 * Batch tasks do not preempt (their preemption is driven by
 	 * the tick):
 	 */
@@ -1360,33 +1271,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 	if (!sched_feat(WAKEUP_PREEMPT))
 		return;
 
-	/*
-	 * preemption test can be made between sibling entities who are in the
-	 * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
-	 * both tasks until we find their ancestors who are siblings of common
-	 * parent.
-	 */
-
-	/* First walk up until both entities are at same depth */
-	se_depth = depth_se(se);
-	pse_depth = depth_se(pse);
-
-	while (se_depth > pse_depth) {
-		se_depth--;
-		se = parent_entity(se);
-	}
-
-	while (pse_depth > se_depth) {
-		pse_depth--;
-		pse = parent_entity(pse);
-	}
-
-	while (!is_same_group(se, pse)) {
-		se = parent_entity(se);
-		pse = parent_entity(pse);
+	if (sched_feat(WAKEUP_OVERLAP) && (sync ||
+			(se->avg_overlap < sysctl_sched_migration_cost &&
+			 pse->avg_overlap < sysctl_sched_migration_cost))) {
+		resched_task(curr);
+		return;
 	}
 
-	if (wakeup_preempt_entity(se, pse) == 1)
+	delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
+	if (delta_exec > wakeup_gran(pse))
 		resched_task(curr);
 }
 
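
Note: the hunk above replaces the hierarchy walk plus the wakeup_preempt_entity() vruntime comparison with two cheaper tests: preempt immediately when WAKEUP_OVERLAP is set and the wakeup is sync (or both tasks have avg_overlap below the migration cost), otherwise preempt once the current task has run more than a wakeup granularity since it was last picked. A standalone sketch of that decision, with 64-bit nanosecond counters standing in for the sched_entity fields (the toy_se type is invented, feature flags are omitted, and gran is passed in as a fixed value instead of the wakeup_gran() scaling):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Field names follow sched_entity; the struct itself is illustrative. */
struct toy_se {
	uint64_t sum_exec_runtime;	/* total ns executed */
	uint64_t prev_sum_exec_runtime;	/* snapshot from when the task was last picked */
	uint64_t avg_overlap;		/* ns of waker/wakee overlap */
};

static bool should_preempt(const struct toy_se *se, const struct toy_se *pse,
			   bool sync, uint64_t migration_cost, uint64_t gran)
{
	/* WAKEUP_OVERLAP path: sync (or short-overlap) wakeups preempt at once. */
	if (sync || (se->avg_overlap < migration_cost &&
		     pse->avg_overlap < migration_cost))
		return true;

	/* Otherwise preempt once 'curr' has consumed a wakeup granularity. */
	return se->sum_exec_runtime - se->prev_sum_exec_runtime > gran;
}

int main(void)
{
	struct toy_se curr  = { 12000000, 5000000, 900000 };
	struct toy_se wakee = { 0, 0, 100000 };

	printf("preempt=%d\n",
	       should_preempt(&curr, &wakee, false, 500000, 5000000));
	return 0;
}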
@@ -1445,19 +1338,9 @@ __load_balance_iterator(struct cfs_rq *cfs_rq, struct list_head *next)
 	if (next == &cfs_rq->tasks)
 		return NULL;
 
-	/* Skip over entities that are not tasks */
-	do {
-		se = list_entry(next, struct sched_entity, group_node);
-		next = next->next;
-	} while (next != &cfs_rq->tasks && !entity_is_task(se));
-
-	if (next == &cfs_rq->tasks)
-		return NULL;
-
-	cfs_rq->balance_iterator = next;
-
-	if (entity_is_task(se))
-		p = task_of(se);
+	se = list_entry(next, struct sched_entity, group_node);
+	p = task_of(se);
+	cfs_rq->balance_iterator = next->next;
 
 	return p;
 }
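
Note: this simplification leans on the earlier account_entity_enqueue()/dequeue() hunks: se->group_node is now linked only for task entities, so cfs_rq->tasks never contains group entities and the "skip over entities that are not tasks" loop can go. A userspace sketch of the resulting iterator over a plain doubly linked list (the toy_* types, the container_of macro and the hand-rolled list insertion are all stand-ins for the kernel's list helpers):

#include <stddef.h>
#include <stdio.h>

/* Minimal doubly linked list and container_of, mimicking <linux/list.h>. */
struct list_head { struct list_head *next, *prev; };
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct toy_task {
	int pid;			/* illustrative payload */
	struct list_head group_node;	/* linked into the run queue's task list */
};

struct toy_cfs_rq {
	struct list_head tasks;		/* now holds task entities only */
	struct list_head *balance_iterator;
};

/* Same shape as the simplified __load_balance_iterator(): no skip loop. */
static struct toy_task *balance_iterator_next(struct toy_cfs_rq *cfs_rq,
					      struct list_head *next)
{
	struct toy_task *p;

	if (next == &cfs_rq->tasks)
		return NULL;

	p = container_of(next, struct toy_task, group_node);
	cfs_rq->balance_iterator = next->next;

	return p;
}

int main(void)
{
	struct toy_cfs_rq rq = { .tasks = { &rq.tasks, &rq.tasks } };
	struct toy_task a = { .pid = 1 };

	/* enqueue 'a' at the head, as account_entity_enqueue() would */
	a.group_node.next = rq.tasks.next;
	a.group_node.prev = &rq.tasks;
	rq.tasks.next->prev = &a.group_node;
	rq.tasks.next = &a.group_node;

	struct toy_task *p = balance_iterator_next(&rq, rq.tasks.next);
	printf("pid=%d\n", p ? p->pid : -1);
	return 0;
}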
@@ -1507,7 +1390,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	rcu_read_lock();
 	update_h_load(busiest_cpu);
 
-	list_for_each_entry(tg, &task_groups, list) {
+	list_for_each_entry_rcu(tg, &task_groups, list) {
 		struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
 		unsigned long busiest_h_load = busiest_cfs_rq->h_load;
 		unsigned long busiest_weight = busiest_cfs_rq->load.weight;
@@ -1620,10 +1503,10 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 		 * 'current' within the tree based on its new key value.
 		 */
 		swap(curr->vruntime, se->vruntime);
+		resched_task(rq->curr);
 	}
 
 	enqueue_task_fair(rq, p, 0);
-	resched_task(rq->curr);
 }
 
 /*
@@ -1642,7 +1525,7 @@ static void prio_changed_fair(struct rq *rq, struct task_struct *p,
 		if (p->prio > oldprio)
 			resched_task(rq->curr);
 	} else
-		check_preempt_curr(rq, p);
+		check_preempt_curr(rq, p, 0);
 }
 
 /*
@@ -1659,7 +1542,7 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p,
 	if (running)
 		resched_task(rq->curr);
 	else
-		check_preempt_curr(rq, p);
+		check_preempt_curr(rq, p, 0);
 }
 
 /* Account for a task changing its policy or group.