diff options
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r-- | kernel/sched_fair.c | 254 |
1 files changed, 91 insertions, 163 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index e24ecd39c4b8..08ae848b71d4 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -334,34 +334,6 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, | |||
334 | #endif | 334 | #endif |
335 | 335 | ||
336 | /* | 336 | /* |
337 | * delta *= w / rw | ||
338 | */ | ||
339 | static inline unsigned long | ||
340 | calc_delta_weight(unsigned long delta, struct sched_entity *se) | ||
341 | { | ||
342 | for_each_sched_entity(se) { | ||
343 | delta = calc_delta_mine(delta, | ||
344 | se->load.weight, &cfs_rq_of(se)->load); | ||
345 | } | ||
346 | |||
347 | return delta; | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * delta *= rw / w | ||
352 | */ | ||
353 | static inline unsigned long | ||
354 | calc_delta_fair(unsigned long delta, struct sched_entity *se) | ||
355 | { | ||
356 | for_each_sched_entity(se) { | ||
357 | delta = calc_delta_mine(delta, | ||
358 | cfs_rq_of(se)->load.weight, &se->load); | ||
359 | } | ||
360 | |||
361 | return delta; | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * The idea is to set a period in which each task runs once. | 337 | * The idea is to set a period in which each task runs once. |
366 | * | 338 | * |
367 | * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch | 339 | * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch |
@@ -390,54 +362,47 @@ static u64 __sched_period(unsigned long nr_running) | |||
390 | */ | 362 | */ |
391 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) | 363 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) |
392 | { | 364 | { |
393 | return calc_delta_weight(__sched_period(cfs_rq->nr_running), se); | 365 | u64 slice = __sched_period(cfs_rq->nr_running); |
366 | |||
367 | for_each_sched_entity(se) { | ||
368 | cfs_rq = cfs_rq_of(se); | ||
369 | |||
370 | slice *= se->load.weight; | ||
371 | do_div(slice, cfs_rq->load.weight); | ||
372 | } | ||
373 | |||
374 | |||
375 | return slice; | ||
394 | } | 376 | } |
395 | 377 | ||
396 | /* | 378 | /* |
397 | * We calculate the vruntime slice of a to be inserted task | 379 | * We calculate the vruntime slice of a to be inserted task |
398 | * | 380 | * |
399 | * vs = s*rw/w = p | 381 | * vs = s/w = p/rw |
400 | */ | 382 | */ |
401 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) | 383 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) |
402 | { | 384 | { |
403 | unsigned long nr_running = cfs_rq->nr_running; | 385 | unsigned long nr_running = cfs_rq->nr_running; |
386 | unsigned long weight; | ||
387 | u64 vslice; | ||
404 | 388 | ||
405 | if (!se->on_rq) | 389 | if (!se->on_rq) |
406 | nr_running++; | 390 | nr_running++; |
407 | 391 | ||
408 | return __sched_period(nr_running); | 392 | vslice = __sched_period(nr_running); |
409 | } | ||
410 | |||
411 | /* | ||
412 | * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in | ||
413 | * that it favours >=0 over <0. | ||
414 | * | ||
415 | * -20 | | ||
416 | * | | ||
417 | * 0 --------+------- | ||
418 | * .' | ||
419 | * 19 .' | ||
420 | * | ||
421 | */ | ||
422 | static unsigned long | ||
423 | calc_delta_asym(unsigned long delta, struct sched_entity *se) | ||
424 | { | ||
425 | struct load_weight lw = { | ||
426 | .weight = NICE_0_LOAD, | ||
427 | .inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT) | ||
428 | }; | ||
429 | 393 | ||
430 | for_each_sched_entity(se) { | 394 | for_each_sched_entity(se) { |
431 | struct load_weight *se_lw = &se->load; | 395 | cfs_rq = cfs_rq_of(se); |
432 | 396 | ||
433 | if (se->load.weight < NICE_0_LOAD) | 397 | weight = cfs_rq->load.weight; |
434 | se_lw = &lw; | 398 | if (!se->on_rq) |
399 | weight += se->load.weight; | ||
435 | 400 | ||
436 | delta = calc_delta_mine(delta, | 401 | vslice *= NICE_0_LOAD; |
437 | cfs_rq_of(se)->load.weight, se_lw); | 402 | do_div(vslice, weight); |
438 | } | 403 | } |
439 | 404 | ||
440 | return delta; | 405 | return vslice; |
441 | } | 406 | } |
442 | 407 | ||
443 | /* | 408 | /* |
@@ -454,7 +419,11 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, | |||
454 | 419 | ||
455 | curr->sum_exec_runtime += delta_exec; | 420 | curr->sum_exec_runtime += delta_exec; |
456 | schedstat_add(cfs_rq, exec_clock, delta_exec); | 421 | schedstat_add(cfs_rq, exec_clock, delta_exec); |
457 | delta_exec_weighted = calc_delta_fair(delta_exec, curr); | 422 | delta_exec_weighted = delta_exec; |
423 | if (unlikely(curr->load.weight != NICE_0_LOAD)) { | ||
424 | delta_exec_weighted = calc_delta_fair(delta_exec_weighted, | ||
425 | &curr->load); | ||
426 | } | ||
458 | curr->vruntime += delta_exec_weighted; | 427 | curr->vruntime += delta_exec_weighted; |
459 | } | 428 | } |
460 | 429 | ||
@@ -541,27 +510,10 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
541 | * Scheduling class queueing methods: | 510 | * Scheduling class queueing methods: |
542 | */ | 511 | */ |
543 | 512 | ||
544 | #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED | ||
545 | static void | ||
546 | add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight) | ||
547 | { | ||
548 | cfs_rq->task_weight += weight; | ||
549 | } | ||
550 | #else | ||
551 | static inline void | ||
552 | add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight) | ||
553 | { | ||
554 | } | ||
555 | #endif | ||
556 | |||
557 | static void | 513 | static void |
558 | account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) | 514 | account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) |
559 | { | 515 | { |
560 | update_load_add(&cfs_rq->load, se->load.weight); | 516 | update_load_add(&cfs_rq->load, se->load.weight); |
561 | if (!parent_entity(se)) | ||
562 | inc_cpu_load(rq_of(cfs_rq), se->load.weight); | ||
563 | if (entity_is_task(se)) | ||
564 | add_cfs_task_weight(cfs_rq, se->load.weight); | ||
565 | cfs_rq->nr_running++; | 517 | cfs_rq->nr_running++; |
566 | se->on_rq = 1; | 518 | se->on_rq = 1; |
567 | list_add(&se->group_node, &cfs_rq->tasks); | 519 | list_add(&se->group_node, &cfs_rq->tasks); |
@@ -571,10 +523,6 @@ static void | |||
571 | account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) | 523 | account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) |
572 | { | 524 | { |
573 | update_load_sub(&cfs_rq->load, se->load.weight); | 525 | update_load_sub(&cfs_rq->load, se->load.weight); |
574 | if (!parent_entity(se)) | ||
575 | dec_cpu_load(rq_of(cfs_rq), se->load.weight); | ||
576 | if (entity_is_task(se)) | ||
577 | add_cfs_task_weight(cfs_rq, -se->load.weight); | ||
578 | cfs_rq->nr_running--; | 526 | cfs_rq->nr_running--; |
579 | se->on_rq = 0; | 527 | se->on_rq = 0; |
580 | list_del_init(&se->group_node); | 528 | list_del_init(&se->group_node); |
@@ -661,17 +609,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
661 | 609 | ||
662 | if (!initial) { | 610 | if (!initial) { |
663 | /* sleeps upto a single latency don't count. */ | 611 | /* sleeps upto a single latency don't count. */ |
664 | if (sched_feat(NEW_FAIR_SLEEPERS)) { | 612 | if (sched_feat(NEW_FAIR_SLEEPERS)) |
665 | unsigned long thresh = sysctl_sched_latency; | 613 | vruntime -= sysctl_sched_latency; |
666 | |||
667 | /* | ||
668 | * convert the sleeper threshold into virtual time | ||
669 | */ | ||
670 | if (sched_feat(NORMALIZED_SLEEPER)) | ||
671 | thresh = calc_delta_fair(thresh, se); | ||
672 | |||
673 | vruntime -= thresh; | ||
674 | } | ||
675 | 614 | ||
676 | /* ensure we never gain time by being placed backwards. */ | 615 | /* ensure we never gain time by being placed backwards. */ |
677 | vruntime = max_vruntime(se->vruntime, vruntime); | 616 | vruntime = max_vruntime(se->vruntime, vruntime); |
@@ -1057,16 +996,27 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, | |||
1057 | struct task_struct *curr = this_rq->curr; | 996 | struct task_struct *curr = this_rq->curr; |
1058 | unsigned long tl = this_load; | 997 | unsigned long tl = this_load; |
1059 | unsigned long tl_per_task; | 998 | unsigned long tl_per_task; |
999 | int balanced; | ||
1060 | 1000 | ||
1061 | if (!(this_sd->flags & SD_WAKE_AFFINE)) | 1001 | if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) |
1062 | return 0; | 1002 | return 0; |
1063 | 1003 | ||
1064 | /* | 1004 | /* |
1005 | * If sync wakeup then subtract the (maximum possible) | ||
1006 | * effect of the currently running task from the load | ||
1007 | * of the current CPU: | ||
1008 | */ | ||
1009 | if (sync) | ||
1010 | tl -= current->se.load.weight; | ||
1011 | |||
1012 | balanced = 100*(tl + p->se.load.weight) <= imbalance*load; | ||
1013 | |||
1014 | /* | ||
1065 | * If the currently running task will sleep within | 1015 | * If the currently running task will sleep within |
1066 | * a reasonable amount of time then attract this newly | 1016 | * a reasonable amount of time then attract this newly |
1067 | * woken task: | 1017 | * woken task: |
1068 | */ | 1018 | */ |
1069 | if (sync && curr->sched_class == &fair_sched_class) { | 1019 | if (sync && balanced && curr->sched_class == &fair_sched_class) { |
1070 | if (curr->se.avg_overlap < sysctl_sched_migration_cost && | 1020 | if (curr->se.avg_overlap < sysctl_sched_migration_cost && |
1071 | p->se.avg_overlap < sysctl_sched_migration_cost) | 1021 | p->se.avg_overlap < sysctl_sched_migration_cost) |
1072 | return 1; | 1022 | return 1; |
@@ -1075,16 +1025,8 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, | |||
1075 | schedstat_inc(p, se.nr_wakeups_affine_attempts); | 1025 | schedstat_inc(p, se.nr_wakeups_affine_attempts); |
1076 | tl_per_task = cpu_avg_load_per_task(this_cpu); | 1026 | tl_per_task = cpu_avg_load_per_task(this_cpu); |
1077 | 1027 | ||
1078 | /* | ||
1079 | * If sync wakeup then subtract the (maximum possible) | ||
1080 | * effect of the currently running task from the load | ||
1081 | * of the current CPU: | ||
1082 | */ | ||
1083 | if (sync) | ||
1084 | tl -= current->se.load.weight; | ||
1085 | |||
1086 | if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) || | 1028 | if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) || |
1087 | 100*(tl + p->se.load.weight) <= imbalance*load) { | 1029 | balanced) { |
1088 | /* | 1030 | /* |
1089 | * This domain has SD_WAKE_AFFINE and | 1031 | * This domain has SD_WAKE_AFFINE and |
1090 | * p is cache cold in this domain, and | 1032 | * p is cache cold in this domain, and |
@@ -1169,10 +1111,11 @@ static unsigned long wakeup_gran(struct sched_entity *se) | |||
1169 | unsigned long gran = sysctl_sched_wakeup_granularity; | 1111 | unsigned long gran = sysctl_sched_wakeup_granularity; |
1170 | 1112 | ||
1171 | /* | 1113 | /* |
1172 | * More easily preempt - nice tasks, while not making it harder for | 1114 | * More easily preempt - nice tasks, while not making |
1173 | * + nice tasks. | 1115 | * it harder for + nice tasks. |
1174 | */ | 1116 | */ |
1175 | gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se); | 1117 | if (unlikely(se->load.weight > NICE_0_LOAD)) |
1118 | gran = calc_delta_fair(gran, &se->load); | ||
1176 | 1119 | ||
1177 | return gran; | 1120 | return gran; |
1178 | } | 1121 | } |
@@ -1366,90 +1309,75 @@ static struct task_struct *load_balance_next_fair(void *arg) | |||
1366 | return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator); | 1309 | return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator); |
1367 | } | 1310 | } |
1368 | 1311 | ||
1369 | static unsigned long | 1312 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1370 | __load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 1313 | static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) |
1371 | unsigned long max_load_move, struct sched_domain *sd, | ||
1372 | enum cpu_idle_type idle, int *all_pinned, int *this_best_prio, | ||
1373 | struct cfs_rq *cfs_rq) | ||
1374 | { | 1314 | { |
1375 | struct rq_iterator cfs_rq_iterator; | 1315 | struct sched_entity *curr; |
1316 | struct task_struct *p; | ||
1376 | 1317 | ||
1377 | cfs_rq_iterator.start = load_balance_start_fair; | 1318 | if (!cfs_rq->nr_running || !first_fair(cfs_rq)) |
1378 | cfs_rq_iterator.next = load_balance_next_fair; | 1319 | return MAX_PRIO; |
1379 | cfs_rq_iterator.arg = cfs_rq; | 1320 | |
1321 | curr = cfs_rq->curr; | ||
1322 | if (!curr) | ||
1323 | curr = __pick_next_entity(cfs_rq); | ||
1324 | |||
1325 | p = task_of(curr); | ||
1380 | 1326 | ||
1381 | return balance_tasks(this_rq, this_cpu, busiest, | 1327 | return p->prio; |
1382 | max_load_move, sd, idle, all_pinned, | ||
1383 | this_best_prio, &cfs_rq_iterator); | ||
1384 | } | 1328 | } |
1329 | #endif | ||
1385 | 1330 | ||
1386 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1387 | static unsigned long | 1331 | static unsigned long |
1388 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 1332 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
1389 | unsigned long max_load_move, | 1333 | unsigned long max_load_move, |
1390 | struct sched_domain *sd, enum cpu_idle_type idle, | 1334 | struct sched_domain *sd, enum cpu_idle_type idle, |
1391 | int *all_pinned, int *this_best_prio) | 1335 | int *all_pinned, int *this_best_prio) |
1392 | { | 1336 | { |
1337 | struct cfs_rq *busy_cfs_rq; | ||
1393 | long rem_load_move = max_load_move; | 1338 | long rem_load_move = max_load_move; |
1394 | int busiest_cpu = cpu_of(busiest); | 1339 | struct rq_iterator cfs_rq_iterator; |
1395 | struct task_group *tg; | ||
1396 | |||
1397 | rcu_read_lock(); | ||
1398 | list_for_each_entry(tg, &task_groups, list) { | ||
1399 | long imbalance; | ||
1400 | unsigned long this_weight, busiest_weight; | ||
1401 | long rem_load, max_load, moved_load; | ||
1402 | |||
1403 | /* | ||
1404 | * empty group | ||
1405 | */ | ||
1406 | if (!aggregate(tg, sd)->task_weight) | ||
1407 | continue; | ||
1408 | |||
1409 | rem_load = rem_load_move * aggregate(tg, sd)->rq_weight; | ||
1410 | rem_load /= aggregate(tg, sd)->load + 1; | ||
1411 | |||
1412 | this_weight = tg->cfs_rq[this_cpu]->task_weight; | ||
1413 | busiest_weight = tg->cfs_rq[busiest_cpu]->task_weight; | ||
1414 | 1340 | ||
1415 | imbalance = (busiest_weight - this_weight) / 2; | 1341 | cfs_rq_iterator.start = load_balance_start_fair; |
1342 | cfs_rq_iterator.next = load_balance_next_fair; | ||
1416 | 1343 | ||
1417 | if (imbalance < 0) | 1344 | for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { |
1418 | imbalance = busiest_weight; | 1345 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1346 | struct cfs_rq *this_cfs_rq; | ||
1347 | long imbalance; | ||
1348 | unsigned long maxload; | ||
1419 | 1349 | ||
1420 | max_load = max(rem_load, imbalance); | 1350 | this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu); |
1421 | moved_load = __load_balance_fair(this_rq, this_cpu, busiest, | ||
1422 | max_load, sd, idle, all_pinned, this_best_prio, | ||
1423 | tg->cfs_rq[busiest_cpu]); | ||
1424 | 1351 | ||
1425 | if (!moved_load) | 1352 | imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight; |
1353 | /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */ | ||
1354 | if (imbalance <= 0) | ||
1426 | continue; | 1355 | continue; |
1427 | 1356 | ||
1428 | move_group_shares(tg, sd, busiest_cpu, this_cpu); | 1357 | /* Don't pull more than imbalance/2 */ |
1358 | imbalance /= 2; | ||
1359 | maxload = min(rem_load_move, imbalance); | ||
1429 | 1360 | ||
1430 | moved_load *= aggregate(tg, sd)->load; | 1361 | *this_best_prio = cfs_rq_best_prio(this_cfs_rq); |
1431 | moved_load /= aggregate(tg, sd)->rq_weight + 1; | 1362 | #else |
1363 | # define maxload rem_load_move | ||
1364 | #endif | ||
1365 | /* | ||
1366 | * pass busy_cfs_rq argument into | ||
1367 | * load_balance_[start|next]_fair iterators | ||
1368 | */ | ||
1369 | cfs_rq_iterator.arg = busy_cfs_rq; | ||
1370 | rem_load_move -= balance_tasks(this_rq, this_cpu, busiest, | ||
1371 | maxload, sd, idle, all_pinned, | ||
1372 | this_best_prio, | ||
1373 | &cfs_rq_iterator); | ||
1432 | 1374 | ||
1433 | rem_load_move -= moved_load; | 1375 | if (rem_load_move <= 0) |
1434 | if (rem_load_move < 0) | ||
1435 | break; | 1376 | break; |
1436 | } | 1377 | } |
1437 | rcu_read_unlock(); | ||
1438 | 1378 | ||
1439 | return max_load_move - rem_load_move; | 1379 | return max_load_move - rem_load_move; |
1440 | } | 1380 | } |
1441 | #else | ||
1442 | static unsigned long | ||
1443 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
1444 | unsigned long max_load_move, | ||
1445 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
1446 | int *all_pinned, int *this_best_prio) | ||
1447 | { | ||
1448 | return __load_balance_fair(this_rq, this_cpu, busiest, | ||
1449 | max_load_move, sd, idle, all_pinned, | ||
1450 | this_best_prio, &busiest->cfs); | ||
1451 | } | ||
1452 | #endif | ||
1453 | 1381 | ||
1454 | static int | 1382 | static int |
1455 | move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 1383 | move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |