Diffstat (limited to 'kernel/sched_fair.c')
 -rw-r--r--   kernel/sched_fair.c | 99
 1 file changed, 93 insertions(+), 6 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 76411950ff3b..5a2089492a98 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -706,6 +706,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+/* we need this in update_cfs_load and load-balance functions below */
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
 # ifdef CONFIG_SMP
 static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
 					    int global_update)
@@ -728,7 +730,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
 	u64 now, delta;
 	unsigned long load = cfs_rq->load.weight;
 
-	if (cfs_rq->tg == &root_task_group)
+	if (cfs_rq->tg == &root_task_group || throttled_hierarchy(cfs_rq))
 		return;
 
 	now = rq_of(cfs_rq)->clock_task;
@@ -837,7 +839,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq)
 
 	tg = cfs_rq->tg;
 	se = tg->se[cpu_of(rq_of(cfs_rq))];
-	if (!se)
+	if (!se || throttled_hierarchy(cfs_rq))
 		return;
 #ifndef CONFIG_SMP
 	if (likely(se->load.weight == tg->shares))
@@ -1403,6 +1405,65 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
 	return cfs_rq->throttled;
 }
 
+/* check whether cfs_rq, or any parent, is throttled */
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
+{
+	return cfs_rq->throttle_count;
+}
+
+/*
+ * Ensure that neither of the group entities corresponding to src_cpu or
+ * dest_cpu are members of a throttled hierarchy when performing group
+ * load-balance operations.
+ */
+static inline int throttled_lb_pair(struct task_group *tg,
+				    int src_cpu, int dest_cpu)
+{
+	struct cfs_rq *src_cfs_rq, *dest_cfs_rq;
+
+	src_cfs_rq = tg->cfs_rq[src_cpu];
+	dest_cfs_rq = tg->cfs_rq[dest_cpu];
+
+	return throttled_hierarchy(src_cfs_rq) ||
+	       throttled_hierarchy(dest_cfs_rq);
+}
+
+/* updated child weight may affect parent so we have to do this bottom up */
+static int tg_unthrottle_up(struct task_group *tg, void *data)
+{
+	struct rq *rq = data;
+	struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
+
+	cfs_rq->throttle_count--;
+#ifdef CONFIG_SMP
+	if (!cfs_rq->throttle_count) {
+		u64 delta = rq->clock_task - cfs_rq->load_stamp;
+
+		/* leaving throttled state, advance shares averaging windows */
+		cfs_rq->load_stamp += delta;
+		cfs_rq->load_last += delta;
+
+		/* update entity weight now that we are on_rq again */
+		update_cfs_shares(cfs_rq);
+	}
+#endif
+
+	return 0;
+}
+
+static int tg_throttle_down(struct task_group *tg, void *data)
+{
+	struct rq *rq = data;
+	struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
+
+	/* group is entering throttled state, record last load */
+	if (!cfs_rq->throttle_count)
+		update_cfs_load(cfs_rq, 0);
+	cfs_rq->throttle_count++;
+
+	return 0;
+}
+
 static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
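
The tg_throttle_down()/tg_unthrottle_up() callbacks added above rely on walk_tg_tree_from() visiting a group before its children on the way down and after them on the way up, so throttle_count ends up nonzero for every cfs_rq sitting under a throttled ancestor and throttled_hierarchy() reduces to a single counter test. A minimal userspace sketch of that counting scheme follows; the types, tree layout, and helper names are invented for illustration and are not kernel code.

/* toy_throttle_walk.c - standalone sketch of the down/up counting idea */
#include <stdio.h>

struct group {
	const char *name;
	int throttle_count;
	struct group *children[4];	/* NULL-terminated for the toy walk */
};

/* visit 'from' and everything below it, calling down() before the
 * children and up() after them - the same shape as walk_tg_tree_from() */
static void walk_from(struct group *from,
		      void (*down)(struct group *),
		      void (*up)(struct group *))
{
	down(from);
	for (int i = 0; from->children[i]; i++)
		walk_from(from->children[i], down, up);
	up(from);
}

static void throttle_down(struct group *g)  { g->throttle_count++; }
static void unthrottle_up(struct group *g)  { g->throttle_count--; }
static void nop(struct group *g)            { (void)g; }

static int throttled_hierarchy(struct group *g)
{
	return g->throttle_count;
}

int main(void)
{
	struct group leaf  = { "leaf",  0, { NULL } };
	struct group child = { "child", 0, { &leaf, NULL } };
	struct group root  = { "root",  0, { &child, NULL } };

	/* throttle "child": the whole subtree below it gets marked */
	walk_from(&child, throttle_down, nop);
	printf("root=%d child=%d leaf=%d\n",
	       throttled_hierarchy(&root),
	       throttled_hierarchy(&child),
	       throttled_hierarchy(&leaf));	/* prints 0 1 1 */

	/* unthrottle bottom-up, as tg_unthrottle_up() does */
	walk_from(&child, nop, unthrottle_up);
	printf("root=%d child=%d leaf=%d\n",
	       throttled_hierarchy(&root),
	       throttled_hierarchy(&child),
	       throttled_hierarchy(&leaf));	/* prints 0 0 0 */

	return 0;
}
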
@@ -1413,7 +1474,9 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
 
 	/* account load preceding throttle */
-	update_cfs_load(cfs_rq, 0);
+	rcu_read_lock();
+	walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
+	rcu_read_unlock();
 
 	task_delta = cfs_rq->h_nr_running;
 	for_each_sched_entity(se) {
@@ -1454,6 +1517,10 @@ static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	list_del_rcu(&cfs_rq->throttled_list);
 	raw_spin_unlock(&cfs_b->lock);
 
+	update_rq_clock(rq);
+	/* update hierarchical throttle state */
+	walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
+
 	if (!cfs_rq->load.weight)
 		return;
 
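
unthrottle_cfs_rq() calls update_rq_clock() before the tree walk so that tg_unthrottle_up() computes its delta against a current rq->clock_task; advancing load_stamp and load_last by that delta keeps the throttled interval out of the shares-averaging window ("leaving throttled state, advance shares averaging windows" in the hunk above). A toy calculation with made-up numbers, plain C rather than kernel code:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* hypothetical values, milliseconds */
	uint64_t load_stamp = 1000;	/* last load update, taken at throttle time */
	uint64_t load_last  = 990;	/* last recorded load activity */
	uint64_t clock_task = 1250;	/* now, after update_rq_clock() */

	uint64_t delta = clock_task - load_stamp;	/* ~time spent throttled */

	load_stamp += delta;	/* averaging window restarts "now" */
	load_last  += delta;	/* shifted by the same amount */

	printf("delta=%llums load_stamp=%llums load_last=%llums\n",
	       (unsigned long long)delta,
	       (unsigned long long)load_stamp,
	       (unsigned long long)load_last);
	/* the next update_cfs_load() sees no elapsed window time for the
	 * throttled period, so the group's load is not decayed over it */
	return 0;
}
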
@@ -1598,6 +1665,17 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
 {
 	return 0;
 }
+
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
+{
+	return 0;
+}
+
+static inline int throttled_lb_pair(struct task_group *tg,
+				    int src_cpu, int dest_cpu)
+{
+	return 0;
+}
 #endif
 
 /**************************************************
@@ -2493,6 +2571,9 @@ move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
 
 	for_each_leaf_cfs_rq(busiest, cfs_rq) {
 		list_for_each_entry_safe(p, n, &cfs_rq->tasks, se.group_node) {
+			if (throttled_lb_pair(task_group(p),
+					      busiest->cpu, this_cpu))
+				break;
 
 			if (!can_migrate_task(p, busiest, this_cpu,
 					      sd, idle, &pinned))
@@ -2608,8 +2689,13 @@ static void update_shares(int cpu)
 	 * Iterates the task_group tree in a bottom up fashion, see
 	 * list_add_leaf_cfs_rq() for details.
 	 */
-	for_each_leaf_cfs_rq(rq, cfs_rq)
+	for_each_leaf_cfs_rq(rq, cfs_rq) {
+		/* throttled entities do not contribute to load */
+		if (throttled_hierarchy(cfs_rq))
+			continue;
+
 		update_shares_cpu(cfs_rq->tg, cpu);
+	}
 	rcu_read_unlock();
 }
 
@@ -2659,9 +2745,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		u64 rem_load, moved_load;
 
 		/*
-		 * empty group
+		 * empty group or part of a throttled hierarchy
 		 */
-		if (!busiest_cfs_rq->task_weight)
+		if (!busiest_cfs_rq->task_weight ||
+		    throttled_lb_pair(busiest_cfs_rq->tg, cpu_of(busiest), this_cpu))
 			continue;
 
 		rem_load = (u64)rem_load_move * busiest_weight;
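
Both move_one_task() and load_balance_fair() now use throttled_lb_pair() as a filter: a task is only a migration candidate if its group's cfs_rq is unthrottled on the source and on the destination CPU. A self-contained sketch of that filter over a toy candidate list; every structure, name, and value below is invented for illustration and is not the kernel's implementation.

/* toy_lb_filter.c - sketch of the "skip throttled on either end" check */
#include <stdio.h>

#define NR_CPUS 2

struct toy_group {
	const char *name;
	int throttle_count[NR_CPUS];	/* per-CPU "hierarchy throttled" count */
};

static int throttled_hierarchy(struct toy_group *tg, int cpu)
{
	return tg->throttle_count[cpu];
}

static int throttled_lb_pair(struct toy_group *tg, int src_cpu, int dst_cpu)
{
	/* refuse migration if either end of the pair is throttled */
	return throttled_hierarchy(tg, src_cpu) ||
	       throttled_hierarchy(tg, dst_cpu);
}

int main(void)
{
	/* group A runnable everywhere, group B throttled on CPU 1 */
	struct toy_group a = { "A", { 0, 0 } };
	struct toy_group b = { "B", { 0, 1 } };
	struct toy_group *tasks[] = { &a, &b, &a };

	for (unsigned i = 0; i < sizeof(tasks) / sizeof(tasks[0]); i++) {
		struct toy_group *tg = tasks[i];

		if (throttled_lb_pair(tg, 0, 1)) {
			printf("task %u (group %s): skipped\n", i, tg->name);
			continue;
		}
		printf("task %u (group %s): can migrate 0 -> 1\n", i, tg->name);
	}
	return 0;
}
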