path: root/kernel/sched_fair.c
author		Paul Turner <pjt@google.com>	2011-07-21 12:43:36 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-08-14 06:03:40 -0400
commit		64660c864f46202b932b911a69deb09805bdbaf8 (patch)
tree		cf49455195a184e4962c6cac2c39c4f690d74ddc	/kernel/sched_fair.c
parent		8277434ef1202ce30315f8edb3fc760aa6e74493 (diff)
sched: Prevent interactions with throttled entities
From the perspective of load-balance and shares distribution, throttled entities should be invisible.

However, both of these operations work on 'active' lists and are not inherently aware of what group hierarchies may be present. In some cases this may be side-stepped (e.g. we could sideload via tg_load_down in load balance) while in others (e.g. update_shares()) it is more difficult to compute without incurring some O(n^2) costs.

Instead, track hierarchical throttled state at time of transition. This allows us to easily identify whether an entity belongs to a throttled hierarchy and avoid incorrect interactions with it.

Also, when an entity leaves a throttled hierarchy we need to advance its time averaging for shares averaging so that the elapsed throttled time is not considered as part of the cfs_rq's operation.

We also use this information to prevent buddy interactions in the wakeup and yield_to() paths.

Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110721184757.777916795@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
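To make the mechanism concrete, here is a minimal user-space sketch (not kernel code; the struct and function names below are illustrative and do not appear in the patch) of the idea: each group keeps a counter that is adjusted once, at the throttle/unthrottle transition, by walking the affected subtree, so "is this entity inside a throttled hierarchy?" becomes a constant-time read instead of a walk up to the root.

/*
 * Sketch only: per-group throttle counter maintained at the transition,
 * giving an O(1) hierarchical-throttle membership test.
 */
#include <stdio.h>

#define MAX_CHILDREN 4

struct group {
	const char *name;
	int throttle_count;		/* > 0: self or some ancestor is throttled */
	struct group *children[MAX_CHILDREN];
	int nr_children;
};

/* analogous to throttled_hierarchy(): constant-time membership test */
static int in_throttled_hierarchy(const struct group *g)
{
	return g->throttle_count;
}

/* walk the subtree once, at the throttle/unthrottle transition */
static void walk_subtree(struct group *g, int delta)
{
	g->throttle_count += delta;
	for (int i = 0; i < g->nr_children; i++)
		walk_subtree(g->children[i], delta);
}

static void throttle(struct group *g)   { walk_subtree(g, +1); }
static void unthrottle(struct group *g) { walk_subtree(g, -1); }

int main(void)
{
	struct group leaf = { .name = "leaf" };
	struct group mid  = { .name = "mid",  .children = { &leaf }, .nr_children = 1 };
	struct group root = { .name = "root", .children = { &mid  }, .nr_children = 1 };

	throttle(&mid);			/* throttling "mid" covers "leaf" too */
	printf("leaf throttled? %d\n", in_throttled_hierarchy(&leaf));	/* 1 */
	printf("root throttled? %d\n", in_throttled_hierarchy(&root));	/* 0 */

	unthrottle(&mid);
	printf("leaf throttled? %d\n", in_throttled_hierarchy(&leaf));	/* 0 */
	return 0;
}

In the actual patch the subtree walk is done with walk_tg_tree_from() at throttle/unthrottle time, and the counter lives in cfs_rq->throttle_count, as the diff below shows.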
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--	kernel/sched_fair.c	99
1 files changed, 93 insertions, 6 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 76411950ff3b..5a2089492a98 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -706,6 +706,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+/* we need this in update_cfs_load and load-balance functions below */
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
 # ifdef CONFIG_SMP
 static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
 					    int global_update)
@@ -728,7 +730,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
 	u64 now, delta;
 	unsigned long load = cfs_rq->load.weight;
 
-	if (cfs_rq->tg == &root_task_group)
+	if (cfs_rq->tg == &root_task_group || throttled_hierarchy(cfs_rq))
 		return;
 
 	now = rq_of(cfs_rq)->clock_task;
@@ -837,7 +839,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq)
 
 	tg = cfs_rq->tg;
 	se = tg->se[cpu_of(rq_of(cfs_rq))];
-	if (!se)
+	if (!se || throttled_hierarchy(cfs_rq))
 		return;
 #ifndef CONFIG_SMP
 	if (likely(se->load.weight == tg->shares))
@@ -1403,6 +1405,65 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
 	return cfs_rq->throttled;
 }
 
+/* check whether cfs_rq, or any parent, is throttled */
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
+{
+	return cfs_rq->throttle_count;
+}
+
+/*
+ * Ensure that neither of the group entities corresponding to src_cpu or
+ * dest_cpu are members of a throttled hierarchy when performing group
+ * load-balance operations.
+ */
+static inline int throttled_lb_pair(struct task_group *tg,
+				    int src_cpu, int dest_cpu)
+{
+	struct cfs_rq *src_cfs_rq, *dest_cfs_rq;
+
+	src_cfs_rq = tg->cfs_rq[src_cpu];
+	dest_cfs_rq = tg->cfs_rq[dest_cpu];
+
+	return throttled_hierarchy(src_cfs_rq) ||
+	       throttled_hierarchy(dest_cfs_rq);
+}
+
+/* updated child weight may affect parent so we have to do this bottom up */
+static int tg_unthrottle_up(struct task_group *tg, void *data)
+{
+	struct rq *rq = data;
+	struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
+
+	cfs_rq->throttle_count--;
+#ifdef CONFIG_SMP
+	if (!cfs_rq->throttle_count) {
+		u64 delta = rq->clock_task - cfs_rq->load_stamp;
+
+		/* leaving throttled state, advance shares averaging windows */
+		cfs_rq->load_stamp += delta;
+		cfs_rq->load_last += delta;
+
+		/* update entity weight now that we are on_rq again */
+		update_cfs_shares(cfs_rq);
+	}
+#endif
+
+	return 0;
+}
+
+static int tg_throttle_down(struct task_group *tg, void *data)
+{
+	struct rq *rq = data;
+	struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
+
+	/* group is entering throttled state, record last load */
+	if (!cfs_rq->throttle_count)
+		update_cfs_load(cfs_rq, 0);
+	cfs_rq->throttle_count++;
+
+	return 0;
+}
+
 static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
@@ -1413,7 +1474,9 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
 
 	/* account load preceding throttle */
-	update_cfs_load(cfs_rq, 0);
+	rcu_read_lock();
+	walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
+	rcu_read_unlock();
 
 	task_delta = cfs_rq->h_nr_running;
 	for_each_sched_entity(se) {
@@ -1454,6 +1517,10 @@ static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	list_del_rcu(&cfs_rq->throttled_list);
 	raw_spin_unlock(&cfs_b->lock);
 
+	update_rq_clock(rq);
+	/* update hierarchical throttle state */
+	walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
+
 	if (!cfs_rq->load.weight)
 		return;
 
@@ -1598,6 +1665,17 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
 {
 	return 0;
 }
+
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
+{
+	return 0;
+}
+
+static inline int throttled_lb_pair(struct task_group *tg,
+				    int src_cpu, int dest_cpu)
+{
+	return 0;
+}
 #endif
 
 /**************************************************
@@ -2493,6 +2571,9 @@ move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
 
 	for_each_leaf_cfs_rq(busiest, cfs_rq) {
 		list_for_each_entry_safe(p, n, &cfs_rq->tasks, se.group_node) {
+			if (throttled_lb_pair(task_group(p),
+					      busiest->cpu, this_cpu))
+				break;
 
 			if (!can_migrate_task(p, busiest, this_cpu,
 						sd, idle, &pinned))
@@ -2608,8 +2689,13 @@ static void update_shares(int cpu)
 	 * Iterates the task_group tree in a bottom up fashion, see
 	 * list_add_leaf_cfs_rq() for details.
 	 */
-	for_each_leaf_cfs_rq(rq, cfs_rq)
+	for_each_leaf_cfs_rq(rq, cfs_rq) {
+		/* throttled entities do not contribute to load */
+		if (throttled_hierarchy(cfs_rq))
+			continue;
+
 		update_shares_cpu(cfs_rq->tg, cpu);
+	}
 	rcu_read_unlock();
 }
 
@@ -2659,9 +2745,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		u64 rem_load, moved_load;
 
 		/*
-		 * empty group
+		 * empty group or part of a throttled hierarchy
 		 */
-		if (!busiest_cfs_rq->task_weight)
+		if (!busiest_cfs_rq->task_weight ||
+		    throttled_lb_pair(busiest_cfs_rq->tg, cpu_of(busiest), this_cpu))
 			continue;
 
 		rem_load = (u64)rem_load_move * busiest_weight;