Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--  kernel/sched_fair.c  118
1 file changed, 65 insertions, 53 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 433491c2dc8f..bc8ee9993814 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -135,14 +135,6 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
 	return grp->my_q;
 }
 
-/* Given a group's cfs_rq on one cpu, return its corresponding cfs_rq on
- * another cpu ('this_cpu')
- */
-static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
-{
-	return cfs_rq->tg->cfs_rq[this_cpu];
-}
-
 static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	if (!cfs_rq->on_list) {
@@ -271,11 +263,6 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
 	return NULL;
 }
 
-static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
-{
-	return &cpu_rq(this_cpu)->cfs;
-}
-
 static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
 }
@@ -334,11 +321,6 @@ static inline int entity_before(struct sched_entity *a,
 	return (s64)(a->vruntime - b->vruntime) < 0;
 }
 
-static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-	return se->vruntime - cfs_rq->min_vruntime;
-}
-
 static void update_min_vruntime(struct cfs_rq *cfs_rq)
 {
 	u64 vruntime = cfs_rq->min_vruntime;
@@ -372,7 +354,6 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
 	struct rb_node *parent = NULL;
 	struct sched_entity *entry;
-	s64 key = entity_key(cfs_rq, se);
 	int leftmost = 1;
 
 	/*
@@ -385,7 +366,7 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		 * We dont care about collisions. Nodes with
 		 * the same key stay together.
 		 */
-		if (key < entity_key(cfs_rq, entry)) {
+		if (entity_before(se, entry)) {
 			link = &parent->rb_left;
 		} else {
 			link = &parent->rb_right;
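The replacement works because comparing (vruntime - min_vruntime) keys and comparing vruntimes directly give the same rbtree ordering whenever both keys fit in s64, which the scheduler keeps true by tracking min_vruntime; entity_before() already encodes the wrap-safe comparison. A minimal userspace sketch of the equivalence (simplified standalone types, not part of the patch):

/* Standalone illustration: the old key comparison and the direct
 * entity_before()-style comparison agree for vruntimes within an
 * s64-sized window of min_vruntime. */
#include <assert.h>
#include <stdint.h>

static int entity_before_like(uint64_t a_vruntime, uint64_t b_vruntime)
{
        return (int64_t)(a_vruntime - b_vruntime) < 0;
}

int main(void)
{
        uint64_t min_vruntime = 1000, a = 900, b = 1100;

        /* old: compare keys relative to min_vruntime */
        int by_key = (int64_t)(a - min_vruntime) < (int64_t)(b - min_vruntime);
        /* new: compare vruntimes directly */
        int direct = entity_before_like(a, b);

        assert(by_key == direct);
        return 0;
}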
@@ -1336,7 +1317,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	}
 
 	for_each_sched_entity(se) {
-		struct cfs_rq *cfs_rq = cfs_rq_of(se);
+		cfs_rq = cfs_rq_of(se);
 
 		update_cfs_load(cfs_rq, 0);
 		update_cfs_shares(cfs_rq);
@@ -1370,13 +1351,16 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 			 */
 			if (task_sleep && parent_entity(se))
 				set_next_buddy(parent_entity(se));
+
+			/* avoid re-evaluating load for this entity */
+			se = parent_entity(se);
 			break;
 		}
 		flags |= DEQUEUE_SLEEP;
 	}
 
 	for_each_sched_entity(se) {
-		struct cfs_rq *cfs_rq = cfs_rq_of(se);
+		cfs_rq = cfs_rq_of(se);
 
 		update_cfs_load(cfs_rq, 0);
 		update_cfs_shares(cfs_rq);
@@ -1481,7 +1465,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 	 * effect of the currently running task from the load
 	 * of the current CPU:
 	 */
-	rcu_read_lock();
 	if (sync) {
 		tg = task_group(current);
 		weight = current->se.load.weight;
@@ -1517,7 +1500,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 		balanced = this_eff_load <= prev_eff_load;
 	} else
 		balanced = true;
-	rcu_read_unlock();
 
 	/*
 	 * If the currently running task will sleep within
@@ -1585,7 +1567,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 	}
 
 	/* Adjust by relative CPU power of the group */
-	avg_load = (avg_load * SCHED_POWER_SCALE) / group->cpu_power;
+	avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power;
 
 	if (local_group) {
 		this_load = avg_load;
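The group->cpu_power to group->sgp->power conversion that starts here (and runs through the load-balance hunks below) assumes the per-group power fields now sit behind a separate, shared object reached through the sgp pointer. The real definition is introduced outside this file and is not part of this diff; the shape it implies is roughly the following sketch (field layout assumed):

/* Sketch of the assumed indirection; the actual definition lives in the
 * core scheduler code and may differ. The power fields move out of
 * struct sched_group into a separately allocated, refcounted object so
 * that groups spanning the same cpus can share one power value. */
struct sched_group_power {
        atomic_t ref;                   /* shared by groups spanning these cpus */
        unsigned int power;             /* was sched_group::cpu_power */
        unsigned int power_orig;        /* was sched_group::cpu_power_orig */
};

struct sched_group {
        struct sched_group *next;       /* circular list within the domain */
        struct sched_group_power *sgp;  /* group->sgp->power replaces group->cpu_power */
        /* cpumask and statistics members unchanged by this diff */
};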
@@ -1921,8 +1903,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	if (!sched_feat(WAKEUP_PREEMPT))
 		return;
 
-	update_curr(cfs_rq);
 	find_matching_se(&se, &pse);
+	update_curr(cfs_rq_of(se));
 	BUG_ON(!pse);
 	if (wakeup_preempt_entity(se, pse) == 1) {
 		/*
@@ -2231,11 +2213,43 @@ static void update_shares(int cpu)
 	struct rq *rq = cpu_rq(cpu);
 
 	rcu_read_lock();
+	/*
+	 * Iterates the task_group tree in a bottom up fashion, see
+	 * list_add_leaf_cfs_rq() for details.
+	 */
 	for_each_leaf_cfs_rq(rq, cfs_rq)
 		update_shares_cpu(cfs_rq->tg, cpu);
 	rcu_read_unlock();
 }
 
+/*
+ * Compute the cpu's hierarchical load factor for each task group.
+ * This needs to be done in a top-down fashion because the load of a child
+ * group is a fraction of its parents load.
+ */
+static int tg_load_down(struct task_group *tg, void *data)
+{
+	unsigned long load;
+	long cpu = (long)data;
+
+	if (!tg->parent) {
+		load = cpu_rq(cpu)->load.weight;
+	} else {
+		load = tg->parent->cfs_rq[cpu]->h_load;
+		load *= tg->se[cpu]->load.weight;
+		load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
+	}
+
+	tg->cfs_rq[cpu]->h_load = load;
+
+	return 0;
+}
+
+static void update_h_load(long cpu)
+{
+	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
@@ -2243,14 +2257,12 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  int *all_pinned)
 {
 	long rem_load_move = max_load_move;
-	int busiest_cpu = cpu_of(busiest);
-	struct task_group *tg;
+	struct cfs_rq *busiest_cfs_rq;
 
 	rcu_read_lock();
-	update_h_load(busiest_cpu);
+	update_h_load(cpu_of(busiest));
 
-	list_for_each_entry_rcu(tg, &task_groups, list) {
-		struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
+	for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) {
 		unsigned long busiest_h_load = busiest_cfs_rq->h_load;
 		unsigned long busiest_weight = busiest_cfs_rq->load.weight;
 		u64 rem_load, moved_load;
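tg_load_down() propagates load top-down: a group's h_load on a cpu is its parent's h_load scaled by this group's share of the parent runqueue's weight, and load_balance_fair() now walks the per-rq leaf cfs_rq list instead of the global task_groups list to consume it. A standalone sketch of the recursion with invented weights (simplified types, not part of the patch):

/* Illustration only: root rq weight 3072, one child group whose entity
 * weight on this cpu is 1024, i.e. one third of the root cfs_rq. */
#include <stdio.h>

static unsigned long child_h_load(unsigned long parent_h_load,
                                  unsigned long se_weight,
                                  unsigned long parent_cfs_rq_weight)
{
        unsigned long load = parent_h_load;

        load *= se_weight;
        load /= parent_cfs_rq_weight + 1;       /* +1 avoids divide by zero */
        return load;
}

int main(void)
{
        unsigned long root_h_load = 3072;       /* cpu_rq(cpu)->load.weight */
        unsigned long h = child_h_load(root_h_load, 1024, 3072);

        printf("child h_load = %lu\n", h);      /* 1023: about a third of root */
        return 0;
}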
@@ -2631,7 +2643,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 		power >>= SCHED_POWER_SHIFT;
 	}
 
-	sdg->cpu_power_orig = power;
+	sdg->sgp->power_orig = power;
 
 	if (sched_feat(ARCH_POWER))
 		power *= arch_scale_freq_power(sd, cpu);
@@ -2647,7 +2659,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 		power = 1;
 
 	cpu_rq(cpu)->cpu_power = power;
-	sdg->cpu_power = power;
+	sdg->sgp->power = power;
 }
 
 static void update_group_power(struct sched_domain *sd, int cpu)
@@ -2665,11 +2677,11 @@ static void update_group_power(struct sched_domain *sd, int cpu)
 
 	group = child->groups;
 	do {
-		power += group->cpu_power;
+		power += group->sgp->power;
 		group = group->next;
 	} while (group != child->groups);
 
-	sdg->cpu_power = power;
+	sdg->sgp->power = power;
 }
 
 /*
@@ -2691,7 +2703,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 	/*
 	 * If ~90% of the cpu_power is still there, we're good.
 	 */
-	if (group->cpu_power * 32 > group->cpu_power_orig * 29)
+	if (group->sgp->power * 32 > group->sgp->power_orig * 29)
 		return 1;
 
 	return 0;
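The 32/29 factors are an integer-math encoding of the "~90%" comment: power * 32 > power_orig * 29 is the same as power/power_orig > 29/32, about 0.906. A quick check with example numbers (illustrative only, not from the patch):

/* Illustrative only: with power_orig = 1024 the cutoff sits between
 * 928 (exactly 29/32 of 1024, fails) and 929 (passes). */
#include <assert.h>

int main(void)
{
        unsigned int power_orig = 1024;

        assert(!(928u * 32 > power_orig * 29));  /* 29696 is not > 29696 */
        assert(  929u * 32 > power_orig * 29);   /* 29728 > 29696 */
        return 0;
}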
@@ -2771,7 +2783,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	}
 
 	/* Adjust by relative CPU power of the group */
-	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->cpu_power;
+	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->sgp->power;
 
 	/*
 	 * Consider the group unbalanced when the imbalance is larger
@@ -2788,7 +2800,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
 		sgs->group_imb = 1;
 
-	sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power,
+	sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power,
 						SCHED_POWER_SCALE);
 	if (!sgs->group_capacity)
 		sgs->group_capacity = fix_small_capacity(sd, group);
@@ -2877,7 +2889,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
 			return;
 
 		sds->total_load += sgs.group_load;
-		sds->total_pwr += sg->cpu_power;
+		sds->total_pwr += sg->sgp->power;
 
 		/*
 		 * In case the child domain prefers tasks go to siblings
@@ -2962,7 +2974,7 @@ static int check_asym_packing(struct sched_domain *sd,
 	if (this_cpu > busiest_cpu)
 		return 0;
 
-	*imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power,
+	*imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->sgp->power,
 				       SCHED_POWER_SCALE);
 	return 1;
 }
@@ -2993,7 +3005,7 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
 
 	scaled_busy_load_per_task = sds->busiest_load_per_task
 					 * SCHED_POWER_SCALE;
-	scaled_busy_load_per_task /= sds->busiest->cpu_power;
+	scaled_busy_load_per_task /= sds->busiest->sgp->power;
 
 	if (sds->max_load - sds->this_load + scaled_busy_load_per_task >=
 			(scaled_busy_load_per_task * imbn)) {
@@ -3007,28 +3019,28 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
 	 * moving them.
 	 */
 
-	pwr_now += sds->busiest->cpu_power *
+	pwr_now += sds->busiest->sgp->power *
 			min(sds->busiest_load_per_task, sds->max_load);
-	pwr_now += sds->this->cpu_power *
+	pwr_now += sds->this->sgp->power *
 			min(sds->this_load_per_task, sds->this_load);
 	pwr_now /= SCHED_POWER_SCALE;
 
 	/* Amount of load we'd subtract */
 	tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
-		sds->busiest->cpu_power;
+		sds->busiest->sgp->power;
 	if (sds->max_load > tmp)
-		pwr_move += sds->busiest->cpu_power *
+		pwr_move += sds->busiest->sgp->power *
 			min(sds->busiest_load_per_task, sds->max_load - tmp);
 
 	/* Amount of load we'd add */
-	if (sds->max_load * sds->busiest->cpu_power <
+	if (sds->max_load * sds->busiest->sgp->power <
 		sds->busiest_load_per_task * SCHED_POWER_SCALE)
-		tmp = (sds->max_load * sds->busiest->cpu_power) /
-			sds->this->cpu_power;
+		tmp = (sds->max_load * sds->busiest->sgp->power) /
+			sds->this->sgp->power;
 	else
 		tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
-			sds->this->cpu_power;
-	pwr_move += sds->this->cpu_power *
+			sds->this->sgp->power;
+	pwr_move += sds->this->sgp->power *
 		min(sds->this_load_per_task, sds->this_load + tmp);
 	pwr_move /= SCHED_POWER_SCALE;
 
@@ -3074,7 +3086,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
 
 		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);
 
-		load_above_capacity /= sds->busiest->cpu_power;
+		load_above_capacity /= sds->busiest->sgp->power;
 	}
 
 	/*
@@ -3090,8 +3102,8 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
 	max_pull = min(sds->max_load - sds->avg_load, load_above_capacity);
 
 	/* How much load to actually move to equalise the imbalance */
-	*imbalance = min(max_pull * sds->busiest->cpu_power,
-		(sds->avg_load - sds->this_load) * sds->this->cpu_power)
+	*imbalance = min(max_pull * sds->busiest->sgp->power,
+		(sds->avg_load - sds->this_load) * sds->this->sgp->power)
 			/ SCHED_POWER_SCALE;
 
 	/*
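Both operands of the min() are load amounts scaled by group power, so after dividing by SCHED_POWER_SCALE the result is back in plain load units for the move path. A numeric sketch with invented figures (illustrative only, not from the patch):

/* Invented numbers: the busiest group sits 512 load units above the
 * domain average, this group 308 below it, both at default power. */
#include <stdio.h>

#define SCHED_POWER_SCALE 1024UL

static unsigned long min_ul(unsigned long a, unsigned long b)
{
        return a < b ? a : b;
}

int main(void)
{
        unsigned long max_load = 1536, avg_load = 1024, this_load = 716;
        unsigned long busiest_power = 1024, this_power = 1024;
        unsigned long max_pull = max_load - avg_load;           /* 512 */

        unsigned long imbalance = min_ul(max_pull * busiest_power,
                                (avg_load - this_load) * this_power)
                                        / SCHED_POWER_SCALE;

        printf("imbalance = %lu\n", imbalance); /* 308: capped by this side */
        return 0;
}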