author     Linus Torvalds <torvalds@linux-foundation.org>   2011-05-23 15:53:48 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2011-05-23 15:53:48 -0400
commit     15a3d11b0f2ebdfb3591e411e268aa81998d4723 (patch)
tree       56e1d7715653871f015341c7edabd08045f5fc28 /kernel
parent     1f3a8e093f470ef193b0ca6011d90180331c8b53 (diff)
parent     c8b281161dfa4bb5d5be63fb036ce19347b88c63 (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: Increase SCHED_LOAD_SCALE resolution
  sched: Introduce SCHED_POWER_SCALE to scale cpu_power calculations
  sched: Cleanup set_load_weight()
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched.c       39
-rw-r--r--  kernel/sched_fair.c  52
2 files changed, 54 insertions(+), 37 deletions(-)
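
The hunks below rely on scale_load(), scale_load_down() and SCHED_LOAD_RESOLUTION, which are defined in include/linux/sched.h and therefore fall outside this kernel/-limited diffstat. As a rough sketch of what they do (illustrative values, not the verbatim upstream definitions):

/*
 * Illustrative sketch only: internal load weights gain extra fixed-point
 * resolution bits, while user-visible values (nice-level weights, cgroup
 * shares) stay in the old scale. The resolution value is an assumption.
 */
#define SCHED_LOAD_RESOLUTION   10
#define scale_load(w)           ((w) << SCHED_LOAD_RESOLUTION)
#define scale_load_down(w)      ((w) >> SCHED_LOAD_RESOLUTION)

Read this way, the MAX_SHARES change in the first hunk keeps the same user-visible ceiling, only expressed at the higher internal resolution.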
diff --git a/kernel/sched.c b/kernel/sched.c
index 0516af415085..2d12893b8b0f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -293,7 +293,7 @@ static DEFINE_SPINLOCK(task_group_lock);
  * limitation from this.)
  */
 #define MIN_SHARES      2
-#define MAX_SHARES      (1UL << 18)
+#define MAX_SHARES      (1UL << (18 + SCHED_LOAD_RESOLUTION))
 
 static int root_task_group_load = ROOT_TASK_GROUP_LOAD;
 #endif
@@ -1330,13 +1330,25 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 {
        u64 tmp;
 
-       tmp = (u64)delta_exec * weight;
+       /*
+        * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched
+        * entities since MIN_SHARES = 2. Treat weight as 1 if less than
+        * 2^SCHED_LOAD_RESOLUTION.
+        */
+       if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION)))
+               tmp = (u64)delta_exec * scale_load_down(weight);
+       else
+               tmp = (u64)delta_exec;
 
        if (!lw->inv_weight) {
-               if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST))
+               unsigned long w = scale_load_down(lw->weight);
+
+               if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST))
                        lw->inv_weight = 1;
+               else if (unlikely(!w))
+                       lw->inv_weight = WMULT_CONST;
                else
-                       lw->inv_weight = WMULT_CONST / lw->weight;
+                       lw->inv_weight = WMULT_CONST / w;
        }
 
        /*
@@ -1778,17 +1790,20 @@ static void dec_nr_running(struct rq *rq)
 
 static void set_load_weight(struct task_struct *p)
 {
+       int prio = p->static_prio - MAX_RT_PRIO;
+       struct load_weight *load = &p->se.load;
+
        /*
         * SCHED_IDLE tasks get minimal weight:
         */
        if (p->policy == SCHED_IDLE) {
-               p->se.load.weight = WEIGHT_IDLEPRIO;
-               p->se.load.inv_weight = WMULT_IDLEPRIO;
+               load->weight = scale_load(WEIGHT_IDLEPRIO);
+               load->inv_weight = WMULT_IDLEPRIO;
                return;
        }
 
-       p->se.load.weight = prio_to_weight[p->static_prio - MAX_RT_PRIO];
-       p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
+       load->weight = scale_load(prio_to_weight[prio]);
+       load->inv_weight = prio_to_wmult[prio];
 }
 
 static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -6527,7 +6542,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
        cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
 
        printk(KERN_CONT " %s", str);
-       if (group->cpu_power != SCHED_LOAD_SCALE) {
+       if (group->cpu_power != SCHED_POWER_SCALE) {
                printk(KERN_CONT " (cpu_power = %d)",
                        group->cpu_power);
        }
@@ -7902,7 +7917,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
                rq->sd = NULL;
                rq->rd = NULL;
-               rq->cpu_power = SCHED_LOAD_SCALE;
+               rq->cpu_power = SCHED_POWER_SCALE;
                rq->post_schedule = 0;
                rq->active_balance = 0;
                rq->next_balance = jiffies;
@@ -8806,14 +8821,14 @@ cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
 static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
                                u64 shareval)
 {
-       return sched_group_set_shares(cgroup_tg(cgrp), shareval);
+       return sched_group_set_shares(cgroup_tg(cgrp), scale_load(shareval));
 }
 
 static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
 {
        struct task_group *tg = cgroup_tg(cgrp);
 
-       return (u64) tg->shares;
+       return (u64) scale_load_down(tg->shares);
 }
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
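The kernel/sched_fair.c hunks below switch cpu_power arithmetic from SCHED_LOAD_SCALE/SCHED_LOAD_SHIFT to SCHED_POWER_SCALE/SCHED_POWER_SHIFT. Those constants are introduced outside this kernel/-limited diffstat (in include/linux/sched.h); a sketch of the assumed definitions, which presumably keep the familiar 1024 unit while decoupling cpu_power from the now higher-resolution load scale:

/*
 * Assumed sketch, not the verbatim header change: same numeric scale as
 * the old SCHED_LOAD_SCALE, but a separate name so that increasing the
 * load resolution does not inflate cpu_power calculations.
 */
#define SCHED_POWER_SHIFT       10
#define SCHED_POWER_SCALE       (1L << SCHED_POWER_SHIFT)
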
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 37f22626225e..e32a9b70ee9c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1584,7 +1584,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
                }
 
                /* Adjust by relative CPU power of the group */
-               avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
+               avg_load = (avg_load * SCHED_POWER_SCALE) / group->cpu_power;
 
                if (local_group) {
                        this_load = avg_load;
@@ -1722,7 +1722,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
                                nr_running += cpu_rq(i)->cfs.nr_running;
                        }
 
-                       capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
+                       capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE);
 
                        if (tmp->flags & SD_POWERSAVINGS_BALANCE)
                                nr_running /= 2;
@@ -2570,7 +2570,7 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
 
 unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
 {
-       return SCHED_LOAD_SCALE;
+       return SCHED_POWER_SCALE;
 }
 
 unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
@@ -2607,10 +2607,10 @@ unsigned long scale_rt_power(int cpu)
                available = total - rq->rt_avg;
        }
 
-       if (unlikely((s64)total < SCHED_LOAD_SCALE))
-               total = SCHED_LOAD_SCALE;
+       if (unlikely((s64)total < SCHED_POWER_SCALE))
+               total = SCHED_POWER_SCALE;
 
-       total >>= SCHED_LOAD_SHIFT;
+       total >>= SCHED_POWER_SHIFT;
 
        return div_u64(available, total);
 }
@@ -2618,7 +2618,7 @@ unsigned long scale_rt_power(int cpu)
 static void update_cpu_power(struct sched_domain *sd, int cpu)
 {
        unsigned long weight = sd->span_weight;
-       unsigned long power = SCHED_LOAD_SCALE;
+       unsigned long power = SCHED_POWER_SCALE;
        struct sched_group *sdg = sd->groups;
 
        if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
@@ -2627,7 +2627,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
                else
                        power *= default_scale_smt_power(sd, cpu);
 
-               power >>= SCHED_LOAD_SHIFT;
+               power >>= SCHED_POWER_SHIFT;
        }
 
        sdg->cpu_power_orig = power;
@@ -2637,10 +2637,10 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
        else
                power *= default_scale_freq_power(sd, cpu);
 
-       power >>= SCHED_LOAD_SHIFT;
+       power >>= SCHED_POWER_SHIFT;
 
        power *= scale_rt_power(cpu);
-       power >>= SCHED_LOAD_SHIFT;
+       power >>= SCHED_POWER_SHIFT;
 
        if (!power)
                power = 1;
@@ -2682,7 +2682,7 @@ static inline int
 fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 {
        /*
-        * Only siblings can have significantly less than SCHED_LOAD_SCALE
+        * Only siblings can have significantly less than SCHED_POWER_SCALE
         */
        if (!(sd->flags & SD_SHARE_CPUPOWER))
                return 0;
@@ -2770,7 +2770,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
        }
 
        /* Adjust by relative CPU power of the group */
-       sgs->avg_load = (sgs->group_load * SCHED_LOAD_SCALE) / group->cpu_power;
+       sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->cpu_power;
 
        /*
         * Consider the group unbalanced when the imbalance is larger
@@ -2787,7 +2787,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
        if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
                sgs->group_imb = 1;
 
-       sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
+       sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power,
+                                               SCHED_POWER_SCALE);
        if (!sgs->group_capacity)
                sgs->group_capacity = fix_small_capacity(sd, group);
        sgs->group_weight = group->group_weight;
@@ -2961,7 +2962,7 @@ static int check_asym_packing(struct sched_domain *sd,
                return 0;
 
        *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power,
-                                      SCHED_LOAD_SCALE);
+                                      SCHED_POWER_SCALE);
        return 1;
 }
 
@@ -2990,7 +2991,7 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
                        cpu_avg_load_per_task(this_cpu);
 
        scaled_busy_load_per_task = sds->busiest_load_per_task
-                                        * SCHED_LOAD_SCALE;
+                                        * SCHED_POWER_SCALE;
        scaled_busy_load_per_task /= sds->busiest->cpu_power;
 
        if (sds->max_load - sds->this_load + scaled_busy_load_per_task >=
@@ -3009,10 +3010,10 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
                        min(sds->busiest_load_per_task, sds->max_load);
        pwr_now += sds->this->cpu_power *
                        min(sds->this_load_per_task, sds->this_load);
-       pwr_now /= SCHED_LOAD_SCALE;
+       pwr_now /= SCHED_POWER_SCALE;
 
        /* Amount of load we'd subtract */
-       tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) /
+       tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
                sds->busiest->cpu_power;
        if (sds->max_load > tmp)
                pwr_move += sds->busiest->cpu_power *
@@ -3020,15 +3021,15 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
 
        /* Amount of load we'd add */
        if (sds->max_load * sds->busiest->cpu_power <
-               sds->busiest_load_per_task * SCHED_LOAD_SCALE)
+               sds->busiest_load_per_task * SCHED_POWER_SCALE)
                tmp = (sds->max_load * sds->busiest->cpu_power) /
                        sds->this->cpu_power;
        else
-               tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) /
+               tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
                        sds->this->cpu_power;
        pwr_move += sds->this->cpu_power *
                        min(sds->this_load_per_task, sds->this_load + tmp);
-       pwr_move /= SCHED_LOAD_SCALE;
+       pwr_move /= SCHED_POWER_SCALE;
 
        /* Move if we gain throughput */
        if (pwr_move > pwr_now)
@@ -3070,7 +3071,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
                load_above_capacity = (sds->busiest_nr_running -
                                                sds->busiest_group_capacity);
 
-               load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_LOAD_SCALE);
+               load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);
 
                load_above_capacity /= sds->busiest->cpu_power;
        }
@@ -3090,7 +3091,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
        /* How much load to actually move to equalise the imbalance */
        *imbalance = min(max_pull * sds->busiest->cpu_power,
                (sds->avg_load - sds->this_load) * sds->this->cpu_power)
-                       / SCHED_LOAD_SCALE;
+                       / SCHED_POWER_SCALE;
 
        /*
         * if *imbalance is less than the average load per runnable task
@@ -3159,7 +3160,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
        if (!sds.busiest || sds.busiest_nr_running == 0)
                goto out_balanced;
 
-       sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr;
+       sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_pwr;
 
        /*
         * If the busiest group is imbalanced the below checks don't
@@ -3238,7 +3239,8 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
 
        for_each_cpu(i, sched_group_cpus(group)) {
                unsigned long power = power_of(i);
-               unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
+               unsigned long capacity = DIV_ROUND_CLOSEST(power,
+                                                          SCHED_POWER_SCALE);
                unsigned long wl;
 
                if (!capacity)
@@ -3263,7 +3265,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
                 * the load can be moved away from the cpu that is potentially
                 * running at a lower capacity.
                 */
-               wl = (wl * SCHED_LOAD_SCALE) / power;
+               wl = (wl * SCHED_POWER_SCALE) / power;
 
                if (wl > max_load) {
                        max_load = wl;
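
To see why the calc_delta_mine() hunk above guards weights below 2^SCHED_LOAD_RESOLUTION: task-group scheduling entities can carry a weight as small as MIN_SHARES = 2, which scale_load_down() would truncate to 0 and thereby zero out the scaled runtime. A small userspace illustration, using assumed stand-in values for the kernel macros:

#include <stdio.h>
#include <stdint.h>

/* Assumed stand-ins for the kernel macros, for illustration only. */
#define SCHED_LOAD_RESOLUTION   10
#define scale_load_down(w)      ((w) >> SCHED_LOAD_RESOLUTION)
#define MIN_SHARES              2UL

int main(void)
{
        unsigned long delta_exec = 1000000;     /* ns of runtime */
        unsigned long weight = MIN_SHARES;      /* tiny task-group weight */
        uint64_t tmp;

        /*
         * Mirrors the new branch in calc_delta_mine(): a weight smaller
         * than 2^SCHED_LOAD_RESOLUTION is treated as 1 instead of being
         * scaled down to 0, which would wipe out the delta entirely.
         */
        if (weight > (1UL << SCHED_LOAD_RESOLUTION))
                tmp = (uint64_t)delta_exec * scale_load_down(weight);
        else
                tmp = (uint64_t)delta_exec;

        printf("scale_load_down(%lu) = %lu, tmp = %llu\n",
               weight, scale_load_down(weight),
               (unsigned long long)tmp);
        return 0;
}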