aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Peter Zijlstra <a.p.zijlstra@chello.nl> 2009-09-01 04:34:39 -0400
committer Ingo Molnar <mingo@elte.hu> 2009-09-04 04:09:56 -0400
commit 18a3885fc1ffa92c2212ff0afdf033403d5b0fa0 (patch)
tree e83c640c68d957b793b56e90274aa2f3d0e97655
parent d899a789c28ded9c72b57ddb61868d6b8fc23e80 (diff)
sched: Remove reciprocal for cpu_power
It's a source of failure; also, now that cpu_power is dynamic, it's a waste of time. before: <idle>-0 [000] 132.877936: find_busiest_group: avg_load: 0 group_load: 8241 power: 1 after: bash-1689 [001] 137.862151: find_busiest_group: avg_load: 10636288 group_load: 10387 power: 1 [ v2: build fix from Andreas Herrmann ] Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Tested-by: Andreas Herrmann <andreas.herrmann3@amd.com> Acked-by: Andreas Herrmann <andreas.herrmann3@amd.com> Acked-by: Gautham R Shenoy <ego@in.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> LKML-Reference: <20090901083826.425896304@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- include/linux/sched.h 10
-rw-r--r-- kernel/sched.c 101
2 files changed, 36 insertions, 75 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c67ddf309c8..3b7f43e3b73 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -860,15 +860,9 @@ struct sched_group {
860 860
861 /* 861 /*
862 * CPU power of this group, SCHED_LOAD_SCALE being max power for a 862 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
863 * single CPU. This is read only (except for setup, hotplug CPU). 863 * single CPU.
864 * Note : Never change cpu_power without recompute its reciprocal
865 */ 864 */
866 unsigned int __cpu_power; 865 unsigned int cpu_power;
867 /*
868 * reciprocal value of cpu_power to avoid expensive divides
869 * (see include/linux/reciprocal_div.h)
870 */
871 u32 reciprocal_cpu_power;
872 866
873 /* 867 /*
874 * The CPUs this group covers. 868 * The CPUs this group covers.
diff --git a/kernel/sched.c b/kernel/sched.c
index e1ebf9b00f5..b5378534685 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -64,7 +64,6 @@
64#include <linux/tsacct_kern.h> 64#include <linux/tsacct_kern.h>
65#include <linux/kprobes.h> 65#include <linux/kprobes.h>
66#include <linux/delayacct.h> 66#include <linux/delayacct.h>
67#include <linux/reciprocal_div.h>
68#include <linux/unistd.h> 67#include <linux/unistd.h>
69#include <linux/pagemap.h> 68#include <linux/pagemap.h>
70#include <linux/hrtimer.h> 69#include <linux/hrtimer.h>
@@ -120,30 +119,8 @@
120 */ 119 */
121#define RUNTIME_INF ((u64)~0ULL) 120#define RUNTIME_INF ((u64)~0ULL)
122 121
123#ifdef CONFIG_SMP
124
125static void double_rq_lock(struct rq *rq1, struct rq *rq2); 122static void double_rq_lock(struct rq *rq1, struct rq *rq2);
126 123
127/*
128 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
129 * Since cpu_power is a 'constant', we can use a reciprocal divide.
130 */
131static inline u32 sg_div_cpu_power(const struct sched_group *sg, u32 load)
132{
133 return reciprocal_divide(load, sg->reciprocal_cpu_power);
134}
135
136/*
137 * Each time a sched group cpu_power is changed,
138 * we must compute its reciprocal value
139 */
140static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val)
141{
142 sg->__cpu_power += val;
143 sg->reciprocal_cpu_power = reciprocal_value(sg->__cpu_power);
144}
145#endif
146
147static inline int rt_policy(int policy) 124static inline int rt_policy(int policy)
148{ 125{
149 if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) 126 if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR))
@@ -2335,8 +2312,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
2335 } 2312 }
2336 2313
2337 /* Adjust by relative CPU power of the group */ 2314 /* Adjust by relative CPU power of the group */
2338 avg_load = sg_div_cpu_power(group, 2315 avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
2339 avg_load * SCHED_LOAD_SCALE);
2340 2316
2341 if (local_group) { 2317 if (local_group) {
2342 this_load = avg_load; 2318 this_load = avg_load;
@@ -3768,7 +3744,6 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
3768 unsigned long weight = cpumask_weight(sched_domain_span(sd)); 3744 unsigned long weight = cpumask_weight(sched_domain_span(sd));
3769 unsigned long power = SCHED_LOAD_SCALE; 3745 unsigned long power = SCHED_LOAD_SCALE;
3770 struct sched_group *sdg = sd->groups; 3746 struct sched_group *sdg = sd->groups;
3771 unsigned long old = sdg->__cpu_power;
3772 3747
3773 /* here we could scale based on cpufreq */ 3748 /* here we could scale based on cpufreq */
3774 3749
@@ -3783,33 +3758,26 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
3783 if (!power) 3758 if (!power)
3784 power = 1; 3759 power = 1;
3785 3760
3786 if (power != old) { 3761 sdg->cpu_power = power;
3787 sdg->__cpu_power = power;
3788 sdg->reciprocal_cpu_power = reciprocal_value(power);
3789 }
3790} 3762}
3791 3763
3792static void update_group_power(struct sched_domain *sd, int cpu) 3764static void update_group_power(struct sched_domain *sd, int cpu)
3793{ 3765{
3794 struct sched_domain *child = sd->child; 3766 struct sched_domain *child = sd->child;
3795 struct sched_group *group, *sdg = sd->groups; 3767 struct sched_group *group, *sdg = sd->groups;
3796 unsigned long power = sdg->__cpu_power;
3797 3768
3798 if (!child) { 3769 if (!child) {
3799 update_cpu_power(sd, cpu); 3770 update_cpu_power(sd, cpu);
3800 return; 3771 return;
3801 } 3772 }
3802 3773
3803 sdg->__cpu_power = 0; 3774 sdg->cpu_power = 0;
3804 3775
3805 group = child->groups; 3776 group = child->groups;
3806 do { 3777 do {
3807 sdg->__cpu_power += group->__cpu_power; 3778 sdg->cpu_power += group->cpu_power;
3808 group = group->next; 3779 group = group->next;
3809 } while (group != child->groups); 3780 } while (group != child->groups);
3810
3811 if (power != sdg->__cpu_power)
3812 sdg->reciprocal_cpu_power = reciprocal_value(sdg->__cpu_power);
3813} 3781}
3814 3782
3815/** 3783/**
@@ -3889,8 +3857,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
3889 } 3857 }
3890 3858
3891 /* Adjust by relative CPU power of the group */ 3859 /* Adjust by relative CPU power of the group */
3892 sgs->avg_load = sg_div_cpu_power(group, 3860 sgs->avg_load = (sgs->group_load * SCHED_LOAD_SCALE) / group->cpu_power;
3893 sgs->group_load * SCHED_LOAD_SCALE);
3894 3861
3895 3862
3896 /* 3863 /*
@@ -3902,14 +3869,14 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
3902 * normalized nr_running number somewhere that negates 3869 * normalized nr_running number somewhere that negates
3903 * the hierarchy? 3870 * the hierarchy?
3904 */ 3871 */
3905 avg_load_per_task = sg_div_cpu_power(group, 3872 avg_load_per_task = (sum_avg_load_per_task * SCHED_LOAD_SCALE) /
3906 sum_avg_load_per_task * SCHED_LOAD_SCALE); 3873 group->cpu_power;
3907 3874
3908 if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task) 3875 if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
3909 sgs->group_imb = 1; 3876 sgs->group_imb = 1;
3910 3877
3911 sgs->group_capacity = 3878 sgs->group_capacity =
3912 DIV_ROUND_CLOSEST(group->__cpu_power, SCHED_LOAD_SCALE); 3879 DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
3913} 3880}
3914 3881
3915/** 3882/**
@@ -3951,7 +3918,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
3951 return; 3918 return;
3952 3919
3953 sds->total_load += sgs.group_load; 3920 sds->total_load += sgs.group_load;
3954 sds->total_pwr += group->__cpu_power; 3921 sds->total_pwr += group->cpu_power;
3955 3922
3956 /* 3923 /*
3957 * In case the child domain prefers tasks go to siblings 3924 * In case the child domain prefers tasks go to siblings
@@ -4016,28 +3983,28 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
4016 * moving them. 3983 * moving them.
4017 */ 3984 */
4018 3985
4019 pwr_now += sds->busiest->__cpu_power * 3986 pwr_now += sds->busiest->cpu_power *
4020 min(sds->busiest_load_per_task, sds->max_load); 3987 min(sds->busiest_load_per_task, sds->max_load);
4021 pwr_now += sds->this->__cpu_power * 3988 pwr_now += sds->this->cpu_power *
4022 min(sds->this_load_per_task, sds->this_load); 3989 min(sds->this_load_per_task, sds->this_load);
4023 pwr_now /= SCHED_LOAD_SCALE; 3990 pwr_now /= SCHED_LOAD_SCALE;
4024 3991
4025 /* Amount of load we'd subtract */ 3992 /* Amount of load we'd subtract */
4026 tmp = sg_div_cpu_power(sds->busiest, 3993 tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) /
4027 sds->busiest_load_per_task * SCHED_LOAD_SCALE); 3994 sds->busiest->cpu_power;
4028 if (sds->max_load > tmp) 3995 if (sds->max_load > tmp)
4029 pwr_move += sds->busiest->__cpu_power * 3996 pwr_move += sds->busiest->cpu_power *
4030 min(sds->busiest_load_per_task, sds->max_load - tmp); 3997 min(sds->busiest_load_per_task, sds->max_load - tmp);
4031 3998
4032 /* Amount of load we'd add */ 3999 /* Amount of load we'd add */
4033 if (sds->max_load * sds->busiest->__cpu_power < 4000 if (sds->max_load * sds->busiest->cpu_power <
4034 sds->busiest_load_per_task * SCHED_LOAD_SCALE) 4001 sds->busiest_load_per_task * SCHED_LOAD_SCALE)
4035 tmp = sg_div_cpu_power(sds->this, 4002 tmp = (sds->max_load * sds->busiest->cpu_power) /
4036 sds->max_load * sds->busiest->__cpu_power); 4003 sds->this->cpu_power;
4037 else 4004 else
4038 tmp = sg_div_cpu_power(sds->this, 4005 tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) /
4039 sds->busiest_load_per_task * SCHED_LOAD_SCALE); 4006 sds->this->cpu_power;
4040 pwr_move += sds->this->__cpu_power * 4007 pwr_move += sds->this->cpu_power *
4041 min(sds->this_load_per_task, sds->this_load + tmp); 4008 min(sds->this_load_per_task, sds->this_load + tmp);
4042 pwr_move /= SCHED_LOAD_SCALE; 4009 pwr_move /= SCHED_LOAD_SCALE;
4043 4010
@@ -4072,8 +4039,8 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
4072 sds->max_load - sds->busiest_load_per_task); 4039 sds->max_load - sds->busiest_load_per_task);
4073 4040
4074 /* How much load to actually move to equalise the imbalance */ 4041 /* How much load to actually move to equalise the imbalance */
4075 *imbalance = min(max_pull * sds->busiest->__cpu_power, 4042 *imbalance = min(max_pull * sds->busiest->cpu_power,
4076 (sds->avg_load - sds->this_load) * sds->this->__cpu_power) 4043 (sds->avg_load - sds->this_load) * sds->this->cpu_power)
4077 / SCHED_LOAD_SCALE; 4044 / SCHED_LOAD_SCALE;
4078 4045
4079 /* 4046 /*
@@ -4208,7 +4175,7 @@ static unsigned long power_of(int cpu)
4208 if (!group) 4175 if (!group)
4209 return SCHED_LOAD_SCALE; 4176 return SCHED_LOAD_SCALE;
4210 4177
4211 return group->__cpu_power; 4178 return group->cpu_power;
4212} 4179}
4213 4180
4214/* 4181/*
@@ -7922,7 +7889,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
7922 break; 7889 break;
7923 } 7890 }
7924 7891
7925 if (!group->__cpu_power) { 7892 if (!group->cpu_power) {
7926 printk(KERN_CONT "\n"); 7893 printk(KERN_CONT "\n");
7927 printk(KERN_ERR "ERROR: domain->cpu_power not " 7894 printk(KERN_ERR "ERROR: domain->cpu_power not "
7928 "set\n"); 7895 "set\n");
@@ -7946,9 +7913,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
7946 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); 7913 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
7947 7914
7948 printk(KERN_CONT " %s", str); 7915 printk(KERN_CONT " %s", str);
7949 if (group->__cpu_power != SCHED_LOAD_SCALE) { 7916 if (group->cpu_power != SCHED_LOAD_SCALE) {
7950 printk(KERN_CONT " (__cpu_power = %d)", 7917 printk(KERN_CONT " (cpu_power = %d)",
7951 group->__cpu_power); 7918 group->cpu_power);
7952 } 7919 }
7953 7920
7954 group = group->next; 7921 group = group->next;
@@ -8233,7 +8200,7 @@ init_sched_build_groups(const struct cpumask *span,
8233 continue; 8200 continue;
8234 8201
8235 cpumask_clear(sched_group_cpus(sg)); 8202 cpumask_clear(sched_group_cpus(sg));
8236 sg->__cpu_power = 0; 8203 sg->cpu_power = 0;
8237 8204
8238 for_each_cpu(j, span) { 8205 for_each_cpu(j, span) {
8239 if (group_fn(j, cpu_map, NULL, tmpmask) != group) 8206 if (group_fn(j, cpu_map, NULL, tmpmask) != group)
@@ -8491,7 +8458,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
8491 continue; 8458 continue;
8492 } 8459 }
8493 8460
8494 sg_inc_cpu_power(sg, sd->groups->__cpu_power); 8461 sg->cpu_power += sd->groups->cpu_power;
8495 } 8462 }
8496 sg = sg->next; 8463 sg = sg->next;
8497 } while (sg != group_head); 8464 } while (sg != group_head);
@@ -8528,7 +8495,7 @@ static int build_numa_sched_groups(struct s_data *d,
8528 sd->groups = sg; 8495 sd->groups = sg;
8529 } 8496 }
8530 8497
8531 sg->__cpu_power = 0; 8498 sg->cpu_power = 0;
8532 cpumask_copy(sched_group_cpus(sg), d->nodemask); 8499 cpumask_copy(sched_group_cpus(sg), d->nodemask);
8533 sg->next = sg; 8500 sg->next = sg;
8534 cpumask_or(d->covered, d->covered, d->nodemask); 8501 cpumask_or(d->covered, d->covered, d->nodemask);
@@ -8551,7 +8518,7 @@ static int build_numa_sched_groups(struct s_data *d,
8551 "Can not alloc domain group for node %d\n", j); 8518 "Can not alloc domain group for node %d\n", j);
8552 return -ENOMEM; 8519 return -ENOMEM;
8553 } 8520 }
8554 sg->__cpu_power = 0; 8521 sg->cpu_power = 0;
8555 cpumask_copy(sched_group_cpus(sg), d->tmpmask); 8522 cpumask_copy(sched_group_cpus(sg), d->tmpmask);
8556 sg->next = prev->next; 8523 sg->next = prev->next;
8557 cpumask_or(d->covered, d->covered, d->tmpmask); 8524 cpumask_or(d->covered, d->covered, d->tmpmask);
@@ -8629,7 +8596,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
8629 8596
8630 child = sd->child; 8597 child = sd->child;
8631 8598
8632 sd->groups->__cpu_power = 0; 8599 sd->groups->cpu_power = 0;
8633 8600
8634 if (!child) { 8601 if (!child) {
8635 power = SCHED_LOAD_SCALE; 8602 power = SCHED_LOAD_SCALE;
@@ -8645,7 +8612,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
8645 power /= weight; 8612 power /= weight;
8646 power >>= SCHED_LOAD_SHIFT; 8613 power >>= SCHED_LOAD_SHIFT;
8647 } 8614 }
8648 sg_inc_cpu_power(sd->groups, power); 8615 sd->groups->cpu_power += power;
8649 return; 8616 return;
8650 } 8617 }
8651 8618
@@ -8654,7 +8621,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
8654 */ 8621 */
8655 group = child->groups; 8622 group = child->groups;
8656 do { 8623 do {
8657 sg_inc_cpu_power(sd->groups, group->__cpu_power); 8624 sd->groups->cpu_power += group->cpu_power;
8658 group = group->next; 8625 group = group->next;
8659 } while (group != child->groups); 8626 } while (group != child->groups);
8660} 8627}