diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-09-01 04:34:39 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-09-04 04:09:56 -0400 |
commit | 18a3885fc1ffa92c2212ff0afdf033403d5b0fa0 (patch) | |
tree | e83c640c68d957b793b56e90274aa2f3d0e97655 | |
parent | d899a789c28ded9c72b57ddb61868d6b8fc23e80 (diff) |
sched: Remove reciprocal for cpu_power
It's a source of failure; also, now that cpu_power is dynamic,
it's a waste of time.
before:
<idle>-0 [000] 132.877936: find_busiest_group: avg_load: 0 group_load: 8241 power: 1
after:
bash-1689 [001] 137.862151: find_busiest_group: avg_load: 10636288 group_load: 10387 power: 1
[ v2: build fix from Andreas Herrmann ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Acked-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Acked-by: Gautham R Shenoy <ego@in.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
LKML-Reference: <20090901083826.425896304@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | include/linux/sched.h | 10 | ||||
-rw-r--r-- | kernel/sched.c | 101 |
2 files changed, 36 insertions, 75 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index c67ddf309c8..3b7f43e3b73 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -860,15 +860,9 @@ struct sched_group { | |||
860 | 860 | ||
861 | /* | 861 | /* |
862 | * CPU power of this group, SCHED_LOAD_SCALE being max power for a | 862 | * CPU power of this group, SCHED_LOAD_SCALE being max power for a |
863 | * single CPU. This is read only (except for setup, hotplug CPU). | 863 | * single CPU. |
864 | * Note : Never change cpu_power without recompute its reciprocal | ||
865 | */ | 864 | */ |
866 | unsigned int __cpu_power; | 865 | unsigned int cpu_power; |
867 | /* | ||
868 | * reciprocal value of cpu_power to avoid expensive divides | ||
869 | * (see include/linux/reciprocal_div.h) | ||
870 | */ | ||
871 | u32 reciprocal_cpu_power; | ||
872 | 866 | ||
873 | /* | 867 | /* |
874 | * The CPUs this group covers. | 868 | * The CPUs this group covers. |
diff --git a/kernel/sched.c b/kernel/sched.c index e1ebf9b00f5..b5378534685 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -64,7 +64,6 @@ | |||
64 | #include <linux/tsacct_kern.h> | 64 | #include <linux/tsacct_kern.h> |
65 | #include <linux/kprobes.h> | 65 | #include <linux/kprobes.h> |
66 | #include <linux/delayacct.h> | 66 | #include <linux/delayacct.h> |
67 | #include <linux/reciprocal_div.h> | ||
68 | #include <linux/unistd.h> | 67 | #include <linux/unistd.h> |
69 | #include <linux/pagemap.h> | 68 | #include <linux/pagemap.h> |
70 | #include <linux/hrtimer.h> | 69 | #include <linux/hrtimer.h> |
@@ -120,30 +119,8 @@ | |||
120 | */ | 119 | */ |
121 | #define RUNTIME_INF ((u64)~0ULL) | 120 | #define RUNTIME_INF ((u64)~0ULL) |
122 | 121 | ||
123 | #ifdef CONFIG_SMP | ||
124 | |||
125 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); | 122 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); |
126 | 123 | ||
127 | /* | ||
128 | * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) | ||
129 | * Since cpu_power is a 'constant', we can use a reciprocal divide. | ||
130 | */ | ||
131 | static inline u32 sg_div_cpu_power(const struct sched_group *sg, u32 load) | ||
132 | { | ||
133 | return reciprocal_divide(load, sg->reciprocal_cpu_power); | ||
134 | } | ||
135 | |||
136 | /* | ||
137 | * Each time a sched group cpu_power is changed, | ||
138 | * we must compute its reciprocal value | ||
139 | */ | ||
140 | static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val) | ||
141 | { | ||
142 | sg->__cpu_power += val; | ||
143 | sg->reciprocal_cpu_power = reciprocal_value(sg->__cpu_power); | ||
144 | } | ||
145 | #endif | ||
146 | |||
147 | static inline int rt_policy(int policy) | 124 | static inline int rt_policy(int policy) |
148 | { | 125 | { |
149 | if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) | 126 | if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) |
@@ -2335,8 +2312,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) | |||
2335 | } | 2312 | } |
2336 | 2313 | ||
2337 | /* Adjust by relative CPU power of the group */ | 2314 | /* Adjust by relative CPU power of the group */ |
2338 | avg_load = sg_div_cpu_power(group, | 2315 | avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; |
2339 | avg_load * SCHED_LOAD_SCALE); | ||
2340 | 2316 | ||
2341 | if (local_group) { | 2317 | if (local_group) { |
2342 | this_load = avg_load; | 2318 | this_load = avg_load; |
@@ -3768,7 +3744,6 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) | |||
3768 | unsigned long weight = cpumask_weight(sched_domain_span(sd)); | 3744 | unsigned long weight = cpumask_weight(sched_domain_span(sd)); |
3769 | unsigned long power = SCHED_LOAD_SCALE; | 3745 | unsigned long power = SCHED_LOAD_SCALE; |
3770 | struct sched_group *sdg = sd->groups; | 3746 | struct sched_group *sdg = sd->groups; |
3771 | unsigned long old = sdg->__cpu_power; | ||
3772 | 3747 | ||
3773 | /* here we could scale based on cpufreq */ | 3748 | /* here we could scale based on cpufreq */ |
3774 | 3749 | ||
@@ -3783,33 +3758,26 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) | |||
3783 | if (!power) | 3758 | if (!power) |
3784 | power = 1; | 3759 | power = 1; |
3785 | 3760 | ||
3786 | if (power != old) { | 3761 | sdg->cpu_power = power; |
3787 | sdg->__cpu_power = power; | ||
3788 | sdg->reciprocal_cpu_power = reciprocal_value(power); | ||
3789 | } | ||
3790 | } | 3762 | } |
3791 | 3763 | ||
3792 | static void update_group_power(struct sched_domain *sd, int cpu) | 3764 | static void update_group_power(struct sched_domain *sd, int cpu) |
3793 | { | 3765 | { |
3794 | struct sched_domain *child = sd->child; | 3766 | struct sched_domain *child = sd->child; |
3795 | struct sched_group *group, *sdg = sd->groups; | 3767 | struct sched_group *group, *sdg = sd->groups; |
3796 | unsigned long power = sdg->__cpu_power; | ||
3797 | 3768 | ||
3798 | if (!child) { | 3769 | if (!child) { |
3799 | update_cpu_power(sd, cpu); | 3770 | update_cpu_power(sd, cpu); |
3800 | return; | 3771 | return; |
3801 | } | 3772 | } |
3802 | 3773 | ||
3803 | sdg->__cpu_power = 0; | 3774 | sdg->cpu_power = 0; |
3804 | 3775 | ||
3805 | group = child->groups; | 3776 | group = child->groups; |
3806 | do { | 3777 | do { |
3807 | sdg->__cpu_power += group->__cpu_power; | 3778 | sdg->cpu_power += group->cpu_power; |
3808 | group = group->next; | 3779 | group = group->next; |
3809 | } while (group != child->groups); | 3780 | } while (group != child->groups); |
3810 | |||
3811 | if (power != sdg->__cpu_power) | ||
3812 | sdg->reciprocal_cpu_power = reciprocal_value(sdg->__cpu_power); | ||
3813 | } | 3781 | } |
3814 | 3782 | ||
3815 | /** | 3783 | /** |
@@ -3889,8 +3857,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
3889 | } | 3857 | } |
3890 | 3858 | ||
3891 | /* Adjust by relative CPU power of the group */ | 3859 | /* Adjust by relative CPU power of the group */ |
3892 | sgs->avg_load = sg_div_cpu_power(group, | 3860 | sgs->avg_load = (sgs->group_load * SCHED_LOAD_SCALE) / group->cpu_power; |
3893 | sgs->group_load * SCHED_LOAD_SCALE); | ||
3894 | 3861 | ||
3895 | 3862 | ||
3896 | /* | 3863 | /* |
@@ -3902,14 +3869,14 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
3902 | * normalized nr_running number somewhere that negates | 3869 | * normalized nr_running number somewhere that negates |
3903 | * the hierarchy? | 3870 | * the hierarchy? |
3904 | */ | 3871 | */ |
3905 | avg_load_per_task = sg_div_cpu_power(group, | 3872 | avg_load_per_task = (sum_avg_load_per_task * SCHED_LOAD_SCALE) / |
3906 | sum_avg_load_per_task * SCHED_LOAD_SCALE); | 3873 | group->cpu_power; |
3907 | 3874 | ||
3908 | if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task) | 3875 | if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task) |
3909 | sgs->group_imb = 1; | 3876 | sgs->group_imb = 1; |
3910 | 3877 | ||
3911 | sgs->group_capacity = | 3878 | sgs->group_capacity = |
3912 | DIV_ROUND_CLOSEST(group->__cpu_power, SCHED_LOAD_SCALE); | 3879 | DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); |
3913 | } | 3880 | } |
3914 | 3881 | ||
3915 | /** | 3882 | /** |
@@ -3951,7 +3918,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, | |||
3951 | return; | 3918 | return; |
3952 | 3919 | ||
3953 | sds->total_load += sgs.group_load; | 3920 | sds->total_load += sgs.group_load; |
3954 | sds->total_pwr += group->__cpu_power; | 3921 | sds->total_pwr += group->cpu_power; |
3955 | 3922 | ||
3956 | /* | 3923 | /* |
3957 | * In case the child domain prefers tasks go to siblings | 3924 | * In case the child domain prefers tasks go to siblings |
@@ -4016,28 +3983,28 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds, | |||
4016 | * moving them. | 3983 | * moving them. |
4017 | */ | 3984 | */ |
4018 | 3985 | ||
4019 | pwr_now += sds->busiest->__cpu_power * | 3986 | pwr_now += sds->busiest->cpu_power * |
4020 | min(sds->busiest_load_per_task, sds->max_load); | 3987 | min(sds->busiest_load_per_task, sds->max_load); |
4021 | pwr_now += sds->this->__cpu_power * | 3988 | pwr_now += sds->this->cpu_power * |
4022 | min(sds->this_load_per_task, sds->this_load); | 3989 | min(sds->this_load_per_task, sds->this_load); |
4023 | pwr_now /= SCHED_LOAD_SCALE; | 3990 | pwr_now /= SCHED_LOAD_SCALE; |
4024 | 3991 | ||
4025 | /* Amount of load we'd subtract */ | 3992 | /* Amount of load we'd subtract */ |
4026 | tmp = sg_div_cpu_power(sds->busiest, | 3993 | tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) / |
4027 | sds->busiest_load_per_task * SCHED_LOAD_SCALE); | 3994 | sds->busiest->cpu_power; |
4028 | if (sds->max_load > tmp) | 3995 | if (sds->max_load > tmp) |
4029 | pwr_move += sds->busiest->__cpu_power * | 3996 | pwr_move += sds->busiest->cpu_power * |
4030 | min(sds->busiest_load_per_task, sds->max_load - tmp); | 3997 | min(sds->busiest_load_per_task, sds->max_load - tmp); |
4031 | 3998 | ||
4032 | /* Amount of load we'd add */ | 3999 | /* Amount of load we'd add */ |
4033 | if (sds->max_load * sds->busiest->__cpu_power < | 4000 | if (sds->max_load * sds->busiest->cpu_power < |
4034 | sds->busiest_load_per_task * SCHED_LOAD_SCALE) | 4001 | sds->busiest_load_per_task * SCHED_LOAD_SCALE) |
4035 | tmp = sg_div_cpu_power(sds->this, | 4002 | tmp = (sds->max_load * sds->busiest->cpu_power) / |
4036 | sds->max_load * sds->busiest->__cpu_power); | 4003 | sds->this->cpu_power; |
4037 | else | 4004 | else |
4038 | tmp = sg_div_cpu_power(sds->this, | 4005 | tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) / |
4039 | sds->busiest_load_per_task * SCHED_LOAD_SCALE); | 4006 | sds->this->cpu_power; |
4040 | pwr_move += sds->this->__cpu_power * | 4007 | pwr_move += sds->this->cpu_power * |
4041 | min(sds->this_load_per_task, sds->this_load + tmp); | 4008 | min(sds->this_load_per_task, sds->this_load + tmp); |
4042 | pwr_move /= SCHED_LOAD_SCALE; | 4009 | pwr_move /= SCHED_LOAD_SCALE; |
4043 | 4010 | ||
@@ -4072,8 +4039,8 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, | |||
4072 | sds->max_load - sds->busiest_load_per_task); | 4039 | sds->max_load - sds->busiest_load_per_task); |
4073 | 4040 | ||
4074 | /* How much load to actually move to equalise the imbalance */ | 4041 | /* How much load to actually move to equalise the imbalance */ |
4075 | *imbalance = min(max_pull * sds->busiest->__cpu_power, | 4042 | *imbalance = min(max_pull * sds->busiest->cpu_power, |
4076 | (sds->avg_load - sds->this_load) * sds->this->__cpu_power) | 4043 | (sds->avg_load - sds->this_load) * sds->this->cpu_power) |
4077 | / SCHED_LOAD_SCALE; | 4044 | / SCHED_LOAD_SCALE; |
4078 | 4045 | ||
4079 | /* | 4046 | /* |
@@ -4208,7 +4175,7 @@ static unsigned long power_of(int cpu) | |||
4208 | if (!group) | 4175 | if (!group) |
4209 | return SCHED_LOAD_SCALE; | 4176 | return SCHED_LOAD_SCALE; |
4210 | 4177 | ||
4211 | return group->__cpu_power; | 4178 | return group->cpu_power; |
4212 | } | 4179 | } |
4213 | 4180 | ||
4214 | /* | 4181 | /* |
@@ -7922,7 +7889,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
7922 | break; | 7889 | break; |
7923 | } | 7890 | } |
7924 | 7891 | ||
7925 | if (!group->__cpu_power) { | 7892 | if (!group->cpu_power) { |
7926 | printk(KERN_CONT "\n"); | 7893 | printk(KERN_CONT "\n"); |
7927 | printk(KERN_ERR "ERROR: domain->cpu_power not " | 7894 | printk(KERN_ERR "ERROR: domain->cpu_power not " |
7928 | "set\n"); | 7895 | "set\n"); |
@@ -7946,9 +7913,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
7946 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); | 7913 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); |
7947 | 7914 | ||
7948 | printk(KERN_CONT " %s", str); | 7915 | printk(KERN_CONT " %s", str); |
7949 | if (group->__cpu_power != SCHED_LOAD_SCALE) { | 7916 | if (group->cpu_power != SCHED_LOAD_SCALE) { |
7950 | printk(KERN_CONT " (__cpu_power = %d)", | 7917 | printk(KERN_CONT " (cpu_power = %d)", |
7951 | group->__cpu_power); | 7918 | group->cpu_power); |
7952 | } | 7919 | } |
7953 | 7920 | ||
7954 | group = group->next; | 7921 | group = group->next; |
@@ -8233,7 +8200,7 @@ init_sched_build_groups(const struct cpumask *span, | |||
8233 | continue; | 8200 | continue; |
8234 | 8201 | ||
8235 | cpumask_clear(sched_group_cpus(sg)); | 8202 | cpumask_clear(sched_group_cpus(sg)); |
8236 | sg->__cpu_power = 0; | 8203 | sg->cpu_power = 0; |
8237 | 8204 | ||
8238 | for_each_cpu(j, span) { | 8205 | for_each_cpu(j, span) { |
8239 | if (group_fn(j, cpu_map, NULL, tmpmask) != group) | 8206 | if (group_fn(j, cpu_map, NULL, tmpmask) != group) |
@@ -8491,7 +8458,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) | |||
8491 | continue; | 8458 | continue; |
8492 | } | 8459 | } |
8493 | 8460 | ||
8494 | sg_inc_cpu_power(sg, sd->groups->__cpu_power); | 8461 | sg->cpu_power += sd->groups->cpu_power; |
8495 | } | 8462 | } |
8496 | sg = sg->next; | 8463 | sg = sg->next; |
8497 | } while (sg != group_head); | 8464 | } while (sg != group_head); |
@@ -8528,7 +8495,7 @@ static int build_numa_sched_groups(struct s_data *d, | |||
8528 | sd->groups = sg; | 8495 | sd->groups = sg; |
8529 | } | 8496 | } |
8530 | 8497 | ||
8531 | sg->__cpu_power = 0; | 8498 | sg->cpu_power = 0; |
8532 | cpumask_copy(sched_group_cpus(sg), d->nodemask); | 8499 | cpumask_copy(sched_group_cpus(sg), d->nodemask); |
8533 | sg->next = sg; | 8500 | sg->next = sg; |
8534 | cpumask_or(d->covered, d->covered, d->nodemask); | 8501 | cpumask_or(d->covered, d->covered, d->nodemask); |
@@ -8551,7 +8518,7 @@ static int build_numa_sched_groups(struct s_data *d, | |||
8551 | "Can not alloc domain group for node %d\n", j); | 8518 | "Can not alloc domain group for node %d\n", j); |
8552 | return -ENOMEM; | 8519 | return -ENOMEM; |
8553 | } | 8520 | } |
8554 | sg->__cpu_power = 0; | 8521 | sg->cpu_power = 0; |
8555 | cpumask_copy(sched_group_cpus(sg), d->tmpmask); | 8522 | cpumask_copy(sched_group_cpus(sg), d->tmpmask); |
8556 | sg->next = prev->next; | 8523 | sg->next = prev->next; |
8557 | cpumask_or(d->covered, d->covered, d->tmpmask); | 8524 | cpumask_or(d->covered, d->covered, d->tmpmask); |
@@ -8629,7 +8596,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
8629 | 8596 | ||
8630 | child = sd->child; | 8597 | child = sd->child; |
8631 | 8598 | ||
8632 | sd->groups->__cpu_power = 0; | 8599 | sd->groups->cpu_power = 0; |
8633 | 8600 | ||
8634 | if (!child) { | 8601 | if (!child) { |
8635 | power = SCHED_LOAD_SCALE; | 8602 | power = SCHED_LOAD_SCALE; |
@@ -8645,7 +8612,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
8645 | power /= weight; | 8612 | power /= weight; |
8646 | power >>= SCHED_LOAD_SHIFT; | 8613 | power >>= SCHED_LOAD_SHIFT; |
8647 | } | 8614 | } |
8648 | sg_inc_cpu_power(sd->groups, power); | 8615 | sd->groups->cpu_power += power; |
8649 | return; | 8616 | return; |
8650 | } | 8617 | } |
8651 | 8618 | ||
@@ -8654,7 +8621,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
8654 | */ | 8621 | */ |
8655 | group = child->groups; | 8622 | group = child->groups; |
8656 | do { | 8623 | do { |
8657 | sg_inc_cpu_power(sd->groups, group->__cpu_power); | 8624 | sd->groups->cpu_power += group->cpu_power; |
8658 | group = group->next; | 8625 | group = group->next; |
8659 | } while (group != child->groups); | 8626 | } while (group != child->groups); |
8660 | } | 8627 | } |