author		Vincent Guittot <vincent.guittot@linaro.org>	2011-12-12 14:21:08 -0500
committer	Ingo Molnar <mingo@elte.hu>	2012-01-27 07:28:49 -0500
commit		4ec4412e1e91f44a3dcb97b6c9172a13fc78bac9 (patch)
tree		a98f635907975c2f14c01c24418e102337250b8d /kernel/sched/fair.c
parent		39be350127ec60a078edffe5b4915dafba4ba514 (diff)
sched: Ensure cpu_power periodic update
With a lot of small tasks, the sched softirq is almost never raised
when no_hz is enabled. In this case load_balance() is mainly called
in newly_idle mode, which does not update cpu_power.

Add a next_update field which ensures a maximum update period even
when activity comes only in short bursts.

Stale cpu_power information can skew the load-balancing decisions;
the guaranteed periodic update cures this, as sketched below.
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1323717668-2143-1-git-send-email-vincent.guittot@linaro.org
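
For readers skimming the patch, here is a minimal userspace C sketch of the
rate-limiting pattern it introduces: refresh at most once per clamped
interval, re-arming a deadline on every refresh. HZ, jiffies, the group
struct, and the placeholder power recomputation are simplified stand-ins for
the kernel's, not code quoted from this patch.

#include <stdio.h>

#define HZ 1000				/* assumed tick rate for this sketch */
#define MAX_LB_INTERVAL (HZ / 10)	/* mirrors max_load_balance_interval */

static unsigned long jiffies;		/* stand-in for the kernel tick counter */

struct group {
	unsigned long next_update;	/* mirrors sgp->next_update */
	unsigned long power;
};

static unsigned long clamp_interval(unsigned long iv)
{
	if (iv < 1)
		return 1;
	return iv > MAX_LB_INTERVAL ? MAX_LB_INTERVAL : iv;
}

/* Like update_group_power(): recompute power and re-arm the deadline. */
static void group_power_update(struct group *g, unsigned long balance_interval)
{
	g->next_update = jiffies + clamp_interval(balance_interval);
	g->power++;	/* placeholder for the real cpu_power recomputation */
	printf("update at jiffies=%lu, next no later than %lu\n",
	       jiffies, g->next_update);
}

/* Newly-idle path: refresh only once the deadline has passed, i.e.
 * the patch's time_after_eq(jiffies, group->sgp->next_update). */
static void newly_idle_balance(struct group *g, unsigned long balance_interval)
{
	if ((long)(jiffies - g->next_update) >= 0)	/* wrap-safe compare */
		group_power_update(g, balance_interval);
}

int main(void)
{
	struct group g = { 0, 0 };

	/* Newly-idle balancing fires every 7 ticks, but the power update
	 * is rate-limited to once per clamped interval (100 ticks). */
	for (jiffies = 0; jiffies < 500; jiffies += 7)
		newly_idle_balance(&g, 400 /* requested, clamped to 100 */);
	return 0;
}

Over 500 ticks this performs 5 updates instead of one per call, which is the
bound the patch guarantees even when load_balance() runs only in newly_idle
mode.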
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--	kernel/sched/fair.c	24	++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7c6414fc669d..8e77a6bd597b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -215,6 +215,8 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 
 const struct sched_class fair_sched_class;
 
+static unsigned long __read_mostly max_load_balance_interval = HZ/10;
+
 /**************************************************************
  * CFS operations on generic schedulable entities:
  */
@@ -3776,6 +3778,11 @@ void update_group_power(struct sched_domain *sd, int cpu)
 	struct sched_domain *child = sd->child;
 	struct sched_group *group, *sdg = sd->groups;
 	unsigned long power;
+	unsigned long interval;
+
+	interval = msecs_to_jiffies(sd->balance_interval);
+	interval = clamp(interval, 1UL, max_load_balance_interval);
+	sdg->sgp->next_update = jiffies + interval;
 
 	if (!child) {
 		update_cpu_power(sd, cpu);
@@ -3883,12 +3890,15 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	 * domains. In the newly idle case, we will allow all the cpu's
 	 * to do the newly idle load balance.
 	 */
-	if (idle != CPU_NEWLY_IDLE && local_group) {
-		if (balance_cpu != this_cpu) {
-			*balance = 0;
-			return;
-		}
-		update_group_power(sd, this_cpu);
+	if (local_group) {
+		if (idle != CPU_NEWLY_IDLE) {
+			if (balance_cpu != this_cpu) {
+				*balance = 0;
+				return;
+			}
+			update_group_power(sd, this_cpu);
+		} else if (time_after_eq(jiffies, group->sgp->next_update))
+			update_group_power(sd, this_cpu);
 	}
 
 	/* Adjust by relative CPU power of the group */
@@ -4945,8 +4955,6 @@ static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
 
 static DEFINE_SPINLOCK(balancing);
 
-static unsigned long __read_mostly max_load_balance_interval = HZ/10;
-
 /*
  * Scale the max load_balance interval with the number of CPUs in the system.
  * This trades load-balance latency on larger machines for less cross talk.
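
The context lines closing the last hunk refer to scaling the interval cap
with the CPU count. As a hedged illustration of that trade-off, here is a
standalone C sketch; the scaling formula below is an assumption chosen for
the example, not text taken from this patch.

#include <stdio.h>

#define HZ 1000	/* assumed tick rate for this sketch */

static unsigned long max_load_balance_interval = HZ / 10;

/* Illustrative scaling in the spirit of the comment above: grow the cap
 * with the number of online CPUs so larger machines rebalance less often.
 * The kernel's exact formula is not quoted here. */
static void update_max_interval(unsigned long online_cpus)
{
	max_load_balance_interval = HZ * online_cpus / 10;
}

int main(void)
{
	unsigned long cpus;

	for (cpus = 1; cpus <= 64; cpus *= 4) {
		update_max_interval(cpus);
		printf("%3lu cpus -> cap %lu ticks\n",
		       cpus, max_load_balance_interval);
	}
	return 0;
}

A larger cap lets update_group_power() go longer between refreshes on big
machines, trading some cpu_power staleness for less cross-CPU traffic.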