diff options
author | Suresh Siddha <suresh.b.siddha@intel.com> | 2009-02-04 14:59:44 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-02-04 16:31:19 -0500 |
commit | 483b4ee60edbefdfbff0dd538fb81f368d9e7c0d (patch) | |
tree | 2fbf152e6bb6377a16a1fb0d1aa503d19e0c0f63 | |
parent | 35626129abcd6a7547e84c817ef5b6eff7a8758b (diff) |
sched: fix nohz load balancer on cpu offline
Christian Borntraeger reports:
> After a logical cpu offline, even on a completely idle system, there
> is one cpu with full ticks. It turns out that nohz.cpu_mask has
> the offlined cpu still set.
>
> In select_nohz_load_balancer() we check if the system is completely
> idle to turn off load balancing. We compare cpu_online_map with
> nohz.cpu_mask. Since cpu_online_map is updated on cpu unplug,
> but nohz.cpu_mask is not, the check fails and the scheduler believes
> that we need an "idle load balancer" even on a fully idle system.
> Since the ilb cpu does not deactivate the timer tick this breaks NOHZ.
Fix select_nohz_load_balancer() to not set the cpu's bit in
nohz.cpu_mask while that cpu is going offline.
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | kernel/sched.c | 17 |
1 files changed, 11 insertions, 6 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 242d0d47a70d..e1fc67d0674c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -3890,19 +3890,24 @@ int select_nohz_load_balancer(int stop_tick) | |||
3890 | int cpu = smp_processor_id(); | 3890 | int cpu = smp_processor_id(); |
3891 | 3891 | ||
3892 | if (stop_tick) { | 3892 | if (stop_tick) { |
3893 | cpumask_set_cpu(cpu, nohz.cpu_mask); | ||
3894 | cpu_rq(cpu)->in_nohz_recently = 1; | 3893 | cpu_rq(cpu)->in_nohz_recently = 1; |
3895 | 3894 | ||
3896 | /* | 3895 | if (!cpu_active(cpu)) { |
3897 | * If we are going offline and still the leader, give up! | 3896 | if (atomic_read(&nohz.load_balancer) != cpu) |
3898 | */ | 3897 | return 0; |
3899 | if (!cpu_active(cpu) && | 3898 | |
3900 | atomic_read(&nohz.load_balancer) == cpu) { | 3899 | /* |
3900 | * If we are going offline and still the leader, | ||
3901 | * give up! | ||
3902 | */ | ||
3901 | if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) | 3903 | if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) |
3902 | BUG(); | 3904 | BUG(); |
3905 | |||
3903 | return 0; | 3906 | return 0; |
3904 | } | 3907 | } |
3905 | 3908 | ||
3909 | cpumask_set_cpu(cpu, nohz.cpu_mask); | ||
3910 | |||
3906 | /* time for ilb owner also to sleep */ | 3911 | /* time for ilb owner also to sleep */ |
3907 | if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { | 3912 | if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { |
3908 | if (atomic_read(&nohz.load_balancer) == cpu) | 3913 | if (atomic_read(&nohz.load_balancer) == cpu) |