diff options
author | Vincent Guittot <vincent.guittot@linaro.org> | 2013-04-23 10:59:02 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2013-04-26 06:13:44 -0400 |
commit | 25f55d9d01ad7a7ad248fd5af1d22675ffd202c5 (patch) | |
tree | dc6ba0d1f4dc8119c7dd4d0844fa31a8ed60bf6f /kernel/sched | |
parent | e02e60c109ca70935bad1131976bdbf5160cf576 (diff) |
sched: Fix init NOHZ_IDLE flag
On my SMP platform which is made of 5 cores in 2 clusters, I
have the nr_busy_cpus field of the sched_group_power struct that is
non-zero when the platform is fully idle - which makes the
scheduler unhappy.
The root cause is:
During the boot sequence, some CPUs reach the idle loop and set
their NOHZ_IDLE flag while waiting for other CPUs to boot. But
the nr_busy_cpus field is initialized later with the assumption
that all CPUs are in the busy state whereas some CPUs have
already set their NOHZ_IDLE flag.
More generally, the NOHZ_IDLE flag must be initialized when new
sched_domains are created in order to ensure that NOHZ_IDLE and
nr_busy_cpus are aligned.
This condition can be ensured by adding a synchronize_rcu()
between the destruction of old sched_domains and the creation of
new ones so the NOHZ_IDLE flag will not be updated with old
sched_domain once it has been initialized. But this solution
introduces an additional latency in the rebuild sequence that is
called during cpu hotplug.
As suggested by Frederic Weisbecker, another solution is to have
the same rcu lifecycle for both NOHZ_IDLE and sched_domain
struct. A new nohz_idle field is added to sched_domain so both
status and sched_domain will share the same RCU lifecycle and
will be always synchronized. In addition, there is no more need
to protect nohz_idle against concurrent access as it is only
modified by 2 exclusive functions called by the local CPU.
This solution has been preferred to the creation of a new struct
with an extra pointer indirection for sched_domain.
The synchronization is done at the cost of:
- An additional indirection and a rcu_dereference for accessing nohz_idle.
- We use only the nohz_idle field of the top sched_domain.
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: linaro-kernel@lists.linaro.org
Cc: peterz@infradead.org
Cc: fweisbec@gmail.com
Cc: pjt@google.com
Cc: rostedt@goodmis.org
Cc: efault@gmx.de
Link: http://lkml.kernel.org/r/1366729142-14662-1-git-send-email-vincent.guittot@linaro.org
[ Fixed !NO_HZ build bug. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/fair.c | 26 | ||||
-rw-r--r-- | kernel/sched/sched.h | 1 |
2 files changed, 16 insertions, 11 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index acaf567a03d2..8bf7081b1ec5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -5420,13 +5420,16 @@ static inline void set_cpu_sd_state_busy(void) | |||
5420 | struct sched_domain *sd; | 5420 | struct sched_domain *sd; |
5421 | int cpu = smp_processor_id(); | 5421 | int cpu = smp_processor_id(); |
5422 | 5422 | ||
5423 | if (!test_bit(NOHZ_IDLE, nohz_flags(cpu))) | ||
5424 | return; | ||
5425 | clear_bit(NOHZ_IDLE, nohz_flags(cpu)); | ||
5426 | |||
5427 | rcu_read_lock(); | 5423 | rcu_read_lock(); |
5428 | for_each_domain(cpu, sd) | 5424 | sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); |
5425 | |||
5426 | if (!sd || !sd->nohz_idle) | ||
5427 | goto unlock; | ||
5428 | sd->nohz_idle = 0; | ||
5429 | |||
5430 | for (; sd; sd = sd->parent) | ||
5429 | atomic_inc(&sd->groups->sgp->nr_busy_cpus); | 5431 | atomic_inc(&sd->groups->sgp->nr_busy_cpus); |
5432 | unlock: | ||
5430 | rcu_read_unlock(); | 5433 | rcu_read_unlock(); |
5431 | } | 5434 | } |
5432 | 5435 | ||
@@ -5435,13 +5438,16 @@ void set_cpu_sd_state_idle(void) | |||
5435 | struct sched_domain *sd; | 5438 | struct sched_domain *sd; |
5436 | int cpu = smp_processor_id(); | 5439 | int cpu = smp_processor_id(); |
5437 | 5440 | ||
5438 | if (test_bit(NOHZ_IDLE, nohz_flags(cpu))) | ||
5439 | return; | ||
5440 | set_bit(NOHZ_IDLE, nohz_flags(cpu)); | ||
5441 | |||
5442 | rcu_read_lock(); | 5441 | rcu_read_lock(); |
5443 | for_each_domain(cpu, sd) | 5442 | sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); |
5443 | |||
5444 | if (!sd || sd->nohz_idle) | ||
5445 | goto unlock; | ||
5446 | sd->nohz_idle = 1; | ||
5447 | |||
5448 | for (; sd; sd = sd->parent) | ||
5444 | atomic_dec(&sd->groups->sgp->nr_busy_cpus); | 5449 | atomic_dec(&sd->groups->sgp->nr_busy_cpus); |
5450 | unlock: | ||
5445 | rcu_read_unlock(); | 5451 | rcu_read_unlock(); |
5446 | } | 5452 | } |
5447 | 5453 | ||
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 605426a63588..4c225c4c7111 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -1303,7 +1303,6 @@ extern void account_cfs_bandwidth_used(int enabled, int was_enabled); | |||
1303 | enum rq_nohz_flag_bits { | 1303 | enum rq_nohz_flag_bits { |
1304 | NOHZ_TICK_STOPPED, | 1304 | NOHZ_TICK_STOPPED, |
1305 | NOHZ_BALANCE_KICK, | 1305 | NOHZ_BALANCE_KICK, |
1306 | NOHZ_IDLE, | ||
1307 | }; | 1306 | }; |
1308 | 1307 | ||
1309 | #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) | 1308 | #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) |