author		Peter Zijlstra <a.p.zijlstra@chello.nl>	2009-11-25 07:31:39 -0500
committer	Ingo Molnar <mingo@elte.hu>	2009-12-06 15:10:56 -0500
commit		6ad4c18884e864cf4c77f9074d3d1816063f99cd
tree		f09643f6148b576fa2d23bf7d4b37d082d94e267 /kernel/sched.c
parent		e1b8090bdf125f8b2e192149547fead7f302a89c
sched: Fix balance vs hotplug race
Since e761b77 ("cpu hotplug, sched: Introduce cpu_active_map and redo
sched domain managment") we have cpu_active_mask, which is supposed to
rule scheduler migration and load-balancing, except it never (fully) did.
The particular problem being solved here is a crash in try_to_wake_up(),
where select_task_rq() ends up selecting an offline cpu because
select_task_rq_fair() trusts the sched_domain tree to reflect the
current state of affairs; similarly, select_task_rq_rt() trusts the
root_domain.
However, the sched_domains are updated from CPU_DEAD, which is after the
cpu is taken offline and after stop_machine is done. Therefore it can
race perfectly well with code assuming the domains are right.
Cure this by building the domains from cpu_active_mask on
CPU_DOWN_PREPARE.
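
For reference, the resulting update_sched_domains() notifier then looks
roughly like the sketch below (a simplified rendering of the post-patch
function; the comments are editorial and assume the cpu is cleared from
cpu_active_mask before CPU_DOWN_PREPARE is delivered and put back on a
failed down):

  static int update_sched_domains(struct notifier_block *nfb,
                                  unsigned long action, void *hcpu)
  {
          switch (action) {
          case CPU_ONLINE:                /* cpu is active (again) */
          case CPU_ONLINE_FROZEN:
          case CPU_DOWN_PREPARE:          /* cpu already dropped out of
                                           * cpu_active_mask but is still
                                           * online: rebuild before
                                           * stop_machine() runs */
          case CPU_DOWN_PREPARE_FROZEN:
          case CPU_DOWN_FAILED:           /* down was aborted, the cpu is
                                           * active again */
          case CPU_DOWN_FAILED_FROZEN:
                  /* rebuild the fallback domain from cpu_active_mask,
                   * minus cpu_isolated_map */
                  partition_sched_domains(1, NULL, NULL);
                  return NOTIFY_OK;

          default:
                  return NOTIFY_DONE;
          }
  }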
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
 kernel/sched.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index aa31244caa9f..281da29d0801 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4134,7 +4134,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
         unsigned long flags;
         struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
-        cpumask_copy(cpus, cpu_online_mask);
+        cpumask_copy(cpus, cpu_active_mask);
 
         /*
          * When power savings policy is enabled for the parent domain, idle
@@ -4297,7 +4297,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
         int all_pinned = 0;
         struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
-        cpumask_copy(cpus, cpu_online_mask);
+        cpumask_copy(cpus, cpu_active_mask);
 
         /*
          * When power savings policy is enabled for the parent domain, idle
@@ -4694,7 +4694,7 @@ int select_nohz_load_balancer(int stop_tick)
                 cpumask_set_cpu(cpu, nohz.cpu_mask);
 
                 /* time for ilb owner also to sleep */
-                if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
+                if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) {
                         if (atomic_read(&nohz.load_balancer) == cpu)
                                 atomic_set(&nohz.load_balancer, -1);
                         return 0;
@@ -7093,7 +7093,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
         int ret = 0;
 
         rq = task_rq_lock(p, &flags);
-        if (!cpumask_intersects(new_mask, cpu_online_mask)) {
+        if (!cpumask_intersects(new_mask, cpu_active_mask)) {
                 ret = -EINVAL;
                 goto out;
         }
@@ -7115,7 +7115,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
         if (cpumask_test_cpu(task_cpu(p), new_mask))
                 goto out;
 
-        if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
+        if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) {
                 /* Need help from migration thread: drop lock and wait. */
                 struct task_struct *mt = rq->migration_thread;
 
@@ -7269,19 +7269,19 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 
 again:
         /* Look for allowed, online CPU in same node. */
-        for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
+        for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
                 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
                         goto move;
 
         /* Any allowed, online CPU? */
-        dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
+        dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
         if (dest_cpu < nr_cpu_ids)
                 goto move;
 
         /* No more Mr. Nice Guy. */
         if (dest_cpu >= nr_cpu_ids) {
                 cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
-                dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
+                dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
 
                 /*
                  * Don't tell them about moving exiting tasks or
@@ -7310,7 +7310,7 @@ move:
  */
 static void migrate_nr_uninterruptible(struct rq *rq_src)
 {
-        struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask));
+        struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
         unsigned long flags;
 
         local_irq_save(flags);
@@ -7564,7 +7564,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
 static struct ctl_table_header *sd_sysctl_header;
 static void register_sched_domain_sysctl(void)
 {
-        int i, cpu_num = num_online_cpus();
+        int i, cpu_num = num_possible_cpus();
         struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
         char buf[32];
 
@@ -7574,7 +7574,7 @@ static void register_sched_domain_sysctl(void)
         if (entry == NULL)
                 return;
 
-        for_each_online_cpu(i) {
+        for_each_possible_cpu(i) {
                 snprintf(buf, 32, "cpu%d", i);
                 entry->procname = kstrdup(buf, GFP_KERNEL);
                 entry->mode = 0555;
@@ -9100,7 +9100,7 @@ match1:
         if (doms_new == NULL) {
                 ndoms_cur = 0;
                 doms_new = &fallback_doms;
-                cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map);
+                cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
                 WARN_ON_ONCE(dattr_new);
         }
 
@@ -9231,8 +9231,10 @@ static int update_sched_domains(struct notifier_block *nfb,
         switch (action) {
         case CPU_ONLINE:
         case CPU_ONLINE_FROZEN:
-        case CPU_DEAD:
-        case CPU_DEAD_FROZEN:
+        case CPU_DOWN_PREPARE:
+        case CPU_DOWN_PREPARE_FROZEN:
+        case CPU_DOWN_FAILED:
+        case CPU_DOWN_FAILED_FROZEN:
                 partition_sched_domains(1, NULL, NULL);
                 return NOTIFY_OK;
 
@@ -9279,7 +9281,7 @@ void __init sched_init_smp(void)
 #endif
         get_online_cpus();
         mutex_lock(&sched_domains_mutex);
-        arch_init_sched_domains(cpu_online_mask);
+        arch_init_sched_domains(cpu_active_mask);
         cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
         if (cpumask_empty(non_isolated_cpus))
                 cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);