sched: Fix balance vs hotplug race

Since (e761b77: cpu hotplug, sched: Introduce cpu_active_map and redo sched domain managment) we have cpu_active_mask which is supposed to rule scheduler migration and load-balancing, except it never (fully) did. The particular problem being solved here is a crash in try_to_wake_up() where select_task_rq() ends up selecting an offline cpu because select_task_rq_fair() trusts the sched_domain tree to reflect the current state of affairs, similarly select_task_rq_rt() trusts the root_domain. However, the sched_domains are updated from CPU_DEAD, which is after the cpu is taken offline and after stop_machine is done. Therefore it can race perfectly well with code assuming the domains are right. Cure this by building the domains from cpu_active_mask on CPU_DOWN_PREPARE. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2009-11-25 07:31:39 -0500
committer: Ingo Molnar <mingo@elte.hu> 2009-12-06 15:10:56 -0500
commit: 6ad4c18884e864cf4c77f9074d3d1816063f99cd (patch)
tree: f09643f6148b576fa2d23bf7d4b37d082d94e267 /kernel/cpu.c
parent: e1b8090bdf125f8b2e192149547fead7f302a89c (diff)
1 files changed, 13 insertions, 5 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6ba0f1ecb212..b21688640377 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -212,6 +212,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
        err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
                                        hcpu, -1, &nr_calls);
        if (err == NOTIFY_BAD) {
+                set_cpu_active(cpu, true);
                nr_calls--;
                __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
                                          hcpu, nr_calls, NULL);
@@ -223,11 +225,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
        /* Ensure that we are not runnable on dying cpu */
        cpumask_copy(old_allowed, &current->cpus_allowed);
-        set_cpus_allowed_ptr(current,
+        set_cpus_allowed_ptr(current, cpu_active_mask);
-                             cpumask_of(cpumask_any_but(cpu_online_mask, cpu)));
        err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
        if (err) {
+                set_cpu_active(cpu, true);
                /* CPU didn't die: tell everyone.  Can't complain. */
                if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
                                            hcpu) == NOTIFY_BAD)
@@ -292,9 +294,6 @@ int __ref cpu_down(unsigned int cpu)
        err = _cpu_down(cpu, 0);
-        if (cpu_online(cpu))
-                set_cpu_active(cpu, true);
 out:
        cpu_maps_update_done();
        stop_machine_destroy();
@@ -387,6 +386,15 @@ int disable_nonboot_cpus(void)
         * with the userspace trying to use the CPU hotplug at the same time
         */
        cpumask_clear(frozen_cpus);
+        for_each_online_cpu(cpu) {
+                if (cpu == first_cpu)
+                        continue;
+                set_cpu_active(cpu, false);
+        }
+        synchronize_sched();
        printk("Disabling non-boot CPUs ...\n");
        for_each_online_cpu(cpu) {
                if (cpu == first_cpu)
author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2009-11-25 07:31:39 -0500
committer	Ingo Molnar <mingo@elte.hu>	2009-12-06 15:10:56 -0500
commit	6ad4c18884e864cf4c77f9074d3d1816063f99cd (patch)
tree	f09643f6148b576fa2d23bf7d4b37d082d94e267 /kernel/cpu.c
parent	e1b8090bdf125f8b2e192149547fead7f302a89c (diff)