diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-11-25 07:31:39 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-12-06 15:10:56 -0500 |
commit | 6ad4c18884e864cf4c77f9074d3d1816063f99cd (patch) | |
tree | f09643f6148b576fa2d23bf7d4b37d082d94e267 /kernel/cpuset.c | |
parent | e1b8090bdf125f8b2e192149547fead7f302a89c (diff) |
sched: Fix balance vs hotplug race
Since (e761b77: cpu hotplug, sched: Introduce cpu_active_map and redo
sched domain managment) we have cpu_active_mask which is supposed to rule
scheduler migration and load-balancing, except it never (fully) did.
The particular problem being solved here is a crash in try_to_wake_up()
where select_task_rq() ends up selecting an offline cpu because
select_task_rq_fair() trusts the sched_domain tree to reflect the
current state of affairs; similarly, select_task_rq_rt() trusts the
root_domain.
However, the sched_domains are updated from CPU_DEAD, which is after the
cpu is taken offline and after stop_machine is done. Therefore it can
race perfectly well with code assuming the domains are right.
Cure this by building the domains from cpu_active_mask on
CPU_DOWN_PREPARE.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 16 |
1 files changed, 9 insertions, 7 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 43fb7e800028..ba401fab459f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -872,7 +872,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, | |||
872 | if (retval < 0) | 872 | if (retval < 0) |
873 | return retval; | 873 | return retval; |
874 | 874 | ||
875 | if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask)) | 875 | if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask)) |
876 | return -EINVAL; | 876 | return -EINVAL; |
877 | } | 877 | } |
878 | retval = validate_change(cs, trialcs); | 878 | retval = validate_change(cs, trialcs); |
@@ -2010,7 +2010,7 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
2010 | } | 2010 | } |
2011 | 2011 | ||
2012 | /* Continue past cpusets with all cpus, mems online */ | 2012 | /* Continue past cpusets with all cpus, mems online */ |
2013 | if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) && | 2013 | if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) && |
2014 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) | 2014 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) |
2015 | continue; | 2015 | continue; |
2016 | 2016 | ||
@@ -2019,7 +2019,7 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
2019 | /* Remove offline cpus and mems from this cpuset. */ | 2019 | /* Remove offline cpus and mems from this cpuset. */ |
2020 | mutex_lock(&callback_mutex); | 2020 | mutex_lock(&callback_mutex); |
2021 | cpumask_and(cp->cpus_allowed, cp->cpus_allowed, | 2021 | cpumask_and(cp->cpus_allowed, cp->cpus_allowed, |
2022 | cpu_online_mask); | 2022 | cpu_active_mask); |
2023 | nodes_and(cp->mems_allowed, cp->mems_allowed, | 2023 | nodes_and(cp->mems_allowed, cp->mems_allowed, |
2024 | node_states[N_HIGH_MEMORY]); | 2024 | node_states[N_HIGH_MEMORY]); |
2025 | mutex_unlock(&callback_mutex); | 2025 | mutex_unlock(&callback_mutex); |
@@ -2057,8 +2057,10 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, | |||
2057 | switch (phase) { | 2057 | switch (phase) { |
2058 | case CPU_ONLINE: | 2058 | case CPU_ONLINE: |
2059 | case CPU_ONLINE_FROZEN: | 2059 | case CPU_ONLINE_FROZEN: |
2060 | case CPU_DEAD: | 2060 | case CPU_DOWN_PREPARE: |
2061 | case CPU_DEAD_FROZEN: | 2061 | case CPU_DOWN_PREPARE_FROZEN: |
2062 | case CPU_DOWN_FAILED: | ||
2063 | case CPU_DOWN_FAILED_FROZEN: | ||
2062 | break; | 2064 | break; |
2063 | 2065 | ||
2064 | default: | 2066 | default: |
@@ -2067,7 +2069,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, | |||
2067 | 2069 | ||
2068 | cgroup_lock(); | 2070 | cgroup_lock(); |
2069 | mutex_lock(&callback_mutex); | 2071 | mutex_lock(&callback_mutex); |
2070 | cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask); | 2072 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); |
2071 | mutex_unlock(&callback_mutex); | 2073 | mutex_unlock(&callback_mutex); |
2072 | scan_for_empty_cpusets(&top_cpuset); | 2074 | scan_for_empty_cpusets(&top_cpuset); |
2073 | ndoms = generate_sched_domains(&doms, &attr); | 2075 | ndoms = generate_sched_domains(&doms, &attr); |
@@ -2114,7 +2116,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self, | |||
2114 | 2116 | ||
2115 | void __init cpuset_init_smp(void) | 2117 | void __init cpuset_init_smp(void) |
2116 | { | 2118 | { |
2117 | cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask); | 2119 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); |
2118 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; | 2120 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; |
2119 | 2121 | ||
2120 | hotcpu_notifier(cpuset_track_online_cpus, 0); | 2122 | hotcpu_notifier(cpuset_track_online_cpus, 0); |