aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
authorPeter Zijlstra <a.p.zijlstra@chello.nl>2009-11-25 07:31:39 -0500
committerIngo Molnar <mingo@elte.hu>2009-12-06 15:10:56 -0500
commit6ad4c18884e864cf4c77f9074d3d1816063f99cd (patch)
treef09643f6148b576fa2d23bf7d4b37d082d94e267 /kernel/cpuset.c
parente1b8090bdf125f8b2e192149547fead7f302a89c (diff)
sched: Fix balance vs hotplug race
Since (e761b77: cpu hotplug, sched: Introduce cpu_active_map and redo sched domain managment) we have cpu_active_mask which is suppose to rule scheduler migration and load-balancing, except it never (fully) did. The particular problem being solved here is a crash in try_to_wake_up() where select_task_rq() ends up selecting an offline cpu because select_task_rq_fair() trusts the sched_domain tree to reflect the current state of affairs, similarly select_task_rq_rt() trusts the root_domain. However, the sched_domains are updated from CPU_DEAD, which is after the cpu is taken offline and after stop_machine is done. Therefore it can race perfectly well with code assuming the domains are right. Cure this by building the domains from cpu_active_mask on CPU_DOWN_PREPARE. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--kernel/cpuset.c16
1 files changed, 9 insertions, 7 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 43fb7e800028..ba401fab459f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -872,7 +872,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
872 if (retval < 0) 872 if (retval < 0)
873 return retval; 873 return retval;
874 874
875 if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask)) 875 if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask))
876 return -EINVAL; 876 return -EINVAL;
877 } 877 }
878 retval = validate_change(cs, trialcs); 878 retval = validate_change(cs, trialcs);
@@ -2010,7 +2010,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
2010 } 2010 }
2011 2011
2012 /* Continue past cpusets with all cpus, mems online */ 2012 /* Continue past cpusets with all cpus, mems online */
2013 if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) && 2013 if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) &&
2014 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) 2014 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
2015 continue; 2015 continue;
2016 2016
@@ -2019,7 +2019,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
2019 /* Remove offline cpus and mems from this cpuset. */ 2019 /* Remove offline cpus and mems from this cpuset. */
2020 mutex_lock(&callback_mutex); 2020 mutex_lock(&callback_mutex);
2021 cpumask_and(cp->cpus_allowed, cp->cpus_allowed, 2021 cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
2022 cpu_online_mask); 2022 cpu_active_mask);
2023 nodes_and(cp->mems_allowed, cp->mems_allowed, 2023 nodes_and(cp->mems_allowed, cp->mems_allowed,
2024 node_states[N_HIGH_MEMORY]); 2024 node_states[N_HIGH_MEMORY]);
2025 mutex_unlock(&callback_mutex); 2025 mutex_unlock(&callback_mutex);
@@ -2057,8 +2057,10 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
2057 switch (phase) { 2057 switch (phase) {
2058 case CPU_ONLINE: 2058 case CPU_ONLINE:
2059 case CPU_ONLINE_FROZEN: 2059 case CPU_ONLINE_FROZEN:
2060 case CPU_DEAD: 2060 case CPU_DOWN_PREPARE:
2061 case CPU_DEAD_FROZEN: 2061 case CPU_DOWN_PREPARE_FROZEN:
2062 case CPU_DOWN_FAILED:
2063 case CPU_DOWN_FAILED_FROZEN:
2062 break; 2064 break;
2063 2065
2064 default: 2066 default:
@@ -2067,7 +2069,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
2067 2069
2068 cgroup_lock(); 2070 cgroup_lock();
2069 mutex_lock(&callback_mutex); 2071 mutex_lock(&callback_mutex);
2070 cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask); 2072 cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
2071 mutex_unlock(&callback_mutex); 2073 mutex_unlock(&callback_mutex);
2072 scan_for_empty_cpusets(&top_cpuset); 2074 scan_for_empty_cpusets(&top_cpuset);
2073 ndoms = generate_sched_domains(&doms, &attr); 2075 ndoms = generate_sched_domains(&doms, &attr);
@@ -2114,7 +2116,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
2114 2116
2115void __init cpuset_init_smp(void) 2117void __init cpuset_init_smp(void)
2116{ 2118{
2117 cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask); 2119 cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
2118 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; 2120 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2119 2121
2120 hotcpu_notifier(cpuset_track_online_cpus, 0); 2122 hotcpu_notifier(cpuset_track_online_cpus, 0);