diff options
author | Tejun Heo <tj@kernel.org> | 2010-06-08 15:40:36 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2010-06-08 15:40:36 -0400 |
commit | 3a101d0548e925ab16ca6aaa8cf4f767d322ddb0 (patch) | |
tree | b90d8c5f2efe30fcfa49a00fdea037567c6cd46f /kernel/cpuset.c | |
parent | 50a323b73069b169385a8ac65633dee837a7d13f (diff) |
sched: adjust when cpu_active and cpuset configurations are updated during cpu on/offlining
Currently, when a cpu goes down, cpu_active is cleared before
CPU_DOWN_PREPARE starts and cpuset configuration is updated from a
default priority cpu notifier. When a cpu is coming up, it's set
before CPU_ONLINE but cpuset configuration again is updated from the
same cpu notifier.
For cpu notifiers, this presents an inconsistent state. Threads which
a CPU_DOWN_PREPARE notifier expects to be bound to the CPU can be
migrated to other cpus because the cpu is no more inactive.
Fix it by updating cpu_active in the highest priority cpu notifier and
cpuset configuration in the second highest when a cpu is coming up.
Down path is updated similarly. This guarantees that all other cpu
notifiers see consistent cpu_active and cpuset configuration.
cpuset_track_online_cpus() notifier is converted to
cpuset_update_active_cpus() which just updates the configuration and
now called from cpuset_cpu_[in]active() notifiers registered from
sched_init_smp(). If cpuset is disabled, cpuset_update_active_cpus()
degenerates into partition_sched_domains() making separate notifier
for !CONFIG_CPUSETS unnecessary.
This problem is triggered by cmwq. During CPU_DOWN_PREPARE, hotplug
callback creates a kthread and kthread_bind()s it to the target cpu,
and the thread is expected to run on that cpu.
* Ingo's test discovered __cpuinit/exit markups were incorrect.
Fixed.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Menage <menage@google.com>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 21 |
1 files changed, 2 insertions, 19 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 02b9611eadde..05727dcaa80d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -2113,31 +2113,17 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
2113 | * but making no active use of cpusets. | 2113 | * but making no active use of cpusets. |
2114 | * | 2114 | * |
2115 | * This routine ensures that top_cpuset.cpus_allowed tracks | 2115 | * This routine ensures that top_cpuset.cpus_allowed tracks |
2116 | * cpu_online_map on each CPU hotplug (cpuhp) event. | 2116 | * cpu_active_mask on each CPU hotplug (cpuhp) event. |
2117 | * | 2117 | * |
2118 | * Called within get_online_cpus(). Needs to call cgroup_lock() | 2118 | * Called within get_online_cpus(). Needs to call cgroup_lock() |
2119 | * before calling generate_sched_domains(). | 2119 | * before calling generate_sched_domains(). |
2120 | */ | 2120 | */ |
2121 | static int cpuset_track_online_cpus(struct notifier_block *unused_nb, | 2121 | void __cpuexit cpuset_update_active_cpus(void) |
2122 | unsigned long phase, void *unused_cpu) | ||
2123 | { | 2122 | { |
2124 | struct sched_domain_attr *attr; | 2123 | struct sched_domain_attr *attr; |
2125 | cpumask_var_t *doms; | 2124 | cpumask_var_t *doms; |
2126 | int ndoms; | 2125 | int ndoms; |
2127 | 2126 | ||
2128 | switch (phase) { | ||
2129 | case CPU_ONLINE: | ||
2130 | case CPU_ONLINE_FROZEN: | ||
2131 | case CPU_DOWN_PREPARE: | ||
2132 | case CPU_DOWN_PREPARE_FROZEN: | ||
2133 | case CPU_DOWN_FAILED: | ||
2134 | case CPU_DOWN_FAILED_FROZEN: | ||
2135 | break; | ||
2136 | |||
2137 | default: | ||
2138 | return NOTIFY_DONE; | ||
2139 | } | ||
2140 | |||
2141 | cgroup_lock(); | 2127 | cgroup_lock(); |
2142 | mutex_lock(&callback_mutex); | 2128 | mutex_lock(&callback_mutex); |
2143 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); | 2129 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); |
@@ -2148,8 +2134,6 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, | |||
2148 | 2134 | ||
2149 | /* Have scheduler rebuild the domains */ | 2135 | /* Have scheduler rebuild the domains */ |
2150 | partition_sched_domains(ndoms, doms, attr); | 2136 | partition_sched_domains(ndoms, doms, attr); |
2151 | |||
2152 | return NOTIFY_OK; | ||
2153 | } | 2137 | } |
2154 | 2138 | ||
2155 | #ifdef CONFIG_MEMORY_HOTPLUG | 2139 | #ifdef CONFIG_MEMORY_HOTPLUG |
@@ -2203,7 +2187,6 @@ void __init cpuset_init_smp(void) | |||
2203 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); | 2187 | cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); |
2204 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; | 2188 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; |
2205 | 2189 | ||
2206 | hotcpu_notifier(cpuset_track_online_cpus, 0); | ||
2207 | hotplug_memory_notifier(cpuset_track_online_nodes, 10); | 2190 | hotplug_memory_notifier(cpuset_track_online_nodes, 10); |
2208 | 2191 | ||
2209 | cpuset_wq = create_singlethread_workqueue("cpuset"); | 2192 | cpuset_wq = create_singlethread_workqueue("cpuset"); |