author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2012-03-20 10:57:01 -0400
committer	Ingo Molnar <mingo@kernel.org>	2012-03-27 08:50:14 -0400
commit	2baab4e90495ebc9826c93f79d74d6e60a828d24 (patch)
tree	6e6316694a9bc69e517e50b0d01043c4c9e83888 /kernel
parent	bc758133ed73d4b06952bec21da23e28e62bf3ba (diff)
sched: Fix select_fallback_rq() vs cpu_active/cpu_online
Commit 5fbd036b55 ("sched: Cleanup cpu_active madness"), which was supposed to finally sort the cpu_active mess, instead uncovered more.

Since CPU_STARTING is run before setting the cpu online, there's a (small) window where the cpu is active,!online.

If during this time there's a wakeup of a task that used to reside on that cpu, select_task_rq() will use select_fallback_rq() to compute an alternative cpu to run on, since we find !online. select_fallback_rq(), however, computes the new cpu against cpu_active; this means it can return the same cpu it started out with, the !online one, since that cpu is in fact marked active.

This results in us trying to schedule a task on an offline cpu and triggering a WARN in the IPI code.

The solution proposed by Chuansheng Liu, setting cpu_active in set_cpu_online(), is buggy: firstly, not all archs actually use set_cpu_online(); secondly, not all archs call set_cpu_online() with IRQs disabled. This means we would introduce either the same race or the race from fd8a7de17 ("x86: cpu-hotplug: Prevent softirq wakeup on wrong CPU") -- albeit a much narrower one.

[ By setting online first and active later we have a window of online,!active. Fresh and bound kthreads have a task_cpu() of 0, and since cpu0 isn't in tsk_cpus_allowed() we end up in select_fallback_rq(), which excludes !active, resulting in a reset of ->cpus_allowed and the thread running all over the place. ]

The solution is to re-work select_fallback_rq() to require active _and_ online. This makes the active,!online case work as expected; OTOH, archs running CPU_STARTING after setting online are now vulnerable to the issue from fd8a7de17 -- these are alpha and blackfin.

Reported-by: Chuansheng Liu <chuansheng.liu@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: linux-alpha@vger.kernel.org
Link: http://lkml.kernel.org/n/tip-hubqk1i10o4dpvlm06gq7v6j@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
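The race described above is easiest to see with the two masks laid out side by side. What follows is a minimal userspace sketch (plain C, not kernel code; the arrays stand in for cpu_active_mask, cpu_online_mask and tsk_cpus_allowed(), everything else is invented for illustration) of why a fallback that consults only the active mask can hand back the very cpu that is not yet online, while requiring active AND online cannot:

#include <stdio.h>
#include <stdbool.h>

#define NR_CPUS 4

/* Toy stand-ins for cpu_active_mask, cpu_online_mask and
 * tsk_cpus_allowed(); everything here is illustration only. */
static bool cpu_active[NR_CPUS];
static bool cpu_online[NR_CPUS];
static bool task_allowed[NR_CPUS];

/* Old behaviour: any allowed cpu that is active will do; online is
 * never consulted. */
static int fallback_old(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		if (task_allowed[cpu] && cpu_active[cpu])
			return cpu;
	return -1;
}

/* Fixed behaviour: the cpu must be active _and_ online. */
static int fallback_fixed(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		if (task_allowed[cpu] && cpu_active[cpu] && cpu_online[cpu])
			return cpu;
	return -1;
}

int main(void)
{
	/* cpu0 is fully up; cpu1 is mid-bringup: CPU_STARTING already
	 * marked it active, but it has not been set online yet. */
	cpu_active[0] = cpu_online[0] = true;
	cpu_active[1] = true;		/* the active,!online window */

	/* The woken task used to reside on cpu1 and is only allowed there. */
	task_allowed[1] = true;

	printf("old fallback picks cpu%d (!online -> WARN in the IPI code)\n",
	       fallback_old());

	int cpu = fallback_fixed();
	if (cpu < 0)
		printf("fixed fallback finds nothing; widen the mask instead\n");
	else
		printf("fixed fallback picks cpu%d\n", cpu);
	return 0;
}

With the fixed check the scan comes up empty during the bring-up window, so select_fallback_rq() falls through to widening the task's mask (see the reworked loop in kernel/sched/core.c below) instead of handing the task to a cpu that cannot take IPIs yet.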
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/cpuset.c	20
-rw-r--r--	kernel/sched/core.c	62
2 files changed, 50 insertions, 32 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index a09ac2b9a661..c9837b74ab96 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2195,7 +2195,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 	mutex_unlock(&callback_mutex);
 }
 
-int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
+void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
 {
 	const struct cpuset *cs;
 	int cpu;
@@ -2219,22 +2219,10 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
 	 * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary
 	 * set any mask even if it is not right from task_cs() pov,
 	 * the pending set_cpus_allowed_ptr() will fix things.
+	 *
+	 * select_fallback_rq() will fix things ups and set cpu_possible_mask
+	 * if required.
 	 */
-
-	cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
-	if (cpu >= nr_cpu_ids) {
-		/*
-		 * Either tsk->cpus_allowed is wrong (see above) or it
-		 * is actually empty. The latter case is only possible
-		 * if we are racing with remove_tasks_in_empty_cpuset().
-		 * Like above we can temporary set any mask and rely on
-		 * set_cpus_allowed_ptr() as synchronization point.
-		 */
-		do_set_cpus_allowed(tsk, cpu_possible_mask);
-		cpu = cpumask_any(cpu_active_mask);
-	}
-
-	return cpu;
 }
 
 void cpuset_init_current_mems_allowed(void)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e3ccc13c4caa..9c1629c90b2d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1263,29 +1263,59 @@ EXPORT_SYMBOL_GPL(kick_process);
  */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
-	int dest_cpu;
 	const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
+	enum { cpuset, possible, fail } state = cpuset;
+	int dest_cpu;
 
 	/* Look for allowed, online CPU in same node. */
-	for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
+	for_each_cpu_mask(dest_cpu, *nodemask) {
+		if (!cpu_online(dest_cpu))
+			continue;
+		if (!cpu_active(dest_cpu))
+			continue;
 		if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
 			return dest_cpu;
+	}
 
-	/* Any allowed, online CPU? */
-	dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask);
-	if (dest_cpu < nr_cpu_ids)
-		return dest_cpu;
+	for (;;) {
+		/* Any allowed, online CPU? */
+		for_each_cpu_mask(dest_cpu, *tsk_cpus_allowed(p)) {
+			if (!cpu_online(dest_cpu))
+				continue;
+			if (!cpu_active(dest_cpu))
+				continue;
+			goto out;
+		}
 
-	/* No more Mr. Nice Guy. */
-	dest_cpu = cpuset_cpus_allowed_fallback(p);
-	/*
-	 * Don't tell them about moving exiting tasks or
-	 * kernel threads (both mm NULL), since they never
-	 * leave kernel.
-	 */
-	if (p->mm && printk_ratelimit()) {
-		printk_sched("process %d (%s) no longer affine to cpu%d\n",
-				task_pid_nr(p), p->comm, cpu);
+		switch (state) {
+		case cpuset:
+			/* No more Mr. Nice Guy. */
+			cpuset_cpus_allowed_fallback(p);
+			state = possible;
+			break;
+
+		case possible:
+			do_set_cpus_allowed(p, cpu_possible_mask);
+			state = fail;
+			break;
+
+		case fail:
+			BUG();
+			break;
+		}
+	}
+
+out:
+	if (state != cpuset) {
+		/*
+		 * Don't tell them about moving exiting tasks or
+		 * kernel threads (both mm NULL), since they never
+		 * leave kernel.
+		 */
+		if (p->mm && printk_ratelimit()) {
+			printk_sched("process %d (%s) no longer affine to cpu%d\n",
+					task_pid_nr(p), p->comm, cpu);
+		}
 	}
 
 	return dest_cpu;
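The reworked function is, in effect, a small escalation state machine: scan for an allowed cpu that is both active and online, and on each failure widen the search, first via the cpuset fallback mask, then via cpu_possible_mask; a third failure is impossible by construction, hence the BUG(). Below is a minimal userspace sketch of that pattern (plain C; pick_cpu() and the widening counter are invented for illustration, only the cpuset/possible/fail progression comes from the patch):

#include <stdio.h>
#include <stdlib.h>

enum fallback_state { st_cpuset, st_possible, st_fail };

/* Invented helper: pretend to scan the currently-allowed mask for a
 * usable cpu; here it only succeeds once the mask has been widened. */
static int pick_cpu(int widened)
{
	return widened >= 1 ? 0 : -1;
}

static int select_fallback(void)
{
	enum fallback_state state = st_cpuset;
	int widened = 0;

	for (;;) {
		int cpu = pick_cpu(widened);	/* any allowed, active+online cpu? */
		if (cpu >= 0)
			return cpu;

		switch (state) {
		case st_cpuset:		/* widen to the cpuset fallback mask */
			widened++;
			state = st_possible;
			break;
		case st_possible:	/* last resort: cpu_possible_mask */
			widened++;
			state = st_fail;
			break;
		case st_fail:		/* even cpu_possible_mask failed */
			fprintf(stderr, "BUG: no possible cpu\n");
			abort();
		}
	}
}

int main(void)
{
	printf("fell back to cpu%d after one widening step\n", select_fallback());
	return 0;
}

Each pass around the loop either finds a cpu or strictly widens the mask, which is what guarantees termination and lets the fail state assert that cpu_possible_mask can never come up empty.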