aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorGregory Haskins <ghaskins@novell.com>2008-12-29 09:39:53 -0500
committerGregory Haskins <ghaskins@novell.com>2008-12-29 09:39:53 -0500
commit1563513d34ed4b12ef32bc2adde4a53ce05701a1 (patch)
treec8937f0dfd7998f3c9316beadc31c2b437b21157 /kernel
parent917b627d4d981dc614519d7b34ea31a976b14e12 (diff)
RT: fix push_rt_task() to handle dequeue_pushable properly
A panic was discovered by Chirag Jog where a BUG_ON sanity check in the new "pushable_task" logic would trigger a panic under certain circumstances: http://lkml.org/lkml/2008/9/25/189 Gilles Carry discovered that the root cause was attributed to the pushable_tasks list getting corrupted in the push_rt_task logic. This was the result of a dropped rq lock in double_lock_balance allowing a task in the process of being pushed to potentially migrate away, and thus corrupt the pushable_tasks() list. I traced back the problem as introduced by the pushable_tasks patch that went in recently. There is a "retry" path in push_rt_task() that actually had a compound conditional to decide whether to retry or exit. I missed the meaning behind the rationale for the virtual "if(!task) goto out;" portion of the compound statement and thus did not handle it properly. The new pushable_tasks logic actually creates three distinct conditions: 1) an untouched and unpushable task should be dequeued 2) a migrated task where more pushable tasks remain should be retried 3) a migrated task where no more pushable tasks exist should exit The original logic mushed (1) and (3) together, resulting in the system dequeuing a migrated task (against an unlocked foreign run-queue nonetheless). To fix this, we get rid of the notion of "paranoid" and we support the three unique conditions properly. The paranoid feature is no longer relevant with the new pushable logic (since pushable naturally limits the loop) anyway, so lets just remove it. Reported-By: Chirag Jog <chirag@linux.vnet.ibm.com> Found-by: Gilles Carry <gilles.carry@bull.net> Signed-off-by: Gregory Haskins <ghaskins@novell.com>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched_rt.c34
1 files changed, 22 insertions, 12 deletions
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index fe9da6084c87..64a8f0aa117b 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1192,7 +1192,6 @@ static int push_rt_task(struct rq *rq)
1192{ 1192{
1193 struct task_struct *next_task; 1193 struct task_struct *next_task;
1194 struct rq *lowest_rq; 1194 struct rq *lowest_rq;
1195 int paranoid = RT_MAX_TRIES;
1196 1195
1197 if (!rq->rt.overloaded) 1196 if (!rq->rt.overloaded)
1198 return 0; 1197 return 0;
@@ -1226,23 +1225,34 @@ static int push_rt_task(struct rq *rq)
1226 struct task_struct *task; 1225 struct task_struct *task;
1227 /* 1226 /*
1228 * find lock_lowest_rq releases rq->lock 1227 * find lock_lowest_rq releases rq->lock
1229 * so it is possible that next_task has changed. 1228 * so it is possible that next_task has migrated.
1230 * If it has, then try again. 1229 *
1230 * We need to make sure that the task is still on the same
1231 * run-queue and is also still the next task eligible for
1232 * pushing.
1231 */ 1233 */
1232 task = pick_next_pushable_task(rq); 1234 task = pick_next_pushable_task(rq);
1233 if (unlikely(task != next_task) && task && paranoid--) { 1235 if (task_cpu(next_task) == rq->cpu && task == next_task) {
1234 put_task_struct(next_task); 1236 /*
1235 next_task = task; 1237 * If we get here, the task hasnt moved at all, but
1236 goto retry; 1238 * it has failed to push. We will not try again,
1239 * since the other cpus will pull from us when they
1240 * are ready.
1241 */
1242 dequeue_pushable_task(rq, next_task);
1243 goto out;
1237 } 1244 }
1238 1245
1246 if (!task)
1247 /* No more tasks, just exit */
1248 goto out;
1249
1239 /* 1250 /*
1240 * Once we have failed to push this task, we will not 1251 * Something has shifted, try again.
1241 * try again, since the other cpus will pull from us
1242 * when they are ready
1243 */ 1252 */
1244 dequeue_pushable_task(rq, next_task); 1253 put_task_struct(next_task);
1245 goto out; 1254 next_task = task;
1255 goto retry;
1246 } 1256 }
1247 1257
1248 deactivate_task(rq, next_task, 0); 1258 deactivate_task(rq, next_task, 0);