author		Tejun Heo <tj@kernel.org>	2012-09-05 02:16:32 -0400
committer	Tejun Heo <tj@kernel.org>	2012-09-05 19:10:15 -0400
commit		ec58815ab0409a921a7c9744eb4ca44866b14d71 (patch)
tree		228f1fb9035cc0b3f60fc14707614305608c96d1 /kernel
parent		90beca5de591e12482a812f23a7f10690962ed4a (diff)
workqueue: fix possible deadlock in idle worker rebinding
Currently, rebind_workers() and idle_worker_rebind() are two-way interlocked.  rebind_workers() waits for idle workers to finish rebinding and rebound idle workers wait for rebind_workers() to finish rebinding busy workers before proceeding.

Unfortunately, this isn't enough.  The second wait from idle workers is implemented as follows.

	wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));

rebind_workers() clears WORKER_REBIND, wakes up the idle workers and then returns.  If CPU hotplug cycle happens again before one of the idle workers finishes the above wait_event(), rebind_workers() will repeat the first part of the handshake - set WORKER_REBIND again and wait for the idle worker to finish rebinding - and this leads to deadlock because the idle worker would be waiting for WORKER_REBIND to clear.

This is fixed by adding another interlocking step at the end - rebind_workers() now waits for all the idle workers to finish the above WORKER_REBIND wait before returning.  This ensures that all rebinding steps are complete on all idle workers before the next hotplug cycle can happen.

This problem was diagnosed by Lai Jiangshan who also posted a patch to fix the issue, upon which this patch is based.

This is the minimal fix and further patches are scheduled for the next merge window to simplify the CPU hotplug path.

Signed-off-by: Tejun Heo <tj@kernel.org>
Original-patch-by: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <1346516916-1991-3-git-send-email-laijs@cn.fujitsu.com>
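The interlock being completed here can be illustrated with a small userspace analogue.  The sketch below is not the workqueue code: gcwq->lock, rebind_hold and the idle_rebind counter/completion are modelled with a pthread mutex and condition variables, and names such as worker_rebind, idle_rebind_cnt and NR_IDLE are invented for the illustration.

	/*
	 * Userspace analogue of the three-step handshake, assuming pthreads
	 * in place of gcwq->lock, wait_event() and struct completion.
	 */
	#include <pthread.h>
	#include <stdio.h>

	#define NR_IDLE 4

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t rebind_hold = PTHREAD_COND_INITIALIZER;  /* wakes idle workers */
	static pthread_cond_t rebind_done = PTHREAD_COND_INITIALIZER;  /* stands in for idle_rebind.done */

	static int worker_rebind = 1;   /* models the WORKER_REBIND flag */
	static int idle_rebind_cnt;     /* models idle_rebind.cnt */

	static void *idle_worker(void *arg)
	{
		(void)arg;
		pthread_mutex_lock(&lock);

		/* wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND)); */
		while (worker_rebind)
			pthread_cond_wait(&rebind_hold, &lock);

		/*
		 * The step this patch adds: tell rebind_workers() we are past
		 * the WORKER_REBIND wait, so it cannot set the flag again
		 * while we are still inside it.
		 */
		if (!--idle_rebind_cnt)
			pthread_cond_signal(&rebind_done);

		pthread_mutex_unlock(&lock);
		return NULL;
	}

	static void rebind_workers(void)
	{
		pthread_mutex_lock(&lock);

		/* one reference per idle worker plus one for ourselves */
		idle_rebind_cnt = NR_IDLE + 1;

		worker_rebind = 0;                      /* clear WORKER_REBIND ... */
		pthread_cond_broadcast(&rebind_hold);   /* ... and wake_up_all()   */

		/*
		 * Final interlock: don't return until every idle worker has
		 * passed its wait; otherwise the next hotplug cycle could set
		 * WORKER_REBIND again and deadlock a still-waiting worker.
		 */
		if (--idle_rebind_cnt)
			while (idle_rebind_cnt)
				pthread_cond_wait(&rebind_done, &lock);

		pthread_mutex_unlock(&lock);
	}

	int main(void)
	{
		pthread_t tid[NR_IDLE];

		for (int i = 0; i < NR_IDLE; i++)
			pthread_create(&tid[i], NULL, idle_worker, NULL);

		rebind_workers();

		for (int i = 0; i < NR_IDLE; i++)
			pthread_join(tid[i], NULL);

		puts("all idle workers passed the WORKER_REBIND wait");
		return 0;
	}

Dropping the final if/while in this toy rebind_workers() reproduces the window the commit message describes: nothing stops a second cycle from setting the flag again while a worker is still blocked in its first wait.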
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/workqueue.c	29
1 file changed, 27 insertions(+), 2 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index d79a18d0c42e..dc7b8458e275 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1326,6 +1326,15 @@ static void idle_worker_rebind(struct worker *worker)
 
 	/* we did our part, wait for rebind_workers() to finish up */
 	wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
+
+	/*
+	 * rebind_workers() shouldn't finish until all workers passed the
+	 * above WORKER_REBIND wait.  Tell it when done.
+	 */
+	spin_lock_irq(&worker->pool->gcwq->lock);
+	if (!--worker->idle_rebind->cnt)
+		complete(&worker->idle_rebind->done);
+	spin_unlock_irq(&worker->pool->gcwq->lock);
 }
 
 /*
@@ -1448,12 +1457,28 @@ retry:
 	 * be cleared inside idle_worker_rebind().  Clear and release.
 	 * Clearing %WORKER_REBIND from this foreign context is safe
 	 * because these workers are still guaranteed to be idle.
+	 *
+	 * We need to make sure all idle workers passed WORKER_REBIND wait
+	 * in idle_worker_rebind() before returning; otherwise, workers can
+	 * get stuck at the wait if hotplug cycle repeats.
 	 */
-	for_each_worker_pool(pool, gcwq)
-		list_for_each_entry(worker, &pool->idle_list, entry)
+	idle_rebind.cnt = 1;
+	INIT_COMPLETION(idle_rebind.done);
+
+	for_each_worker_pool(pool, gcwq) {
+		list_for_each_entry(worker, &pool->idle_list, entry) {
 			worker->flags &= ~WORKER_REBIND;
+			idle_rebind.cnt++;
+		}
+	}
 
 	wake_up_all(&gcwq->rebind_hold);
+
+	if (--idle_rebind.cnt) {
+		spin_unlock_irq(&gcwq->lock);
+		wait_for_completion(&idle_rebind.done);
+		spin_lock_irq(&gcwq->lock);
+	}
 }
 
 static struct worker *alloc_worker(void)
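For context, the hunks above rely on idle-rebind bookkeeping added by an earlier patch in this series; the definition below is a sketch of its assumed shape, not part of this diff.

	#include <linux/completion.h>

	/* Assumed shape of the bookkeeping referenced above (illustrative only). */
	struct idle_rebind {
		int			cnt;	/* # idle workers yet to pass the rebind wait */
		struct completion	done;	/* completed once cnt drops to zero */
	};

rebind_workers() keeps such an instance on its stack and each idle worker reaches it through worker->idle_rebind, which is why idle_worker_rebind() can decrement worker->idle_rebind->cnt under gcwq->lock.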