author     Lai Jiangshan <laijs@cn.fujitsu.com>        2013-03-08 18:18:28 -0500
committer  Tejun Heo <tj@kernel.org>                   2013-03-08 18:18:28 -0500
commit     eb2834285cf172856cd12f66892fc7467935ebed (patch)
tree       bf0a3125e85b7acf2dc43e0d4f82b68459e77a36 /kernel
parent     6dbe51c251a327e012439c4772097a13df43c5b8 (diff)
workqueue: fix possible pool stall bug in wq_unbind_fn()
Since multiple pools per cpu have been introduced, wq_unbind_fn() has a subtle bug which may theoretically stall work item processing. The problem is two-fold.

* wq_unbind_fn() depends on the worker executing wq_unbind_fn() itself to start unbound chain execution, which works fine when there was only a single pool. With multiple pools, only the pool which is running wq_unbind_fn() - the highpri one - is guaranteed to have such a kick-off. The other pool could stall when its busy workers block.

* The current code sets WORKER_UNBIND / POOL_DISASSOCIATED on the two pools in succession without initiating work execution in between. Because setting the flags requires grabbing assoc_mutex, which is held while new workers are created, this could lead to stalls if a pool's manager is waiting for the previous pool's work items to release memory. This is almost purely theoretical, though.

Update wq_unbind_fn() such that it sets WORKER_UNBIND / POOL_DISASSOCIATED, goes over schedule() and explicitly kicks off execution for a pool, and then moves on to the next one.

tj: Updated comments and description.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: stable@vger.kernel.org
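A note for readers skimming the diff below: the hunk starts inside the for_each_std_worker_pool() loop, so the structural change - moving the schedule() / nr_running / wake-up sequence inside the per-pool loop - is easier to see as a simplified outline. The following is a stand-alone, user-space sketch of that control flow, not kernel code; struct fake_pool, kick_off() and unbind_cpu_pools() are invented stand-ins, and only wq_unbind_fn(), wake_up_worker(), nr_running and the WORKER_UNBOUND / POOL_DISASSOCIATED flags correspond to names in the patch.

/*
 * Simplified, user-space model of the patched control flow in
 * wq_unbind_fn().  The types and helpers are illustrative stand-ins,
 * not the kernel's workqueue API.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_pool {              /* hypothetical stand-in for a worker pool */
        const char *name;
        bool disassociated;     /* models POOL_DISASSOCIATED */
        int nr_running;         /* models pool->nr_running */
};

/*
 * Stand-in for wake_up_worker(): in the kernel this wakes an idle worker
 * so the pool starts unbound chain execution on its own, instead of
 * relying on the worker that is running wq_unbind_fn().
 */
static void kick_off(struct fake_pool *pool)
{
        printf("%s pool: kicked off unbound execution\n", pool->name);
}

static void unbind_cpu_pools(struct fake_pool *pools, int n)
{
        for (int i = 0; i < n; i++) {
                struct fake_pool *pool = &pools[i];

                /* 1. mark the pool disassociated (WORKER_UNBOUND /
                 *    POOL_DISASSOCIATED in the real code) */
                pool->disassociated = true;

                /* 2. the real code calls schedule() here so scheduler
                 *    callbacks on other CPUs observe the flag */

                /* 3. zap nr_running so need_more_worker() stays true
                 *    while the worklist is non-empty */
                pool->nr_running = 0;

                /* 4. kick this pool immediately; before the patch only
                 *    the pool running wq_unbind_fn() got such a kick */
                kick_off(pool);
        }
}

int main(void)
{
        struct fake_pool pools[] = {
                { .name = "normal",  .nr_running = 1 },
                { .name = "highpri", .nr_running = 1 },
        };

        unbind_cpu_pools(pools, 2);
        return 0;
}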
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/workqueue.c | 44
1 file changed, 25 insertions(+), 19 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 81f2457811eb..604801b91cba 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3446,28 +3446,34 @@ static void wq_unbind_fn(struct work_struct *work)
 
 		spin_unlock_irq(&pool->lock);
 		mutex_unlock(&pool->assoc_mutex);
-	}
 
-	/*
-	 * Call schedule() so that we cross rq->lock and thus can guarantee
-	 * sched callbacks see the %WORKER_UNBOUND flag.  This is necessary
-	 * as scheduler callbacks may be invoked from other cpus.
-	 */
-	schedule();
+		/*
+		 * Call schedule() so that we cross rq->lock and thus can
+		 * guarantee sched callbacks see the %WORKER_UNBOUND flag.
+		 * This is necessary as scheduler callbacks may be invoked
+		 * from other cpus.
+		 */
+		schedule();
 
-	/*
-	 * Sched callbacks are disabled now.  Zap nr_running.  After this,
-	 * nr_running stays zero and need_more_worker() and keep_working()
-	 * are always true as long as the worklist is not empty.  Pools on
-	 * @cpu now behave as unbound (in terms of concurrency management)
-	 * pools which are served by workers tied to the CPU.
-	 *
-	 * On return from this function, the current worker would trigger
-	 * unbound chain execution of pending work items if other workers
-	 * didn't already.
-	 */
-	for_each_std_worker_pool(pool, cpu)
-		atomic_set(&pool->nr_running, 0);
+		/*
+		 * Sched callbacks are disabled now.  Zap nr_running.
+		 * After this, nr_running stays zero and need_more_worker()
+		 * and keep_working() are always true as long as the
+		 * worklist is not empty.  This pool now behaves as an
+		 * unbound (in terms of concurrency management) pool which
+		 * are served by workers tied to the pool.
+		 */
+		atomic_set(&pool->nr_running, 0);
+
+		/*
+		 * With concurrency management just turned off, a busy
+		 * worker blocking could lead to lengthy stalls.  Kick off
+		 * unbound chain execution of currently pending work items.
+		 */
+		spin_lock_irq(&pool->lock);
+		wake_up_worker(pool);
+		spin_unlock_irq(&pool->lock);
+	}
 }
 
 /*