aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-10-23 11:24:52 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2017-10-23 11:24:52 -0400
commit06987dad0a563e406e7841df0f8759368523714f (patch)
tree5d3a6eb2b673df374c1227c2cd16e1d024c3039d
parent2f1b11c575acb2fc9e63b8ce11e4a3f6069ba6f6 (diff)
parent692b48258dda7c302e777d7d5f4217244478f1f6 (diff)
Merge branch 'for-4.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue fix from Tejun Heo: "This is a fix for an old bug in workqueue. Workqueue used a mutex to arbitrate who gets to be the manager of a pool. When the manager role gets released, the mutex gets unlocked while holding the pool's irqsafe spinlock. This can lead to deadlocks as mutex's internal spinlock isn't irqsafe. This got discovered by recent fixes to mutex lockdep annotations. The fix is a bit invasive for rc6 but if anything were wrong with the fix it would likely have already blown up in -next, and we want the fix in -stable anyway" * 'for-4.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq: workqueue: replace pool->manager_arb mutex with a flag
-rw-r--r--kernel/workqueue.c37
1 file changed, 15 insertions, 22 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 64d0edf428f8..a2dccfe1acec 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -68,6 +68,7 @@ enum {
68 * attach_mutex to avoid changing binding state while 68 * attach_mutex to avoid changing binding state while
69 * worker_attach_to_pool() is in progress. 69 * worker_attach_to_pool() is in progress.
70 */ 70 */
71 POOL_MANAGER_ACTIVE = 1 << 0, /* being managed */
71 POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ 72 POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
72 73
73 /* worker flags */ 74 /* worker flags */
@@ -165,7 +166,6 @@ struct worker_pool {
165 /* L: hash of busy workers */ 166 /* L: hash of busy workers */
166 167
167 /* see manage_workers() for details on the two manager mutexes */ 168 /* see manage_workers() for details on the two manager mutexes */
168 struct mutex manager_arb; /* manager arbitration */
169 struct worker *manager; /* L: purely informational */ 169 struct worker *manager; /* L: purely informational */
170 struct mutex attach_mutex; /* attach/detach exclusion */ 170 struct mutex attach_mutex; /* attach/detach exclusion */
171 struct list_head workers; /* A: attached workers */ 171 struct list_head workers; /* A: attached workers */
@@ -299,6 +299,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
299 299
300static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ 300static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
301static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ 301static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
302static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
302 303
303static LIST_HEAD(workqueues); /* PR: list of all workqueues */ 304static LIST_HEAD(workqueues); /* PR: list of all workqueues */
304static bool workqueue_freezing; /* PL: have wqs started freezing? */ 305static bool workqueue_freezing; /* PL: have wqs started freezing? */
@@ -801,7 +802,7 @@ static bool need_to_create_worker(struct worker_pool *pool)
801/* Do we have too many workers and should some go away? */ 802/* Do we have too many workers and should some go away? */
802static bool too_many_workers(struct worker_pool *pool) 803static bool too_many_workers(struct worker_pool *pool)
803{ 804{
804 bool managing = mutex_is_locked(&pool->manager_arb); 805 bool managing = pool->flags & POOL_MANAGER_ACTIVE;
805 int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ 806 int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
806 int nr_busy = pool->nr_workers - nr_idle; 807 int nr_busy = pool->nr_workers - nr_idle;
807 808
@@ -1980,24 +1981,17 @@ static bool manage_workers(struct worker *worker)
1980{ 1981{
1981 struct worker_pool *pool = worker->pool; 1982 struct worker_pool *pool = worker->pool;
1982 1983
1983 /* 1984 if (pool->flags & POOL_MANAGER_ACTIVE)
1984 * Anyone who successfully grabs manager_arb wins the arbitration
1985 * and becomes the manager. mutex_trylock() on pool->manager_arb
1986 * failure while holding pool->lock reliably indicates that someone
1987 * else is managing the pool and the worker which failed trylock
1988 * can proceed to executing work items. This means that anyone
1989 * grabbing manager_arb is responsible for actually performing
1990 * manager duties. If manager_arb is grabbed and released without
1991 * actual management, the pool may stall indefinitely.
1992 */
1993 if (!mutex_trylock(&pool->manager_arb))
1994 return false; 1985 return false;
1986
1987 pool->flags |= POOL_MANAGER_ACTIVE;
1995 pool->manager = worker; 1988 pool->manager = worker;
1996 1989
1997 maybe_create_worker(pool); 1990 maybe_create_worker(pool);
1998 1991
1999 pool->manager = NULL; 1992 pool->manager = NULL;
2000 mutex_unlock(&pool->manager_arb); 1993 pool->flags &= ~POOL_MANAGER_ACTIVE;
1994 wake_up(&wq_manager_wait);
2001 return true; 1995 return true;
2002} 1996}
2003 1997
@@ -3248,7 +3242,6 @@ static int init_worker_pool(struct worker_pool *pool)
3248 setup_timer(&pool->mayday_timer, pool_mayday_timeout, 3242 setup_timer(&pool->mayday_timer, pool_mayday_timeout,
3249 (unsigned long)pool); 3243 (unsigned long)pool);
3250 3244
3251 mutex_init(&pool->manager_arb);
3252 mutex_init(&pool->attach_mutex); 3245 mutex_init(&pool->attach_mutex);
3253 INIT_LIST_HEAD(&pool->workers); 3246 INIT_LIST_HEAD(&pool->workers);
3254 3247
@@ -3318,13 +3311,15 @@ static void put_unbound_pool(struct worker_pool *pool)
3318 hash_del(&pool->hash_node); 3311 hash_del(&pool->hash_node);
3319 3312
3320 /* 3313 /*
3321 * Become the manager and destroy all workers. Grabbing 3314 * Become the manager and destroy all workers. This prevents
3322 * manager_arb prevents @pool's workers from blocking on 3315 * @pool's workers from blocking on attach_mutex. We're the last
3323 * attach_mutex. 3316 * manager and @pool gets freed with the flag set.
3324 */ 3317 */
3325 mutex_lock(&pool->manager_arb);
3326
3327 spin_lock_irq(&pool->lock); 3318 spin_lock_irq(&pool->lock);
3319 wait_event_lock_irq(wq_manager_wait,
3320 !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
3321 pool->flags |= POOL_MANAGER_ACTIVE;
3322
3328 while ((worker = first_idle_worker(pool))) 3323 while ((worker = first_idle_worker(pool)))
3329 destroy_worker(worker); 3324 destroy_worker(worker);
3330 WARN_ON(pool->nr_workers || pool->nr_idle); 3325 WARN_ON(pool->nr_workers || pool->nr_idle);
@@ -3338,8 +3333,6 @@ static void put_unbound_pool(struct worker_pool *pool)
3338 if (pool->detach_completion) 3333 if (pool->detach_completion)
3339 wait_for_completion(pool->detach_completion); 3334 wait_for_completion(pool->detach_completion);
3340 3335
3341 mutex_unlock(&pool->manager_arb);
3342
3343 /* shut down the timers */ 3336 /* shut down the timers */
3344 del_timer_sync(&pool->idle_timer); 3337 del_timer_sync(&pool->idle_timer);
3345 del_timer_sync(&pool->mayday_timer); 3338 del_timer_sync(&pool->mayday_timer);