aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2017-10-09 11:04:13 -0400
committerTejun Heo <tj@kernel.org>2017-10-10 10:13:57 -0400
commit692b48258dda7c302e777d7d5f4217244478f1f6 (patch)
tree8eeb2b93ef9002c4f8b5c8a46fd87020dcd1bfd8
parent47684e111f52fface17820d3ef84cc845b26070e (diff)
workqueue: replace pool->manager_arb mutex with a flag
Josef reported a HARDIRQ-safe -> HARDIRQ-unsafe lock order detected by lockdep: [ 1270.472259] WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected [ 1270.472783] 4.14.0-rc1-xfstests-12888-g76833e8 #110 Not tainted [ 1270.473240] ----------------------------------------------------- [ 1270.473710] kworker/u5:2/5157 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: [ 1270.474239] (&(&lock->wait_lock)->rlock){+.+.}, at: [<ffffffff8da253d2>] __mutex_unlock_slowpath+0xa2/0x280 [ 1270.474994] [ 1270.474994] and this task is already holding: [ 1270.475440] (&pool->lock/1){-.-.}, at: [<ffffffff8d2992f6>] worker_thread+0x366/0x3c0 [ 1270.476046] which would create a new lock dependency: [ 1270.476436] (&pool->lock/1){-.-.} -> (&(&lock->wait_lock)->rlock){+.+.} [ 1270.476949] [ 1270.476949] but this new dependency connects a HARDIRQ-irq-safe lock: [ 1270.477553] (&pool->lock/1){-.-.} ... [ 1270.488900] to a HARDIRQ-irq-unsafe lock: [ 1270.489327] (&(&lock->wait_lock)->rlock){+.+.} ... [ 1270.494735] Possible interrupt unsafe locking scenario: [ 1270.494735] [ 1270.495250] CPU0 CPU1 [ 1270.495600] ---- ---- [ 1270.495947] lock(&(&lock->wait_lock)->rlock); [ 1270.496295] local_irq_disable(); [ 1270.496753] lock(&pool->lock/1); [ 1270.497205] lock(&(&lock->wait_lock)->rlock); [ 1270.497744] <Interrupt> [ 1270.497948] lock(&pool->lock/1); , which will cause a irq inversion deadlock if the above lock scenario happens. The root cause of this safe -> unsafe lock order is the mutex_unlock(pool->manager_arb) in manage_workers() with pool->lock held. Unlocking mutex while holding an irq spinlock was never safe and this problem has been around forever but it never got noticed because the only time the mutex is usually trylocked while holding irqlock making actual failures very unlikely and lockdep annotation missed the condition until the recent b9c16a0e1f73 ("locking/mutex: Fix lockdep_assert_held() fail"). Using mutex for pool->manager_arb has always been a bit of stretch. It primarily is an mechanism to arbitrate managership between workers which can easily be done with a pool flag. The only reason it became a mutex is that pool destruction path wants to exclude parallel managing operations. This patch replaces the mutex with a new pool flag POOL_MANAGER_ACTIVE and make the destruction path wait for the current manager on a wait queue. v2: Drop unnecessary flag clearing before pool destruction as suggested by Boqun. Signed-off-by: Tejun Heo <tj@kernel.org> Reported-by: Josef Bacik <josef@toxicpanda.com> Reviewed-by: Lai Jiangshan <jiangshanlai@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: stable@vger.kernel.org
-rw-r--r--kernel/workqueue.c37
1 files changed, 15 insertions, 22 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 64d0edf428f8..a2dccfe1acec 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -68,6 +68,7 @@ enum {
68 * attach_mutex to avoid changing binding state while 68 * attach_mutex to avoid changing binding state while
69 * worker_attach_to_pool() is in progress. 69 * worker_attach_to_pool() is in progress.
70 */ 70 */
71 POOL_MANAGER_ACTIVE = 1 << 0, /* being managed */
71 POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ 72 POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
72 73
73 /* worker flags */ 74 /* worker flags */
@@ -165,7 +166,6 @@ struct worker_pool {
165 /* L: hash of busy workers */ 166 /* L: hash of busy workers */
166 167
167 /* see manage_workers() for details on the two manager mutexes */ 168 /* see manage_workers() for details on the two manager mutexes */
168 struct mutex manager_arb; /* manager arbitration */
169 struct worker *manager; /* L: purely informational */ 169 struct worker *manager; /* L: purely informational */
170 struct mutex attach_mutex; /* attach/detach exclusion */ 170 struct mutex attach_mutex; /* attach/detach exclusion */
171 struct list_head workers; /* A: attached workers */ 171 struct list_head workers; /* A: attached workers */
@@ -299,6 +299,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
299 299
300static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ 300static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
301static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ 301static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
302static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
302 303
303static LIST_HEAD(workqueues); /* PR: list of all workqueues */ 304static LIST_HEAD(workqueues); /* PR: list of all workqueues */
304static bool workqueue_freezing; /* PL: have wqs started freezing? */ 305static bool workqueue_freezing; /* PL: have wqs started freezing? */
@@ -801,7 +802,7 @@ static bool need_to_create_worker(struct worker_pool *pool)
801/* Do we have too many workers and should some go away? */ 802/* Do we have too many workers and should some go away? */
802static bool too_many_workers(struct worker_pool *pool) 803static bool too_many_workers(struct worker_pool *pool)
803{ 804{
804 bool managing = mutex_is_locked(&pool->manager_arb); 805 bool managing = pool->flags & POOL_MANAGER_ACTIVE;
805 int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ 806 int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
806 int nr_busy = pool->nr_workers - nr_idle; 807 int nr_busy = pool->nr_workers - nr_idle;
807 808
@@ -1980,24 +1981,17 @@ static bool manage_workers(struct worker *worker)
1980{ 1981{
1981 struct worker_pool *pool = worker->pool; 1982 struct worker_pool *pool = worker->pool;
1982 1983
1983 /* 1984 if (pool->flags & POOL_MANAGER_ACTIVE)
1984 * Anyone who successfully grabs manager_arb wins the arbitration
1985 * and becomes the manager. mutex_trylock() on pool->manager_arb
1986 * failure while holding pool->lock reliably indicates that someone
1987 * else is managing the pool and the worker which failed trylock
1988 * can proceed to executing work items. This means that anyone
1989 * grabbing manager_arb is responsible for actually performing
1990 * manager duties. If manager_arb is grabbed and released without
1991 * actual management, the pool may stall indefinitely.
1992 */
1993 if (!mutex_trylock(&pool->manager_arb))
1994 return false; 1985 return false;
1986
1987 pool->flags |= POOL_MANAGER_ACTIVE;
1995 pool->manager = worker; 1988 pool->manager = worker;
1996 1989
1997 maybe_create_worker(pool); 1990 maybe_create_worker(pool);
1998 1991
1999 pool->manager = NULL; 1992 pool->manager = NULL;
2000 mutex_unlock(&pool->manager_arb); 1993 pool->flags &= ~POOL_MANAGER_ACTIVE;
1994 wake_up(&wq_manager_wait);
2001 return true; 1995 return true;
2002} 1996}
2003 1997
@@ -3248,7 +3242,6 @@ static int init_worker_pool(struct worker_pool *pool)
3248 setup_timer(&pool->mayday_timer, pool_mayday_timeout, 3242 setup_timer(&pool->mayday_timer, pool_mayday_timeout,
3249 (unsigned long)pool); 3243 (unsigned long)pool);
3250 3244
3251 mutex_init(&pool->manager_arb);
3252 mutex_init(&pool->attach_mutex); 3245 mutex_init(&pool->attach_mutex);
3253 INIT_LIST_HEAD(&pool->workers); 3246 INIT_LIST_HEAD(&pool->workers);
3254 3247
@@ -3318,13 +3311,15 @@ static void put_unbound_pool(struct worker_pool *pool)
3318 hash_del(&pool->hash_node); 3311 hash_del(&pool->hash_node);
3319 3312
3320 /* 3313 /*
3321 * Become the manager and destroy all workers. Grabbing 3314 * Become the manager and destroy all workers. This prevents
3322 * manager_arb prevents @pool's workers from blocking on 3315 * @pool's workers from blocking on attach_mutex. We're the last
3323 * attach_mutex. 3316 * manager and @pool gets freed with the flag set.
3324 */ 3317 */
3325 mutex_lock(&pool->manager_arb);
3326
3327 spin_lock_irq(&pool->lock); 3318 spin_lock_irq(&pool->lock);
3319 wait_event_lock_irq(wq_manager_wait,
3320 !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
3321 pool->flags |= POOL_MANAGER_ACTIVE;
3322
3328 while ((worker = first_idle_worker(pool))) 3323 while ((worker = first_idle_worker(pool)))
3329 destroy_worker(worker); 3324 destroy_worker(worker);
3330 WARN_ON(pool->nr_workers || pool->nr_idle); 3325 WARN_ON(pool->nr_workers || pool->nr_idle);
@@ -3338,8 +3333,6 @@ static void put_unbound_pool(struct worker_pool *pool)
3338 if (pool->detach_completion) 3333 if (pool->detach_completion)
3339 wait_for_completion(pool->detach_completion); 3334 wait_for_completion(pool->detach_completion);
3340 3335
3341 mutex_unlock(&pool->manager_arb);
3342
3343 /* shut down the timers */ 3336 /* shut down the timers */
3344 del_timer_sync(&pool->idle_timer); 3337 del_timer_sync(&pool->idle_timer);
3345 del_timer_sync(&pool->mayday_timer); 3338 del_timer_sync(&pool->mayday_timer);