Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r-- | kernel/workqueue.c | 110 |
1 file changed, 89 insertions, 21 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 692d97628a10..1e1373bcb3e3 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -66,6 +66,7 @@ enum {
66 | 66 | ||
67 | /* pool flags */ | 67 | /* pool flags */ |
68 | POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ | 68 | POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ |
69 | POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */ | ||
69 | 70 | ||
70 | /* worker flags */ | 71 | /* worker flags */ |
71 | WORKER_STARTED = 1 << 0, /* started */ | 72 | WORKER_STARTED = 1 << 0, /* started */ |
@@ -652,7 +653,7 @@ static bool need_to_manage_workers(struct worker_pool *pool)
652 | /* Do we have too many workers and should some go away? */ | 653 | /* Do we have too many workers and should some go away? */ |
653 | static bool too_many_workers(struct worker_pool *pool) | 654 | static bool too_many_workers(struct worker_pool *pool) |
654 | { | 655 | { |
655 | bool managing = mutex_is_locked(&pool->manager_mutex); | 656 | bool managing = pool->flags & POOL_MANAGING_WORKERS; |
656 | int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ | 657 | int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ |
657 | int nr_busy = pool->nr_workers - nr_idle; | 658 | int nr_busy = pool->nr_workers - nr_idle; |
658 | 659 | ||
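The hunk above replaces the mutex_is_locked() test with the POOL_MANAGING_WORKERS flag when deciding whether the manager should be counted as idle, so a hotplug path that merely holds manager_mutex no longer skews the arithmetic. Below is a minimal userspace sketch of that accounting; the toy_pool struct, the MAX_IDLE_WORKERS_RATIO value of 4 and the "keep two spare idle workers" rule are written from memory of the surrounding workqueue code and should be treated as assumptions, not as quotes from this tree.

#include <stdbool.h>
#include <stdio.h>

#define POOL_MANAGING_WORKERS	(1 << 1)
#define MAX_IDLE_WORKERS_RATIO	4	/* assumed: up to 1 idle per 4 busy, plus 2 spares */

struct toy_pool {
	unsigned int flags;
	int nr_workers;
	int nr_idle;
};

/* the reworked check: the manager is counted as idle through the pool
 * flag rather than through mutex_is_locked() */
static bool too_many_workers(const struct toy_pool *pool)
{
	bool managing = pool->flags & POOL_MANAGING_WORKERS;
	int nr_idle = pool->nr_idle + managing;	/* manager is considered idle */
	int nr_busy = pool->nr_workers - nr_idle;

	return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
}

int main(void)
{
	struct toy_pool pool = {
		.flags = POOL_MANAGING_WORKERS, .nr_workers = 8, .nr_idle = 5,
	};

	printf("too_many_workers: %d\n", too_many_workers(&pool));
	return 0;
}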
@@ -1326,6 +1327,15 @@ static void idle_worker_rebind(struct worker *worker)
1326 | 1327 | ||
1327 | /* we did our part, wait for rebind_workers() to finish up */ | 1328 | /* we did our part, wait for rebind_workers() to finish up */ |
1328 | wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND)); | 1329 | wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND)); |
1330 | |||
1331 | /* | ||
1332 | * rebind_workers() shouldn't finish until all workers passed the | ||
1333 | * above WORKER_REBIND wait. Tell it when done. | ||
1334 | */ | ||
1335 | spin_lock_irq(&worker->pool->gcwq->lock); | ||
1336 | if (!--worker->idle_rebind->cnt) | ||
1337 | complete(&worker->idle_rebind->done); | ||
1338 | spin_unlock_irq(&worker->pool->gcwq->lock); | ||
1329 | } | 1339 | } |
1330 | 1340 | ||
1331 | /* | 1341 | /* |
@@ -1396,12 +1406,15 @@ retry:
1396 | /* set REBIND and kick idle ones, we'll wait for these later */ | 1406 | /* set REBIND and kick idle ones, we'll wait for these later */ |
1397 | for_each_worker_pool(pool, gcwq) { | 1407 | for_each_worker_pool(pool, gcwq) { |
1398 | list_for_each_entry(worker, &pool->idle_list, entry) { | 1408 | list_for_each_entry(worker, &pool->idle_list, entry) { |
1409 | unsigned long worker_flags = worker->flags; | ||
1410 | |||
1399 | if (worker->flags & WORKER_REBIND) | 1411 | if (worker->flags & WORKER_REBIND) |
1400 | continue; | 1412 | continue; |
1401 | 1413 | ||
1402 | /* morph UNBOUND to REBIND */ | 1414 | /* morph UNBOUND to REBIND atomically */ |
1403 | worker->flags &= ~WORKER_UNBOUND; | 1415 | worker_flags &= ~WORKER_UNBOUND; |
1404 | worker->flags |= WORKER_REBIND; | 1416 | worker_flags |= WORKER_REBIND; |
1417 | ACCESS_ONCE(worker->flags) = worker_flags; | ||
1405 | 1418 | ||
1406 | idle_rebind.cnt++; | 1419 | idle_rebind.cnt++; |
1407 | worker->idle_rebind = &idle_rebind; | 1420 | worker->idle_rebind = &idle_rebind; |
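The rewritten loop builds the new flag word in a local variable and publishes it with a single ACCESS_ONCE() store, so a concurrently running reader of worker->flags can only observe the old word or the new word, never a half-updated one with both WORKER_UNBOUND and WORKER_REBIND clear. A compile-and-run sketch of the idiom follows (GNU C, since ACCESS_ONCE relies on typeof); the bit values and the toy_worker struct are placeholders chosen for illustration.

#include <stdio.h>

/* kernel-style ACCESS_ONCE(): route the access through a volatile lvalue
 * so the compiler emits exactly one store for the assignment (GNU C) */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

#define WORKER_REBIND	(1 << 5)	/* bit values assumed for illustration */
#define WORKER_UNBOUND	(1 << 7)

struct toy_worker {
	unsigned long flags;
};

/* morph UNBOUND to REBIND with one published store: a reader polling
 * worker->flags sees either the old word or the new word, never an
 * intermediate one with both bits clear, which the previous
 * clear-then-set sequence could expose */
static void morph_unbound_to_rebind(struct toy_worker *worker)
{
	unsigned long worker_flags = worker->flags;

	worker_flags &= ~WORKER_UNBOUND;
	worker_flags |= WORKER_REBIND;
	ACCESS_ONCE(worker->flags) = worker_flags;
}

int main(void)
{
	struct toy_worker w = { .flags = WORKER_UNBOUND };

	morph_unbound_to_rebind(&w);
	printf("flags=%#lx\n", w.flags);
	return 0;
}

Note that the volatile store only keeps the compiler from splitting the update into separate clear and set stores; it is not an atomic read-modify-write or a memory barrier.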
@@ -1419,25 +1432,15 @@ retry:
1419 | goto retry; | 1432 | goto retry; |
1420 | } | 1433 | } |
1421 | 1434 | ||
1422 | /* | 1435 | /* all idle workers are rebound, rebind busy workers */ |
1423 | * All idle workers are rebound and waiting for %WORKER_REBIND to | ||
1424 | * be cleared inside idle_worker_rebind(). Clear and release. | ||
1425 | * Clearing %WORKER_REBIND from this foreign context is safe | ||
1426 | * because these workers are still guaranteed to be idle. | ||
1427 | */ | ||
1428 | for_each_worker_pool(pool, gcwq) | ||
1429 | list_for_each_entry(worker, &pool->idle_list, entry) | ||
1430 | worker->flags &= ~WORKER_REBIND; | ||
1431 | |||
1432 | wake_up_all(&gcwq->rebind_hold); | ||
1433 | |||
1434 | /* rebind busy workers */ | ||
1435 | for_each_busy_worker(worker, i, pos, gcwq) { | 1436 | for_each_busy_worker(worker, i, pos, gcwq) { |
1436 | struct work_struct *rebind_work = &worker->rebind_work; | 1437 | struct work_struct *rebind_work = &worker->rebind_work; |
1438 | unsigned long worker_flags = worker->flags; | ||
1437 | 1439 | ||
1438 | /* morph UNBOUND to REBIND */ | 1440 | /* morph UNBOUND to REBIND atomically */ |
1439 | worker->flags &= ~WORKER_UNBOUND; | 1441 | worker_flags &= ~WORKER_UNBOUND; |
1440 | worker->flags |= WORKER_REBIND; | 1442 | worker_flags |= WORKER_REBIND; |
1443 | ACCESS_ONCE(worker->flags) = worker_flags; | ||
1441 | 1444 | ||
1442 | if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, | 1445 | if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, |
1443 | work_data_bits(rebind_work))) | 1446 | work_data_bits(rebind_work))) |
@@ -1449,6 +1452,34 @@ retry:
1449 | worker->scheduled.next, | 1452 | worker->scheduled.next, |
1450 | work_color_to_flags(WORK_NO_COLOR)); | 1453 | work_color_to_flags(WORK_NO_COLOR)); |
1451 | } | 1454 | } |
1455 | |||
1456 | /* | ||
1457 | * All idle workers are rebound and waiting for %WORKER_REBIND to | ||
1458 | * be cleared inside idle_worker_rebind(). Clear and release. | ||
1459 | * Clearing %WORKER_REBIND from this foreign context is safe | ||
1460 | * because these workers are still guaranteed to be idle. | ||
1461 | * | ||
1462 | * We need to make sure all idle workers passed WORKER_REBIND wait | ||
1463 | * in idle_worker_rebind() before returning; otherwise, workers can | ||
1464 | * get stuck at the wait if hotplug cycle repeats. | ||
1465 | */ | ||
1466 | idle_rebind.cnt = 1; | ||
1467 | INIT_COMPLETION(idle_rebind.done); | ||
1468 | |||
1469 | for_each_worker_pool(pool, gcwq) { | ||
1470 | list_for_each_entry(worker, &pool->idle_list, entry) { | ||
1471 | worker->flags &= ~WORKER_REBIND; | ||
1472 | idle_rebind.cnt++; | ||
1473 | } | ||
1474 | } | ||
1475 | |||
1476 | wake_up_all(&gcwq->rebind_hold); | ||
1477 | |||
1478 | if (--idle_rebind.cnt) { | ||
1479 | spin_unlock_irq(&gcwq->lock); | ||
1480 | wait_for_completion(&idle_rebind.done); | ||
1481 | spin_lock_irq(&gcwq->lock); | ||
1482 | } | ||
1452 | } | 1483 | } |
1453 | 1484 | ||
1454 | static struct worker *alloc_worker(void) | 1485 | static struct worker *alloc_worker(void) |
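The block appended to rebind_workers() seeds idle_rebind.cnt with 1 so the completion cannot fire before this function is ready to wait, takes one reference per idle worker while clearing WORKER_REBIND, wakes them through rebind_hold, and only sleeps on the completion if references remain after dropping its own. Below is a userspace pthread sketch of that handshake; the mutex, condition variables and thread bodies merely stand in for gcwq->lock, rebind_hold, the completion and idle_worker_rebind(), and are not kernel APIs.

#include <pthread.h>
#include <stdio.h>

#define NR_IDLE 4

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;	/* ~ gcwq->lock */
static pthread_cond_t rebind_hold = PTHREAD_COND_INITIALIZER;	/* ~ gcwq->rebind_hold */
static pthread_cond_t done = PTHREAD_COND_INITIALIZER;		/* ~ idle_rebind.done */
static int cnt;							/* ~ idle_rebind.cnt */
static int rebind_cleared;					/* ~ !(flags & WORKER_REBIND) */

/* models idle_worker_rebind(): wait until the manager clears the rebind
 * flag, then drop a reference and "complete" when the last one is gone */
static void *idle_worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (!rebind_cleared)
		pthread_cond_wait(&rebind_hold, &lock);
	if (!--cnt)
		pthread_cond_signal(&done);
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* models the tail of rebind_workers() */
int main(void)
{
	pthread_t tid[NR_IDLE];
	int i;

	for (i = 0; i < NR_IDLE; i++)
		pthread_create(&tid[i], NULL, idle_worker, NULL);

	pthread_mutex_lock(&lock);
	cnt = 1;				/* bias: our own reference */
	for (i = 0; i < NR_IDLE; i++)
		cnt++;				/* one reference per idle worker */
	rebind_cleared = 1;			/* ~ clearing WORKER_REBIND */
	pthread_cond_broadcast(&rebind_hold);	/* ~ wake_up_all(&gcwq->rebind_hold) */

	if (--cnt)				/* drop the bias; wait if workers remain */
		while (cnt)
			pthread_cond_wait(&done, &lock);	/* ~ wait_for_completion() */
	pthread_mutex_unlock(&lock);

	for (i = 0; i < NR_IDLE; i++)
		pthread_join(tid[i], NULL);
	puts("all idle workers passed the rebind wait");
	return 0;
}

Because the flag is only set and the count only populated under the same lock, no worker can drop a reference before the bias is in place, which is the property the kernel version needs to survive repeated hotplug cycles.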
@@ -1794,9 +1825,45 @@ static bool manage_workers(struct worker *worker)
1794 | struct worker_pool *pool = worker->pool; | 1825 | struct worker_pool *pool = worker->pool; |
1795 | bool ret = false; | 1826 | bool ret = false; |
1796 | 1827 | ||
1797 | if (!mutex_trylock(&pool->manager_mutex)) | 1828 | if (pool->flags & POOL_MANAGING_WORKERS) |
1798 | return ret; | 1829 | return ret; |
1799 | 1830 | ||
1831 | pool->flags |= POOL_MANAGING_WORKERS; | ||
1832 | |||
1833 | /* | ||
1834 | * To simplify both worker management and CPU hotplug, hold off | ||
1835 | * management while hotplug is in progress. CPU hotplug path can't | ||
1836 | * grab %POOL_MANAGING_WORKERS to achieve this because that can | ||
1837 | * lead to idle worker depletion (all become busy thinking someone | ||
1838 | * else is managing) which in turn can result in deadlock under | ||
1839 | * extreme circumstances. Use @pool->manager_mutex to synchronize | ||
1840 | * manager against CPU hotplug. | ||
1841 | * | ||
1842 | * manager_mutex would always be free unless CPU hotplug is in | ||
1843 | * progress. trylock first without dropping @gcwq->lock. | ||
1844 | */ | ||
1845 | if (unlikely(!mutex_trylock(&pool->manager_mutex))) { | ||
1846 | spin_unlock_irq(&pool->gcwq->lock); | ||
1847 | mutex_lock(&pool->manager_mutex); | ||
1848 | /* | ||
1849 | * CPU hotplug could have happened while we were waiting | ||
1850 | * for manager_mutex. Hotplug itself can't handle us | ||
1851 | * because manager isn't either on idle or busy list, and | ||
1852 | * @gcwq's state and ours could have deviated. | ||
1853 | * | ||
1854 | * As hotplug is now excluded via manager_mutex, we can | ||
1855 | * simply try to bind. It will succeed or fail depending | ||
1856 | * on @gcwq's current state. Try it and adjust | ||
1857 | * %WORKER_UNBOUND accordingly. | ||
1858 | */ | ||
1859 | if (worker_maybe_bind_and_lock(worker)) | ||
1860 | worker->flags &= ~WORKER_UNBOUND; | ||
1861 | else | ||
1862 | worker->flags |= WORKER_UNBOUND; | ||
1863 | |||
1864 | ret = true; | ||
1865 | } | ||
1866 | |||
1800 | pool->flags &= ~POOL_MANAGE_WORKERS; | 1867 | pool->flags &= ~POOL_MANAGE_WORKERS; |
1801 | 1868 | ||
1802 | /* | 1869 | /* |
@@ -1806,6 +1873,7 @@ static bool manage_workers(struct worker *worker)
1806 | ret |= maybe_destroy_workers(pool); | 1873 | ret |= maybe_destroy_workers(pool); |
1807 | ret |= maybe_create_worker(pool); | 1874 | ret |= maybe_create_worker(pool); |
1808 | 1875 | ||
1876 | pool->flags &= ~POOL_MANAGING_WORKERS; | ||
1809 | mutex_unlock(&pool->manager_mutex); | 1877 | mutex_unlock(&pool->manager_mutex); |
1810 | return ret; | 1878 | return ret; |
1811 | } | 1879 | } |
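With this change manage_workers() claims POOL_MANAGING_WORKERS under gcwq->lock to exclude other workers, and keeps manager_mutex only for excluding CPU hotplug: try the mutex first without dropping the spinlock, and only on contention drop the spinlock, sleep on the mutex and revalidate the worker's binding afterwards. The following pthread sketch shows that trylock-then-sleep shape under stated assumptions; maybe_bind() merely stands in for worker_maybe_bind_and_lock(), and the flag bit values are placeholders.

#include <pthread.h>
#include <stdbool.h>

#define POOL_MANAGING_WORKERS	(1 << 1)	/* bit values assumed for illustration */
#define WORKER_UNBOUND		(1 << 7)

struct toy_pool {
	pthread_spinlock_t lock;	/* ~ gcwq->lock, held on entry */
	pthread_mutex_t manager_mutex;	/* ~ pool->manager_mutex, contended by hotplug */
	unsigned int flags;
};

/* stand-in for worker_maybe_bind_and_lock(): reacquires pool->lock and
 * reports whether (re)binding to the CPU worked */
static bool maybe_bind(struct toy_pool *pool)
{
	pthread_spin_lock(&pool->lock);
	return true;
}

/* called with pool->lock held; returns true if the lock was dropped and
 * reacquired, mirroring manage_workers()'s return convention */
static bool manage(struct toy_pool *pool, unsigned int *worker_flags)
{
	bool ret = false;

	if (pool->flags & POOL_MANAGING_WORKERS)
		return ret;			/* another worker is managing */

	pool->flags |= POOL_MANAGING_WORKERS;	/* exclusion among workers */

	/* exclusion against hotplug: the mutex is normally uncontended,
	 * so try it first without letting go of the spinlock */
	if (pthread_mutex_trylock(&pool->manager_mutex) != 0) {
		pthread_spin_unlock(&pool->lock);
		pthread_mutex_lock(&pool->manager_mutex);
		/* hotplug may have run while we slept: rebind and adjust */
		if (maybe_bind(pool))
			*worker_flags &= ~WORKER_UNBOUND;
		else
			*worker_flags |= WORKER_UNBOUND;
		ret = true;
	}

	/* ... maybe_destroy_workers()/maybe_create_worker() would run here ... */

	pool->flags &= ~POOL_MANAGING_WORKERS;
	pthread_mutex_unlock(&pool->manager_mutex);
	return ret;
}

int main(void)
{
	struct toy_pool pool = { .flags = 0 };
	unsigned int wflags = 0;

	pthread_spin_init(&pool.lock, PTHREAD_PROCESS_PRIVATE);
	pthread_mutex_init(&pool.manager_mutex, NULL);

	pthread_spin_lock(&pool.lock);		/* callers enter with gcwq->lock held */
	manage(&pool, &wflags);
	pthread_spin_unlock(&pool.lock);
	return 0;
}

Splitting the two roles this way keeps idle workers from piling up behind a hotplug-held mutex (and thus all looking "busy" to each other), while still letting hotplug block management for the duration of the rebind.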