1 files changed, 107 insertions, 40 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 692d97628a10..3c5a79e2134c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -66,6 +66,7 @@ enum {
        /* pool flags */
        POOL_MANAGE_WORKERS     = 1 << 0,       /* need to manage workers */
+        POOL_MANAGING_WORKERS   = 1 << 1,       /* managing workers */
        /* worker flags */
        WORKER_STARTED          = 1 << 0,       /* started */
@@ -652,7 +653,7 @@ static bool need_to_manage_workers(struct worker_pool *pool)
 /* Do we have too many workers and should some go away? */
 static bool too_many_workers(struct worker_pool *pool)
 {
-        bool managing = mutex_is_locked(&pool->manager_mutex);
+        bool managing = pool->flags & POOL_MANAGING_WORKERS;
        int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
        int nr_busy = pool->nr_workers - nr_idle;
@@ -1326,6 +1327,15 @@ static void idle_worker_rebind(struct worker *worker)
        /* we did our part, wait for rebind_workers() to finish up */
        wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
+        /*
+         * rebind_workers() shouldn't finish until all workers passed the
+         * above WORKER_REBIND wait.  Tell it when done.
+         */
+        spin_lock_irq(&worker->pool->gcwq->lock);
+        if (!--worker->idle_rebind->cnt)
+                complete(&worker->idle_rebind->done);
+        spin_unlock_irq(&worker->pool->gcwq->lock);
 }
 /*
@@ -1339,8 +1349,16 @@ static void busy_worker_rebind_fn(struct work_struct *work)
        struct worker *worker = container_of(work, struct worker, rebind_work);
        struct global_cwq *gcwq = worker->pool->gcwq;
-        if (worker_maybe_bind_and_lock(worker))
+        worker_maybe_bind_and_lock(worker);
-                worker_clr_flags(worker, WORKER_REBIND);
+        /*
+         * %WORKER_REBIND must be cleared even if the above binding failed;
+         * otherwise, we may confuse the next CPU_UP cycle or oops / get
+         * stuck by calling idle_worker_rebind() prematurely.  If CPU went
+         * down again in between, %WORKER_UNBOUND would be set, so clearing
+         * %WORKER_REBIND is always safe.
+         */
+        worker_clr_flags(worker, WORKER_REBIND);
        spin_unlock_irq(&gcwq->lock);
 }
@@ -1396,12 +1414,15 @@ retry:
        /* set REBIND and kick idle ones, we'll wait for these later */
        for_each_worker_pool(pool, gcwq) {
                list_for_each_entry(worker, &pool->idle_list, entry) {
+                        unsigned long worker_flags = worker->flags;
                        if (worker->flags & WORKER_REBIND)
                                continue;
-                        /* morph UNBOUND to REBIND */
+                        /* morph UNBOUND to REBIND atomically */
-                        worker->flags &= ~WORKER_UNBOUND;
+                        worker_flags &= ~WORKER_UNBOUND;
-                        worker->flags |= WORKER_REBIND;
+                        worker_flags |= WORKER_REBIND;
+                        ACCESS_ONCE(worker->flags) = worker_flags;
                        idle_rebind.cnt++;
                        worker->idle_rebind = &idle_rebind;
@@ -1419,25 +1440,15 @@ retry:
                goto retry;
        }
-        /*
+        /* all idle workers are rebound, rebind busy workers */
-         * All idle workers are rebound and waiting for %WORKER_REBIND to
-         * be cleared inside idle_worker_rebind().  Clear and release.
-         * Clearing %WORKER_REBIND from this foreign context is safe
-         * because these workers are still guaranteed to be idle.
-         */
-        for_each_worker_pool(pool, gcwq)
-                list_for_each_entry(worker, &pool->idle_list, entry)
-                        worker->flags &= ~WORKER_REBIND;
-        wake_up_all(&gcwq->rebind_hold);
-        /* rebind busy workers */
        for_each_busy_worker(worker, i, pos, gcwq) {
                struct work_struct *rebind_work = &worker->rebind_work;
+                unsigned long worker_flags = worker->flags;
-                /* morph UNBOUND to REBIND */
+                /* morph UNBOUND to REBIND atomically */
-                worker->flags &= ~WORKER_UNBOUND;
+                worker_flags &= ~WORKER_UNBOUND;
-                worker->flags |= WORKER_REBIND;
+                worker_flags |= WORKER_REBIND;
+                ACCESS_ONCE(worker->flags) = worker_flags;
                if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
                                     work_data_bits(rebind_work)))
@@ -1449,6 +1460,34 @@ retry:
                            worker->scheduled.next,
                            work_color_to_flags(WORK_NO_COLOR));
        }
+        /*
+         * All idle workers are rebound and waiting for %WORKER_REBIND to
+         * be cleared inside idle_worker_rebind().  Clear and release.
+         * Clearing %WORKER_REBIND from this foreign context is safe
+         * because these workers are still guaranteed to be idle.
+         *
+         * We need to make sure all idle workers passed WORKER_REBIND wait
+         * in idle_worker_rebind() before returning; otherwise, workers can
+         * get stuck at the wait if hotplug cycle repeats.
+         */
+        idle_rebind.cnt = 1;
+        INIT_COMPLETION(idle_rebind.done);
+        for_each_worker_pool(pool, gcwq) {
+                list_for_each_entry(worker, &pool->idle_list, entry) {
+                        worker->flags &= ~WORKER_REBIND;
+                        idle_rebind.cnt++;
+                }
+        }
+        wake_up_all(&gcwq->rebind_hold);
+        if (--idle_rebind.cnt) {
+                spin_unlock_irq(&gcwq->lock);
+                wait_for_completion(&idle_rebind.done);
+                spin_lock_irq(&gcwq->lock);
+        }
 }
 static struct worker *alloc_worker(void)
@@ -1794,9 +1833,45 @@ static bool manage_workers(struct worker *worker)
        struct worker_pool *pool = worker->pool;
        bool ret = false;
-        if (!mutex_trylock(&pool->manager_mutex))
+        if (pool->flags & POOL_MANAGING_WORKERS)
                return ret;
+        pool->flags |= POOL_MANAGING_WORKERS;
+        /*
+         * To simplify both worker management and CPU hotplug, hold off
+         * management while hotplug is in progress.  CPU hotplug path can't
+         * grab %POOL_MANAGING_WORKERS to achieve this because that can
+         * lead to idle worker depletion (all become busy thinking someone
+         * else is managing) which in turn can result in deadlock under
+         * extreme circumstances.  Use @pool->manager_mutex to synchronize
+         * manager against CPU hotplug.
+         *
+         * manager_mutex would always be free unless CPU hotplug is in
+         * progress.  trylock first without dropping @gcwq->lock.
+         */
+        if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
+                spin_unlock_irq(&pool->gcwq->lock);
+                mutex_lock(&pool->manager_mutex);
+                /*
+                 * CPU hotplug could have happened while we were waiting
+                 * for manager_mutex.  Hotplug itself can't handle us
+                 * because manager isn't either on idle or busy list, and
+                 * @gcwq's state and ours could have deviated.
+                 *
+                 * As hotplug is now excluded via manager_mutex, we can
+                 * simply try to bind.  It will succeed or fail depending
+                 * on @gcwq's current state.  Try it and adjust
+                 * %WORKER_UNBOUND accordingly.
+                 */
+                if (worker_maybe_bind_and_lock(worker))
+                        worker->flags &= ~WORKER_UNBOUND;
+                else
+                        worker->flags |= WORKER_UNBOUND;
+                ret = true;
+        }
        pool->flags &= ~POOL_MANAGE_WORKERS;
        /*
@@ -1806,6 +1881,7 @@ static bool manage_workers(struct worker *worker)
        ret |= maybe_destroy_workers(pool);
        ret |= maybe_create_worker(pool);
+        pool->flags &= ~POOL_MANAGING_WORKERS;
        mutex_unlock(&pool->manager_mutex);
        return ret;
 }
@@ -3500,18 +3576,17 @@ static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb,
 #ifdef CONFIG_SMP
 struct work_for_cpu {
-        struct completion completion;
+        struct work_struct work;
        long (*fn)(void *);
        void *arg;
        long ret;
 };
-static int do_work_for_cpu(void *_wfc)
+static void work_for_cpu_fn(struct work_struct *work)
 {
-        struct work_for_cpu *wfc = _wfc;
+        struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
        wfc->ret = wfc->fn(wfc->arg);
-        complete(&wfc->completion);
-        return 0;
 }
 /**
@@ -3526,19 +3601,11 @@ static int do_work_for_cpu(void *_wfc)
 */
 long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
 {
-        struct task_struct *sub_thread;
+        struct work_for_cpu wfc = { .fn = fn, .arg = arg };
-        struct work_for_cpu wfc = {
-                .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
-                .fn = fn,
-                .arg = arg,
-        };
-        sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
+        INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
-        if (IS_ERR(sub_thread))
+        schedule_work_on(cpu, &wfc.work);
-                return PTR_ERR(sub_thread);
+        flush_work(&wfc.work);
-        kthread_bind(sub_thread, cpu);
-        wake_up_process(sub_thread);
-        wait_for_completion(&wfc.completion);
        return wfc.ret;
 }
 EXPORT_SYMBOL_GPL(work_on_cpu);

diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 692d97628a10..3c5a79e2134c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c
@@ -66,6 +66,7 @@ enum {
66		66
67	/* pool flags */	67	/* pool flags */
68	POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */	68	POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
		69	POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */
69		70
70	/* worker flags */	71	/* worker flags */
71	WORKER_STARTED = 1 << 0, /* started */	72	WORKER_STARTED = 1 << 0, /* started */
@@ -652,7 +653,7 @@ static bool need_to_manage_workers(struct worker_pool *pool)
652	/* Do we have too many workers and should some go away? */	653	/* Do we have too many workers and should some go away? */
653	static bool too_many_workers(struct worker_pool *pool)	654	static bool too_many_workers(struct worker_pool *pool)
654	{	655	{
655	bool managing = mutex_is_locked(&pool->manager_mutex);	656	bool managing = pool->flags & POOL_MANAGING_WORKERS;
656	int nr_idle = pool->nr_idle + managing; /* manager is considered idle */	657	int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
657	int nr_busy = pool->nr_workers - nr_idle;	658	int nr_busy = pool->nr_workers - nr_idle;
658		659
@@ -1326,6 +1327,15 @@ static void idle_worker_rebind(struct worker *worker)
1326		1327
1327	/* we did our part, wait for rebind_workers() to finish up */	1328	/* we did our part, wait for rebind_workers() to finish up */
1328	wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));	1329	wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
		1330
		1331	/*
		1332	* rebind_workers() shouldn't finish until all workers passed the
		1333	* above WORKER_REBIND wait. Tell it when done.
		1334	*/
		1335	spin_lock_irq(&worker->pool->gcwq->lock);
		1336	if (!--worker->idle_rebind->cnt)
		1337	complete(&worker->idle_rebind->done);
		1338	spin_unlock_irq(&worker->pool->gcwq->lock);
1329	}	1339	}
1330		1340
1331	/*	1341	/*
@@ -1339,8 +1349,16 @@ static void busy_worker_rebind_fn(struct work_struct *work)
1339	struct worker *worker = container_of(work, struct worker, rebind_work);	1349	struct worker *worker = container_of(work, struct worker, rebind_work);
1340	struct global_cwq *gcwq = worker->pool->gcwq;	1350	struct global_cwq *gcwq = worker->pool->gcwq;
1341		1351
1342	if (worker_maybe_bind_and_lock(worker))	1352	worker_maybe_bind_and_lock(worker);
1343	worker_clr_flags(worker, WORKER_REBIND);	1353
		1354	/*
		1355	* %WORKER_REBIND must be cleared even if the above binding failed;
		1356	* otherwise, we may confuse the next CPU_UP cycle or oops / get
		1357	* stuck by calling idle_worker_rebind() prematurely. If CPU went
		1358	* down again in between, %WORKER_UNBOUND would be set, so clearing
		1359	* %WORKER_REBIND is always safe.
		1360	*/
		1361	worker_clr_flags(worker, WORKER_REBIND);
1344		1362
1345	spin_unlock_irq(&gcwq->lock);	1363	spin_unlock_irq(&gcwq->lock);
1346	}	1364	}
@@ -1396,12 +1414,15 @@ retry:
1396	/* set REBIND and kick idle ones, we'll wait for these later */	1414	/* set REBIND and kick idle ones, we'll wait for these later */
1397	for_each_worker_pool(pool, gcwq) {	1415	for_each_worker_pool(pool, gcwq) {
1398	list_for_each_entry(worker, &pool->idle_list, entry) {	1416	list_for_each_entry(worker, &pool->idle_list, entry) {
		1417	unsigned long worker_flags = worker->flags;
		1418
1399	if (worker->flags & WORKER_REBIND)	1419	if (worker->flags & WORKER_REBIND)
1400	continue;	1420	continue;
1401		1421
1402	/* morph UNBOUND to REBIND */	1422	/* morph UNBOUND to REBIND atomically */
1403	worker->flags &= ~WORKER_UNBOUND;	1423	worker_flags &= ~WORKER_UNBOUND;
1404	worker->flags |= WORKER_REBIND;	1424	worker_flags |= WORKER_REBIND;
		1425	ACCESS_ONCE(worker->flags) = worker_flags;
1405		1426
1406	idle_rebind.cnt++;	1427	idle_rebind.cnt++;
1407	worker->idle_rebind = &idle_rebind;	1428	worker->idle_rebind = &idle_rebind;
@@ -1419,25 +1440,15 @@ retry:
1419	goto retry;	1440	goto retry;
1420	}	1441	}
1421		1442
1422	/*	1443	/* all idle workers are rebound, rebind busy workers */
1423	* All idle workers are rebound and waiting for %WORKER_REBIND to
1424	* be cleared inside idle_worker_rebind(). Clear and release.
1425	* Clearing %WORKER_REBIND from this foreign context is safe
1426	* because these workers are still guaranteed to be idle.
1427	*/
1428	for_each_worker_pool(pool, gcwq)
1429	list_for_each_entry(worker, &pool->idle_list, entry)
1430	worker->flags &= ~WORKER_REBIND;
1431
1432	wake_up_all(&gcwq->rebind_hold);
1433
1434	/* rebind busy workers */
1435	for_each_busy_worker(worker, i, pos, gcwq) {	1444	for_each_busy_worker(worker, i, pos, gcwq) {
1436	struct work_struct *rebind_work = &worker->rebind_work;	1445	struct work_struct *rebind_work = &worker->rebind_work;
		1446	unsigned long worker_flags = worker->flags;
1437		1447
1438	/* morph UNBOUND to REBIND */	1448	/* morph UNBOUND to REBIND atomically */
1439	worker->flags &= ~WORKER_UNBOUND;	1449	worker_flags &= ~WORKER_UNBOUND;
1440	worker->flags |= WORKER_REBIND;	1450	worker_flags |= WORKER_REBIND;
		1451	ACCESS_ONCE(worker->flags) = worker_flags;
1441		1452
1442	if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,	1453	if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
1443	work_data_bits(rebind_work)))	1454	work_data_bits(rebind_work)))
@@ -1449,6 +1460,34 @@ retry:
1449	worker->scheduled.next,	1460	worker->scheduled.next,
1450	work_color_to_flags(WORK_NO_COLOR));	1461	work_color_to_flags(WORK_NO_COLOR));
1451	}	1462	}
		1463
		1464	/*
		1465	* All idle workers are rebound and waiting for %WORKER_REBIND to
		1466	* be cleared inside idle_worker_rebind(). Clear and release.
		1467	* Clearing %WORKER_REBIND from this foreign context is safe
		1468	* because these workers are still guaranteed to be idle.
		1469	*
		1470	* We need to make sure all idle workers passed WORKER_REBIND wait
		1471	* in idle_worker_rebind() before returning; otherwise, workers can
		1472	* get stuck at the wait if hotplug cycle repeats.
		1473	*/
		1474	idle_rebind.cnt = 1;
		1475	INIT_COMPLETION(idle_rebind.done);
		1476
		1477	for_each_worker_pool(pool, gcwq) {
		1478	list_for_each_entry(worker, &pool->idle_list, entry) {
		1479	worker->flags &= ~WORKER_REBIND;
		1480	idle_rebind.cnt++;
		1481	}
		1482	}
		1483
		1484	wake_up_all(&gcwq->rebind_hold);
		1485
		1486	if (--idle_rebind.cnt) {
		1487	spin_unlock_irq(&gcwq->lock);
		1488	wait_for_completion(&idle_rebind.done);
		1489	spin_lock_irq(&gcwq->lock);
		1490	}
1452	}	1491	}
1453		1492
1454	static struct worker *alloc_worker(void)	1493	static struct worker *alloc_worker(void)
@@ -1794,9 +1833,45 @@ static bool manage_workers(struct worker *worker)
1794	struct worker_pool *pool = worker->pool;	1833	struct worker_pool *pool = worker->pool;
1795	bool ret = false;	1834	bool ret = false;
1796		1835
1797	if (!mutex_trylock(&pool->manager_mutex))	1836	if (pool->flags & POOL_MANAGING_WORKERS)
1798	return ret;	1837	return ret;
1799		1838
		1839	pool->flags |= POOL_MANAGING_WORKERS;
		1840
		1841	/*
		1842	* To simplify both worker management and CPU hotplug, hold off
		1843	* management while hotplug is in progress. CPU hotplug path can't
		1844	* grab %POOL_MANAGING_WORKERS to achieve this because that can
		1845	* lead to idle worker depletion (all become busy thinking someone
		1846	* else is managing) which in turn can result in deadlock under
		1847	* extreme circumstances. Use @pool->manager_mutex to synchronize
		1848	* manager against CPU hotplug.
		1849	*
		1850	* manager_mutex would always be free unless CPU hotplug is in
		1851	* progress. trylock first without dropping @gcwq->lock.
		1852	*/
		1853	if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
		1854	spin_unlock_irq(&pool->gcwq->lock);
		1855	mutex_lock(&pool->manager_mutex);
		1856	/*
		1857	* CPU hotplug could have happened while we were waiting
		1858	* for manager_mutex. Hotplug itself can't handle us
		1859	* because manager isn't either on idle or busy list, and
		1860	* @gcwq's state and ours could have deviated.
		1861	*
		1862	* As hotplug is now excluded via manager_mutex, we can
		1863	* simply try to bind. It will succeed or fail depending
		1864	* on @gcwq's current state. Try it and adjust
		1865	* %WORKER_UNBOUND accordingly.
		1866	*/
		1867	if (worker_maybe_bind_and_lock(worker))
		1868	worker->flags &= ~WORKER_UNBOUND;
		1869	else
		1870	worker->flags |= WORKER_UNBOUND;
		1871
		1872	ret = true;
		1873	}
		1874
1800	pool->flags &= ~POOL_MANAGE_WORKERS;	1875	pool->flags &= ~POOL_MANAGE_WORKERS;
1801		1876
1802	/*	1877	/*
@@ -1806,6 +1881,7 @@ static bool manage_workers(struct worker *worker)
1806	ret |= maybe_destroy_workers(pool);	1881	ret |= maybe_destroy_workers(pool);
1807	ret |= maybe_create_worker(pool);	1882	ret |= maybe_create_worker(pool);
1808		1883
		1884	pool->flags &= ~POOL_MANAGING_WORKERS;
1809	mutex_unlock(&pool->manager_mutex);	1885	mutex_unlock(&pool->manager_mutex);
1810	return ret;	1886	return ret;
1811	}	1887	}
@@ -3500,18 +3576,17 @@ static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb,
3500	#ifdef CONFIG_SMP	3576	#ifdef CONFIG_SMP
3501		3577
3502	struct work_for_cpu {	3578	struct work_for_cpu {
3503	struct completion completion;	3579	struct work_struct work;
3504	long (*fn)(void *);	3580	long (*fn)(void *);
3505	void *arg;	3581	void *arg;
3506	long ret;	3582	long ret;
3507	};	3583	};
3508		3584
3509	static int do_work_for_cpu(void *_wfc)	3585	static void work_for_cpu_fn(struct work_struct *work)
3510	{	3586	{
3511	struct work_for_cpu *wfc = _wfc;	3587	struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
		3588
3512	wfc->ret = wfc->fn(wfc->arg);	3589	wfc->ret = wfc->fn(wfc->arg);
3513	complete(&wfc->completion);
3514	return 0;
3515	}	3590	}
3516		3591
3517	/**	3592	/**
@@ -3526,19 +3601,11 @@ static int do_work_for_cpu(void *_wfc)
3526	*/	3601	*/
3527	long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)	3602	long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
3528	{	3603	{
3529	struct task_struct *sub_thread;	3604	struct work_for_cpu wfc = { .fn = fn, .arg = arg };
3530	struct work_for_cpu wfc = {
3531	.completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
3532	.fn = fn,
3533	.arg = arg,
3534	};
3535		3605
3536	sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");	3606	INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
3537	if (IS_ERR(sub_thread))	3607	schedule_work_on(cpu, &wfc.work);
3538	return PTR_ERR(sub_thread);	3608	flush_work(&wfc.work);
3539	kthread_bind(sub_thread, cpu);
3540	wake_up_process(sub_thread);
3541	wait_for_completion(&wfc.completion);
3542	return wfc.ret;	3609	return wfc.ret;
3543	}	3610	}
3544	EXPORT_SYMBOL_GPL(work_on_cpu);	3611	EXPORT_SYMBOL_GPL(work_on_cpu);