Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--  kernel/workqueue.c | 147
1 file changed, 107 insertions(+), 40 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 692d97628a10..3c5a79e2134c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -66,6 +66,7 @@ enum {
 
        /* pool flags */
        POOL_MANAGE_WORKERS     = 1 << 0,       /* need to manage workers */
+       POOL_MANAGING_WORKERS   = 1 << 1,       /* managing workers */
 
        /* worker flags */
        WORKER_STARTED          = 1 << 0,       /* started */
@@ -652,7 +653,7 @@ static bool need_to_manage_workers(struct worker_pool *pool)
 /* Do we have too many workers and should some go away? */
 static bool too_many_workers(struct worker_pool *pool)
 {
-       bool managing = mutex_is_locked(&pool->manager_mutex);
+       bool managing = pool->flags & POOL_MANAGING_WORKERS;
        int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
        int nr_busy = pool->nr_workers - nr_idle;
 
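With the mutex no longer doubling as the "a manager exists" signal, the pool flag takes over that role. For context, the predicate after this change reads roughly as follows; the return expression is paraphrased from the surrounding code and MAX_IDLE_WORKERS_RATIO is the ratio constant already defined in this file, so treat it as a sketch rather than a quote:

        /* sketch: too_many_workers() with the flag-based manager check */
        static bool too_many_workers(struct worker_pool *pool)
        {
                bool managing = pool->flags & POOL_MANAGING_WORKERS;
                int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
                int nr_busy = pool->nr_workers - nr_idle;

                return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
        }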
@@ -1326,6 +1327,15 @@ static void idle_worker_rebind(struct worker *worker)
 
        /* we did our part, wait for rebind_workers() to finish up */
        wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
+
+       /*
+        * rebind_workers() shouldn't finish until all workers passed the
+        * above WORKER_REBIND wait.  Tell it when done.
+        */
+       spin_lock_irq(&worker->pool->gcwq->lock);
+       if (!--worker->idle_rebind->cnt)
+               complete(&worker->idle_rebind->done);
+       spin_unlock_irq(&worker->pool->gcwq->lock);
 }
 
 /*
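The idle_rebind bookkeeping these new lines rely on is a small counter-plus-completion pair declared elsewhere in this file and allocated on rebind_workers()'s stack; roughly (a sketch, not a quote from the patch):

        /* hand-shake between rebind_workers() and the idle workers it releases */
        struct idle_rebind {
                int               cnt;  /* # idle workers still to check in */
                struct completion done; /* completed once cnt drops to zero */
        };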
@@ -1339,8 +1349,16 @@ static void busy_worker_rebind_fn(struct work_struct *work)
        struct worker *worker = container_of(work, struct worker, rebind_work);
        struct global_cwq *gcwq = worker->pool->gcwq;
 
-       if (worker_maybe_bind_and_lock(worker))
-               worker_clr_flags(worker, WORKER_REBIND);
+       worker_maybe_bind_and_lock(worker);
+
+       /*
+        * %WORKER_REBIND must be cleared even if the above binding failed;
+        * otherwise, we may confuse the next CPU_UP cycle or oops / get
+        * stuck by calling idle_worker_rebind() prematurely.  If CPU went
+        * down again in between, %WORKER_UNBOUND would be set, so clearing
+        * %WORKER_REBIND is always safe.
+        */
+       worker_clr_flags(worker, WORKER_REBIND);
 
        spin_unlock_irq(&gcwq->lock);
 }
@@ -1396,12 +1414,15 @@ retry:
        /* set REBIND and kick idle ones, we'll wait for these later */
        for_each_worker_pool(pool, gcwq) {
                list_for_each_entry(worker, &pool->idle_list, entry) {
+                       unsigned long worker_flags = worker->flags;
+
                        if (worker->flags & WORKER_REBIND)
                                continue;
 
-                       /* morph UNBOUND to REBIND */
-                       worker->flags &= ~WORKER_UNBOUND;
-                       worker->flags |= WORKER_REBIND;
+                       /* morph UNBOUND to REBIND atomically */
+                       worker_flags &= ~WORKER_UNBOUND;
+                       worker_flags |= WORKER_REBIND;
+                       ACCESS_ONCE(worker->flags) = worker_flags;
 
                        idle_rebind.cnt++;
                        worker->idle_rebind = &idle_rebind;
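Both this loop and the busy-worker loop in the next hunk now stage the flag change in a local variable and publish it with ACCESS_ONCE(), so a worker reading its own flags never observes the half-updated state with WORKER_UNBOUND already cleared but WORKER_REBIND not yet set. ACCESS_ONCE() is the stock compiler.h macro of this era, essentially a volatile cast that forces the compiler to emit one plain store (shown here simplified, as a reference rather than a quote):

        /* simplified form of the kernel's ACCESS_ONCE() from <linux/compiler.h> */
        #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

        worker_flags &= ~WORKER_UNBOUND;
        worker_flags |= WORKER_REBIND;
        ACCESS_ONCE(worker->flags) = worker_flags;      /* single store, not two RMWs */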
@@ -1419,25 +1440,15 @@ retry:
                goto retry;
        }
 
-       /*
-        * All idle workers are rebound and waiting for %WORKER_REBIND to
-        * be cleared inside idle_worker_rebind().  Clear and release.
-        * Clearing %WORKER_REBIND from this foreign context is safe
-        * because these workers are still guaranteed to be idle.
-        */
-       for_each_worker_pool(pool, gcwq)
-               list_for_each_entry(worker, &pool->idle_list, entry)
-                       worker->flags &= ~WORKER_REBIND;
-
-       wake_up_all(&gcwq->rebind_hold);
-
-       /* rebind busy workers */
+       /* all idle workers are rebound, rebind busy workers */
        for_each_busy_worker(worker, i, pos, gcwq) {
                struct work_struct *rebind_work = &worker->rebind_work;
+               unsigned long worker_flags = worker->flags;
 
-               /* morph UNBOUND to REBIND */
-               worker->flags &= ~WORKER_UNBOUND;
-               worker->flags |= WORKER_REBIND;
+               /* morph UNBOUND to REBIND atomically */
+               worker_flags &= ~WORKER_UNBOUND;
+               worker_flags |= WORKER_REBIND;
+               ACCESS_ONCE(worker->flags) = worker_flags;
 
                if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
                                     work_data_bits(rebind_work)))
@@ -1449,6 +1460,34 @@ retry:
                            worker->scheduled.next,
                            work_color_to_flags(WORK_NO_COLOR));
        }
+
+       /*
+        * All idle workers are rebound and waiting for %WORKER_REBIND to
+        * be cleared inside idle_worker_rebind().  Clear and release.
+        * Clearing %WORKER_REBIND from this foreign context is safe
+        * because these workers are still guaranteed to be idle.
+        *
+        * We need to make sure all idle workers passed WORKER_REBIND wait
+        * in idle_worker_rebind() before returning; otherwise, workers can
+        * get stuck at the wait if hotplug cycle repeats.
+        */
+       idle_rebind.cnt = 1;
+       INIT_COMPLETION(idle_rebind.done);
+
+       for_each_worker_pool(pool, gcwq) {
+               list_for_each_entry(worker, &pool->idle_list, entry) {
+                       worker->flags &= ~WORKER_REBIND;
+                       idle_rebind.cnt++;
+               }
+       }
+
+       wake_up_all(&gcwq->rebind_hold);
+
+       if (--idle_rebind.cnt) {
+               spin_unlock_irq(&gcwq->lock);
+               wait_for_completion(&idle_rebind.done);
+               spin_lock_irq(&gcwq->lock);
+       }
 }
 
 static struct worker *alloc_worker(void)
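Together with the new lines in idle_worker_rebind() above, this forms a second hand-shake on top of rebind_hold: rebind_workers() re-counts the idle workers while clearing WORKER_REBIND, wakes them, and then waits on idle_rebind.done until each of them has moved past its wait_event(); per the comment, a worker still sitting in that wait could otherwise get stuck if the hotplug cycle repeats. Stripped of the locking detail (both decrements happen under gcwq->lock), the pattern is roughly:

        /* releasing side, cf. rebind_workers() */
        idle_rebind.cnt = 1;                    /* hold one reference ourselves */
        INIT_COMPLETION(idle_rebind.done);
        /* ... one cnt++ per idle worker being released ... */
        wake_up_all(&gcwq->rebind_hold);
        if (--idle_rebind.cnt)                  /* drop our own reference */
                wait_for_completion(&idle_rebind.done);

        /* waiting side, cf. idle_worker_rebind(), once past its wait_event() */
        if (!--worker->idle_rebind->cnt)
                complete(&worker->idle_rebind->done);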
@@ -1794,9 +1833,45 @@ static bool manage_workers(struct worker *worker)
        struct worker_pool *pool = worker->pool;
        bool ret = false;
 
-       if (!mutex_trylock(&pool->manager_mutex))
+       if (pool->flags & POOL_MANAGING_WORKERS)
                return ret;
 
+       pool->flags |= POOL_MANAGING_WORKERS;
+
+       /*
+        * To simplify both worker management and CPU hotplug, hold off
+        * management while hotplug is in progress.  CPU hotplug path can't
+        * grab %POOL_MANAGING_WORKERS to achieve this because that can
+        * lead to idle worker depletion (all become busy thinking someone
+        * else is managing) which in turn can result in deadlock under
+        * extreme circumstances.  Use @pool->manager_mutex to synchronize
+        * manager against CPU hotplug.
+        *
+        * manager_mutex would always be free unless CPU hotplug is in
+        * progress.  trylock first without dropping @gcwq->lock.
+        */
+       if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
+               spin_unlock_irq(&pool->gcwq->lock);
+               mutex_lock(&pool->manager_mutex);
+               /*
+                * CPU hotplug could have happened while we were waiting
+                * for manager_mutex.  Hotplug itself can't handle us
+                * because manager isn't either on idle or busy list, and
+                * @gcwq's state and ours could have deviated.
+                *
+                * As hotplug is now excluded via manager_mutex, we can
+                * simply try to bind.  It will succeed or fail depending
+                * on @gcwq's current state.  Try it and adjust
+                * %WORKER_UNBOUND accordingly.
+                */
+               if (worker_maybe_bind_and_lock(worker))
+                       worker->flags &= ~WORKER_UNBOUND;
+               else
+                       worker->flags |= WORKER_UNBOUND;
+
+               ret = true;
+       }
+
        pool->flags &= ~POOL_MANAGE_WORKERS;
 
        /*
@@ -1806,6 +1881,7 @@ static bool manage_workers(struct worker *worker)
        ret |= maybe_destroy_workers(pool);
        ret |= maybe_create_worker(pool);
 
+       pool->flags &= ~POOL_MANAGING_WORKERS;
        mutex_unlock(&pool->manager_mutex);
        return ret;
 }
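Put together, the two synchronization mechanisms now have distinct jobs: POOL_MANAGING_WORKERS, a pool flag manipulated under gcwq->lock, keeps other workers of the same pool from managing concurrently, while manager_mutex only excludes the CPU hotplug path. Condensed, with the comments dropped, the resulting function reads roughly (a paraphrase, not a quote):

        static bool manage_workers(struct worker *worker)
        {
                struct worker_pool *pool = worker->pool;
                bool ret = false;

                if (pool->flags & POOL_MANAGING_WORKERS)
                        return ret;             /* someone else manages this pool */

                pool->flags |= POOL_MANAGING_WORKERS;

                if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
                        /* hotplug holds the mutex: sleep for it, then re-bind ourselves */
                        spin_unlock_irq(&pool->gcwq->lock);
                        mutex_lock(&pool->manager_mutex);
                        if (worker_maybe_bind_and_lock(worker))
                                worker->flags &= ~WORKER_UNBOUND;
                        else
                                worker->flags |= WORKER_UNBOUND;
                        ret = true;
                }

                pool->flags &= ~POOL_MANAGE_WORKERS;
                ret |= maybe_destroy_workers(pool);
                ret |= maybe_create_worker(pool);

                pool->flags &= ~POOL_MANAGING_WORKERS;
                mutex_unlock(&pool->manager_mutex);
                return ret;
        }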
@@ -3500,18 +3576,17 @@ static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb,
 #ifdef CONFIG_SMP
 
 struct work_for_cpu {
-       struct completion completion;
+       struct work_struct work;
        long (*fn)(void *);
        void *arg;
        long ret;
 };
 
-static int do_work_for_cpu(void *_wfc)
+static void work_for_cpu_fn(struct work_struct *work)
 {
-       struct work_for_cpu *wfc = _wfc;
+       struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
+
        wfc->ret = wfc->fn(wfc->arg);
-       complete(&wfc->completion);
-       return 0;
 }
 
 /**
@@ -3526,19 +3601,11 @@ static int do_work_for_cpu(void *_wfc)
  */
 long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
 {
-       struct task_struct *sub_thread;
-       struct work_for_cpu wfc = {
-               .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
-               .fn = fn,
-               .arg = arg,
-       };
+       struct work_for_cpu wfc = { .fn = fn, .arg = arg };
 
-       sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
-       if (IS_ERR(sub_thread))
-               return PTR_ERR(sub_thread);
-       kthread_bind(sub_thread, cpu);
-       wake_up_process(sub_thread);
-       wait_for_completion(&wfc.completion);
+       INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
+       schedule_work_on(cpu, &wfc.work);
+       flush_work(&wfc.work);
        return wfc.ret;
 }
 EXPORT_SYMBOL_GPL(work_on_cpu);
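For callers nothing changes: work_on_cpu() still runs fn(arg) synchronously on the requested CPU and hands back its return value; it just rides the regular per-cpu workqueue via an on-stack work item instead of spawning and binding a throwaway kthread. A hypothetical caller, with names invented purely for illustration:

        /* read a per-cpu register on @cpu from sleepable context */
        static long read_cpu_reg(void *arg)
        {
                struct reg_request *req = arg;  /* hypothetical caller-defined data */

                /* runs in process context on the CPU passed to work_on_cpu() */
                return do_read_reg(req->which);
        }

        long val = work_on_cpu(cpu, read_cpu_reg, &req);
        if (val < 0)
                pr_warn("register read on cpu %u failed: %ld\n", cpu, val);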