diff options
Diffstat (limited to 'kernel/workqueue.c')
| -rw-r--r-- | kernel/workqueue.c | 147 |
1 files changed, 107 insertions, 40 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 692d97628a10..3c5a79e2134c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -66,6 +66,7 @@ enum { | |||
| 66 | 66 | ||
| 67 | /* pool flags */ | 67 | /* pool flags */ |
| 68 | POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ | 68 | POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ |
| 69 | POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */ | ||
| 69 | 70 | ||
| 70 | /* worker flags */ | 71 | /* worker flags */ |
| 71 | WORKER_STARTED = 1 << 0, /* started */ | 72 | WORKER_STARTED = 1 << 0, /* started */ |
| @@ -652,7 +653,7 @@ static bool need_to_manage_workers(struct worker_pool *pool) | |||
| 652 | /* Do we have too many workers and should some go away? */ | 653 | /* Do we have too many workers and should some go away? */ |
| 653 | static bool too_many_workers(struct worker_pool *pool) | 654 | static bool too_many_workers(struct worker_pool *pool) |
| 654 | { | 655 | { |
| 655 | bool managing = mutex_is_locked(&pool->manager_mutex); | 656 | bool managing = pool->flags & POOL_MANAGING_WORKERS; |
| 656 | int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ | 657 | int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ |
| 657 | int nr_busy = pool->nr_workers - nr_idle; | 658 | int nr_busy = pool->nr_workers - nr_idle; |
| 658 | 659 | ||
| @@ -1326,6 +1327,15 @@ static void idle_worker_rebind(struct worker *worker) | |||
| 1326 | 1327 | ||
| 1327 | /* we did our part, wait for rebind_workers() to finish up */ | 1328 | /* we did our part, wait for rebind_workers() to finish up */ |
| 1328 | wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND)); | 1329 | wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND)); |
| 1330 | |||
| 1331 | /* | ||
| 1332 | * rebind_workers() shouldn't finish until all workers have passed the | ||
| 1333 | * above WORKER_REBIND wait. Tell it when done. | ||
| 1334 | */ | ||
| 1335 | spin_lock_irq(&worker->pool->gcwq->lock); | ||
| 1336 | if (!--worker->idle_rebind->cnt) | ||
| 1337 | complete(&worker->idle_rebind->done); | ||
| 1338 | spin_unlock_irq(&worker->pool->gcwq->lock); | ||
| 1329 | } | 1339 | } |
| 1330 | 1340 | ||
| 1331 | /* | 1341 | /* |
| @@ -1339,8 +1349,16 @@ static void busy_worker_rebind_fn(struct work_struct *work) | |||
| 1339 | struct worker *worker = container_of(work, struct worker, rebind_work); | 1349 | struct worker *worker = container_of(work, struct worker, rebind_work); |
| 1340 | struct global_cwq *gcwq = worker->pool->gcwq; | 1350 | struct global_cwq *gcwq = worker->pool->gcwq; |
| 1341 | 1351 | ||
| 1342 | if (worker_maybe_bind_and_lock(worker)) | 1352 | worker_maybe_bind_and_lock(worker); |
| 1343 | worker_clr_flags(worker, WORKER_REBIND); | 1353 | |
| 1354 | /* | ||
| 1355 | * %WORKER_REBIND must be cleared even if the above binding failed; | ||
| 1356 | * otherwise, we may confuse the next CPU_UP cycle or oops / get | ||
| 1357 | * stuck by calling idle_worker_rebind() prematurely. If CPU went | ||
| 1358 | * down again in between, %WORKER_UNBOUND would be set, so clearing | ||
| 1359 | * %WORKER_REBIND is always safe. | ||
| 1360 | */ | ||
| 1361 | worker_clr_flags(worker, WORKER_REBIND); | ||
| 1344 | 1362 | ||
| 1345 | spin_unlock_irq(&gcwq->lock); | 1363 | spin_unlock_irq(&gcwq->lock); |
| 1346 | } | 1364 | } |
| @@ -1396,12 +1414,15 @@ retry: | |||
| 1396 | /* set REBIND and kick idle ones, we'll wait for these later */ | 1414 | /* set REBIND and kick idle ones, we'll wait for these later */ |
| 1397 | for_each_worker_pool(pool, gcwq) { | 1415 | for_each_worker_pool(pool, gcwq) { |
| 1398 | list_for_each_entry(worker, &pool->idle_list, entry) { | 1416 | list_for_each_entry(worker, &pool->idle_list, entry) { |
| 1417 | unsigned long worker_flags = worker->flags; | ||
| 1418 | |||
| 1399 | if (worker->flags & WORKER_REBIND) | 1419 | if (worker->flags & WORKER_REBIND) |
| 1400 | continue; | 1420 | continue; |
| 1401 | 1421 | ||
| 1402 | /* morph UNBOUND to REBIND */ | 1422 | /* morph UNBOUND to REBIND atomically */ |
| 1403 | worker->flags &= ~WORKER_UNBOUND; | 1423 | worker_flags &= ~WORKER_UNBOUND; |
| 1404 | worker->flags |= WORKER_REBIND; | 1424 | worker_flags |= WORKER_REBIND; |
| 1425 | ACCESS_ONCE(worker->flags) = worker_flags; | ||
| 1405 | 1426 | ||
| 1406 | idle_rebind.cnt++; | 1427 | idle_rebind.cnt++; |
| 1407 | worker->idle_rebind = &idle_rebind; | 1428 | worker->idle_rebind = &idle_rebind; |
| @@ -1419,25 +1440,15 @@ retry: | |||
| 1419 | goto retry; | 1440 | goto retry; |
| 1420 | } | 1441 | } |
| 1421 | 1442 | ||
| 1422 | /* | 1443 | /* all idle workers are rebound, rebind busy workers */ |
| 1423 | * All idle workers are rebound and waiting for %WORKER_REBIND to | ||
| 1424 | * be cleared inside idle_worker_rebind(). Clear and release. | ||
| 1425 | * Clearing %WORKER_REBIND from this foreign context is safe | ||
| 1426 | * because these workers are still guaranteed to be idle. | ||
| 1427 | */ | ||
| 1428 | for_each_worker_pool(pool, gcwq) | ||
| 1429 | list_for_each_entry(worker, &pool->idle_list, entry) | ||
| 1430 | worker->flags &= ~WORKER_REBIND; | ||
| 1431 | |||
| 1432 | wake_up_all(&gcwq->rebind_hold); | ||
| 1433 | |||
| 1434 | /* rebind busy workers */ | ||
| 1435 | for_each_busy_worker(worker, i, pos, gcwq) { | 1444 | for_each_busy_worker(worker, i, pos, gcwq) { |
| 1436 | struct work_struct *rebind_work = &worker->rebind_work; | 1445 | struct work_struct *rebind_work = &worker->rebind_work; |
| 1446 | unsigned long worker_flags = worker->flags; | ||
| 1437 | 1447 | ||
| 1438 | /* morph UNBOUND to REBIND */ | 1448 | /* morph UNBOUND to REBIND atomically */ |
| 1439 | worker->flags &= ~WORKER_UNBOUND; | 1449 | worker_flags &= ~WORKER_UNBOUND; |
| 1440 | worker->flags |= WORKER_REBIND; | 1450 | worker_flags |= WORKER_REBIND; |
| 1451 | ACCESS_ONCE(worker->flags) = worker_flags; | ||
| 1441 | 1452 | ||
| 1442 | if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, | 1453 | if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, |
| 1443 | work_data_bits(rebind_work))) | 1454 | work_data_bits(rebind_work))) |
| @@ -1449,6 +1460,34 @@ retry: | |||
| 1449 | worker->scheduled.next, | 1460 | worker->scheduled.next, |
| 1450 | work_color_to_flags(WORK_NO_COLOR)); | 1461 | work_color_to_flags(WORK_NO_COLOR)); |
| 1451 | } | 1462 | } |
| 1463 | |||
| 1464 | /* | ||
| 1465 | * All idle workers are rebound and waiting for %WORKER_REBIND to | ||
| 1466 | * be cleared inside idle_worker_rebind(). Clear and release. | ||
| 1467 | * Clearing %WORKER_REBIND from this foreign context is safe | ||
| 1468 | * because these workers are still guaranteed to be idle. | ||
| 1469 | * | ||
| 1470 | * We need to make sure all idle workers have passed the WORKER_REBIND wait | ||
| 1471 | * in idle_worker_rebind() before returning; otherwise, workers can | ||
| 1472 | * get stuck at the wait if hotplug cycle repeats. | ||
| 1473 | */ | ||
| 1474 | idle_rebind.cnt = 1; | ||
| 1475 | INIT_COMPLETION(idle_rebind.done); | ||
| 1476 | |||
| 1477 | for_each_worker_pool(pool, gcwq) { | ||
| 1478 | list_for_each_entry(worker, &pool->idle_list, entry) { | ||
| 1479 | worker->flags &= ~WORKER_REBIND; | ||
| 1480 | idle_rebind.cnt++; | ||
| 1481 | } | ||
| 1482 | } | ||
| 1483 | |||
| 1484 | wake_up_all(&gcwq->rebind_hold); | ||
| 1485 | |||
| 1486 | if (--idle_rebind.cnt) { | ||
| 1487 | spin_unlock_irq(&gcwq->lock); | ||
| 1488 | wait_for_completion(&idle_rebind.done); | ||
| 1489 | spin_lock_irq(&gcwq->lock); | ||
| 1490 | } | ||
| 1452 | } | 1491 | } |
| 1453 | 1492 | ||
| 1454 | static struct worker *alloc_worker(void) | 1493 | static struct worker *alloc_worker(void) |
| @@ -1794,9 +1833,45 @@ static bool manage_workers(struct worker *worker) | |||
| 1794 | struct worker_pool *pool = worker->pool; | 1833 | struct worker_pool *pool = worker->pool; |
| 1795 | bool ret = false; | 1834 | bool ret = false; |
| 1796 | 1835 | ||
| 1797 | if (!mutex_trylock(&pool->manager_mutex)) | 1836 | if (pool->flags & POOL_MANAGING_WORKERS) |
| 1798 | return ret; | 1837 | return ret; |
| 1799 | 1838 | ||
| 1839 | pool->flags |= POOL_MANAGING_WORKERS; | ||
| 1840 | |||
| 1841 | /* | ||
| 1842 | * To simplify both worker management and CPU hotplug, hold off | ||
| 1843 | * management while hotplug is in progress. CPU hotplug path can't | ||
| 1844 | * grab %POOL_MANAGING_WORKERS to achieve this because that can | ||
| 1845 | * lead to idle worker depletion (all become busy thinking someone | ||
| 1846 | * else is managing) which in turn can result in deadlock under | ||
| 1847 | * extreme circumstances. Use @pool->manager_mutex to synchronize | ||
| 1848 | * manager against CPU hotplug. | ||
| 1849 | * | ||
| 1850 | * manager_mutex would always be free unless CPU hotplug is in | ||
| 1851 | * progress. trylock first without dropping @gcwq->lock. | ||
| 1852 | */ | ||
| 1853 | if (unlikely(!mutex_trylock(&pool->manager_mutex))) { | ||
| 1854 | spin_unlock_irq(&pool->gcwq->lock); | ||
| 1855 | mutex_lock(&pool->manager_mutex); | ||
| 1856 | /* | ||
| 1857 | * CPU hotplug could have happened while we were waiting | ||
| 1858 | * for manager_mutex. Hotplug itself can't handle us | ||
| 1859 | * because the manager is on neither the idle nor the busy list, and | ||
| 1860 | * @gcwq's state and ours could have deviated. | ||
| 1861 | * | ||
| 1862 | * As hotplug is now excluded via manager_mutex, we can | ||
| 1863 | * simply try to bind. It will succeed or fail depending | ||
| 1864 | * on @gcwq's current state. Try it and adjust | ||
| 1865 | * %WORKER_UNBOUND accordingly. | ||
| 1866 | */ | ||
| 1867 | if (worker_maybe_bind_and_lock(worker)) | ||
| 1868 | worker->flags &= ~WORKER_UNBOUND; | ||
| 1869 | else | ||
| 1870 | worker->flags |= WORKER_UNBOUND; | ||
| 1871 | |||
| 1872 | ret = true; | ||
| 1873 | } | ||
| 1874 | |||
| 1800 | pool->flags &= ~POOL_MANAGE_WORKERS; | 1875 | pool->flags &= ~POOL_MANAGE_WORKERS; |
| 1801 | 1876 | ||
| 1802 | /* | 1877 | /* |
| @@ -1806,6 +1881,7 @@ static bool manage_workers(struct worker *worker) | |||
| 1806 | ret |= maybe_destroy_workers(pool); | 1881 | ret |= maybe_destroy_workers(pool); |
| 1807 | ret |= maybe_create_worker(pool); | 1882 | ret |= maybe_create_worker(pool); |
| 1808 | 1883 | ||
| 1884 | pool->flags &= ~POOL_MANAGING_WORKERS; | ||
| 1809 | mutex_unlock(&pool->manager_mutex); | 1885 | mutex_unlock(&pool->manager_mutex); |
| 1810 | return ret; | 1886 | return ret; |
| 1811 | } | 1887 | } |
| @@ -3500,18 +3576,17 @@ static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb, | |||
| 3500 | #ifdef CONFIG_SMP | 3576 | #ifdef CONFIG_SMP |
| 3501 | 3577 | ||
| 3502 | struct work_for_cpu { | 3578 | struct work_for_cpu { |
| 3503 | struct completion completion; | 3579 | struct work_struct work; |
| 3504 | long (*fn)(void *); | 3580 | long (*fn)(void *); |
| 3505 | void *arg; | 3581 | void *arg; |
| 3506 | long ret; | 3582 | long ret; |
| 3507 | }; | 3583 | }; |
| 3508 | 3584 | ||
| 3509 | static int do_work_for_cpu(void *_wfc) | 3585 | static void work_for_cpu_fn(struct work_struct *work) |
| 3510 | { | 3586 | { |
| 3511 | struct work_for_cpu *wfc = _wfc; | 3587 | struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work); |
| 3588 | |||
| 3512 | wfc->ret = wfc->fn(wfc->arg); | 3589 | wfc->ret = wfc->fn(wfc->arg); |
| 3513 | complete(&wfc->completion); | ||
| 3514 | return 0; | ||
| 3515 | } | 3590 | } |
| 3516 | 3591 | ||
| 3517 | /** | 3592 | /** |
| @@ -3526,19 +3601,11 @@ static int do_work_for_cpu(void *_wfc) | |||
| 3526 | */ | 3601 | */ |
| 3527 | long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) | 3602 | long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) |
| 3528 | { | 3603 | { |
| 3529 | struct task_struct *sub_thread; | 3604 | struct work_for_cpu wfc = { .fn = fn, .arg = arg }; |
| 3530 | struct work_for_cpu wfc = { | ||
| 3531 | .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion), | ||
| 3532 | .fn = fn, | ||
| 3533 | .arg = arg, | ||
| 3534 | }; | ||
| 3535 | 3605 | ||
| 3536 | sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu"); | 3606 | INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn); |
| 3537 | if (IS_ERR(sub_thread)) | 3607 | schedule_work_on(cpu, &wfc.work); |
| 3538 | return PTR_ERR(sub_thread); | 3608 | flush_work(&wfc.work); |
| 3539 | kthread_bind(sub_thread, cpu); | ||
| 3540 | wake_up_process(sub_thread); | ||
| 3541 | wait_for_completion(&wfc.completion); | ||
| 3542 | return wfc.ret; | 3609 | return wfc.ret; |
| 3543 | } | 3610 | } |
| 3544 | EXPORT_SYMBOL_GPL(work_on_cpu); | 3611 | EXPORT_SYMBOL_GPL(work_on_cpu); |
