author		Tejun Heo <tj@kernel.org>	2012-07-17 15:39:27 -0400
committer	Tejun Heo <tj@kernel.org>	2012-07-17 15:39:27 -0400
commit		25511a477657884d2164f338341fa89652610507 (patch)
tree		dbea343f762f154c28b6db423f0220f090d94d60 /kernel
parent		bc2ae0f5bb2f39e6db06a62f9d353e4601a332a1 (diff)
workqueue: reimplement CPU online rebinding to handle idle workers
Currently, if there are workers left when a CPU is being brought back online, the trustee kills all idle workers and schedules rebind_work so that they re-bind to the CPU after the currently executing work item is finished. This works for busy workers because concurrency management doesn't try to wake them up from scheduler callbacks, which require the target task to be on the local run queue. The busy worker bumps the concurrency counter appropriately as it clears WORKER_UNBOUND from the rebind work item and is bound to the CPU before returning to the idle state.

To reduce CPU on/offlining overhead (many embedded systems use it for power saving) and to simplify the code path, workqueue is planned to be modified to retain idle workers across CPU on/offlining. This patch reimplements CPU online rebinding so that it can also handle idle workers.

As noted earlier, rebinding idle workers is tricky due to the local wakeup requirement: all idle workers must be re-bound before scheduler callbacks are enabled. This is achieved by interlocking idle re-binding. Idle workers are requested to re-bind and are then held until all idle re-binding is complete, so that no bound worker starts executing work items early. Only after all idle workers are re-bound and parked does CPU_ONLINE proceed to release them and queue the rebind work item on busy workers, thus guaranteeing that scheduler callbacks aren't invoked until all idle workers are ready.

worker_rebind_fn() is renamed to busy_worker_rebind_fn() and idle_worker_rebind() is added for idle workers. The rebinding logic is moved to rebind_workers(), which is now called from CPU_ONLINE after flushing the trustee. While at it, add a CPU sanity check in worker_thread().

Note that a worker may now become idle or become the manager between trustee release and rebinding during CPU_ONLINE. This is safe because the previous patch updated create_worker() so that it can be used by the regular manager while unbound, and this patch implements idle re-binding.

This prepares for the removal of the trustee and for keeping idle workers across CPU hotplug.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: "Rafael J. Wysocki" <rjw@sisk.pl>
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/workqueue.c	215
1 file changed, 166 insertions, 49 deletions
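Before the diff itself, a compact illustration of the handshake described in the commit message: the standalone userspace sketch below mimics the idle-rebind interlock with pthreads standing in for gcwq->lock, struct completion and the rebind_hold waitqueue. It is an illustration only, not the kernel code in the diff below; every identifier in it is invented for the example, and the kernel's retry loop for workers that go idle while the manager waits is deliberately omitted.

/*
 * Sketch of the idle-rebind interlock (userspace, pthreads). Not kernel code.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_IDLE 4

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  all_rebound = PTHREAD_COND_INITIALIZER; /* ~idle_rebind.done */
static pthread_cond_t  rebind_hold = PTHREAD_COND_INITIALIZER; /* ~gcwq->rebind_hold */
static int  rebind_cnt;                                        /* ~idle_rebind.cnt */
static bool rebind_in_progress;                                /* ~WORKER_REBIND */

/* what each idle worker does when kicked: "rebind", report, then hold */
static void *idle_worker(void *arg)
{
	long id = (long)arg;

	pthread_mutex_lock(&lock);
	/* pretend we migrated back to the onlined CPU here */
	printf("worker %ld rebound\n", id);
	if (--rebind_cnt == 0)
		pthread_cond_signal(&all_rebound);     /* complete(&done) */
	while (rebind_in_progress)                     /* wait_event(rebind_hold, ...) */
		pthread_cond_wait(&rebind_hold, &lock);
	pthread_mutex_unlock(&lock);

	printf("worker %ld released, may go busy now\n", id);
	return NULL;
}

int main(void)
{
	pthread_t workers[NR_IDLE];

	pthread_mutex_lock(&lock);
	rebind_in_progress = true;
	rebind_cnt = NR_IDLE;          /* the kernel counts one per idle worker it kicks */
	pthread_mutex_unlock(&lock);

	for (long i = 0; i < NR_IDLE; i++)     /* wake_up_process() on each idle worker */
		pthread_create(&workers[i], NULL, idle_worker, (void *)i);

	pthread_mutex_lock(&lock);
	while (rebind_cnt)                     /* wait_for_completion(&done) */
		pthread_cond_wait(&all_rebound, &lock);
	/* all idle workers are rebound and parked; release them together */
	rebind_in_progress = false;            /* clear WORKER_REBIND */
	pthread_cond_broadcast(&rebind_hold);  /* wake_up_all(&rebind_hold) */
	pthread_mutex_unlock(&lock);

	for (int i = 0; i < NR_IDLE; i++)
		pthread_join(workers[i], NULL);
	return 0;
}

Only after the "release" step may any of these workers start doing work, which is exactly the ordering rebind_workers() enforces before busy workers get their rebind work items queued.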
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e1d05e51a80a..6927fecae412 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -133,6 +133,7 @@ enum {
 
 struct global_cwq;
 struct worker_pool;
+struct idle_rebind;
 
 /*
  * The poor guys doing the actual heavy lifting.  All on-duty workers
@@ -154,7 +155,10 @@ struct worker {
 	unsigned long		last_active;	/* L: last active timestamp */
 	unsigned int		flags;		/* X: flags */
 	int			id;		/* I: worker id */
-	struct work_struct	rebind_work;	/* L: rebind worker to cpu */
+
+	/* for rebinding worker to CPU */
+	struct idle_rebind	*idle_rebind;	/* L: for idle worker */
+	struct work_struct	rebind_work;	/* L: for busy worker */
 };
 
 struct worker_pool {
@@ -190,6 +194,8 @@ struct global_cwq {
 
 	struct worker_pool	pools[2];	/* normal and highpri pools */
 
+	wait_queue_head_t	rebind_hold;	/* rebind hold wait */
+
 	struct task_struct	*trustee;	/* L: for gcwq shutdown */
 	unsigned int		trustee_state;	/* L: trustee state */
 	wait_queue_head_t	trustee_wait;	/* trustee wait */
@@ -1314,13 +1320,37 @@ __acquires(&gcwq->lock)
 	}
 }
 
+struct idle_rebind {
+	int			cnt;		/* # workers to be rebound */
+	struct completion	done;		/* all workers rebound */
+};
+
+/*
+ * Rebind an idle @worker to its CPU.  During CPU onlining, this has to
+ * happen synchronously for idle workers.  worker_thread() will test
+ * %WORKER_REBIND before leaving idle and call this function.
+ */
+static void idle_worker_rebind(struct worker *worker)
+{
+	struct global_cwq *gcwq = worker->pool->gcwq;
+
+	/* CPU must be online at this point */
+	WARN_ON(!worker_maybe_bind_and_lock(worker));
+	if (!--worker->idle_rebind->cnt)
+		complete(&worker->idle_rebind->done);
+	spin_unlock_irq(&worker->pool->gcwq->lock);
+
+	/* we did our part, wait for rebind_workers() to finish up */
+	wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
+}
+
 /*
- * Function for worker->rebind_work used to rebind unbound busy workers to
+ * Function for @worker->rebind.work used to rebind unbound busy workers to
  * the associated cpu which is coming back online.  This is scheduled by
  * cpu up but can race with other cpu hotplug operations and may be
  * executed twice without intervening cpu down.
  */
-static void worker_rebind_fn(struct work_struct *work)
+static void busy_worker_rebind_fn(struct work_struct *work)
 {
 	struct worker *worker = container_of(work, struct worker, rebind_work);
 	struct global_cwq *gcwq = worker->pool->gcwq;
@@ -1331,6 +1361,112 @@ static void worker_rebind_fn(struct work_struct *work)
 	spin_unlock_irq(&gcwq->lock);
 }
 
+/**
+ * rebind_workers - rebind all workers of a gcwq to the associated CPU
+ * @gcwq: gcwq of interest
+ *
+ * @gcwq->cpu is coming online.  Rebind all workers to the CPU.  Rebinding
+ * is different for idle and busy ones.
+ *
+ * The idle ones should be rebound synchronously and idle rebinding should
+ * be complete before any worker starts executing work items with
+ * concurrency management enabled; otherwise, scheduler may oops trying to
+ * wake up non-local idle worker from wq_worker_sleeping().
+ *
+ * This is achieved by repeatedly requesting rebinding until all idle
+ * workers are known to have been rebound under @gcwq->lock and holding all
+ * idle workers from becoming busy until idle rebinding is complete.
+ *
+ * Once idle workers are rebound, busy workers can be rebound as they
+ * finish executing their current work items.  Queueing the rebind work at
+ * the head of their scheduled lists is enough.  Note that nr_running will
+ * be properly bumped as busy workers rebind.
+ *
+ * On return, all workers are guaranteed to either be bound or have rebind
+ * work item scheduled.
+ */
+static void rebind_workers(struct global_cwq *gcwq)
+	__releases(&gcwq->lock) __acquires(&gcwq->lock)
+{
+	struct idle_rebind idle_rebind;
+	struct worker_pool *pool;
+	struct worker *worker;
+	struct hlist_node *pos;
+	int i;
+
+	lockdep_assert_held(&gcwq->lock);
+
+	for_each_worker_pool(pool, gcwq)
+		lockdep_assert_held(&pool->manager_mutex);
+
+	/*
+	 * Rebind idle workers.  Interlocked both ways.  We wait for
+	 * workers to rebind via @idle_rebind.done.  Workers will wait for
+	 * us to finish up by watching %WORKER_REBIND.
+	 */
+	init_completion(&idle_rebind.done);
+retry:
+	idle_rebind.cnt = 1;
+	INIT_COMPLETION(idle_rebind.done);
+
+	/* set REBIND and kick idle ones, we'll wait for these later */
+	for_each_worker_pool(pool, gcwq) {
+		list_for_each_entry(worker, &pool->idle_list, entry) {
+			if (worker->flags & WORKER_REBIND)
+				continue;
+
+			/* morph UNBOUND to REBIND */
+			worker->flags &= ~WORKER_UNBOUND;
+			worker->flags |= WORKER_REBIND;
+
+			idle_rebind.cnt++;
+			worker->idle_rebind = &idle_rebind;
+
+			/* worker_thread() will call idle_worker_rebind() */
+			wake_up_process(worker->task);
+		}
+	}
+
+	if (--idle_rebind.cnt) {
+		spin_unlock_irq(&gcwq->lock);
+		wait_for_completion(&idle_rebind.done);
+		spin_lock_irq(&gcwq->lock);
+		/* busy ones might have become idle while waiting, retry */
+		goto retry;
+	}
+
+	/*
+	 * All idle workers are rebound and waiting for %WORKER_REBIND to
+	 * be cleared inside idle_worker_rebind().  Clear and release.
+	 * Clearing %WORKER_REBIND from this foreign context is safe
+	 * because these workers are still guaranteed to be idle.
+	 */
+	for_each_worker_pool(pool, gcwq)
+		list_for_each_entry(worker, &pool->idle_list, entry)
+			worker->flags &= ~WORKER_REBIND;
+
+	wake_up_all(&gcwq->rebind_hold);
+
+	/* rebind busy workers */
+	for_each_busy_worker(worker, i, pos, gcwq) {
+		struct work_struct *rebind_work = &worker->rebind_work;
+
+		/* morph UNBOUND to REBIND */
+		worker->flags &= ~WORKER_UNBOUND;
+		worker->flags |= WORKER_REBIND;
+
+		if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
+				     work_data_bits(rebind_work)))
+			continue;
+
+		/* wq doesn't matter, use the default one */
+		debug_work_activate(rebind_work);
+		insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
+			    worker->scheduled.next,
+			    work_color_to_flags(WORK_NO_COLOR));
+	}
+}
+
 static struct worker *alloc_worker(void)
 {
 	struct worker *worker;
@@ -1339,7 +1475,7 @@ static struct worker *alloc_worker(void)
 	if (worker) {
 		INIT_LIST_HEAD(&worker->entry);
 		INIT_LIST_HEAD(&worker->scheduled);
-		INIT_WORK(&worker->rebind_work, worker_rebind_fn);
+		INIT_WORK(&worker->rebind_work, busy_worker_rebind_fn);
 		/* on creation a worker is in !idle && prep state */
 		worker->flags = WORKER_PREP;
 	}
@@ -1829,6 +1965,9 @@ __acquires(&gcwq->lock)
 
 	lockdep_copy_map(&lockdep_map, &work->lockdep_map);
 #endif
+	WARN_ON_ONCE(!(worker->flags & (WORKER_UNBOUND | WORKER_REBIND)) &&
+		     raw_smp_processor_id() != gcwq->cpu);
+
 	/*
 	 * A single work shouldn't be executed concurrently by
 	 * multiple workers on a single cpu.  Check whether anyone is
@@ -1946,11 +2085,20 @@ static int worker_thread(void *__worker)
 woke_up:
 	spin_lock_irq(&gcwq->lock);
 
-	/* DIE can be set only while we're idle, checking here is enough */
-	if (worker->flags & WORKER_DIE) {
+	/*
+	 * DIE can be set only while idle and REBIND set while busy has
+	 * @worker->rebind_work scheduled.  Checking here is enough.
+	 */
+	if (unlikely(worker->flags & (WORKER_REBIND | WORKER_DIE))) {
 		spin_unlock_irq(&gcwq->lock);
-		worker->task->flags &= ~PF_WQ_WORKER;
-		return 0;
+
+		if (worker->flags & WORKER_DIE) {
+			worker->task->flags &= ~PF_WQ_WORKER;
+			return 0;
+		}
+
+		idle_worker_rebind(worker);
+		goto woke_up;
 	}
 
 	worker_leave_idle(worker);
@@ -3468,42 +3616,6 @@ static int __cpuinit trustee_thread(void *__gcwq)
 		}
 	} while (i && rc >= 0);
 
-	/*
-	 * At this point, either draining has completed and no worker
-	 * is left, or cpu down has been canceled or the cpu is being
-	 * brought back up.  There shouldn't be any idle one left.
-	 * Tell the remaining busy ones to rebind once it finishes the
-	 * currently scheduled works by scheduling the rebind_work.
-	 */
-	for_each_worker_pool(pool, gcwq)
-		WARN_ON(!list_empty(&pool->idle_list));
-
-	/* if we're reassociating, clear DISASSOCIATED */
-	if (gcwq->trustee_state == TRUSTEE_RELEASE)
-		gcwq->flags &= ~GCWQ_DISASSOCIATED;
-
-	for_each_busy_worker(worker, i, pos, gcwq) {
-		struct work_struct *rebind_work = &worker->rebind_work;
-
-		/*
-		 * Rebind_work may race with future cpu hotplug
-		 * operations.  Use a separate flag to mark that
-		 * rebinding is scheduled.
-		 */
-		worker->flags |= WORKER_REBIND;
-		worker->flags &= ~WORKER_UNBOUND;
-
-		/* queue rebind_work, wq doesn't matter, use the default one */
-		if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
-				     work_data_bits(rebind_work)))
-			continue;
-
-		debug_work_activate(rebind_work);
-		insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
-			    worker->scheduled.next,
-			    work_color_to_flags(WORK_NO_COLOR));
-	}
-
 	gcwq_release_management(gcwq);
 
 	/* notify completion */
@@ -3609,13 +3721,16 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 			wait_trustee_state(gcwq, TRUSTEE_DONE);
 		}
 
-		/*
-		 * Either DISASSOCIATED is already cleared or no worker is
-		 * left on the gcwq.  Safe to clear DISASSOCIATED without
-		 * claiming managers.
-		 */
+		spin_unlock_irq(&gcwq->lock);
+		gcwq_claim_management(gcwq);
+		spin_lock_irq(&gcwq->lock);
+
 		gcwq->flags &= ~GCWQ_DISASSOCIATED;
 
+		rebind_workers(gcwq);
+
+		gcwq_release_management(gcwq);
+
 		/*
 		 * Trustee is done and there might be no worker left.
 		 * Put the first_idle in and request a real manager to
@@ -3910,6 +4025,8 @@ static int __init init_workqueues(void)
 			ida_init(&pool->worker_ida);
 		}
 
+		init_waitqueue_head(&gcwq->rebind_hold);
+
 		gcwq->trustee_state = TRUSTEE_DONE;
 		init_waitqueue_head(&gcwq->trustee_wait);
 	}