author		Tejun Heo <tj@kernel.org>	2013-03-19 16:45:21 -0400
committer	Tejun Heo <tj@kernel.org>	2013-03-19 16:45:21 -0400
commit		a9ab775bcadf122d91e1a201eb66ae2eec90365a
tree		98f30f2272d2ad62258744a48570c49ecfab66af /kernel/workqueue.c
parent		bd7c089eb25b26d2e03fd34f97e5517a4463f871
workqueue: directly restore CPU affinity of workers from CPU_ONLINE
Rebinding workers of a per-cpu pool after a CPU comes online involves a lot of back-and-forth, mostly because only the task itself could adjust its CPU affinity while PF_THREAD_BOUND was set.

As CPU_ONLINE itself couldn't adjust affinity, it had to somehow coerce the workers themselves into performing set_cpus_allowed_ptr(). Due to the various states a worker can be in, this led to three different paths by which a worker may be rebound: worker->rebind_work is queued to busy workers, idle ones are signaled by unlinking worker->entry and then call idle_worker_rebind(), and the manager isn't covered by either and implements its own mechanism.

PF_THREAD_BOUND has been replaced with PF_NO_SETAFFINITY and CPU_ONLINE itself can now manipulate the CPU affinity of workers. This patch replaces the existing rebind mechanism with a direct one in which CPU_ONLINE iterates over all workers using for_each_pool_worker(), restores CPU affinity, and clears WORKER_UNBOUND.

There are a couple of subtleties. All bound idle workers should have their runqueues set to that of the bound CPU; however, if the target task isn't running, set_cpus_allowed_ptr() just updates the cpus_allowed mask, deferring the actual migration to when the task wakes up. This is worked around by waking up the idle workers after restoring CPU affinity and before any worker can become bound.

Another subtlety stems from matching @pool->nr_running with the number of running unbound workers. While DISASSOCIATED, all workers are unbound and nr_running is zero. As workers become bound again, nr_running needs to be adjusted accordingly; however, there is no good way to tell whether a given worker is running without poking into scheduler internals. Instead of clearing UNBOUND directly, rebind_workers() replaces UNBOUND with another NOT_RUNNING flag, the new REBOUND, which the workers themselves later clear while preparing for the next round of work item execution. The only change needed on the worker side is clearing REBOUND along with PREP.

* This patch leaves for_each_busy_worker() without any user.  Removed.

* idle_worker_rebind(), busy_worker_rebind_fn(), worker->rebind_work and the rebind logic in manage_workers() are removed.

* worker_thread() now looks at WORKER_DIE instead of testing whether @worker->entry is empty to determine whether it needs to do something special, as dying is the only special case left.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
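The nr_running bookkeeping described in the changelog is easiest to see in isolation. The following standalone C sketch is not kernel code: the flag values mirror those in the patch, but nr_running and worker_clr_flags() here are simplified stand-ins. It shows why swapping UNBOUND for REBOUND leaves nr_running untouched until the worker itself clears REBOUND together with PREP.

/*
 * Standalone sketch (not kernel code) of the UNBOUND -> REBOUND
 * bookkeeping.  Flag values mirror kernel/workqueue.c; nr_running and
 * worker_clr_flags() are simplified stand-ins for illustration only.
 */
#include <assert.h>
#include <stdio.h>

enum {
	WORKER_PREP		= 1 << 3,
	WORKER_UNBOUND		= 1 << 7,
	WORKER_REBOUND		= 1 << 8,
	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_UNBOUND | WORKER_REBOUND,
};

static int nr_running;		/* stand-in for @pool->nr_running */

/* simplified: bump nr_running when the last NOT_RUNNING flag goes away */
static void worker_clr_flags(unsigned int *flags, unsigned int clr)
{
	unsigned int old = *flags;

	*flags &= ~clr;
	if ((old & WORKER_NOT_RUNNING) && !(*flags & WORKER_NOT_RUNNING))
		nr_running++;
}

int main(void)
{
	unsigned int flags = WORKER_PREP | WORKER_UNBOUND;

	/* rebind_workers(): swap UNBOUND for REBOUND without touching
	 * nr_running - the worker is still NOT_RUNNING afterwards */
	flags = (flags | WORKER_REBOUND) & ~WORKER_UNBOUND;
	assert(nr_running == 0);

	/* worker_thread(): clearing PREP | REBOUND at the start of the
	 * next execution cycle restores concurrency management */
	worker_clr_flags(&flags, WORKER_PREP | WORKER_REBOUND);
	assert(nr_running == 1);
	printf("nr_running = %d\n", nr_running);
	return 0;
}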
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--	kernel/workqueue.c	192
1 file changed, 64 insertions, 128 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3e297c574be8..9508b5ed7336 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -75,9 +75,10 @@ enum {
 	WORKER_PREP		= 1 << 3,	/* preparing to run works */
 	WORKER_CPU_INTENSIVE	= 1 << 6,	/* cpu intensive */
 	WORKER_UNBOUND		= 1 << 7,	/* worker is unbound */
+	WORKER_REBOUND		= 1 << 8,	/* worker was rebound */
 
-	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_UNBOUND |
-				  WORKER_CPU_INTENSIVE,
+	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_CPU_INTENSIVE |
+				  WORKER_UNBOUND | WORKER_REBOUND,
 
 	NR_STD_WORKER_POOLS	= 2,		/* # standard pools per cpu */
 
@@ -316,9 +317,6 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
 	     (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
 	     (pool)++)
 
-#define for_each_busy_worker(worker, i, pool)				\
-	hash_for_each(pool->busy_hash, i, worker, hentry)
-
 /**
  * for_each_pool - iterate through all worker_pools in the system
  * @pool: iteration cursor
@@ -1612,37 +1610,6 @@ __acquires(&pool->lock)
 	}
 }
 
-/*
- * Rebind an idle @worker to its CPU.  worker_thread() will test
- * list_empty(@worker->entry) before leaving idle and call this function.
- */
-static void idle_worker_rebind(struct worker *worker)
-{
-	/* CPU may go down again inbetween, clear UNBOUND only on success */
-	if (worker_maybe_bind_and_lock(worker->pool))
-		worker_clr_flags(worker, WORKER_UNBOUND);
-
-	/* rebind complete, become available again */
-	list_add(&worker->entry, &worker->pool->idle_list);
-	spin_unlock_irq(&worker->pool->lock);
-}
-
-/*
- * Function for @worker->rebind.work used to rebind unbound busy workers to
- * the associated cpu which is coming back online.  This is scheduled by
- * cpu up but can race with other cpu hotplug operations and may be
- * executed twice without intervening cpu down.
- */
-static void busy_worker_rebind_fn(struct work_struct *work)
-{
-	struct worker *worker = container_of(work, struct worker, rebind_work);
-
-	if (worker_maybe_bind_and_lock(worker->pool))
-		worker_clr_flags(worker, WORKER_UNBOUND);
-
-	spin_unlock_irq(&worker->pool->lock);
-}
-
 static struct worker *alloc_worker(void)
 {
 	struct worker *worker;
@@ -1651,7 +1618,6 @@ static struct worker *alloc_worker(void)
 	if (worker) {
 		INIT_LIST_HEAD(&worker->entry);
 		INIT_LIST_HEAD(&worker->scheduled);
-		INIT_WORK(&worker->rebind_work, busy_worker_rebind_fn);
 		/* on creation a worker is in !idle && prep state */
 		worker->flags = WORKER_PREP;
 	}
@@ -2053,22 +2019,6 @@ static bool manage_workers(struct worker *worker)
 	if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
 		spin_unlock_irq(&pool->lock);
 		mutex_lock(&pool->manager_mutex);
-		/*
-		 * CPU hotplug could have happened while we were waiting
-		 * for assoc_mutex.  Hotplug itself can't handle us
-		 * because manager isn't either on idle or busy list, and
-		 * @pool's state and ours could have deviated.
-		 *
-		 * As hotplug is now excluded via manager_mutex, we can
-		 * simply try to bind.  It will succeed or fail depending
-		 * on @pool's current state.  Try it and adjust
-		 * %WORKER_UNBOUND accordingly.
-		 */
-		if (worker_maybe_bind_and_lock(pool))
-			worker->flags &= ~WORKER_UNBOUND;
-		else
-			worker->flags |= WORKER_UNBOUND;
-
 		ret = true;
 	}
 
@@ -2252,19 +2202,12 @@ static int worker_thread(void *__worker)
 woke_up:
 	spin_lock_irq(&pool->lock);
 
-	/* we are off idle list if destruction or rebind is requested */
-	if (unlikely(list_empty(&worker->entry))) {
+	/* am I supposed to die? */
+	if (unlikely(worker->flags & WORKER_DIE)) {
 		spin_unlock_irq(&pool->lock);
-
-		/* if DIE is set, destruction is requested */
-		if (worker->flags & WORKER_DIE) {
-			worker->task->flags &= ~PF_WQ_WORKER;
-			return 0;
-		}
-
-		/* otherwise, rebind */
-		idle_worker_rebind(worker);
-		goto woke_up;
+		WARN_ON_ONCE(!list_empty(&worker->entry));
+		worker->task->flags &= ~PF_WQ_WORKER;
+		return 0;
 	}
 
 	worker_leave_idle(worker);
@@ -2285,11 +2228,13 @@ recheck:
 	WARN_ON_ONCE(!list_empty(&worker->scheduled));
 
 	/*
-	 * When control reaches this point, we're guaranteed to have
-	 * at least one idle worker or that someone else has already
-	 * assumed the manager role.
+	 * Finish PREP stage.  We're guaranteed to have at least one idle
+	 * worker or that someone else has already assumed the manager
+	 * role.  This is where @worker starts participating in concurrency
+	 * management if applicable and concurrency management is restored
+	 * after being rebound.  See rebind_workers() for details.
 	 */
-	worker_clr_flags(worker, WORKER_PREP);
+	worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);
 
 	do {
 		struct work_struct *work =
@@ -4076,7 +4021,7 @@ static void wq_unbind_fn(struct work_struct *work)
 	int cpu = smp_processor_id();
 	struct worker_pool *pool;
 	struct worker *worker;
-	int i;
+	int wi;
 
 	for_each_cpu_worker_pool(pool, cpu) {
 		WARN_ON_ONCE(cpu != smp_processor_id());
@@ -4091,10 +4036,7 @@ static void wq_unbind_fn(struct work_struct *work)
 		 * before the last CPU down must be on the cpu.  After
 		 * this, they may become diasporas.
 		 */
-		list_for_each_entry(worker, &pool->idle_list, entry)
-			worker->flags |= WORKER_UNBOUND;
-
-		for_each_busy_worker(worker, i, pool)
+		for_each_pool_worker(worker, wi, pool)
 			worker->flags |= WORKER_UNBOUND;
 
 		pool->flags |= POOL_DISASSOCIATED;
@@ -4129,71 +4071,64 @@ static void wq_unbind_fn(struct work_struct *work)
  * rebind_workers - rebind all workers of a pool to the associated CPU
  * @pool: pool of interest
  *
- * @pool->cpu is coming online.  Rebind all workers to the CPU.  Rebinding
- * is different for idle and busy ones.
- *
- * Idle ones will be removed from the idle_list and woken up.  They will
- * add themselves back after completing rebind.  This ensures that the
- * idle_list doesn't contain any unbound workers when re-bound busy workers
- * try to perform local wake-ups for concurrency management.
- *
- * Busy workers can rebind after they finish their current work items.
- * Queueing the rebind work item at the head of the scheduled list is
- * enough.  Note that nr_running will be properly bumped as busy workers
- * rebind.
- *
- * On return, all non-manager workers are scheduled for rebind - see
- * manage_workers() for the manager special case.  Any idle worker
- * including the manager will not appear on @idle_list until rebind is
- * complete, making local wake-ups safe.
+ * @pool->cpu is coming online.  Rebind all workers to the CPU.
  */
 static void rebind_workers(struct worker_pool *pool)
 {
-	struct worker *worker, *n;
-	int i;
+	struct worker *worker;
+	int wi;
 
 	lockdep_assert_held(&pool->manager_mutex);
-	lockdep_assert_held(&pool->lock);
-
-	/* dequeue and kick idle ones */
-	list_for_each_entry_safe(worker, n, &pool->idle_list, entry) {
-		/*
-		 * idle workers should be off @pool->idle_list until rebind
-		 * is complete to avoid receiving premature local wake-ups.
-		 */
-		list_del_init(&worker->entry);
 
-		/*
-		 * worker_thread() will see the above dequeuing and call
-		 * idle_worker_rebind().
-		 */
-		wake_up_process(worker->task);
-	}
+	/*
+	 * Restore CPU affinity of all workers.  As all idle workers should
+	 * be on the run-queue of the associated CPU before any local
+	 * wake-ups for concurrency management happen, restore CPU affinty
+	 * of all workers first and then clear UNBOUND.  As we're called
+	 * from CPU_ONLINE, the following shouldn't fail.
+	 */
+	for_each_pool_worker(worker, wi, pool)
+		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
+						  pool->attrs->cpumask) < 0);
 
-	/* rebind busy workers */
-	for_each_busy_worker(worker, i, pool) {
-		struct work_struct *rebind_work = &worker->rebind_work;
-		struct workqueue_struct *wq;
+	spin_lock_irq(&pool->lock);
 
-		if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
-				     work_data_bits(rebind_work)))
-			continue;
+	for_each_pool_worker(worker, wi, pool) {
+		unsigned int worker_flags = worker->flags;
 
-		debug_work_activate(rebind_work);
-
-		/*
-		 * wq doesn't really matter but let's keep @worker->pool
-		 * and @pwq->pool consistent for sanity.
-		 */
-		if (worker->pool->attrs->nice < 0)
-			wq = system_highpri_wq;
-		else
-			wq = system_wq;
-
-		insert_work(per_cpu_ptr(wq->cpu_pwqs, pool->cpu), rebind_work,
-			    worker->scheduled.next,
-			    work_color_to_flags(WORK_NO_COLOR));
+		/*
+		 * A bound idle worker should actually be on the runqueue
+		 * of the associated CPU for local wake-ups targeting it to
+		 * work.  Kick all idle workers so that they migrate to the
+		 * associated CPU.  Doing this in the same loop as
+		 * replacing UNBOUND with REBOUND is safe as no worker will
+		 * be bound before @pool->lock is released.
+		 */
+		if (worker_flags & WORKER_IDLE)
+			wake_up_process(worker->task);
+
+		/*
+		 * We want to clear UNBOUND but can't directly call
+		 * worker_clr_flags() or adjust nr_running.  Atomically
+		 * replace UNBOUND with another NOT_RUNNING flag REBOUND.
+		 * @worker will clear REBOUND using worker_clr_flags() when
+		 * it initiates the next execution cycle thus restoring
+		 * concurrency management.  Note that when or whether
+		 * @worker clears REBOUND doesn't affect correctness.
+		 *
+		 * ACCESS_ONCE() is necessary because @worker->flags may be
+		 * tested without holding any lock in
+		 * wq_worker_waking_up().  Without it, NOT_RUNNING test may
+		 * fail incorrectly leading to premature concurrency
+		 * management operations.
+		 */
+		WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
+		worker_flags |= WORKER_REBOUND;
+		worker_flags &= ~WORKER_UNBOUND;
+		ACCESS_ONCE(worker->flags) = worker_flags;
 	}
+
+	spin_unlock_irq(&pool->lock);
 }
 
 /*
@@ -4221,12 +4156,13 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
 	case CPU_ONLINE:
 		for_each_cpu_worker_pool(pool, cpu) {
 			mutex_lock(&pool->manager_mutex);
-			spin_lock_irq(&pool->lock);
 
+			spin_lock_irq(&pool->lock);
 			pool->flags &= ~POOL_DISASSOCIATED;
+			spin_unlock_irq(&pool->lock);
+
 			rebind_workers(pool);
 
-			spin_unlock_irq(&pool->lock);
 			mutex_unlock(&pool->manager_mutex);
 		}
 		break;