path: root/kernel/workqueue.c
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--  kernel/workqueue.c | 1586
1 file changed, 746 insertions(+), 840 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 042d221d33cc..b48cd597145d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -41,32 +41,31 @@
41#include <linux/debug_locks.h> 41#include <linux/debug_locks.h>
42#include <linux/lockdep.h> 42#include <linux/lockdep.h>
43#include <linux/idr.h> 43#include <linux/idr.h>
44#include <linux/hashtable.h>
44 45
45#include "workqueue_sched.h" 46#include "workqueue_internal.h"
46 47
47enum { 48enum {
48 /* 49 /*
49 * global_cwq flags 50 * worker_pool flags
50 * 51 *
51 * A bound gcwq is either associated or disassociated with its CPU. 52 * A bound pool is either associated or disassociated with its CPU.
52 * While associated (!DISASSOCIATED), all workers are bound to the 53 * While associated (!DISASSOCIATED), all workers are bound to the
53 * CPU and none has %WORKER_UNBOUND set and concurrency management 54 * CPU and none has %WORKER_UNBOUND set and concurrency management
54 * is in effect. 55 * is in effect.
55 * 56 *
56 * While DISASSOCIATED, the cpu may be offline and all workers have 57 * While DISASSOCIATED, the cpu may be offline and all workers have
57 * %WORKER_UNBOUND set and concurrency management disabled, and may 58 * %WORKER_UNBOUND set and concurrency management disabled, and may
58 * be executing on any CPU. The gcwq behaves as an unbound one. 59 * be executing on any CPU. The pool behaves as an unbound one.
59 * 60 *
60 * Note that DISASSOCIATED can be flipped only while holding 61 * Note that DISASSOCIATED can be flipped only while holding
61 * assoc_mutex of all pools on the gcwq to avoid changing binding 62 * assoc_mutex to avoid changing binding state while
62 * state while create_worker() is in progress. 63 * create_worker() is in progress.
63 */ 64 */
64 GCWQ_DISASSOCIATED = 1 << 0, /* cpu can't serve workers */
65 GCWQ_FREEZING = 1 << 1, /* freeze in progress */
66
67 /* pool flags */
68 POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ 65 POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
69 POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */ 66 POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */
67 POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
68 POOL_FREEZING = 1 << 3, /* freeze in progress */
70 69
71 /* worker flags */ 70 /* worker flags */
72 WORKER_STARTED = 1 << 0, /* started */ 71 WORKER_STARTED = 1 << 0, /* started */
@@ -79,11 +78,9 @@ enum {
79 WORKER_NOT_RUNNING = WORKER_PREP | WORKER_UNBOUND | 78 WORKER_NOT_RUNNING = WORKER_PREP | WORKER_UNBOUND |
80 WORKER_CPU_INTENSIVE, 79 WORKER_CPU_INTENSIVE,
81 80
82 NR_WORKER_POOLS = 2, /* # worker pools per gcwq */ 81 NR_STD_WORKER_POOLS = 2, /* # standard pools per cpu */
83 82
84 BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */ 83 BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */
85 BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER,
86 BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1,
87 84
88 MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ 85 MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */
89 IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ 86 IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */
@@ -111,48 +108,24 @@ enum {
111 * P: Preemption protected. Disabling preemption is enough and should 108 * P: Preemption protected. Disabling preemption is enough and should
112 * only be modified and accessed from the local cpu. 109 * only be modified and accessed from the local cpu.
113 * 110 *
114 * L: gcwq->lock protected. Access with gcwq->lock held. 111 * L: pool->lock protected. Access with pool->lock held.
115 * 112 *
116 * X: During normal operation, modification requires gcwq->lock and 113 * X: During normal operation, modification requires pool->lock and should
117 * should be done only from local cpu. Either disabling preemption 114 * be done only from local cpu. Either disabling preemption on local
118 * on local cpu or grabbing gcwq->lock is enough for read access. 115 * cpu or grabbing pool->lock is enough for read access. If
119 * If GCWQ_DISASSOCIATED is set, it's identical to L. 116 * POOL_DISASSOCIATED is set, it's identical to L.
120 * 117 *
121 * F: wq->flush_mutex protected. 118 * F: wq->flush_mutex protected.
122 * 119 *
123 * W: workqueue_lock protected. 120 * W: workqueue_lock protected.
124 */ 121 */
125 122
126struct global_cwq; 123/* struct worker is defined in workqueue_internal.h */
127struct worker_pool;
128
129/*
130 * The poor guys doing the actual heavy lifting. All on-duty workers
131 * are either serving the manager role, on idle list or on busy hash.
132 */
133struct worker {
134 /* on idle list while idle, on busy hash table while busy */
135 union {
136 struct list_head entry; /* L: while idle */
137 struct hlist_node hentry; /* L: while busy */
138 };
139
140 struct work_struct *current_work; /* L: work being processed */
141 struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
142 struct list_head scheduled; /* L: scheduled works */
143 struct task_struct *task; /* I: worker task */
144 struct worker_pool *pool; /* I: the associated pool */
145 /* 64 bytes boundary on 64bit, 32 on 32bit */
146 unsigned long last_active; /* L: last active timestamp */
147 unsigned int flags; /* X: flags */
148 int id; /* I: worker id */
149
150 /* for rebinding worker to CPU */
151 struct work_struct rebind_work; /* L: for busy worker */
152};
153 124
154struct worker_pool { 125struct worker_pool {
155 struct global_cwq *gcwq; /* I: the owning gcwq */ 126 spinlock_t lock; /* the pool lock */
127 unsigned int cpu; /* I: the associated cpu */
128 int id; /* I: pool ID */
156 unsigned int flags; /* X: flags */ 129 unsigned int flags; /* X: flags */
157 130
158 struct list_head worklist; /* L: list of pending works */ 131 struct list_head worklist; /* L: list of pending works */
@@ -165,34 +138,28 @@ struct worker_pool {
165 struct timer_list idle_timer; /* L: worker idle timeout */ 138 struct timer_list idle_timer; /* L: worker idle timeout */
166 struct timer_list mayday_timer; /* L: SOS timer for workers */ 139 struct timer_list mayday_timer; /* L: SOS timer for workers */
167 140
168 struct mutex assoc_mutex; /* protect GCWQ_DISASSOCIATED */ 141 /* workers are chained either in busy_hash or idle_list */
169 struct ida worker_ida; /* L: for worker IDs */ 142 DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
170};
171
172/*
173 * Global per-cpu workqueue. There's one and only one for each cpu
174 * and all works are queued and processed here regardless of their
175 * target workqueues.
176 */
177struct global_cwq {
178 spinlock_t lock; /* the gcwq lock */
179 unsigned int cpu; /* I: the associated cpu */
180 unsigned int flags; /* L: GCWQ_* flags */
181
182 /* workers are chained either in busy_hash or pool idle_list */
183 struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE];
184 /* L: hash of busy workers */ 143 /* L: hash of busy workers */
185 144
186 struct worker_pool pools[NR_WORKER_POOLS]; 145 struct mutex assoc_mutex; /* protect POOL_DISASSOCIATED */
187 /* normal and highpri pools */ 146 struct ida worker_ida; /* L: for worker IDs */
147
148 /*
149 * The current concurrency level. As it's likely to be accessed
150 * from other CPUs during try_to_wake_up(), put it in a separate
151 * cacheline.
152 */
153 atomic_t nr_running ____cacheline_aligned_in_smp;
188} ____cacheline_aligned_in_smp; 154} ____cacheline_aligned_in_smp;
189 155
190/* 156/*
191 * The per-CPU workqueue. The lower WORK_STRUCT_FLAG_BITS of 157 * The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS
192 * work_struct->data are used for flags and thus cwqs need to be 158 * of work_struct->data are used for flags and the remaining high bits
193 * aligned at two's power of the number of flag bits. 159 * point to the pwq; thus, pwqs need to be aligned at two's power of the
160 * number of flag bits.
194 */ 161 */
195struct cpu_workqueue_struct { 162struct pool_workqueue {
196 struct worker_pool *pool; /* I: the associated pool */ 163 struct worker_pool *pool; /* I: the associated pool */
197 struct workqueue_struct *wq; /* I: the owning workqueue */ 164 struct workqueue_struct *wq; /* I: the owning workqueue */
198 int work_color; /* L: current color */ 165 int work_color; /* L: current color */
@@ -241,16 +208,16 @@ typedef unsigned long mayday_mask_t;
241struct workqueue_struct { 208struct workqueue_struct {
242 unsigned int flags; /* W: WQ_* flags */ 209 unsigned int flags; /* W: WQ_* flags */
243 union { 210 union {
244 struct cpu_workqueue_struct __percpu *pcpu; 211 struct pool_workqueue __percpu *pcpu;
245 struct cpu_workqueue_struct *single; 212 struct pool_workqueue *single;
246 unsigned long v; 213 unsigned long v;
247 } cpu_wq; /* I: cwq's */ 214 } pool_wq; /* I: pwq's */
248 struct list_head list; /* W: list of all workqueues */ 215 struct list_head list; /* W: list of all workqueues */
249 216
250 struct mutex flush_mutex; /* protects wq flushing */ 217 struct mutex flush_mutex; /* protects wq flushing */
251 int work_color; /* F: current work color */ 218 int work_color; /* F: current work color */
252 int flush_color; /* F: current flush color */ 219 int flush_color; /* F: current flush color */
253 atomic_t nr_cwqs_to_flush; /* flush in progress */ 220 atomic_t nr_pwqs_to_flush; /* flush in progress */
254 struct wq_flusher *first_flusher; /* F: first flusher */ 221 struct wq_flusher *first_flusher; /* F: first flusher */
255 struct list_head flusher_queue; /* F: flush waiters */ 222 struct list_head flusher_queue; /* F: flush waiters */
256 struct list_head flusher_overflow; /* F: flush overflow list */ 223 struct list_head flusher_overflow; /* F: flush overflow list */
@@ -259,7 +226,7 @@ struct workqueue_struct {
259 struct worker *rescuer; /* I: rescue worker */ 226 struct worker *rescuer; /* I: rescue worker */
260 227
261 int nr_drainers; /* W: drain in progress */ 228 int nr_drainers; /* W: drain in progress */
262 int saved_max_active; /* W: saved cwq max_active */ 229 int saved_max_active; /* W: saved pwq max_active */
263#ifdef CONFIG_LOCKDEP 230#ifdef CONFIG_LOCKDEP
264 struct lockdep_map lockdep_map; 231 struct lockdep_map lockdep_map;
265#endif 232#endif
@@ -280,16 +247,15 @@ EXPORT_SYMBOL_GPL(system_freezable_wq);
280#define CREATE_TRACE_POINTS 247#define CREATE_TRACE_POINTS
281#include <trace/events/workqueue.h> 248#include <trace/events/workqueue.h>
282 249
283#define for_each_worker_pool(pool, gcwq) \ 250#define for_each_std_worker_pool(pool, cpu) \
284 for ((pool) = &(gcwq)->pools[0]; \ 251 for ((pool) = &std_worker_pools(cpu)[0]; \
285 (pool) < &(gcwq)->pools[NR_WORKER_POOLS]; (pool)++) 252 (pool) < &std_worker_pools(cpu)[NR_STD_WORKER_POOLS]; (pool)++)
286 253
287#define for_each_busy_worker(worker, i, pos, gcwq) \ 254#define for_each_busy_worker(worker, i, pool) \
288 for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ 255 hash_for_each(pool->busy_hash, i, worker, hentry)
289 hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry)
290 256
291static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask, 257static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
292 unsigned int sw) 258 unsigned int sw)
293{ 259{
294 if (cpu < nr_cpu_ids) { 260 if (cpu < nr_cpu_ids) {
295 if (sw & 1) { 261 if (sw & 1) {
@@ -300,42 +266,42 @@ static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask,
300 if (sw & 2) 266 if (sw & 2)
301 return WORK_CPU_UNBOUND; 267 return WORK_CPU_UNBOUND;
302 } 268 }
303 return WORK_CPU_NONE; 269 return WORK_CPU_END;
304} 270}
305 271
306static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, 272static inline int __next_pwq_cpu(int cpu, const struct cpumask *mask,
307 struct workqueue_struct *wq) 273 struct workqueue_struct *wq)
308{ 274{
309 return __next_gcwq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2); 275 return __next_wq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2);
310} 276}
311 277
312/* 278/*
313 * CPU iterators 279 * CPU iterators
314 * 280 *
315 * An extra gcwq is defined for an invalid cpu number 281 * An extra cpu number is defined using an invalid cpu number
316 * (WORK_CPU_UNBOUND) to host workqueues which are not bound to any 282 * (WORK_CPU_UNBOUND) to host workqueues which are not bound to any
317 * specific CPU. The following iterators are similar to 283 * specific CPU. The following iterators are similar to for_each_*_cpu()
318 * for_each_*_cpu() iterators but also considers the unbound gcwq. 284 * iterators but also considers the unbound CPU.
319 * 285 *
320 * for_each_gcwq_cpu() : possible CPUs + WORK_CPU_UNBOUND 286 * for_each_wq_cpu() : possible CPUs + WORK_CPU_UNBOUND
321 * for_each_online_gcwq_cpu() : online CPUs + WORK_CPU_UNBOUND 287 * for_each_online_wq_cpu() : online CPUs + WORK_CPU_UNBOUND
322 * for_each_cwq_cpu() : possible CPUs for bound workqueues, 288 * for_each_pwq_cpu() : possible CPUs for bound workqueues,
323 * WORK_CPU_UNBOUND for unbound workqueues 289 * WORK_CPU_UNBOUND for unbound workqueues
324 */ 290 */
325#define for_each_gcwq_cpu(cpu) \ 291#define for_each_wq_cpu(cpu) \
326 for ((cpu) = __next_gcwq_cpu(-1, cpu_possible_mask, 3); \ 292 for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, 3); \
327 (cpu) < WORK_CPU_NONE; \ 293 (cpu) < WORK_CPU_END; \
328 (cpu) = __next_gcwq_cpu((cpu), cpu_possible_mask, 3)) 294 (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, 3))
329 295
330#define for_each_online_gcwq_cpu(cpu) \ 296#define for_each_online_wq_cpu(cpu) \
331 for ((cpu) = __next_gcwq_cpu(-1, cpu_online_mask, 3); \ 297 for ((cpu) = __next_wq_cpu(-1, cpu_online_mask, 3); \
332 (cpu) < WORK_CPU_NONE; \ 298 (cpu) < WORK_CPU_END; \
333 (cpu) = __next_gcwq_cpu((cpu), cpu_online_mask, 3)) 299 (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3))
334 300
335#define for_each_cwq_cpu(cpu, wq) \ 301#define for_each_pwq_cpu(cpu, wq) \
336 for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, (wq)); \ 302 for ((cpu) = __next_pwq_cpu(-1, cpu_possible_mask, (wq)); \
337 (cpu) < WORK_CPU_NONE; \ 303 (cpu) < WORK_CPU_END; \
338 (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq))) 304 (cpu) = __next_pwq_cpu((cpu), cpu_possible_mask, (wq)))
339 305
340#ifdef CONFIG_DEBUG_OBJECTS_WORK 306#ifdef CONFIG_DEBUG_OBJECTS_WORK
341 307
@@ -459,57 +425,70 @@ static LIST_HEAD(workqueues);
459static bool workqueue_freezing; /* W: have wqs started freezing? */ 425static bool workqueue_freezing; /* W: have wqs started freezing? */
460 426
461/* 427/*
462 * The almighty global cpu workqueues. nr_running is the only field 428 * The CPU and unbound standard worker pools. The unbound ones have
463 * which is expected to be used frequently by other cpus via 429 * POOL_DISASSOCIATED set, and their workers have WORKER_UNBOUND set.
464 * try_to_wake_up(). Put it in a separate cacheline.
465 */ 430 */
466static DEFINE_PER_CPU(struct global_cwq, global_cwq); 431static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
467static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, pool_nr_running[NR_WORKER_POOLS]); 432 cpu_std_worker_pools);
433static struct worker_pool unbound_std_worker_pools[NR_STD_WORKER_POOLS];
468 434
469/* 435/* idr of all pools */
470 * Global cpu workqueue and nr_running counter for unbound gcwq. The 436static DEFINE_MUTEX(worker_pool_idr_mutex);
471 * gcwq is always online, has GCWQ_DISASSOCIATED set, and all its 437static DEFINE_IDR(worker_pool_idr);
472 * workers have WORKER_UNBOUND set.
473 */
474static struct global_cwq unbound_global_cwq;
475static atomic_t unbound_pool_nr_running[NR_WORKER_POOLS] = {
476 [0 ... NR_WORKER_POOLS - 1] = ATOMIC_INIT(0), /* always 0 */
477};
478 438
479static int worker_thread(void *__worker); 439static int worker_thread(void *__worker);
480 440
481static int worker_pool_pri(struct worker_pool *pool) 441static struct worker_pool *std_worker_pools(int cpu)
442{
443 if (cpu != WORK_CPU_UNBOUND)
444 return per_cpu(cpu_std_worker_pools, cpu);
445 else
446 return unbound_std_worker_pools;
447}
448
449static int std_worker_pool_pri(struct worker_pool *pool)
482{ 450{
483 return pool - pool->gcwq->pools; 451 return pool - std_worker_pools(pool->cpu);
484} 452}
485 453
486static struct global_cwq *get_gcwq(unsigned int cpu) 454/* allocate ID and assign it to @pool */
455static int worker_pool_assign_id(struct worker_pool *pool)
487{ 456{
488 if (cpu != WORK_CPU_UNBOUND) 457 int ret;
489 return &per_cpu(global_cwq, cpu); 458
490 else 459 mutex_lock(&worker_pool_idr_mutex);
491 return &unbound_global_cwq; 460 ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL);
461 if (ret >= 0)
462 pool->id = ret;
463 mutex_unlock(&worker_pool_idr_mutex);
464
465 return ret < 0 ? ret : 0;
492} 466}
493 467
494static atomic_t *get_pool_nr_running(struct worker_pool *pool) 468/*
469 * Lookup worker_pool by id. The idr currently is built during boot and
470 * never modified. Don't worry about locking for now.
471 */
472static struct worker_pool *worker_pool_by_id(int pool_id)
495{ 473{
496 int cpu = pool->gcwq->cpu; 474 return idr_find(&worker_pool_idr, pool_id);
497 int idx = worker_pool_pri(pool); 475}
498 476
499 if (cpu != WORK_CPU_UNBOUND) 477static struct worker_pool *get_std_worker_pool(int cpu, bool highpri)
500 return &per_cpu(pool_nr_running, cpu)[idx]; 478{
501 else 479 struct worker_pool *pools = std_worker_pools(cpu);
502 return &unbound_pool_nr_running[idx]; 480
481 return &pools[highpri];
503} 482}
504 483
505static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, 484static struct pool_workqueue *get_pwq(unsigned int cpu,
506 struct workqueue_struct *wq) 485 struct workqueue_struct *wq)
507{ 486{
508 if (!(wq->flags & WQ_UNBOUND)) { 487 if (!(wq->flags & WQ_UNBOUND)) {
509 if (likely(cpu < nr_cpu_ids)) 488 if (likely(cpu < nr_cpu_ids))
510 return per_cpu_ptr(wq->cpu_wq.pcpu, cpu); 489 return per_cpu_ptr(wq->pool_wq.pcpu, cpu);
511 } else if (likely(cpu == WORK_CPU_UNBOUND)) 490 } else if (likely(cpu == WORK_CPU_UNBOUND))
512 return wq->cpu_wq.single; 491 return wq->pool_wq.single;
513 return NULL; 492 return NULL;
514} 493}
515 494
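
get_std_worker_pool() above selects between a CPU's two standard pools by indexing with a bool (normal at slot 0, highpri at slot 1), and std_worker_pool_pri() recovers that index by pointer arithmetic against the start of the array. A small standalone sketch of the same layout, using a plain 2-D array and invented names instead of per-cpu data:

#include <assert.h>
#include <stdbool.h>

enum { NR_SKETCH_CPUS = 2, NR_STD_POOLS = 2 };	/* [0] normal, [1] highpri */

struct pool {
	int cpu;
};

static struct pool pools[NR_SKETCH_CPUS][NR_STD_POOLS];

static struct pool *std_pools(int cpu)
{
	return pools[cpu];
}

static struct pool *get_pool(int cpu, bool highpri)
{
	return &std_pools(cpu)[highpri];	/* bool indexes slot 0 or 1 */
}

static int pool_pri(const struct pool *pool)
{
	/* recover the priority from the position in the per-CPU array */
	return pool - std_pools(pool->cpu);
}

int main(void)
{
	int cpu, pri;

	for (cpu = 0; cpu < NR_SKETCH_CPUS; cpu++)
		for (pri = 0; pri < NR_STD_POOLS; pri++)
			pools[cpu][pri].cpu = cpu;

	assert(pool_pri(get_pool(1, true)) == 1);
	assert(pool_pri(get_pool(0, false)) == 0);
	return 0;
}
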
@@ -530,19 +509,19 @@ static int work_next_color(int color)
530} 509}
531 510
532/* 511/*
533 * While queued, %WORK_STRUCT_CWQ is set and non flag bits of a work's data 512 * While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data
534 * contain the pointer to the queued cwq. Once execution starts, the flag 513 * contain the pointer to the queued pwq. Once execution starts, the flag
535 * is cleared and the high bits contain OFFQ flags and CPU number. 514 * is cleared and the high bits contain OFFQ flags and pool ID.
536 * 515 *
537 * set_work_cwq(), set_work_cpu_and_clear_pending(), mark_work_canceling() 516 * set_work_pwq(), set_work_pool_and_clear_pending(), mark_work_canceling()
538 * and clear_work_data() can be used to set the cwq, cpu or clear 517 * and clear_work_data() can be used to set the pwq, pool or clear
539 * work->data. These functions should only be called while the work is 518 * work->data. These functions should only be called while the work is
540 * owned - ie. while the PENDING bit is set. 519 * owned - ie. while the PENDING bit is set.
541 * 520 *
542 * get_work_[g]cwq() can be used to obtain the gcwq or cwq corresponding to 521 * get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq
543 * a work. gcwq is available once the work has been queued anywhere after 522 * corresponding to a work. Pool is available once the work has been
544 * initialization until it is sync canceled. cwq is available only while 523 * queued anywhere after initialization until it is sync canceled. pwq is
545 * the work item is queued. 524 * available only while the work item is queued.
546 * 525 *
547 * %WORK_OFFQ_CANCELING is used to mark a work item which is being 526 * %WORK_OFFQ_CANCELING is used to mark a work item which is being
548 * canceled. While being canceled, a work item may have its PENDING set 527 * canceled. While being canceled, a work item may have its PENDING set
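
The comment above describes how a work item's data word is multiplexed: while queued it holds a flag-aligned pwq pointer plus WORK_STRUCT_* bits, and once execution starts it holds OFFQ flags with the pool ID shifted above them. The following standalone sketch mirrors that encoding with made-up bit positions and a 4-bit flag field; none of these constants are the kernel's.

#include <assert.h>
#include <stdalign.h>

#define F_PENDING	(1UL << 0)
#define F_PWQ		(1UL << 1)	/* set: data carries a pwq pointer */
#define FLAG_BITS	4
#define FLAG_MASK	((1UL << FLAG_BITS) - 1)
#define OFFQ_POOL_SHIFT	FLAG_BITS	/* pool ID sits above the flag bits */

struct pwq { int dummy; };

static unsigned long set_pwq(struct pwq *pwq)
{
	/* the pwq must be aligned so its low FLAG_BITS are free for flags */
	assert(((unsigned long)pwq & FLAG_MASK) == 0);
	return (unsigned long)pwq | F_PENDING | F_PWQ;
}

static unsigned long set_pool_id(int pool_id)
{
	return ((unsigned long)pool_id << OFFQ_POOL_SHIFT) | F_PENDING;
}

static struct pwq *get_pwq(unsigned long data)
{
	return (data & F_PWQ) ? (struct pwq *)(data & ~FLAG_MASK) : NULL;
}

static int get_pool_id(unsigned long data)
{
	return (data & F_PWQ) ? -1 : (int)(data >> OFFQ_POOL_SHIFT);
}

int main(void)
{
	static alignas(1 << FLAG_BITS) struct pwq pwq;
	unsigned long data;

	data = set_pwq(&pwq);		/* queued: pointer plus flag bits */
	assert(get_pwq(data) == &pwq);

	data = set_pool_id(7);		/* off queue: pool ID above OFFQ flags */
	assert(!get_pwq(data) && get_pool_id(data) == 7);
	return 0;
}
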
@@ -556,16 +535,22 @@ static inline void set_work_data(struct work_struct *work, unsigned long data,
556 atomic_long_set(&work->data, data | flags | work_static(work)); 535 atomic_long_set(&work->data, data | flags | work_static(work));
557} 536}
558 537
559static void set_work_cwq(struct work_struct *work, 538static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
560 struct cpu_workqueue_struct *cwq,
561 unsigned long extra_flags) 539 unsigned long extra_flags)
562{ 540{
563 set_work_data(work, (unsigned long)cwq, 541 set_work_data(work, (unsigned long)pwq,
564 WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags); 542 WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
565} 543}
566 544
567static void set_work_cpu_and_clear_pending(struct work_struct *work, 545static void set_work_pool_and_keep_pending(struct work_struct *work,
568 unsigned int cpu) 546 int pool_id)
547{
548 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
549 WORK_STRUCT_PENDING);
550}
551
552static void set_work_pool_and_clear_pending(struct work_struct *work,
553 int pool_id)
569{ 554{
570 /* 555 /*
571 * The following wmb is paired with the implied mb in 556 * The following wmb is paired with the implied mb in
@@ -574,67 +559,92 @@ static void set_work_cpu_and_clear_pending(struct work_struct *work,
574 * owner. 559 * owner.
575 */ 560 */
576 smp_wmb(); 561 smp_wmb();
577 set_work_data(work, (unsigned long)cpu << WORK_OFFQ_CPU_SHIFT, 0); 562 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
578} 563}
579 564
580static void clear_work_data(struct work_struct *work) 565static void clear_work_data(struct work_struct *work)
581{ 566{
582 smp_wmb(); /* see set_work_cpu_and_clear_pending() */ 567 smp_wmb(); /* see set_work_pool_and_clear_pending() */
583 set_work_data(work, WORK_STRUCT_NO_CPU, 0); 568 set_work_data(work, WORK_STRUCT_NO_POOL, 0);
584} 569}
585 570
586static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work) 571static struct pool_workqueue *get_work_pwq(struct work_struct *work)
587{ 572{
588 unsigned long data = atomic_long_read(&work->data); 573 unsigned long data = atomic_long_read(&work->data);
589 574
590 if (data & WORK_STRUCT_CWQ) 575 if (data & WORK_STRUCT_PWQ)
591 return (void *)(data & WORK_STRUCT_WQ_DATA_MASK); 576 return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
592 else 577 else
593 return NULL; 578 return NULL;
594} 579}
595 580
596static struct global_cwq *get_work_gcwq(struct work_struct *work) 581/**
582 * get_work_pool - return the worker_pool a given work was associated with
583 * @work: the work item of interest
584 *
585 * Return the worker_pool @work was last associated with. %NULL if none.
586 */
587static struct worker_pool *get_work_pool(struct work_struct *work)
597{ 588{
598 unsigned long data = atomic_long_read(&work->data); 589 unsigned long data = atomic_long_read(&work->data);
599 unsigned int cpu; 590 struct worker_pool *pool;
591 int pool_id;
600 592
601 if (data & WORK_STRUCT_CWQ) 593 if (data & WORK_STRUCT_PWQ)
602 return ((struct cpu_workqueue_struct *) 594 return ((struct pool_workqueue *)
603 (data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq; 595 (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
604 596
605 cpu = data >> WORK_OFFQ_CPU_SHIFT; 597 pool_id = data >> WORK_OFFQ_POOL_SHIFT;
606 if (cpu == WORK_CPU_NONE) 598 if (pool_id == WORK_OFFQ_POOL_NONE)
607 return NULL; 599 return NULL;
608 600
609 BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND); 601 pool = worker_pool_by_id(pool_id);
610 return get_gcwq(cpu); 602 WARN_ON_ONCE(!pool);
603 return pool;
604}
605
606/**
607 * get_work_pool_id - return the worker pool ID a given work is associated with
608 * @work: the work item of interest
609 *
610 * Return the worker_pool ID @work was last associated with.
611 * %WORK_OFFQ_POOL_NONE if none.
612 */
613static int get_work_pool_id(struct work_struct *work)
614{
615 unsigned long data = atomic_long_read(&work->data);
616
617 if (data & WORK_STRUCT_PWQ)
618 return ((struct pool_workqueue *)
619 (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
620
621 return data >> WORK_OFFQ_POOL_SHIFT;
611} 622}
612 623
613static void mark_work_canceling(struct work_struct *work) 624static void mark_work_canceling(struct work_struct *work)
614{ 625{
615 struct global_cwq *gcwq = get_work_gcwq(work); 626 unsigned long pool_id = get_work_pool_id(work);
616 unsigned long cpu = gcwq ? gcwq->cpu : WORK_CPU_NONE;
617 627
618 set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING, 628 pool_id <<= WORK_OFFQ_POOL_SHIFT;
619 WORK_STRUCT_PENDING); 629 set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
620} 630}
621 631
622static bool work_is_canceling(struct work_struct *work) 632static bool work_is_canceling(struct work_struct *work)
623{ 633{
624 unsigned long data = atomic_long_read(&work->data); 634 unsigned long data = atomic_long_read(&work->data);
625 635
626 return !(data & WORK_STRUCT_CWQ) && (data & WORK_OFFQ_CANCELING); 636 return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
627} 637}
628 638
629/* 639/*
630 * Policy functions. These define the policies on how the global worker 640 * Policy functions. These define the policies on how the global worker
631 * pools are managed. Unless noted otherwise, these functions assume that 641 * pools are managed. Unless noted otherwise, these functions assume that
632 * they're being called with gcwq->lock held. 642 * they're being called with pool->lock held.
633 */ 643 */
634 644
635static bool __need_more_worker(struct worker_pool *pool) 645static bool __need_more_worker(struct worker_pool *pool)
636{ 646{
637 return !atomic_read(get_pool_nr_running(pool)); 647 return !atomic_read(&pool->nr_running);
638} 648}
639 649
640/* 650/*
@@ -642,7 +652,7 @@ static bool __need_more_worker(struct worker_pool *pool)
642 * running workers. 652 * running workers.
643 * 653 *
644 * Note that, because unbound workers never contribute to nr_running, this 654 * Note that, because unbound workers never contribute to nr_running, this
645 * function will always return %true for unbound gcwq as long as the 655 * function will always return %true for unbound pools as long as the
646 * worklist isn't empty. 656 * worklist isn't empty.
647 */ 657 */
648static bool need_more_worker(struct worker_pool *pool) 658static bool need_more_worker(struct worker_pool *pool)
@@ -659,9 +669,8 @@ static bool may_start_working(struct worker_pool *pool)
659/* Do I need to keep working? Called from currently running workers. */ 669/* Do I need to keep working? Called from currently running workers. */
660static bool keep_working(struct worker_pool *pool) 670static bool keep_working(struct worker_pool *pool)
661{ 671{
662 atomic_t *nr_running = get_pool_nr_running(pool); 672 return !list_empty(&pool->worklist) &&
663 673 atomic_read(&pool->nr_running) <= 1;
664 return !list_empty(&pool->worklist) && atomic_read(nr_running) <= 1;
665} 674}
666 675
667/* Do we need a new worker? Called from manager. */ 676/* Do we need a new worker? Called from manager. */
@@ -714,7 +723,7 @@ static struct worker *first_worker(struct worker_pool *pool)
714 * Wake up the first idle worker of @pool. 723 * Wake up the first idle worker of @pool.
715 * 724 *
716 * CONTEXT: 725 * CONTEXT:
717 * spin_lock_irq(gcwq->lock). 726 * spin_lock_irq(pool->lock).
718 */ 727 */
719static void wake_up_worker(struct worker_pool *pool) 728static void wake_up_worker(struct worker_pool *pool)
720{ 729{
@@ -739,8 +748,10 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
739{ 748{
740 struct worker *worker = kthread_data(task); 749 struct worker *worker = kthread_data(task);
741 750
742 if (!(worker->flags & WORKER_NOT_RUNNING)) 751 if (!(worker->flags & WORKER_NOT_RUNNING)) {
743 atomic_inc(get_pool_nr_running(worker->pool)); 752 WARN_ON_ONCE(worker->pool->cpu != cpu);
753 atomic_inc(&worker->pool->nr_running);
754 }
744} 755}
745 756
746/** 757/**
@@ -762,12 +773,18 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
762 unsigned int cpu) 773 unsigned int cpu)
763{ 774{
764 struct worker *worker = kthread_data(task), *to_wakeup = NULL; 775 struct worker *worker = kthread_data(task), *to_wakeup = NULL;
765 struct worker_pool *pool = worker->pool; 776 struct worker_pool *pool;
766 atomic_t *nr_running = get_pool_nr_running(pool);
767 777
778 /*
779 * Rescuers, which may not have all the fields set up like normal
780 * workers, also reach here, let's not access anything before
781 * checking NOT_RUNNING.
782 */
768 if (worker->flags & WORKER_NOT_RUNNING) 783 if (worker->flags & WORKER_NOT_RUNNING)
769 return NULL; 784 return NULL;
770 785
786 pool = worker->pool;
787
771 /* this can only happen on the local cpu */ 788 /* this can only happen on the local cpu */
772 BUG_ON(cpu != raw_smp_processor_id()); 789 BUG_ON(cpu != raw_smp_processor_id());
773 790
@@ -779,10 +796,11 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
779 * NOT_RUNNING is clear. This means that we're bound to and 796 * NOT_RUNNING is clear. This means that we're bound to and
780 * running on the local cpu w/ rq lock held and preemption 797 * running on the local cpu w/ rq lock held and preemption
781 * disabled, which in turn means that none else could be 798 * disabled, which in turn means that none else could be
782 * manipulating idle_list, so dereferencing idle_list without gcwq 799 * manipulating idle_list, so dereferencing idle_list without pool
783 * lock is safe. 800 * lock is safe.
784 */ 801 */
785 if (atomic_dec_and_test(nr_running) && !list_empty(&pool->worklist)) 802 if (atomic_dec_and_test(&pool->nr_running) &&
803 !list_empty(&pool->worklist))
786 to_wakeup = first_worker(pool); 804 to_wakeup = first_worker(pool);
787 return to_wakeup ? to_wakeup->task : NULL; 805 return to_wakeup ? to_wakeup->task : NULL;
788} 806}
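
wq_worker_waking_up() and wq_worker_sleeping() above are the scheduler hooks that keep pool->nr_running in step with how many workers are actually runnable; when the last runnable worker blocks while the worklist is non-empty, the first idle worker is handed back to the scheduler to wake. A toy userspace model of that handoff, with invented types and none of the locking or per-CPU constraints of the real code:

#include <stdatomic.h>
#include <stdio.h>

struct pool {
	atomic_int nr_running;		/* workers currently runnable */
	int nr_pending;			/* queued work items (toy stand-in) */
	const char *first_idle;		/* first idle worker to wake (toy) */
};

/* scheduler hook: a running worker of @pool is about to block */
static const char *worker_sleeping(struct pool *pool)
{
	/* last runnable worker and work still pending: hand off */
	if (atomic_fetch_sub(&pool->nr_running, 1) == 1 && pool->nr_pending)
		return pool->first_idle;
	return NULL;
}

/* scheduler hook: a worker of @pool became runnable again */
static void worker_waking_up(struct pool *pool)
{
	atomic_fetch_add(&pool->nr_running, 1);
}

int main(void)
{
	struct pool pool = { .nr_running = 1, .nr_pending = 1,
			     .first_idle = "kworker/0:1" };
	const char *wake = worker_sleeping(&pool);

	if (wake)
		printf("wake %s\n", wake);	/* prints: wake kworker/0:1 */
	worker_waking_up(&pool);
	return 0;
}
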
@@ -798,7 +816,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
798 * woken up. 816 * woken up.
799 * 817 *
800 * CONTEXT: 818 * CONTEXT:
801 * spin_lock_irq(gcwq->lock) 819 * spin_lock_irq(pool->lock)
802 */ 820 */
803static inline void worker_set_flags(struct worker *worker, unsigned int flags, 821static inline void worker_set_flags(struct worker *worker, unsigned int flags,
804 bool wakeup) 822 bool wakeup)
@@ -814,14 +832,12 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags,
814 */ 832 */
815 if ((flags & WORKER_NOT_RUNNING) && 833 if ((flags & WORKER_NOT_RUNNING) &&
816 !(worker->flags & WORKER_NOT_RUNNING)) { 834 !(worker->flags & WORKER_NOT_RUNNING)) {
817 atomic_t *nr_running = get_pool_nr_running(pool);
818
819 if (wakeup) { 835 if (wakeup) {
820 if (atomic_dec_and_test(nr_running) && 836 if (atomic_dec_and_test(&pool->nr_running) &&
821 !list_empty(&pool->worklist)) 837 !list_empty(&pool->worklist))
822 wake_up_worker(pool); 838 wake_up_worker(pool);
823 } else 839 } else
824 atomic_dec(nr_running); 840 atomic_dec(&pool->nr_running);
825 } 841 }
826 842
827 worker->flags |= flags; 843 worker->flags |= flags;
@@ -835,7 +851,7 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags,
835 * Clear @flags in @worker->flags and adjust nr_running accordingly. 851 * Clear @flags in @worker->flags and adjust nr_running accordingly.
836 * 852 *
837 * CONTEXT: 853 * CONTEXT:
838 * spin_lock_irq(gcwq->lock) 854 * spin_lock_irq(pool->lock)
839 */ 855 */
840static inline void worker_clr_flags(struct worker *worker, unsigned int flags) 856static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
841{ 857{
@@ -853,87 +869,55 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
853 */ 869 */
854 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) 870 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
855 if (!(worker->flags & WORKER_NOT_RUNNING)) 871 if (!(worker->flags & WORKER_NOT_RUNNING))
856 atomic_inc(get_pool_nr_running(pool)); 872 atomic_inc(&pool->nr_running);
857} 873}
858 874
859/** 875/**
860 * busy_worker_head - return the busy hash head for a work 876 * find_worker_executing_work - find worker which is executing a work
861 * @gcwq: gcwq of interest 877 * @pool: pool of interest
862 * @work: work to be hashed
863 *
864 * Return hash head of @gcwq for @work.
865 *
866 * CONTEXT:
867 * spin_lock_irq(gcwq->lock).
868 *
869 * RETURNS:
870 * Pointer to the hash head.
871 */
872static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
873 struct work_struct *work)
874{
875 const int base_shift = ilog2(sizeof(struct work_struct));
876 unsigned long v = (unsigned long)work;
877
878 /* simple shift and fold hash, do we need something better? */
879 v >>= base_shift;
880 v += v >> BUSY_WORKER_HASH_ORDER;
881 v &= BUSY_WORKER_HASH_MASK;
882
883 return &gcwq->busy_hash[v];
884}
885
886/**
887 * __find_worker_executing_work - find worker which is executing a work
888 * @gcwq: gcwq of interest
889 * @bwh: hash head as returned by busy_worker_head()
890 * @work: work to find worker for 878 * @work: work to find worker for
891 * 879 *
892 * Find a worker which is executing @work on @gcwq. @bwh should be 880 * Find a worker which is executing @work on @pool by searching
893 * the hash head obtained by calling busy_worker_head() with the same 881 * @pool->busy_hash which is keyed by the address of @work. For a worker
894 * work. 882 * to match, its current execution should match the address of @work and
883 * its work function. This is to avoid unwanted dependency between
884 * unrelated work executions through a work item being recycled while still
885 * being executed.
886 *
887 * This is a bit tricky. A work item may be freed once its execution
888 * starts and nothing prevents the freed area from being recycled for
889 * another work item. If the same work item address ends up being reused
890 * before the original execution finishes, workqueue will identify the
891 * recycled work item as currently executing and make it wait until the
892 * current execution finishes, introducing an unwanted dependency.
893 *
894 * This function checks the work item address, work function and workqueue
895 * to avoid false positives. Note that this isn't complete as one may
896 * construct a work function which can introduce dependency onto itself
897 * through a recycled work item. Well, if somebody wants to shoot oneself
898 * in the foot that badly, there's only so much we can do, and if such
899 * deadlock actually occurs, it should be easy to locate the culprit work
900 * function.
895 * 901 *
896 * CONTEXT: 902 * CONTEXT:
897 * spin_lock_irq(gcwq->lock). 903 * spin_lock_irq(pool->lock).
898 * 904 *
899 * RETURNS: 905 * RETURNS:
900 * Pointer to worker which is executing @work if found, NULL 906 * Pointer to worker which is executing @work if found, NULL
901 * otherwise. 907 * otherwise.
902 */ 908 */
903static struct worker *__find_worker_executing_work(struct global_cwq *gcwq, 909static struct worker *find_worker_executing_work(struct worker_pool *pool,
904 struct hlist_head *bwh, 910 struct work_struct *work)
905 struct work_struct *work)
906{ 911{
907 struct worker *worker; 912 struct worker *worker;
908 struct hlist_node *tmp;
909 913
910 hlist_for_each_entry(worker, tmp, bwh, hentry) 914 hash_for_each_possible(pool->busy_hash, worker, hentry,
911 if (worker->current_work == work) 915 (unsigned long)work)
916 if (worker->current_work == work &&
917 worker->current_func == work->func)
912 return worker; 918 return worker;
913 return NULL;
914}
915 919
916/** 920 return NULL;
917 * find_worker_executing_work - find worker which is executing a work
918 * @gcwq: gcwq of interest
919 * @work: work to find worker for
920 *
921 * Find a worker which is executing @work on @gcwq. This function is
922 * identical to __find_worker_executing_work() except that this
923 * function calculates @bwh itself.
924 *
925 * CONTEXT:
926 * spin_lock_irq(gcwq->lock).
927 *
928 * RETURNS:
929 * Pointer to worker which is executing @work if found, NULL
930 * otherwise.
931 */
932static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
933 struct work_struct *work)
934{
935 return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work),
936 work);
937} 921}
938 922
939/** 923/**
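
find_worker_executing_work() above hashes busy workers by the address of the work item they are running and reports a match only when both the address and the work function agree, so a freed and recycled work item at the same address cannot be mistaken for the one still executing. A self-contained sketch of that matching rule, using an open-chained table and invented names in place of the kernel's hashtable helpers:

#include <stddef.h>

#define HASH_BITS	6			/* 64 buckets, as above */
#define HASH_SIZE	(1 << HASH_BITS)

typedef void (*work_func_t)(void *);

struct busy_worker {
	struct busy_worker *next;		/* hash chain */
	void *current_work;			/* address of the work being run */
	work_func_t current_func;		/* its function when it started */
};

static struct busy_worker *busy_hash[HASH_SIZE];

static unsigned int hash_work(const void *work)
{
	return ((unsigned long)work >> 4) & (HASH_SIZE - 1);
}

static struct busy_worker *find_worker_executing(void *work, work_func_t func)
{
	struct busy_worker *w;

	for (w = busy_hash[hash_work(work)]; w; w = w->next)
		if (w->current_work == work && w->current_func == func)
			return w;
	return NULL;		/* same address, different func: no match */
}

static void fn_a(void *arg) { (void)arg; }
static void fn_b(void *arg) { (void)arg; }

int main(void)
{
	static char work[16];			/* stands in for a work item */
	struct busy_worker me = { NULL, work, fn_a };

	busy_hash[hash_work(work)] = &me;

	/* a hit requires both the work address and the function to agree */
	return !(find_worker_executing(work, fn_a) == &me &&
		 find_worker_executing(work, fn_b) == NULL);
}
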
@@ -951,7 +935,7 @@ static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
951 * nested inside outer list_for_each_entry_safe(). 935 * nested inside outer list_for_each_entry_safe().
952 * 936 *
953 * CONTEXT: 937 * CONTEXT:
954 * spin_lock_irq(gcwq->lock). 938 * spin_lock_irq(pool->lock).
955 */ 939 */
956static void move_linked_works(struct work_struct *work, struct list_head *head, 940static void move_linked_works(struct work_struct *work, struct list_head *head,
957 struct work_struct **nextp) 941 struct work_struct **nextp)
@@ -977,67 +961,67 @@ static void move_linked_works(struct work_struct *work, struct list_head *head,
977 *nextp = n; 961 *nextp = n;
978} 962}
979 963
980static void cwq_activate_delayed_work(struct work_struct *work) 964static void pwq_activate_delayed_work(struct work_struct *work)
981{ 965{
982 struct cpu_workqueue_struct *cwq = get_work_cwq(work); 966 struct pool_workqueue *pwq = get_work_pwq(work);
983 967
984 trace_workqueue_activate_work(work); 968 trace_workqueue_activate_work(work);
985 move_linked_works(work, &cwq->pool->worklist, NULL); 969 move_linked_works(work, &pwq->pool->worklist, NULL);
986 __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); 970 __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
987 cwq->nr_active++; 971 pwq->nr_active++;
988} 972}
989 973
990static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) 974static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
991{ 975{
992 struct work_struct *work = list_first_entry(&cwq->delayed_works, 976 struct work_struct *work = list_first_entry(&pwq->delayed_works,
993 struct work_struct, entry); 977 struct work_struct, entry);
994 978
995 cwq_activate_delayed_work(work); 979 pwq_activate_delayed_work(work);
996} 980}
997 981
998/** 982/**
999 * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight 983 * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
1000 * @cwq: cwq of interest 984 * @pwq: pwq of interest
1001 * @color: color of work which left the queue 985 * @color: color of work which left the queue
1002 * 986 *
1003 * A work either has completed or is removed from pending queue, 987 * A work either has completed or is removed from pending queue,
1004 * decrement nr_in_flight of its cwq and handle workqueue flushing. 988 * decrement nr_in_flight of its pwq and handle workqueue flushing.
1005 * 989 *
1006 * CONTEXT: 990 * CONTEXT:
1007 * spin_lock_irq(gcwq->lock). 991 * spin_lock_irq(pool->lock).
1008 */ 992 */
1009static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) 993static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
1010{ 994{
1011 /* ignore uncolored works */ 995 /* ignore uncolored works */
1012 if (color == WORK_NO_COLOR) 996 if (color == WORK_NO_COLOR)
1013 return; 997 return;
1014 998
1015 cwq->nr_in_flight[color]--; 999 pwq->nr_in_flight[color]--;
1016 1000
1017 cwq->nr_active--; 1001 pwq->nr_active--;
1018 if (!list_empty(&cwq->delayed_works)) { 1002 if (!list_empty(&pwq->delayed_works)) {
1019 /* one down, submit a delayed one */ 1003 /* one down, submit a delayed one */
1020 if (cwq->nr_active < cwq->max_active) 1004 if (pwq->nr_active < pwq->max_active)
1021 cwq_activate_first_delayed(cwq); 1005 pwq_activate_first_delayed(pwq);
1022 } 1006 }
1023 1007
1024 /* is flush in progress and are we at the flushing tip? */ 1008 /* is flush in progress and are we at the flushing tip? */
1025 if (likely(cwq->flush_color != color)) 1009 if (likely(pwq->flush_color != color))
1026 return; 1010 return;
1027 1011
1028 /* are there still in-flight works? */ 1012 /* are there still in-flight works? */
1029 if (cwq->nr_in_flight[color]) 1013 if (pwq->nr_in_flight[color])
1030 return; 1014 return;
1031 1015
1032 /* this cwq is done, clear flush_color */ 1016 /* this pwq is done, clear flush_color */
1033 cwq->flush_color = -1; 1017 pwq->flush_color = -1;
1034 1018
1035 /* 1019 /*
1036 * If this was the last cwq, wake up the first flusher. It 1020 * If this was the last pwq, wake up the first flusher. It
1037 * will handle the rest. 1021 * will handle the rest.
1038 */ 1022 */
1039 if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush)) 1023 if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
1040 complete(&cwq->wq->first_flusher->done); 1024 complete(&pwq->wq->first_flusher->done);
1041} 1025}
1042 1026
1043/** 1027/**
@@ -1068,7 +1052,8 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
1068static int try_to_grab_pending(struct work_struct *work, bool is_dwork, 1052static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
1069 unsigned long *flags) 1053 unsigned long *flags)
1070{ 1054{
1071 struct global_cwq *gcwq; 1055 struct worker_pool *pool;
1056 struct pool_workqueue *pwq;
1072 1057
1073 local_irq_save(*flags); 1058 local_irq_save(*flags);
1074 1059
@@ -1093,41 +1078,43 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
1093 * The queueing is in progress, or it is already queued. Try to 1078 * The queueing is in progress, or it is already queued. Try to
1094 * steal it from ->worklist without clearing WORK_STRUCT_PENDING. 1079 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
1095 */ 1080 */
1096 gcwq = get_work_gcwq(work); 1081 pool = get_work_pool(work);
1097 if (!gcwq) 1082 if (!pool)
1098 goto fail; 1083 goto fail;
1099 1084
1100 spin_lock(&gcwq->lock); 1085 spin_lock(&pool->lock);
1101 if (!list_empty(&work->entry)) { 1086 /*
1087 * work->data is guaranteed to point to pwq only while the work
1088 * item is queued on pwq->wq, and both updating work->data to point
1089 * to pwq on queueing and to pool on dequeueing are done under
1090 * pwq->pool->lock. This in turn guarantees that, if work->data
1091 * points to pwq which is associated with a locked pool, the work
1092 * item is currently queued on that pool.
1093 */
1094 pwq = get_work_pwq(work);
1095 if (pwq && pwq->pool == pool) {
1096 debug_work_deactivate(work);
1097
1102 /* 1098 /*
1103 * This work is queued, but perhaps we locked the wrong gcwq. 1099 * A delayed work item cannot be grabbed directly because
1104 * In that case we must see the new value after rmb(), see 1100 * it might have linked NO_COLOR work items which, if left
1105 * insert_work()->wmb(). 1101 * on the delayed_list, will confuse pwq->nr_active
1102 * management later on and cause stall. Make sure the work
1103 * item is activated before grabbing.
1106 */ 1104 */
1107 smp_rmb(); 1105 if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
1108 if (gcwq == get_work_gcwq(work)) { 1106 pwq_activate_delayed_work(work);
1109 debug_work_deactivate(work);
1110 1107
1111 /* 1108 list_del_init(&work->entry);
1112 * A delayed work item cannot be grabbed directly 1109 pwq_dec_nr_in_flight(get_work_pwq(work), get_work_color(work));
1113 * because it might have linked NO_COLOR work items
1114 * which, if left on the delayed_list, will confuse
1115 * cwq->nr_active management later on and cause
1116 * stall. Make sure the work item is activated
1117 * before grabbing.
1118 */
1119 if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
1120 cwq_activate_delayed_work(work);
1121 1110
1122 list_del_init(&work->entry); 1111 /* work->data points to pwq iff queued, point to pool */
1123 cwq_dec_nr_in_flight(get_work_cwq(work), 1112 set_work_pool_and_keep_pending(work, pool->id);
1124 get_work_color(work));
1125 1113
1126 spin_unlock(&gcwq->lock); 1114 spin_unlock(&pool->lock);
1127 return 1; 1115 return 1;
1128 }
1129 } 1116 }
1130 spin_unlock(&gcwq->lock); 1117 spin_unlock(&pool->lock);
1131fail: 1118fail:
1132 local_irq_restore(*flags); 1119 local_irq_restore(*flags);
1133 if (work_is_canceling(work)) 1120 if (work_is_canceling(work))
@@ -1137,33 +1124,25 @@ fail:
1137} 1124}
1138 1125
1139/** 1126/**
1140 * insert_work - insert a work into gcwq 1127 * insert_work - insert a work into a pool
1141 * @cwq: cwq @work belongs to 1128 * @pwq: pwq @work belongs to
1142 * @work: work to insert 1129 * @work: work to insert
1143 * @head: insertion point 1130 * @head: insertion point
1144 * @extra_flags: extra WORK_STRUCT_* flags to set 1131 * @extra_flags: extra WORK_STRUCT_* flags to set
1145 * 1132 *
1146 * Insert @work which belongs to @cwq into @gcwq after @head. 1133 * Insert @work which belongs to @pwq after @head. @extra_flags is or'd to
1147 * @extra_flags is or'd to work_struct flags. 1134 * work_struct flags.
1148 * 1135 *
1149 * CONTEXT: 1136 * CONTEXT:
1150 * spin_lock_irq(gcwq->lock). 1137 * spin_lock_irq(pool->lock).
1151 */ 1138 */
1152static void insert_work(struct cpu_workqueue_struct *cwq, 1139static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
1153 struct work_struct *work, struct list_head *head, 1140 struct list_head *head, unsigned int extra_flags)
1154 unsigned int extra_flags)
1155{ 1141{
1156 struct worker_pool *pool = cwq->pool; 1142 struct worker_pool *pool = pwq->pool;
1157 1143
1158 /* we own @work, set data and link */ 1144 /* we own @work, set data and link */
1159 set_work_cwq(work, cwq, extra_flags); 1145 set_work_pwq(work, pwq, extra_flags);
1160
1161 /*
1162 * Ensure that we get the right work->data if we see the
1163 * result of list_add() below, see try_to_grab_pending().
1164 */
1165 smp_wmb();
1166
1167 list_add_tail(&work->entry, head); 1146 list_add_tail(&work->entry, head);
1168 1147
1169 /* 1148 /*
@@ -1179,41 +1158,24 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
1179 1158
1180/* 1159/*
1181 * Test whether @work is being queued from another work executing on the 1160 * Test whether @work is being queued from another work executing on the
1182 * same workqueue. This is rather expensive and should only be used from 1161 * same workqueue.
1183 * cold paths.
1184 */ 1162 */
1185static bool is_chained_work(struct workqueue_struct *wq) 1163static bool is_chained_work(struct workqueue_struct *wq)
1186{ 1164{
1187 unsigned long flags; 1165 struct worker *worker;
1188 unsigned int cpu;
1189
1190 for_each_gcwq_cpu(cpu) {
1191 struct global_cwq *gcwq = get_gcwq(cpu);
1192 struct worker *worker;
1193 struct hlist_node *pos;
1194 int i;
1195 1166
1196 spin_lock_irqsave(&gcwq->lock, flags); 1167 worker = current_wq_worker();
1197 for_each_busy_worker(worker, i, pos, gcwq) { 1168 /*
1198 if (worker->task != current) 1169 * Return %true iff I'm a worker execuing a work item on @wq. If
1199 continue; 1170 * I'm @worker, it's safe to dereference it without locking.
1200 spin_unlock_irqrestore(&gcwq->lock, flags); 1171 */
1201 /* 1172 return worker && worker->current_pwq->wq == wq;
1202 * I'm @worker, no locking necessary. See if @work
1203 * is headed to the same workqueue.
1204 */
1205 return worker->current_cwq->wq == wq;
1206 }
1207 spin_unlock_irqrestore(&gcwq->lock, flags);
1208 }
1209 return false;
1210} 1173}
1211 1174
1212static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, 1175static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
1213 struct work_struct *work) 1176 struct work_struct *work)
1214{ 1177{
1215 struct global_cwq *gcwq; 1178 struct pool_workqueue *pwq;
1216 struct cpu_workqueue_struct *cwq;
1217 struct list_head *worklist; 1179 struct list_head *worklist;
1218 unsigned int work_flags; 1180 unsigned int work_flags;
1219 unsigned int req_cpu = cpu; 1181 unsigned int req_cpu = cpu;
@@ -1233,9 +1195,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
1233 WARN_ON_ONCE(!is_chained_work(wq))) 1195 WARN_ON_ONCE(!is_chained_work(wq)))
1234 return; 1196 return;
1235 1197
1236 /* determine gcwq to use */ 1198 /* determine the pwq to use */
1237 if (!(wq->flags & WQ_UNBOUND)) { 1199 if (!(wq->flags & WQ_UNBOUND)) {
1238 struct global_cwq *last_gcwq; 1200 struct worker_pool *last_pool;
1239 1201
1240 if (cpu == WORK_CPU_UNBOUND) 1202 if (cpu == WORK_CPU_UNBOUND)
1241 cpu = raw_smp_processor_id(); 1203 cpu = raw_smp_processor_id();
@@ -1246,55 +1208,54 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
1246 * work needs to be queued on that cpu to guarantee 1208 * work needs to be queued on that cpu to guarantee
1247 * non-reentrancy. 1209 * non-reentrancy.
1248 */ 1210 */
1249 gcwq = get_gcwq(cpu); 1211 pwq = get_pwq(cpu, wq);
1250 last_gcwq = get_work_gcwq(work); 1212 last_pool = get_work_pool(work);
1251 1213
1252 if (last_gcwq && last_gcwq != gcwq) { 1214 if (last_pool && last_pool != pwq->pool) {
1253 struct worker *worker; 1215 struct worker *worker;
1254 1216
1255 spin_lock(&last_gcwq->lock); 1217 spin_lock(&last_pool->lock);
1256 1218
1257 worker = find_worker_executing_work(last_gcwq, work); 1219 worker = find_worker_executing_work(last_pool, work);
1258 1220
1259 if (worker && worker->current_cwq->wq == wq) 1221 if (worker && worker->current_pwq->wq == wq) {
1260 gcwq = last_gcwq; 1222 pwq = get_pwq(last_pool->cpu, wq);
1261 else { 1223 } else {
1262 /* meh... not running there, queue here */ 1224 /* meh... not running there, queue here */
1263 spin_unlock(&last_gcwq->lock); 1225 spin_unlock(&last_pool->lock);
1264 spin_lock(&gcwq->lock); 1226 spin_lock(&pwq->pool->lock);
1265 } 1227 }
1266 } else { 1228 } else {
1267 spin_lock(&gcwq->lock); 1229 spin_lock(&pwq->pool->lock);
1268 } 1230 }
1269 } else { 1231 } else {
1270 gcwq = get_gcwq(WORK_CPU_UNBOUND); 1232 pwq = get_pwq(WORK_CPU_UNBOUND, wq);
1271 spin_lock(&gcwq->lock); 1233 spin_lock(&pwq->pool->lock);
1272 } 1234 }
1273 1235
1274 /* gcwq determined, get cwq and queue */ 1236 /* pwq determined, queue */
1275 cwq = get_cwq(gcwq->cpu, wq); 1237 trace_workqueue_queue_work(req_cpu, pwq, work);
1276 trace_workqueue_queue_work(req_cpu, cwq, work);
1277 1238
1278 if (WARN_ON(!list_empty(&work->entry))) { 1239 if (WARN_ON(!list_empty(&work->entry))) {
1279 spin_unlock(&gcwq->lock); 1240 spin_unlock(&pwq->pool->lock);
1280 return; 1241 return;
1281 } 1242 }
1282 1243
1283 cwq->nr_in_flight[cwq->work_color]++; 1244 pwq->nr_in_flight[pwq->work_color]++;
1284 work_flags = work_color_to_flags(cwq->work_color); 1245 work_flags = work_color_to_flags(pwq->work_color);
1285 1246
1286 if (likely(cwq->nr_active < cwq->max_active)) { 1247 if (likely(pwq->nr_active < pwq->max_active)) {
1287 trace_workqueue_activate_work(work); 1248 trace_workqueue_activate_work(work);
1288 cwq->nr_active++; 1249 pwq->nr_active++;
1289 worklist = &cwq->pool->worklist; 1250 worklist = &pwq->pool->worklist;
1290 } else { 1251 } else {
1291 work_flags |= WORK_STRUCT_DELAYED; 1252 work_flags |= WORK_STRUCT_DELAYED;
1292 worklist = &cwq->delayed_works; 1253 worklist = &pwq->delayed_works;
1293 } 1254 }
1294 1255
1295 insert_work(cwq, work, worklist, work_flags); 1256 insert_work(pwq, work, worklist, work_flags);
1296 1257
1297 spin_unlock(&gcwq->lock); 1258 spin_unlock(&pwq->pool->lock);
1298} 1259}
1299 1260
1300/** 1261/**
@@ -1345,51 +1306,37 @@ EXPORT_SYMBOL_GPL(queue_work);
1345void delayed_work_timer_fn(unsigned long __data) 1306void delayed_work_timer_fn(unsigned long __data)
1346{ 1307{
1347 struct delayed_work *dwork = (struct delayed_work *)__data; 1308 struct delayed_work *dwork = (struct delayed_work *)__data;
1348 struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
1349 1309
1350 /* should have been called from irqsafe timer with irq already off */ 1310 /* should have been called from irqsafe timer with irq already off */
1351 __queue_work(dwork->cpu, cwq->wq, &dwork->work); 1311 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
1352} 1312}
1353EXPORT_SYMBOL_GPL(delayed_work_timer_fn); 1313EXPORT_SYMBOL(delayed_work_timer_fn);
1354 1314
1355static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, 1315static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
1356 struct delayed_work *dwork, unsigned long delay) 1316 struct delayed_work *dwork, unsigned long delay)
1357{ 1317{
1358 struct timer_list *timer = &dwork->timer; 1318 struct timer_list *timer = &dwork->timer;
1359 struct work_struct *work = &dwork->work; 1319 struct work_struct *work = &dwork->work;
1360 unsigned int lcpu;
1361 1320
1362 WARN_ON_ONCE(timer->function != delayed_work_timer_fn || 1321 WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
1363 timer->data != (unsigned long)dwork); 1322 timer->data != (unsigned long)dwork);
1364 BUG_ON(timer_pending(timer)); 1323 WARN_ON_ONCE(timer_pending(timer));
1365 BUG_ON(!list_empty(&work->entry)); 1324 WARN_ON_ONCE(!list_empty(&work->entry));
1366
1367 timer_stats_timer_set_start_info(&dwork->timer);
1368 1325
1369 /* 1326 /*
1370 * This stores cwq for the moment, for the timer_fn. Note that the 1327 * If @delay is 0, queue @dwork->work immediately. This is for
1371 * work's gcwq is preserved to allow reentrance detection for 1328 * both optimization and correctness. The earliest @timer can
1372 * delayed works. 1329 * expire is on the closest next tick and delayed_work users depend
1330 * on that there's no such delay when @delay is 0.
1373 */ 1331 */
1374 if (!(wq->flags & WQ_UNBOUND)) { 1332 if (!delay) {
1375 struct global_cwq *gcwq = get_work_gcwq(work); 1333 __queue_work(cpu, wq, &dwork->work);
1376 1334 return;
1377 /*
1378 * If we cannot get the last gcwq from @work directly,
1379 * select the last CPU such that it avoids unnecessarily
1380 * triggering non-reentrancy check in __queue_work().
1381 */
1382 lcpu = cpu;
1383 if (gcwq)
1384 lcpu = gcwq->cpu;
1385 if (lcpu == WORK_CPU_UNBOUND)
1386 lcpu = raw_smp_processor_id();
1387 } else {
1388 lcpu = WORK_CPU_UNBOUND;
1389 } 1335 }
1390 1336
1391 set_work_cwq(work, get_cwq(lcpu, wq), 0); 1337 timer_stats_timer_set_start_info(&dwork->timer);
1392 1338
1339 dwork->wq = wq;
1393 dwork->cpu = cpu; 1340 dwork->cpu = cpu;
1394 timer->expires = jiffies + delay; 1341 timer->expires = jiffies + delay;
1395 1342
@@ -1417,9 +1364,6 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
1417 bool ret = false; 1364 bool ret = false;
1418 unsigned long flags; 1365 unsigned long flags;
1419 1366
1420 if (!delay)
1421 return queue_work_on(cpu, wq, &dwork->work);
1422
1423 /* read the comment in __queue_work() */ 1367 /* read the comment in __queue_work() */
1424 local_irq_save(flags); 1368 local_irq_save(flags);
1425 1369
@@ -1509,12 +1453,11 @@ EXPORT_SYMBOL_GPL(mod_delayed_work);
1509 * necessary. 1453 * necessary.
1510 * 1454 *
1511 * LOCKING: 1455 * LOCKING:
1512 * spin_lock_irq(gcwq->lock). 1456 * spin_lock_irq(pool->lock).
1513 */ 1457 */
1514static void worker_enter_idle(struct worker *worker) 1458static void worker_enter_idle(struct worker *worker)
1515{ 1459{
1516 struct worker_pool *pool = worker->pool; 1460 struct worker_pool *pool = worker->pool;
1517 struct global_cwq *gcwq = pool->gcwq;
1518 1461
1519 BUG_ON(worker->flags & WORKER_IDLE); 1462 BUG_ON(worker->flags & WORKER_IDLE);
1520 BUG_ON(!list_empty(&worker->entry) && 1463 BUG_ON(!list_empty(&worker->entry) &&
@@ -1532,14 +1475,14 @@ static void worker_enter_idle(struct worker *worker)
1532 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); 1475 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
1533 1476
1534 /* 1477 /*
1535 * Sanity check nr_running. Because gcwq_unbind_fn() releases 1478 * Sanity check nr_running. Because wq_unbind_fn() releases
1536 * gcwq->lock between setting %WORKER_UNBOUND and zapping 1479 * pool->lock between setting %WORKER_UNBOUND and zapping
1537 * nr_running, the warning may trigger spuriously. Check iff 1480 * nr_running, the warning may trigger spuriously. Check iff
1538 * unbind is not in progress. 1481 * unbind is not in progress.
1539 */ 1482 */
1540 WARN_ON_ONCE(!(gcwq->flags & GCWQ_DISASSOCIATED) && 1483 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
1541 pool->nr_workers == pool->nr_idle && 1484 pool->nr_workers == pool->nr_idle &&
1542 atomic_read(get_pool_nr_running(pool))); 1485 atomic_read(&pool->nr_running));
1543} 1486}
1544 1487
1545/** 1488/**
@@ -1549,7 +1492,7 @@ static void worker_enter_idle(struct worker *worker)
1549 * @worker is leaving idle state. Update stats. 1492 * @worker is leaving idle state. Update stats.
1550 * 1493 *
1551 * LOCKING: 1494 * LOCKING:
1552 * spin_lock_irq(gcwq->lock). 1495 * spin_lock_irq(pool->lock).
1553 */ 1496 */
1554static void worker_leave_idle(struct worker *worker) 1497static void worker_leave_idle(struct worker *worker)
1555{ 1498{
@@ -1562,7 +1505,7 @@ static void worker_leave_idle(struct worker *worker)
1562} 1505}
1563 1506
1564/** 1507/**
1565 * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock gcwq 1508 * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock pool
1566 * @worker: self 1509 * @worker: self
1567 * 1510 *
1568 * Works which are scheduled while the cpu is online must at least be 1511 * Works which are scheduled while the cpu is online must at least be
@@ -1574,27 +1517,27 @@ static void worker_leave_idle(struct worker *worker)
1574 * themselves to the target cpu and may race with cpu going down or 1517 * themselves to the target cpu and may race with cpu going down or
1575 * coming online. kthread_bind() can't be used because it may put the 1518 * coming online. kthread_bind() can't be used because it may put the
1576 * worker to already dead cpu and set_cpus_allowed_ptr() can't be used 1519 * worker to already dead cpu and set_cpus_allowed_ptr() can't be used
1577 * verbatim as it's best effort and blocking and gcwq may be 1520 * verbatim as it's best effort and blocking and pool may be
1578 * [dis]associated in the meantime. 1521 * [dis]associated in the meantime.
1579 * 1522 *
1580 * This function tries set_cpus_allowed() and locks gcwq and verifies the 1523 * This function tries set_cpus_allowed() and locks pool and verifies the
1581 * binding against %GCWQ_DISASSOCIATED which is set during 1524 * binding against %POOL_DISASSOCIATED which is set during
1582 * %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker 1525 * %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker
1583 * enters idle state or fetches works without dropping lock, it can 1526 * enters idle state or fetches works without dropping lock, it can
1584 * guarantee the scheduling requirement described in the first paragraph. 1527 * guarantee the scheduling requirement described in the first paragraph.
1585 * 1528 *
1586 * CONTEXT: 1529 * CONTEXT:
1587 * Might sleep. Called without any lock but returns with gcwq->lock 1530 * Might sleep. Called without any lock but returns with pool->lock
1588 * held. 1531 * held.
1589 * 1532 *
1590 * RETURNS: 1533 * RETURNS:
1591 * %true if the associated gcwq is online (@worker is successfully 1534 * %true if the associated pool is online (@worker is successfully
1592 * bound), %false if offline. 1535 * bound), %false if offline.
1593 */ 1536 */
1594static bool worker_maybe_bind_and_lock(struct worker *worker) 1537static bool worker_maybe_bind_and_lock(struct worker *worker)
1595__acquires(&gcwq->lock) 1538__acquires(&pool->lock)
1596{ 1539{
1597 struct global_cwq *gcwq = worker->pool->gcwq; 1540 struct worker_pool *pool = worker->pool;
1598 struct task_struct *task = worker->task; 1541 struct task_struct *task = worker->task;
1599 1542
1600 while (true) { 1543 while (true) {
@@ -1602,19 +1545,19 @@ __acquires(&gcwq->lock)
1602 * The following call may fail, succeed or succeed 1545 * The following call may fail, succeed or succeed
1603 * without actually migrating the task to the cpu if 1546 * without actually migrating the task to the cpu if
1604 * it races with cpu hotunplug operation. Verify 1547 * it races with cpu hotunplug operation. Verify
1605 * against GCWQ_DISASSOCIATED. 1548 * against POOL_DISASSOCIATED.
1606 */ 1549 */
1607 if (!(gcwq->flags & GCWQ_DISASSOCIATED)) 1550 if (!(pool->flags & POOL_DISASSOCIATED))
1608 set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu)); 1551 set_cpus_allowed_ptr(task, get_cpu_mask(pool->cpu));
1609 1552
1610 spin_lock_irq(&gcwq->lock); 1553 spin_lock_irq(&pool->lock);
1611 if (gcwq->flags & GCWQ_DISASSOCIATED) 1554 if (pool->flags & POOL_DISASSOCIATED)
1612 return false; 1555 return false;
1613 if (task_cpu(task) == gcwq->cpu && 1556 if (task_cpu(task) == pool->cpu &&
1614 cpumask_equal(&current->cpus_allowed, 1557 cpumask_equal(&current->cpus_allowed,
1615 get_cpu_mask(gcwq->cpu))) 1558 get_cpu_mask(pool->cpu)))
1616 return true; 1559 return true;
1617 spin_unlock_irq(&gcwq->lock); 1560 spin_unlock_irq(&pool->lock);
1618 1561
1619 /* 1562 /*
1620 * We've raced with CPU hot[un]plug. Give it a breather 1563 * We've raced with CPU hot[un]plug. Give it a breather
@@ -1633,15 +1576,13 @@ __acquires(&gcwq->lock)
1633 */ 1576 */
1634static void idle_worker_rebind(struct worker *worker) 1577static void idle_worker_rebind(struct worker *worker)
1635{ 1578{
1636 struct global_cwq *gcwq = worker->pool->gcwq;
1637
1638 /* CPU may go down again inbetween, clear UNBOUND only on success */ 1579 /* CPU may go down again inbetween, clear UNBOUND only on success */
1639 if (worker_maybe_bind_and_lock(worker)) 1580 if (worker_maybe_bind_and_lock(worker))
1640 worker_clr_flags(worker, WORKER_UNBOUND); 1581 worker_clr_flags(worker, WORKER_UNBOUND);
1641 1582
1642 /* rebind complete, become available again */ 1583 /* rebind complete, become available again */
1643 list_add(&worker->entry, &worker->pool->idle_list); 1584 list_add(&worker->entry, &worker->pool->idle_list);
1644 spin_unlock_irq(&gcwq->lock); 1585 spin_unlock_irq(&worker->pool->lock);
1645} 1586}
1646 1587
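
worker_maybe_bind_and_lock() above captures a pattern worth spelling out: set_cpus_allowed_ptr() is only best effort against concurrent CPU hotplug, so the binding has to be re-verified under pool->lock and retried until it either sticks or the pool is seen as POOL_DISASSOCIATED. A rough userspace analogue of that loop, assuming pthreads in place of kthreads and a plain mutex in place of pool->lock (all names invented for illustration):

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static bool pool_disassociated;		/* analogue of POOL_DISASSOCIATED */
static int pool_cpu;			/* the CPU this "pool" is bound to */

/* Both exits return with pool_lock held, mirroring __acquires(&pool->lock). */
static bool maybe_bind_and_lock(void)
{
	cpu_set_t set;

	for (;;) {
		/* best effort: may race with the CPU going away */
		if (!pool_disassociated) {
			CPU_ZERO(&set);
			CPU_SET(pool_cpu, &set);
			pthread_setaffinity_np(pthread_self(),
					       sizeof(set), &set);
		}

		pthread_mutex_lock(&pool_lock);
		if (pool_disassociated)
			return false;		/* stays unbound */
		if (sched_getcpu() == pool_cpu)
			return true;		/* verified under the lock */
		pthread_mutex_unlock(&pool_lock);

		sched_yield();			/* breather, then retry */
	}
}

As in the kernel version, only the return value tells the caller whether the binding actually took.
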
1647/* 1588/*
@@ -1653,19 +1594,18 @@ static void idle_worker_rebind(struct worker *worker)
1653static void busy_worker_rebind_fn(struct work_struct *work) 1594static void busy_worker_rebind_fn(struct work_struct *work)
1654{ 1595{
1655 struct worker *worker = container_of(work, struct worker, rebind_work); 1596 struct worker *worker = container_of(work, struct worker, rebind_work);
1656 struct global_cwq *gcwq = worker->pool->gcwq;
1657 1597
1658 if (worker_maybe_bind_and_lock(worker)) 1598 if (worker_maybe_bind_and_lock(worker))
1659 worker_clr_flags(worker, WORKER_UNBOUND); 1599 worker_clr_flags(worker, WORKER_UNBOUND);
1660 1600
1661 spin_unlock_irq(&gcwq->lock); 1601 spin_unlock_irq(&worker->pool->lock);
1662} 1602}
1663 1603
1664/** 1604/**
1665 * rebind_workers - rebind all workers of a gcwq to the associated CPU 1605 * rebind_workers - rebind all workers of a pool to the associated CPU
1666 * @gcwq: gcwq of interest 1606 * @pool: pool of interest
1667 * 1607 *
1668 * @gcwq->cpu is coming online. Rebind all workers to the CPU. Rebinding 1608 * @pool->cpu is coming online. Rebind all workers to the CPU. Rebinding
1669 * is different for idle and busy ones. 1609 * is different for idle and busy ones.
1670 * 1610 *
1671 * Idle ones will be removed from the idle_list and woken up. They will 1611 * Idle ones will be removed from the idle_list and woken up. They will
@@ -1683,38 +1623,31 @@ static void busy_worker_rebind_fn(struct work_struct *work)
1683 * including the manager will not appear on @idle_list until rebind is 1623 * including the manager will not appear on @idle_list until rebind is
1684 * complete, making local wake-ups safe. 1624 * complete, making local wake-ups safe.
1685 */ 1625 */
1686static void rebind_workers(struct global_cwq *gcwq) 1626static void rebind_workers(struct worker_pool *pool)
1687{ 1627{
1688 struct worker_pool *pool;
1689 struct worker *worker, *n; 1628 struct worker *worker, *n;
1690 struct hlist_node *pos;
1691 int i; 1629 int i;
1692 1630
1693 lockdep_assert_held(&gcwq->lock); 1631 lockdep_assert_held(&pool->assoc_mutex);
1694 1632 lockdep_assert_held(&pool->lock);
1695 for_each_worker_pool(pool, gcwq)
1696 lockdep_assert_held(&pool->assoc_mutex);
1697 1633
1698 /* dequeue and kick idle ones */ 1634 /* dequeue and kick idle ones */
1699 for_each_worker_pool(pool, gcwq) { 1635 list_for_each_entry_safe(worker, n, &pool->idle_list, entry) {
1700 list_for_each_entry_safe(worker, n, &pool->idle_list, entry) { 1636 /*
1701 /* 1637 * idle workers should be off @pool->idle_list until rebind
1702 * idle workers should be off @pool->idle_list 1638 * is complete to avoid receiving premature local wake-ups.
1703 * until rebind is complete to avoid receiving 1639 */
1704 * premature local wake-ups. 1640 list_del_init(&worker->entry);
1705 */
1706 list_del_init(&worker->entry);
1707 1641
1708 /* 1642 /*
1709 * worker_thread() will see the above dequeuing 1643 * worker_thread() will see the above dequeuing and call
1710 * and call idle_worker_rebind(). 1644 * idle_worker_rebind().
1711 */ 1645 */
1712 wake_up_process(worker->task); 1646 wake_up_process(worker->task);
1713 }
1714 } 1647 }
1715 1648
1716 /* rebind busy workers */ 1649 /* rebind busy workers */
1717 for_each_busy_worker(worker, i, pos, gcwq) { 1650 for_each_busy_worker(worker, i, pool) {
1718 struct work_struct *rebind_work = &worker->rebind_work; 1651 struct work_struct *rebind_work = &worker->rebind_work;
1719 struct workqueue_struct *wq; 1652 struct workqueue_struct *wq;
1720 1653
@@ -1726,16 +1659,16 @@ static void rebind_workers(struct global_cwq *gcwq)
1726 1659
1727 /* 1660 /*
1728 * wq doesn't really matter but let's keep @worker->pool 1661 * wq doesn't really matter but let's keep @worker->pool
1729 * and @cwq->pool consistent for sanity. 1662 * and @pwq->pool consistent for sanity.
1730 */ 1663 */
1731 if (worker_pool_pri(worker->pool)) 1664 if (std_worker_pool_pri(worker->pool))
1732 wq = system_highpri_wq; 1665 wq = system_highpri_wq;
1733 else 1666 else
1734 wq = system_wq; 1667 wq = system_wq;
1735 1668
1736 insert_work(get_cwq(gcwq->cpu, wq), rebind_work, 1669 insert_work(get_pwq(pool->cpu, wq), rebind_work,
1737 worker->scheduled.next, 1670 worker->scheduled.next,
1738 work_color_to_flags(WORK_NO_COLOR)); 1671 work_color_to_flags(WORK_NO_COLOR));
1739 } 1672 }
1740} 1673}
1741 1674
@@ -1770,19 +1703,18 @@ static struct worker *alloc_worker(void)
1770 */ 1703 */
1771static struct worker *create_worker(struct worker_pool *pool) 1704static struct worker *create_worker(struct worker_pool *pool)
1772{ 1705{
1773 struct global_cwq *gcwq = pool->gcwq; 1706 const char *pri = std_worker_pool_pri(pool) ? "H" : "";
1774 const char *pri = worker_pool_pri(pool) ? "H" : "";
1775 struct worker *worker = NULL; 1707 struct worker *worker = NULL;
1776 int id = -1; 1708 int id = -1;
1777 1709
1778 spin_lock_irq(&gcwq->lock); 1710 spin_lock_irq(&pool->lock);
1779 while (ida_get_new(&pool->worker_ida, &id)) { 1711 while (ida_get_new(&pool->worker_ida, &id)) {
1780 spin_unlock_irq(&gcwq->lock); 1712 spin_unlock_irq(&pool->lock);
1781 if (!ida_pre_get(&pool->worker_ida, GFP_KERNEL)) 1713 if (!ida_pre_get(&pool->worker_ida, GFP_KERNEL))
1782 goto fail; 1714 goto fail;
1783 spin_lock_irq(&gcwq->lock); 1715 spin_lock_irq(&pool->lock);
1784 } 1716 }
1785 spin_unlock_irq(&gcwq->lock); 1717 spin_unlock_irq(&pool->lock);
1786 1718
1787 worker = alloc_worker(); 1719 worker = alloc_worker();
1788 if (!worker) 1720 if (!worker)
@@ -1791,30 +1723,30 @@ static struct worker *create_worker(struct worker_pool *pool)
1791 worker->pool = pool; 1723 worker->pool = pool;
1792 worker->id = id; 1724 worker->id = id;
1793 1725
1794 if (gcwq->cpu != WORK_CPU_UNBOUND) 1726 if (pool->cpu != WORK_CPU_UNBOUND)
1795 worker->task = kthread_create_on_node(worker_thread, 1727 worker->task = kthread_create_on_node(worker_thread,
1796 worker, cpu_to_node(gcwq->cpu), 1728 worker, cpu_to_node(pool->cpu),
1797 "kworker/%u:%d%s", gcwq->cpu, id, pri); 1729 "kworker/%u:%d%s", pool->cpu, id, pri);
1798 else 1730 else
1799 worker->task = kthread_create(worker_thread, worker, 1731 worker->task = kthread_create(worker_thread, worker,
1800 "kworker/u:%d%s", id, pri); 1732 "kworker/u:%d%s", id, pri);
1801 if (IS_ERR(worker->task)) 1733 if (IS_ERR(worker->task))
1802 goto fail; 1734 goto fail;
1803 1735
1804 if (worker_pool_pri(pool)) 1736 if (std_worker_pool_pri(pool))
1805 set_user_nice(worker->task, HIGHPRI_NICE_LEVEL); 1737 set_user_nice(worker->task, HIGHPRI_NICE_LEVEL);
1806 1738
1807 /* 1739 /*
1808 * Determine CPU binding of the new worker depending on 1740 * Determine CPU binding of the new worker depending on
1809 * %GCWQ_DISASSOCIATED. The caller is responsible for ensuring the 1741 * %POOL_DISASSOCIATED. The caller is responsible for ensuring the
1810 * flag remains stable across this function. See the comments 1742 * flag remains stable across this function. See the comments
1811 * above the flag definition for details. 1743 * above the flag definition for details.
1812 * 1744 *
1813 * As an unbound worker may later become a regular one if CPU comes 1745 * As an unbound worker may later become a regular one if CPU comes
1814 * online, make sure every worker has %PF_THREAD_BOUND set. 1746 * online, make sure every worker has %PF_THREAD_BOUND set.
1815 */ 1747 */
1816 if (!(gcwq->flags & GCWQ_DISASSOCIATED)) { 1748 if (!(pool->flags & POOL_DISASSOCIATED)) {
1817 kthread_bind(worker->task, gcwq->cpu); 1749 kthread_bind(worker->task, pool->cpu);
1818 } else { 1750 } else {
1819 worker->task->flags |= PF_THREAD_BOUND; 1751 worker->task->flags |= PF_THREAD_BOUND;
1820 worker->flags |= WORKER_UNBOUND; 1752 worker->flags |= WORKER_UNBOUND;
@@ -1823,9 +1755,9 @@ static struct worker *create_worker(struct worker_pool *pool)
1823 return worker; 1755 return worker;
1824fail: 1756fail:
1825 if (id >= 0) { 1757 if (id >= 0) {
1826 spin_lock_irq(&gcwq->lock); 1758 spin_lock_irq(&pool->lock);
1827 ida_remove(&pool->worker_ida, id); 1759 ida_remove(&pool->worker_ida, id);
1828 spin_unlock_irq(&gcwq->lock); 1760 spin_unlock_irq(&pool->lock);
1829 } 1761 }
1830 kfree(worker); 1762 kfree(worker);
1831 return NULL; 1763 return NULL;
@@ -1835,10 +1767,10 @@ fail:
1835 * start_worker - start a newly created worker 1767 * start_worker - start a newly created worker
1836 * @worker: worker to start 1768 * @worker: worker to start
1837 * 1769 *
1838 * Make the gcwq aware of @worker and start it. 1770 * Make the pool aware of @worker and start it.
1839 * 1771 *
1840 * CONTEXT: 1772 * CONTEXT:
1841 * spin_lock_irq(gcwq->lock). 1773 * spin_lock_irq(pool->lock).
1842 */ 1774 */
1843static void start_worker(struct worker *worker) 1775static void start_worker(struct worker *worker)
1844{ 1776{
@@ -1852,15 +1784,14 @@ static void start_worker(struct worker *worker)
1852 * destroy_worker - destroy a workqueue worker 1784 * destroy_worker - destroy a workqueue worker
1853 * @worker: worker to be destroyed 1785 * @worker: worker to be destroyed
1854 * 1786 *
1855 * Destroy @worker and adjust @gcwq stats accordingly. 1787 * Destroy @worker and adjust @pool stats accordingly.
1856 * 1788 *
1857 * CONTEXT: 1789 * CONTEXT:
1858 * spin_lock_irq(gcwq->lock) which is released and regrabbed. 1790 * spin_lock_irq(pool->lock) which is released and regrabbed.
1859 */ 1791 */
1860static void destroy_worker(struct worker *worker) 1792static void destroy_worker(struct worker *worker)
1861{ 1793{
1862 struct worker_pool *pool = worker->pool; 1794 struct worker_pool *pool = worker->pool;
1863 struct global_cwq *gcwq = pool->gcwq;
1864 int id = worker->id; 1795 int id = worker->id;
1865 1796
1866 /* sanity check frenzy */ 1797 /* sanity check frenzy */
@@ -1875,21 +1806,20 @@ static void destroy_worker(struct worker *worker)
1875 list_del_init(&worker->entry); 1806 list_del_init(&worker->entry);
1876 worker->flags |= WORKER_DIE; 1807 worker->flags |= WORKER_DIE;
1877 1808
1878 spin_unlock_irq(&gcwq->lock); 1809 spin_unlock_irq(&pool->lock);
1879 1810
1880 kthread_stop(worker->task); 1811 kthread_stop(worker->task);
1881 kfree(worker); 1812 kfree(worker);
1882 1813
1883 spin_lock_irq(&gcwq->lock); 1814 spin_lock_irq(&pool->lock);
1884 ida_remove(&pool->worker_ida, id); 1815 ida_remove(&pool->worker_ida, id);
1885} 1816}
1886 1817
1887static void idle_worker_timeout(unsigned long __pool) 1818static void idle_worker_timeout(unsigned long __pool)
1888{ 1819{
1889 struct worker_pool *pool = (void *)__pool; 1820 struct worker_pool *pool = (void *)__pool;
1890 struct global_cwq *gcwq = pool->gcwq;
1891 1821
1892 spin_lock_irq(&gcwq->lock); 1822 spin_lock_irq(&pool->lock);
1893 1823
1894 if (too_many_workers(pool)) { 1824 if (too_many_workers(pool)) {
1895 struct worker *worker; 1825 struct worker *worker;
@@ -1908,20 +1838,20 @@ static void idle_worker_timeout(unsigned long __pool)
1908 } 1838 }
1909 } 1839 }
1910 1840
1911 spin_unlock_irq(&gcwq->lock); 1841 spin_unlock_irq(&pool->lock);
1912} 1842}
1913 1843
1914static bool send_mayday(struct work_struct *work) 1844static bool send_mayday(struct work_struct *work)
1915{ 1845{
1916 struct cpu_workqueue_struct *cwq = get_work_cwq(work); 1846 struct pool_workqueue *pwq = get_work_pwq(work);
1917 struct workqueue_struct *wq = cwq->wq; 1847 struct workqueue_struct *wq = pwq->wq;
1918 unsigned int cpu; 1848 unsigned int cpu;
1919 1849
1920 if (!(wq->flags & WQ_RESCUER)) 1850 if (!(wq->flags & WQ_RESCUER))
1921 return false; 1851 return false;
1922 1852
1923 /* mayday mayday mayday */ 1853 /* mayday mayday mayday */
1924 cpu = cwq->pool->gcwq->cpu; 1854 cpu = pwq->pool->cpu;
1925 /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */ 1855 /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */
1926 if (cpu == WORK_CPU_UNBOUND) 1856 if (cpu == WORK_CPU_UNBOUND)
1927 cpu = 0; 1857 cpu = 0;
@@ -1930,13 +1860,12 @@ static bool send_mayday(struct work_struct *work)
1930 return true; 1860 return true;
1931} 1861}
1932 1862
1933static void gcwq_mayday_timeout(unsigned long __pool) 1863static void pool_mayday_timeout(unsigned long __pool)
1934{ 1864{
1935 struct worker_pool *pool = (void *)__pool; 1865 struct worker_pool *pool = (void *)__pool;
1936 struct global_cwq *gcwq = pool->gcwq;
1937 struct work_struct *work; 1866 struct work_struct *work;
1938 1867
1939 spin_lock_irq(&gcwq->lock); 1868 spin_lock_irq(&pool->lock);
1940 1869
1941 if (need_to_create_worker(pool)) { 1870 if (need_to_create_worker(pool)) {
1942 /* 1871 /*
@@ -1949,7 +1878,7 @@ static void gcwq_mayday_timeout(unsigned long __pool)
1949 send_mayday(work); 1878 send_mayday(work);
1950 } 1879 }
1951 1880
1952 spin_unlock_irq(&gcwq->lock); 1881 spin_unlock_irq(&pool->lock);
1953 1882
1954 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL); 1883 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
1955} 1884}
@@ -1968,24 +1897,22 @@ static void gcwq_mayday_timeout(unsigned long __pool)
1968 * may_start_working() true. 1897 * may_start_working() true.
1969 * 1898 *
1970 * LOCKING: 1899 * LOCKING:
1971 * spin_lock_irq(gcwq->lock) which may be released and regrabbed 1900 * spin_lock_irq(pool->lock) which may be released and regrabbed
1972 * multiple times. Does GFP_KERNEL allocations. Called only from 1901 * multiple times. Does GFP_KERNEL allocations. Called only from
1973 * manager. 1902 * manager.
1974 * 1903 *
1975 * RETURNS: 1904 * RETURNS:
1976 * false if no action was taken and gcwq->lock stayed locked, true 1905 * false if no action was taken and pool->lock stayed locked, true
1977 * otherwise. 1906 * otherwise.
1978 */ 1907 */
1979static bool maybe_create_worker(struct worker_pool *pool) 1908static bool maybe_create_worker(struct worker_pool *pool)
1980__releases(&gcwq->lock) 1909__releases(&pool->lock)
1981__acquires(&gcwq->lock) 1910__acquires(&pool->lock)
1982{ 1911{
1983 struct global_cwq *gcwq = pool->gcwq;
1984
1985 if (!need_to_create_worker(pool)) 1912 if (!need_to_create_worker(pool))
1986 return false; 1913 return false;
1987restart: 1914restart:
1988 spin_unlock_irq(&gcwq->lock); 1915 spin_unlock_irq(&pool->lock);
1989 1916
1990 /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */ 1917 /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
1991 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); 1918 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
@@ -1996,7 +1923,7 @@ restart:
1996 worker = create_worker(pool); 1923 worker = create_worker(pool);
1997 if (worker) { 1924 if (worker) {
1998 del_timer_sync(&pool->mayday_timer); 1925 del_timer_sync(&pool->mayday_timer);
1999 spin_lock_irq(&gcwq->lock); 1926 spin_lock_irq(&pool->lock);
2000 start_worker(worker); 1927 start_worker(worker);
2001 BUG_ON(need_to_create_worker(pool)); 1928 BUG_ON(need_to_create_worker(pool));
2002 return true; 1929 return true;
@@ -2013,7 +1940,7 @@ restart:
2013 } 1940 }
2014 1941
2015 del_timer_sync(&pool->mayday_timer); 1942 del_timer_sync(&pool->mayday_timer);
2016 spin_lock_irq(&gcwq->lock); 1943 spin_lock_irq(&pool->lock);
2017 if (need_to_create_worker(pool)) 1944 if (need_to_create_worker(pool))
2018 goto restart; 1945 goto restart;
2019 return true; 1946 return true;
@@ -2027,11 +1954,11 @@ restart:
2027 * IDLE_WORKER_TIMEOUT. 1954 * IDLE_WORKER_TIMEOUT.
2028 * 1955 *
2029 * LOCKING: 1956 * LOCKING:
2030 * spin_lock_irq(gcwq->lock) which may be released and regrabbed 1957 * spin_lock_irq(pool->lock) which may be released and regrabbed
2031 * multiple times. Called only from manager. 1958 * multiple times. Called only from manager.
2032 * 1959 *
2033 * RETURNS: 1960 * RETURNS:
2034 * false if no action was taken and gcwq->lock stayed locked, true 1961 * false if no action was taken and pool->lock stayed locked, true
2035 * otherwise. 1962 * otherwise.
2036 */ 1963 */
2037static bool maybe_destroy_workers(struct worker_pool *pool) 1964static bool maybe_destroy_workers(struct worker_pool *pool)
@@ -2061,21 +1988,21 @@ static bool maybe_destroy_workers(struct worker_pool *pool)
2061 * manage_workers - manage worker pool 1988 * manage_workers - manage worker pool
2062 * @worker: self 1989 * @worker: self
2063 * 1990 *
2064 * Assume the manager role and manage gcwq worker pool @worker belongs 1991 * Assume the manager role and manage the worker pool @worker belongs
2065 * to. At any given time, there can be only zero or one manager per 1992 * to. At any given time, there can be only zero or one manager per
2066 * gcwq. The exclusion is handled automatically by this function. 1993 * pool. The exclusion is handled automatically by this function.
2067 * 1994 *
2068 * The caller can safely start processing works on false return. On 1995 * The caller can safely start processing works on false return. On
2069 * true return, it's guaranteed that need_to_create_worker() is false 1996 * true return, it's guaranteed that need_to_create_worker() is false
2070 * and may_start_working() is true. 1997 * and may_start_working() is true.
2071 * 1998 *
2072 * CONTEXT: 1999 * CONTEXT:
2073 * spin_lock_irq(gcwq->lock) which may be released and regrabbed 2000 * spin_lock_irq(pool->lock) which may be released and regrabbed
2074 * multiple times. Does GFP_KERNEL allocations. 2001 * multiple times. Does GFP_KERNEL allocations.
2075 * 2002 *
2076 * RETURNS: 2003 * RETURNS:
2077 * false if no action was taken and gcwq->lock stayed locked, true if 2004 * false if no action was taken and pool->lock stayed locked, true if
2078 * some action was taken. 2005 * some action was taken.
2077 * false if no action was taken and gcwq->lock stayed locked, true if 2004 * false if no action was taken and pool->lock stayed locked, true if
2079 */ 2006 */
2080static bool manage_workers(struct worker *worker) 2007static bool manage_workers(struct worker *worker)
2081{ 2008{
@@ -2097,20 +2024,20 @@ static bool manage_workers(struct worker *worker)
2097 * manager against CPU hotplug. 2024 * manager against CPU hotplug.
2098 * 2025 *
2099 * assoc_mutex would always be free unless CPU hotplug is in 2026 * assoc_mutex would always be free unless CPU hotplug is in
2100 * progress. trylock first without dropping @gcwq->lock. 2027 * progress. trylock first without dropping @pool->lock.
2101 */ 2028 */
2102 if (unlikely(!mutex_trylock(&pool->assoc_mutex))) { 2029 if (unlikely(!mutex_trylock(&pool->assoc_mutex))) {
2103 spin_unlock_irq(&pool->gcwq->lock); 2030 spin_unlock_irq(&pool->lock);
2104 mutex_lock(&pool->assoc_mutex); 2031 mutex_lock(&pool->assoc_mutex);
2105 /* 2032 /*
2106 * CPU hotplug could have happened while we were waiting 2033 * CPU hotplug could have happened while we were waiting
2107 * for assoc_mutex. Hotplug itself can't handle us 2034 * for assoc_mutex. Hotplug itself can't handle us
2108 * because manager isn't either on idle or busy list, and 2035 * because manager isn't either on idle or busy list, and
2109 * @gcwq's state and ours could have deviated. 2036 * @pool's state and ours could have deviated.
2110 * 2037 *
2111 * As hotplug is now excluded via assoc_mutex, we can 2038 * As hotplug is now excluded via assoc_mutex, we can
2112 * simply try to bind. It will succeed or fail depending 2039 * simply try to bind. It will succeed or fail depending
2113 * on @gcwq's current state. Try it and adjust 2040 * on @pool's current state. Try it and adjust
2114 * %WORKER_UNBOUND accordingly. 2041 * %WORKER_UNBOUND accordingly.
2115 */ 2042 */
2116 if (worker_maybe_bind_and_lock(worker)) 2043 if (worker_maybe_bind_and_lock(worker))
@@ -2147,18 +2074,15 @@ static bool manage_workers(struct worker *worker)
2147 * call this function to process a work. 2074 * call this function to process a work.
2148 * 2075 *
2149 * CONTEXT: 2076 * CONTEXT:
2150 * spin_lock_irq(gcwq->lock) which is released and regrabbed. 2077 * spin_lock_irq(pool->lock) which is released and regrabbed.
2151 */ 2078 */
2152static void process_one_work(struct worker *worker, struct work_struct *work) 2079static void process_one_work(struct worker *worker, struct work_struct *work)
2153__releases(&gcwq->lock) 2080__releases(&pool->lock)
2154__acquires(&gcwq->lock) 2081__acquires(&pool->lock)
2155{ 2082{
2156 struct cpu_workqueue_struct *cwq = get_work_cwq(work); 2083 struct pool_workqueue *pwq = get_work_pwq(work);
2157 struct worker_pool *pool = worker->pool; 2084 struct worker_pool *pool = worker->pool;
2158 struct global_cwq *gcwq = pool->gcwq; 2085 bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
2159 struct hlist_head *bwh = busy_worker_head(gcwq, work);
2160 bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
2161 work_func_t f = work->func;
2162 int work_color; 2086 int work_color;
2163 struct worker *collision; 2087 struct worker *collision;
2164#ifdef CONFIG_LOCKDEP 2088#ifdef CONFIG_LOCKDEP
@@ -2176,11 +2100,11 @@ __acquires(&gcwq->lock)
2176 /* 2100 /*
2177 * Ensure we're on the correct CPU. DISASSOCIATED test is 2101 * Ensure we're on the correct CPU. DISASSOCIATED test is
2178 * necessary to avoid spurious warnings from rescuers servicing the 2102 * necessary to avoid spurious warnings from rescuers servicing the
2179 * unbound or a disassociated gcwq. 2103 * unbound or a disassociated pool.
2180 */ 2104 */
2181 WARN_ON_ONCE(!(worker->flags & WORKER_UNBOUND) && 2105 WARN_ON_ONCE(!(worker->flags & WORKER_UNBOUND) &&
2182 !(gcwq->flags & GCWQ_DISASSOCIATED) && 2106 !(pool->flags & POOL_DISASSOCIATED) &&
2183 raw_smp_processor_id() != gcwq->cpu); 2107 raw_smp_processor_id() != pool->cpu);
2184 2108
2185 /* 2109 /*
2186 * A single work shouldn't be executed concurrently by 2110 * A single work shouldn't be executed concurrently by
@@ -2188,7 +2112,7 @@ __acquires(&gcwq->lock)
2188 * already processing the work. If so, defer the work to the 2112 * already processing the work. If so, defer the work to the
2189 * currently executing one. 2113 * currently executing one.
2190 */ 2114 */
2191 collision = __find_worker_executing_work(gcwq, bwh, work); 2115 collision = find_worker_executing_work(pool, work);
2192 if (unlikely(collision)) { 2116 if (unlikely(collision)) {
2193 move_linked_works(work, &collision->scheduled, NULL); 2117 move_linked_works(work, &collision->scheduled, NULL);
2194 return; 2118 return;
@@ -2196,9 +2120,10 @@ __acquires(&gcwq->lock)
2196 2120
2197 /* claim and dequeue */ 2121 /* claim and dequeue */
2198 debug_work_deactivate(work); 2122 debug_work_deactivate(work);
2199 hlist_add_head(&worker->hentry, bwh); 2123 hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
2200 worker->current_work = work; 2124 worker->current_work = work;
2201 worker->current_cwq = cwq; 2125 worker->current_func = work->func;
2126 worker->current_pwq = pwq;
2202 work_color = get_work_color(work); 2127 work_color = get_work_color(work);
2203 2128
2204 list_del_init(&work->entry); 2129 list_del_init(&work->entry);
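
The hunks above replace the open-coded busy_worker_head()/__find_worker_executing_work() pair with a hash table keyed on the work item's address, which is what keeps the non-reentrancy check (defer @work to a collision that is already running it) a short per-bucket scan. A self-contained userspace sketch of the same idea, with invented types and a deliberately crude pointer hash standing in for <linux/hashtable.h>:

#include <stddef.h>
#include <stdint.h>

#define BUSY_HASH_ORDER	6
#define BUSY_HASH_SIZE	(1u << BUSY_HASH_ORDER)

struct work_item;				/* opaque in this sketch */

struct busy_worker {
	struct busy_worker *next;		/* bucket chain */
	struct work_item *current_work;		/* what it is executing */
};

static struct busy_worker *busy_hash[BUSY_HASH_SIZE];

static unsigned int hash_ptr_slot(const void *p)
{
	/* crude pointer hash: drop low alignment bits, fold into the table */
	return ((uintptr_t)p >> 4) & (BUSY_HASH_SIZE - 1);
}

/* analogue of find_worker_executing_work(): NULL if nobody runs @work */
static struct busy_worker *find_busy_worker(struct work_item *work)
{
	struct busy_worker *w;

	for (w = busy_hash[hash_ptr_slot(work)]; w; w = w->next)
		if (w->current_work == work)
			return w;
	return NULL;
}

/* analogue of the hash_add() in the "claim and dequeue" step above */
static void mark_busy(struct busy_worker *w, struct work_item *work)
{
	unsigned int slot = hash_ptr_slot(work);

	w->current_work = work;
	w->next = busy_hash[slot];
	busy_hash[slot] = w;
}
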
@@ -2211,53 +2136,55 @@ __acquires(&gcwq->lock)
2211 worker_set_flags(worker, WORKER_CPU_INTENSIVE, true); 2136 worker_set_flags(worker, WORKER_CPU_INTENSIVE, true);
2212 2137
2213 /* 2138 /*
2214 * Unbound gcwq isn't concurrency managed and work items should be 2139 * Unbound pool isn't concurrency managed and work items should be
2215 * executed ASAP. Wake up another worker if necessary. 2140 * executed ASAP. Wake up another worker if necessary.
2216 */ 2141 */
2217 if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool)) 2142 if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool))
2218 wake_up_worker(pool); 2143 wake_up_worker(pool);
2219 2144
2220 /* 2145 /*
2221 * Record the last CPU and clear PENDING which should be the last 2146 * Record the last pool and clear PENDING which should be the last
2222 * update to @work. Also, do this inside @gcwq->lock so that 2147 * update to @work. Also, do this inside @pool->lock so that
2223 * PENDING and queued state changes happen together while IRQ is 2148 * PENDING and queued state changes happen together while IRQ is
2224 * disabled. 2149 * disabled.
2225 */ 2150 */
2226 set_work_cpu_and_clear_pending(work, gcwq->cpu); 2151 set_work_pool_and_clear_pending(work, pool->id);
2227 2152
2228 spin_unlock_irq(&gcwq->lock); 2153 spin_unlock_irq(&pool->lock);
2229 2154
2230 lock_map_acquire_read(&cwq->wq->lockdep_map); 2155 lock_map_acquire_read(&pwq->wq->lockdep_map);
2231 lock_map_acquire(&lockdep_map); 2156 lock_map_acquire(&lockdep_map);
2232 trace_workqueue_execute_start(work); 2157 trace_workqueue_execute_start(work);
2233 f(work); 2158 worker->current_func(work);
2234 /* 2159 /*
2235 * While we must be careful to not use "work" after this, the trace 2160 * While we must be careful to not use "work" after this, the trace
2236 * point will only record its address. 2161 * point will only record its address.
2237 */ 2162 */
2238 trace_workqueue_execute_end(work); 2163 trace_workqueue_execute_end(work);
2239 lock_map_release(&lockdep_map); 2164 lock_map_release(&lockdep_map);
2240 lock_map_release(&cwq->wq->lockdep_map); 2165 lock_map_release(&pwq->wq->lockdep_map);
2241 2166
2242 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { 2167 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
2243 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n" 2168 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
2244 " last function: %pf\n", 2169 " last function: %pf\n",
2245 current->comm, preempt_count(), task_pid_nr(current), f); 2170 current->comm, preempt_count(), task_pid_nr(current),
2171 worker->current_func);
2246 debug_show_held_locks(current); 2172 debug_show_held_locks(current);
2247 dump_stack(); 2173 dump_stack();
2248 } 2174 }
2249 2175
2250 spin_lock_irq(&gcwq->lock); 2176 spin_lock_irq(&pool->lock);
2251 2177
2252 /* clear cpu intensive status */ 2178 /* clear cpu intensive status */
2253 if (unlikely(cpu_intensive)) 2179 if (unlikely(cpu_intensive))
2254 worker_clr_flags(worker, WORKER_CPU_INTENSIVE); 2180 worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
2255 2181
2256 /* we're done with it, release */ 2182 /* we're done with it, release */
2257 hlist_del_init(&worker->hentry); 2183 hash_del(&worker->hentry);
2258 worker->current_work = NULL; 2184 worker->current_work = NULL;
2259 worker->current_cwq = NULL; 2185 worker->current_func = NULL;
2260 cwq_dec_nr_in_flight(cwq, work_color); 2186 worker->current_pwq = NULL;
2187 pwq_dec_nr_in_flight(pwq, work_color);
2261} 2188}
2262 2189
2263/** 2190/**
@@ -2269,7 +2196,7 @@ __acquires(&gcwq->lock)
2269 * fetches a work from the top and executes it. 2196 * fetches a work from the top and executes it.
2270 * 2197 *
2271 * CONTEXT: 2198 * CONTEXT:
2272 * spin_lock_irq(gcwq->lock) which may be released and regrabbed 2199 * spin_lock_irq(pool->lock) which may be released and regrabbed
2273 * multiple times. 2200 * multiple times.
2274 */ 2201 */
2275static void process_scheduled_works(struct worker *worker) 2202static void process_scheduled_works(struct worker *worker)
@@ -2285,8 +2212,8 @@ static void process_scheduled_works(struct worker *worker)
2285 * worker_thread - the worker thread function 2212 * worker_thread - the worker thread function
2286 * @__worker: self 2213 * @__worker: self
2287 * 2214 *
2288 * The gcwq worker thread function. There's a single dynamic pool of 2215 * The worker thread function. There are NR_STD_WORKER_POOLS dynamic pools
2289 * these per each cpu. These workers process all works regardless of 2216 * of these per each cpu. These workers process all works regardless of
2290 * their specific target workqueue. The only exception is works which 2217 * their specific target workqueue. The only exception is works which
2291 * belong to workqueues with a rescuer which will be explained in 2218 * belong to workqueues with a rescuer which will be explained in
2292 * rescuer_thread(). 2219 * rescuer_thread().
@@ -2295,16 +2222,15 @@ static int worker_thread(void *__worker)
2295{ 2222{
2296 struct worker *worker = __worker; 2223 struct worker *worker = __worker;
2297 struct worker_pool *pool = worker->pool; 2224 struct worker_pool *pool = worker->pool;
2298 struct global_cwq *gcwq = pool->gcwq;
2299 2225
2300 /* tell the scheduler that this is a workqueue worker */ 2226 /* tell the scheduler that this is a workqueue worker */
2301 worker->task->flags |= PF_WQ_WORKER; 2227 worker->task->flags |= PF_WQ_WORKER;
2302woke_up: 2228woke_up:
2303 spin_lock_irq(&gcwq->lock); 2229 spin_lock_irq(&pool->lock);
2304 2230
2305 /* we are off idle list if destruction or rebind is requested */ 2231 /* we are off idle list if destruction or rebind is requested */
2306 if (unlikely(list_empty(&worker->entry))) { 2232 if (unlikely(list_empty(&worker->entry))) {
2307 spin_unlock_irq(&gcwq->lock); 2233 spin_unlock_irq(&pool->lock);
2308 2234
2309 /* if DIE is set, destruction is requested */ 2235 /* if DIE is set, destruction is requested */
2310 if (worker->flags & WORKER_DIE) { 2236 if (worker->flags & WORKER_DIE) {
@@ -2363,52 +2289,61 @@ sleep:
2363 goto recheck; 2289 goto recheck;
2364 2290
2365 /* 2291 /*
2366 * gcwq->lock is held and there's no work to process and no 2292 * pool->lock is held and there's no work to process and no need to
2367 * need to manage, sleep. Workers are woken up only while 2293 * manage, sleep. Workers are woken up only while holding
2368 * holding gcwq->lock or from local cpu, so setting the 2294 * pool->lock or from local cpu, so setting the current state
2369 * current state before releasing gcwq->lock is enough to 2295 * before releasing pool->lock is enough to prevent losing any
2370 * prevent losing any event. 2296 * event.
2371 */ 2297 */
2372 worker_enter_idle(worker); 2298 worker_enter_idle(worker);
2373 __set_current_state(TASK_INTERRUPTIBLE); 2299 __set_current_state(TASK_INTERRUPTIBLE);
2374 spin_unlock_irq(&gcwq->lock); 2300 spin_unlock_irq(&pool->lock);
2375 schedule(); 2301 schedule();
2376 goto woke_up; 2302 goto woke_up;
2377} 2303}
2378 2304
2379/** 2305/**
2380 * rescuer_thread - the rescuer thread function 2306 * rescuer_thread - the rescuer thread function
2381 * @__wq: the associated workqueue 2307 * @__rescuer: self
2382 * 2308 *
2383 * Workqueue rescuer thread function. There's one rescuer for each 2309 * Workqueue rescuer thread function. There's one rescuer for each
2384 * workqueue which has WQ_RESCUER set. 2310 * workqueue which has WQ_RESCUER set.
2385 * 2311 *
2386 * Regular work processing on a gcwq may block trying to create a new 2312 * Regular work processing on a pool may block trying to create a new
2387 * worker which uses GFP_KERNEL allocation which has slight chance of 2313 * worker which uses GFP_KERNEL allocation which has slight chance of
2388 * developing into deadlock if some works currently on the same queue 2314 * developing into deadlock if some works currently on the same queue
2389 * need to be processed to satisfy the GFP_KERNEL allocation. This is 2315 * need to be processed to satisfy the GFP_KERNEL allocation. This is
2390 * the problem rescuer solves. 2316 * the problem rescuer solves.
2391 * 2317 *
2392 * When such condition is possible, the gcwq summons rescuers of all 2318 * When such condition is possible, the pool summons rescuers of all
2393 * workqueues which have works queued on the gcwq and let them process 2319 * workqueues which have works queued on the pool and let them process
2394 * those works so that forward progress can be guaranteed. 2320 * those works so that forward progress can be guaranteed.
2395 * 2321 *
2396 * This should happen rarely. 2322 * This should happen rarely.
2397 */ 2323 */
2398static int rescuer_thread(void *__wq) 2324static int rescuer_thread(void *__rescuer)
2399{ 2325{
2400 struct workqueue_struct *wq = __wq; 2326 struct worker *rescuer = __rescuer;
2401 struct worker *rescuer = wq->rescuer; 2327 struct workqueue_struct *wq = rescuer->rescue_wq;
2402 struct list_head *scheduled = &rescuer->scheduled; 2328 struct list_head *scheduled = &rescuer->scheduled;
2403 bool is_unbound = wq->flags & WQ_UNBOUND; 2329 bool is_unbound = wq->flags & WQ_UNBOUND;
2404 unsigned int cpu; 2330 unsigned int cpu;
2405 2331
2406 set_user_nice(current, RESCUER_NICE_LEVEL); 2332 set_user_nice(current, RESCUER_NICE_LEVEL);
2333
2334 /*
2335 * Mark rescuer as worker too. As WORKER_PREP is never cleared, it
2336 * doesn't participate in concurrency management.
2337 */
2338 rescuer->task->flags |= PF_WQ_WORKER;
2407repeat: 2339repeat:
2408 set_current_state(TASK_INTERRUPTIBLE); 2340 set_current_state(TASK_INTERRUPTIBLE);
2409 2341
2410 if (kthread_should_stop()) 2342 if (kthread_should_stop()) {
2343 __set_current_state(TASK_RUNNING);
2344 rescuer->task->flags &= ~PF_WQ_WORKER;
2411 return 0; 2345 return 0;
2346 }
2412 2347
2413 /* 2348 /*
2414 * See whether any cpu is asking for help. Unbounded 2349 * See whether any cpu is asking for help. Unbounded
@@ -2416,9 +2351,8 @@ repeat:
2416 */ 2351 */
2417 for_each_mayday_cpu(cpu, wq->mayday_mask) { 2352 for_each_mayday_cpu(cpu, wq->mayday_mask) {
2418 unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu; 2353 unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu;
2419 struct cpu_workqueue_struct *cwq = get_cwq(tcpu, wq); 2354 struct pool_workqueue *pwq = get_pwq(tcpu, wq);
2420 struct worker_pool *pool = cwq->pool; 2355 struct worker_pool *pool = pwq->pool;
2421 struct global_cwq *gcwq = pool->gcwq;
2422 struct work_struct *work, *n; 2356 struct work_struct *work, *n;
2423 2357
2424 __set_current_state(TASK_RUNNING); 2358 __set_current_state(TASK_RUNNING);
@@ -2434,22 +2368,24 @@ repeat:
2434 */ 2368 */
2435 BUG_ON(!list_empty(&rescuer->scheduled)); 2369 BUG_ON(!list_empty(&rescuer->scheduled));
2436 list_for_each_entry_safe(work, n, &pool->worklist, entry) 2370 list_for_each_entry_safe(work, n, &pool->worklist, entry)
2437 if (get_work_cwq(work) == cwq) 2371 if (get_work_pwq(work) == pwq)
2438 move_linked_works(work, scheduled, &n); 2372 move_linked_works(work, scheduled, &n);
2439 2373
2440 process_scheduled_works(rescuer); 2374 process_scheduled_works(rescuer);
2441 2375
2442 /* 2376 /*
2443 * Leave this gcwq. If keep_working() is %true, notify a 2377 * Leave this pool. If keep_working() is %true, notify a
2444 * regular worker; otherwise, we end up with 0 concurrency 2378 * regular worker; otherwise, we end up with 0 concurrency
2445 * and stalling the execution. 2379 * and stalling the execution.
2446 */ 2380 */
2447 if (keep_working(pool)) 2381 if (keep_working(pool))
2448 wake_up_worker(pool); 2382 wake_up_worker(pool);
2449 2383
2450 spin_unlock_irq(&gcwq->lock); 2384 spin_unlock_irq(&pool->lock);
2451 } 2385 }
2452 2386
2387 /* rescuers should never participate in concurrency management */
2388 WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
2453 schedule(); 2389 schedule();
2454 goto repeat; 2390 goto repeat;
2455} 2391}
@@ -2467,7 +2403,7 @@ static void wq_barrier_func(struct work_struct *work)
2467 2403
2468/** 2404/**
2469 * insert_wq_barrier - insert a barrier work 2405 * insert_wq_barrier - insert a barrier work
2470 * @cwq: cwq to insert barrier into 2406 * @pwq: pwq to insert barrier into
2471 * @barr: wq_barrier to insert 2407 * @barr: wq_barrier to insert
2472 * @target: target work to attach @barr to 2408 * @target: target work to attach @barr to
2473 * @worker: worker currently executing @target, NULL if @target is not executing 2409 * @worker: worker currently executing @target, NULL if @target is not executing
@@ -2484,12 +2420,12 @@ static void wq_barrier_func(struct work_struct *work)
2484 * after a work with LINKED flag set. 2420 * after a work with LINKED flag set.
2485 * 2421 *
2486 * Note that when @worker is non-NULL, @target may be modified 2422 * Note that when @worker is non-NULL, @target may be modified
2487 * underneath us, so we can't reliably determine cwq from @target. 2423 * underneath us, so we can't reliably determine pwq from @target.
2488 * 2424 *
2489 * CONTEXT: 2425 * CONTEXT:
2490 * spin_lock_irq(gcwq->lock). 2426 * spin_lock_irq(pool->lock).
2491 */ 2427 */
2492static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, 2428static void insert_wq_barrier(struct pool_workqueue *pwq,
2493 struct wq_barrier *barr, 2429 struct wq_barrier *barr,
2494 struct work_struct *target, struct worker *worker) 2430 struct work_struct *target, struct worker *worker)
2495{ 2431{
@@ -2497,7 +2433,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
2497 unsigned int linked = 0; 2433 unsigned int linked = 0;
2498 2434
2499 /* 2435 /*
2500 * debugobject calls are safe here even with gcwq->lock locked 2436 * debugobject calls are safe here even with pool->lock locked
2501 * as we know for sure that this will not trigger any of the 2437 * as we know for sure that this will not trigger any of the
2502 * checks and call back into the fixup functions where we 2438 * checks and call back into the fixup functions where we
2503 * might deadlock. 2439 * might deadlock.
@@ -2522,23 +2458,23 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
2522 } 2458 }
2523 2459
2524 debug_work_activate(&barr->work); 2460 debug_work_activate(&barr->work);
2525 insert_work(cwq, &barr->work, head, 2461 insert_work(pwq, &barr->work, head,
2526 work_color_to_flags(WORK_NO_COLOR) | linked); 2462 work_color_to_flags(WORK_NO_COLOR) | linked);
2527} 2463}
2528 2464
2529/** 2465/**
2530 * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing 2466 * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing
2531 * @wq: workqueue being flushed 2467 * @wq: workqueue being flushed
2532 * @flush_color: new flush color, < 0 for no-op 2468 * @flush_color: new flush color, < 0 for no-op
2533 * @work_color: new work color, < 0 for no-op 2469 * @work_color: new work color, < 0 for no-op
2534 * 2470 *
2535 * Prepare cwqs for workqueue flushing. 2471 * Prepare pwqs for workqueue flushing.
2536 * 2472 *
2537 * If @flush_color is non-negative, flush_color on all cwqs should be 2473 * If @flush_color is non-negative, flush_color on all pwqs should be
2538 * -1. If no cwq has in-flight commands at the specified color, all 2474 * -1. If no pwq has in-flight commands at the specified color, all
2539 * cwq->flush_color's stay at -1 and %false is returned. If any cwq 2475 * pwq->flush_color's stay at -1 and %false is returned. If any pwq
2540 * has in flight commands, its cwq->flush_color is set to 2476 * has in flight commands, its pwq->flush_color is set to
2541 * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq 2477 * @flush_color, @wq->nr_pwqs_to_flush is updated accordingly, pwq
2542 * wakeup logic is armed and %true is returned. 2478 * wakeup logic is armed and %true is returned.
2543 * 2479 *
2544 * The caller should have initialized @wq->first_flusher prior to 2480 * The caller should have initialized @wq->first_flusher prior to
@@ -2546,7 +2482,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
2546 * @flush_color is negative, no flush color update is done and %false 2482 * @flush_color is negative, no flush color update is done and %false
2547 * is returned. 2483 * is returned.
2548 * 2484 *
2549 * If @work_color is non-negative, all cwqs should have the same 2485 * If @work_color is non-negative, all pwqs should have the same
2550 * work_color which is previous to @work_color and all will be 2486 * work_color which is previous to @work_color and all will be
2551 * advanced to @work_color. 2487 * advanced to @work_color.
2552 * 2488 *
@@ -2557,42 +2493,42 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
2557 * %true if @flush_color >= 0 and there's something to flush. %false 2493 * %true if @flush_color >= 0 and there's something to flush. %false
2558 * otherwise. 2494 * otherwise.
2559 */ 2495 */
2560static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq, 2496static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
2561 int flush_color, int work_color) 2497 int flush_color, int work_color)
2562{ 2498{
2563 bool wait = false; 2499 bool wait = false;
2564 unsigned int cpu; 2500 unsigned int cpu;
2565 2501
2566 if (flush_color >= 0) { 2502 if (flush_color >= 0) {
2567 BUG_ON(atomic_read(&wq->nr_cwqs_to_flush)); 2503 BUG_ON(atomic_read(&wq->nr_pwqs_to_flush));
2568 atomic_set(&wq->nr_cwqs_to_flush, 1); 2504 atomic_set(&wq->nr_pwqs_to_flush, 1);
2569 } 2505 }
2570 2506
2571 for_each_cwq_cpu(cpu, wq) { 2507 for_each_pwq_cpu(cpu, wq) {
2572 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 2508 struct pool_workqueue *pwq = get_pwq(cpu, wq);
2573 struct global_cwq *gcwq = cwq->pool->gcwq; 2509 struct worker_pool *pool = pwq->pool;
2574 2510
2575 spin_lock_irq(&gcwq->lock); 2511 spin_lock_irq(&pool->lock);
2576 2512
2577 if (flush_color >= 0) { 2513 if (flush_color >= 0) {
2578 BUG_ON(cwq->flush_color != -1); 2514 BUG_ON(pwq->flush_color != -1);
2579 2515
2580 if (cwq->nr_in_flight[flush_color]) { 2516 if (pwq->nr_in_flight[flush_color]) {
2581 cwq->flush_color = flush_color; 2517 pwq->flush_color = flush_color;
2582 atomic_inc(&wq->nr_cwqs_to_flush); 2518 atomic_inc(&wq->nr_pwqs_to_flush);
2583 wait = true; 2519 wait = true;
2584 } 2520 }
2585 } 2521 }
2586 2522
2587 if (work_color >= 0) { 2523 if (work_color >= 0) {
2588 BUG_ON(work_color != work_next_color(cwq->work_color)); 2524 BUG_ON(work_color != work_next_color(pwq->work_color));
2589 cwq->work_color = work_color; 2525 pwq->work_color = work_color;
2590 } 2526 }
2591 2527
2592 spin_unlock_irq(&gcwq->lock); 2528 spin_unlock_irq(&pool->lock);
2593 } 2529 }
2594 2530
2595 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush)) 2531 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
2596 complete(&wq->first_flusher->done); 2532 complete(&wq->first_flusher->done);
2597 2533
2598 return wait; 2534 return wait;
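
flush_workqueue_prep_pwqs() above leans on an easy-to-miss idiom: nr_pwqs_to_flush starts at 1 rather than 0, so the first flusher's completion cannot fire while the pwqs are still being walked and armed; only the final atomic_dec_and_test(), the one that drops that initial bias, signals completion. The same "bias first, drop it last" pattern in plain C11 atomics (names invented for illustration):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int nr_to_flush;

/* stand-in for complete(&wq->first_flusher->done) */
static void flush_done(void)
{
	puts("flush complete");
}

/* one call per in-flight unit as it drains, plus one for the bias */
static void put_flush_ref(void)
{
	if (atomic_fetch_sub(&nr_to_flush, 1) == 1)
		flush_done();
}

/* returns true if there is anything to wait for */
static bool prep_flush(int in_flight_units)
{
	int i;

	atomic_store(&nr_to_flush, 1);		/* the bias */
	for (i = 0; i < in_flight_units; i++)
		atomic_fetch_add(&nr_to_flush, 1);

	/* drop the bias last: with nothing in flight this completes now */
	put_flush_ref();
	return in_flight_units > 0;
}
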
@@ -2643,7 +2579,7 @@ void flush_workqueue(struct workqueue_struct *wq)
2643 2579
2644 wq->first_flusher = &this_flusher; 2580 wq->first_flusher = &this_flusher;
2645 2581
2646 if (!flush_workqueue_prep_cwqs(wq, wq->flush_color, 2582 if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
2647 wq->work_color)) { 2583 wq->work_color)) {
2648 /* nothing to flush, done */ 2584 /* nothing to flush, done */
2649 wq->flush_color = next_color; 2585 wq->flush_color = next_color;
@@ -2654,7 +2590,7 @@ void flush_workqueue(struct workqueue_struct *wq)
2654 /* wait in queue */ 2590 /* wait in queue */
2655 BUG_ON(wq->flush_color == this_flusher.flush_color); 2591 BUG_ON(wq->flush_color == this_flusher.flush_color);
2656 list_add_tail(&this_flusher.list, &wq->flusher_queue); 2592 list_add_tail(&this_flusher.list, &wq->flusher_queue);
2657 flush_workqueue_prep_cwqs(wq, -1, wq->work_color); 2593 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2658 } 2594 }
2659 } else { 2595 } else {
2660 /* 2596 /*
@@ -2721,7 +2657,7 @@ void flush_workqueue(struct workqueue_struct *wq)
2721 2657
2722 list_splice_tail_init(&wq->flusher_overflow, 2658 list_splice_tail_init(&wq->flusher_overflow,
2723 &wq->flusher_queue); 2659 &wq->flusher_queue);
2724 flush_workqueue_prep_cwqs(wq, -1, wq->work_color); 2660 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2725 } 2661 }
2726 2662
2727 if (list_empty(&wq->flusher_queue)) { 2663 if (list_empty(&wq->flusher_queue)) {
@@ -2731,7 +2667,7 @@ void flush_workqueue(struct workqueue_struct *wq)
2731 2667
2732 /* 2668 /*
2733 * Need to flush more colors. Make the next flusher 2669 * Need to flush more colors. Make the next flusher
2734 * the new first flusher and arm cwqs. 2670 * the new first flusher and arm pwqs.
2735 */ 2671 */
2736 BUG_ON(wq->flush_color == wq->work_color); 2672 BUG_ON(wq->flush_color == wq->work_color);
2737 BUG_ON(wq->flush_color != next->flush_color); 2673 BUG_ON(wq->flush_color != next->flush_color);
@@ -2739,7 +2675,7 @@ void flush_workqueue(struct workqueue_struct *wq)
2739 list_del_init(&next->list); 2675 list_del_init(&next->list);
2740 wq->first_flusher = next; 2676 wq->first_flusher = next;
2741 2677
2742 if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1)) 2678 if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
2743 break; 2679 break;
2744 2680
2745 /* 2681 /*
@@ -2782,13 +2718,13 @@ void drain_workqueue(struct workqueue_struct *wq)
2782reflush: 2718reflush:
2783 flush_workqueue(wq); 2719 flush_workqueue(wq);
2784 2720
2785 for_each_cwq_cpu(cpu, wq) { 2721 for_each_pwq_cpu(cpu, wq) {
2786 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 2722 struct pool_workqueue *pwq = get_pwq(cpu, wq);
2787 bool drained; 2723 bool drained;
2788 2724
2789 spin_lock_irq(&cwq->pool->gcwq->lock); 2725 spin_lock_irq(&pwq->pool->lock);
2790 drained = !cwq->nr_active && list_empty(&cwq->delayed_works); 2726 drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
2791 spin_unlock_irq(&cwq->pool->gcwq->lock); 2727 spin_unlock_irq(&pwq->pool->lock);
2792 2728
2793 if (drained) 2729 if (drained)
2794 continue; 2730 continue;
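
The drain_workqueue() hunk above keeps the old semantics under the new names: it exists for work items that requeue themselves, where a single flush could chase its own tail, and it simply reflushes until every pwq reports no active and no delayed works. A hypothetical self-requeueing user looks like:

#include <linux/workqueue.h>
#include <linux/atomic.h>

static struct workqueue_struct *demo_poll_wq;	/* e.g. alloc_workqueue("demo_poll", 0, 0) */
static struct work_struct demo_poll_work;
static atomic_t demo_poll_stop;

static void demo_poll_fn(struct work_struct *work)
{
	/* ... poll some hardware ... */
	if (!atomic_read(&demo_poll_stop))
		queue_work(demo_poll_wq, work);	/* requeues itself */
}

static void demo_poll_shutdown(void)
{
	atomic_set(&demo_poll_stop, 1);
	/* reflushes until the self-requeueing chain has died out */
	drain_workqueue(demo_poll_wq);
	destroy_workqueue(demo_poll_wq);
}
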
@@ -2810,34 +2746,29 @@ EXPORT_SYMBOL_GPL(drain_workqueue);
2810static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) 2746static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
2811{ 2747{
2812 struct worker *worker = NULL; 2748 struct worker *worker = NULL;
2813 struct global_cwq *gcwq; 2749 struct worker_pool *pool;
2814 struct cpu_workqueue_struct *cwq; 2750 struct pool_workqueue *pwq;
2815 2751
2816 might_sleep(); 2752 might_sleep();
2817 gcwq = get_work_gcwq(work); 2753 pool = get_work_pool(work);
2818 if (!gcwq) 2754 if (!pool)
2819 return false; 2755 return false;
2820 2756
2821 spin_lock_irq(&gcwq->lock); 2757 spin_lock_irq(&pool->lock);
2822 if (!list_empty(&work->entry)) { 2758 /* see the comment in try_to_grab_pending() with the same code */
2823 /* 2759 pwq = get_work_pwq(work);
2824 * See the comment near try_to_grab_pending()->smp_rmb(). 2760 if (pwq) {
2825 * If it was re-queued to a different gcwq under us, we 2761 if (unlikely(pwq->pool != pool))
2826 * are not going to wait.
2827 */
2828 smp_rmb();
2829 cwq = get_work_cwq(work);
2830 if (unlikely(!cwq || gcwq != cwq->pool->gcwq))
2831 goto already_gone; 2762 goto already_gone;
2832 } else { 2763 } else {
2833 worker = find_worker_executing_work(gcwq, work); 2764 worker = find_worker_executing_work(pool, work);
2834 if (!worker) 2765 if (!worker)
2835 goto already_gone; 2766 goto already_gone;
2836 cwq = worker->current_cwq; 2767 pwq = worker->current_pwq;
2837 } 2768 }
2838 2769
2839 insert_wq_barrier(cwq, barr, work, worker); 2770 insert_wq_barrier(pwq, barr, work, worker);
2840 spin_unlock_irq(&gcwq->lock); 2771 spin_unlock_irq(&pool->lock);
2841 2772
2842 /* 2773 /*
2843 * If @max_active is 1 or rescuer is in use, flushing another work 2774 * If @max_active is 1 or rescuer is in use, flushing another work
@@ -2845,15 +2776,15 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
2845 * flusher is not running on the same workqueue by verifying write 2776 * flusher is not running on the same workqueue by verifying write
2846 * access. 2777 * access.
2847 */ 2778 */
2848 if (cwq->wq->saved_max_active == 1 || cwq->wq->flags & WQ_RESCUER) 2779 if (pwq->wq->saved_max_active == 1 || pwq->wq->flags & WQ_RESCUER)
2849 lock_map_acquire(&cwq->wq->lockdep_map); 2780 lock_map_acquire(&pwq->wq->lockdep_map);
2850 else 2781 else
2851 lock_map_acquire_read(&cwq->wq->lockdep_map); 2782 lock_map_acquire_read(&pwq->wq->lockdep_map);
2852 lock_map_release(&cwq->wq->lockdep_map); 2783 lock_map_release(&pwq->wq->lockdep_map);
2853 2784
2854 return true; 2785 return true;
2855already_gone: 2786already_gone:
2856 spin_unlock_irq(&gcwq->lock); 2787 spin_unlock_irq(&pool->lock);
2857 return false; 2788 return false;
2858} 2789}
2859 2790
@@ -2949,8 +2880,7 @@ bool flush_delayed_work(struct delayed_work *dwork)
2949{ 2880{
2950 local_irq_disable(); 2881 local_irq_disable();
2951 if (del_timer_sync(&dwork->timer)) 2882 if (del_timer_sync(&dwork->timer))
2952 __queue_work(dwork->cpu, 2883 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
2953 get_work_cwq(&dwork->work)->wq, &dwork->work);
2954 local_irq_enable(); 2884 local_irq_enable();
2955 return flush_work(&dwork->work); 2885 return flush_work(&dwork->work);
2956} 2886}
@@ -2980,7 +2910,8 @@ bool cancel_delayed_work(struct delayed_work *dwork)
2980 if (unlikely(ret < 0)) 2910 if (unlikely(ret < 0))
2981 return false; 2911 return false;
2982 2912
2983 set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work)); 2913 set_work_pool_and_clear_pending(&dwork->work,
2914 get_work_pool_id(&dwork->work));
2984 local_irq_restore(flags); 2915 local_irq_restore(flags);
2985 return ret; 2916 return ret;
2986} 2917}
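
The two hunks above are where the new dwork->wq field and get_work_pool_id() pay off: flush_delayed_work() can push a pending timer straight into __queue_work() using only what the delayed_work itself carries, and cancel_delayed_work() clears PENDING while recording the last pool for later non-reentrancy checks. A caller-side sketch of the usual pairing, reusing the hypothetical demo_dwork from the earlier example:

#include <linux/workqueue.h>

extern struct delayed_work demo_dwork;	/* the hypothetical dwork from the earlier sketch */

static void demo_teardown(bool wait_for_last_run)
{
	if (wait_for_last_run)
		/* fast-forward a pending timer, then wait for the handler */
		flush_delayed_work(&demo_dwork);
	else
		/*
		 * Clear PENDING if the work is still on the timer or
		 * queued; does not wait for a handler that is already
		 * running (use cancel_delayed_work_sync() for that).
		 */
		cancel_delayed_work(&demo_dwork);
}
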
@@ -3159,46 +3090,46 @@ int keventd_up(void)
3159 return system_wq != NULL; 3090 return system_wq != NULL;
3160} 3091}
3161 3092
3162static int alloc_cwqs(struct workqueue_struct *wq) 3093static int alloc_pwqs(struct workqueue_struct *wq)
3163{ 3094{
3164 /* 3095 /*
3165 * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS. 3096 * pwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
3166 * Make sure that the alignment isn't lower than that of 3097 * Make sure that the alignment isn't lower than that of
3167 * unsigned long long. 3098 * unsigned long long.
3168 */ 3099 */
3169 const size_t size = sizeof(struct cpu_workqueue_struct); 3100 const size_t size = sizeof(struct pool_workqueue);
3170 const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS, 3101 const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
3171 __alignof__(unsigned long long)); 3102 __alignof__(unsigned long long));
3172 3103
3173 if (!(wq->flags & WQ_UNBOUND)) 3104 if (!(wq->flags & WQ_UNBOUND))
3174 wq->cpu_wq.pcpu = __alloc_percpu(size, align); 3105 wq->pool_wq.pcpu = __alloc_percpu(size, align);
3175 else { 3106 else {
3176 void *ptr; 3107 void *ptr;
3177 3108
3178 /* 3109 /*
3179 * Allocate enough room to align cwq and put an extra 3110 * Allocate enough room to align pwq and put an extra
3180 * pointer at the end pointing back to the originally 3111 * pointer at the end pointing back to the originally
3181 * allocated pointer which will be used for free. 3112 * allocated pointer which will be used for free.
3182 */ 3113 */
3183 ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL); 3114 ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL);
3184 if (ptr) { 3115 if (ptr) {
3185 wq->cpu_wq.single = PTR_ALIGN(ptr, align); 3116 wq->pool_wq.single = PTR_ALIGN(ptr, align);
3186 *(void **)(wq->cpu_wq.single + 1) = ptr; 3117 *(void **)(wq->pool_wq.single + 1) = ptr;
3187 } 3118 }
3188 } 3119 }
3189 3120
3190 /* just in case, make sure it's actually aligned */ 3121 /* just in case, make sure it's actually aligned */
3191 BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); 3122 BUG_ON(!IS_ALIGNED(wq->pool_wq.v, align));
3192 return wq->cpu_wq.v ? 0 : -ENOMEM; 3123 return wq->pool_wq.v ? 0 : -ENOMEM;
3193} 3124}
3194 3125
3195static void free_cwqs(struct workqueue_struct *wq) 3126static void free_pwqs(struct workqueue_struct *wq)
3196{ 3127{
3197 if (!(wq->flags & WQ_UNBOUND)) 3128 if (!(wq->flags & WQ_UNBOUND))
3198 free_percpu(wq->cpu_wq.pcpu); 3129 free_percpu(wq->pool_wq.pcpu);
3199 else if (wq->cpu_wq.single) { 3130 else if (wq->pool_wq.single) {
3200 /* the pointer to free is stored right after the cwq */ 3131 /* the pointer to free is stored right after the pwq */
3201 kfree(*(void **)(wq->cpu_wq.single + 1)); 3132 kfree(*(void **)(wq->pool_wq.single + 1));
3202 } 3133 }
3203} 3134}
3204 3135
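
alloc_pwqs()/free_pwqs() above keep the old allocation trick under the new names: the unbound pwq is over-allocated by align + sizeof(void *), the object pointer is rounded up so the low WORK_STRUCT_FLAG_BITS stay clear for use as work_struct flag bits, and the original allocation address is stashed right behind the object so the free path can recover it. A userspace rendition of that trick (illustrative only; plain malloc/free instead of the kernel allocators):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/*
 * Allocate @size bytes aligned to @align (a power of two), remembering
 * the raw pointer right after the object so it can be freed later.
 */
static void *alloc_aligned_with_backptr(size_t size, size_t align)
{
	void *raw = malloc(size + align + sizeof(void *));
	void *obj;

	if (!raw)
		return NULL;

	obj = (void *)(((uintptr_t)raw + align - 1) & ~(uintptr_t)(align - 1));
	memcpy((char *)obj + size, &raw, sizeof(raw));	/* back-pointer */
	return obj;
}

static void free_aligned_with_backptr(void *obj, size_t size)
{
	void *raw;

	memcpy(&raw, (char *)obj + size, sizeof(raw));
	free(raw);
}
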
@@ -3252,27 +3183,25 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
3252 wq->flags = flags; 3183 wq->flags = flags;
3253 wq->saved_max_active = max_active; 3184 wq->saved_max_active = max_active;
3254 mutex_init(&wq->flush_mutex); 3185 mutex_init(&wq->flush_mutex);
3255 atomic_set(&wq->nr_cwqs_to_flush, 0); 3186 atomic_set(&wq->nr_pwqs_to_flush, 0);
3256 INIT_LIST_HEAD(&wq->flusher_queue); 3187 INIT_LIST_HEAD(&wq->flusher_queue);
3257 INIT_LIST_HEAD(&wq->flusher_overflow); 3188 INIT_LIST_HEAD(&wq->flusher_overflow);
3258 3189
3259 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); 3190 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
3260 INIT_LIST_HEAD(&wq->list); 3191 INIT_LIST_HEAD(&wq->list);
3261 3192
3262 if (alloc_cwqs(wq) < 0) 3193 if (alloc_pwqs(wq) < 0)
3263 goto err; 3194 goto err;
3264 3195
3265 for_each_cwq_cpu(cpu, wq) { 3196 for_each_pwq_cpu(cpu, wq) {
3266 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3197 struct pool_workqueue *pwq = get_pwq(cpu, wq);
3267 struct global_cwq *gcwq = get_gcwq(cpu); 3198
3268 int pool_idx = (bool)(flags & WQ_HIGHPRI); 3199 BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
3269 3200 pwq->pool = get_std_worker_pool(cpu, flags & WQ_HIGHPRI);
3270 BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK); 3201 pwq->wq = wq;
3271 cwq->pool = &gcwq->pools[pool_idx]; 3202 pwq->flush_color = -1;
3272 cwq->wq = wq; 3203 pwq->max_active = max_active;
3273 cwq->flush_color = -1; 3204 INIT_LIST_HEAD(&pwq->delayed_works);
3274 cwq->max_active = max_active;
3275 INIT_LIST_HEAD(&cwq->delayed_works);
3276 } 3205 }
3277 3206
3278 if (flags & WQ_RESCUER) { 3207 if (flags & WQ_RESCUER) {
@@ -3285,7 +3214,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
3285 if (!rescuer) 3214 if (!rescuer)
3286 goto err; 3215 goto err;
3287 3216
3288 rescuer->task = kthread_create(rescuer_thread, wq, "%s", 3217 rescuer->rescue_wq = wq;
3218 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s",
3289 wq->name); 3219 wq->name);
3290 if (IS_ERR(rescuer->task)) 3220 if (IS_ERR(rescuer->task))
3291 goto err; 3221 goto err;
@@ -3302,8 +3232,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
3302 spin_lock(&workqueue_lock); 3232 spin_lock(&workqueue_lock);
3303 3233
3304 if (workqueue_freezing && wq->flags & WQ_FREEZABLE) 3234 if (workqueue_freezing && wq->flags & WQ_FREEZABLE)
3305 for_each_cwq_cpu(cpu, wq) 3235 for_each_pwq_cpu(cpu, wq)
3306 get_cwq(cpu, wq)->max_active = 0; 3236 get_pwq(cpu, wq)->max_active = 0;
3307 3237
3308 list_add(&wq->list, &workqueues); 3238 list_add(&wq->list, &workqueues);
3309 3239
@@ -3312,7 +3242,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
3312 return wq; 3242 return wq;
3313err: 3243err:
3314 if (wq) { 3244 if (wq) {
3315 free_cwqs(wq); 3245 free_pwqs(wq);
3316 free_mayday_mask(wq->mayday_mask); 3246 free_mayday_mask(wq->mayday_mask);
3317 kfree(wq->rescuer); 3247 kfree(wq->rescuer);
3318 kfree(wq); 3248 kfree(wq);
@@ -3343,14 +3273,14 @@ void destroy_workqueue(struct workqueue_struct *wq)
3343 spin_unlock(&workqueue_lock); 3273 spin_unlock(&workqueue_lock);
3344 3274
3345 /* sanity check */ 3275 /* sanity check */
3346 for_each_cwq_cpu(cpu, wq) { 3276 for_each_pwq_cpu(cpu, wq) {
3347 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3277 struct pool_workqueue *pwq = get_pwq(cpu, wq);
3348 int i; 3278 int i;
3349 3279
3350 for (i = 0; i < WORK_NR_COLORS; i++) 3280 for (i = 0; i < WORK_NR_COLORS; i++)
3351 BUG_ON(cwq->nr_in_flight[i]); 3281 BUG_ON(pwq->nr_in_flight[i]);
3352 BUG_ON(cwq->nr_active); 3282 BUG_ON(pwq->nr_active);
3353 BUG_ON(!list_empty(&cwq->delayed_works)); 3283 BUG_ON(!list_empty(&pwq->delayed_works));
3354 } 3284 }
3355 3285
3356 if (wq->flags & WQ_RESCUER) { 3286 if (wq->flags & WQ_RESCUER) {
@@ -3359,29 +3289,29 @@ void destroy_workqueue(struct workqueue_struct *wq)
3359 kfree(wq->rescuer); 3289 kfree(wq->rescuer);
3360 } 3290 }
3361 3291
3362 free_cwqs(wq); 3292 free_pwqs(wq);
3363 kfree(wq); 3293 kfree(wq);
3364} 3294}
3365EXPORT_SYMBOL_GPL(destroy_workqueue); 3295EXPORT_SYMBOL_GPL(destroy_workqueue);
3366 3296
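Nothing in the driver-facing allocation API changes with this conversion; WQ_HIGHPRI now simply decides which of the two standard per-CPU pools each pwq attaches to, and a WQ_MEM_RECLAIM workqueue is still the kind that gets a rescuer. A hedged usage sketch (my_wq and the init/exit hooks are hypothetical, not part of this patch):

#include <linux/init.h>
#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;

static int __init my_driver_init(void)
{
	/* high-priority pool, rescuer for reclaim, one in-flight item per pwq */
	my_wq = alloc_workqueue("my_wq", WQ_HIGHPRI | WQ_MEM_RECLAIM, 1);
	if (!my_wq)
		return -ENOMEM;
	return 0;
}

static void __exit my_driver_exit(void)
{
	/* destroy_workqueue() drains and then runs the sanity checks above */
	destroy_workqueue(my_wq);
}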
3367/** 3297/**
3368 * cwq_set_max_active - adjust max_active of a cwq 3298 * pwq_set_max_active - adjust max_active of a pwq
3369 * @cwq: target cpu_workqueue_struct 3299 * @pwq: target pool_workqueue
3370 * @max_active: new max_active value. 3300 * @max_active: new max_active value.
3371 * 3301 *
3372 * Set @cwq->max_active to @max_active and activate delayed works if 3302 * Set @pwq->max_active to @max_active and activate delayed works if
3373 * increased. 3303 * increased.
3374 * 3304 *
3375 * CONTEXT: 3305 * CONTEXT:
3376 * spin_lock_irq(gcwq->lock). 3306 * spin_lock_irq(pool->lock).
3377 */ 3307 */
3378static void cwq_set_max_active(struct cpu_workqueue_struct *cwq, int max_active) 3308static void pwq_set_max_active(struct pool_workqueue *pwq, int max_active)
3379{ 3309{
3380 cwq->max_active = max_active; 3310 pwq->max_active = max_active;
3381 3311
3382 while (!list_empty(&cwq->delayed_works) && 3312 while (!list_empty(&pwq->delayed_works) &&
3383 cwq->nr_active < cwq->max_active) 3313 pwq->nr_active < pwq->max_active)
3384 cwq_activate_first_delayed(cwq); 3314 pwq_activate_first_delayed(pwq);
3385} 3315}
3386 3316
3387/** 3317/**
@@ -3404,16 +3334,17 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
3404 3334
3405 wq->saved_max_active = max_active; 3335 wq->saved_max_active = max_active;
3406 3336
3407 for_each_cwq_cpu(cpu, wq) { 3337 for_each_pwq_cpu(cpu, wq) {
3408 struct global_cwq *gcwq = get_gcwq(cpu); 3338 struct pool_workqueue *pwq = get_pwq(cpu, wq);
3339 struct worker_pool *pool = pwq->pool;
3409 3340
3410 spin_lock_irq(&gcwq->lock); 3341 spin_lock_irq(&pool->lock);
3411 3342
3412 if (!(wq->flags & WQ_FREEZABLE) || 3343 if (!(wq->flags & WQ_FREEZABLE) ||
3413 !(gcwq->flags & GCWQ_FREEZING)) 3344 !(pool->flags & POOL_FREEZING))
3414 cwq_set_max_active(get_cwq(gcwq->cpu, wq), max_active); 3345 pwq_set_max_active(pwq, max_active);
3415 3346
3416 spin_unlock_irq(&gcwq->lock); 3347 spin_unlock_irq(&pool->lock);
3417 } 3348 }
3418 3349
3419 spin_unlock(&workqueue_lock); 3350 spin_unlock(&workqueue_lock);
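workqueue_set_max_active() now consults the per-pool POOL_FREEZING bit instead of a gcwq-wide flag, but its caller-visible behaviour is unchanged: raising the limit immediately releases throttled (delayed) items. A brief sketch, with my_wq hypothetical:

#include <linux/workqueue.h>

static void my_set_throttle(struct workqueue_struct *my_wq, bool tight)
{
	/* per-pwq limit: lowering it only delays newly issued items,
	 * raising it activates works parked on pwq->delayed_works */
	workqueue_set_max_active(my_wq, tight ? 1 : WQ_DFL_ACTIVE);
}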
@@ -3434,57 +3365,38 @@ EXPORT_SYMBOL_GPL(workqueue_set_max_active);
3434 */ 3365 */
3435bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq) 3366bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq)
3436{ 3367{
3437 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3368 struct pool_workqueue *pwq = get_pwq(cpu, wq);
3438 3369
3439 return !list_empty(&cwq->delayed_works); 3370 return !list_empty(&pwq->delayed_works);
3440} 3371}
3441EXPORT_SYMBOL_GPL(workqueue_congested); 3372EXPORT_SYMBOL_GPL(workqueue_congested);
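workqueue_congested() reports whether the pwq for a given CPU is currently holding works back on its delayed_works list; since nothing is locked, the answer is only an advisory hint. A sketch of the usual back-off pattern (my_try_queue is hypothetical):

#include <linux/smp.h>
#include <linux/workqueue.h>

static bool my_try_queue(struct workqueue_struct *my_wq,
			 struct work_struct *work)
{
	int cpu = get_cpu();
	bool queued = false;

	/* advisory: the pwq may congest or drain right after the check */
	if (!workqueue_congested(cpu, my_wq))
		queued = queue_work_on(cpu, my_wq, work);
	put_cpu();
	return queued;
}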
3442 3373
3443/** 3374/**
3444 * work_cpu - return the last known associated cpu for @work
3445 * @work: the work of interest
3446 *
3447 * RETURNS:
3448 * CPU number if @work was ever queued. WORK_CPU_NONE otherwise.
3449 */
3450unsigned int work_cpu(struct work_struct *work)
3451{
3452 struct global_cwq *gcwq = get_work_gcwq(work);
3453
3454 return gcwq ? gcwq->cpu : WORK_CPU_NONE;
3455}
3456EXPORT_SYMBOL_GPL(work_cpu);
3457
3458/**
3459 * work_busy - test whether a work is currently pending or running 3375 * work_busy - test whether a work is currently pending or running
3460 * @work: the work to be tested 3376 * @work: the work to be tested
3461 * 3377 *
3462 * Test whether @work is currently pending or running. There is no 3378 * Test whether @work is currently pending or running. There is no
3463 * synchronization around this function and the test result is 3379 * synchronization around this function and the test result is
3464 * unreliable and only useful as advisory hints or for debugging. 3380 * unreliable and only useful as advisory hints or for debugging.
3465 * Especially for reentrant wqs, the pending state might hide the
3466 * running state.
3467 * 3381 *
3468 * RETURNS: 3382 * RETURNS:
3469 * OR'd bitmask of WORK_BUSY_* bits. 3383 * OR'd bitmask of WORK_BUSY_* bits.
3470 */ 3384 */
3471unsigned int work_busy(struct work_struct *work) 3385unsigned int work_busy(struct work_struct *work)
3472{ 3386{
3473 struct global_cwq *gcwq = get_work_gcwq(work); 3387 struct worker_pool *pool = get_work_pool(work);
3474 unsigned long flags; 3388 unsigned long flags;
3475 unsigned int ret = 0; 3389 unsigned int ret = 0;
3476 3390
3477 if (!gcwq)
3478 return false;
3479
3480 spin_lock_irqsave(&gcwq->lock, flags);
3481
3482 if (work_pending(work)) 3391 if (work_pending(work))
3483 ret |= WORK_BUSY_PENDING; 3392 ret |= WORK_BUSY_PENDING;
3484 if (find_worker_executing_work(gcwq, work))
3485 ret |= WORK_BUSY_RUNNING;
3486 3393
3487 spin_unlock_irqrestore(&gcwq->lock, flags); 3394 if (pool) {
3395 spin_lock_irqsave(&pool->lock, flags);
3396 if (find_worker_executing_work(pool, work))
3397 ret |= WORK_BUSY_RUNNING;
3398 spin_unlock_irqrestore(&pool->lock, flags);
3399 }
3488 3400
3489 return ret; 3401 return ret;
3490} 3402}
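With per-pool locking, work_busy() only needs the pool lock for the RUNNING half; PENDING is still read locklessly, so the result remains a snapshot. A small sketch of treating it purely as a hint (my_report is hypothetical):

#include <linux/printk.h>
#include <linux/workqueue.h>

static void my_report(struct work_struct *my_work)
{
	unsigned int busy = work_busy(my_work);

	/* both bits can be stale by the time they are acted upon */
	if (busy & WORK_BUSY_PENDING)
		pr_debug("work is queued but has not started\n");
	if (busy & WORK_BUSY_RUNNING)
		pr_debug("work is executing on some worker\n");
}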
@@ -3494,86 +3406,75 @@ EXPORT_SYMBOL_GPL(work_busy);
3494 * CPU hotplug. 3406 * CPU hotplug.
3495 * 3407 *
3496 * There are two challenges in supporting CPU hotplug. Firstly, there 3408 * There are two challenges in supporting CPU hotplug. Firstly, there
3497 * are a lot of assumptions on strong associations among work, cwq and 3409 * are a lot of assumptions on strong associations among work, pwq and
3498 * gcwq which make migrating pending and scheduled works very 3410 * pool which make migrating pending and scheduled works very
3499 * difficult to implement without impacting hot paths. Secondly, 3411 * difficult to implement without impacting hot paths. Secondly,
3500 * gcwqs serve mix of short, long and very long running works making 3412 * worker pools serve a mix of short, long and very long running works, making
3501 * blocked draining impractical. 3413 * blocked draining impractical.
3502 * 3414 *
3503 * This is solved by allowing a gcwq to be disassociated from the CPU 3415 * This is solved by allowing the pools to be disassociated from the CPU
3504 * running as an unbound one and allowing it to be reattached later if the 3416 * running as unbound ones and allowing them to be reattached later if the
3505 * cpu comes back online. 3417 * cpu comes back online.
3506 */ 3418 */
3507 3419
3508/* claim manager positions of all pools */ 3420static void wq_unbind_fn(struct work_struct *work)
3509static void gcwq_claim_assoc_and_lock(struct global_cwq *gcwq)
3510{
3511 struct worker_pool *pool;
3512
3513 for_each_worker_pool(pool, gcwq)
3514 mutex_lock_nested(&pool->assoc_mutex, pool - gcwq->pools);
3515 spin_lock_irq(&gcwq->lock);
3516}
3517
3518/* release manager positions */
3519static void gcwq_release_assoc_and_unlock(struct global_cwq *gcwq)
3520{
3521 struct worker_pool *pool;
3522
3523 spin_unlock_irq(&gcwq->lock);
3524 for_each_worker_pool(pool, gcwq)
3525 mutex_unlock(&pool->assoc_mutex);
3526}
3527
3528static void gcwq_unbind_fn(struct work_struct *work)
3529{ 3421{
3530 struct global_cwq *gcwq = get_gcwq(smp_processor_id()); 3422 int cpu = smp_processor_id();
3531 struct worker_pool *pool; 3423 struct worker_pool *pool;
3532 struct worker *worker; 3424 struct worker *worker;
3533 struct hlist_node *pos;
3534 int i; 3425 int i;
3535 3426
3536 BUG_ON(gcwq->cpu != smp_processor_id()); 3427 for_each_std_worker_pool(pool, cpu) {
3428 BUG_ON(cpu != smp_processor_id());
3537 3429
3538 gcwq_claim_assoc_and_lock(gcwq); 3430 mutex_lock(&pool->assoc_mutex);
3431 spin_lock_irq(&pool->lock);
3539 3432
3540 /* 3433 /*
3541 * We've claimed all manager positions. Make all workers unbound 3434 * We've claimed all manager positions. Make all workers
3542 * and set DISASSOCIATED. Before this, all workers except for the 3435 * unbound and set DISASSOCIATED. Before this, all workers
3543 * ones which are still executing works from before the last CPU 3436 * except for the ones which are still executing works from
3544 * down must be on the cpu. After this, they may become diasporas. 3437 * before the last CPU down must be on the cpu. After
3545 */ 3438 * this, they may become diasporas.
3546 for_each_worker_pool(pool, gcwq) 3439 */
3547 list_for_each_entry(worker, &pool->idle_list, entry) 3440 list_for_each_entry(worker, &pool->idle_list, entry)
3548 worker->flags |= WORKER_UNBOUND; 3441 worker->flags |= WORKER_UNBOUND;
3549 3442
3550 for_each_busy_worker(worker, i, pos, gcwq) 3443 for_each_busy_worker(worker, i, pool)
3551 worker->flags |= WORKER_UNBOUND; 3444 worker->flags |= WORKER_UNBOUND;
3552 3445
3553 gcwq->flags |= GCWQ_DISASSOCIATED; 3446 pool->flags |= POOL_DISASSOCIATED;
3554 3447
3555 gcwq_release_assoc_and_unlock(gcwq); 3448 spin_unlock_irq(&pool->lock);
3449 mutex_unlock(&pool->assoc_mutex);
3556 3450
3557 /* 3451 /*
3558 * Call schedule() so that we cross rq->lock and thus can guarantee 3452 * Call schedule() so that we cross rq->lock and thus can
3559 * sched callbacks see the %WORKER_UNBOUND flag. This is necessary 3453 * guarantee sched callbacks see the %WORKER_UNBOUND flag.
3560 * as scheduler callbacks may be invoked from other cpus. 3454 * This is necessary as scheduler callbacks may be invoked
3561 */ 3455 * from other cpus.
3562 schedule(); 3456 */
3457 schedule();
3563 3458
3564 /* 3459 /*
3565 * Sched callbacks are disabled now. Zap nr_running. After this, 3460 * Sched callbacks are disabled now. Zap nr_running.
3566 * nr_running stays zero and need_more_worker() and keep_working() 3461 * After this, nr_running stays zero and need_more_worker()
3567 * are always true as long as the worklist is not empty. @gcwq now 3462 * and keep_working() are always true as long as the
3568 * behaves as unbound (in terms of concurrency management) gcwq 3463 * worklist is not empty. This pool now behaves as an
3569 * which is served by workers tied to the CPU. 3464 * unbound (in terms of concurrency management) pool which
3570 * 3465 * is served by workers tied to the pool.
3571 * On return from this function, the current worker would trigger 3466 */
3572 * unbound chain execution of pending work items if other workers 3467 atomic_set(&pool->nr_running, 0);
3573 * didn't already. 3468
3574 */ 3469 /*
3575 for_each_worker_pool(pool, gcwq) 3470 * With concurrency management just turned off, a busy
3576 atomic_set(get_pool_nr_running(pool), 0); 3471 * worker blocking could lead to lengthy stalls. Kick off
3472 * unbound chain execution of currently pending work items.
3473 */
3474 spin_lock_irq(&pool->lock);
3475 wake_up_worker(pool);
3476 spin_unlock_irq(&pool->lock);
3477 }
3577} 3478}
3578 3479
3579/* 3480/*
@@ -3585,12 +3486,11 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
3585 void *hcpu) 3486 void *hcpu)
3586{ 3487{
3587 unsigned int cpu = (unsigned long)hcpu; 3488 unsigned int cpu = (unsigned long)hcpu;
3588 struct global_cwq *gcwq = get_gcwq(cpu);
3589 struct worker_pool *pool; 3489 struct worker_pool *pool;
3590 3490
3591 switch (action & ~CPU_TASKS_FROZEN) { 3491 switch (action & ~CPU_TASKS_FROZEN) {
3592 case CPU_UP_PREPARE: 3492 case CPU_UP_PREPARE:
3593 for_each_worker_pool(pool, gcwq) { 3493 for_each_std_worker_pool(pool, cpu) {
3594 struct worker *worker; 3494 struct worker *worker;
3595 3495
3596 if (pool->nr_workers) 3496 if (pool->nr_workers)
@@ -3600,18 +3500,24 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
3600 if (!worker) 3500 if (!worker)
3601 return NOTIFY_BAD; 3501 return NOTIFY_BAD;
3602 3502
3603 spin_lock_irq(&gcwq->lock); 3503 spin_lock_irq(&pool->lock);
3604 start_worker(worker); 3504 start_worker(worker);
3605 spin_unlock_irq(&gcwq->lock); 3505 spin_unlock_irq(&pool->lock);
3606 } 3506 }
3607 break; 3507 break;
3608 3508
3609 case CPU_DOWN_FAILED: 3509 case CPU_DOWN_FAILED:
3610 case CPU_ONLINE: 3510 case CPU_ONLINE:
3611 gcwq_claim_assoc_and_lock(gcwq); 3511 for_each_std_worker_pool(pool, cpu) {
3612 gcwq->flags &= ~GCWQ_DISASSOCIATED; 3512 mutex_lock(&pool->assoc_mutex);
3613 rebind_workers(gcwq); 3513 spin_lock_irq(&pool->lock);
3614 gcwq_release_assoc_and_unlock(gcwq); 3514
3515 pool->flags &= ~POOL_DISASSOCIATED;
3516 rebind_workers(pool);
3517
3518 spin_unlock_irq(&pool->lock);
3519 mutex_unlock(&pool->assoc_mutex);
3520 }
3615 break; 3521 break;
3616 } 3522 }
3617 return NOTIFY_OK; 3523 return NOTIFY_OK;
@@ -3631,7 +3537,7 @@ static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb,
3631 switch (action & ~CPU_TASKS_FROZEN) { 3537 switch (action & ~CPU_TASKS_FROZEN) {
3632 case CPU_DOWN_PREPARE: 3538 case CPU_DOWN_PREPARE:
3633 /* unbinding should happen on the local CPU */ 3539 /* unbinding should happen on the local CPU */
3634 INIT_WORK_ONSTACK(&unbind_work, gcwq_unbind_fn); 3540 INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
3635 queue_work_on(cpu, system_highpri_wq, &unbind_work); 3541 queue_work_on(cpu, system_highpri_wq, &unbind_work);
3636 flush_work(&unbind_work); 3542 flush_work(&unbind_work);
3637 break; 3543 break;
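The CPU_DOWN_PREPARE branch shows the standard recipe for running a function in process context on a specific CPU: an on-stack work item queued with queue_work_on() and flushed before the stack frame goes away. The same pattern as a stand-alone sketch (names hypothetical, not the kernel's work_on_cpu() implementation):

#include <linux/workqueue.h>

static void my_percpu_fn(struct work_struct *work)
{
	/* executes on the CPU the work item was queued on (barring hotplug) */
}

static void my_run_on_cpu(int cpu)
{
	struct work_struct w;

	INIT_WORK_ONSTACK(&w, my_percpu_fn);
	queue_work_on(cpu, system_highpri_wq, &w);
	flush_work(&w);		/* must complete before 'w' leaves scope */
}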
@@ -3684,10 +3590,10 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
3684 * 3590 *
3685 * Start freezing workqueues. After this function returns, all freezable 3591 * Start freezing workqueues. After this function returns, all freezable
3686 * workqueues will queue new works to their frozen_works list instead of 3592 * workqueues will queue new works to their frozen_works list instead of
3687 * gcwq->worklist. 3593 * pool->worklist.
3688 * 3594 *
3689 * CONTEXT: 3595 * CONTEXT:
3690 * Grabs and releases workqueue_lock and gcwq->lock's. 3596 * Grabs and releases workqueue_lock and pool->lock's.
3691 */ 3597 */
3692void freeze_workqueues_begin(void) 3598void freeze_workqueues_begin(void)
3693{ 3599{
@@ -3698,23 +3604,26 @@ void freeze_workqueues_begin(void)
3698 BUG_ON(workqueue_freezing); 3604 BUG_ON(workqueue_freezing);
3699 workqueue_freezing = true; 3605 workqueue_freezing = true;
3700 3606
3701 for_each_gcwq_cpu(cpu) { 3607 for_each_wq_cpu(cpu) {
3702 struct global_cwq *gcwq = get_gcwq(cpu); 3608 struct worker_pool *pool;
3703 struct workqueue_struct *wq; 3609 struct workqueue_struct *wq;
3704 3610
3705 spin_lock_irq(&gcwq->lock); 3611 for_each_std_worker_pool(pool, cpu) {
3612 spin_lock_irq(&pool->lock);
3706 3613
3707 BUG_ON(gcwq->flags & GCWQ_FREEZING); 3614 WARN_ON_ONCE(pool->flags & POOL_FREEZING);
3708 gcwq->flags |= GCWQ_FREEZING; 3615 pool->flags |= POOL_FREEZING;
3709 3616
3710 list_for_each_entry(wq, &workqueues, list) { 3617 list_for_each_entry(wq, &workqueues, list) {
3711 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3618 struct pool_workqueue *pwq = get_pwq(cpu, wq);
3712 3619
3713 if (cwq && wq->flags & WQ_FREEZABLE) 3620 if (pwq && pwq->pool == pool &&
3714 cwq->max_active = 0; 3621 (wq->flags & WQ_FREEZABLE))
3715 } 3622 pwq->max_active = 0;
3623 }
3716 3624
3717 spin_unlock_irq(&gcwq->lock); 3625 spin_unlock_irq(&pool->lock);
3626 }
3718 } 3627 }
3719 3628
3720 spin_unlock(&workqueue_lock); 3629 spin_unlock(&workqueue_lock);
@@ -3742,20 +3651,20 @@ bool freeze_workqueues_busy(void)
3742 3651
3743 BUG_ON(!workqueue_freezing); 3652 BUG_ON(!workqueue_freezing);
3744 3653
3745 for_each_gcwq_cpu(cpu) { 3654 for_each_wq_cpu(cpu) {
3746 struct workqueue_struct *wq; 3655 struct workqueue_struct *wq;
3747 /* 3656 /*
3748 * nr_active is monotonically decreasing. It's safe 3657 * nr_active is monotonically decreasing. It's safe
3749 * to peek without lock. 3658 * to peek without lock.
3750 */ 3659 */
3751 list_for_each_entry(wq, &workqueues, list) { 3660 list_for_each_entry(wq, &workqueues, list) {
3752 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3661 struct pool_workqueue *pwq = get_pwq(cpu, wq);
3753 3662
3754 if (!cwq || !(wq->flags & WQ_FREEZABLE)) 3663 if (!pwq || !(wq->flags & WQ_FREEZABLE))
3755 continue; 3664 continue;
3756 3665
3757 BUG_ON(cwq->nr_active < 0); 3666 BUG_ON(pwq->nr_active < 0);
3758 if (cwq->nr_active) { 3667 if (pwq->nr_active) {
3759 busy = true; 3668 busy = true;
3760 goto out_unlock; 3669 goto out_unlock;
3761 } 3670 }
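freeze_workqueues_begin(), freeze_workqueues_busy() and thaw_workqueues() are meant to be called in sequence from the suspend path: park freezable workqueues, poll until in-flight items drain, then restore max_active. A schematic of that ordering (a sketch only, not the actual PM core code):

#include <linux/delay.h>
#include <linux/workqueue.h>

static void my_freeze_cycle(void)
{
	/* freezable pwqs get max_active = 0: new work is parked */
	freeze_workqueues_begin();

	/* nr_active only decreases now; wait for it to reach zero */
	while (freeze_workqueues_busy())
		msleep(10);

	/* ... image/suspend work happens while everything is parked ... */

	/* restore max_active and reissue the parked items */
	thaw_workqueues();
}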
@@ -3770,10 +3679,10 @@ out_unlock:
3770 * thaw_workqueues - thaw workqueues 3679 * thaw_workqueues - thaw workqueues
3771 * 3680 *
3772 * Thaw workqueues. Normal queueing is restored and all collected 3681 * Thaw workqueues. Normal queueing is restored and all collected
3773 * frozen works are transferred to their respective gcwq worklists. 3682 * frozen works are transferred to their respective pool worklists.
3774 * 3683 *
3775 * CONTEXT: 3684 * CONTEXT:
3776 * Grabs and releases workqueue_lock and gcwq->lock's. 3685 * Grabs and releases workqueue_lock and pool->lock's.
3777 */ 3686 */
3778void thaw_workqueues(void) 3687void thaw_workqueues(void)
3779{ 3688{
@@ -3784,30 +3693,31 @@ void thaw_workqueues(void)
3784 if (!workqueue_freezing) 3693 if (!workqueue_freezing)
3785 goto out_unlock; 3694 goto out_unlock;
3786 3695
3787 for_each_gcwq_cpu(cpu) { 3696 for_each_wq_cpu(cpu) {
3788 struct global_cwq *gcwq = get_gcwq(cpu);
3789 struct worker_pool *pool; 3697 struct worker_pool *pool;
3790 struct workqueue_struct *wq; 3698 struct workqueue_struct *wq;
3791 3699
3792 spin_lock_irq(&gcwq->lock); 3700 for_each_std_worker_pool(pool, cpu) {
3701 spin_lock_irq(&pool->lock);
3793 3702
3794 BUG_ON(!(gcwq->flags & GCWQ_FREEZING)); 3703 WARN_ON_ONCE(!(pool->flags & POOL_FREEZING));
3795 gcwq->flags &= ~GCWQ_FREEZING; 3704 pool->flags &= ~POOL_FREEZING;
3796 3705
3797 list_for_each_entry(wq, &workqueues, list) { 3706 list_for_each_entry(wq, &workqueues, list) {
3798 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3707 struct pool_workqueue *pwq = get_pwq(cpu, wq);
3799 3708
3800 if (!cwq || !(wq->flags & WQ_FREEZABLE)) 3709 if (!pwq || pwq->pool != pool ||
3801 continue; 3710 !(wq->flags & WQ_FREEZABLE))
3711 continue;
3802 3712
3803 /* restore max_active and repopulate worklist */ 3713 /* restore max_active and repopulate worklist */
3804 cwq_set_max_active(cwq, wq->saved_max_active); 3714 pwq_set_max_active(pwq, wq->saved_max_active);
3805 } 3715 }
3806 3716
3807 for_each_worker_pool(pool, gcwq)
3808 wake_up_worker(pool); 3717 wake_up_worker(pool);
3809 3718
3810 spin_unlock_irq(&gcwq->lock); 3719 spin_unlock_irq(&pool->lock);
3720 }
3811 } 3721 }
3812 3722
3813 workqueue_freezing = false; 3723 workqueue_freezing = false;
@@ -3819,60 +3729,56 @@ out_unlock:
3819static int __init init_workqueues(void) 3729static int __init init_workqueues(void)
3820{ 3730{
3821 unsigned int cpu; 3731 unsigned int cpu;
3822 int i;
3823 3732
3824 /* make sure we have enough bits for OFFQ CPU number */ 3733 /* make sure we have enough bits for OFFQ pool ID */
3825 BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT)) < 3734 BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) <
3826 WORK_CPU_LAST); 3735 WORK_CPU_END * NR_STD_WORKER_POOLS);
3827 3736
3828 cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); 3737 cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
3829 hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); 3738 hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
3830 3739
3831 /* initialize gcwqs */ 3740 /* initialize CPU pools */
3832 for_each_gcwq_cpu(cpu) { 3741 for_each_wq_cpu(cpu) {
3833 struct global_cwq *gcwq = get_gcwq(cpu);
3834 struct worker_pool *pool; 3742 struct worker_pool *pool;
3835 3743
3836 spin_lock_init(&gcwq->lock); 3744 for_each_std_worker_pool(pool, cpu) {
3837 gcwq->cpu = cpu; 3745 spin_lock_init(&pool->lock);
3838 gcwq->flags |= GCWQ_DISASSOCIATED; 3746 pool->cpu = cpu;
3839 3747 pool->flags |= POOL_DISASSOCIATED;
3840 for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
3841 INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
3842
3843 for_each_worker_pool(pool, gcwq) {
3844 pool->gcwq = gcwq;
3845 INIT_LIST_HEAD(&pool->worklist); 3748 INIT_LIST_HEAD(&pool->worklist);
3846 INIT_LIST_HEAD(&pool->idle_list); 3749 INIT_LIST_HEAD(&pool->idle_list);
3750 hash_init(pool->busy_hash);
3847 3751
3848 init_timer_deferrable(&pool->idle_timer); 3752 init_timer_deferrable(&pool->idle_timer);
3849 pool->idle_timer.function = idle_worker_timeout; 3753 pool->idle_timer.function = idle_worker_timeout;
3850 pool->idle_timer.data = (unsigned long)pool; 3754 pool->idle_timer.data = (unsigned long)pool;
3851 3755
3852 setup_timer(&pool->mayday_timer, gcwq_mayday_timeout, 3756 setup_timer(&pool->mayday_timer, pool_mayday_timeout,
3853 (unsigned long)pool); 3757 (unsigned long)pool);
3854 3758
3855 mutex_init(&pool->assoc_mutex); 3759 mutex_init(&pool->assoc_mutex);
3856 ida_init(&pool->worker_ida); 3760 ida_init(&pool->worker_ida);
3761
3762 /* alloc pool ID */
3763 BUG_ON(worker_pool_assign_id(pool));
3857 } 3764 }
3858 } 3765 }
3859 3766
3860 /* create the initial worker */ 3767 /* create the initial worker */
3861 for_each_online_gcwq_cpu(cpu) { 3768 for_each_online_wq_cpu(cpu) {
3862 struct global_cwq *gcwq = get_gcwq(cpu);
3863 struct worker_pool *pool; 3769 struct worker_pool *pool;
3864 3770
3865 if (cpu != WORK_CPU_UNBOUND) 3771 for_each_std_worker_pool(pool, cpu) {
3866 gcwq->flags &= ~GCWQ_DISASSOCIATED;
3867
3868 for_each_worker_pool(pool, gcwq) {
3869 struct worker *worker; 3772 struct worker *worker;
3870 3773
3774 if (cpu != WORK_CPU_UNBOUND)
3775 pool->flags &= ~POOL_DISASSOCIATED;
3776
3871 worker = create_worker(pool); 3777 worker = create_worker(pool);
3872 BUG_ON(!worker); 3778 BUG_ON(!worker);
3873 spin_lock_irq(&gcwq->lock); 3779 spin_lock_irq(&pool->lock);
3874 start_worker(worker); 3780 start_worker(worker);
3875 spin_unlock_irq(&gcwq->lock); 3781 spin_unlock_irq(&pool->lock);
3876 } 3782 }
3877 } 3783 }
3878 3784