author     Linus Torvalds <torvalds@linux-foundation.org>  2013-02-20 01:01:33 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-02-20 01:01:33 -0500
commit     67cb104b4c30bd52292b6a7f526349aab2dd5cbd (patch)
tree       b7d2659f9c7bfe676016680339c0ffe47ef29afd
parent     1eaec8212e35aef6606a4e8b40aa9ad9ba87672a (diff)
parent     1438ade5670b56d5386c220e1ad4b5a824a1e585 (diff)
Merge branch 'for-3.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue changes from Tejun Heo:
 "A lot of reorganization is going on, mostly to prepare for worker
  pools with custom attributes so that workqueue can replace custom pool
  implementations in places including writeback and btrfs, and make CPU
  assignment in crypto more flexible.

  workqueue evolved from a purely per-cpu design and implementation, so
  there are a lot of assumptions about being bound to CPUs, and even
  unbound workqueues are implemented as an extension of that model -
  workqueues running on the special unbound CPU.  The bulk of the
  changes this round are about promoting worker_pools as the top level
  abstraction replacing global_cwq (global cpu workqueue).  At this
  point, I'm fairly confident about getting custom worker pools working
  pretty soon and ready for the next merge window.

  Lai's patches replace the convoluted mb() dancing workqueue has been
  doing with a much simpler mechanism which only depends on assignment
  atomicity of long.  For details, please read the commit message of
  0b3dae68ac ("workqueue: simplify is-work-item-queued-here test").
  While the change ends up adding one pointer to struct delayed_work,
  the inflation in percentage is less than five percent and it decouples
  delayed_work logic a lot more cleanly from usual work handling,
  removes the unusual memory barrier dancing, and allows for further
  simplification, so I think the trade-off is acceptable.

  There will be two more workqueue related pull requests and there are
  some shared commits among them.  I'll write further pull requests
  assuming this pull request is pulled first."

* 'for-3.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq: (37 commits)
  workqueue: un-GPL function delayed_work_timer_fn()
  workqueue: rename cpu_workqueue to pool_workqueue
  workqueue: reimplement is_chained_work() using current_wq_worker()
  workqueue: fix is_chained_work() regression
  workqueue: pick cwq instead of pool in __queue_work()
  workqueue: make get_work_pool_id() cheaper
  workqueue: move nr_running into worker_pool
  workqueue: cosmetic update in try_to_grab_pending()
  workqueue: simplify is-work-item-queued-here test
  workqueue: make work->data point to pool after try_to_grab_pending()
  workqueue: add delayed_work->wq to simplify reentrancy handling
  workqueue: make work_busy() test WORK_STRUCT_PENDING first
  workqueue: replace WORK_CPU_NONE/LAST with WORK_CPU_END
  workqueue: post global_cwq removal cleanups
  workqueue: rename nr_running variables
  workqueue: remove global_cwq
  workqueue: remove worker_pool->gcwq
  workqueue: replace for_each_worker_pool() with for_each_std_worker_pool()
  workqueue: make freezing/thawing per-pool
  workqueue: make hotplug processing per-pool
  ...
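To make the new work->data layout concrete, the sketch below packs and unpacks a pool ID the way the series does once a work item goes off-queue: the ID sits in the high bits above the flag bits, WORK_OFFQ_POOL_NONE marks "no associated pool", and because everything lives in one long, a plain store/load is enough to publish it - which is what lets the series drop the smp_wmb()/smp_rmb() pairing between insert_work() and try_to_grab_pending(). This is a userspace illustration only, not kernel code: the flag-bit count is an assumed stand-in for the real WORK_STRUCT_FLAG_BITS, and the helper names only loosely mirror the kernel's get_work_pool_id()/set_work_pool_and_*() functions shown in the diff below.

#include <assert.h>
#include <stdio.h>

/*
 * Illustrative stand-ins for the constants the series adds in
 * include/linux/workqueue.h.  The real WORK_STRUCT_FLAG_BITS is derived
 * from the flush color bits (and debugobjects); 8 here is an assumption.
 */
#define WORK_STRUCT_FLAG_BITS	8
#define WORK_OFFQ_FLAG_BITS	1	/* WORK_OFFQ_CANCELING */
#define WORK_OFFQ_POOL_SHIFT	(WORK_STRUCT_FLAG_BITS + WORK_OFFQ_FLAG_BITS)
#define BITS_PER_LONG		((int)(sizeof(long) * 8))
#define WORK_OFFQ_LEFT		(BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)
#define WORK_OFFQ_POOL_BITS	(WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31)
#define WORK_OFFQ_POOL_NONE	((1UL << WORK_OFFQ_POOL_BITS) - 1)

/* Point an off-queue work item's data word at the pool it last ran on. */
static unsigned long set_work_pool_id(unsigned long flags, unsigned long pool_id)
{
	return (pool_id << WORK_OFFQ_POOL_SHIFT) | flags;
}

/* Recover the pool ID; WORK_OFFQ_POOL_NONE means "never been queued". */
static unsigned long get_work_pool_id(unsigned long data)
{
	return data >> WORK_OFFQ_POOL_SHIFT;
}

int main(void)
{
	/* One long carries flags plus pool ID, so a single assignment publishes both. */
	unsigned long data = set_work_pool_id(0, 42);

	assert(get_work_pool_id(data) == 42);

	data = set_work_pool_id(0, WORK_OFFQ_POOL_NONE);	/* i.e. WORK_STRUCT_NO_POOL */
	printf("pool id %lu means: no associated pool\n", get_work_pool_id(data));
	return 0;
}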
-rw-r--r--  include/linux/async.h                 1
-rw-r--r--  include/linux/workqueue.h            35
-rw-r--r--  include/trace/events/workqueue.h     10
-rw-r--r--  kernel/async.c                       14
-rw-r--r--  kernel/sched/core.c                   2
-rw-r--r--  kernel/workqueue.c                 1530
-rw-r--r--  kernel/workqueue_internal.h          65
-rw-r--r--  kernel/workqueue_sched.h              9
8 files changed, 818 insertions, 848 deletions
diff --git a/include/linux/async.h b/include/linux/async.h
index 7a24fe9b44b4..345169cfa304 100644
--- a/include/linux/async.h
+++ b/include/linux/async.h
@@ -52,4 +52,5 @@ extern void async_synchronize_full_domain(struct async_domain *domain);
52extern void async_synchronize_cookie(async_cookie_t cookie); 52extern void async_synchronize_cookie(async_cookie_t cookie);
53extern void async_synchronize_cookie_domain(async_cookie_t cookie, 53extern void async_synchronize_cookie_domain(async_cookie_t cookie,
54 struct async_domain *domain); 54 struct async_domain *domain);
55extern bool current_is_async(void);
55#endif 56#endif
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 2b58905d3504..8afab27cdbc2 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -27,7 +27,7 @@ void delayed_work_timer_fn(unsigned long __data);
27enum { 27enum {
28 WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ 28 WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */
29 WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */ 29 WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */
30 WORK_STRUCT_CWQ_BIT = 2, /* data points to cwq */ 30 WORK_STRUCT_PWQ_BIT = 2, /* data points to pwq */
31 WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */ 31 WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */
32#ifdef CONFIG_DEBUG_OBJECTS_WORK 32#ifdef CONFIG_DEBUG_OBJECTS_WORK
33 WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */ 33 WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */
@@ -40,7 +40,7 @@ enum {
40 40
41 WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, 41 WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT,
42 WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT, 42 WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT,
43 WORK_STRUCT_CWQ = 1 << WORK_STRUCT_CWQ_BIT, 43 WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT,
44 WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, 44 WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT,
45#ifdef CONFIG_DEBUG_OBJECTS_WORK 45#ifdef CONFIG_DEBUG_OBJECTS_WORK
46 WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT, 46 WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT,
@@ -57,29 +57,36 @@ enum {
57 57
58 /* special cpu IDs */ 58 /* special cpu IDs */
59 WORK_CPU_UNBOUND = NR_CPUS, 59 WORK_CPU_UNBOUND = NR_CPUS,
60 WORK_CPU_NONE = NR_CPUS + 1, 60 WORK_CPU_END = NR_CPUS + 1,
61 WORK_CPU_LAST = WORK_CPU_NONE,
62 61
63 /* 62 /*
64 * Reserve 7 bits off of cwq pointer w/ debugobjects turned 63 * Reserve 7 bits off of pwq pointer w/ debugobjects turned off.
65 * off. This makes cwqs aligned to 256 bytes and allows 15 64 * This makes pwqs aligned to 256 bytes and allows 15 workqueue
66 * workqueue flush colors. 65 * flush colors.
67 */ 66 */
68 WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + 67 WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT +
69 WORK_STRUCT_COLOR_BITS, 68 WORK_STRUCT_COLOR_BITS,
70 69
71 /* data contains off-queue information when !WORK_STRUCT_CWQ */ 70 /* data contains off-queue information when !WORK_STRUCT_PWQ */
72 WORK_OFFQ_FLAG_BASE = WORK_STRUCT_FLAG_BITS, 71 WORK_OFFQ_FLAG_BASE = WORK_STRUCT_FLAG_BITS,
73 72
74 WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE), 73 WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE),
75 74
75 /*
76 * When a work item is off queue, its high bits point to the last
77 * pool it was on. Cap at 31 bits and use the highest number to
78 * indicate that no pool is associated.
79 */
76 WORK_OFFQ_FLAG_BITS = 1, 80 WORK_OFFQ_FLAG_BITS = 1,
77 WORK_OFFQ_CPU_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, 81 WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
82 WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT,
83 WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
84 WORK_OFFQ_POOL_NONE = (1LU << WORK_OFFQ_POOL_BITS) - 1,
78 85
79 /* convenience constants */ 86 /* convenience constants */
80 WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1, 87 WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1,
81 WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK, 88 WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
82 WORK_STRUCT_NO_CPU = (unsigned long)WORK_CPU_NONE << WORK_OFFQ_CPU_SHIFT, 89 WORK_STRUCT_NO_POOL = (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT,
83 90
84 /* bit mask for work_busy() return values */ 91 /* bit mask for work_busy() return values */
85 WORK_BUSY_PENDING = 1 << 0, 92 WORK_BUSY_PENDING = 1 << 0,
@@ -95,13 +102,16 @@ struct work_struct {
95#endif 102#endif
96}; 103};
97 104
98#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU) 105#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL)
99#define WORK_DATA_STATIC_INIT() \ 106#define WORK_DATA_STATIC_INIT() \
100 ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU | WORK_STRUCT_STATIC) 107 ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC)
101 108
102struct delayed_work { 109struct delayed_work {
103 struct work_struct work; 110 struct work_struct work;
104 struct timer_list timer; 111 struct timer_list timer;
112
113 /* target workqueue and CPU ->timer uses to queue ->work */
114 struct workqueue_struct *wq;
105 int cpu; 115 int cpu;
106}; 116};
107 117
@@ -426,7 +436,6 @@ extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
426extern void workqueue_set_max_active(struct workqueue_struct *wq, 436extern void workqueue_set_max_active(struct workqueue_struct *wq,
427 int max_active); 437 int max_active);
428extern bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq); 438extern bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq);
429extern unsigned int work_cpu(struct work_struct *work);
430extern unsigned int work_busy(struct work_struct *work); 439extern unsigned int work_busy(struct work_struct *work);
431 440
432/* 441/*
diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
index f28d1b65f178..bf0e18ba6cfb 100644
--- a/include/trace/events/workqueue.h
+++ b/include/trace/events/workqueue.h
@@ -27,7 +27,7 @@ DECLARE_EVENT_CLASS(workqueue_work,
27/** 27/**
28 * workqueue_queue_work - called when a work gets queued 28 * workqueue_queue_work - called when a work gets queued
29 * @req_cpu: the requested cpu 29 * @req_cpu: the requested cpu
30 * @cwq: pointer to struct cpu_workqueue_struct 30 * @pwq: pointer to struct pool_workqueue
31 * @work: pointer to struct work_struct 31 * @work: pointer to struct work_struct
32 * 32 *
33 * This event occurs when a work is queued immediately or once a 33 * This event occurs when a work is queued immediately or once a
@@ -36,10 +36,10 @@ DECLARE_EVENT_CLASS(workqueue_work,
36 */ 36 */
37TRACE_EVENT(workqueue_queue_work, 37TRACE_EVENT(workqueue_queue_work,
38 38
39 TP_PROTO(unsigned int req_cpu, struct cpu_workqueue_struct *cwq, 39 TP_PROTO(unsigned int req_cpu, struct pool_workqueue *pwq,
40 struct work_struct *work), 40 struct work_struct *work),
41 41
42 TP_ARGS(req_cpu, cwq, work), 42 TP_ARGS(req_cpu, pwq, work),
43 43
44 TP_STRUCT__entry( 44 TP_STRUCT__entry(
45 __field( void *, work ) 45 __field( void *, work )
@@ -52,9 +52,9 @@ TRACE_EVENT(workqueue_queue_work,
52 TP_fast_assign( 52 TP_fast_assign(
53 __entry->work = work; 53 __entry->work = work;
54 __entry->function = work->func; 54 __entry->function = work->func;
55 __entry->workqueue = cwq->wq; 55 __entry->workqueue = pwq->wq;
56 __entry->req_cpu = req_cpu; 56 __entry->req_cpu = req_cpu;
57 __entry->cpu = cwq->pool->gcwq->cpu; 57 __entry->cpu = pwq->pool->cpu;
58 ), 58 ),
59 59
60 TP_printk("work struct=%p function=%pf workqueue=%p req_cpu=%u cpu=%u", 60 TP_printk("work struct=%p function=%pf workqueue=%p req_cpu=%u cpu=%u",
diff --git a/kernel/async.c b/kernel/async.c
index 6f34904a0b53..6c68fc3fae7b 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -57,6 +57,8 @@ asynchronous and synchronous parts of the kernel.
57#include <linux/slab.h> 57#include <linux/slab.h>
58#include <linux/workqueue.h> 58#include <linux/workqueue.h>
59 59
60#include "workqueue_internal.h"
61
60static async_cookie_t next_cookie = 1; 62static async_cookie_t next_cookie = 1;
61 63
62#define MAX_WORK 32768 64#define MAX_WORK 32768
@@ -353,3 +355,15 @@ void async_synchronize_cookie(async_cookie_t cookie)
353 async_synchronize_cookie_domain(cookie, &async_running); 355 async_synchronize_cookie_domain(cookie, &async_running);
354} 356}
355EXPORT_SYMBOL_GPL(async_synchronize_cookie); 357EXPORT_SYMBOL_GPL(async_synchronize_cookie);
358
359/**
360 * current_is_async - is %current an async worker task?
361 *
362 * Returns %true if %current is an async worker task.
363 */
364bool current_is_async(void)
365{
366 struct worker *worker = current_wq_worker();
367
368 return worker && worker->current_func == async_run_entry_fn;
369}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4a88f1d51563..03d7784b7bd2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -83,7 +83,7 @@
83#endif 83#endif
84 84
85#include "sched.h" 85#include "sched.h"
86#include "../workqueue_sched.h" 86#include "../workqueue_internal.h"
87#include "../smpboot.h" 87#include "../smpboot.h"
88 88
89#define CREATE_TRACE_POINTS 89#define CREATE_TRACE_POINTS
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index fbc6576a83c3..f4feacad3812 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -41,32 +41,31 @@
41#include <linux/debug_locks.h> 41#include <linux/debug_locks.h>
42#include <linux/lockdep.h> 42#include <linux/lockdep.h>
43#include <linux/idr.h> 43#include <linux/idr.h>
44#include <linux/hashtable.h>
44 45
45#include "workqueue_sched.h" 46#include "workqueue_internal.h"
46 47
47enum { 48enum {
48 /* 49 /*
49 * global_cwq flags 50 * worker_pool flags
50 * 51 *
51 * A bound gcwq is either associated or disassociated with its CPU. 52 * A bound pool is either associated or disassociated with its CPU.
52 * While associated (!DISASSOCIATED), all workers are bound to the 53 * While associated (!DISASSOCIATED), all workers are bound to the
53 * CPU and none has %WORKER_UNBOUND set and concurrency management 54 * CPU and none has %WORKER_UNBOUND set and concurrency management
54 * is in effect. 55 * is in effect.
55 * 56 *
56 * While DISASSOCIATED, the cpu may be offline and all workers have 57 * While DISASSOCIATED, the cpu may be offline and all workers have
57 * %WORKER_UNBOUND set and concurrency management disabled, and may 58 * %WORKER_UNBOUND set and concurrency management disabled, and may
58 * be executing on any CPU. The gcwq behaves as an unbound one. 59 * be executing on any CPU. The pool behaves as an unbound one.
59 * 60 *
60 * Note that DISASSOCIATED can be flipped only while holding 61 * Note that DISASSOCIATED can be flipped only while holding
61 * assoc_mutex of all pools on the gcwq to avoid changing binding 62 * assoc_mutex to avoid changing binding state while
62 * state while create_worker() is in progress. 63 * create_worker() is in progress.
63 */ 64 */
64 GCWQ_DISASSOCIATED = 1 << 0, /* cpu can't serve workers */
65 GCWQ_FREEZING = 1 << 1, /* freeze in progress */
66
67 /* pool flags */
68 POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ 65 POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
69 POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */ 66 POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */
67 POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
68 POOL_FREEZING = 1 << 3, /* freeze in progress */
70 69
71 /* worker flags */ 70 /* worker flags */
72 WORKER_STARTED = 1 << 0, /* started */ 71 WORKER_STARTED = 1 << 0, /* started */
@@ -79,11 +78,9 @@ enum {
79 WORKER_NOT_RUNNING = WORKER_PREP | WORKER_UNBOUND | 78 WORKER_NOT_RUNNING = WORKER_PREP | WORKER_UNBOUND |
80 WORKER_CPU_INTENSIVE, 79 WORKER_CPU_INTENSIVE,
81 80
82 NR_WORKER_POOLS = 2, /* # worker pools per gcwq */ 81 NR_STD_WORKER_POOLS = 2, /* # standard pools per cpu */
83 82
84 BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */ 83 BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */
85 BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER,
86 BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1,
87 84
88 MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ 85 MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */
89 IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ 86 IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */
@@ -111,48 +108,24 @@ enum {
111 * P: Preemption protected. Disabling preemption is enough and should 108 * P: Preemption protected. Disabling preemption is enough and should
112 * only be modified and accessed from the local cpu. 109 * only be modified and accessed from the local cpu.
113 * 110 *
114 * L: gcwq->lock protected. Access with gcwq->lock held. 111 * L: pool->lock protected. Access with pool->lock held.
115 * 112 *
116 * X: During normal operation, modification requires gcwq->lock and 113 * X: During normal operation, modification requires pool->lock and should
117 * should be done only from local cpu. Either disabling preemption 114 * be done only from local cpu. Either disabling preemption on local
118 * on local cpu or grabbing gcwq->lock is enough for read access. 115 * cpu or grabbing pool->lock is enough for read access. If
119 * If GCWQ_DISASSOCIATED is set, it's identical to L. 116 * POOL_DISASSOCIATED is set, it's identical to L.
120 * 117 *
121 * F: wq->flush_mutex protected. 118 * F: wq->flush_mutex protected.
122 * 119 *
123 * W: workqueue_lock protected. 120 * W: workqueue_lock protected.
124 */ 121 */
125 122
126struct global_cwq; 123/* struct worker is defined in workqueue_internal.h */
127struct worker_pool;
128
129/*
130 * The poor guys doing the actual heavy lifting. All on-duty workers
131 * are either serving the manager role, on idle list or on busy hash.
132 */
133struct worker {
134 /* on idle list while idle, on busy hash table while busy */
135 union {
136 struct list_head entry; /* L: while idle */
137 struct hlist_node hentry; /* L: while busy */
138 };
139
140 struct work_struct *current_work; /* L: work being processed */
141 struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
142 struct list_head scheduled; /* L: scheduled works */
143 struct task_struct *task; /* I: worker task */
144 struct worker_pool *pool; /* I: the associated pool */
145 /* 64 bytes boundary on 64bit, 32 on 32bit */
146 unsigned long last_active; /* L: last active timestamp */
147 unsigned int flags; /* X: flags */
148 int id; /* I: worker id */
149
150 /* for rebinding worker to CPU */
151 struct work_struct rebind_work; /* L: for busy worker */
152};
153 124
154struct worker_pool { 125struct worker_pool {
155 struct global_cwq *gcwq; /* I: the owning gcwq */ 126 spinlock_t lock; /* the pool lock */
127 unsigned int cpu; /* I: the associated cpu */
128 int id; /* I: pool ID */
156 unsigned int flags; /* X: flags */ 129 unsigned int flags; /* X: flags */
157 130
158 struct list_head worklist; /* L: list of pending works */ 131 struct list_head worklist; /* L: list of pending works */
@@ -165,34 +138,28 @@ struct worker_pool {
165 struct timer_list idle_timer; /* L: worker idle timeout */ 138 struct timer_list idle_timer; /* L: worker idle timeout */
166 struct timer_list mayday_timer; /* L: SOS timer for workers */ 139 struct timer_list mayday_timer; /* L: SOS timer for workers */
167 140
168 struct mutex assoc_mutex; /* protect GCWQ_DISASSOCIATED */ 141 /* workers are chained either in busy_hash or idle_list */
169 struct ida worker_ida; /* L: for worker IDs */ 142 DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
170};
171
172/*
173 * Global per-cpu workqueue. There's one and only one for each cpu
174 * and all works are queued and processed here regardless of their
175 * target workqueues.
176 */
177struct global_cwq {
178 spinlock_t lock; /* the gcwq lock */
179 unsigned int cpu; /* I: the associated cpu */
180 unsigned int flags; /* L: GCWQ_* flags */
181
182 /* workers are chained either in busy_hash or pool idle_list */
183 struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE];
184 /* L: hash of busy workers */ 143 /* L: hash of busy workers */
185 144
186 struct worker_pool pools[NR_WORKER_POOLS]; 145 struct mutex assoc_mutex; /* protect POOL_DISASSOCIATED */
187 /* normal and highpri pools */ 146 struct ida worker_ida; /* L: for worker IDs */
147
148 /*
149 * The current concurrency level. As it's likely to be accessed
150 * from other CPUs during try_to_wake_up(), put it in a separate
151 * cacheline.
152 */
153 atomic_t nr_running ____cacheline_aligned_in_smp;
188} ____cacheline_aligned_in_smp; 154} ____cacheline_aligned_in_smp;
189 155
190/* 156/*
191 * The per-CPU workqueue. The lower WORK_STRUCT_FLAG_BITS of 157 * The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS
192 * work_struct->data are used for flags and thus cwqs need to be 158 * of work_struct->data are used for flags and the remaining high bits
193 * aligned at two's power of the number of flag bits. 159 * point to the pwq; thus, pwqs need to be aligned at two's power of the
160 * number of flag bits.
194 */ 161 */
195struct cpu_workqueue_struct { 162struct pool_workqueue {
196 struct worker_pool *pool; /* I: the associated pool */ 163 struct worker_pool *pool; /* I: the associated pool */
197 struct workqueue_struct *wq; /* I: the owning workqueue */ 164 struct workqueue_struct *wq; /* I: the owning workqueue */
198 int work_color; /* L: current color */ 165 int work_color; /* L: current color */
@@ -241,16 +208,16 @@ typedef unsigned long mayday_mask_t;
241struct workqueue_struct { 208struct workqueue_struct {
242 unsigned int flags; /* W: WQ_* flags */ 209 unsigned int flags; /* W: WQ_* flags */
243 union { 210 union {
244 struct cpu_workqueue_struct __percpu *pcpu; 211 struct pool_workqueue __percpu *pcpu;
245 struct cpu_workqueue_struct *single; 212 struct pool_workqueue *single;
246 unsigned long v; 213 unsigned long v;
247 } cpu_wq; /* I: cwq's */ 214 } pool_wq; /* I: pwq's */
248 struct list_head list; /* W: list of all workqueues */ 215 struct list_head list; /* W: list of all workqueues */
249 216
250 struct mutex flush_mutex; /* protects wq flushing */ 217 struct mutex flush_mutex; /* protects wq flushing */
251 int work_color; /* F: current work color */ 218 int work_color; /* F: current work color */
252 int flush_color; /* F: current flush color */ 219 int flush_color; /* F: current flush color */
253 atomic_t nr_cwqs_to_flush; /* flush in progress */ 220 atomic_t nr_pwqs_to_flush; /* flush in progress */
254 struct wq_flusher *first_flusher; /* F: first flusher */ 221 struct wq_flusher *first_flusher; /* F: first flusher */
255 struct list_head flusher_queue; /* F: flush waiters */ 222 struct list_head flusher_queue; /* F: flush waiters */
256 struct list_head flusher_overflow; /* F: flush overflow list */ 223 struct list_head flusher_overflow; /* F: flush overflow list */
@@ -259,7 +226,7 @@ struct workqueue_struct {
259 struct worker *rescuer; /* I: rescue worker */ 226 struct worker *rescuer; /* I: rescue worker */
260 227
261 int nr_drainers; /* W: drain in progress */ 228 int nr_drainers; /* W: drain in progress */
262 int saved_max_active; /* W: saved cwq max_active */ 229 int saved_max_active; /* W: saved pwq max_active */
263#ifdef CONFIG_LOCKDEP 230#ifdef CONFIG_LOCKDEP
264 struct lockdep_map lockdep_map; 231 struct lockdep_map lockdep_map;
265#endif 232#endif
@@ -280,16 +247,15 @@ EXPORT_SYMBOL_GPL(system_freezable_wq);
280#define CREATE_TRACE_POINTS 247#define CREATE_TRACE_POINTS
281#include <trace/events/workqueue.h> 248#include <trace/events/workqueue.h>
282 249
283#define for_each_worker_pool(pool, gcwq) \ 250#define for_each_std_worker_pool(pool, cpu) \
284 for ((pool) = &(gcwq)->pools[0]; \ 251 for ((pool) = &std_worker_pools(cpu)[0]; \
285 (pool) < &(gcwq)->pools[NR_WORKER_POOLS]; (pool)++) 252 (pool) < &std_worker_pools(cpu)[NR_STD_WORKER_POOLS]; (pool)++)
286 253
287#define for_each_busy_worker(worker, i, pos, gcwq) \ 254#define for_each_busy_worker(worker, i, pos, pool) \
288 for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ 255 hash_for_each(pool->busy_hash, i, pos, worker, hentry)
289 hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry)
290 256
291static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask, 257static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
292 unsigned int sw) 258 unsigned int sw)
293{ 259{
294 if (cpu < nr_cpu_ids) { 260 if (cpu < nr_cpu_ids) {
295 if (sw & 1) { 261 if (sw & 1) {
@@ -300,42 +266,42 @@ static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask,
300 if (sw & 2) 266 if (sw & 2)
301 return WORK_CPU_UNBOUND; 267 return WORK_CPU_UNBOUND;
302 } 268 }
303 return WORK_CPU_NONE; 269 return WORK_CPU_END;
304} 270}
305 271
306static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, 272static inline int __next_pwq_cpu(int cpu, const struct cpumask *mask,
307 struct workqueue_struct *wq) 273 struct workqueue_struct *wq)
308{ 274{
309 return __next_gcwq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2); 275 return __next_wq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2);
310} 276}
311 277
312/* 278/*
313 * CPU iterators 279 * CPU iterators
314 * 280 *
315 * An extra gcwq is defined for an invalid cpu number 281 * An extra cpu number is defined using an invalid cpu number
316 * (WORK_CPU_UNBOUND) to host workqueues which are not bound to any 282 * (WORK_CPU_UNBOUND) to host workqueues which are not bound to any
317 * specific CPU. The following iterators are similar to 283 * specific CPU. The following iterators are similar to for_each_*_cpu()
318 * for_each_*_cpu() iterators but also considers the unbound gcwq. 284 * iterators but also considers the unbound CPU.
319 * 285 *
320 * for_each_gcwq_cpu() : possible CPUs + WORK_CPU_UNBOUND 286 * for_each_wq_cpu() : possible CPUs + WORK_CPU_UNBOUND
321 * for_each_online_gcwq_cpu() : online CPUs + WORK_CPU_UNBOUND 287 * for_each_online_wq_cpu() : online CPUs + WORK_CPU_UNBOUND
322 * for_each_cwq_cpu() : possible CPUs for bound workqueues, 288 * for_each_pwq_cpu() : possible CPUs for bound workqueues,
323 * WORK_CPU_UNBOUND for unbound workqueues 289 * WORK_CPU_UNBOUND for unbound workqueues
324 */ 290 */
325#define for_each_gcwq_cpu(cpu) \ 291#define for_each_wq_cpu(cpu) \
326 for ((cpu) = __next_gcwq_cpu(-1, cpu_possible_mask, 3); \ 292 for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, 3); \
327 (cpu) < WORK_CPU_NONE; \ 293 (cpu) < WORK_CPU_END; \
328 (cpu) = __next_gcwq_cpu((cpu), cpu_possible_mask, 3)) 294 (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, 3))
329 295
330#define for_each_online_gcwq_cpu(cpu) \ 296#define for_each_online_wq_cpu(cpu) \
331 for ((cpu) = __next_gcwq_cpu(-1, cpu_online_mask, 3); \ 297 for ((cpu) = __next_wq_cpu(-1, cpu_online_mask, 3); \
332 (cpu) < WORK_CPU_NONE; \ 298 (cpu) < WORK_CPU_END; \
333 (cpu) = __next_gcwq_cpu((cpu), cpu_online_mask, 3)) 299 (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3))
334 300
335#define for_each_cwq_cpu(cpu, wq) \ 301#define for_each_pwq_cpu(cpu, wq) \
336 for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, (wq)); \ 302 for ((cpu) = __next_pwq_cpu(-1, cpu_possible_mask, (wq)); \
337 (cpu) < WORK_CPU_NONE; \ 303 (cpu) < WORK_CPU_END; \
338 (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq))) 304 (cpu) = __next_pwq_cpu((cpu), cpu_possible_mask, (wq)))
339 305
340#ifdef CONFIG_DEBUG_OBJECTS_WORK 306#ifdef CONFIG_DEBUG_OBJECTS_WORK
341 307
@@ -459,57 +425,69 @@ static LIST_HEAD(workqueues);
459static bool workqueue_freezing; /* W: have wqs started freezing? */ 425static bool workqueue_freezing; /* W: have wqs started freezing? */
460 426
461/* 427/*
462 * The almighty global cpu workqueues. nr_running is the only field 428 * The CPU and unbound standard worker pools. The unbound ones have
463 * which is expected to be used frequently by other cpus via 429 * POOL_DISASSOCIATED set, and their workers have WORKER_UNBOUND set.
464 * try_to_wake_up(). Put it in a separate cacheline.
465 */ 430 */
466static DEFINE_PER_CPU(struct global_cwq, global_cwq); 431static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
467static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, pool_nr_running[NR_WORKER_POOLS]); 432 cpu_std_worker_pools);
433static struct worker_pool unbound_std_worker_pools[NR_STD_WORKER_POOLS];
468 434
469/* 435/* idr of all pools */
470 * Global cpu workqueue and nr_running counter for unbound gcwq. The 436static DEFINE_MUTEX(worker_pool_idr_mutex);
471 * gcwq is always online, has GCWQ_DISASSOCIATED set, and all its 437static DEFINE_IDR(worker_pool_idr);
472 * workers have WORKER_UNBOUND set.
473 */
474static struct global_cwq unbound_global_cwq;
475static atomic_t unbound_pool_nr_running[NR_WORKER_POOLS] = {
476 [0 ... NR_WORKER_POOLS - 1] = ATOMIC_INIT(0), /* always 0 */
477};
478 438
479static int worker_thread(void *__worker); 439static int worker_thread(void *__worker);
480 440
481static int worker_pool_pri(struct worker_pool *pool) 441static struct worker_pool *std_worker_pools(int cpu)
482{ 442{
483 return pool - pool->gcwq->pools; 443 if (cpu != WORK_CPU_UNBOUND)
444 return per_cpu(cpu_std_worker_pools, cpu);
445 else
446 return unbound_std_worker_pools;
484} 447}
485 448
486static struct global_cwq *get_gcwq(unsigned int cpu) 449static int std_worker_pool_pri(struct worker_pool *pool)
487{ 450{
488 if (cpu != WORK_CPU_UNBOUND) 451 return pool - std_worker_pools(pool->cpu);
489 return &per_cpu(global_cwq, cpu);
490 else
491 return &unbound_global_cwq;
492} 452}
493 453
494static atomic_t *get_pool_nr_running(struct worker_pool *pool) 454/* allocate ID and assign it to @pool */
455static int worker_pool_assign_id(struct worker_pool *pool)
495{ 456{
496 int cpu = pool->gcwq->cpu; 457 int ret;
497 int idx = worker_pool_pri(pool);
498 458
499 if (cpu != WORK_CPU_UNBOUND) 459 mutex_lock(&worker_pool_idr_mutex);
500 return &per_cpu(pool_nr_running, cpu)[idx]; 460 idr_pre_get(&worker_pool_idr, GFP_KERNEL);
501 else 461 ret = idr_get_new(&worker_pool_idr, pool, &pool->id);
502 return &unbound_pool_nr_running[idx]; 462 mutex_unlock(&worker_pool_idr_mutex);
463
464 return ret;
503} 465}
504 466
505static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, 467/*
506 struct workqueue_struct *wq) 468 * Lookup worker_pool by id. The idr currently is built during boot and
469 * never modified. Don't worry about locking for now.
470 */
471static struct worker_pool *worker_pool_by_id(int pool_id)
472{
473 return idr_find(&worker_pool_idr, pool_id);
474}
475
476static struct worker_pool *get_std_worker_pool(int cpu, bool highpri)
477{
478 struct worker_pool *pools = std_worker_pools(cpu);
479
480 return &pools[highpri];
481}
482
483static struct pool_workqueue *get_pwq(unsigned int cpu,
484 struct workqueue_struct *wq)
507{ 485{
508 if (!(wq->flags & WQ_UNBOUND)) { 486 if (!(wq->flags & WQ_UNBOUND)) {
509 if (likely(cpu < nr_cpu_ids)) 487 if (likely(cpu < nr_cpu_ids))
510 return per_cpu_ptr(wq->cpu_wq.pcpu, cpu); 488 return per_cpu_ptr(wq->pool_wq.pcpu, cpu);
511 } else if (likely(cpu == WORK_CPU_UNBOUND)) 489 } else if (likely(cpu == WORK_CPU_UNBOUND))
512 return wq->cpu_wq.single; 490 return wq->pool_wq.single;
513 return NULL; 491 return NULL;
514} 492}
515 493
@@ -530,19 +508,19 @@ static int work_next_color(int color)
530} 508}
531 509
532/* 510/*
533 * While queued, %WORK_STRUCT_CWQ is set and non flag bits of a work's data 511 * While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data
534 * contain the pointer to the queued cwq. Once execution starts, the flag 512 * contain the pointer to the queued pwq. Once execution starts, the flag
535 * is cleared and the high bits contain OFFQ flags and CPU number. 513 * is cleared and the high bits contain OFFQ flags and pool ID.
536 * 514 *
537 * set_work_cwq(), set_work_cpu_and_clear_pending(), mark_work_canceling() 515 * set_work_pwq(), set_work_pool_and_clear_pending(), mark_work_canceling()
538 * and clear_work_data() can be used to set the cwq, cpu or clear 516 * and clear_work_data() can be used to set the pwq, pool or clear
539 * work->data. These functions should only be called while the work is 517 * work->data. These functions should only be called while the work is
540 * owned - ie. while the PENDING bit is set. 518 * owned - ie. while the PENDING bit is set.
541 * 519 *
542 * get_work_[g]cwq() can be used to obtain the gcwq or cwq corresponding to 520 * get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq
543 * a work. gcwq is available once the work has been queued anywhere after 521 * corresponding to a work. Pool is available once the work has been
544 * initialization until it is sync canceled. cwq is available only while 522 * queued anywhere after initialization until it is sync canceled. pwq is
545 * the work item is queued. 523 * available only while the work item is queued.
546 * 524 *
547 * %WORK_OFFQ_CANCELING is used to mark a work item which is being 525 * %WORK_OFFQ_CANCELING is used to mark a work item which is being
548 * canceled. While being canceled, a work item may have its PENDING set 526 * canceled. While being canceled, a work item may have its PENDING set
@@ -556,16 +534,22 @@ static inline void set_work_data(struct work_struct *work, unsigned long data,
556 atomic_long_set(&work->data, data | flags | work_static(work)); 534 atomic_long_set(&work->data, data | flags | work_static(work));
557} 535}
558 536
559static void set_work_cwq(struct work_struct *work, 537static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
560 struct cpu_workqueue_struct *cwq,
561 unsigned long extra_flags) 538 unsigned long extra_flags)
562{ 539{
563 set_work_data(work, (unsigned long)cwq, 540 set_work_data(work, (unsigned long)pwq,
564 WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags); 541 WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
565} 542}
566 543
567static void set_work_cpu_and_clear_pending(struct work_struct *work, 544static void set_work_pool_and_keep_pending(struct work_struct *work,
568 unsigned int cpu) 545 int pool_id)
546{
547 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
548 WORK_STRUCT_PENDING);
549}
550
551static void set_work_pool_and_clear_pending(struct work_struct *work,
552 int pool_id)
569{ 553{
570 /* 554 /*
571 * The following wmb is paired with the implied mb in 555 * The following wmb is paired with the implied mb in
@@ -574,67 +558,92 @@ static void set_work_cpu_and_clear_pending(struct work_struct *work,
574 * owner. 558 * owner.
575 */ 559 */
576 smp_wmb(); 560 smp_wmb();
577 set_work_data(work, (unsigned long)cpu << WORK_OFFQ_CPU_SHIFT, 0); 561 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
578} 562}
579 563
580static void clear_work_data(struct work_struct *work) 564static void clear_work_data(struct work_struct *work)
581{ 565{
582 smp_wmb(); /* see set_work_cpu_and_clear_pending() */ 566 smp_wmb(); /* see set_work_pool_and_clear_pending() */
583 set_work_data(work, WORK_STRUCT_NO_CPU, 0); 567 set_work_data(work, WORK_STRUCT_NO_POOL, 0);
584} 568}
585 569
586static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work) 570static struct pool_workqueue *get_work_pwq(struct work_struct *work)
587{ 571{
588 unsigned long data = atomic_long_read(&work->data); 572 unsigned long data = atomic_long_read(&work->data);
589 573
590 if (data & WORK_STRUCT_CWQ) 574 if (data & WORK_STRUCT_PWQ)
591 return (void *)(data & WORK_STRUCT_WQ_DATA_MASK); 575 return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
592 else 576 else
593 return NULL; 577 return NULL;
594} 578}
595 579
596static struct global_cwq *get_work_gcwq(struct work_struct *work) 580/**
581 * get_work_pool - return the worker_pool a given work was associated with
582 * @work: the work item of interest
583 *
584 * Return the worker_pool @work was last associated with. %NULL if none.
585 */
586static struct worker_pool *get_work_pool(struct work_struct *work)
597{ 587{
598 unsigned long data = atomic_long_read(&work->data); 588 unsigned long data = atomic_long_read(&work->data);
599 unsigned int cpu; 589 struct worker_pool *pool;
590 int pool_id;
600 591
601 if (data & WORK_STRUCT_CWQ) 592 if (data & WORK_STRUCT_PWQ)
602 return ((struct cpu_workqueue_struct *) 593 return ((struct pool_workqueue *)
603 (data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq; 594 (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
604 595
605 cpu = data >> WORK_OFFQ_CPU_SHIFT; 596 pool_id = data >> WORK_OFFQ_POOL_SHIFT;
606 if (cpu == WORK_CPU_NONE) 597 if (pool_id == WORK_OFFQ_POOL_NONE)
607 return NULL; 598 return NULL;
608 599
609 BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND); 600 pool = worker_pool_by_id(pool_id);
610 return get_gcwq(cpu); 601 WARN_ON_ONCE(!pool);
602 return pool;
603}
604
605/**
606 * get_work_pool_id - return the worker pool ID a given work is associated with
607 * @work: the work item of interest
608 *
609 * Return the worker_pool ID @work was last associated with.
610 * %WORK_OFFQ_POOL_NONE if none.
611 */
612static int get_work_pool_id(struct work_struct *work)
613{
614 unsigned long data = atomic_long_read(&work->data);
615
616 if (data & WORK_STRUCT_PWQ)
617 return ((struct pool_workqueue *)
618 (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
619
620 return data >> WORK_OFFQ_POOL_SHIFT;
611} 621}
612 622
613static void mark_work_canceling(struct work_struct *work) 623static void mark_work_canceling(struct work_struct *work)
614{ 624{
615 struct global_cwq *gcwq = get_work_gcwq(work); 625 unsigned long pool_id = get_work_pool_id(work);
616 unsigned long cpu = gcwq ? gcwq->cpu : WORK_CPU_NONE;
617 626
618 set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING, 627 pool_id <<= WORK_OFFQ_POOL_SHIFT;
619 WORK_STRUCT_PENDING); 628 set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
620} 629}
621 630
622static bool work_is_canceling(struct work_struct *work) 631static bool work_is_canceling(struct work_struct *work)
623{ 632{
624 unsigned long data = atomic_long_read(&work->data); 633 unsigned long data = atomic_long_read(&work->data);
625 634
626 return !(data & WORK_STRUCT_CWQ) && (data & WORK_OFFQ_CANCELING); 635 return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
627} 636}
628 637
629/* 638/*
630 * Policy functions. These define the policies on how the global worker 639 * Policy functions. These define the policies on how the global worker
631 * pools are managed. Unless noted otherwise, these functions assume that 640 * pools are managed. Unless noted otherwise, these functions assume that
632 * they're being called with gcwq->lock held. 641 * they're being called with pool->lock held.
633 */ 642 */
634 643
635static bool __need_more_worker(struct worker_pool *pool) 644static bool __need_more_worker(struct worker_pool *pool)
636{ 645{
637 return !atomic_read(get_pool_nr_running(pool)); 646 return !atomic_read(&pool->nr_running);
638} 647}
639 648
640/* 649/*
@@ -642,7 +651,7 @@ static bool __need_more_worker(struct worker_pool *pool)
642 * running workers. 651 * running workers.
643 * 652 *
644 * Note that, because unbound workers never contribute to nr_running, this 653 * Note that, because unbound workers never contribute to nr_running, this
645 * function will always return %true for unbound gcwq as long as the 654 * function will always return %true for unbound pools as long as the
646 * worklist isn't empty. 655 * worklist isn't empty.
647 */ 656 */
648static bool need_more_worker(struct worker_pool *pool) 657static bool need_more_worker(struct worker_pool *pool)
@@ -659,9 +668,8 @@ static bool may_start_working(struct worker_pool *pool)
659/* Do I need to keep working? Called from currently running workers. */ 668/* Do I need to keep working? Called from currently running workers. */
660static bool keep_working(struct worker_pool *pool) 669static bool keep_working(struct worker_pool *pool)
661{ 670{
662 atomic_t *nr_running = get_pool_nr_running(pool); 671 return !list_empty(&pool->worklist) &&
663 672 atomic_read(&pool->nr_running) <= 1;
664 return !list_empty(&pool->worklist) && atomic_read(nr_running) <= 1;
665} 673}
666 674
667/* Do we need a new worker? Called from manager. */ 675/* Do we need a new worker? Called from manager. */
@@ -714,7 +722,7 @@ static struct worker *first_worker(struct worker_pool *pool)
714 * Wake up the first idle worker of @pool. 722 * Wake up the first idle worker of @pool.
715 * 723 *
716 * CONTEXT: 724 * CONTEXT:
717 * spin_lock_irq(gcwq->lock). 725 * spin_lock_irq(pool->lock).
718 */ 726 */
719static void wake_up_worker(struct worker_pool *pool) 727static void wake_up_worker(struct worker_pool *pool)
720{ 728{
@@ -740,8 +748,8 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
740 struct worker *worker = kthread_data(task); 748 struct worker *worker = kthread_data(task);
741 749
742 if (!(worker->flags & WORKER_NOT_RUNNING)) { 750 if (!(worker->flags & WORKER_NOT_RUNNING)) {
743 WARN_ON_ONCE(worker->pool->gcwq->cpu != cpu); 751 WARN_ON_ONCE(worker->pool->cpu != cpu);
744 atomic_inc(get_pool_nr_running(worker->pool)); 752 atomic_inc(&worker->pool->nr_running);
745 } 753 }
746} 754}
747 755
@@ -764,12 +772,18 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
764 unsigned int cpu) 772 unsigned int cpu)
765{ 773{
766 struct worker *worker = kthread_data(task), *to_wakeup = NULL; 774 struct worker *worker = kthread_data(task), *to_wakeup = NULL;
767 struct worker_pool *pool = worker->pool; 775 struct worker_pool *pool;
768 atomic_t *nr_running = get_pool_nr_running(pool);
769 776
777 /*
778 * Rescuers, which may not have all the fields set up like normal
779 * workers, also reach here, let's not access anything before
780 * checking NOT_RUNNING.
781 */
770 if (worker->flags & WORKER_NOT_RUNNING) 782 if (worker->flags & WORKER_NOT_RUNNING)
771 return NULL; 783 return NULL;
772 784
785 pool = worker->pool;
786
773 /* this can only happen on the local cpu */ 787 /* this can only happen on the local cpu */
774 BUG_ON(cpu != raw_smp_processor_id()); 788 BUG_ON(cpu != raw_smp_processor_id());
775 789
@@ -781,10 +795,11 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
781 * NOT_RUNNING is clear. This means that we're bound to and 795 * NOT_RUNNING is clear. This means that we're bound to and
782 * running on the local cpu w/ rq lock held and preemption 796 * running on the local cpu w/ rq lock held and preemption
783 * disabled, which in turn means that none else could be 797 * disabled, which in turn means that none else could be
784 * manipulating idle_list, so dereferencing idle_list without gcwq 798 * manipulating idle_list, so dereferencing idle_list without pool
785 * lock is safe. 799 * lock is safe.
786 */ 800 */
787 if (atomic_dec_and_test(nr_running) && !list_empty(&pool->worklist)) 801 if (atomic_dec_and_test(&pool->nr_running) &&
802 !list_empty(&pool->worklist))
788 to_wakeup = first_worker(pool); 803 to_wakeup = first_worker(pool);
789 return to_wakeup ? to_wakeup->task : NULL; 804 return to_wakeup ? to_wakeup->task : NULL;
790} 805}
@@ -800,7 +815,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
800 * woken up. 815 * woken up.
801 * 816 *
802 * CONTEXT: 817 * CONTEXT:
803 * spin_lock_irq(gcwq->lock) 818 * spin_lock_irq(pool->lock)
804 */ 819 */
805static inline void worker_set_flags(struct worker *worker, unsigned int flags, 820static inline void worker_set_flags(struct worker *worker, unsigned int flags,
806 bool wakeup) 821 bool wakeup)
@@ -816,14 +831,12 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags,
816 */ 831 */
817 if ((flags & WORKER_NOT_RUNNING) && 832 if ((flags & WORKER_NOT_RUNNING) &&
818 !(worker->flags & WORKER_NOT_RUNNING)) { 833 !(worker->flags & WORKER_NOT_RUNNING)) {
819 atomic_t *nr_running = get_pool_nr_running(pool);
820
821 if (wakeup) { 834 if (wakeup) {
822 if (atomic_dec_and_test(nr_running) && 835 if (atomic_dec_and_test(&pool->nr_running) &&
823 !list_empty(&pool->worklist)) 836 !list_empty(&pool->worklist))
824 wake_up_worker(pool); 837 wake_up_worker(pool);
825 } else 838 } else
826 atomic_dec(nr_running); 839 atomic_dec(&pool->nr_running);
827 } 840 }
828 841
829 worker->flags |= flags; 842 worker->flags |= flags;
@@ -837,7 +850,7 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags,
837 * Clear @flags in @worker->flags and adjust nr_running accordingly. 850 * Clear @flags in @worker->flags and adjust nr_running accordingly.
838 * 851 *
839 * CONTEXT: 852 * CONTEXT:
840 * spin_lock_irq(gcwq->lock) 853 * spin_lock_irq(pool->lock)
841 */ 854 */
842static inline void worker_clr_flags(struct worker *worker, unsigned int flags) 855static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
843{ 856{
@@ -855,87 +868,56 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
855 */ 868 */
856 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) 869 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
857 if (!(worker->flags & WORKER_NOT_RUNNING)) 870 if (!(worker->flags & WORKER_NOT_RUNNING))
858 atomic_inc(get_pool_nr_running(pool)); 871 atomic_inc(&pool->nr_running);
859} 872}
860 873
861/** 874/**
862 * busy_worker_head - return the busy hash head for a work 875 * find_worker_executing_work - find worker which is executing a work
863 * @gcwq: gcwq of interest 876 * @pool: pool of interest
864 * @work: work to be hashed
865 *
866 * Return hash head of @gcwq for @work.
867 *
868 * CONTEXT:
869 * spin_lock_irq(gcwq->lock).
870 *
871 * RETURNS:
872 * Pointer to the hash head.
873 */
874static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
875 struct work_struct *work)
876{
877 const int base_shift = ilog2(sizeof(struct work_struct));
878 unsigned long v = (unsigned long)work;
879
880 /* simple shift and fold hash, do we need something better? */
881 v >>= base_shift;
882 v += v >> BUSY_WORKER_HASH_ORDER;
883 v &= BUSY_WORKER_HASH_MASK;
884
885 return &gcwq->busy_hash[v];
886}
887
888/**
889 * __find_worker_executing_work - find worker which is executing a work
890 * @gcwq: gcwq of interest
891 * @bwh: hash head as returned by busy_worker_head()
892 * @work: work to find worker for 877 * @work: work to find worker for
893 * 878 *
894 * Find a worker which is executing @work on @gcwq. @bwh should be 879 * Find a worker which is executing @work on @pool by searching
895 * the hash head obtained by calling busy_worker_head() with the same 880 * @pool->busy_hash which is keyed by the address of @work. For a worker
896 * work. 881 * to match, its current execution should match the address of @work and
882 * its work function. This is to avoid unwanted dependency between
883 * unrelated work executions through a work item being recycled while still
884 * being executed.
885 *
886 * This is a bit tricky. A work item may be freed once its execution
887 * starts and nothing prevents the freed area from being recycled for
888 * another work item. If the same work item address ends up being reused
889 * before the original execution finishes, workqueue will identify the
890 * recycled work item as currently executing and make it wait until the
891 * current execution finishes, introducing an unwanted dependency.
892 *
893 * This function checks the work item address, work function and workqueue
894 * to avoid false positives. Note that this isn't complete as one may
895 * construct a work function which can introduce dependency onto itself
896 * through a recycled work item. Well, if somebody wants to shoot oneself
897 * in the foot that badly, there's only so much we can do, and if such
898 * deadlock actually occurs, it should be easy to locate the culprit work
899 * function.
897 * 900 *
898 * CONTEXT: 901 * CONTEXT:
899 * spin_lock_irq(gcwq->lock). 902 * spin_lock_irq(pool->lock).
900 * 903 *
901 * RETURNS: 904 * RETURNS:
902 * Pointer to worker which is executing @work if found, NULL 905 * Pointer to worker which is executing @work if found, NULL
903 * otherwise. 906 * otherwise.
904 */ 907 */
905static struct worker *__find_worker_executing_work(struct global_cwq *gcwq, 908static struct worker *find_worker_executing_work(struct worker_pool *pool,
906 struct hlist_head *bwh, 909 struct work_struct *work)
907 struct work_struct *work)
908{ 910{
909 struct worker *worker; 911 struct worker *worker;
910 struct hlist_node *tmp; 912 struct hlist_node *tmp;
911 913
912 hlist_for_each_entry(worker, tmp, bwh, hentry) 914 hash_for_each_possible(pool->busy_hash, worker, tmp, hentry,
913 if (worker->current_work == work) 915 (unsigned long)work)
916 if (worker->current_work == work &&
917 worker->current_func == work->func)
914 return worker; 918 return worker;
915 return NULL;
916}
917 919
918/** 920 return NULL;
919 * find_worker_executing_work - find worker which is executing a work
920 * @gcwq: gcwq of interest
921 * @work: work to find worker for
922 *
923 * Find a worker which is executing @work on @gcwq. This function is
924 * identical to __find_worker_executing_work() except that this
925 * function calculates @bwh itself.
926 *
927 * CONTEXT:
928 * spin_lock_irq(gcwq->lock).
929 *
930 * RETURNS:
931 * Pointer to worker which is executing @work if found, NULL
932 * otherwise.
933 */
934static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
935 struct work_struct *work)
936{
937 return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work),
938 work);
939} 921}
940 922
941/** 923/**
@@ -953,7 +935,7 @@ static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
953 * nested inside outer list_for_each_entry_safe(). 935 * nested inside outer list_for_each_entry_safe().
954 * 936 *
955 * CONTEXT: 937 * CONTEXT:
956 * spin_lock_irq(gcwq->lock). 938 * spin_lock_irq(pool->lock).
957 */ 939 */
958static void move_linked_works(struct work_struct *work, struct list_head *head, 940static void move_linked_works(struct work_struct *work, struct list_head *head,
959 struct work_struct **nextp) 941 struct work_struct **nextp)
@@ -979,67 +961,67 @@ static void move_linked_works(struct work_struct *work, struct list_head *head,
979 *nextp = n; 961 *nextp = n;
980} 962}
981 963
982static void cwq_activate_delayed_work(struct work_struct *work) 964static void pwq_activate_delayed_work(struct work_struct *work)
983{ 965{
984 struct cpu_workqueue_struct *cwq = get_work_cwq(work); 966 struct pool_workqueue *pwq = get_work_pwq(work);
985 967
986 trace_workqueue_activate_work(work); 968 trace_workqueue_activate_work(work);
987 move_linked_works(work, &cwq->pool->worklist, NULL); 969 move_linked_works(work, &pwq->pool->worklist, NULL);
988 __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); 970 __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
989 cwq->nr_active++; 971 pwq->nr_active++;
990} 972}
991 973
992static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) 974static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
993{ 975{
994 struct work_struct *work = list_first_entry(&cwq->delayed_works, 976 struct work_struct *work = list_first_entry(&pwq->delayed_works,
995 struct work_struct, entry); 977 struct work_struct, entry);
996 978
997 cwq_activate_delayed_work(work); 979 pwq_activate_delayed_work(work);
998} 980}
999 981
1000/** 982/**
1001 * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight 983 * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
1002 * @cwq: cwq of interest 984 * @pwq: pwq of interest
1003 * @color: color of work which left the queue 985 * @color: color of work which left the queue
1004 * 986 *
1005 * A work either has completed or is removed from pending queue, 987 * A work either has completed or is removed from pending queue,
1006 * decrement nr_in_flight of its cwq and handle workqueue flushing. 988 * decrement nr_in_flight of its pwq and handle workqueue flushing.
1007 * 989 *
1008 * CONTEXT: 990 * CONTEXT:
1009 * spin_lock_irq(gcwq->lock). 991 * spin_lock_irq(pool->lock).
1010 */ 992 */
1011static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) 993static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
1012{ 994{
1013 /* ignore uncolored works */ 995 /* ignore uncolored works */
1014 if (color == WORK_NO_COLOR) 996 if (color == WORK_NO_COLOR)
1015 return; 997 return;
1016 998
1017 cwq->nr_in_flight[color]--; 999 pwq->nr_in_flight[color]--;
1018 1000
1019 cwq->nr_active--; 1001 pwq->nr_active--;
1020 if (!list_empty(&cwq->delayed_works)) { 1002 if (!list_empty(&pwq->delayed_works)) {
1021 /* one down, submit a delayed one */ 1003 /* one down, submit a delayed one */
1022 if (cwq->nr_active < cwq->max_active) 1004 if (pwq->nr_active < pwq->max_active)
1023 cwq_activate_first_delayed(cwq); 1005 pwq_activate_first_delayed(pwq);
1024 } 1006 }
1025 1007
1026 /* is flush in progress and are we at the flushing tip? */ 1008 /* is flush in progress and are we at the flushing tip? */
1027 if (likely(cwq->flush_color != color)) 1009 if (likely(pwq->flush_color != color))
1028 return; 1010 return;
1029 1011
1030 /* are there still in-flight works? */ 1012 /* are there still in-flight works? */
1031 if (cwq->nr_in_flight[color]) 1013 if (pwq->nr_in_flight[color])
1032 return; 1014 return;
1033 1015
1034 /* this cwq is done, clear flush_color */ 1016 /* this pwq is done, clear flush_color */
1035 cwq->flush_color = -1; 1017 pwq->flush_color = -1;
1036 1018
1037 /* 1019 /*
1038 * If this was the last cwq, wake up the first flusher. It 1020 * If this was the last pwq, wake up the first flusher. It
1039 * will handle the rest. 1021 * will handle the rest.
1040 */ 1022 */
1041 if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush)) 1023 if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
1042 complete(&cwq->wq->first_flusher->done); 1024 complete(&pwq->wq->first_flusher->done);
1043} 1025}
1044 1026
1045/** 1027/**
@@ -1070,7 +1052,8 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
1070static int try_to_grab_pending(struct work_struct *work, bool is_dwork, 1052static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
1071 unsigned long *flags) 1053 unsigned long *flags)
1072{ 1054{
1073 struct global_cwq *gcwq; 1055 struct worker_pool *pool;
1056 struct pool_workqueue *pwq;
1074 1057
1075 local_irq_save(*flags); 1058 local_irq_save(*flags);
1076 1059
@@ -1095,41 +1078,43 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
1095 * The queueing is in progress, or it is already queued. Try to 1078 * The queueing is in progress, or it is already queued. Try to
1096 * steal it from ->worklist without clearing WORK_STRUCT_PENDING. 1079 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
1097 */ 1080 */
1098 gcwq = get_work_gcwq(work); 1081 pool = get_work_pool(work);
1099 if (!gcwq) 1082 if (!pool)
1100 goto fail; 1083 goto fail;
1101 1084
1102 spin_lock(&gcwq->lock); 1085 spin_lock(&pool->lock);
1103 if (!list_empty(&work->entry)) { 1086 /*
1087 * work->data is guaranteed to point to pwq only while the work
1088 * item is queued on pwq->wq, and both updating work->data to point
1089 * to pwq on queueing and to pool on dequeueing are done under
1090 * pwq->pool->lock. This in turn guarantees that, if work->data
1091 * points to pwq which is associated with a locked pool, the work
1092 * item is currently queued on that pool.
1093 */
1094 pwq = get_work_pwq(work);
1095 if (pwq && pwq->pool == pool) {
1096 debug_work_deactivate(work);
1097
1104 /* 1098 /*
1105 * This work is queued, but perhaps we locked the wrong gcwq. 1099 * A delayed work item cannot be grabbed directly because
1106 * In that case we must see the new value after rmb(), see 1100 * it might have linked NO_COLOR work items which, if left
1107 * insert_work()->wmb(). 1101 * on the delayed_list, will confuse pwq->nr_active
1102 * management later on and cause stall. Make sure the work
1103 * item is activated before grabbing.
1108 */ 1104 */
1109 smp_rmb(); 1105 if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
1110 if (gcwq == get_work_gcwq(work)) { 1106 pwq_activate_delayed_work(work);
1111 debug_work_deactivate(work);
1112 1107
1113 /* 1108 list_del_init(&work->entry);
1114 * A delayed work item cannot be grabbed directly 1109 pwq_dec_nr_in_flight(get_work_pwq(work), get_work_color(work));
1115 * because it might have linked NO_COLOR work items
1116 * which, if left on the delayed_list, will confuse
1117 * cwq->nr_active management later on and cause
1118 * stall. Make sure the work item is activated
1119 * before grabbing.
1120 */
1121 if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
1122 cwq_activate_delayed_work(work);
1123 1110
1124 list_del_init(&work->entry); 1111 /* work->data points to pwq iff queued, point to pool */
1125 cwq_dec_nr_in_flight(get_work_cwq(work), 1112 set_work_pool_and_keep_pending(work, pool->id);
1126 get_work_color(work));
1127 1113
1128 spin_unlock(&gcwq->lock); 1114 spin_unlock(&pool->lock);
1129 return 1; 1115 return 1;
1130 }
1131 } 1116 }
1132 spin_unlock(&gcwq->lock); 1117 spin_unlock(&pool->lock);
1133fail: 1118fail:
1134 local_irq_restore(*flags); 1119 local_irq_restore(*flags);
1135 if (work_is_canceling(work)) 1120 if (work_is_canceling(work))
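try_to_grab_pending() above is what backs the cancel and modify paths (cancel_work_sync(), cancel_delayed_work(), mod_delayed_work()); the removal of the rmb/wmb pairing is invisible to callers. A minimal sketch of the kind of caller that exercises it, assuming a hypothetical debounce helper around the stock timer-deferred-work API:

#include <linux/jiffies.h>
#include <linux/workqueue.h>

/* hypothetical debounce: every call pushes the deadline out again */
static void debounce_kick(struct delayed_work *dwork)
{
	/*
	 * mod_delayed_work() grabs PENDING via try_to_grab_pending(),
	 * dequeues the item if it was already queued, and requeues it
	 * with the new delay -- no memory-barrier dance on the caller side.
	 */
	mod_delayed_work(system_wq, dwork, msecs_to_jiffies(100));
}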
@@ -1139,33 +1124,25 @@ fail:
1139} 1124}
1140 1125
1141/** 1126/**
1142 * insert_work - insert a work into gcwq 1127 * insert_work - insert a work into a pool
1143 * @cwq: cwq @work belongs to 1128 * @pwq: pwq @work belongs to
1144 * @work: work to insert 1129 * @work: work to insert
1145 * @head: insertion point 1130 * @head: insertion point
1146 * @extra_flags: extra WORK_STRUCT_* flags to set 1131 * @extra_flags: extra WORK_STRUCT_* flags to set
1147 * 1132 *
1148 * Insert @work which belongs to @cwq into @gcwq after @head. 1133 * Insert @work which belongs to @pwq after @head. @extra_flags is or'd to
1149 * @extra_flags is or'd to work_struct flags. 1134 * work_struct flags.
1150 * 1135 *
1151 * CONTEXT: 1136 * CONTEXT:
1152 * spin_lock_irq(gcwq->lock). 1137 * spin_lock_irq(pool->lock).
1153 */ 1138 */
1154static void insert_work(struct cpu_workqueue_struct *cwq, 1139static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
1155 struct work_struct *work, struct list_head *head, 1140 struct list_head *head, unsigned int extra_flags)
1156 unsigned int extra_flags)
1157{ 1141{
1158 struct worker_pool *pool = cwq->pool; 1142 struct worker_pool *pool = pwq->pool;
1159 1143
1160 /* we own @work, set data and link */ 1144 /* we own @work, set data and link */
1161 set_work_cwq(work, cwq, extra_flags); 1145 set_work_pwq(work, pwq, extra_flags);
1162
1163 /*
1164 * Ensure that we get the right work->data if we see the
1165 * result of list_add() below, see try_to_grab_pending().
1166 */
1167 smp_wmb();
1168
1169 list_add_tail(&work->entry, head); 1146 list_add_tail(&work->entry, head);
1170 1147
1171 /* 1148 /*
@@ -1181,41 +1158,24 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
1181 1158
1182/* 1159/*
1183 * Test whether @work is being queued from another work executing on the 1160 * Test whether @work is being queued from another work executing on the
1184 * same workqueue. This is rather expensive and should only be used from 1161 * same workqueue.
1185 * cold paths.
1186 */ 1162 */
1187static bool is_chained_work(struct workqueue_struct *wq) 1163static bool is_chained_work(struct workqueue_struct *wq)
1188{ 1164{
1189 unsigned long flags; 1165 struct worker *worker;
1190 unsigned int cpu;
1191
1192 for_each_gcwq_cpu(cpu) {
1193 struct global_cwq *gcwq = get_gcwq(cpu);
1194 struct worker *worker;
1195 struct hlist_node *pos;
1196 int i;
1197 1166
1198 spin_lock_irqsave(&gcwq->lock, flags); 1167 worker = current_wq_worker();
1199 for_each_busy_worker(worker, i, pos, gcwq) { 1168 /*
1200 if (worker->task != current) 1169 * Return %true iff I'm a worker executing a work item on @wq. If

1201 continue; 1170 * I'm @worker, it's safe to dereference it without locking.
1202 spin_unlock_irqrestore(&gcwq->lock, flags); 1171 */
1203 /* 1172 return worker && worker->current_pwq->wq == wq;
1204 * I'm @worker, no locking necessary. See if @work
1205 * is headed to the same workqueue.
1206 */
1207 return worker->current_cwq->wq == wq;
1208 }
1209 spin_unlock_irqrestore(&gcwq->lock, flags);
1210 }
1211 return false;
1212} 1173}
1213 1174
1214static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, 1175static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
1215 struct work_struct *work) 1176 struct work_struct *work)
1216{ 1177{
1217 struct global_cwq *gcwq; 1178 struct pool_workqueue *pwq;
1218 struct cpu_workqueue_struct *cwq;
1219 struct list_head *worklist; 1179 struct list_head *worklist;
1220 unsigned int work_flags; 1180 unsigned int work_flags;
1221 unsigned int req_cpu = cpu; 1181 unsigned int req_cpu = cpu;
@@ -1235,9 +1195,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
1235 WARN_ON_ONCE(!is_chained_work(wq))) 1195 WARN_ON_ONCE(!is_chained_work(wq)))
1236 return; 1196 return;
1237 1197
1238 /* determine gcwq to use */ 1198 /* determine the pwq to use */
1239 if (!(wq->flags & WQ_UNBOUND)) { 1199 if (!(wq->flags & WQ_UNBOUND)) {
1240 struct global_cwq *last_gcwq; 1200 struct worker_pool *last_pool;
1241 1201
1242 if (cpu == WORK_CPU_UNBOUND) 1202 if (cpu == WORK_CPU_UNBOUND)
1243 cpu = raw_smp_processor_id(); 1203 cpu = raw_smp_processor_id();
@@ -1248,55 +1208,54 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
1248 * work needs to be queued on that cpu to guarantee 1208 * work needs to be queued on that cpu to guarantee
1249 * non-reentrancy. 1209 * non-reentrancy.
1250 */ 1210 */
1251 gcwq = get_gcwq(cpu); 1211 pwq = get_pwq(cpu, wq);
1252 last_gcwq = get_work_gcwq(work); 1212 last_pool = get_work_pool(work);
1253 1213
1254 if (last_gcwq && last_gcwq != gcwq) { 1214 if (last_pool && last_pool != pwq->pool) {
1255 struct worker *worker; 1215 struct worker *worker;
1256 1216
1257 spin_lock(&last_gcwq->lock); 1217 spin_lock(&last_pool->lock);
1258 1218
1259 worker = find_worker_executing_work(last_gcwq, work); 1219 worker = find_worker_executing_work(last_pool, work);
1260 1220
1261 if (worker && worker->current_cwq->wq == wq) 1221 if (worker && worker->current_pwq->wq == wq) {
1262 gcwq = last_gcwq; 1222 pwq = get_pwq(last_pool->cpu, wq);
1263 else { 1223 } else {
1264 /* meh... not running there, queue here */ 1224 /* meh... not running there, queue here */
1265 spin_unlock(&last_gcwq->lock); 1225 spin_unlock(&last_pool->lock);
1266 spin_lock(&gcwq->lock); 1226 spin_lock(&pwq->pool->lock);
1267 } 1227 }
1268 } else { 1228 } else {
1269 spin_lock(&gcwq->lock); 1229 spin_lock(&pwq->pool->lock);
1270 } 1230 }
1271 } else { 1231 } else {
1272 gcwq = get_gcwq(WORK_CPU_UNBOUND); 1232 pwq = get_pwq(WORK_CPU_UNBOUND, wq);
1273 spin_lock(&gcwq->lock); 1233 spin_lock(&pwq->pool->lock);
1274 } 1234 }
1275 1235
1276 /* gcwq determined, get cwq and queue */ 1236 /* pwq determined, queue */
1277 cwq = get_cwq(gcwq->cpu, wq); 1237 trace_workqueue_queue_work(req_cpu, pwq, work);
1278 trace_workqueue_queue_work(req_cpu, cwq, work);
1279 1238
1280 if (WARN_ON(!list_empty(&work->entry))) { 1239 if (WARN_ON(!list_empty(&work->entry))) {
1281 spin_unlock(&gcwq->lock); 1240 spin_unlock(&pwq->pool->lock);
1282 return; 1241 return;
1283 } 1242 }
1284 1243
1285 cwq->nr_in_flight[cwq->work_color]++; 1244 pwq->nr_in_flight[pwq->work_color]++;
1286 work_flags = work_color_to_flags(cwq->work_color); 1245 work_flags = work_color_to_flags(pwq->work_color);
1287 1246
1288 if (likely(cwq->nr_active < cwq->max_active)) { 1247 if (likely(pwq->nr_active < pwq->max_active)) {
1289 trace_workqueue_activate_work(work); 1248 trace_workqueue_activate_work(work);
1290 cwq->nr_active++; 1249 pwq->nr_active++;
1291 worklist = &cwq->pool->worklist; 1250 worklist = &pwq->pool->worklist;
1292 } else { 1251 } else {
1293 work_flags |= WORK_STRUCT_DELAYED; 1252 work_flags |= WORK_STRUCT_DELAYED;
1294 worklist = &cwq->delayed_works; 1253 worklist = &pwq->delayed_works;
1295 } 1254 }
1296 1255
1297 insert_work(cwq, work, worklist, work_flags); 1256 insert_work(pwq, work, worklist, work_flags);
1298 1257
1299 spin_unlock(&gcwq->lock); 1258 spin_unlock(&pwq->pool->lock);
1300} 1259}
1301 1260
1302/** 1261/**
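The reworked is_chained_work() and __queue_work() keep two caller-visible guarantees: a work item that queues itself from its own handler is still accepted while the workqueue drains, and a pending item follows the pool it last ran on so it is never reentrant. A minimal sketch of such a self-requeueing user; module and identifier names are hypothetical:

#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;
static atomic_t demo_rounds = ATOMIC_INIT(8);

static void demo_fn(struct work_struct *work);
static DECLARE_WORK(demo_work, demo_fn);

static void demo_fn(struct work_struct *work)
{
	/* chained work: queueing from our own handler is always allowed */
	if (atomic_dec_return(&demo_rounds) > 0)
		queue_work(demo_wq, &demo_work);
}

static int __init demo_init(void)
{
	demo_wq = alloc_workqueue("demo_wq", 0, 0);
	if (!demo_wq)
		return -ENOMEM;
	queue_work(demo_wq, &demo_work);
	return 0;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(demo_wq);	/* drains remaining rounds, then frees */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");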
@@ -1347,19 +1306,17 @@ EXPORT_SYMBOL_GPL(queue_work);
1347void delayed_work_timer_fn(unsigned long __data) 1306void delayed_work_timer_fn(unsigned long __data)
1348{ 1307{
1349 struct delayed_work *dwork = (struct delayed_work *)__data; 1308 struct delayed_work *dwork = (struct delayed_work *)__data;
1350 struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
1351 1309
1352 /* should have been called from irqsafe timer with irq already off */ 1310 /* should have been called from irqsafe timer with irq already off */
1353 __queue_work(dwork->cpu, cwq->wq, &dwork->work); 1311 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
1354} 1312}
1355EXPORT_SYMBOL_GPL(delayed_work_timer_fn); 1313EXPORT_SYMBOL(delayed_work_timer_fn);
1356 1314
1357static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, 1315static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
1358 struct delayed_work *dwork, unsigned long delay) 1316 struct delayed_work *dwork, unsigned long delay)
1359{ 1317{
1360 struct timer_list *timer = &dwork->timer; 1318 struct timer_list *timer = &dwork->timer;
1361 struct work_struct *work = &dwork->work; 1319 struct work_struct *work = &dwork->work;
1362 unsigned int lcpu;
1363 1320
1364 WARN_ON_ONCE(timer->function != delayed_work_timer_fn || 1321 WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
1365 timer->data != (unsigned long)dwork); 1322 timer->data != (unsigned long)dwork);
@@ -1379,30 +1336,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
1379 1336
1380 timer_stats_timer_set_start_info(&dwork->timer); 1337 timer_stats_timer_set_start_info(&dwork->timer);
1381 1338
1382 /* 1339 dwork->wq = wq;
1383 * This stores cwq for the moment, for the timer_fn. Note that the
1384 * work's gcwq is preserved to allow reentrance detection for
1385 * delayed works.
1386 */
1387 if (!(wq->flags & WQ_UNBOUND)) {
1388 struct global_cwq *gcwq = get_work_gcwq(work);
1389
1390 /*
1391 * If we cannot get the last gcwq from @work directly,
1392 * select the last CPU such that it avoids unnecessarily
1393 * triggering non-reentrancy check in __queue_work().
1394 */
1395 lcpu = cpu;
1396 if (gcwq)
1397 lcpu = gcwq->cpu;
1398 if (lcpu == WORK_CPU_UNBOUND)
1399 lcpu = raw_smp_processor_id();
1400 } else {
1401 lcpu = WORK_CPU_UNBOUND;
1402 }
1403
1404 set_work_cwq(work, get_cwq(lcpu, wq), 0);
1405
1406 dwork->cpu = cpu; 1340 dwork->cpu = cpu;
1407 timer->expires = jiffies + delay; 1341 timer->expires = jiffies + delay;
1408 1342
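With the new delayed_work->wq field, delayed_work_timer_fn() above no longer has to reconstruct the target from a stashed cwq; from the caller's side the API is unchanged. A minimal polling sketch under that observation, using the stock system_wq path and hypothetical names:

#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/workqueue.h>

static void poll_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(poll_work, poll_fn);

static void poll_fn(struct work_struct *work)
{
	/* ... sample the hardware, then rearm ... */
	schedule_delayed_work(&poll_work, msecs_to_jiffies(500));
}

static int __init poll_init(void)
{
	/* dwork->wq (system_wq here) is what the timer callback will use */
	schedule_delayed_work(&poll_work, msecs_to_jiffies(500));
	return 0;
}

static void __exit poll_exit(void)
{
	cancel_delayed_work_sync(&poll_work);	/* kills the timer and waits */
}

module_init(poll_init);
module_exit(poll_exit);
MODULE_LICENSE("GPL");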
@@ -1519,12 +1453,11 @@ EXPORT_SYMBOL_GPL(mod_delayed_work);
1519 * necessary. 1453 * necessary.
1520 * 1454 *
1521 * LOCKING: 1455 * LOCKING:
1522 * spin_lock_irq(gcwq->lock). 1456 * spin_lock_irq(pool->lock).
1523 */ 1457 */
1524static void worker_enter_idle(struct worker *worker) 1458static void worker_enter_idle(struct worker *worker)
1525{ 1459{
1526 struct worker_pool *pool = worker->pool; 1460 struct worker_pool *pool = worker->pool;
1527 struct global_cwq *gcwq = pool->gcwq;
1528 1461
1529 BUG_ON(worker->flags & WORKER_IDLE); 1462 BUG_ON(worker->flags & WORKER_IDLE);
1530 BUG_ON(!list_empty(&worker->entry) && 1463 BUG_ON(!list_empty(&worker->entry) &&
@@ -1542,14 +1475,14 @@ static void worker_enter_idle(struct worker *worker)
1542 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); 1475 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
1543 1476
1544 /* 1477 /*
1545 * Sanity check nr_running. Because gcwq_unbind_fn() releases 1478 * Sanity check nr_running. Because wq_unbind_fn() releases
1546 * gcwq->lock between setting %WORKER_UNBOUND and zapping 1479 * pool->lock between setting %WORKER_UNBOUND and zapping
1547 * nr_running, the warning may trigger spuriously. Check iff 1480 * nr_running, the warning may trigger spuriously. Check iff
1548 * unbind is not in progress. 1481 * unbind is not in progress.
1549 */ 1482 */
1550 WARN_ON_ONCE(!(gcwq->flags & GCWQ_DISASSOCIATED) && 1483 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
1551 pool->nr_workers == pool->nr_idle && 1484 pool->nr_workers == pool->nr_idle &&
1552 atomic_read(get_pool_nr_running(pool))); 1485 atomic_read(&pool->nr_running));
1553} 1486}
1554 1487
1555/** 1488/**
@@ -1559,7 +1492,7 @@ static void worker_enter_idle(struct worker *worker)
1559 * @worker is leaving idle state. Update stats. 1492 * @worker is leaving idle state. Update stats.
1560 * 1493 *
1561 * LOCKING: 1494 * LOCKING:
1562 * spin_lock_irq(gcwq->lock). 1495 * spin_lock_irq(pool->lock).
1563 */ 1496 */
1564static void worker_leave_idle(struct worker *worker) 1497static void worker_leave_idle(struct worker *worker)
1565{ 1498{
@@ -1572,7 +1505,7 @@ static void worker_leave_idle(struct worker *worker)
1572} 1505}
1573 1506
1574/** 1507/**
1575 * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock gcwq 1508 * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock pool
1576 * @worker: self 1509 * @worker: self
1577 * 1510 *
1578 * Works which are scheduled while the cpu is online must at least be 1511 * Works which are scheduled while the cpu is online must at least be
@@ -1584,27 +1517,27 @@ static void worker_leave_idle(struct worker *worker)
1584 * themselves to the target cpu and may race with cpu going down or 1517 * themselves to the target cpu and may race with cpu going down or
1585 * coming online. kthread_bind() can't be used because it may put the 1518 * coming online. kthread_bind() can't be used because it may put the
1586 * worker to already dead cpu and set_cpus_allowed_ptr() can't be used 1519 * worker to already dead cpu and set_cpus_allowed_ptr() can't be used
1587 * verbatim as it's best effort and blocking and gcwq may be 1520 * verbatim as it's best effort and blocking and pool may be
1588 * [dis]associated in the meantime. 1521 * [dis]associated in the meantime.
1589 * 1522 *
1590 * This function tries set_cpus_allowed() and locks gcwq and verifies the 1523 * This function tries set_cpus_allowed() and locks pool and verifies the
1591 * binding against %GCWQ_DISASSOCIATED which is set during 1524 * binding against %POOL_DISASSOCIATED which is set during
1592 * %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker 1525 * %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker
1593 * enters idle state or fetches works without dropping lock, it can 1526 * enters idle state or fetches works without dropping lock, it can
1594 * guarantee the scheduling requirement described in the first paragraph. 1527 * guarantee the scheduling requirement described in the first paragraph.
1595 * 1528 *
1596 * CONTEXT: 1529 * CONTEXT:
1597 * Might sleep. Called without any lock but returns with gcwq->lock 1530 * Might sleep. Called without any lock but returns with pool->lock
1598 * held. 1531 * held.
1599 * 1532 *
1600 * RETURNS: 1533 * RETURNS:
1601 * %true if the associated gcwq is online (@worker is successfully 1534 * %true if the associated pool is online (@worker is successfully
1602 * bound), %false if offline. 1535 * bound), %false if offline.
1603 */ 1536 */
1604static bool worker_maybe_bind_and_lock(struct worker *worker) 1537static bool worker_maybe_bind_and_lock(struct worker *worker)
1605__acquires(&gcwq->lock) 1538__acquires(&pool->lock)
1606{ 1539{
1607 struct global_cwq *gcwq = worker->pool->gcwq; 1540 struct worker_pool *pool = worker->pool;
1608 struct task_struct *task = worker->task; 1541 struct task_struct *task = worker->task;
1609 1542
1610 while (true) { 1543 while (true) {
@@ -1612,19 +1545,19 @@ __acquires(&gcwq->lock)
1612 * The following call may fail, succeed or succeed 1545 * The following call may fail, succeed or succeed
1613 * without actually migrating the task to the cpu if 1546 * without actually migrating the task to the cpu if
1614 * it races with cpu hotunplug operation. Verify 1547 * it races with cpu hotunplug operation. Verify
1615 * against GCWQ_DISASSOCIATED. 1548 * against POOL_DISASSOCIATED.
1616 */ 1549 */
1617 if (!(gcwq->flags & GCWQ_DISASSOCIATED)) 1550 if (!(pool->flags & POOL_DISASSOCIATED))
1618 set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu)); 1551 set_cpus_allowed_ptr(task, get_cpu_mask(pool->cpu));
1619 1552
1620 spin_lock_irq(&gcwq->lock); 1553 spin_lock_irq(&pool->lock);
1621 if (gcwq->flags & GCWQ_DISASSOCIATED) 1554 if (pool->flags & POOL_DISASSOCIATED)
1622 return false; 1555 return false;
1623 if (task_cpu(task) == gcwq->cpu && 1556 if (task_cpu(task) == pool->cpu &&
1624 cpumask_equal(&current->cpus_allowed, 1557 cpumask_equal(&current->cpus_allowed,
1625 get_cpu_mask(gcwq->cpu))) 1558 get_cpu_mask(pool->cpu)))
1626 return true; 1559 return true;
1627 spin_unlock_irq(&gcwq->lock); 1560 spin_unlock_irq(&pool->lock);
1628 1561
1629 /* 1562 /*
1630 * We've raced with CPU hot[un]plug. Give it a breather 1563 * We've raced with CPU hot[un]plug. Give it a breather
@@ -1643,15 +1576,13 @@ __acquires(&gcwq->lock)
1643 */ 1576 */
1644static void idle_worker_rebind(struct worker *worker) 1577static void idle_worker_rebind(struct worker *worker)
1645{ 1578{
1646 struct global_cwq *gcwq = worker->pool->gcwq;
1647
1648 /* CPU may go down again inbetween, clear UNBOUND only on success */ 1579 /* CPU may go down again inbetween, clear UNBOUND only on success */
1649 if (worker_maybe_bind_and_lock(worker)) 1580 if (worker_maybe_bind_and_lock(worker))
1650 worker_clr_flags(worker, WORKER_UNBOUND); 1581 worker_clr_flags(worker, WORKER_UNBOUND);
1651 1582
1652 /* rebind complete, become available again */ 1583 /* rebind complete, become available again */
1653 list_add(&worker->entry, &worker->pool->idle_list); 1584 list_add(&worker->entry, &worker->pool->idle_list);
1654 spin_unlock_irq(&gcwq->lock); 1585 spin_unlock_irq(&worker->pool->lock);
1655} 1586}
1656 1587
1657/* 1588/*
@@ -1663,19 +1594,18 @@ static void idle_worker_rebind(struct worker *worker)
1663static void busy_worker_rebind_fn(struct work_struct *work) 1594static void busy_worker_rebind_fn(struct work_struct *work)
1664{ 1595{
1665 struct worker *worker = container_of(work, struct worker, rebind_work); 1596 struct worker *worker = container_of(work, struct worker, rebind_work);
1666 struct global_cwq *gcwq = worker->pool->gcwq;
1667 1597
1668 if (worker_maybe_bind_and_lock(worker)) 1598 if (worker_maybe_bind_and_lock(worker))
1669 worker_clr_flags(worker, WORKER_UNBOUND); 1599 worker_clr_flags(worker, WORKER_UNBOUND);
1670 1600
1671 spin_unlock_irq(&gcwq->lock); 1601 spin_unlock_irq(&worker->pool->lock);
1672} 1602}
1673 1603
1674/** 1604/**
1675 * rebind_workers - rebind all workers of a gcwq to the associated CPU 1605 * rebind_workers - rebind all workers of a pool to the associated CPU
1676 * @gcwq: gcwq of interest 1606 * @pool: pool of interest
1677 * 1607 *
1678 * @gcwq->cpu is coming online. Rebind all workers to the CPU. Rebinding 1608 * @pool->cpu is coming online. Rebind all workers to the CPU. Rebinding
1679 * is different for idle and busy ones. 1609 * is different for idle and busy ones.
1680 * 1610 *
1681 * Idle ones will be removed from the idle_list and woken up. They will 1611 * Idle ones will be removed from the idle_list and woken up. They will
@@ -1693,38 +1623,32 @@ static void busy_worker_rebind_fn(struct work_struct *work)
1693 * including the manager will not appear on @idle_list until rebind is 1623 * including the manager will not appear on @idle_list until rebind is
1694 * complete, making local wake-ups safe. 1624 * complete, making local wake-ups safe.
1695 */ 1625 */
1696static void rebind_workers(struct global_cwq *gcwq) 1626static void rebind_workers(struct worker_pool *pool)
1697{ 1627{
1698 struct worker_pool *pool;
1699 struct worker *worker, *n; 1628 struct worker *worker, *n;
1700 struct hlist_node *pos; 1629 struct hlist_node *pos;
1701 int i; 1630 int i;
1702 1631
1703 lockdep_assert_held(&gcwq->lock); 1632 lockdep_assert_held(&pool->assoc_mutex);
1704 1633 lockdep_assert_held(&pool->lock);
1705 for_each_worker_pool(pool, gcwq)
1706 lockdep_assert_held(&pool->assoc_mutex);
1707 1634
1708 /* dequeue and kick idle ones */ 1635 /* dequeue and kick idle ones */
1709 for_each_worker_pool(pool, gcwq) { 1636 list_for_each_entry_safe(worker, n, &pool->idle_list, entry) {
1710 list_for_each_entry_safe(worker, n, &pool->idle_list, entry) { 1637 /*
1711 /* 1638 * idle workers should be off @pool->idle_list until rebind
1712 * idle workers should be off @pool->idle_list 1639 * is complete to avoid receiving premature local wake-ups.
1713 * until rebind is complete to avoid receiving 1640 */
1714 * premature local wake-ups. 1641 list_del_init(&worker->entry);
1715 */
1716 list_del_init(&worker->entry);
1717 1642
1718 /* 1643 /*
1719 * worker_thread() will see the above dequeuing 1644 * worker_thread() will see the above dequeuing and call
1720 * and call idle_worker_rebind(). 1645 * idle_worker_rebind().
1721 */ 1646 */
1722 wake_up_process(worker->task); 1647 wake_up_process(worker->task);
1723 }
1724 } 1648 }
1725 1649
1726 /* rebind busy workers */ 1650 /* rebind busy workers */
1727 for_each_busy_worker(worker, i, pos, gcwq) { 1651 for_each_busy_worker(worker, i, pos, pool) {
1728 struct work_struct *rebind_work = &worker->rebind_work; 1652 struct work_struct *rebind_work = &worker->rebind_work;
1729 struct workqueue_struct *wq; 1653 struct workqueue_struct *wq;
1730 1654
@@ -1736,16 +1660,16 @@ static void rebind_workers(struct global_cwq *gcwq)
1736 1660
1737 /* 1661 /*
1738 * wq doesn't really matter but let's keep @worker->pool 1662 * wq doesn't really matter but let's keep @worker->pool
1739 * and @cwq->pool consistent for sanity. 1663 * and @pwq->pool consistent for sanity.
1740 */ 1664 */
1741 if (worker_pool_pri(worker->pool)) 1665 if (std_worker_pool_pri(worker->pool))
1742 wq = system_highpri_wq; 1666 wq = system_highpri_wq;
1743 else 1667 else
1744 wq = system_wq; 1668 wq = system_wq;
1745 1669
1746 insert_work(get_cwq(gcwq->cpu, wq), rebind_work, 1670 insert_work(get_pwq(pool->cpu, wq), rebind_work,
1747 worker->scheduled.next, 1671 worker->scheduled.next,
1748 work_color_to_flags(WORK_NO_COLOR)); 1672 work_color_to_flags(WORK_NO_COLOR));
1749 } 1673 }
1750} 1674}
1751 1675
@@ -1780,19 +1704,18 @@ static struct worker *alloc_worker(void)
1780 */ 1704 */
1781static struct worker *create_worker(struct worker_pool *pool) 1705static struct worker *create_worker(struct worker_pool *pool)
1782{ 1706{
1783 struct global_cwq *gcwq = pool->gcwq; 1707 const char *pri = std_worker_pool_pri(pool) ? "H" : "";
1784 const char *pri = worker_pool_pri(pool) ? "H" : "";
1785 struct worker *worker = NULL; 1708 struct worker *worker = NULL;
1786 int id = -1; 1709 int id = -1;
1787 1710
1788 spin_lock_irq(&gcwq->lock); 1711 spin_lock_irq(&pool->lock);
1789 while (ida_get_new(&pool->worker_ida, &id)) { 1712 while (ida_get_new(&pool->worker_ida, &id)) {
1790 spin_unlock_irq(&gcwq->lock); 1713 spin_unlock_irq(&pool->lock);
1791 if (!ida_pre_get(&pool->worker_ida, GFP_KERNEL)) 1714 if (!ida_pre_get(&pool->worker_ida, GFP_KERNEL))
1792 goto fail; 1715 goto fail;
1793 spin_lock_irq(&gcwq->lock); 1716 spin_lock_irq(&pool->lock);
1794 } 1717 }
1795 spin_unlock_irq(&gcwq->lock); 1718 spin_unlock_irq(&pool->lock);
1796 1719
1797 worker = alloc_worker(); 1720 worker = alloc_worker();
1798 if (!worker) 1721 if (!worker)
@@ -1801,30 +1724,30 @@ static struct worker *create_worker(struct worker_pool *pool)
1801 worker->pool = pool; 1724 worker->pool = pool;
1802 worker->id = id; 1725 worker->id = id;
1803 1726
1804 if (gcwq->cpu != WORK_CPU_UNBOUND) 1727 if (pool->cpu != WORK_CPU_UNBOUND)
1805 worker->task = kthread_create_on_node(worker_thread, 1728 worker->task = kthread_create_on_node(worker_thread,
1806 worker, cpu_to_node(gcwq->cpu), 1729 worker, cpu_to_node(pool->cpu),
1807 "kworker/%u:%d%s", gcwq->cpu, id, pri); 1730 "kworker/%u:%d%s", pool->cpu, id, pri);
1808 else 1731 else
1809 worker->task = kthread_create(worker_thread, worker, 1732 worker->task = kthread_create(worker_thread, worker,
1810 "kworker/u:%d%s", id, pri); 1733 "kworker/u:%d%s", id, pri);
1811 if (IS_ERR(worker->task)) 1734 if (IS_ERR(worker->task))
1812 goto fail; 1735 goto fail;
1813 1736
1814 if (worker_pool_pri(pool)) 1737 if (std_worker_pool_pri(pool))
1815 set_user_nice(worker->task, HIGHPRI_NICE_LEVEL); 1738 set_user_nice(worker->task, HIGHPRI_NICE_LEVEL);
1816 1739
1817 /* 1740 /*
1818 * Determine CPU binding of the new worker depending on 1741 * Determine CPU binding of the new worker depending on
1819 * %GCWQ_DISASSOCIATED. The caller is responsible for ensuring the 1742 * %POOL_DISASSOCIATED. The caller is responsible for ensuring the
1820 * flag remains stable across this function. See the comments 1743 * flag remains stable across this function. See the comments
1821 * above the flag definition for details. 1744 * above the flag definition for details.
1822 * 1745 *
1823 * As an unbound worker may later become a regular one if CPU comes 1746 * As an unbound worker may later become a regular one if CPU comes
1824 * online, make sure every worker has %PF_THREAD_BOUND set. 1747 * online, make sure every worker has %PF_THREAD_BOUND set.
1825 */ 1748 */
1826 if (!(gcwq->flags & GCWQ_DISASSOCIATED)) { 1749 if (!(pool->flags & POOL_DISASSOCIATED)) {
1827 kthread_bind(worker->task, gcwq->cpu); 1750 kthread_bind(worker->task, pool->cpu);
1828 } else { 1751 } else {
1829 worker->task->flags |= PF_THREAD_BOUND; 1752 worker->task->flags |= PF_THREAD_BOUND;
1830 worker->flags |= WORKER_UNBOUND; 1753 worker->flags |= WORKER_UNBOUND;
@@ -1833,9 +1756,9 @@ static struct worker *create_worker(struct worker_pool *pool)
1833 return worker; 1756 return worker;
1834fail: 1757fail:
1835 if (id >= 0) { 1758 if (id >= 0) {
1836 spin_lock_irq(&gcwq->lock); 1759 spin_lock_irq(&pool->lock);
1837 ida_remove(&pool->worker_ida, id); 1760 ida_remove(&pool->worker_ida, id);
1838 spin_unlock_irq(&gcwq->lock); 1761 spin_unlock_irq(&pool->lock);
1839 } 1762 }
1840 kfree(worker); 1763 kfree(worker);
1841 return NULL; 1764 return NULL;
@@ -1845,10 +1768,10 @@ fail:
1845 * start_worker - start a newly created worker 1768 * start_worker - start a newly created worker
1846 * @worker: worker to start 1769 * @worker: worker to start
1847 * 1770 *
1848 * Make the gcwq aware of @worker and start it. 1771 * Make the pool aware of @worker and start it.
1849 * 1772 *
1850 * CONTEXT: 1773 * CONTEXT:
1851 * spin_lock_irq(gcwq->lock). 1774 * spin_lock_irq(pool->lock).
1852 */ 1775 */
1853static void start_worker(struct worker *worker) 1776static void start_worker(struct worker *worker)
1854{ 1777{
@@ -1862,15 +1785,14 @@ static void start_worker(struct worker *worker)
1862 * destroy_worker - destroy a workqueue worker 1785 * destroy_worker - destroy a workqueue worker
1863 * @worker: worker to be destroyed 1786 * @worker: worker to be destroyed
1864 * 1787 *
1865 * Destroy @worker and adjust @gcwq stats accordingly. 1788 * Destroy @worker and adjust @pool stats accordingly.
1866 * 1789 *
1867 * CONTEXT: 1790 * CONTEXT:
1868 * spin_lock_irq(gcwq->lock) which is released and regrabbed. 1791 * spin_lock_irq(pool->lock) which is released and regrabbed.
1869 */ 1792 */
1870static void destroy_worker(struct worker *worker) 1793static void destroy_worker(struct worker *worker)
1871{ 1794{
1872 struct worker_pool *pool = worker->pool; 1795 struct worker_pool *pool = worker->pool;
1873 struct global_cwq *gcwq = pool->gcwq;
1874 int id = worker->id; 1796 int id = worker->id;
1875 1797
1876 /* sanity check frenzy */ 1798 /* sanity check frenzy */
@@ -1885,21 +1807,20 @@ static void destroy_worker(struct worker *worker)
1885 list_del_init(&worker->entry); 1807 list_del_init(&worker->entry);
1886 worker->flags |= WORKER_DIE; 1808 worker->flags |= WORKER_DIE;
1887 1809
1888 spin_unlock_irq(&gcwq->lock); 1810 spin_unlock_irq(&pool->lock);
1889 1811
1890 kthread_stop(worker->task); 1812 kthread_stop(worker->task);
1891 kfree(worker); 1813 kfree(worker);
1892 1814
1893 spin_lock_irq(&gcwq->lock); 1815 spin_lock_irq(&pool->lock);
1894 ida_remove(&pool->worker_ida, id); 1816 ida_remove(&pool->worker_ida, id);
1895} 1817}
1896 1818
1897static void idle_worker_timeout(unsigned long __pool) 1819static void idle_worker_timeout(unsigned long __pool)
1898{ 1820{
1899 struct worker_pool *pool = (void *)__pool; 1821 struct worker_pool *pool = (void *)__pool;
1900 struct global_cwq *gcwq = pool->gcwq;
1901 1822
1902 spin_lock_irq(&gcwq->lock); 1823 spin_lock_irq(&pool->lock);
1903 1824
1904 if (too_many_workers(pool)) { 1825 if (too_many_workers(pool)) {
1905 struct worker *worker; 1826 struct worker *worker;
@@ -1918,20 +1839,20 @@ static void idle_worker_timeout(unsigned long __pool)
1918 } 1839 }
1919 } 1840 }
1920 1841
1921 spin_unlock_irq(&gcwq->lock); 1842 spin_unlock_irq(&pool->lock);
1922} 1843}
1923 1844
1924static bool send_mayday(struct work_struct *work) 1845static bool send_mayday(struct work_struct *work)
1925{ 1846{
1926 struct cpu_workqueue_struct *cwq = get_work_cwq(work); 1847 struct pool_workqueue *pwq = get_work_pwq(work);
1927 struct workqueue_struct *wq = cwq->wq; 1848 struct workqueue_struct *wq = pwq->wq;
1928 unsigned int cpu; 1849 unsigned int cpu;
1929 1850
1930 if (!(wq->flags & WQ_RESCUER)) 1851 if (!(wq->flags & WQ_RESCUER))
1931 return false; 1852 return false;
1932 1853
1933 /* mayday mayday mayday */ 1854 /* mayday mayday mayday */
1934 cpu = cwq->pool->gcwq->cpu; 1855 cpu = pwq->pool->cpu;
1935 /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */ 1856 /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */
1936 if (cpu == WORK_CPU_UNBOUND) 1857 if (cpu == WORK_CPU_UNBOUND)
1937 cpu = 0; 1858 cpu = 0;
@@ -1940,13 +1861,12 @@ static bool send_mayday(struct work_struct *work)
1940 return true; 1861 return true;
1941} 1862}
1942 1863
1943static void gcwq_mayday_timeout(unsigned long __pool) 1864static void pool_mayday_timeout(unsigned long __pool)
1944{ 1865{
1945 struct worker_pool *pool = (void *)__pool; 1866 struct worker_pool *pool = (void *)__pool;
1946 struct global_cwq *gcwq = pool->gcwq;
1947 struct work_struct *work; 1867 struct work_struct *work;
1948 1868
1949 spin_lock_irq(&gcwq->lock); 1869 spin_lock_irq(&pool->lock);
1950 1870
1951 if (need_to_create_worker(pool)) { 1871 if (need_to_create_worker(pool)) {
1952 /* 1872 /*
@@ -1959,7 +1879,7 @@ static void gcwq_mayday_timeout(unsigned long __pool)
1959 send_mayday(work); 1879 send_mayday(work);
1960 } 1880 }
1961 1881
1962 spin_unlock_irq(&gcwq->lock); 1882 spin_unlock_irq(&pool->lock);
1963 1883
1964 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL); 1884 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
1965} 1885}
@@ -1978,24 +1898,22 @@ static void gcwq_mayday_timeout(unsigned long __pool)
1978 * may_start_working() true. 1898 * may_start_working() true.
1979 * 1899 *
1980 * LOCKING: 1900 * LOCKING:
1981 * spin_lock_irq(gcwq->lock) which may be released and regrabbed 1901 * spin_lock_irq(pool->lock) which may be released and regrabbed
1982 * multiple times. Does GFP_KERNEL allocations. Called only from 1902 * multiple times. Does GFP_KERNEL allocations. Called only from
1983 * manager. 1903 * manager.
1984 * 1904 *
1985 * RETURNS: 1905 * RETURNS:
1986 * false if no action was taken and gcwq->lock stayed locked, true 1906 * false if no action was taken and pool->lock stayed locked, true
1987 * otherwise. 1907 * otherwise.
1988 */ 1908 */
1989static bool maybe_create_worker(struct worker_pool *pool) 1909static bool maybe_create_worker(struct worker_pool *pool)
1990__releases(&gcwq->lock) 1910__releases(&pool->lock)
1991__acquires(&gcwq->lock) 1911__acquires(&pool->lock)
1992{ 1912{
1993 struct global_cwq *gcwq = pool->gcwq;
1994
1995 if (!need_to_create_worker(pool)) 1913 if (!need_to_create_worker(pool))
1996 return false; 1914 return false;
1997restart: 1915restart:
1998 spin_unlock_irq(&gcwq->lock); 1916 spin_unlock_irq(&pool->lock);
1999 1917
2000 /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */ 1918 /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
2001 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); 1919 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
@@ -2006,7 +1924,7 @@ restart:
2006 worker = create_worker(pool); 1924 worker = create_worker(pool);
2007 if (worker) { 1925 if (worker) {
2008 del_timer_sync(&pool->mayday_timer); 1926 del_timer_sync(&pool->mayday_timer);
2009 spin_lock_irq(&gcwq->lock); 1927 spin_lock_irq(&pool->lock);
2010 start_worker(worker); 1928 start_worker(worker);
2011 BUG_ON(need_to_create_worker(pool)); 1929 BUG_ON(need_to_create_worker(pool));
2012 return true; 1930 return true;
@@ -2023,7 +1941,7 @@ restart:
2023 } 1941 }
2024 1942
2025 del_timer_sync(&pool->mayday_timer); 1943 del_timer_sync(&pool->mayday_timer);
2026 spin_lock_irq(&gcwq->lock); 1944 spin_lock_irq(&pool->lock);
2027 if (need_to_create_worker(pool)) 1945 if (need_to_create_worker(pool))
2028 goto restart; 1946 goto restart;
2029 return true; 1947 return true;
@@ -2037,11 +1955,11 @@ restart:
2037 * IDLE_WORKER_TIMEOUT. 1955 * IDLE_WORKER_TIMEOUT.
2038 * 1956 *
2039 * LOCKING: 1957 * LOCKING:
2040 * spin_lock_irq(gcwq->lock) which may be released and regrabbed 1958 * spin_lock_irq(pool->lock) which may be released and regrabbed
2041 * multiple times. Called only from manager. 1959 * multiple times. Called only from manager.
2042 * 1960 *
2043 * RETURNS: 1961 * RETURNS:
2044 * false if no action was taken and gcwq->lock stayed locked, true 1962 * false if no action was taken and pool->lock stayed locked, true
2045 * otherwise. 1963 * otherwise.
2046 */ 1964 */
2047static bool maybe_destroy_workers(struct worker_pool *pool) 1965static bool maybe_destroy_workers(struct worker_pool *pool)
@@ -2071,21 +1989,21 @@ static bool maybe_destroy_workers(struct worker_pool *pool)
2071 * manage_workers - manage worker pool 1989 * manage_workers - manage worker pool
2072 * @worker: self 1990 * @worker: self
2073 * 1991 *
2074 * Assume the manager role and manage gcwq worker pool @worker belongs 1992 * Assume the manager role and manage the worker pool @worker belongs
2075 * to. At any given time, there can be only zero or one manager per 1993 * to. At any given time, there can be only zero or one manager per
2076 * gcwq. The exclusion is handled automatically by this function. 1994 * pool. The exclusion is handled automatically by this function.
2077 * 1995 *
2078 * The caller can safely start processing works on false return. On 1996 * The caller can safely start processing works on false return. On
2079 * true return, it's guaranteed that need_to_create_worker() is false 1997 * true return, it's guaranteed that need_to_create_worker() is false
2080 * and may_start_working() is true. 1998 * and may_start_working() is true.
2081 * 1999 *
2082 * CONTEXT: 2000 * CONTEXT:
2083 * spin_lock_irq(gcwq->lock) which may be released and regrabbed 2001 * spin_lock_irq(pool->lock) which may be released and regrabbed
2084 * multiple times. Does GFP_KERNEL allocations. 2002 * multiple times. Does GFP_KERNEL allocations.
2085 * 2003 *
2086 * RETURNS: 2004 * RETURNS:
2087 * false if no action was taken and gcwq->lock stayed locked, true if 2005 * %false if no action was taken and pool->lock stayed locked, %true
2088 * some action was taken. 2006 * if some action was taken.
2089 */ 2007 */
2090static bool manage_workers(struct worker *worker) 2008static bool manage_workers(struct worker *worker)
2091{ 2009{
@@ -2107,20 +2025,20 @@ static bool manage_workers(struct worker *worker)
2107 * manager against CPU hotplug. 2025 * manager against CPU hotplug.
2108 * 2026 *
2109 * assoc_mutex would always be free unless CPU hotplug is in 2027 * assoc_mutex would always be free unless CPU hotplug is in
2110 * progress. trylock first without dropping @gcwq->lock. 2028 * progress. trylock first without dropping @pool->lock.
2111 */ 2029 */
2112 if (unlikely(!mutex_trylock(&pool->assoc_mutex))) { 2030 if (unlikely(!mutex_trylock(&pool->assoc_mutex))) {
2113 spin_unlock_irq(&pool->gcwq->lock); 2031 spin_unlock_irq(&pool->lock);
2114 mutex_lock(&pool->assoc_mutex); 2032 mutex_lock(&pool->assoc_mutex);
2115 /* 2033 /*
2116 * CPU hotplug could have happened while we were waiting 2034 * CPU hotplug could have happened while we were waiting
2117 * for assoc_mutex. Hotplug itself can't handle us 2035 * for assoc_mutex. Hotplug itself can't handle us
2118 * because manager isn't either on idle or busy list, and 2036 * because manager isn't either on idle or busy list, and
2119 * @gcwq's state and ours could have deviated. 2037 * @pool's state and ours could have deviated.
2120 * 2038 *
2121 * As hotplug is now excluded via assoc_mutex, we can 2039 * As hotplug is now excluded via assoc_mutex, we can
2122 * simply try to bind. It will succeed or fail depending 2040 * simply try to bind. It will succeed or fail depending
2123 * on @gcwq's current state. Try it and adjust 2041 * on @pool's current state. Try it and adjust
2124 * %WORKER_UNBOUND accordingly. 2042 * %WORKER_UNBOUND accordingly.
2125 */ 2043 */
2126 if (worker_maybe_bind_and_lock(worker)) 2044 if (worker_maybe_bind_and_lock(worker))
@@ -2157,18 +2075,15 @@ static bool manage_workers(struct worker *worker)
2157 * call this function to process a work. 2075 * call this function to process a work.
2158 * 2076 *
2159 * CONTEXT: 2077 * CONTEXT:
2160 * spin_lock_irq(gcwq->lock) which is released and regrabbed. 2078 * spin_lock_irq(pool->lock) which is released and regrabbed.
2161 */ 2079 */
2162static void process_one_work(struct worker *worker, struct work_struct *work) 2080static void process_one_work(struct worker *worker, struct work_struct *work)
2163__releases(&gcwq->lock) 2081__releases(&pool->lock)
2164__acquires(&gcwq->lock) 2082__acquires(&pool->lock)
2165{ 2083{
2166 struct cpu_workqueue_struct *cwq = get_work_cwq(work); 2084 struct pool_workqueue *pwq = get_work_pwq(work);
2167 struct worker_pool *pool = worker->pool; 2085 struct worker_pool *pool = worker->pool;
2168 struct global_cwq *gcwq = pool->gcwq; 2086 bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
2169 struct hlist_head *bwh = busy_worker_head(gcwq, work);
2170 bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
2171 work_func_t f = work->func;
2172 int work_color; 2087 int work_color;
2173 struct worker *collision; 2088 struct worker *collision;
2174#ifdef CONFIG_LOCKDEP 2089#ifdef CONFIG_LOCKDEP
@@ -2186,11 +2101,11 @@ __acquires(&gcwq->lock)
2186 /* 2101 /*
2187 * Ensure we're on the correct CPU. DISASSOCIATED test is 2102 * Ensure we're on the correct CPU. DISASSOCIATED test is
2188 * necessary to avoid spurious warnings from rescuers servicing the 2103 * necessary to avoid spurious warnings from rescuers servicing the
2189 * unbound or a disassociated gcwq. 2104 * unbound or a disassociated pool.
2190 */ 2105 */
2191 WARN_ON_ONCE(!(worker->flags & WORKER_UNBOUND) && 2106 WARN_ON_ONCE(!(worker->flags & WORKER_UNBOUND) &&
2192 !(gcwq->flags & GCWQ_DISASSOCIATED) && 2107 !(pool->flags & POOL_DISASSOCIATED) &&
2193 raw_smp_processor_id() != gcwq->cpu); 2108 raw_smp_processor_id() != pool->cpu);
2194 2109
2195 /* 2110 /*
2196 * A single work shouldn't be executed concurrently by 2111 * A single work shouldn't be executed concurrently by
@@ -2198,7 +2113,7 @@ __acquires(&gcwq->lock)
2198 * already processing the work. If so, defer the work to the 2113 * already processing the work. If so, defer the work to the
2199 * currently executing one. 2114 * currently executing one.
2200 */ 2115 */
2201 collision = __find_worker_executing_work(gcwq, bwh, work); 2116 collision = find_worker_executing_work(pool, work);
2202 if (unlikely(collision)) { 2117 if (unlikely(collision)) {
2203 move_linked_works(work, &collision->scheduled, NULL); 2118 move_linked_works(work, &collision->scheduled, NULL);
2204 return; 2119 return;
@@ -2206,9 +2121,10 @@ __acquires(&gcwq->lock)
2206 2121
2207 /* claim and dequeue */ 2122 /* claim and dequeue */
2208 debug_work_deactivate(work); 2123 debug_work_deactivate(work);
2209 hlist_add_head(&worker->hentry, bwh); 2124 hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
2210 worker->current_work = work; 2125 worker->current_work = work;
2211 worker->current_cwq = cwq; 2126 worker->current_func = work->func;
2127 worker->current_pwq = pwq;
2212 work_color = get_work_color(work); 2128 work_color = get_work_color(work);
2213 2129
2214 list_del_init(&work->entry); 2130 list_del_init(&work->entry);
@@ -2221,53 +2137,55 @@ __acquires(&gcwq->lock)
2221 worker_set_flags(worker, WORKER_CPU_INTENSIVE, true); 2137 worker_set_flags(worker, WORKER_CPU_INTENSIVE, true);
2222 2138
2223 /* 2139 /*
2224 * Unbound gcwq isn't concurrency managed and work items should be 2140 * Unbound pool isn't concurrency managed and work items should be
2225 * executed ASAP. Wake up another worker if necessary. 2141 * executed ASAP. Wake up another worker if necessary.
2226 */ 2142 */
2227 if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool)) 2143 if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool))
2228 wake_up_worker(pool); 2144 wake_up_worker(pool);
2229 2145
2230 /* 2146 /*
2231 * Record the last CPU and clear PENDING which should be the last 2147 * Record the last pool and clear PENDING which should be the last
2232 * update to @work. Also, do this inside @gcwq->lock so that 2148 * update to @work. Also, do this inside @pool->lock so that
2233 * PENDING and queued state changes happen together while IRQ is 2149 * PENDING and queued state changes happen together while IRQ is
2234 * disabled. 2150 * disabled.
2235 */ 2151 */
2236 set_work_cpu_and_clear_pending(work, gcwq->cpu); 2152 set_work_pool_and_clear_pending(work, pool->id);
2237 2153
2238 spin_unlock_irq(&gcwq->lock); 2154 spin_unlock_irq(&pool->lock);
2239 2155
2240 lock_map_acquire_read(&cwq->wq->lockdep_map); 2156 lock_map_acquire_read(&pwq->wq->lockdep_map);
2241 lock_map_acquire(&lockdep_map); 2157 lock_map_acquire(&lockdep_map);
2242 trace_workqueue_execute_start(work); 2158 trace_workqueue_execute_start(work);
2243 f(work); 2159 worker->current_func(work);
2244 /* 2160 /*
2245 * While we must be careful to not use "work" after this, the trace 2161 * While we must be careful to not use "work" after this, the trace
2246 * point will only record its address. 2162 * point will only record its address.
2247 */ 2163 */
2248 trace_workqueue_execute_end(work); 2164 trace_workqueue_execute_end(work);
2249 lock_map_release(&lockdep_map); 2165 lock_map_release(&lockdep_map);
2250 lock_map_release(&cwq->wq->lockdep_map); 2166 lock_map_release(&pwq->wq->lockdep_map);
2251 2167
2252 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { 2168 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
2253 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n" 2169 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
2254 " last function: %pf\n", 2170 " last function: %pf\n",
2255 current->comm, preempt_count(), task_pid_nr(current), f); 2171 current->comm, preempt_count(), task_pid_nr(current),
2172 worker->current_func);
2256 debug_show_held_locks(current); 2173 debug_show_held_locks(current);
2257 dump_stack(); 2174 dump_stack();
2258 } 2175 }
2259 2176
2260 spin_lock_irq(&gcwq->lock); 2177 spin_lock_irq(&pool->lock);
2261 2178
2262 /* clear cpu intensive status */ 2179 /* clear cpu intensive status */
2263 if (unlikely(cpu_intensive)) 2180 if (unlikely(cpu_intensive))
2264 worker_clr_flags(worker, WORKER_CPU_INTENSIVE); 2181 worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
2265 2182
2266 /* we're done with it, release */ 2183 /* we're done with it, release */
2267 hlist_del_init(&worker->hentry); 2184 hash_del(&worker->hentry);
2268 worker->current_work = NULL; 2185 worker->current_work = NULL;
2269 worker->current_cwq = NULL; 2186 worker->current_func = NULL;
2270 cwq_dec_nr_in_flight(cwq, work_color); 2187 worker->current_pwq = NULL;
2188 pwq_dec_nr_in_flight(pwq, work_color);
2271} 2189}
2272 2190
2273/** 2191/**
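The pr_err() above now prints worker->current_func directly, which is what identifies the offender when a handler returns in atomic context or with a lock still held. A deliberately buggy sketch of the kind of callback that trips the check; names are hypothetical:

#include <linux/spinlock.h>
#include <linux/workqueue.h>

static DEFINE_SPINLOCK(demo_lock);

/* BUGGY on purpose: returns with demo_lock held, so the worker reports
 * "BUG: workqueue leaked lock or atomic" and names this function. */
static void leaky_fn(struct work_struct *work)
{
	spin_lock(&demo_lock);
	/* ... missing spin_unlock(&demo_lock) ... */
}
static DECLARE_WORK(leaky_work, leaky_fn);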
@@ -2279,7 +2197,7 @@ __acquires(&gcwq->lock)
2279 * fetches a work from the top and executes it. 2197 * fetches a work from the top and executes it.
2280 * 2198 *
2281 * CONTEXT: 2199 * CONTEXT:
2282 * spin_lock_irq(gcwq->lock) which may be released and regrabbed 2200 * spin_lock_irq(pool->lock) which may be released and regrabbed
2283 * multiple times. 2201 * multiple times.
2284 */ 2202 */
2285static void process_scheduled_works(struct worker *worker) 2203static void process_scheduled_works(struct worker *worker)
@@ -2295,8 +2213,8 @@ static void process_scheduled_works(struct worker *worker)
2295 * worker_thread - the worker thread function 2213 * worker_thread - the worker thread function
2296 * @__worker: self 2214 * @__worker: self
2297 * 2215 *
2298 * The gcwq worker thread function. There's a single dynamic pool of 2216 * The worker thread function. There are NR_CPU_WORKER_POOLS dynamic pools
2299 * these per each cpu. These workers process all works regardless of 2217 * of these per cpu. These workers process all works regardless of
2300 * their specific target workqueue. The only exception is works which 2218 * their specific target workqueue. The only exception is works which
2301 * belong to workqueues with a rescuer which will be explained in 2219 * belong to workqueues with a rescuer which will be explained in
2302 * rescuer_thread(). 2220 * rescuer_thread().
@@ -2305,16 +2223,15 @@ static int worker_thread(void *__worker)
2305{ 2223{
2306 struct worker *worker = __worker; 2224 struct worker *worker = __worker;
2307 struct worker_pool *pool = worker->pool; 2225 struct worker_pool *pool = worker->pool;
2308 struct global_cwq *gcwq = pool->gcwq;
2309 2226
2310 /* tell the scheduler that this is a workqueue worker */ 2227 /* tell the scheduler that this is a workqueue worker */
2311 worker->task->flags |= PF_WQ_WORKER; 2228 worker->task->flags |= PF_WQ_WORKER;
2312woke_up: 2229woke_up:
2313 spin_lock_irq(&gcwq->lock); 2230 spin_lock_irq(&pool->lock);
2314 2231
2315 /* we are off idle list if destruction or rebind is requested */ 2232 /* we are off idle list if destruction or rebind is requested */
2316 if (unlikely(list_empty(&worker->entry))) { 2233 if (unlikely(list_empty(&worker->entry))) {
2317 spin_unlock_irq(&gcwq->lock); 2234 spin_unlock_irq(&pool->lock);
2318 2235
2319 /* if DIE is set, destruction is requested */ 2236 /* if DIE is set, destruction is requested */
2320 if (worker->flags & WORKER_DIE) { 2237 if (worker->flags & WORKER_DIE) {
@@ -2373,52 +2290,59 @@ sleep:
2373 goto recheck; 2290 goto recheck;
2374 2291
2375 /* 2292 /*
2376 * gcwq->lock is held and there's no work to process and no 2293 * pool->lock is held and there's no work to process and no need to
2377 * need to manage, sleep. Workers are woken up only while 2294 * manage, sleep. Workers are woken up only while holding
2378 * holding gcwq->lock or from local cpu, so setting the 2295 * pool->lock or from local cpu, so setting the current state
2379 * current state before releasing gcwq->lock is enough to 2296 * before releasing pool->lock is enough to prevent losing any
2380 * prevent losing any event. 2297 * event.
2381 */ 2298 */
2382 worker_enter_idle(worker); 2299 worker_enter_idle(worker);
2383 __set_current_state(TASK_INTERRUPTIBLE); 2300 __set_current_state(TASK_INTERRUPTIBLE);
2384 spin_unlock_irq(&gcwq->lock); 2301 spin_unlock_irq(&pool->lock);
2385 schedule(); 2302 schedule();
2386 goto woke_up; 2303 goto woke_up;
2387} 2304}
2388 2305
2389/** 2306/**
2390 * rescuer_thread - the rescuer thread function 2307 * rescuer_thread - the rescuer thread function
2391 * @__wq: the associated workqueue 2308 * @__rescuer: self
2392 * 2309 *
2393 * Workqueue rescuer thread function. There's one rescuer for each 2310 * Workqueue rescuer thread function. There's one rescuer for each
2394 * workqueue which has WQ_RESCUER set. 2311 * workqueue which has WQ_RESCUER set.
2395 * 2312 *
2396 * Regular work processing on a gcwq may block trying to create a new 2313 * Regular work processing on a pool may block trying to create a new
2397 * worker which uses GFP_KERNEL allocation which has slight chance of 2314 * worker which uses GFP_KERNEL allocation which has slight chance of
2398 * developing into deadlock if some works currently on the same queue 2315 * developing into deadlock if some works currently on the same queue
2399 * need to be processed to satisfy the GFP_KERNEL allocation. This is 2316 * need to be processed to satisfy the GFP_KERNEL allocation. This is
2400 * the problem rescuer solves. 2317 * the problem rescuer solves.
2401 * 2318 *
2402 * When such condition is possible, the gcwq summons rescuers of all 2319 * When such condition is possible, the pool summons rescuers of all
2403 * workqueues which have works queued on the gcwq and let them process 2320 * workqueues which have works queued on the pool and let them process
2404 * those works so that forward progress can be guaranteed. 2321 * those works so that forward progress can be guaranteed.
2405 * 2322 *
2406 * This should happen rarely. 2323 * This should happen rarely.
2407 */ 2324 */
2408static int rescuer_thread(void *__wq) 2325static int rescuer_thread(void *__rescuer)
2409{ 2326{
2410 struct workqueue_struct *wq = __wq; 2327 struct worker *rescuer = __rescuer;
2411 struct worker *rescuer = wq->rescuer; 2328 struct workqueue_struct *wq = rescuer->rescue_wq;
2412 struct list_head *scheduled = &rescuer->scheduled; 2329 struct list_head *scheduled = &rescuer->scheduled;
2413 bool is_unbound = wq->flags & WQ_UNBOUND; 2330 bool is_unbound = wq->flags & WQ_UNBOUND;
2414 unsigned int cpu; 2331 unsigned int cpu;
2415 2332
2416 set_user_nice(current, RESCUER_NICE_LEVEL); 2333 set_user_nice(current, RESCUER_NICE_LEVEL);
2334
2335 /*
2336 * Mark rescuer as worker too. As WORKER_PREP is never cleared, it
2337 * doesn't participate in concurrency management.
2338 */
2339 rescuer->task->flags |= PF_WQ_WORKER;
2417repeat: 2340repeat:
2418 set_current_state(TASK_INTERRUPTIBLE); 2341 set_current_state(TASK_INTERRUPTIBLE);
2419 2342
2420 if (kthread_should_stop()) { 2343 if (kthread_should_stop()) {
2421 __set_current_state(TASK_RUNNING); 2344 __set_current_state(TASK_RUNNING);
2345 rescuer->task->flags &= ~PF_WQ_WORKER;
2422 return 0; 2346 return 0;
2423 } 2347 }
2424 2348
@@ -2428,9 +2352,8 @@ repeat:
2428 */ 2352 */
2429 for_each_mayday_cpu(cpu, wq->mayday_mask) { 2353 for_each_mayday_cpu(cpu, wq->mayday_mask) {
2430 unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu; 2354 unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu;
2431 struct cpu_workqueue_struct *cwq = get_cwq(tcpu, wq); 2355 struct pool_workqueue *pwq = get_pwq(tcpu, wq);
2432 struct worker_pool *pool = cwq->pool; 2356 struct worker_pool *pool = pwq->pool;
2433 struct global_cwq *gcwq = pool->gcwq;
2434 struct work_struct *work, *n; 2357 struct work_struct *work, *n;
2435 2358
2436 __set_current_state(TASK_RUNNING); 2359 __set_current_state(TASK_RUNNING);
@@ -2446,22 +2369,24 @@ repeat:
2446 */ 2369 */
2447 BUG_ON(!list_empty(&rescuer->scheduled)); 2370 BUG_ON(!list_empty(&rescuer->scheduled));
2448 list_for_each_entry_safe(work, n, &pool->worklist, entry) 2371 list_for_each_entry_safe(work, n, &pool->worklist, entry)
2449 if (get_work_cwq(work) == cwq) 2372 if (get_work_pwq(work) == pwq)
2450 move_linked_works(work, scheduled, &n); 2373 move_linked_works(work, scheduled, &n);
2451 2374
2452 process_scheduled_works(rescuer); 2375 process_scheduled_works(rescuer);
2453 2376
2454 /* 2377 /*
2455 * Leave this gcwq. If keep_working() is %true, notify a 2378 * Leave this pool. If keep_working() is %true, notify a
2456 * regular worker; otherwise, we end up with 0 concurrency 2379 * regular worker; otherwise, we end up with 0 concurrency
2457 * and stalling the execution. 2380 * and stalling the execution.
2458 */ 2381 */
2459 if (keep_working(pool)) 2382 if (keep_working(pool))
2460 wake_up_worker(pool); 2383 wake_up_worker(pool);
2461 2384
2462 spin_unlock_irq(&gcwq->lock); 2385 spin_unlock_irq(&pool->lock);
2463 } 2386 }
2464 2387
2388 /* rescuers should never participate in concurrency management */
2389 WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
2465 schedule(); 2390 schedule();
2466 goto repeat; 2391 goto repeat;
2467} 2392}
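WQ_RESCUER is the internal counterpart of the caller-visible WQ_MEM_RECLAIM flag: requesting it at creation time is what gives a workqueue the rescuer thread serviced above. A minimal sketch under that assumption, with the queue name being hypothetical:

#include <linux/errno.h>
#include <linux/workqueue.h>

/*
 * A reclaim-path queue must make forward progress even when creating a
 * new kworker would itself need GFP_KERNEL memory; WQ_MEM_RECLAIM
 * guarantees a dedicated rescuer for the mayday path to fall back on.
 */
static struct workqueue_struct *demo_reclaim_wq;

static int demo_reclaim_setup(void)
{
	demo_reclaim_wq = alloc_workqueue("demo_reclaim", WQ_MEM_RECLAIM, 1);
	return demo_reclaim_wq ? 0 : -ENOMEM;
}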
@@ -2479,7 +2404,7 @@ static void wq_barrier_func(struct work_struct *work)
2479 2404
2480/** 2405/**
2481 * insert_wq_barrier - insert a barrier work 2406 * insert_wq_barrier - insert a barrier work
2482 * @cwq: cwq to insert barrier into 2407 * @pwq: pwq to insert barrier into
2483 * @barr: wq_barrier to insert 2408 * @barr: wq_barrier to insert
2484 * @target: target work to attach @barr to 2409 * @target: target work to attach @barr to
2485 * @worker: worker currently executing @target, NULL if @target is not executing 2410 * @worker: worker currently executing @target, NULL if @target is not executing
@@ -2496,12 +2421,12 @@ static void wq_barrier_func(struct work_struct *work)
2496 * after a work with LINKED flag set. 2421 * after a work with LINKED flag set.
2497 * 2422 *
2498 * Note that when @worker is non-NULL, @target may be modified 2423 * Note that when @worker is non-NULL, @target may be modified
2499 * underneath us, so we can't reliably determine cwq from @target. 2424 * underneath us, so we can't reliably determine pwq from @target.
2500 * 2425 *
2501 * CONTEXT: 2426 * CONTEXT:
2502 * spin_lock_irq(gcwq->lock). 2427 * spin_lock_irq(pool->lock).
2503 */ 2428 */
2504static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, 2429static void insert_wq_barrier(struct pool_workqueue *pwq,
2505 struct wq_barrier *barr, 2430 struct wq_barrier *barr,
2506 struct work_struct *target, struct worker *worker) 2431 struct work_struct *target, struct worker *worker)
2507{ 2432{
@@ -2509,7 +2434,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
2509 unsigned int linked = 0; 2434 unsigned int linked = 0;
2510 2435
2511 /* 2436 /*
2512 * debugobject calls are safe here even with gcwq->lock locked 2437 * debugobject calls are safe here even with pool->lock locked
2513 * as we know for sure that this will not trigger any of the 2438 * as we know for sure that this will not trigger any of the
2514 * checks and call back into the fixup functions where we 2439 * checks and call back into the fixup functions where we
2515 * might deadlock. 2440 * might deadlock.
@@ -2534,23 +2459,23 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
2534 } 2459 }
2535 2460
2536 debug_work_activate(&barr->work); 2461 debug_work_activate(&barr->work);
2537 insert_work(cwq, &barr->work, head, 2462 insert_work(pwq, &barr->work, head,
2538 work_color_to_flags(WORK_NO_COLOR) | linked); 2463 work_color_to_flags(WORK_NO_COLOR) | linked);
2539} 2464}
2540 2465
2541/** 2466/**
2542 * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing 2467 * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing
2543 * @wq: workqueue being flushed 2468 * @wq: workqueue being flushed
2544 * @flush_color: new flush color, < 0 for no-op 2469 * @flush_color: new flush color, < 0 for no-op
2545 * @work_color: new work color, < 0 for no-op 2470 * @work_color: new work color, < 0 for no-op
2546 * 2471 *
2547 * Prepare cwqs for workqueue flushing. 2472 * Prepare pwqs for workqueue flushing.
2548 * 2473 *
2549 * If @flush_color is non-negative, flush_color on all cwqs should be 2474 * If @flush_color is non-negative, flush_color on all pwqs should be
2550 * -1. If no cwq has in-flight commands at the specified color, all 2475 * -1. If no pwq has in-flight commands at the specified color, all
2551 * cwq->flush_color's stay at -1 and %false is returned. If any cwq 2476 * pwq->flush_color's stay at -1 and %false is returned. If any pwq
2552 * has in flight commands, its cwq->flush_color is set to 2477 * has in flight commands, its pwq->flush_color is set to
2553 * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq 2478 * @flush_color, @wq->nr_pwqs_to_flush is updated accordingly, pwq
2554 * wakeup logic is armed and %true is returned. 2479 * wakeup logic is armed and %true is returned.
2555 * 2480 *
2556 * The caller should have initialized @wq->first_flusher prior to 2481 * The caller should have initialized @wq->first_flusher prior to
@@ -2558,7 +2483,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
2558 * @flush_color is negative, no flush color update is done and %false 2483 * @flush_color is negative, no flush color update is done and %false
2559 * is returned. 2484 * is returned.
2560 * 2485 *
2561 * If @work_color is non-negative, all cwqs should have the same 2486 * If @work_color is non-negative, all pwqs should have the same
2562 * work_color which is previous to @work_color and all will be 2487 * work_color which is previous to @work_color and all will be
2563 * advanced to @work_color. 2488 * advanced to @work_color.
2564 * 2489 *
@@ -2569,42 +2494,42 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
2569 * %true if @flush_color >= 0 and there's something to flush. %false 2494 * %true if @flush_color >= 0 and there's something to flush. %false
2570 * otherwise. 2495 * otherwise.
2571 */ 2496 */
2572static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq, 2497static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
2573 int flush_color, int work_color) 2498 int flush_color, int work_color)
2574{ 2499{
2575 bool wait = false; 2500 bool wait = false;
2576 unsigned int cpu; 2501 unsigned int cpu;
2577 2502
2578 if (flush_color >= 0) { 2503 if (flush_color >= 0) {
2579 BUG_ON(atomic_read(&wq->nr_cwqs_to_flush)); 2504 BUG_ON(atomic_read(&wq->nr_pwqs_to_flush));
2580 atomic_set(&wq->nr_cwqs_to_flush, 1); 2505 atomic_set(&wq->nr_pwqs_to_flush, 1);
2581 } 2506 }
2582 2507
2583 for_each_cwq_cpu(cpu, wq) { 2508 for_each_pwq_cpu(cpu, wq) {
2584 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 2509 struct pool_workqueue *pwq = get_pwq(cpu, wq);
2585 struct global_cwq *gcwq = cwq->pool->gcwq; 2510 struct worker_pool *pool = pwq->pool;
2586 2511
2587 spin_lock_irq(&gcwq->lock); 2512 spin_lock_irq(&pool->lock);
2588 2513
2589 if (flush_color >= 0) { 2514 if (flush_color >= 0) {
2590 BUG_ON(cwq->flush_color != -1); 2515 BUG_ON(pwq->flush_color != -1);
2591 2516
2592 if (cwq->nr_in_flight[flush_color]) { 2517 if (pwq->nr_in_flight[flush_color]) {
2593 cwq->flush_color = flush_color; 2518 pwq->flush_color = flush_color;
2594 atomic_inc(&wq->nr_cwqs_to_flush); 2519 atomic_inc(&wq->nr_pwqs_to_flush);
2595 wait = true; 2520 wait = true;
2596 } 2521 }
2597 } 2522 }
2598 2523
2599 if (work_color >= 0) { 2524 if (work_color >= 0) {
2600 BUG_ON(work_color != work_next_color(cwq->work_color)); 2525 BUG_ON(work_color != work_next_color(pwq->work_color));
2601 cwq->work_color = work_color; 2526 pwq->work_color = work_color;
2602 } 2527 }
2603 2528
2604 spin_unlock_irq(&gcwq->lock); 2529 spin_unlock_irq(&pool->lock);
2605 } 2530 }
2606 2531
2607 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush)) 2532 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
2608 complete(&wq->first_flusher->done); 2533 complete(&wq->first_flusher->done);
2609 2534
2610 return wait; 2535 return wait;
@@ -2655,7 +2580,7 @@ void flush_workqueue(struct workqueue_struct *wq)
2655 2580
2656 wq->first_flusher = &this_flusher; 2581 wq->first_flusher = &this_flusher;
2657 2582
2658 if (!flush_workqueue_prep_cwqs(wq, wq->flush_color, 2583 if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
2659 wq->work_color)) { 2584 wq->work_color)) {
2660 /* nothing to flush, done */ 2585 /* nothing to flush, done */
2661 wq->flush_color = next_color; 2586 wq->flush_color = next_color;
@@ -2666,7 +2591,7 @@ void flush_workqueue(struct workqueue_struct *wq)
2666 /* wait in queue */ 2591 /* wait in queue */
2667 BUG_ON(wq->flush_color == this_flusher.flush_color); 2592 BUG_ON(wq->flush_color == this_flusher.flush_color);
2668 list_add_tail(&this_flusher.list, &wq->flusher_queue); 2593 list_add_tail(&this_flusher.list, &wq->flusher_queue);
2669 flush_workqueue_prep_cwqs(wq, -1, wq->work_color); 2594 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2670 } 2595 }
2671 } else { 2596 } else {
2672 /* 2597 /*
@@ -2733,7 +2658,7 @@ void flush_workqueue(struct workqueue_struct *wq)
2733 2658
2734 list_splice_tail_init(&wq->flusher_overflow, 2659 list_splice_tail_init(&wq->flusher_overflow,
2735 &wq->flusher_queue); 2660 &wq->flusher_queue);
2736 flush_workqueue_prep_cwqs(wq, -1, wq->work_color); 2661 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2737 } 2662 }
2738 2663
2739 if (list_empty(&wq->flusher_queue)) { 2664 if (list_empty(&wq->flusher_queue)) {
@@ -2743,7 +2668,7 @@ void flush_workqueue(struct workqueue_struct *wq)
2743 2668
2744 /* 2669 /*
2745 * Need to flush more colors. Make the next flusher 2670 * Need to flush more colors. Make the next flusher
2746 * the new first flusher and arm cwqs. 2671 * the new first flusher and arm pwqs.
2747 */ 2672 */
2748 BUG_ON(wq->flush_color == wq->work_color); 2673 BUG_ON(wq->flush_color == wq->work_color);
2749 BUG_ON(wq->flush_color != next->flush_color); 2674 BUG_ON(wq->flush_color != next->flush_color);
@@ -2751,7 +2676,7 @@ void flush_workqueue(struct workqueue_struct *wq)
2751 list_del_init(&next->list); 2676 list_del_init(&next->list);
2752 wq->first_flusher = next; 2677 wq->first_flusher = next;
2753 2678
2754 if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1)) 2679 if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
2755 break; 2680 break;
2756 2681
2757 /* 2682 /*
@@ -2794,13 +2719,13 @@ void drain_workqueue(struct workqueue_struct *wq)
2794reflush: 2719reflush:
2795 flush_workqueue(wq); 2720 flush_workqueue(wq);
2796 2721
2797 for_each_cwq_cpu(cpu, wq) { 2722 for_each_pwq_cpu(cpu, wq) {
2798 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 2723 struct pool_workqueue *pwq = get_pwq(cpu, wq);
2799 bool drained; 2724 bool drained;
2800 2725
2801 spin_lock_irq(&cwq->pool->gcwq->lock); 2726 spin_lock_irq(&pwq->pool->lock);
2802 drained = !cwq->nr_active && list_empty(&cwq->delayed_works); 2727 drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
2803 spin_unlock_irq(&cwq->pool->gcwq->lock); 2728 spin_unlock_irq(&pwq->pool->lock);
2804 2729
2805 if (drained) 2730 if (drained)
2806 continue; 2731 continue;
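
The drain loop above is only reachable through the public API. As a reference point for the cwq->pwq rename, here is a minimal caller-side sketch of that API (illustrative only, not taken from this commit; the example_* names are hypothetical):

#include <linux/module.h>
#include <linux/printk.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;	/* hypothetical */
static struct work_struct example_work;

static void example_fn(struct work_struct *work)
{
	pr_info("example work ran\n");
}

static int __init example_init(void)
{
	example_wq = alloc_workqueue("example", 0, 0);
	if (!example_wq)
		return -ENOMEM;

	INIT_WORK(&example_work, example_fn);
	queue_work(example_wq, &example_work);
	flush_workqueue(example_wq);	/* wait for everything queued so far */
	return 0;
}

static void __exit example_exit(void)
{
	/* drain_workqueue() above: flush repeatedly and refuse new work */
	destroy_workqueue(example_wq);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
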
@@ -2822,34 +2747,29 @@ EXPORT_SYMBOL_GPL(drain_workqueue);
2822static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) 2747static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
2823{ 2748{
2824 struct worker *worker = NULL; 2749 struct worker *worker = NULL;
2825 struct global_cwq *gcwq; 2750 struct worker_pool *pool;
2826 struct cpu_workqueue_struct *cwq; 2751 struct pool_workqueue *pwq;
2827 2752
2828 might_sleep(); 2753 might_sleep();
2829 gcwq = get_work_gcwq(work); 2754 pool = get_work_pool(work);
2830 if (!gcwq) 2755 if (!pool)
2831 return false; 2756 return false;
2832 2757
2833 spin_lock_irq(&gcwq->lock); 2758 spin_lock_irq(&pool->lock);
2834 if (!list_empty(&work->entry)) { 2759 /* see the comment in try_to_grab_pending() with the same code */
2835 /* 2760 pwq = get_work_pwq(work);
2836 * See the comment near try_to_grab_pending()->smp_rmb(). 2761 if (pwq) {
2837 * If it was re-queued to a different gcwq under us, we 2762 if (unlikely(pwq->pool != pool))
2838 * are not going to wait.
2839 */
2840 smp_rmb();
2841 cwq = get_work_cwq(work);
2842 if (unlikely(!cwq || gcwq != cwq->pool->gcwq))
2843 goto already_gone; 2763 goto already_gone;
2844 } else { 2764 } else {
2845 worker = find_worker_executing_work(gcwq, work); 2765 worker = find_worker_executing_work(pool, work);
2846 if (!worker) 2766 if (!worker)
2847 goto already_gone; 2767 goto already_gone;
2848 cwq = worker->current_cwq; 2768 pwq = worker->current_pwq;
2849 } 2769 }
2850 2770
2851 insert_wq_barrier(cwq, barr, work, worker); 2771 insert_wq_barrier(pwq, barr, work, worker);
2852 spin_unlock_irq(&gcwq->lock); 2772 spin_unlock_irq(&pool->lock);
2853 2773
2854 /* 2774 /*
2855 * If @max_active is 1 or rescuer is in use, flushing another work 2775 * If @max_active is 1 or rescuer is in use, flushing another work
@@ -2857,15 +2777,15 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
2857 * flusher is not running on the same workqueue by verifying write 2777 * flusher is not running on the same workqueue by verifying write
2858 * access. 2778 * access.
2859 */ 2779 */
2860 if (cwq->wq->saved_max_active == 1 || cwq->wq->flags & WQ_RESCUER) 2780 if (pwq->wq->saved_max_active == 1 || pwq->wq->flags & WQ_RESCUER)
2861 lock_map_acquire(&cwq->wq->lockdep_map); 2781 lock_map_acquire(&pwq->wq->lockdep_map);
2862 else 2782 else
2863 lock_map_acquire_read(&cwq->wq->lockdep_map); 2783 lock_map_acquire_read(&pwq->wq->lockdep_map);
2864 lock_map_release(&cwq->wq->lockdep_map); 2784 lock_map_release(&pwq->wq->lockdep_map);
2865 2785
2866 return true; 2786 return true;
2867already_gone: 2787already_gone:
2868 spin_unlock_irq(&gcwq->lock); 2788 spin_unlock_irq(&pool->lock);
2869 return false; 2789 return false;
2870} 2790}
2871 2791
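
start_flush_work() above is the backend of flush_work(); the caller-visible contract is unchanged by the gcwq->pool rework. A hedged teardown sketch (the struct and names are hypothetical, not from this diff):

#include <linux/slab.h>
#include <linux/workqueue.h>

struct example_obj {			/* hypothetical container */
	struct work_struct work;
	int data;
};

static void example_obj_free(struct example_obj *obj)
{
	/*
	 * flush_work() waits for a possibly running work item via the
	 * barrier inserted by start_flush_work().  If the work can
	 * requeue itself, cancel_work_sync() is the safer teardown.
	 */
	flush_work(&obj->work);
	kfree(obj);
}
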
@@ -2961,8 +2881,7 @@ bool flush_delayed_work(struct delayed_work *dwork)
2961{ 2881{
2962 local_irq_disable(); 2882 local_irq_disable();
2963 if (del_timer_sync(&dwork->timer)) 2883 if (del_timer_sync(&dwork->timer))
2964 __queue_work(dwork->cpu, 2884 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
2965 get_work_cwq(&dwork->work)->wq, &dwork->work);
2966 local_irq_enable(); 2885 local_irq_enable();
2967 return flush_work(&dwork->work); 2886 return flush_work(&dwork->work);
2968} 2887}
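
The simpler flush_delayed_work() above relies on dwork->wq (and dwork->cpu) being recorded at queue time, per the "add delayed_work->wq" commit in this series. A caller-side sketch with hypothetical names:

#include <linux/jiffies.h>
#include <linux/workqueue.h>

static void example_dwork_fn(struct work_struct *work) { }	/* hypothetical */
static DECLARE_DELAYED_WORK(example_dwork, example_dwork_fn);

static void example_kick(struct workqueue_struct *wq)
{
	/* records dwork->wq and dwork->cpu for later requeueing */
	queue_delayed_work(wq, &example_dwork, msecs_to_jiffies(100));

	/* if the timer is still pending, queue immediately, then wait */
	flush_delayed_work(&example_dwork);
}
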
@@ -2992,7 +2911,8 @@ bool cancel_delayed_work(struct delayed_work *dwork)
2992 if (unlikely(ret < 0)) 2911 if (unlikely(ret < 0))
2993 return false; 2912 return false;
2994 2913
2995 set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work)); 2914 set_work_pool_and_clear_pending(&dwork->work,
2915 get_work_pool_id(&dwork->work));
2996 local_irq_restore(flags); 2916 local_irq_restore(flags);
2997 return ret; 2917 return ret;
2998} 2918}
@@ -3171,46 +3091,46 @@ int keventd_up(void)
3171 return system_wq != NULL; 3091 return system_wq != NULL;
3172} 3092}
3173 3093
3174static int alloc_cwqs(struct workqueue_struct *wq) 3094static int alloc_pwqs(struct workqueue_struct *wq)
3175{ 3095{
3176 /* 3096 /*
3177 * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS. 3097 * pwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
3178 * Make sure that the alignment isn't lower than that of 3098 * Make sure that the alignment isn't lower than that of
3179 * unsigned long long. 3099 * unsigned long long.
3180 */ 3100 */
3181 const size_t size = sizeof(struct cpu_workqueue_struct); 3101 const size_t size = sizeof(struct pool_workqueue);
3182 const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS, 3102 const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
3183 __alignof__(unsigned long long)); 3103 __alignof__(unsigned long long));
3184 3104
3185 if (!(wq->flags & WQ_UNBOUND)) 3105 if (!(wq->flags & WQ_UNBOUND))
3186 wq->cpu_wq.pcpu = __alloc_percpu(size, align); 3106 wq->pool_wq.pcpu = __alloc_percpu(size, align);
3187 else { 3107 else {
3188 void *ptr; 3108 void *ptr;
3189 3109
3190 /* 3110 /*
3191 * Allocate enough room to align cwq and put an extra 3111 * Allocate enough room to align pwq and put an extra
3192 * pointer at the end pointing back to the originally 3112 * pointer at the end pointing back to the originally
3193 * allocated pointer which will be used for free. 3113 * allocated pointer which will be used for free.
3194 */ 3114 */
3195 ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL); 3115 ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL);
3196 if (ptr) { 3116 if (ptr) {
3197 wq->cpu_wq.single = PTR_ALIGN(ptr, align); 3117 wq->pool_wq.single = PTR_ALIGN(ptr, align);
3198 *(void **)(wq->cpu_wq.single + 1) = ptr; 3118 *(void **)(wq->pool_wq.single + 1) = ptr;
3199 } 3119 }
3200 } 3120 }
3201 3121
3202 /* just in case, make sure it's actually aligned */ 3122 /* just in case, make sure it's actually aligned */
3203 BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); 3123 BUG_ON(!IS_ALIGNED(wq->pool_wq.v, align));
3204 return wq->cpu_wq.v ? 0 : -ENOMEM; 3124 return wq->pool_wq.v ? 0 : -ENOMEM;
3205} 3125}
3206 3126
3207static void free_cwqs(struct workqueue_struct *wq) 3127static void free_pwqs(struct workqueue_struct *wq)
3208{ 3128{
3209 if (!(wq->flags & WQ_UNBOUND)) 3129 if (!(wq->flags & WQ_UNBOUND))
3210 free_percpu(wq->cpu_wq.pcpu); 3130 free_percpu(wq->pool_wq.pcpu);
3211 else if (wq->cpu_wq.single) { 3131 else if (wq->pool_wq.single) {
3212 /* the pointer to free is stored right after the cwq */ 3132 /* the pointer to free is stored right after the pwq */
3213 kfree(*(void **)(wq->cpu_wq.single + 1)); 3133 kfree(*(void **)(wq->pool_wq.single + 1));
3214 } 3134 }
3215} 3135}
3216 3136
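
alloc_pwqs()/free_pwqs() above use a small over-allocate-and-stash trick for the unbound case: align the object manually and keep the originally allocated pointer right behind it for the eventual kfree(). A userspace C analogue of the same layout (illustrative only, not kernel code):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Return a zeroed object of 'size' bytes aligned to 'align' (a power of
 * two); the pointer originally returned by calloc() is stored right after
 * the object so it can be recovered for free(), mirroring alloc_pwqs(). */
static void *alloc_aligned(size_t size, size_t align)
{
	char *raw = calloc(1, size + align + sizeof(void *));
	char *obj;

	if (!raw)
		return NULL;
	obj = (char *)(((uintptr_t)raw + align - 1) & ~(uintptr_t)(align - 1));
	memcpy(obj + size, &raw, sizeof(raw));	/* "*(void **)(single + 1) = ptr" */
	return obj;
}

static void free_aligned(void *obj, size_t size)
{
	void *raw;

	if (!obj)
		return;
	memcpy(&raw, (char *)obj + size, sizeof(raw));
	free(raw);	/* like free_pwqs() for the unbound case */
}
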
@@ -3264,27 +3184,25 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
3264 wq->flags = flags; 3184 wq->flags = flags;
3265 wq->saved_max_active = max_active; 3185 wq->saved_max_active = max_active;
3266 mutex_init(&wq->flush_mutex); 3186 mutex_init(&wq->flush_mutex);
3267 atomic_set(&wq->nr_cwqs_to_flush, 0); 3187 atomic_set(&wq->nr_pwqs_to_flush, 0);
3268 INIT_LIST_HEAD(&wq->flusher_queue); 3188 INIT_LIST_HEAD(&wq->flusher_queue);
3269 INIT_LIST_HEAD(&wq->flusher_overflow); 3189 INIT_LIST_HEAD(&wq->flusher_overflow);
3270 3190
3271 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); 3191 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
3272 INIT_LIST_HEAD(&wq->list); 3192 INIT_LIST_HEAD(&wq->list);
3273 3193
3274 if (alloc_cwqs(wq) < 0) 3194 if (alloc_pwqs(wq) < 0)
3275 goto err; 3195 goto err;
3276 3196
3277 for_each_cwq_cpu(cpu, wq) { 3197 for_each_pwq_cpu(cpu, wq) {
3278 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3198 struct pool_workqueue *pwq = get_pwq(cpu, wq);
3279 struct global_cwq *gcwq = get_gcwq(cpu); 3199
3280 int pool_idx = (bool)(flags & WQ_HIGHPRI); 3200 BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
3281 3201 pwq->pool = get_std_worker_pool(cpu, flags & WQ_HIGHPRI);
3282 BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK); 3202 pwq->wq = wq;
3283 cwq->pool = &gcwq->pools[pool_idx]; 3203 pwq->flush_color = -1;
3284 cwq->wq = wq; 3204 pwq->max_active = max_active;
3285 cwq->flush_color = -1; 3205 INIT_LIST_HEAD(&pwq->delayed_works);
3286 cwq->max_active = max_active;
3287 INIT_LIST_HEAD(&cwq->delayed_works);
3288 } 3206 }
3289 3207
3290 if (flags & WQ_RESCUER) { 3208 if (flags & WQ_RESCUER) {
@@ -3297,7 +3215,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
3297 if (!rescuer) 3215 if (!rescuer)
3298 goto err; 3216 goto err;
3299 3217
3300 rescuer->task = kthread_create(rescuer_thread, wq, "%s", 3218 rescuer->rescue_wq = wq;
3219 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s",
3301 wq->name); 3220 wq->name);
3302 if (IS_ERR(rescuer->task)) 3221 if (IS_ERR(rescuer->task))
3303 goto err; 3222 goto err;
@@ -3314,8 +3233,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
3314 spin_lock(&workqueue_lock); 3233 spin_lock(&workqueue_lock);
3315 3234
3316 if (workqueue_freezing && wq->flags & WQ_FREEZABLE) 3235 if (workqueue_freezing && wq->flags & WQ_FREEZABLE)
3317 for_each_cwq_cpu(cpu, wq) 3236 for_each_pwq_cpu(cpu, wq)
3318 get_cwq(cpu, wq)->max_active = 0; 3237 get_pwq(cpu, wq)->max_active = 0;
3319 3238
3320 list_add(&wq->list, &workqueues); 3239 list_add(&wq->list, &workqueues);
3321 3240
@@ -3324,7 +3243,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
3324 return wq; 3243 return wq;
3325err: 3244err:
3326 if (wq) { 3245 if (wq) {
3327 free_cwqs(wq); 3246 free_pwqs(wq);
3328 free_mayday_mask(wq->mayday_mask); 3247 free_mayday_mask(wq->mayday_mask);
3329 kfree(wq->rescuer); 3248 kfree(wq->rescuer);
3330 kfree(wq); 3249 kfree(wq);
@@ -3355,14 +3274,14 @@ void destroy_workqueue(struct workqueue_struct *wq)
3355 spin_unlock(&workqueue_lock); 3274 spin_unlock(&workqueue_lock);
3356 3275
3357 /* sanity check */ 3276 /* sanity check */
3358 for_each_cwq_cpu(cpu, wq) { 3277 for_each_pwq_cpu(cpu, wq) {
3359 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3278 struct pool_workqueue *pwq = get_pwq(cpu, wq);
3360 int i; 3279 int i;
3361 3280
3362 for (i = 0; i < WORK_NR_COLORS; i++) 3281 for (i = 0; i < WORK_NR_COLORS; i++)
3363 BUG_ON(cwq->nr_in_flight[i]); 3282 BUG_ON(pwq->nr_in_flight[i]);
3364 BUG_ON(cwq->nr_active); 3283 BUG_ON(pwq->nr_active);
3365 BUG_ON(!list_empty(&cwq->delayed_works)); 3284 BUG_ON(!list_empty(&pwq->delayed_works));
3366 } 3285 }
3367 3286
3368 if (wq->flags & WQ_RESCUER) { 3287 if (wq->flags & WQ_RESCUER) {
@@ -3371,29 +3290,29 @@ void destroy_workqueue(struct workqueue_struct *wq)
3371 kfree(wq->rescuer); 3290 kfree(wq->rescuer);
3372 } 3291 }
3373 3292
3374 free_cwqs(wq); 3293 free_pwqs(wq);
3375 kfree(wq); 3294 kfree(wq);
3376} 3295}
3377EXPORT_SYMBOL_GPL(destroy_workqueue); 3296EXPORT_SYMBOL_GPL(destroy_workqueue);
3378 3297
3379/** 3298/**
3380 * cwq_set_max_active - adjust max_active of a cwq 3299 * pwq_set_max_active - adjust max_active of a pwq
3381 * @cwq: target cpu_workqueue_struct 3300 * @pwq: target pool_workqueue
3382 * @max_active: new max_active value. 3301 * @max_active: new max_active value.
3383 * 3302 *
3384 * Set @cwq->max_active to @max_active and activate delayed works if 3303 * Set @pwq->max_active to @max_active and activate delayed works if
3385 * increased. 3304 * increased.
3386 * 3305 *
3387 * CONTEXT: 3306 * CONTEXT:
3388 * spin_lock_irq(gcwq->lock). 3307 * spin_lock_irq(pool->lock).
3389 */ 3308 */
3390static void cwq_set_max_active(struct cpu_workqueue_struct *cwq, int max_active) 3309static void pwq_set_max_active(struct pool_workqueue *pwq, int max_active)
3391{ 3310{
3392 cwq->max_active = max_active; 3311 pwq->max_active = max_active;
3393 3312
3394 while (!list_empty(&cwq->delayed_works) && 3313 while (!list_empty(&pwq->delayed_works) &&
3395 cwq->nr_active < cwq->max_active) 3314 pwq->nr_active < pwq->max_active)
3396 cwq_activate_first_delayed(cwq); 3315 pwq_activate_first_delayed(pwq);
3397} 3316}
3398 3317
3399/** 3318/**
@@ -3416,16 +3335,17 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
3416 3335
3417 wq->saved_max_active = max_active; 3336 wq->saved_max_active = max_active;
3418 3337
3419 for_each_cwq_cpu(cpu, wq) { 3338 for_each_pwq_cpu(cpu, wq) {
3420 struct global_cwq *gcwq = get_gcwq(cpu); 3339 struct pool_workqueue *pwq = get_pwq(cpu, wq);
3340 struct worker_pool *pool = pwq->pool;
3421 3341
3422 spin_lock_irq(&gcwq->lock); 3342 spin_lock_irq(&pool->lock);
3423 3343
3424 if (!(wq->flags & WQ_FREEZABLE) || 3344 if (!(wq->flags & WQ_FREEZABLE) ||
3425 !(gcwq->flags & GCWQ_FREEZING)) 3345 !(pool->flags & POOL_FREEZING))
3426 cwq_set_max_active(get_cwq(gcwq->cpu, wq), max_active); 3346 pwq_set_max_active(pwq, max_active);
3427 3347
3428 spin_unlock_irq(&gcwq->lock); 3348 spin_unlock_irq(&pool->lock);
3429 } 3349 }
3430 3350
3431 spin_unlock(&workqueue_lock); 3351 spin_unlock(&workqueue_lock);
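
Caller-side view of the loop above: workqueue_set_max_active() records saved_max_active and applies it to each pwq unless the pool is freezing. A hedged sketch with hypothetical names:

#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;	/* hypothetical */

static int example_setup(void)
{
	example_wq = alloc_workqueue("example", WQ_FREEZABLE, 0);
	if (!example_wq)
		return -ENOMEM;

	/*
	 * Applied to every pwq right away, except that a WQ_FREEZABLE
	 * workqueue whose pool is POOL_FREEZING keeps max_active at 0
	 * until thaw_workqueues() restores saved_max_active.
	 */
	workqueue_set_max_active(example_wq, 1);
	return 0;
}
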
@@ -3446,57 +3366,38 @@ EXPORT_SYMBOL_GPL(workqueue_set_max_active);
3446 */ 3366 */
3447bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq) 3367bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq)
3448{ 3368{
3449 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3369 struct pool_workqueue *pwq = get_pwq(cpu, wq);
3450 3370
3451 return !list_empty(&cwq->delayed_works); 3371 return !list_empty(&pwq->delayed_works);
3452} 3372}
3453EXPORT_SYMBOL_GPL(workqueue_congested); 3373EXPORT_SYMBOL_GPL(workqueue_congested);
3454 3374
3455/** 3375/**
3456 * work_cpu - return the last known associated cpu for @work
3457 * @work: the work of interest
3458 *
3459 * RETURNS:
3460 * CPU number if @work was ever queued. WORK_CPU_NONE otherwise.
3461 */
3462unsigned int work_cpu(struct work_struct *work)
3463{
3464 struct global_cwq *gcwq = get_work_gcwq(work);
3465
3466 return gcwq ? gcwq->cpu : WORK_CPU_NONE;
3467}
3468EXPORT_SYMBOL_GPL(work_cpu);
3469
3470/**
3471 * work_busy - test whether a work is currently pending or running 3376 * work_busy - test whether a work is currently pending or running
3472 * @work: the work to be tested 3377 * @work: the work to be tested
3473 * 3378 *
3474 * Test whether @work is currently pending or running. There is no 3379 * Test whether @work is currently pending or running. There is no
3475 * synchronization around this function and the test result is 3380 * synchronization around this function and the test result is
3476 * unreliable and only useful as advisory hints or for debugging. 3381 * unreliable and only useful as advisory hints or for debugging.
3477 * Especially for reentrant wqs, the pending state might hide the
3478 * running state.
3479 * 3382 *
3480 * RETURNS: 3383 * RETURNS:
3481 * OR'd bitmask of WORK_BUSY_* bits. 3384 * OR'd bitmask of WORK_BUSY_* bits.
3482 */ 3385 */
3483unsigned int work_busy(struct work_struct *work) 3386unsigned int work_busy(struct work_struct *work)
3484{ 3387{
3485 struct global_cwq *gcwq = get_work_gcwq(work); 3388 struct worker_pool *pool = get_work_pool(work);
3486 unsigned long flags; 3389 unsigned long flags;
3487 unsigned int ret = 0; 3390 unsigned int ret = 0;
3488 3391
3489 if (!gcwq)
3490 return 0;
3491
3492 spin_lock_irqsave(&gcwq->lock, flags);
3493
3494 if (work_pending(work)) 3392 if (work_pending(work))
3495 ret |= WORK_BUSY_PENDING; 3393 ret |= WORK_BUSY_PENDING;
3496 if (find_worker_executing_work(gcwq, work))
3497 ret |= WORK_BUSY_RUNNING;
3498 3394
3499 spin_unlock_irqrestore(&gcwq->lock, flags); 3395 if (pool) {
3396 spin_lock_irqsave(&pool->lock, flags);
3397 if (find_worker_executing_work(pool, work))
3398 ret |= WORK_BUSY_RUNNING;
3399 spin_unlock_irqrestore(&pool->lock, flags);
3400 }
3500 3401
3501 return ret; 3402 return ret;
3502} 3403}
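
As the comment above stresses, work_busy() is advisory only. A hypothetical debug helper showing the intended use of the returned bitmask (not part of this diff):

#include <linux/printk.h>
#include <linux/workqueue.h>

static void example_report(struct work_struct *work)
{
	unsigned int busy = work_busy(work);	/* may be stale immediately */

	pr_debug("work %p:%s%s\n", work,
		 busy & WORK_BUSY_PENDING ? " pending" : "",
		 busy & WORK_BUSY_RUNNING ? " running" : "");
}
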
@@ -3506,65 +3407,49 @@ EXPORT_SYMBOL_GPL(work_busy);
3506 * CPU hotplug. 3407 * CPU hotplug.
3507 * 3408 *
3508 * There are two challenges in supporting CPU hotplug. Firstly, there 3409 * There are two challenges in supporting CPU hotplug. Firstly, there
3509 * are a lot of assumptions on strong associations among work, cwq and 3410 * are a lot of assumptions on strong associations among work, pwq and
3510 * gcwq which make migrating pending and scheduled works very 3411 * pool which make migrating pending and scheduled works very
3511 * difficult to implement without impacting hot paths. Secondly, 3412 * difficult to implement without impacting hot paths. Secondly,
3512 * gcwqs serve mix of short, long and very long running works making 3413 * worker pools serve mix of short, long and very long running works making
3513 * blocked draining impractical. 3414 * blocked draining impractical.
3514 * 3415 *
3515 * This is solved by allowing a gcwq to be disassociated from the CPU 3416 * This is solved by allowing the pools to be disassociated from the CPU
3516 * running as an unbound one and allowing it to be reattached later if the 3417 * running as an unbound one and allowing it to be reattached later if the
3517 * cpu comes back online. 3418 * cpu comes back online.
3518 */ 3419 */
3519 3420
3520/* claim manager positions of all pools */ 3421static void wq_unbind_fn(struct work_struct *work)
3521static void gcwq_claim_assoc_and_lock(struct global_cwq *gcwq)
3522{ 3422{
3523 struct worker_pool *pool; 3423 int cpu = smp_processor_id();
3524
3525 for_each_worker_pool(pool, gcwq)
3526 mutex_lock_nested(&pool->assoc_mutex, pool - gcwq->pools);
3527 spin_lock_irq(&gcwq->lock);
3528}
3529
3530/* release manager positions */
3531static void gcwq_release_assoc_and_unlock(struct global_cwq *gcwq)
3532{
3533 struct worker_pool *pool;
3534
3535 spin_unlock_irq(&gcwq->lock);
3536 for_each_worker_pool(pool, gcwq)
3537 mutex_unlock(&pool->assoc_mutex);
3538}
3539
3540static void gcwq_unbind_fn(struct work_struct *work)
3541{
3542 struct global_cwq *gcwq = get_gcwq(smp_processor_id());
3543 struct worker_pool *pool; 3424 struct worker_pool *pool;
3544 struct worker *worker; 3425 struct worker *worker;
3545 struct hlist_node *pos; 3426 struct hlist_node *pos;
3546 int i; 3427 int i;
3547 3428
3548 BUG_ON(gcwq->cpu != smp_processor_id()); 3429 for_each_std_worker_pool(pool, cpu) {
3430 BUG_ON(cpu != smp_processor_id());
3549 3431
3550 gcwq_claim_assoc_and_lock(gcwq); 3432 mutex_lock(&pool->assoc_mutex);
3433 spin_lock_irq(&pool->lock);
3551 3434
3552 /* 3435 /*
3553 * We've claimed all manager positions. Make all workers unbound 3436 * We've claimed all manager positions. Make all workers
3554 * and set DISASSOCIATED. Before this, all workers except for the 3437 * unbound and set DISASSOCIATED. Before this, all workers
3555 * ones which are still executing works from before the last CPU 3438 * except for the ones which are still executing works from
3556 * down must be on the cpu. After this, they may become diasporas. 3439 * before the last CPU down must be on the cpu. After
3557 */ 3440 * this, they may become diasporas.
3558 for_each_worker_pool(pool, gcwq) 3441 */
3559 list_for_each_entry(worker, &pool->idle_list, entry) 3442 list_for_each_entry(worker, &pool->idle_list, entry)
3560 worker->flags |= WORKER_UNBOUND; 3443 worker->flags |= WORKER_UNBOUND;
3561 3444
3562 for_each_busy_worker(worker, i, pos, gcwq) 3445 for_each_busy_worker(worker, i, pos, pool)
3563 worker->flags |= WORKER_UNBOUND; 3446 worker->flags |= WORKER_UNBOUND;
3564 3447
3565 gcwq->flags |= GCWQ_DISASSOCIATED; 3448 pool->flags |= POOL_DISASSOCIATED;
3566 3449
3567 gcwq_release_assoc_and_unlock(gcwq); 3450 spin_unlock_irq(&pool->lock);
3451 mutex_unlock(&pool->assoc_mutex);
3452 }
3568 3453
3569 /* 3454 /*
3570 * Call schedule() so that we cross rq->lock and thus can guarantee 3455 * Call schedule() so that we cross rq->lock and thus can guarantee
@@ -3576,16 +3461,16 @@ static void gcwq_unbind_fn(struct work_struct *work)
3576 /* 3461 /*
3577 * Sched callbacks are disabled now. Zap nr_running. After this, 3462 * Sched callbacks are disabled now. Zap nr_running. After this,
3578 * nr_running stays zero and need_more_worker() and keep_working() 3463 * nr_running stays zero and need_more_worker() and keep_working()
3579 * are always true as long as the worklist is not empty. @gcwq now 3464 * are always true as long as the worklist is not empty. Pools on
3580 * behaves as unbound (in terms of concurrency management) gcwq 3465 * @cpu now behave as unbound (in terms of concurrency management)
3581 * which is served by workers tied to the CPU. 3466 * pools which are served by workers tied to the CPU.
3582 * 3467 *
3583 * On return from this function, the current worker would trigger 3468 * On return from this function, the current worker would trigger
3584 * unbound chain execution of pending work items if other workers 3469 * unbound chain execution of pending work items if other workers
3585 * didn't already. 3470 * didn't already.
3586 */ 3471 */
3587 for_each_worker_pool(pool, gcwq) 3472 for_each_std_worker_pool(pool, cpu)
3588 atomic_set(get_pool_nr_running(pool), 0); 3473 atomic_set(&pool->nr_running, 0);
3589} 3474}
3590 3475
3591/* 3476/*
@@ -3597,12 +3482,11 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
3597 void *hcpu) 3482 void *hcpu)
3598{ 3483{
3599 unsigned int cpu = (unsigned long)hcpu; 3484 unsigned int cpu = (unsigned long)hcpu;
3600 struct global_cwq *gcwq = get_gcwq(cpu);
3601 struct worker_pool *pool; 3485 struct worker_pool *pool;
3602 3486
3603 switch (action & ~CPU_TASKS_FROZEN) { 3487 switch (action & ~CPU_TASKS_FROZEN) {
3604 case CPU_UP_PREPARE: 3488 case CPU_UP_PREPARE:
3605 for_each_worker_pool(pool, gcwq) { 3489 for_each_std_worker_pool(pool, cpu) {
3606 struct worker *worker; 3490 struct worker *worker;
3607 3491
3608 if (pool->nr_workers) 3492 if (pool->nr_workers)
@@ -3612,18 +3496,24 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
3612 if (!worker) 3496 if (!worker)
3613 return NOTIFY_BAD; 3497 return NOTIFY_BAD;
3614 3498
3615 spin_lock_irq(&gcwq->lock); 3499 spin_lock_irq(&pool->lock);
3616 start_worker(worker); 3500 start_worker(worker);
3617 spin_unlock_irq(&gcwq->lock); 3501 spin_unlock_irq(&pool->lock);
3618 } 3502 }
3619 break; 3503 break;
3620 3504
3621 case CPU_DOWN_FAILED: 3505 case CPU_DOWN_FAILED:
3622 case CPU_ONLINE: 3506 case CPU_ONLINE:
3623 gcwq_claim_assoc_and_lock(gcwq); 3507 for_each_std_worker_pool(pool, cpu) {
3624 gcwq->flags &= ~GCWQ_DISASSOCIATED; 3508 mutex_lock(&pool->assoc_mutex);
3625 rebind_workers(gcwq); 3509 spin_lock_irq(&pool->lock);
3626 gcwq_release_assoc_and_unlock(gcwq); 3510
3511 pool->flags &= ~POOL_DISASSOCIATED;
3512 rebind_workers(pool);
3513
3514 spin_unlock_irq(&pool->lock);
3515 mutex_unlock(&pool->assoc_mutex);
3516 }
3627 break; 3517 break;
3628 } 3518 }
3629 return NOTIFY_OK; 3519 return NOTIFY_OK;
@@ -3643,7 +3533,7 @@ static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb,
3643 switch (action & ~CPU_TASKS_FROZEN) { 3533 switch (action & ~CPU_TASKS_FROZEN) {
3644 case CPU_DOWN_PREPARE: 3534 case CPU_DOWN_PREPARE:
3645 /* unbinding should happen on the local CPU */ 3535 /* unbinding should happen on the local CPU */
3646 INIT_WORK_ONSTACK(&unbind_work, gcwq_unbind_fn); 3536 INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
3647 queue_work_on(cpu, system_highpri_wq, &unbind_work); 3537 queue_work_on(cpu, system_highpri_wq, &unbind_work);
3648 flush_work(&unbind_work); 3538 flush_work(&unbind_work);
3649 break; 3539 break;
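
The DOWN_PREPARE case above is an instance of the generic "run a function on a given CPU through the workqueue" pattern. Outside the hotplug path it looks roughly like this (hypothetical function names, not from the diff):

#include <linux/workqueue.h>

static void example_on_cpu_fn(struct work_struct *work)
{
	/* runs on the CPU the work was queued on, while it stays online */
}

static void example_run_on_cpu(int cpu)
{
	struct work_struct w;

	INIT_WORK_ONSTACK(&w, example_on_cpu_fn);
	queue_work_on(cpu, system_highpri_wq, &w);
	flush_work(&w);
	destroy_work_on_stack(&w);
}
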
@@ -3696,10 +3586,10 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
3696 * 3586 *
3697 * Start freezing workqueues. After this function returns, all freezable 3587 * Start freezing workqueues. After this function returns, all freezable
3698 * workqueues will queue new works to their frozen_works list instead of 3588 * workqueues will queue new works to their frozen_works list instead of
3699 * gcwq->worklist. 3589 * pool->worklist.
3700 * 3590 *
3701 * CONTEXT: 3591 * CONTEXT:
3702 * Grabs and releases workqueue_lock and gcwq->lock's. 3592 * Grabs and releases workqueue_lock and pool->lock's.
3703 */ 3593 */
3704void freeze_workqueues_begin(void) 3594void freeze_workqueues_begin(void)
3705{ 3595{
@@ -3710,23 +3600,26 @@ void freeze_workqueues_begin(void)
3710 BUG_ON(workqueue_freezing); 3600 BUG_ON(workqueue_freezing);
3711 workqueue_freezing = true; 3601 workqueue_freezing = true;
3712 3602
3713 for_each_gcwq_cpu(cpu) { 3603 for_each_wq_cpu(cpu) {
3714 struct global_cwq *gcwq = get_gcwq(cpu); 3604 struct worker_pool *pool;
3715 struct workqueue_struct *wq; 3605 struct workqueue_struct *wq;
3716 3606
3717 spin_lock_irq(&gcwq->lock); 3607 for_each_std_worker_pool(pool, cpu) {
3608 spin_lock_irq(&pool->lock);
3718 3609
3719 BUG_ON(gcwq->flags & GCWQ_FREEZING); 3610 WARN_ON_ONCE(pool->flags & POOL_FREEZING);
3720 gcwq->flags |= GCWQ_FREEZING; 3611 pool->flags |= POOL_FREEZING;
3721 3612
3722 list_for_each_entry(wq, &workqueues, list) { 3613 list_for_each_entry(wq, &workqueues, list) {
3723 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3614 struct pool_workqueue *pwq = get_pwq(cpu, wq);
3724 3615
3725 if (cwq && wq->flags & WQ_FREEZABLE) 3616 if (pwq && pwq->pool == pool &&
3726 cwq->max_active = 0; 3617 (wq->flags & WQ_FREEZABLE))
3727 } 3618 pwq->max_active = 0;
3619 }
3728 3620
3729 spin_unlock_irq(&gcwq->lock); 3621 spin_unlock_irq(&pool->lock);
3622 }
3730 } 3623 }
3731 3624
3732 spin_unlock(&workqueue_lock); 3625 spin_unlock(&workqueue_lock);
@@ -3754,20 +3647,20 @@ bool freeze_workqueues_busy(void)
3754 3647
3755 BUG_ON(!workqueue_freezing); 3648 BUG_ON(!workqueue_freezing);
3756 3649
3757 for_each_gcwq_cpu(cpu) { 3650 for_each_wq_cpu(cpu) {
3758 struct workqueue_struct *wq; 3651 struct workqueue_struct *wq;
3759 /* 3652 /*
3760 * nr_active is monotonically decreasing. It's safe 3653 * nr_active is monotonically decreasing. It's safe
3761 * to peek without lock. 3654 * to peek without lock.
3762 */ 3655 */
3763 list_for_each_entry(wq, &workqueues, list) { 3656 list_for_each_entry(wq, &workqueues, list) {
3764 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3657 struct pool_workqueue *pwq = get_pwq(cpu, wq);
3765 3658
3766 if (!cwq || !(wq->flags & WQ_FREEZABLE)) 3659 if (!pwq || !(wq->flags & WQ_FREEZABLE))
3767 continue; 3660 continue;
3768 3661
3769 BUG_ON(cwq->nr_active < 0); 3662 BUG_ON(pwq->nr_active < 0);
3770 if (cwq->nr_active) { 3663 if (pwq->nr_active) {
3771 busy = true; 3664 busy = true;
3772 goto out_unlock; 3665 goto out_unlock;
3773 } 3666 }
@@ -3782,10 +3675,10 @@ out_unlock:
3782 * thaw_workqueues - thaw workqueues 3675 * thaw_workqueues - thaw workqueues
3783 * 3676 *
3784 * Thaw workqueues. Normal queueing is restored and all collected 3677 * Thaw workqueues. Normal queueing is restored and all collected
3785 * frozen works are transferred to their respective gcwq worklists. 3678 * frozen works are transferred to their respective pool worklists.
3786 * 3679 *
3787 * CONTEXT: 3680 * CONTEXT:
3788 * Grabs and releases workqueue_lock and gcwq->lock's. 3681 * Grabs and releases workqueue_lock and pool->lock's.
3789 */ 3682 */
3790void thaw_workqueues(void) 3683void thaw_workqueues(void)
3791{ 3684{
@@ -3796,30 +3689,31 @@ void thaw_workqueues(void)
3796 if (!workqueue_freezing) 3689 if (!workqueue_freezing)
3797 goto out_unlock; 3690 goto out_unlock;
3798 3691
3799 for_each_gcwq_cpu(cpu) { 3692 for_each_wq_cpu(cpu) {
3800 struct global_cwq *gcwq = get_gcwq(cpu);
3801 struct worker_pool *pool; 3693 struct worker_pool *pool;
3802 struct workqueue_struct *wq; 3694 struct workqueue_struct *wq;
3803 3695
3804 spin_lock_irq(&gcwq->lock); 3696 for_each_std_worker_pool(pool, cpu) {
3697 spin_lock_irq(&pool->lock);
3805 3698
3806 BUG_ON(!(gcwq->flags & GCWQ_FREEZING)); 3699 WARN_ON_ONCE(!(pool->flags & POOL_FREEZING));
3807 gcwq->flags &= ~GCWQ_FREEZING; 3700 pool->flags &= ~POOL_FREEZING;
3808 3701
3809 list_for_each_entry(wq, &workqueues, list) { 3702 list_for_each_entry(wq, &workqueues, list) {
3810 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3703 struct pool_workqueue *pwq = get_pwq(cpu, wq);
3811 3704
3812 if (!cwq || !(wq->flags & WQ_FREEZABLE)) 3705 if (!pwq || pwq->pool != pool ||
3813 continue; 3706 !(wq->flags & WQ_FREEZABLE))
3707 continue;
3814 3708
3815 /* restore max_active and repopulate worklist */ 3709 /* restore max_active and repopulate worklist */
3816 cwq_set_max_active(cwq, wq->saved_max_active); 3710 pwq_set_max_active(pwq, wq->saved_max_active);
3817 } 3711 }
3818 3712
3819 for_each_worker_pool(pool, gcwq)
3820 wake_up_worker(pool); 3713 wake_up_worker(pool);
3821 3714
3822 spin_unlock_irq(&gcwq->lock); 3715 spin_unlock_irq(&pool->lock);
3716 }
3823 } 3717 }
3824 3718
3825 workqueue_freezing = false; 3719 workqueue_freezing = false;
@@ -3831,60 +3725,56 @@ out_unlock:
3831static int __init init_workqueues(void) 3725static int __init init_workqueues(void)
3832{ 3726{
3833 unsigned int cpu; 3727 unsigned int cpu;
3834 int i;
3835 3728
3836 /* make sure we have enough bits for OFFQ CPU number */ 3729 /* make sure we have enough bits for OFFQ pool ID */
3837 BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT)) < 3730 BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) <
3838 WORK_CPU_LAST); 3731 WORK_CPU_END * NR_STD_WORKER_POOLS);
3839 3732
3840 cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); 3733 cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
3841 hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); 3734 hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
3842 3735
3843 /* initialize gcwqs */ 3736 /* initialize CPU pools */
3844 for_each_gcwq_cpu(cpu) { 3737 for_each_wq_cpu(cpu) {
3845 struct global_cwq *gcwq = get_gcwq(cpu);
3846 struct worker_pool *pool; 3738 struct worker_pool *pool;
3847 3739
3848 spin_lock_init(&gcwq->lock); 3740 for_each_std_worker_pool(pool, cpu) {
3849 gcwq->cpu = cpu; 3741 spin_lock_init(&pool->lock);
3850 gcwq->flags |= GCWQ_DISASSOCIATED; 3742 pool->cpu = cpu;
3851 3743 pool->flags |= POOL_DISASSOCIATED;
3852 for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
3853 INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
3854
3855 for_each_worker_pool(pool, gcwq) {
3856 pool->gcwq = gcwq;
3857 INIT_LIST_HEAD(&pool->worklist); 3744 INIT_LIST_HEAD(&pool->worklist);
3858 INIT_LIST_HEAD(&pool->idle_list); 3745 INIT_LIST_HEAD(&pool->idle_list);
3746 hash_init(pool->busy_hash);
3859 3747
3860 init_timer_deferrable(&pool->idle_timer); 3748 init_timer_deferrable(&pool->idle_timer);
3861 pool->idle_timer.function = idle_worker_timeout; 3749 pool->idle_timer.function = idle_worker_timeout;
3862 pool->idle_timer.data = (unsigned long)pool; 3750 pool->idle_timer.data = (unsigned long)pool;
3863 3751
3864 setup_timer(&pool->mayday_timer, gcwq_mayday_timeout, 3752 setup_timer(&pool->mayday_timer, pool_mayday_timeout,
3865 (unsigned long)pool); 3753 (unsigned long)pool);
3866 3754
3867 mutex_init(&pool->assoc_mutex); 3755 mutex_init(&pool->assoc_mutex);
3868 ida_init(&pool->worker_ida); 3756 ida_init(&pool->worker_ida);
3757
3758 /* alloc pool ID */
3759 BUG_ON(worker_pool_assign_id(pool));
3869 } 3760 }
3870 } 3761 }
3871 3762
3872 /* create the initial worker */ 3763 /* create the initial worker */
3873 for_each_online_gcwq_cpu(cpu) { 3764 for_each_online_wq_cpu(cpu) {
3874 struct global_cwq *gcwq = get_gcwq(cpu);
3875 struct worker_pool *pool; 3765 struct worker_pool *pool;
3876 3766
3877 if (cpu != WORK_CPU_UNBOUND) 3767 for_each_std_worker_pool(pool, cpu) {
3878 gcwq->flags &= ~GCWQ_DISASSOCIATED;
3879
3880 for_each_worker_pool(pool, gcwq) {
3881 struct worker *worker; 3768 struct worker *worker;
3882 3769
3770 if (cpu != WORK_CPU_UNBOUND)
3771 pool->flags &= ~POOL_DISASSOCIATED;
3772
3883 worker = create_worker(pool); 3773 worker = create_worker(pool);
3884 BUG_ON(!worker); 3774 BUG_ON(!worker);
3885 spin_lock_irq(&gcwq->lock); 3775 spin_lock_irq(&pool->lock);
3886 start_worker(worker); 3776 start_worker(worker);
3887 spin_unlock_irq(&gcwq->lock); 3777 spin_unlock_irq(&pool->lock);
3888 } 3778 }
3889 } 3779 }
3890 3780
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
new file mode 100644
index 000000000000..07650264ec15
--- /dev/null
+++ b/kernel/workqueue_internal.h
@@ -0,0 +1,65 @@
1/*
2 * kernel/workqueue_internal.h
3 *
4 * Workqueue internal header file. Only to be included by workqueue and
5 * core kernel subsystems.
6 */
7#ifndef _KERNEL_WORKQUEUE_INTERNAL_H
8#define _KERNEL_WORKQUEUE_INTERNAL_H
9
10#include <linux/workqueue.h>
11#include <linux/kthread.h>
12
13struct worker_pool;
14
15/*
16 * The poor guys doing the actual heavy lifting. All on-duty workers are
17 * either serving the manager role, on idle list or on busy hash. For
18 * details on the locking annotation (L, I, X...), refer to workqueue.c.
19 *
20 * Only to be used in workqueue and async.
21 */
22struct worker {
23 /* on idle list while idle, on busy hash table while busy */
24 union {
25 struct list_head entry; /* L: while idle */
26 struct hlist_node hentry; /* L: while busy */
27 };
28
29 struct work_struct *current_work; /* L: work being processed */
30 work_func_t current_func; /* L: current_work's fn */
31 struct pool_workqueue *current_pwq; /* L: current_work's pwq */
32 struct list_head scheduled; /* L: scheduled works */
33 struct task_struct *task; /* I: worker task */
34 struct worker_pool *pool; /* I: the associated pool */
35 /* 64 bytes boundary on 64bit, 32 on 32bit */
36 unsigned long last_active; /* L: last active timestamp */
37 unsigned int flags; /* X: flags */
38 int id; /* I: worker id */
39
40 /* for rebinding worker to CPU */
41 struct work_struct rebind_work; /* L: for busy worker */
42
43 /* used only by rescuers to point to the target workqueue */
44 struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */
45};
46
47/**
48 * current_wq_worker - return struct worker if %current is a workqueue worker
49 */
50static inline struct worker *current_wq_worker(void)
51{
52 if (current->flags & PF_WQ_WORKER)
53 return kthread_data(current);
54 return NULL;
55}
56
57/*
58 * Scheduler hooks for concurrency managed workqueue. Only to be used from
59 * sched.c and workqueue.c.
60 */
61void wq_worker_waking_up(struct task_struct *task, unsigned int cpu);
62struct task_struct *wq_worker_sleeping(struct task_struct *task,
63 unsigned int cpu);
64
65#endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
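
current_wq_worker() is what the series' reworked is_chained_work() builds on. A hedged sketch of that kind of check, as it would live inside kernel/workqueue.c where struct pool_workqueue is defined (helper name hypothetical):

#include "workqueue_internal.h"

/* true if %current is a workqueue worker currently running an item of @wq */
static bool example_running_on(struct workqueue_struct *wq)
{
	struct worker *worker = current_wq_worker();

	return worker && worker->current_pwq && worker->current_pwq->wq == wq;
}
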
diff --git a/kernel/workqueue_sched.h b/kernel/workqueue_sched.h
deleted file mode 100644
index 2d10fc98dc79..000000000000
--- a/kernel/workqueue_sched.h
+++ /dev/null
@@ -1,9 +0,0 @@
1/*
2 * kernel/workqueue_sched.h
3 *
4 * Scheduler hooks for concurrency managed workqueue. Only to be
5 * included from sched.c and workqueue.c.
6 */
7void wq_worker_waking_up(struct task_struct *task, unsigned int cpu);
8struct task_struct *wq_worker_sleeping(struct task_struct *task,
9 unsigned int cpu);
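
With workqueue_sched.h removed, its two declarations move into the new internal header; the two-line kernel/sched/core.c change in the diffstat is presumably just the matching include switch, roughly:

-#include "../workqueue_sched.h"
+#include "../workqueue_internal.h"
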