diff options
-rw-r--r-- | Documentation/slow-work.txt | 322 | ||||
-rw-r--r-- | include/linux/slow-work.h | 163 | ||||
-rw-r--r-- | init/Kconfig | 24 | ||||
-rw-r--r-- | kernel/Makefile | 2 | ||||
-rw-r--r-- | kernel/slow-work-debugfs.c | 227 | ||||
-rw-r--r-- | kernel/slow-work.c | 1068 | ||||
-rw-r--r-- | kernel/slow-work.h | 72 | ||||
-rw-r--r-- | kernel/sysctl.c | 8 |
8 files changed, 0 insertions, 1886 deletions
diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt deleted file mode 100644 index 9dbf4470c7e1..000000000000 --- a/Documentation/slow-work.txt +++ /dev/null | |||
@@ -1,322 +0,0 @@ | |||
1 | ==================================== | ||
2 | SLOW WORK ITEM EXECUTION THREAD POOL | ||
3 | ==================================== | ||
4 | |||
5 | By: David Howells <dhowells@redhat.com> | ||
6 | |||
7 | The slow work item execution thread pool is a pool of threads for performing | ||
8 | things that take a relatively long time, such as making mkdir calls. | ||
9 | Typically, when processing something, these items will spend a lot of time | ||
10 | blocking a thread on I/O, thus making that thread unavailable for doing other | ||
11 | work. | ||
12 | |||
13 | The standard workqueue model is unsuitable for this class of work item as that | ||
14 | limits the owner to a single thread or a single thread per CPU. For some | ||
15 | tasks, however, more threads - or fewer - are required. | ||
16 | |||
17 | There is just one pool per system. It contains no threads unless something | ||
18 | wants to use it - and that something must register its interest first. When | ||
19 | the pool is active, the number of threads it contains is dynamic, varying | ||
20 | between a maximum and minimum setting, depending on the load. | ||
21 | |||
22 | |||
23 | ==================== | ||
24 | CLASSES OF WORK ITEM | ||
25 | ==================== | ||
26 | |||
27 | This pool supports two classes of work items: | |
28 | |||
29 | (*) Slow work items. | ||
30 | |||
31 | (*) Very slow work items. | ||
32 | |||
33 | The former are expected to finish much quicker than the latter. | ||
34 | |||
35 | An operation of the very slow class may do a batch combination of several | ||
36 | lookups, mkdirs, and a create for instance. | ||
37 | |||
38 | An operation of the ordinarily slow class may, for example, write stuff or | ||
39 | expand files, provided the time taken to do so isn't too long. | ||
40 | |||
41 | Operations of both types may sleep during execution, thus tying up the thread | ||
42 | loaned to it. | ||
43 | |||
44 | A further class of work item is available, based on the slow work item class: | ||
45 | |||
46 | (*) Delayed slow work items. | ||
47 | |||
48 | These are slow work items that have a timer to defer queueing of the item for | ||
49 | a while. | ||
50 | |||
51 | |||
52 | THREAD-TO-CLASS ALLOCATION | ||
53 | -------------------------- | ||
54 | |||
55 | Not all the threads in the pool are available to work on very slow work items. | ||
56 | The number will be between one and one fewer than the number of active threads. | ||
57 | This is configurable (see the "Pool Configuration" section). | ||
58 | |||
59 | All the threads are available to work on ordinarily slow work items, but a | ||
60 | percentage of the threads will prefer to work on very slow work items. | ||
61 | |||
62 | The configuration ensures that at least one thread will be available to work on | ||
63 | very slow work items, and at least one thread will be available that won't work | ||
64 | on very slow work items at all. | ||
65 | |||
66 | |||
67 | ===================== | ||
68 | USING SLOW WORK ITEMS | ||
69 | ===================== | ||
70 | |||
71 | Firstly, a module or subsystem wanting to make use of slow work items must | ||
72 | register its interest: | ||
73 | |||
74 | int ret = slow_work_register_user(struct module *module); | ||
75 | |||
76 | This will return 0 if successful, or a -ve error upon failure. The module | ||
77 | pointer should be the module interested in using this facility (almost | ||
78 | certainly THIS_MODULE). | ||
79 | |||
80 | |||
81 | Slow work items may then be set up by: | ||
82 | |||
83 | (1) Declaring a slow_work struct type variable: | ||
84 | |||
85 | #include <linux/slow-work.h> | ||
86 | |||
87 | struct slow_work myitem; | ||
88 | |||
89 | (2) Declaring the operations to be used for this item: | ||
90 | |||
91 | struct slow_work_ops myitem_ops = { | ||
92 | .get_ref = myitem_get_ref, | ||
93 | .put_ref = myitem_put_ref, | ||
94 | .execute = myitem_execute, | ||
95 | }; | ||
96 | |||
97 | [*] For a description of the ops, see section "Item Operations". | ||
98 | |||
99 | (3) Initialising the item: | ||
100 | |||
101 | slow_work_init(&myitem, &myitem_ops); | ||
102 | |||
103 | or: | ||
104 | |||
105 | delayed_slow_work_init(&myitem, &myitem_ops); | ||
106 | |||
107 | or: | ||
108 | |||
109 | vslow_work_init(&myitem, &myitem_ops); | ||
110 | |||
111 | depending on its class. | ||
112 | |||
113 | A suitably set up work item can then be enqueued for processing: | ||
114 | |||
115 | int ret = slow_work_enqueue(&myitem); | ||
116 | |||
117 | This will return a -ve error if the thread pool is unable to gain a reference | ||
118 | on the item, 0 otherwise, or (for delayed work): | ||
119 | |||
120 | int ret = delayed_slow_work_enqueue(&myitem, my_jiffy_delay); | ||
121 | |||
122 | |||
123 | The items are reference counted, so there ought to be no need for a flush | ||
124 | operation. But as the reference counting is optional, means to cancel | ||
125 | existing work items are also included: | ||
126 | |||
127 | slow_work_cancel(&myitem); | |
128 | delayed_slow_work_cancel(&myitem); | |
129 | |||
130 | can be used to cancel pending work. The above cancel functions wait for | |
131 | existing work to have been executed (or prevent its execution, depending | |
132 | on timing). | |
133 | |||
134 | |||
135 | When all a module's slow work items have been processed, and the | ||
136 | module has no further interest in the facility, it should unregister its | ||
137 | interest: | ||
138 | |||
139 | slow_work_unregister_user(struct module *module); | ||
140 | |||
141 | The module pointer is used to wait for all outstanding work items for that | ||
142 | module before completing the unregistration. This prevents the put_ref() code | ||
143 | from being taken away before it completes. module should almost certainly be | ||
144 | THIS_MODULE. | ||
145 | |||
146 | |||
147 | ================ | ||
148 | HELPER FUNCTIONS | ||
149 | ================ | ||
150 | |||
151 | The slow-work facility provides a function by which it can be determined | ||
152 | whether or not an item is queued for later execution: | ||
153 | |||
154 | bool queued = slow_work_is_queued(struct slow_work *work); | ||
155 | |||
156 | If it returns false, then the item is not on the queue (it may be executing | ||
157 | with a requeue pending). This can be used to work out whether an item on which | ||
158 | another depends is on the queue, thus allowing a dependent item to be queued | ||
159 | after it. | ||
160 | |||
161 | If the above shows an item on which another depends not to be queued, then the | ||
162 | owner of the dependent item might need to wait. However, to avoid locking up | ||
163 | the threads unnecessarily by sleeping in them, it can make sense under some | |
164 | circumstances to return the work item to the queue, thus deferring it until | ||
165 | some other items have had a chance to make use of the yielded thread. | ||
166 | |||
167 | To yield a thread and defer an item, the work function should simply enqueue | ||
168 | the work item again and return. However, this doesn't work if there's nothing | ||
169 | actually on the queue, as the thread just vacated will jump straight back into | ||
170 | the item's work function, thus busy waiting on a CPU. | ||
171 | |||
172 | Instead, the item should use the thread to wait for the dependency to go away, | ||
173 | but rather than using schedule() or schedule_timeout() to sleep, it should use | ||
174 | the following function: | ||
175 | |||
176 | bool requeue = slow_work_sleep_till_thread_needed( | ||
177 | struct slow_work *work, | ||
178 | signed long *_timeout); | ||
179 | |||
180 | This will add a second wait and then sleep, such that it will be woken up if | ||
181 | either something appears on the queue that could usefully make use of the | ||
182 | thread - and behind which this item can be queued, or if the event the caller | ||
183 | set up to wait for happens. True will be returned if something else appeared | ||
184 | on the queue and this work function should perhaps return, or false if | |
185 | something else woke it up. The timeout is as for schedule_timeout(). | ||
186 | |||
187 | For example: | ||
188 | |||
189 | wq = bit_waitqueue(&my_flags, MY_BIT); | ||
190 | init_wait(&wait); | ||
191 | requeue = false; | ||
192 | do { | ||
193 | prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); | ||
194 | if (!test_bit(MY_BIT, &my_flags)) | ||
195 | break; | ||
196 | requeue = slow_work_sleep_till_thread_needed(&my_work, | ||
197 | &timeout); | ||
198 | } while (timeout > 0 && !requeue); | ||
199 | finish_wait(wq, &wait); | ||
200 | if (!test_bit(MY_BIT, &my_flags)) | |
201 | goto do_my_thing; | ||
202 | if (requeue) | ||
203 | return; // to slow_work | ||
204 | |||
205 | |||
206 | =============== | ||
207 | ITEM OPERATIONS | ||
208 | =============== | ||
209 | |||
210 | Each work item requires a table of operations of type struct slow_work_ops. | ||
211 | Only ->execute() is required; the getting and putting of a reference and the | ||
212 | describing of an item are all optional. | ||
213 | |||
214 | (*) Get a reference on an item: | ||
215 | |||
216 | int (*get_ref)(struct slow_work *work); | ||
217 | |||
218 | This allows the thread pool to attempt to pin an item by getting a | ||
219 | reference on it. This function should return 0 if the reference was | ||
220 | granted, or a -ve error otherwise. If an error is returned, | ||
221 | slow_work_enqueue() will fail. | ||
222 | |||
223 | The reference is held whilst the item is queued and whilst it is being | ||
224 | executed. The item may then be requeued with the same reference held, or | ||
225 | the reference will be released. | ||
226 | |||
227 | (*) Release a reference on an item: | ||
228 | |||
229 | void (*put_ref)(struct slow_work *work); | ||
230 | |||
231 | This allows the thread pool to unpin an item by releasing the reference on | ||
232 | it. The thread pool will not touch the item again once this has been | ||
233 | called. | ||
234 | |||
235 | (*) Execute an item: | ||
236 | |||
237 | void (*execute)(struct slow_work *work); | ||
238 | |||
239 | This should perform the work required of the item. It may sleep, it may | ||
240 | perform disk I/O and it may wait for locks. | ||
241 | |||
242 | (*) View an item through debugfs: | |
243 | |||
244 | void (*desc)(struct slow_work *work, struct seq_file *m); | ||
245 | |||
246 | If supplied, this should print to 'm' a small string describing the work | ||
247 | the item is to do. This should be no more than about 40 characters, and | ||
248 | shouldn't include a newline character. | ||
249 | |||
250 | See the 'Viewing executing and queued items' section below. | ||
251 | |||
252 | |||
253 | ================== | ||
254 | POOL CONFIGURATION | ||
255 | ================== | ||
256 | |||
257 | The slow-work thread pool has a number of configurables: | ||
258 | |||
259 | (*) /proc/sys/kernel/slow-work/min-threads | ||
260 | |||
261 | The minimum number of threads that should be in the pool whilst it is in | ||
262 | use. This may be anywhere between 2 and max-threads. | ||
263 | |||
264 | (*) /proc/sys/kernel/slow-work/max-threads | ||
265 | |||
266 | The maximum number of threads that should be in the pool. This may be | |
267 | anywhere between min-threads and 255 or NR_CPUS * 2, whichever is greater. | ||
268 | |||
269 | (*) /proc/sys/kernel/slow-work/vslow-percentage | ||
270 | |||
271 | The percentage of active threads in the pool that may be used to execute | ||
272 | very slow work items. This may be between 1 and 99. The resultant number | ||
273 | is bounded to between 1 and one fewer than the number of active threads. | ||
274 | This ensures there is always at least one thread that can process very | ||
275 | slow work items, and always at least one thread that won't. | ||
276 | |||
277 | |||
278 | ================================== | ||
279 | VIEWING EXECUTING AND QUEUED ITEMS | ||
280 | ================================== | ||
281 | |||
282 | If CONFIG_SLOW_WORK_DEBUG is enabled, a debugfs file is made available: | ||
283 | |||
284 | /sys/kernel/debug/slow_work/runqueue | ||
285 | |||
286 | through which the list of work items being executed and the queues of items to | ||
287 | be executed may be viewed. The owner of a work item is given the chance to | ||
288 | add some information of its own. | ||
289 | |||
290 | The contents look something like the following: | ||
291 | |||
292 | THR PID ITEM ADDR FL MARK DESC | ||
293 | === ===== ================ == ===== ========== | ||
294 | 0 3005 ffff880023f52348 a 952ms FSC: OBJ17d3: LOOK | ||
295 | 1 3006 ffff880024e33668 2 160ms FSC: OBJ17e5 OP60d3b: Write1/Store fl=2 | ||
296 | 2 3165 ffff8800296dd180 a 424ms FSC: OBJ17e4: LOOK | ||
297 | 3 4089 ffff8800262c8d78 a 212ms FSC: OBJ17ea: CRTN | ||
298 | 4 4090 ffff88002792bed8 2 388ms FSC: OBJ17e8 OP60d36: Write1/Store fl=2 | ||
299 | 5 4092 ffff88002a0ef308 2 388ms FSC: OBJ17e7 OP60d2e: Write1/Store fl=2 | ||
300 | 6 4094 ffff88002abaf4b8 2 132ms FSC: OBJ17e2 OP60d4e: Write1/Store fl=2 | ||
301 | 7 4095 ffff88002bb188e0 a 388ms FSC: OBJ17e9: CRTN | ||
302 | vsq - ffff880023d99668 1 308ms FSC: OBJ17e0 OP60f91: Write1/EnQ fl=2 | ||
303 | vsq - ffff8800295d1740 1 212ms FSC: OBJ16be OP4d4b6: Write1/EnQ fl=2 | ||
304 | vsq - ffff880025ba3308 1 160ms FSC: OBJ179a OP58dec: Write1/EnQ fl=2 | ||
305 | vsq - ffff880024ec83e0 1 160ms FSC: OBJ17ae OP599f2: Write1/EnQ fl=2 | ||
306 | vsq - ffff880026618e00 1 160ms FSC: OBJ17e6 OP60d33: Write1/EnQ fl=2 | ||
307 | vsq - ffff880025a2a4b8 1 132ms FSC: OBJ16a2 OP4d583: Write1/EnQ fl=2 | ||
308 | vsq - ffff880023cbe6d8 9 212ms FSC: OBJ17eb: LOOK | ||
309 | vsq - ffff880024d37590 9 212ms FSC: OBJ17ec: LOOK | ||
310 | vsq - ffff880027746cb0 9 212ms FSC: OBJ17ed: LOOK | ||
311 | vsq - ffff880024d37ae8 9 212ms FSC: OBJ17ee: LOOK | ||
312 | vsq - ffff880024d37cb0 9 212ms FSC: OBJ17ef: LOOK | ||
313 | vsq - ffff880025036550 9 212ms FSC: OBJ17f0: LOOK | ||
314 | vsq - ffff8800250368e0 9 212ms FSC: OBJ17f1: LOOK | ||
315 | vsq - ffff880025036aa8 9 212ms FSC: OBJ17f2: LOOK | ||
316 | |||
317 | In the 'THR' column, executing items show the thread they're occupying and | ||
318 | queued items indicate which queue they're on. 'PID' shows the process ID of | |
319 | a slow-work thread that's executing something. 'FL' shows the work item flags. | ||
320 | 'MARK' indicates how long since an item was queued or began executing. Lastly, | ||
321 | the 'DESC' column permits the owner of an item to give some information. | ||
322 | |||
diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h deleted file mode 100644 index 13337bf6c3f5..000000000000 --- a/include/linux/slow-work.h +++ /dev/null | |||
@@ -1,163 +0,0 @@ | |||
1 | /* Worker thread pool for slow items, such as filesystem lookups or mkdirs | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | * | ||
11 | * See Documentation/slow-work.txt | ||
12 | */ | ||
13 | |||
14 | #ifndef _LINUX_SLOW_WORK_H | ||
15 | #define _LINUX_SLOW_WORK_H | ||
16 | |||
17 | #ifdef CONFIG_SLOW_WORK | ||
18 | |||
19 | #include <linux/sysctl.h> | ||
20 | #include <linux/timer.h> | ||
21 | |||
22 | struct slow_work; | ||
23 | #ifdef CONFIG_SLOW_WORK_DEBUG | ||
24 | struct seq_file; | ||
25 | #endif | ||
26 | |||
27 | /* | |
28 | * The table of operations a user supplies to support its slow work items | |
29 | */ | |
30 | struct slow_work_ops { | |
31 | /* the module that owns these operations */ | |
32 | struct module *owner; | |
33 | |||
34 | /* get a ref on a work item | |
35 | * - return 0 if successful, -ve if not | |
36 | */ | |
37 | int (*get_ref)(struct slow_work *work); | |
38 | |||
39 | /* discard a ref to a work item */ | |
40 | void (*put_ref)(struct slow_work *work); | |
41 | |||
42 | /* execute a work item (do the actual work of the item) */ | |
43 | void (*execute)(struct slow_work *work); | |
44 | |||
45 | #ifdef CONFIG_SLOW_WORK_DEBUG | |
46 | /* describe a work item for debugfs by printing to the seq_file m */ | |
47 | void (*desc)(struct slow_work *work, struct seq_file *m); | |
48 | #endif | |
49 | }; | |
50 | |||
51 | /* | |
52 | * A slow work item | |
53 | * - A reference is held on the parent object by the thread pool when it is | |
54 | * queued | |
55 | */ | |
56 | struct slow_work { | |
57 | struct module *owner; /* the owning module */ | |
58 | unsigned long flags; /* the values below are bit numbers, not masks */ | |
59 | #define SLOW_WORK_PENDING 0 /* item pending (further) execution */ | |
60 | #define SLOW_WORK_EXECUTING 1 /* item currently executing */ | |
61 | #define SLOW_WORK_ENQ_DEFERRED 2 /* item enqueue deferred */ | |
62 | #define SLOW_WORK_VERY_SLOW 3 /* item is very slow */ | |
63 | #define SLOW_WORK_CANCELLING 4 /* item is being cancelled, don't enqueue */ | |
64 | #define SLOW_WORK_DELAYED 5 /* item is struct delayed_slow_work with active timer */ | |
65 | const struct slow_work_ops *ops; /* operations table for this item */ | |
66 | struct list_head link; /* link in queue */ | |
67 | #ifdef CONFIG_SLOW_WORK_DEBUG | |
68 | struct timespec mark; /* time at which queued or exec begun */ | |
69 | #endif | |
70 | }; | |
71 | |||
72 | struct delayed_slow_work { | |
73 | struct slow_work work; /* the embedded slow work item */ | |
74 | struct timer_list timer; /* set up by delayed_slow_work_init() */ | |
75 | }; | |
76 | |||
77 | /** | |
78 | * slow_work_init - Initialise a slow work item | |
79 | * @work: The work item to initialise | |
80 | * @ops: The operations to use to handle the slow work item | |
81 | * | |
82 | * Initialise a slow work item: clear its flags, record @ops and reset its link. | |
83 | */ | |
84 | static inline void slow_work_init(struct slow_work *work, | |
85 | const struct slow_work_ops *ops) | |
86 | { | |
87 | work->flags = 0; | |
88 | work->ops = ops; | |
89 | INIT_LIST_HEAD(&work->link); | |
90 | } | |
91 | |||
92 | /** | |
93 | * delayed_slow_work_init - Initialise a delayed slow work item | |
94 | * @dwork: The delayed work item to initialise | |
95 | * @ops: The operations to use to handle the slow work item | |
96 | * | |
97 | * Initialise a delayed slow work item: its timer and its embedded work item. | |
98 | */ | |
99 | static inline void delayed_slow_work_init(struct delayed_slow_work *dwork, | |
100 | const struct slow_work_ops *ops) | |
101 | { | |
102 | init_timer(&dwork->timer); | |
103 | slow_work_init(&dwork->work, ops); | |
104 | } | |
105 | |||
106 | /** | |
107 | * vslow_work_init - Initialise a very slow work item | |
108 | * @work: The work item to initialise | |
109 | * @ops: The operations to use to handle the slow work item | |
110 | * | |
111 | * Initialise a very slow work item. This item will be restricted such that | |
112 | * only a certain number of the pool threads will be able to execute items of | |
113 | * this type. | |
114 | */ | |
115 | static inline void vslow_work_init(struct slow_work *work, | |
116 | const struct slow_work_ops *ops) | |
117 | { | |
118 | work->flags = 1 << SLOW_WORK_VERY_SLOW; /* only the very-slow bit is set */ | |
119 | work->ops = ops; | |
120 | INIT_LIST_HEAD(&work->link); | |
121 | } | |
122 | |||
123 | /** | |
124 | * slow_work_is_queued - Determine if a slow work item is on the work queue | |
125 | * @work: The work item to test | |
126 | * | |
127 | * Determine if the specified slow-work item is on the work queue. This | |
128 | * returns true if it is actually on the queue. | |
129 | * | |
130 | * If the item is executing and has been marked for requeue when execution | |
131 | * finishes, then false will be returned. | |
132 | * | |
133 | * Anyone wishing to wait for completion of execution can wait on the | |
134 | * SLOW_WORK_EXECUTING bit. | |
135 | */ | |
136 | static inline bool slow_work_is_queued(struct slow_work *work) | |
137 | { | |
138 | unsigned long flags = work->flags; /* SLOW_WORK_* are bit numbers, so build masks */ | |
139 | return (flags & (1UL << SLOW_WORK_PENDING)) && !(flags & (1UL << SLOW_WORK_EXECUTING)); | |
140 | } | |
141 | |||
142 | extern int slow_work_enqueue(struct slow_work *work); | ||
143 | extern void slow_work_cancel(struct slow_work *work); | ||
144 | extern int slow_work_register_user(struct module *owner); | ||
145 | extern void slow_work_unregister_user(struct module *owner); | ||
146 | |||
147 | extern int delayed_slow_work_enqueue(struct delayed_slow_work *dwork, | ||
148 | unsigned long delay); | ||
149 | |||
150 | static inline void delayed_slow_work_cancel(struct delayed_slow_work *dwork) | |
151 | { | |
152 | slow_work_cancel(&dwork->work); /* cancel via the embedded slow work item */ | |
153 | } | |
154 | |||
155 | extern bool slow_work_sleep_till_thread_needed(struct slow_work *work, | ||
156 | signed long *_timeout); | ||
157 | |||
158 | #ifdef CONFIG_SYSCTL | ||
159 | extern ctl_table slow_work_sysctls[]; | ||
160 | #endif | ||
161 | |||
162 | #endif /* CONFIG_SLOW_WORK */ | ||
163 | #endif /* _LINUX_SLOW_WORK_H */ | ||
diff --git a/init/Kconfig b/init/Kconfig index 5cff9a980c39..cb64c5889e02 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -1143,30 +1143,6 @@ config TRACEPOINTS | |||
1143 | 1143 | ||
1144 | source "arch/Kconfig" | 1144 | source "arch/Kconfig" |
1145 | 1145 | ||
1146 | config SLOW_WORK | ||
1147 | default n | ||
1148 | bool | ||
1149 | help | ||
1150 | The slow work thread pool provides a number of dynamically allocated | ||
1151 | threads that can be used by the kernel to perform operations that | ||
1152 | take a relatively long time. | ||
1153 | |||
1154 | An example of this would be CacheFiles doing a path lookup followed | ||
1155 | by a series of mkdirs and a create call, all of which have to touch | ||
1156 | disk. | ||
1157 | |||
1158 | See Documentation/slow-work.txt. | ||
1159 | |||
1160 | config SLOW_WORK_DEBUG | ||
1161 | bool "Slow work debugging through debugfs" | ||
1162 | default n | ||
1163 | depends on SLOW_WORK && DEBUG_FS | ||
1164 | help | ||
1165 | Display the contents of the slow work run queue through debugfs, | ||
1166 | including items currently executing. | ||
1167 | |||
1168 | See Documentation/slow-work.txt. | ||
1169 | |||
1170 | endmenu # General setup | 1146 | endmenu # General setup |
1171 | 1147 | ||
1172 | config HAVE_GENERIC_DMA_COHERENT | 1148 | config HAVE_GENERIC_DMA_COHERENT |
diff --git a/kernel/Makefile b/kernel/Makefile index 057472fbc272..2484ac39b2e2 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -99,8 +99,6 @@ obj-$(CONFIG_TRACING) += trace/ | |||
99 | obj-$(CONFIG_X86_DS) += trace/ | 99 | obj-$(CONFIG_X86_DS) += trace/ |
100 | obj-$(CONFIG_RING_BUFFER) += trace/ | 100 | obj-$(CONFIG_RING_BUFFER) += trace/ |
101 | obj-$(CONFIG_SMP) += sched_cpupri.o | 101 | obj-$(CONFIG_SMP) += sched_cpupri.o |
102 | obj-$(CONFIG_SLOW_WORK) += slow-work.o | ||
103 | obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o | ||
104 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o | 102 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o |
105 | obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o | 103 | obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o |
106 | obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o | 104 | obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o |
diff --git a/kernel/slow-work-debugfs.c b/kernel/slow-work-debugfs.c deleted file mode 100644 index e45c43645298..000000000000 --- a/kernel/slow-work-debugfs.c +++ /dev/null | |||
@@ -1,227 +0,0 @@ | |||
1 | /* Slow work debugging | ||
2 | * | ||
3 | * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/slow-work.h> | ||
14 | #include <linux/fs.h> | ||
15 | #include <linux/time.h> | ||
16 | #include <linux/seq_file.h> | ||
17 | #include "slow-work.h" | ||
18 | |||
19 | #define ITERATOR_SHIFT (BITS_PER_LONG - 4) | ||
20 | #define ITERATOR_SELECTOR (0xfUL << ITERATOR_SHIFT) | ||
21 | #define ITERATOR_COUNTER (~ITERATOR_SELECTOR) | ||
22 | |||
23 | void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m) | |
24 | { | |
25 | seq_puts(m, "Slow-work: New thread"); /* fixed text; work is not examined */ | |
26 | } | |
27 | |||
28 | /* | |
29 | * Render the time elapsed since work->mark as a 5-char time with units plus | |
30 | * a space; negative spans print as -ve and spans of a day or more as exces | |
31 | */ | |
32 | static void slow_work_print_mark(struct seq_file *m, struct slow_work *work) | |
33 | { | |
34 | struct timespec now, diff; | |
35 | |||
36 | now = CURRENT_TIME; | |
37 | diff = timespec_sub(now, work->mark); | |
38 | |||
39 | if (diff.tv_sec < 0) | |
40 | seq_puts(m, " -ve "); | |
41 | else if (diff.tv_sec == 0 && diff.tv_nsec < 1000) | |
42 | seq_printf(m, "%3luns ", diff.tv_nsec); | |
43 | else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000) | |
44 | seq_printf(m, "%3luus ", diff.tv_nsec / 1000); | |
45 | else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000) | |
46 | seq_printf(m, "%3lums ", diff.tv_nsec / 1000000); | |
47 | else if (diff.tv_sec <= 1) | |
48 | seq_puts(m, " 1s "); | |
49 | else if (diff.tv_sec < 60) | |
50 | seq_printf(m, "%4lus ", diff.tv_sec); | |
51 | else if (diff.tv_sec < 60 * 60) | |
52 | seq_printf(m, "%4lum ", diff.tv_sec / 60); | |
53 | else if (diff.tv_sec < 60 * 60 * 24) | |
54 | seq_printf(m, "%4luh ", diff.tv_sec / 3600); | |
55 | else | |
56 | seq_puts(m, "exces "); | |
57 | } | |
58 | |||
59 | /* | |
60 | * Describe a slow work item for debugfs; v says which line of output to emit | |
61 | */ | |
62 | static int slow_work_runqueue_show(struct seq_file *m, void *v) | |
63 | { | |
64 | struct slow_work *work; | |
65 | struct list_head *p = v; | |
66 | unsigned long id; | |
67 | |||
68 | switch ((unsigned long) v) { | |
69 | case 1: | |
70 | seq_puts(m, "THR PID ITEM ADDR FL MARK DESC\n"); | |
71 | return 0; | |
72 | case 2: | |
73 | seq_puts(m, "=== ===== ================ == ===== ==========\n"); | |
74 | return 0; | |
75 | /* a thread slot: show the item it is executing, if there is one */ | |
76 | case 3 ... 3 + SLOW_WORK_THREAD_LIMIT - 1: | |
77 | id = (unsigned long) v - 3; | |
78 | |||
79 | read_lock(&slow_work_execs_lock); | |
80 | work = slow_work_execs[id]; | |
81 | if (work) { | |
82 | smp_read_barrier_depends(); | |
83 | |||
84 | seq_printf(m, "%3lu %5d %16p %2lx ", | |
85 | id, slow_work_pids[id], work, work->flags); | |
86 | slow_work_print_mark(m, work); | |
87 | |||
88 | if (work->ops->desc) | |
89 | work->ops->desc(work, m); | |
90 | seq_putc(m, '\n'); | |
91 | } | |
92 | read_unlock(&slow_work_execs_lock); | |
93 | return 0; | |
94 | /* anything else is the list link of a queued item */ | |
95 | default: | |
96 | work = list_entry(p, struct slow_work, link); | |
97 | seq_printf(m, "%3s - %16p %2lx ", | |
98 | work->flags & (1UL << SLOW_WORK_VERY_SLOW) ? "vsq" : "sq", /* bit number, not a mask */ | |
99 | work, work->flags); | |
100 | slow_work_print_mark(m, work); | |
101 | |||
102 | if (work->ops->desc) | |
103 | work->ops->desc(work, m); | |
104 | seq_putc(m, '\n'); | |
105 | return 0; | |
106 | } | |
107 | } | |
108 | |||
109 | /* | |
110 | * map the iterator to a work item; the top bits of *_pos select the stage | |
111 | */ | |
112 | static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos) | |
113 | { | |
114 | struct list_head *p; | |
115 | unsigned long count, id; | |
116 | |||
117 | switch (*_pos >> ITERATOR_SHIFT) { | |
118 | case 0x0: | |
119 | if (*_pos == 0) | |
120 | *_pos = 1; | |
121 | if (*_pos < 3) | |
122 | return (void *)(unsigned long) *_pos; | |
123 | if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT) | |
124 | for (id = *_pos - 3; | |
125 | id < SLOW_WORK_THREAD_LIMIT; | |
126 | id++, (*_pos)++) | |
127 | if (slow_work_execs[id]) | |
128 | return (void *)(unsigned long) *_pos; | |
129 | *_pos = 0x1UL << ITERATOR_SHIFT; | |
130 | /* fall through: no more thread slots, go on to slow_work_queue */ | |
131 | case 0x1: | |
132 | count = *_pos & ITERATOR_COUNTER; | |
133 | list_for_each(p, &slow_work_queue) { | |
134 | if (count == 0) | |
135 | return p; | |
136 | count--; | |
137 | } | |
138 | *_pos = 0x2UL << ITERATOR_SHIFT; | |
139 | /* fall through: slow_work_queue exhausted, go on to vslow_work_queue */ | |
140 | case 0x2: | |
141 | count = *_pos & ITERATOR_COUNTER; | |
142 | list_for_each(p, &vslow_work_queue) { | |
143 | if (count == 0) | |
144 | return p; | |
145 | count--; | |
146 | } | |
147 | *_pos = 0x3UL << ITERATOR_SHIFT; | |
148 | /* fall through: nothing left to show */ | |
149 | default: | |
150 | return NULL; | |
151 | } | |
152 | } | |
153 | |||
154 | /* | |
155 | * set up the iterator to start reading; takes slow_work_queue_lock | |
156 | */ | |
157 | static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos) | |
158 | { | |
159 | spin_lock_irq(&slow_work_queue_lock); | |
160 | return slow_work_runqueue_index(m, _pos); | |
161 | } | |
162 | |||
163 | /* | |
164 | * move to the next line, stepping from one stage to the next when one runs out | |
165 | */ | |
166 | static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos) | |
167 | { | |
168 | struct list_head *p = v; | |
169 | unsigned long selector = *_pos >> ITERATOR_SHIFT; | |
170 | |||
171 | (*_pos)++; | |
172 | switch (selector) { | |
173 | case 0x0: | |
174 | return slow_work_runqueue_index(m, _pos); | |
175 | |||
176 | case 0x1: | |
177 | if (*_pos >> ITERATOR_SHIFT == 0x1) { | |
178 | p = p->next; | |
179 | if (p != &slow_work_queue) | |
180 | return p; | |
181 | } | |
182 | *_pos = 0x2UL << ITERATOR_SHIFT; | |
183 | p = &vslow_work_queue; | |
184 | /* fall through: start on vslow_work_queue from its list head */ | |
185 | case 0x2: | |
186 | if (*_pos >> ITERATOR_SHIFT == 0x2) { | |
187 | p = p->next; | |
188 | if (p != &vslow_work_queue) | |
189 | return p; | |
190 | } | |
191 | *_pos = 0x3UL << ITERATOR_SHIFT; | |
192 | /* fall through: nothing left to show */ | |
193 | default: | |
194 | return NULL; | |
195 | } | |
196 | } | |
197 | |||
198 | /* | |
199 | * clean up after reading; drops slow_work_queue_lock | |
200 | */ | |
201 | static void slow_work_runqueue_stop(struct seq_file *m, void *v) | |
202 | { | |
203 | spin_unlock_irq(&slow_work_queue_lock); | |
204 | } | |
205 | |||
206 | static const struct seq_operations slow_work_runqueue_ops = { | |
207 | .start = slow_work_runqueue_start, /* takes slow_work_queue_lock */ | |
208 | .stop = slow_work_runqueue_stop, /* drops slow_work_queue_lock */ | |
209 | .next = slow_work_runqueue_next, | |
210 | .show = slow_work_runqueue_show, | |
211 | }; | |
212 | |||
213 | /* | |
214 | * open "/sys/kernel/debug/slow_work/runqueue" to list queue contents via seq_file | |
215 | */ | |
216 | static int slow_work_runqueue_open(struct inode *inode, struct file *file) | |
217 | { | |
218 | return seq_open(file, &slow_work_runqueue_ops); | |
219 | } | |
220 | |||
221 | const struct file_operations slow_work_runqueue_fops = { | |
222 | .owner = THIS_MODULE, | |
223 | .open = slow_work_runqueue_open, /* attaches slow_work_runqueue_ops */ | |
224 | .read = seq_read, | |
225 | .llseek = seq_lseek, | |
226 | .release = seq_release, | |
227 | }; | |
diff --git a/kernel/slow-work.c b/kernel/slow-work.c deleted file mode 100644 index 7d3f4fa9ef4f..000000000000 --- a/kernel/slow-work.c +++ /dev/null | |||
@@ -1,1068 +0,0 @@ | |||
1 | /* Worker thread pool for slow items, such as filesystem lookups or mkdirs | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | * | ||
11 | * See Documentation/slow-work.txt | ||
12 | */ | ||
13 | |||
14 | #include <linux/module.h> | ||
15 | #include <linux/slow-work.h> | ||
16 | #include <linux/kthread.h> | ||
17 | #include <linux/freezer.h> | ||
18 | #include <linux/wait.h> | ||
19 | #include <linux/debugfs.h> | ||
20 | #include "slow-work.h" | ||
21 | |||
/*
 * Forward declarations: the timer handlers are referenced by the
 * DEFINE_TIMER() statements below and the sysctl handlers by the sysctl table
 * before any of them is defined.
 */
static void slow_work_cull_timeout(unsigned long);
static void slow_work_oom_timeout(unsigned long);

#ifdef CONFIG_SYSCTL
static int slow_work_min_threads_sysctl(struct ctl_table *, int,
					void __user *, size_t *, loff_t *);

static int slow_work_max_threads_sysctl(struct ctl_table *, int ,
					void __user *, size_t *, loff_t *);
#endif
32 | |||
/*
 * The pool of threads has at least min threads in it as long as someone is
 * using the facility, and may have as many as max.
 *
 * A portion of the pool may be processing very slow operations.
 *
 * These are the default values; when CONFIG_SYSCTL is enabled they are
 * exposed for adjustment through the sysctl table that follows.
 */
static unsigned slow_work_min_threads = 2;
static unsigned slow_work_max_threads = 4;
static unsigned vslow_work_proportion = 50; /* % of threads that may process
					     * very slow work */
43 | |||
#ifdef CONFIG_SYSCTL
/*
 * Bounds handed to the sysctl handlers through .extra1 (lower bound) and
 * .extra2 (upper bound).  Note that min-threads and max-threads bound each
 * other: the upper bound of the former is the current value of the latter and
 * vice versa.
 */
static const int slow_work_min_min_threads = 2;
static int slow_work_max_max_threads = SLOW_WORK_THREAD_LIMIT;
static const int slow_work_min_vslow = 1;
static const int slow_work_max_vslow = 99;

/*
 * The sysctl entries for tuning the pool: the minimum and maximum thread
 * counts and the percentage of threads allowed to run very slow work
 */
ctl_table slow_work_sysctls[] = {
	{
		.procname	= "min-threads",
		.data		= &slow_work_min_threads,
		.maxlen		= sizeof(unsigned),
		.mode		= 0644,
		.proc_handler	= slow_work_min_threads_sysctl,
		.extra1		= (void *) &slow_work_min_min_threads,
		.extra2		= &slow_work_max_threads,
	},
	{
		.procname	= "max-threads",
		.data		= &slow_work_max_threads,
		.maxlen		= sizeof(unsigned),
		.mode		= 0644,
		.proc_handler	= slow_work_max_threads_sysctl,
		.extra1		= &slow_work_min_threads,
		.extra2		= (void *) &slow_work_max_max_threads,
	},
	{
		.procname	= "vslow-percentage",
		.data		= &vslow_work_proportion,
		.maxlen		= sizeof(unsigned),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= (void *) &slow_work_min_vslow,
		.extra2		= (void *) &slow_work_max_vslow,
	},
	{}	/* terminating empty entry */
};
#endif
81 | |||
/*
 * The active state of the thread pool
 * - slow_work_thread_count is the number of pool threads that exist (it is
 *   bumped before a new thread is started and dropped as a thread exits)
 * - vslow_work_executing_count is the number of threads currently executing
 *   very slow work items
 */
static atomic_t slow_work_thread_count;
static atomic_t vslow_work_executing_count;

/* set whilst thread creation is suppressed; cleared by slow_work_oom_timer */
static bool slow_work_may_not_start_new_thread;
static bool slow_work_cull;	/* cull a thread due to lack of activity */
static DEFINE_TIMER(slow_work_cull_timer, slow_work_cull_timeout, 0, 0);
static DEFINE_TIMER(slow_work_oom_timer, slow_work_oom_timeout, 0, 0);
static struct slow_work slow_work_new_thread; /* new thread starter */

/*
 * slow work ID allocation (use slow_work_queue_lock)
 * - each pool thread claims the first free bit as its ID on startup and
 *   releases it again on exit
 */
static DECLARE_BITMAP(slow_work_ids, SLOW_WORK_THREAD_LIMIT);
98 | |||
/*
 * Unregistration tracking to prevent put_ref() from disappearing during module
 * unload
 *
 * Each pool thread records, in the slot indexed by its thread ID, the owner
 * module of the item it is currently processing.  Without CONFIG_MODULES the
 * helpers below compile away to nothing.
 */
#ifdef CONFIG_MODULES
static struct module *slow_work_thread_processing[SLOW_WORK_THREAD_LIMIT];
static struct module *slow_work_unreg_module;
static struct slow_work *slow_work_unreg_work_item;
static DECLARE_WAIT_QUEUE_HEAD(slow_work_unreg_wq);
static DEFINE_MUTEX(slow_work_unreg_sync_lock);

/*
 * Note the owner of the item that thread @id is about to process; a NULL
 * @work (nothing was dequeued) leaves the slot untouched.
 */
static void slow_work_set_thread_processing(int id, struct slow_work *work)
{
	if (work)
		slow_work_thread_processing[id] = work->owner;
}

/*
 * Note that thread @id has finished with @work, and wake anyone sleeping on
 * slow_work_unreg_wq if either the item or its owner module is the one
 * currently recorded as being unregistered.
 */
static void slow_work_done_thread_processing(int id, struct slow_work *work)
{
	struct module *module = slow_work_thread_processing[id];

	slow_work_thread_processing[id] = NULL;
	/* order the clearing of the slot before reading the unreg targets */
	smp_mb();
	if (slow_work_unreg_work_item == work ||
	    slow_work_unreg_module == module)
		wake_up_all(&slow_work_unreg_wq);
}

/*
 * Forget what thread @id was processing without waking anyone (used when the
 * item was put straight back on a queue rather than released).
 */
static void slow_work_clear_thread_processing(int id)
{
	slow_work_thread_processing[id] = NULL;
}
#else
static void slow_work_set_thread_processing(int id, struct slow_work *work) {}
static void slow_work_done_thread_processing(int id, struct slow_work *work) {}
static void slow_work_clear_thread_processing(int id) {}
#endif
134 | |||
/*
 * Data for tracking currently executing items for indication through /proc
 *
 * NOTE(review): the arrays are sized by SLOW_WORK_THREAD_LIMIT, so presumably
 * they are indexed by pool thread ID, with slow_work_execs_lock guarding
 * them - the accessors live outside this file section; confirm there.
 */
#ifdef CONFIG_SLOW_WORK_DEBUG
struct slow_work *slow_work_execs[SLOW_WORK_THREAD_LIMIT];
pid_t slow_work_pids[SLOW_WORK_THREAD_LIMIT];
DEFINE_RWLOCK(slow_work_execs_lock);
#endif
143 | |||
/*
 * The queues of work items and the lock governing access to them.  These are
 * shared between all the CPUs.  It doesn't make sense to have per-CPU queues
 * as the number of threads bears no relation to the number of CPUs.
 *
 * There are two queues of work items: one for slow work items, and one for
 * very slow work items.
 *
 * These are not static as the debugfs runqueue listing also walks the queues
 * under the lock.
 */
LIST_HEAD(slow_work_queue);
LIST_HEAD(vslow_work_queue);
DEFINE_SPINLOCK(slow_work_queue_lock);

/*
 * The following are two wait queues that get pinged when a work item is placed
 * on an empty queue.  These allow work items that are hogging a thread by
 * sleeping in a way that could be deferred to yield their thread and enqueue
 * themselves.
 *
 * There is one wait queue per work queue; sleepers wait here by way of
 * slow_work_sleep_till_thread_needed().
 */
static DECLARE_WAIT_QUEUE_HEAD(slow_work_queue_waits_for_occupation);
static DECLARE_WAIT_QUEUE_HEAD(vslow_work_queue_waits_for_occupation);
164 | |||
/*
 * The thread controls.  A variable used to signal to the threads that they
 * should exit when the queue is empty, a waitqueue used by the threads to wait
 * for signals, and a completion set by the last thread to exit.
 *
 * The completion is signalled by whichever thread drops
 * slow_work_thread_count to zero on its way out.
 */
static bool slow_work_threads_should_exit;
static DECLARE_WAIT_QUEUE_HEAD(slow_work_thread_wq);
static DECLARE_COMPLETION(slow_work_last_thread_exited);

/*
 * The number of users of the thread pool and its lock.  Whilst this is zero we
 * have no threads hanging around, and when this reaches zero, we wait for all
 * active or queued work items to complete and kill all the threads we do have.
 *
 * The lock is also held around the starting of a new thread and around the
 * thread-count adjustments made by the sysctl handlers.
 */
static int slow_work_user_count;
static DEFINE_MUTEX(slow_work_user_lock);
181 | |||
182 | static inline int slow_work_get_ref(struct slow_work *work) | ||
183 | { | ||
184 | if (work->ops->get_ref) | ||
185 | return work->ops->get_ref(work); | ||
186 | |||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | static inline void slow_work_put_ref(struct slow_work *work) | ||
191 | { | ||
192 | if (work->ops->put_ref) | ||
193 | work->ops->put_ref(work); | ||
194 | } | ||
195 | |||
196 | /* | ||
197 | * Calculate the maximum number of active threads in the pool that are | ||
198 | * permitted to process very slow work items. | ||
199 | * | ||
200 | * The answer is rounded up to at least 1, but may not equal or exceed the | ||
201 | * maximum number of the threads in the pool. This means we always have at | ||
202 | * least one thread that can process slow work items, and we always have at | ||
203 | * least one thread that won't get tied up doing so. | ||
204 | */ | ||
205 | static unsigned slow_work_calc_vsmax(void) | ||
206 | { | ||
207 | unsigned vsmax; | ||
208 | |||
209 | vsmax = atomic_read(&slow_work_thread_count) * vslow_work_proportion; | ||
210 | vsmax /= 100; | ||
211 | vsmax = max(vsmax, 1U); | ||
212 | return min(vsmax, slow_work_max_threads - 1); | ||
213 | } | ||
214 | |||
/*
 * Attempt to execute stuff queued on a slow thread.  Return true if we managed
 * it, false if there was nothing to do.
 *
 * @id is the calling pool thread's ID, used to index the per-thread tracking
 * data.
 *
 * The very slow queue is tried first, but only if fewer than vsmax threads
 * are already running very slow items; failing that, the ordinary slow queue
 * is tried.  The dequeued item is marked EXECUTING under the queue lock, has
 * its PENDING flag cleared, is executed (unless it is being cancelled) and is
 * then either put straight back on its queue (if an enqueue request arrived
 * whilst it was running) or has its reference dropped.
 */
static noinline bool slow_work_execute(int id)
{
	struct slow_work *work = NULL;
	unsigned vsmax;
	bool very_slow;

	vsmax = slow_work_calc_vsmax();

	/* see if we can schedule a new thread to be started if we're not
	 * keeping up with the work */
	if (!waitqueue_active(&slow_work_thread_wq) &&
	    (!list_empty(&slow_work_queue) || !list_empty(&vslow_work_queue)) &&
	    atomic_read(&slow_work_thread_count) < slow_work_max_threads &&
	    !slow_work_may_not_start_new_thread)
		slow_work_enqueue(&slow_work_new_thread);

	/* find something to execute */
	spin_lock_irq(&slow_work_queue_lock);
	if (!list_empty(&vslow_work_queue) &&
	    atomic_read(&vslow_work_executing_count) < vsmax) {
		work = list_entry(vslow_work_queue.next,
				  struct slow_work, link);
		/* a queued item must never already be under execution */
		if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags))
			BUG();
		list_del_init(&work->link);
		atomic_inc(&vslow_work_executing_count);
		very_slow = true;
	} else if (!list_empty(&slow_work_queue)) {
		work = list_entry(slow_work_queue.next,
				  struct slow_work, link);
		/* a queued item must never already be under execution */
		if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags))
			BUG();
		list_del_init(&work->link);
		very_slow = false;
	} else {
		very_slow = false; /* avoid the compiler warning */
	}

	/* record what this thread is doing whilst still holding the lock */
	slow_work_set_thread_processing(id, work);
	if (work) {
		slow_work_mark_time(work);
		slow_work_begin_exec(id, work);
	}

	spin_unlock_irq(&slow_work_queue_lock);

	if (!work)
		return false;

	/* anything taken off a queue must have been marked pending */
	if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags))
		BUG();

	/* don't execute if the work is in the process of being cancelled */
	if (!test_bit(SLOW_WORK_CANCELLING, &work->flags))
		work->ops->execute(work);

	if (very_slow)
		atomic_dec(&vslow_work_executing_count);
	clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags);

	/* wake up anyone waiting for this work to be complete */
	wake_up_bit(&work->flags, SLOW_WORK_EXECUTING);

	slow_work_end_exec(id, work);

	/* if someone tried to enqueue the item whilst we were executing it,
	 * then it'll be left unenqueued to avoid multiple threads trying to
	 * execute it simultaneously
	 *
	 * there is, however, a race between us testing the pending flag and
	 * getting the spinlock, and between the enqueuer setting the pending
	 * flag and getting the spinlock, so we use a deferral bit to tell us
	 * if the enqueuer got there first
	 */
	if (test_bit(SLOW_WORK_PENDING, &work->flags)) {
		spin_lock_irq(&slow_work_queue_lock);

		if (!test_bit(SLOW_WORK_EXECUTING, &work->flags) &&
		    test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags))
			goto auto_requeue;

		spin_unlock_irq(&slow_work_queue_lock);
	}

	/* sort out the race between module unloading and put_ref() */
	slow_work_put_ref(work);
	slow_work_done_thread_processing(id, work);

	return true;

auto_requeue:
	/* we must complete the enqueue operation
	 * - we transfer our ref on the item back to the appropriate queue
	 * - don't wake another thread up as we're awake already
	 * - the queue lock is held on entry to this label and dropped below
	 */
	slow_work_mark_time(work);
	if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
		list_add_tail(&work->link, &vslow_work_queue);
	else
		list_add_tail(&work->link, &slow_work_queue);
	spin_unlock_irq(&slow_work_queue_lock);
	slow_work_clear_thread_processing(id);
	return true;
}
323 | |||
/**
 * slow_work_sleep_till_thread_needed - Sleep till thread needed by other work
 * @work: The work item under execution that wants to sleep
 * @_timeout: Scheduler sleep timeout; updated with the time remaining as
 *	returned by schedule_timeout() if a sleep actually took place
 *
 * Allow a requeueable work item to sleep on a slow-work processor thread until
 * that thread is needed to do some other work or the sleep is interrupted by
 * some other event.
 *
 * The caller must set up a wake up event before calling this and must have set
 * the appropriate sleep mode (such as TASK_UNINTERRUPTIBLE) and tested its own
 * condition before calling this function as no test is made here.
 *
 * False is returned if there is nothing on the queue; true is returned if the
 * work item should be requeued
 */
bool slow_work_sleep_till_thread_needed(struct slow_work *work,
					signed long *_timeout)
{
	wait_queue_head_t *wfo_wq;
	struct list_head *queue;

	DEFINE_WAIT(wait);

	/* watch the queue that the item itself would be placed upon */
	if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) {
		wfo_wq = &vslow_work_queue_waits_for_occupation;
		queue = &vslow_work_queue;
	} else {
		wfo_wq = &slow_work_queue_waits_for_occupation;
		queue = &slow_work_queue;
	}

	/* there's already work waiting, so don't sleep at all */
	if (!list_empty(queue))
		return true;

	/* recheck the queue after joining the wait queue so that an item
	 * queued in the meantime isn't missed */
	add_wait_queue_exclusive(wfo_wq, &wait);
	if (list_empty(queue))
		*_timeout = schedule_timeout(*_timeout);
	finish_wait(wfo_wq, &wait);

	return !list_empty(queue);
}
EXPORT_SYMBOL(slow_work_sleep_till_thread_needed);
367 | |||
368 | /** | ||
369 | * slow_work_enqueue - Schedule a slow work item for processing | ||
370 | * @work: The work item to queue | ||
371 | * | ||
372 | * Schedule a slow work item for processing. If the item is already undergoing | ||
373 | * execution, this guarantees not to re-enter the execution routine until the | ||
374 | * first execution finishes. | ||
375 | * | ||
376 | * The item is pinned by this function as it retains a reference to it, managed | ||
377 | * through the item operations. The item is unpinned once it has been | ||
378 | * executed. | ||
379 | * | ||
380 | * An item may hog the thread that is running it for a relatively large amount | ||
381 | * of time, sufficient, for example, to perform several lookup, mkdir, create | ||
382 | * and setxattr operations. It may sleep on I/O and may sleep to obtain locks. | ||
383 | * | ||
384 | * Conversely, if a number of items are awaiting processing, it may take some | ||
385 | * time before any given item is given attention. The number of threads in the | ||
386 | * pool may be increased to deal with demand, but only up to a limit. | ||
387 | * | ||
388 | * If SLOW_WORK_VERY_SLOW is set on the work item, then it will be placed in | ||
389 | * the very slow queue, from which only a portion of the threads will be | ||
390 | * allowed to pick items to execute. This ensures that very slow items won't | ||
391 | * overly block ones that are just ordinarily slow. | ||
392 | * | ||
393 | * Returns 0 if successful, -EAGAIN if not (or -ECANCELED if cancelled work is | ||
394 | * attempted queued) | ||
395 | */ | ||
396 | int slow_work_enqueue(struct slow_work *work) | ||
397 | { | ||
398 | wait_queue_head_t *wfo_wq; | ||
399 | struct list_head *queue; | ||
400 | unsigned long flags; | ||
401 | int ret; | ||
402 | |||
403 | if (test_bit(SLOW_WORK_CANCELLING, &work->flags)) | ||
404 | return -ECANCELED; | ||
405 | |||
406 | BUG_ON(slow_work_user_count <= 0); | ||
407 | BUG_ON(!work); | ||
408 | BUG_ON(!work->ops); | ||
409 | |||
410 | /* when honouring an enqueue request, we only promise that we will run | ||
411 | * the work function in the future; we do not promise to run it once | ||
412 | * per enqueue request | ||
413 | * | ||
414 | * we use the PENDING bit to merge together repeat requests without | ||
415 | * having to disable IRQs and take the spinlock, whilst still | ||
416 | * maintaining our promise | ||
417 | */ | ||
418 | if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) { | ||
419 | if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) { | ||
420 | wfo_wq = &vslow_work_queue_waits_for_occupation; | ||
421 | queue = &vslow_work_queue; | ||
422 | } else { | ||
423 | wfo_wq = &slow_work_queue_waits_for_occupation; | ||
424 | queue = &slow_work_queue; | ||
425 | } | ||
426 | |||
427 | spin_lock_irqsave(&slow_work_queue_lock, flags); | ||
428 | |||
429 | if (unlikely(test_bit(SLOW_WORK_CANCELLING, &work->flags))) | ||
430 | goto cancelled; | ||
431 | |||
432 | /* we promise that we will not attempt to execute the work | ||
433 | * function in more than one thread simultaneously | ||
434 | * | ||
435 | * this, however, leaves us with a problem if we're asked to | ||
436 | * enqueue the work whilst someone is executing the work | ||
437 | * function as simply queueing the work immediately means that | ||
438 | * another thread may try executing it whilst it is already | ||
439 | * under execution | ||
440 | * | ||
441 | * to deal with this, we set the ENQ_DEFERRED bit instead of | ||
442 | * enqueueing, and the thread currently executing the work | ||
443 | * function will enqueue the work item when the work function | ||
444 | * returns and it has cleared the EXECUTING bit | ||
445 | */ | ||
446 | if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) { | ||
447 | set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags); | ||
448 | } else { | ||
449 | ret = slow_work_get_ref(work); | ||
450 | if (ret < 0) | ||
451 | goto failed; | ||
452 | slow_work_mark_time(work); | ||
453 | list_add_tail(&work->link, queue); | ||
454 | wake_up(&slow_work_thread_wq); | ||
455 | |||
456 | /* if someone who could be requeued is sleeping on a | ||
457 | * thread, then ask them to yield their thread */ | ||
458 | if (work->link.prev == queue) | ||
459 | wake_up(wfo_wq); | ||
460 | } | ||
461 | |||
462 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); | ||
463 | } | ||
464 | return 0; | ||
465 | |||
466 | cancelled: | ||
467 | ret = -ECANCELED; | ||
468 | failed: | ||
469 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); | ||
470 | return ret; | ||
471 | } | ||
472 | EXPORT_SYMBOL(slow_work_enqueue); | ||
473 | |||
/*
 * Sleep action passed to wait_on_bit() by slow_work_cancel(): just give up
 * the CPU and return 0.  The @word argument is not used.
 */
static int slow_work_wait(void *word)
{
	schedule();
	return 0;
}
479 | |||
/**
 * slow_work_cancel - Cancel a slow work item
 * @work: The work item to cancel
 *
 * This function will cancel a previously enqueued work item. If we cannot
 * cancel the work item, it is guaranteed to have run when this function
 * returns.
 *
 * The CANCELLING flag is held set on the item for the duration of the call,
 * during which time attempts to enqueue the item are refused with -ECANCELED.
 * Unless the item was plucked directly off a queue, this sleeps
 * uninterruptibly until any execution of the item in progress has finished.
 */
void slow_work_cancel(struct slow_work *work)
{
	/* wait: whether to wait for the EXECUTING flag to clear
	 * put: whether we inherited a reference that we must drop */
	bool wait = true, put = false;

	set_bit(SLOW_WORK_CANCELLING, &work->flags);
	smp_mb();

	/* if the work item is a delayed work item with an active timer, we
	 * need to wait for the timer to finish _before_ getting the spinlock,
	 * lest we deadlock against the timer routine
	 *
	 * the timer routine will leave DELAYED set if it notices the
	 * CANCELLING flag in time
	 */
	if (test_bit(SLOW_WORK_DELAYED, &work->flags)) {
		struct delayed_slow_work *dwork =
			container_of(work, struct delayed_slow_work, work);
		del_timer_sync(&dwork->timer);
	}

	spin_lock_irq(&slow_work_queue_lock);

	if (test_bit(SLOW_WORK_DELAYED, &work->flags)) {
		/* the timer routine aborted or never happened, so we are left
		 * holding the timer's reference on the item and should just
		 * drop the pending flag and wait for any ongoing execution to
		 * finish */
		struct delayed_slow_work *dwork =
			container_of(work, struct delayed_slow_work, work);

		BUG_ON(timer_pending(&dwork->timer));
		BUG_ON(!list_empty(&work->link));

		clear_bit(SLOW_WORK_DELAYED, &work->flags);
		put = true;
		clear_bit(SLOW_WORK_PENDING, &work->flags);

	} else if (test_bit(SLOW_WORK_PENDING, &work->flags) &&
		   !list_empty(&work->link)) {
		/* the link in the pending queue holds a reference on the item
		 * that we will need to release */
		list_del_init(&work->link);
		wait = false;
		put = true;
		clear_bit(SLOW_WORK_PENDING, &work->flags);

	} else if (test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags)) {
		/* the executor is holding our only reference on the item, so
		 * we merely need to wait for it to finish executing */
		clear_bit(SLOW_WORK_PENDING, &work->flags);
	}

	spin_unlock_irq(&slow_work_queue_lock);

	/* the EXECUTING flag is set by the executor whilst the spinlock is set
	 * and before the item is dequeued - so assuming the above doesn't
	 * actually dequeue it, simply waiting for the EXECUTING flag to be
	 * released here should be sufficient */
	if (wait)
		wait_on_bit(&work->flags, SLOW_WORK_EXECUTING, slow_work_wait,
			    TASK_UNINTERRUPTIBLE);

	/* enqueue requests may be accepted again from this point on */
	clear_bit(SLOW_WORK_CANCELLING, &work->flags);
	if (put)
		slow_work_put_ref(work);
}
EXPORT_SYMBOL(slow_work_cancel);
555 | |||
/*
 * Handle expiry of the delay timer, indicating that a delayed slow work item
 * should now be queued if not cancelled
 *
 * @data is the address of the slow_work struct, as stored in the timer by
 * delayed_slow_work_enqueue().
 *
 * If the item is being cancelled, nothing is done and the DELAYED flag is
 * left set for slow_work_cancel() to find.  Otherwise the item is either
 * added to the tail of the appropriate queue or, if it is currently being
 * executed, flagged for requeueing by the executing thread.
 */
static void delayed_slow_work_timer(unsigned long data)
{
	wait_queue_head_t *wfo_wq;
	struct list_head *queue;
	struct slow_work *work = (struct slow_work *) data;
	unsigned long flags;
	/* what to do after dropping the lock:
	 * queued: wake a pool thread to process the item
	 * put: drop the reference the timer was holding
	 * first: ask a sleeping requeueable item to yield its thread */
	bool queued = false, put = false, first = false;

	if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) {
		wfo_wq = &vslow_work_queue_waits_for_occupation;
		queue = &vslow_work_queue;
	} else {
		wfo_wq = &slow_work_queue_waits_for_occupation;
		queue = &slow_work_queue;
	}

	spin_lock_irqsave(&slow_work_queue_lock, flags);
	if (likely(!test_bit(SLOW_WORK_CANCELLING, &work->flags))) {
		clear_bit(SLOW_WORK_DELAYED, &work->flags);

		if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) {
			/* we discard the reference the timer was holding in
			 * favour of the one the executor holds */
			set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);
			put = true;
		} else {
			/* the timer's reference passes to the queue */
			slow_work_mark_time(work);
			list_add_tail(&work->link, queue);
			queued = true;
			/* note if the queue was empty before we added this */
			if (work->link.prev == queue)
				first = true;
		}
	}

	spin_unlock_irqrestore(&slow_work_queue_lock, flags);
	if (put)
		slow_work_put_ref(work);
	if (first)
		wake_up(wfo_wq);
	if (queued)
		wake_up(&slow_work_thread_wq);
}
602 | |||
603 | /** | ||
604 | * delayed_slow_work_enqueue - Schedule a delayed slow work item for processing | ||
605 | * @dwork: The delayed work item to queue | ||
606 | * @delay: When to start executing the work, in jiffies from now | ||
607 | * | ||
608 | * This is similar to slow_work_enqueue(), but it adds a delay before the work | ||
609 | * is actually queued for processing. | ||
610 | * | ||
611 | * The item can have delayed processing requested on it whilst it is being | ||
612 | * executed. The delay will begin immediately, and if it expires before the | ||
613 | * item finishes executing, the item will be placed back on the queue when it | ||
614 | * has done executing. | ||
615 | */ | ||
616 | int delayed_slow_work_enqueue(struct delayed_slow_work *dwork, | ||
617 | unsigned long delay) | ||
618 | { | ||
619 | struct slow_work *work = &dwork->work; | ||
620 | unsigned long flags; | ||
621 | int ret; | ||
622 | |||
623 | if (delay == 0) | ||
624 | return slow_work_enqueue(&dwork->work); | ||
625 | |||
626 | BUG_ON(slow_work_user_count <= 0); | ||
627 | BUG_ON(!work); | ||
628 | BUG_ON(!work->ops); | ||
629 | |||
630 | if (test_bit(SLOW_WORK_CANCELLING, &work->flags)) | ||
631 | return -ECANCELED; | ||
632 | |||
633 | if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) { | ||
634 | spin_lock_irqsave(&slow_work_queue_lock, flags); | ||
635 | |||
636 | if (test_bit(SLOW_WORK_CANCELLING, &work->flags)) | ||
637 | goto cancelled; | ||
638 | |||
639 | /* the timer holds a reference whilst it is pending */ | ||
640 | ret = slow_work_get_ref(work); | ||
641 | if (ret < 0) | ||
642 | goto cant_get_ref; | ||
643 | |||
644 | if (test_and_set_bit(SLOW_WORK_DELAYED, &work->flags)) | ||
645 | BUG(); | ||
646 | dwork->timer.expires = jiffies + delay; | ||
647 | dwork->timer.data = (unsigned long) work; | ||
648 | dwork->timer.function = delayed_slow_work_timer; | ||
649 | add_timer(&dwork->timer); | ||
650 | |||
651 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); | ||
652 | } | ||
653 | |||
654 | return 0; | ||
655 | |||
656 | cancelled: | ||
657 | ret = -ECANCELED; | ||
658 | cant_get_ref: | ||
659 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); | ||
660 | return ret; | ||
661 | } | ||
662 | EXPORT_SYMBOL(delayed_slow_work_enqueue); | ||
663 | |||
/*
 * Schedule a cull of the thread pool at some time in the near future
 * - (re)arms slow_work_cull_timer to go off SLOW_WORK_CULL_TIMEOUT jiffies
 *   from now, as adjusted by round_jiffies()
 */
static void slow_work_schedule_cull(void)
{
	mod_timer(&slow_work_cull_timer,
		  round_jiffies(jiffies + SLOW_WORK_CULL_TIMEOUT));
}
672 | |||
673 | /* | ||
674 | * Worker thread culling algorithm | ||
675 | */ | ||
676 | static bool slow_work_cull_thread(void) | ||
677 | { | ||
678 | unsigned long flags; | ||
679 | bool do_cull = false; | ||
680 | |||
681 | spin_lock_irqsave(&slow_work_queue_lock, flags); | ||
682 | |||
683 | if (slow_work_cull) { | ||
684 | slow_work_cull = false; | ||
685 | |||
686 | if (list_empty(&slow_work_queue) && | ||
687 | list_empty(&vslow_work_queue) && | ||
688 | atomic_read(&slow_work_thread_count) > | ||
689 | slow_work_min_threads) { | ||
690 | slow_work_schedule_cull(); | ||
691 | do_cull = true; | ||
692 | } | ||
693 | } | ||
694 | |||
695 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); | ||
696 | return do_cull; | ||
697 | } | ||
698 | |||
699 | /* | ||
700 | * Determine if there is slow work available for dispatch | ||
701 | */ | ||
702 | static inline bool slow_work_available(int vsmax) | ||
703 | { | ||
704 | return !list_empty(&slow_work_queue) || | ||
705 | (!list_empty(&vslow_work_queue) && | ||
706 | atomic_read(&vslow_work_executing_count) < vsmax); | ||
707 | } | ||
708 | |||
709 | /* | ||
710 | * Worker thread dispatcher | ||
711 | */ | ||
712 | static int slow_work_thread(void *_data) | ||
713 | { | ||
714 | int vsmax, id; | ||
715 | |||
716 | DEFINE_WAIT(wait); | ||
717 | |||
718 | set_freezable(); | ||
719 | set_user_nice(current, -5); | ||
720 | |||
721 | /* allocate ourselves an ID */ | ||
722 | spin_lock_irq(&slow_work_queue_lock); | ||
723 | id = find_first_zero_bit(slow_work_ids, SLOW_WORK_THREAD_LIMIT); | ||
724 | BUG_ON(id < 0 || id >= SLOW_WORK_THREAD_LIMIT); | ||
725 | __set_bit(id, slow_work_ids); | ||
726 | slow_work_set_thread_pid(id, current->pid); | ||
727 | spin_unlock_irq(&slow_work_queue_lock); | ||
728 | |||
729 | sprintf(current->comm, "kslowd%03u", id); | ||
730 | |||
731 | for (;;) { | ||
732 | vsmax = vslow_work_proportion; | ||
733 | vsmax *= atomic_read(&slow_work_thread_count); | ||
734 | vsmax /= 100; | ||
735 | |||
736 | prepare_to_wait_exclusive(&slow_work_thread_wq, &wait, | ||
737 | TASK_INTERRUPTIBLE); | ||
738 | if (!freezing(current) && | ||
739 | !slow_work_threads_should_exit && | ||
740 | !slow_work_available(vsmax) && | ||
741 | !slow_work_cull) | ||
742 | schedule(); | ||
743 | finish_wait(&slow_work_thread_wq, &wait); | ||
744 | |||
745 | try_to_freeze(); | ||
746 | |||
747 | vsmax = vslow_work_proportion; | ||
748 | vsmax *= atomic_read(&slow_work_thread_count); | ||
749 | vsmax /= 100; | ||
750 | |||
751 | if (slow_work_available(vsmax) && slow_work_execute(id)) { | ||
752 | cond_resched(); | ||
753 | if (list_empty(&slow_work_queue) && | ||
754 | list_empty(&vslow_work_queue) && | ||
755 | atomic_read(&slow_work_thread_count) > | ||
756 | slow_work_min_threads) | ||
757 | slow_work_schedule_cull(); | ||
758 | continue; | ||
759 | } | ||
760 | |||
761 | if (slow_work_threads_should_exit) | ||
762 | break; | ||
763 | |||
764 | if (slow_work_cull && slow_work_cull_thread()) | ||
765 | break; | ||
766 | } | ||
767 | |||
768 | spin_lock_irq(&slow_work_queue_lock); | ||
769 | slow_work_set_thread_pid(id, 0); | ||
770 | __clear_bit(id, slow_work_ids); | ||
771 | spin_unlock_irq(&slow_work_queue_lock); | ||
772 | |||
773 | if (atomic_dec_and_test(&slow_work_thread_count)) | ||
774 | complete_and_exit(&slow_work_last_thread_exited, 0); | ||
775 | return 0; | ||
776 | } | ||
777 | |||
/*
 * Handle thread cull timer expiration
 * - flag that a cull has been requested and wake a pool thread up to act on
 *   it; the @data argument is not used
 */
static void slow_work_cull_timeout(unsigned long data)
{
	slow_work_cull = true;
	wake_up(&slow_work_thread_wq);
}
786 | |||
787 | /* | ||
788 | * Start a new slow work thread | ||
789 | */ | ||
790 | static void slow_work_new_thread_execute(struct slow_work *work) | ||
791 | { | ||
792 | struct task_struct *p; | ||
793 | |||
794 | if (slow_work_threads_should_exit) | ||
795 | return; | ||
796 | |||
797 | if (atomic_read(&slow_work_thread_count) >= slow_work_max_threads) | ||
798 | return; | ||
799 | |||
800 | if (!mutex_trylock(&slow_work_user_lock)) | ||
801 | return; | ||
802 | |||
803 | slow_work_may_not_start_new_thread = true; | ||
804 | atomic_inc(&slow_work_thread_count); | ||
805 | p = kthread_run(slow_work_thread, NULL, "kslowd"); | ||
806 | if (IS_ERR(p)) { | ||
807 | printk(KERN_DEBUG "Slow work thread pool: OOM\n"); | ||
808 | if (atomic_dec_and_test(&slow_work_thread_count)) | ||
809 | BUG(); /* we're running on a slow work thread... */ | ||
810 | mod_timer(&slow_work_oom_timer, | ||
811 | round_jiffies(jiffies + SLOW_WORK_OOM_TIMEOUT)); | ||
812 | } else { | ||
813 | /* ratelimit the starting of new threads */ | ||
814 | mod_timer(&slow_work_oom_timer, jiffies + 1); | ||
815 | } | ||
816 | |||
817 | mutex_unlock(&slow_work_user_lock); | ||
818 | } | ||
819 | |||
820 | static const struct slow_work_ops slow_work_new_thread_ops = { | ||
821 | .owner = THIS_MODULE, | ||
822 | .execute = slow_work_new_thread_execute, | ||
823 | #ifdef CONFIG_SLOW_WORK_DEBUG | ||
824 | .desc = slow_work_new_thread_desc, | ||
825 | #endif | ||
826 | }; | ||
827 | |||
828 | /* | ||
829 | * post-OOM new thread start suppression expiration | ||
830 | */ | ||
831 | static void slow_work_oom_timeout(unsigned long data) | ||
832 | { | ||
833 | slow_work_may_not_start_new_thread = false; | ||
834 | } | ||
835 | |||
836 | #ifdef CONFIG_SYSCTL | ||
837 | /* | ||
838 | * Handle adjustment of the minimum number of threads | ||
839 | */ | ||
840 | static int slow_work_min_threads_sysctl(struct ctl_table *table, int write, | ||
841 | void __user *buffer, | ||
842 | size_t *lenp, loff_t *ppos) | ||
843 | { | ||
844 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | ||
845 | int n; | ||
846 | |||
847 | if (ret == 0) { | ||
848 | mutex_lock(&slow_work_user_lock); | ||
849 | if (slow_work_user_count > 0) { | ||
850 | /* see if we need to start or stop threads */ | ||
851 | n = atomic_read(&slow_work_thread_count) - | ||
852 | slow_work_min_threads; | ||
853 | |||
854 | if (n < 0 && !slow_work_may_not_start_new_thread) | ||
855 | slow_work_enqueue(&slow_work_new_thread); | ||
856 | else if (n > 0) | ||
857 | slow_work_schedule_cull(); | ||
858 | } | ||
859 | mutex_unlock(&slow_work_user_lock); | ||
860 | } | ||
861 | |||
862 | return ret; | ||
863 | } | ||
864 | |||
865 | /* | ||
866 | * Handle adjustment of the maximum number of threads | ||
867 | */ | ||
868 | static int slow_work_max_threads_sysctl(struct ctl_table *table, int write, | ||
869 | void __user *buffer, | ||
870 | size_t *lenp, loff_t *ppos) | ||
871 | { | ||
872 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | ||
873 | int n; | ||
874 | |||
875 | if (ret == 0) { | ||
876 | mutex_lock(&slow_work_user_lock); | ||
877 | if (slow_work_user_count > 0) { | ||
878 | /* see if we need to stop threads */ | ||
879 | n = slow_work_max_threads - | ||
880 | atomic_read(&slow_work_thread_count); | ||
881 | |||
882 | if (n < 0) | ||
883 | slow_work_schedule_cull(); | ||
884 | } | ||
885 | mutex_unlock(&slow_work_user_lock); | ||
886 | } | ||
887 | |||
888 | return ret; | ||
889 | } | ||
890 | #endif /* CONFIG_SYSCTL */ | ||
891 | |||
892 | /** | ||
893 | * slow_work_register_user - Register a user of the facility | ||
894 | * @module: The module about to make use of the facility | ||
895 | * | ||
896 | * Register a user of the facility, starting up the initial threads if there | ||
897 | * aren't any other users at this point. This will return 0 if successful, or | ||
898 | * an error if not. | ||
899 | */ | ||
900 | int slow_work_register_user(struct module *module) | ||
901 | { | ||
902 | struct task_struct *p; | ||
903 | int loop; | ||
904 | |||
905 | mutex_lock(&slow_work_user_lock); | ||
906 | |||
907 | if (slow_work_user_count == 0) { | ||
908 | printk(KERN_NOTICE "Slow work thread pool: Starting up\n"); | ||
909 | init_completion(&slow_work_last_thread_exited); | ||
910 | |||
911 | slow_work_threads_should_exit = false; | ||
912 | slow_work_init(&slow_work_new_thread, | ||
913 | &slow_work_new_thread_ops); | ||
914 | slow_work_may_not_start_new_thread = false; | ||
915 | slow_work_cull = false; | ||
916 | |||
917 | /* start the minimum number of threads */ | ||
918 | for (loop = 0; loop < slow_work_min_threads; loop++) { | ||
919 | atomic_inc(&slow_work_thread_count); | ||
920 | p = kthread_run(slow_work_thread, NULL, "kslowd"); | ||
921 | if (IS_ERR(p)) | ||
922 | goto error; | ||
923 | } | ||
924 | printk(KERN_NOTICE "Slow work thread pool: Ready\n"); | ||
925 | } | ||
926 | |||
927 | slow_work_user_count++; | ||
928 | mutex_unlock(&slow_work_user_lock); | ||
929 | return 0; | ||
930 | |||
931 | error: | ||
932 | if (atomic_dec_and_test(&slow_work_thread_count)) | ||
933 | complete(&slow_work_last_thread_exited); | ||
934 | if (loop > 0) { | ||
935 | printk(KERN_ERR "Slow work thread pool:" | ||
936 | " Aborting startup on ENOMEM\n"); | ||
937 | slow_work_threads_should_exit = true; | ||
938 | wake_up_all(&slow_work_thread_wq); | ||
939 | wait_for_completion(&slow_work_last_thread_exited); | ||
940 | printk(KERN_ERR "Slow work thread pool: Aborted\n"); | ||
941 | } | ||
942 | mutex_unlock(&slow_work_user_lock); | ||
943 | return PTR_ERR(p); | ||
944 | } | ||
945 | EXPORT_SYMBOL(slow_work_register_user); | ||
946 | |||
/*
 * wait for all outstanding items from the calling module to complete
 * - note that more items may be queued whilst we're waiting
 * - compiles to nothing unless CONFIG_MODULES is set
 * - waiters are serialised by slow_work_unreg_sync_lock, and sleep on
 *   slow_work_unreg_wq
 */
static void slow_work_wait_for_items(struct module *module)
{
#ifdef CONFIG_MODULES
	DECLARE_WAITQUEUE(myself, current);
	struct slow_work *work;
	int loop;

	mutex_lock(&slow_work_unreg_sync_lock);
	add_wait_queue(&slow_work_unreg_wq, &myself);

	for (;;) {
		spin_lock_irq(&slow_work_queue_lock);

		/* first of all, we wait for the last queued item in each list
		 * to be processed */
		list_for_each_entry_reverse(work, &vslow_work_queue, link) {
			if (work->owner == module) {
				set_current_state(TASK_UNINTERRUPTIBLE);
				slow_work_unreg_work_item = work;
				goto do_wait;
			}
		}
		list_for_each_entry_reverse(work, &slow_work_queue, link) {
			if (work->owner == module) {
				set_current_state(TASK_UNINTERRUPTIBLE);
				slow_work_unreg_work_item = work;
				goto do_wait;
			}
		}

		/* then we wait for the items being processed to finish
		 *
		 * The sleep state must be set before the module is published
		 * and the per-thread slots are scanned: previously this path
		 * reached schedule() in TASK_RUNNING, so schedule() returned
		 * straight away and this loop busy-polled until the slots
		 * cleared.  Setting the state first also means a wake-up on
		 * slow_work_unreg_wq issued after the scan is not lost.
		 */
		set_current_state(TASK_UNINTERRUPTIBLE);
		slow_work_unreg_module = module;
		smp_mb();
		for (loop = 0; loop < SLOW_WORK_THREAD_LIMIT; loop++) {
			if (slow_work_thread_processing[loop] == module)
				goto do_wait;
		}

		/* nothing left to wait for, so we won't be sleeping */
		__set_current_state(TASK_RUNNING);
		spin_unlock_irq(&slow_work_queue_lock);
		break; /* okay, we're done */

	do_wait:
		spin_unlock_irq(&slow_work_queue_lock);
		schedule();
		/* withdraw what we were waiting on before rescanning */
		slow_work_unreg_work_item = NULL;
		slow_work_unreg_module = NULL;
	}

	remove_wait_queue(&slow_work_unreg_wq, &myself);
	mutex_unlock(&slow_work_unreg_sync_lock);
#endif /* CONFIG_MODULES */
}
1002 | |||
1003 | /** | ||
1004 | * slow_work_unregister_user - Unregister a user of the facility | ||
1005 | * @module: The module whose items should be cleared | ||
1006 | * | ||
1007 | * Unregister a user of the facility, killing all the threads if this was the | ||
1008 | * last one. | ||
1009 | * | ||
1010 | * This waits for all the work items belonging to the nominated module to go | ||
1011 | * away before proceeding. | ||
1012 | */ | ||
1013 | void slow_work_unregister_user(struct module *module) | ||
1014 | { | ||
1015 | /* first of all, wait for all outstanding items from the calling module | ||
1016 | * to complete */ | ||
1017 | if (module) | ||
1018 | slow_work_wait_for_items(module); | ||
1019 | |||
1020 | /* then we can actually go about shutting down the facility if need | ||
1021 | * be */ | ||
1022 | mutex_lock(&slow_work_user_lock); | ||
1023 | |||
1024 | BUG_ON(slow_work_user_count <= 0); | ||
1025 | |||
1026 | slow_work_user_count--; | ||
1027 | if (slow_work_user_count == 0) { | ||
1028 | printk(KERN_NOTICE "Slow work thread pool: Shutting down\n"); | ||
1029 | slow_work_threads_should_exit = true; | ||
1030 | del_timer_sync(&slow_work_cull_timer); | ||
1031 | del_timer_sync(&slow_work_oom_timer); | ||
1032 | wake_up_all(&slow_work_thread_wq); | ||
1033 | wait_for_completion(&slow_work_last_thread_exited); | ||
1034 | printk(KERN_NOTICE "Slow work thread pool:" | ||
1035 | " Shut down complete\n"); | ||
1036 | } | ||
1037 | |||
1038 | mutex_unlock(&slow_work_user_lock); | ||
1039 | } | ||
1040 | EXPORT_SYMBOL(slow_work_unregister_user); | ||
1041 | |||
1042 | /* | ||
1043 | * Initialise the slow work facility | ||
1044 | */ | ||
1045 | static int __init init_slow_work(void) | ||
1046 | { | ||
1047 | unsigned nr_cpus = num_possible_cpus(); | ||
1048 | |||
1049 | if (slow_work_max_threads < nr_cpus) | ||
1050 | slow_work_max_threads = nr_cpus; | ||
1051 | #ifdef CONFIG_SYSCTL | ||
1052 | if (slow_work_max_max_threads < nr_cpus * 2) | ||
1053 | slow_work_max_max_threads = nr_cpus * 2; | ||
1054 | #endif | ||
1055 | #ifdef CONFIG_SLOW_WORK_DEBUG | ||
1056 | { | ||
1057 | struct dentry *dbdir; | ||
1058 | |||
1059 | dbdir = debugfs_create_dir("slow_work", NULL); | ||
1060 | if (dbdir && !IS_ERR(dbdir)) | ||
1061 | debugfs_create_file("runqueue", S_IFREG | 0400, dbdir, | ||
1062 | NULL, &slow_work_runqueue_fops); | ||
1063 | } | ||
1064 | #endif | ||
1065 | return 0; | ||
1066 | } | ||
1067 | |||
1068 | subsys_initcall(init_slow_work); | ||
diff --git a/kernel/slow-work.h b/kernel/slow-work.h deleted file mode 100644 index a29ebd1ef41d..000000000000 --- a/kernel/slow-work.h +++ /dev/null | |||
@@ -1,72 +0,0 @@ | |||
1 | /* Slow work private definitions | ||
2 | * | ||
3 | * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define SLOW_WORK_CULL_TIMEOUT (5 * HZ) /* cull threads 5s after running out of | ||
13 | * things to do */ | ||
14 | #define SLOW_WORK_OOM_TIMEOUT (5 * HZ) /* can't start new threads for 5s after | ||
15 | * OOM */ | ||
16 | |||
17 | #define SLOW_WORK_THREAD_LIMIT 255 /* abs maximum number of slow-work threads */ | ||
18 | |||
19 | /* | ||
20 | * slow-work.c | ||
21 | */ | ||
22 | #ifdef CONFIG_SLOW_WORK_DEBUG | ||
23 | extern struct slow_work *slow_work_execs[]; | ||
24 | extern pid_t slow_work_pids[]; | ||
25 | extern rwlock_t slow_work_execs_lock; | ||
26 | #endif | ||
27 | |||
28 | extern struct list_head slow_work_queue; | ||
29 | extern struct list_head vslow_work_queue; | ||
30 | extern spinlock_t slow_work_queue_lock; | ||
31 | |||
32 | /* | ||
33 | * slow-work-debugfs.c | ||
34 | */ | ||
35 | #ifdef CONFIG_SLOW_WORK_DEBUG | ||
36 | extern const struct file_operations slow_work_runqueue_fops; | ||
37 | |||
38 | extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *); | ||
39 | #endif | ||
40 | |||
41 | /* | ||
42 | * Helper functions | ||
43 | */ | ||
44 | static inline void slow_work_set_thread_pid(int id, pid_t pid) | ||
45 | { | ||
46 | #ifdef CONFIG_SLOW_WORK_DEBUG | ||
47 | slow_work_pids[id] = pid; | ||
48 | #endif | ||
49 | } | ||
50 | |||
/*
 * Stamp @work with the current time in its ->mark field.  Does nothing
 * unless CONFIG_SLOW_WORK_DEBUG is set.
 */
static inline void slow_work_mark_time(struct slow_work *work)
{
#ifdef CONFIG_SLOW_WORK_DEBUG
	work->mark = CURRENT_TIME;
#endif
}
57 | |||
/*
 * Note in slow_work_execs[] that thread @id has started on @work.  Does
 * nothing unless CONFIG_SLOW_WORK_DEBUG is set.
 */
static inline void slow_work_begin_exec(int id, struct slow_work *work)
{
#ifdef CONFIG_SLOW_WORK_DEBUG
	slow_work_execs[id] = work;
#endif
}
64 | |||
/*
 * Clear the slow_work_execs[] slot for thread @id, holding
 * slow_work_execs_lock for writing while doing so.  @work is not used.  Does
 * nothing unless CONFIG_SLOW_WORK_DEBUG is set.
 */
static inline void slow_work_end_exec(int id, struct slow_work *work)
{
#ifdef CONFIG_SLOW_WORK_DEBUG
	write_lock(&slow_work_execs_lock);
	slow_work_execs[id] = NULL;
	write_unlock(&slow_work_execs_lock);
#endif
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d24f761f4876..5821365b9605 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -50,7 +50,6 @@ | |||
50 | #include <linux/acpi.h> | 50 | #include <linux/acpi.h> |
51 | #include <linux/reboot.h> | 51 | #include <linux/reboot.h> |
52 | #include <linux/ftrace.h> | 52 | #include <linux/ftrace.h> |
53 | #include <linux/slow-work.h> | ||
54 | #include <linux/perf_event.h> | 53 | #include <linux/perf_event.h> |
55 | #include <linux/kprobes.h> | 54 | #include <linux/kprobes.h> |
56 | #include <linux/pipe_fs_i.h> | 55 | #include <linux/pipe_fs_i.h> |
@@ -906,13 +905,6 @@ static struct ctl_table kern_table[] = { | |||
906 | .proc_handler = proc_dointvec, | 905 | .proc_handler = proc_dointvec, |
907 | }, | 906 | }, |
908 | #endif | 907 | #endif |
909 | #ifdef CONFIG_SLOW_WORK | ||
910 | { | ||
911 | .procname = "slow-work", | ||
912 | .mode = 0555, | ||
913 | .child = slow_work_sysctls, | ||
914 | }, | ||
915 | #endif | ||
916 | #ifdef CONFIG_PERF_EVENTS | 908 | #ifdef CONFIG_PERF_EVENTS |
917 | { | 909 | { |
918 | .procname = "perf_event_paranoid", | 910 | .procname = "perf_event_paranoid", |