diff options
Diffstat (limited to 'Documentation/slow-work.txt')
-rw-r--r-- | Documentation/slow-work.txt | 322 |
1 files changed, 0 insertions, 322 deletions
diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt deleted file mode 100644 index 9dbf4470c7e1..000000000000 --- a/Documentation/slow-work.txt +++ /dev/null | |||
@@ -1,322 +0,0 @@ | |||
1 | ==================================== | ||
2 | SLOW WORK ITEM EXECUTION THREAD POOL | ||
3 | ==================================== | ||
4 | |||
5 | By: David Howells <dhowells@redhat.com> | ||
6 | |||
7 | The slow work item execution thread pool is a pool of threads for performing | ||
8 | things that take a relatively long time, such as making mkdir calls. | ||
9 | Typically, when processing something, these items will spend a lot of time | ||
10 | blocking a thread on I/O, thus making that thread unavailable for doing other | ||
11 | work. | ||
12 | |||
13 | The standard workqueue model is unsuitable for this class of work item as that | ||
14 | limits the owner to a single thread or a single thread per CPU. For some | ||
15 | tasks, however, more threads - or fewer - are required. | ||
16 | |||
17 | There is just one pool per system. It contains no threads unless something | ||
18 | wants to use it - and that something must register its interest first. When | ||
19 | the pool is active, the number of threads it contains is dynamic, varying | ||
20 | between a maximum and minimum setting, depending on the load. | ||
21 | |||
22 | |||
23 | ==================== | ||
24 | CLASSES OF WORK ITEM | ||
25 | ==================== | ||
26 | |||
27 | This pool supports two classes of work items: | ||
28 | |||
29 | (*) Slow work items. | ||
30 | |||
31 | (*) Very slow work items. | ||
32 | |||
33 | The former are expected to finish much quicker than the latter. | ||
34 | |||
35 | An operation of the very slow class may do a batch combination of several | ||
36 | lookups, mkdirs, and a create for instance. | ||
37 | |||
38 | An operation of the ordinarily slow class may, for example, write stuff or | ||
39 | expand files, provided the time taken to do so isn't too long. | ||
40 | |||
41 | Operations of both types may sleep during execution, thus tying up the thread | ||
42 | loaned to them. | ||
43 | |||
44 | A further class of work item is available, based on the slow work item class: | ||
45 | |||
46 | (*) Delayed slow work items. | ||
47 | |||
48 | These are slow work items that have a timer to defer queueing of the item for | ||
49 | a while. | ||
50 | |||
51 | |||
52 | THREAD-TO-CLASS ALLOCATION | ||
53 | -------------------------- | ||
54 | |||
55 | Not all the threads in the pool are available to work on very slow work items. | ||
56 | The number will be between one and one fewer than the number of active threads. | ||
57 | This is configurable (see the "Pool Configuration" section). | ||
58 | |||
59 | All the threads are available to work on ordinarily slow work items, but a | ||
60 | percentage of the threads will prefer to work on very slow work items. | ||
61 | |||
62 | The configuration ensures that at least one thread will be available to work on | ||
63 | very slow work items, and at least one thread will be available that won't work | ||
64 | on very slow work items at all. | ||
65 | |||
66 | |||
67 | ===================== | ||
68 | USING SLOW WORK ITEMS | ||
69 | ===================== | ||
70 | |||
71 | Firstly, a module or subsystem wanting to make use of slow work items must | ||
72 | register its interest: | ||
73 | |||
74 | int ret = slow_work_register_user(struct module *module); | ||
75 | |||
76 | This will return 0 if successful, or a -ve error upon failure. The module | ||
77 | pointer should be the module interested in using this facility (almost | ||
78 | certainly THIS_MODULE). | ||
79 | |||
80 | |||
81 | Slow work items may then be set up by: | ||
82 | |||
83 | (1) Declaring a slow_work struct type variable: | ||
84 | |||
85 | #include <linux/slow-work.h> | ||
86 | |||
87 | struct slow_work myitem; | ||
88 | |||
89 | (2) Declaring the operations to be used for this item: | ||
90 | |||
91 | struct slow_work_ops myitem_ops = { | ||
92 | .get_ref = myitem_get_ref, | ||
93 | .put_ref = myitem_put_ref, | ||
94 | .execute = myitem_execute, | ||
95 | }; | ||
96 | |||
97 | [*] For a description of the ops, see section "Item Operations". | ||
98 | |||
99 | (3) Initialising the item: | ||
100 | |||
101 | slow_work_init(&myitem, &myitem_ops); | ||
102 | |||
103 | or: | ||
104 | |||
105 | delayed_slow_work_init(&myitem, &myitem_ops); | ||
106 | |||
107 | or: | ||
108 | |||
109 | vslow_work_init(&myitem, &myitem_ops); | ||
110 | |||
111 | depending on its class. | ||
112 | |||
113 | A suitably set up work item can then be enqueued for processing: | ||
114 | |||
115 | int ret = slow_work_enqueue(&myitem); | ||
116 | |||
117 | This will return a -ve error if the thread pool is unable to gain a reference | ||
118 | on the item, 0 otherwise, or (for delayed work): | ||
119 | |||
120 | int ret = delayed_slow_work_enqueue(&myitem, my_jiffy_delay); | ||
121 | |||
122 | |||
123 | The items are reference counted, so there ought to be no need for a flush | ||
124 | operation. But as the reference counting is optional, means to cancel | ||
125 | existing work items are also included: | ||
126 | |||
127 | cancel_slow_work(&myitem); | ||
128 | cancel_delayed_slow_work(&myitem); | ||
129 | |||
130 | can be used to cancel pending work. The above cancel functions wait for | ||
131 | existing work to have been executed (or prevent its execution, depending | ||
132 | on timing). | ||
133 | |||
134 | |||
135 | When all a module's slow work items have been processed, and the | ||
136 | module has no further interest in the facility, it should unregister its | ||
137 | interest: | ||
138 | |||
139 | slow_work_unregister_user(struct module *module); | ||
140 | |||
141 | The module pointer is used to wait for all outstanding work items for that | ||
142 | module before completing the unregistration. This prevents the put_ref() code | ||
143 | from being taken away before it completes. module should almost certainly be | ||
144 | THIS_MODULE. | ||
145 | |||
146 | |||
147 | ================ | ||
148 | HELPER FUNCTIONS | ||
149 | ================ | ||
150 | |||
151 | The slow-work facility provides a function by which it can be determined | ||
152 | whether or not an item is queued for later execution: | ||
153 | |||
154 | bool queued = slow_work_is_queued(struct slow_work *work); | ||
155 | |||
156 | If it returns false, then the item is not on the queue (it may be executing | ||
157 | with a requeue pending). This can be used to work out whether an item on which | ||
158 | another depends is on the queue, thus allowing a dependent item to be queued | ||
159 | after it. | ||
160 | |||
161 | If the above shows an item on which another depends not to be queued, then the | ||
162 | owner of the dependent item might need to wait. However, to avoid locking up | ||
163 | the threads unnecessarily by sleeping in them, it can make sense under some | ||
164 | circumstances to return the work item to the queue, thus deferring it until | ||
165 | some other items have had a chance to make use of the yielded thread. | ||
166 | |||
167 | To yield a thread and defer an item, the work function should simply enqueue | ||
168 | the work item again and return. However, this doesn't work if there's nothing | ||
169 | actually on the queue, as the thread just vacated will jump straight back into | ||
170 | the item's work function, thus busy waiting on a CPU. | ||
171 | |||
172 | Instead, the item should use the thread to wait for the dependency to go away, | ||
173 | but rather than using schedule() or schedule_timeout() to sleep, it should use | ||
174 | the following function: | ||
175 | |||
176 | bool requeue = slow_work_sleep_till_thread_needed( | ||
177 | struct slow_work *work, | ||
178 | signed long *_timeout); | ||
179 | |||
180 | This will add a second wait and then sleep, such that it will be woken up if | ||
181 | either something appears on the queue that could usefully make use of the | ||
182 | thread - and behind which this item can be queued, or if the event the caller | ||
183 | set up to wait for happens. True will be returned if something else appeared | ||
184 | on the queue and this work function should perhaps return, or false if | ||
185 | something else woke it up. The timeout is as for schedule_timeout(). | ||
186 | |||
187 | For example: | ||
188 | |||
189 | wq = bit_waitqueue(&my_flags, MY_BIT); | ||
190 | init_wait(&wait); | ||
191 | requeue = false; | ||
192 | do { | ||
193 | prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); | ||
194 | if (!test_bit(MY_BIT, &my_flags)) | ||
195 | break; | ||
196 | requeue = slow_work_sleep_till_thread_needed(&my_work, | ||
197 | &timeout); | ||
198 | } while (timeout > 0 && !requeue); | ||
199 | finish_wait(wq, &wait); | ||
200 | if (!test_bit(MY_BIT, &my_flags)) | ||
201 | goto do_my_thing; | ||
202 | if (requeue) | ||
203 | return; // to slow_work | ||
204 | |||
205 | |||
206 | =============== | ||
207 | ITEM OPERATIONS | ||
208 | =============== | ||
209 | |||
210 | Each work item requires a table of operations of type struct slow_work_ops. | ||
211 | Only ->execute() is required; the getting and putting of a reference and the | ||
212 | describing of an item are all optional. | ||
213 | |||
214 | (*) Get a reference on an item: | ||
215 | |||
216 | int (*get_ref)(struct slow_work *work); | ||
217 | |||
218 | This allows the thread pool to attempt to pin an item by getting a | ||
219 | reference on it. This function should return 0 if the reference was | ||
220 | granted, or a -ve error otherwise. If an error is returned, | ||
221 | slow_work_enqueue() will fail. | ||
222 | |||
223 | The reference is held whilst the item is queued and whilst it is being | ||
224 | executed. The item may then be requeued with the same reference held, or | ||
225 | the reference will be released. | ||
226 | |||
227 | (*) Release a reference on an item: | ||
228 | |||
229 | void (*put_ref)(struct slow_work *work); | ||
230 | |||
231 | This allows the thread pool to unpin an item by releasing the reference on | ||
232 | it. The thread pool will not touch the item again once this has been | ||
233 | called. | ||
234 | |||
235 | (*) Execute an item: | ||
236 | |||
237 | void (*execute)(struct slow_work *work); | ||
238 | |||
239 | This should perform the work required of the item. It may sleep, it may | ||
240 | perform disk I/O and it may wait for locks. | ||
241 | |||
242 | (*) View an item through debugfs: | ||
243 | |||
244 | void (*desc)(struct slow_work *work, struct seq_file *m); | ||
245 | |||
246 | If supplied, this should print to 'm' a small string describing the work | ||
247 | the item is to do. This should be no more than about 40 characters, and | ||
248 | shouldn't include a newline character. | ||
249 | |||
250 | See the 'Viewing executing and queued items' section below. | ||
251 | |||
252 | |||
253 | ================== | ||
254 | POOL CONFIGURATION | ||
255 | ================== | ||
256 | |||
257 | The slow-work thread pool has a number of configurables: | ||
258 | |||
259 | (*) /proc/sys/kernel/slow-work/min-threads | ||
260 | |||
261 | The minimum number of threads that should be in the pool whilst it is in | ||
262 | use. This may be anywhere between 2 and max-threads. | ||
263 | |||
264 | (*) /proc/sys/kernel/slow-work/max-threads | ||
265 | |||
266 | The maximum number of threads that should be in the pool. This may be | ||
267 | anywhere between min-threads and 255 or NR_CPUS * 2, whichever is greater. | ||
268 | |||
269 | (*) /proc/sys/kernel/slow-work/vslow-percentage | ||
270 | |||
271 | The percentage of active threads in the pool that may be used to execute | ||
272 | very slow work items. This may be between 1 and 99. The resultant number | ||
273 | is bounded to between 1 and one fewer than the number of active threads. | ||
274 | This ensures there is always at least one thread that can process very | ||
275 | slow work items, and always at least one thread that won't. | ||
276 | |||
277 | |||
278 | ================================== | ||
279 | VIEWING EXECUTING AND QUEUED ITEMS | ||
280 | ================================== | ||
281 | |||
282 | If CONFIG_SLOW_WORK_DEBUG is enabled, a debugfs file is made available: | ||
283 | |||
284 | /sys/kernel/debug/slow_work/runqueue | ||
285 | |||
286 | through which the list of work items being executed and the queues of items to | ||
287 | be executed may be viewed. The owner of a work item is given the chance to | ||
288 | add some information of its own. | ||
289 | |||
290 | The contents look something like the following: | ||
291 | |||
292 | THR PID ITEM ADDR FL MARK DESC | ||
293 | === ===== ================ == ===== ========== | ||
294 | 0 3005 ffff880023f52348 a 952ms FSC: OBJ17d3: LOOK | ||
295 | 1 3006 ffff880024e33668 2 160ms FSC: OBJ17e5 OP60d3b: Write1/Store fl=2 | ||
296 | 2 3165 ffff8800296dd180 a 424ms FSC: OBJ17e4: LOOK | ||
297 | 3 4089 ffff8800262c8d78 a 212ms FSC: OBJ17ea: CRTN | ||
298 | 4 4090 ffff88002792bed8 2 388ms FSC: OBJ17e8 OP60d36: Write1/Store fl=2 | ||
299 | 5 4092 ffff88002a0ef308 2 388ms FSC: OBJ17e7 OP60d2e: Write1/Store fl=2 | ||
300 | 6 4094 ffff88002abaf4b8 2 132ms FSC: OBJ17e2 OP60d4e: Write1/Store fl=2 | ||
301 | 7 4095 ffff88002bb188e0 a 388ms FSC: OBJ17e9: CRTN | ||
302 | vsq - ffff880023d99668 1 308ms FSC: OBJ17e0 OP60f91: Write1/EnQ fl=2 | ||
303 | vsq - ffff8800295d1740 1 212ms FSC: OBJ16be OP4d4b6: Write1/EnQ fl=2 | ||
304 | vsq - ffff880025ba3308 1 160ms FSC: OBJ179a OP58dec: Write1/EnQ fl=2 | ||
305 | vsq - ffff880024ec83e0 1 160ms FSC: OBJ17ae OP599f2: Write1/EnQ fl=2 | ||
306 | vsq - ffff880026618e00 1 160ms FSC: OBJ17e6 OP60d33: Write1/EnQ fl=2 | ||
307 | vsq - ffff880025a2a4b8 1 132ms FSC: OBJ16a2 OP4d583: Write1/EnQ fl=2 | ||
308 | vsq - ffff880023cbe6d8 9 212ms FSC: OBJ17eb: LOOK | ||
309 | vsq - ffff880024d37590 9 212ms FSC: OBJ17ec: LOOK | ||
310 | vsq - ffff880027746cb0 9 212ms FSC: OBJ17ed: LOOK | ||
311 | vsq - ffff880024d37ae8 9 212ms FSC: OBJ17ee: LOOK | ||
312 | vsq - ffff880024d37cb0 9 212ms FSC: OBJ17ef: LOOK | ||
313 | vsq - ffff880025036550 9 212ms FSC: OBJ17f0: LOOK | ||
314 | vsq - ffff8800250368e0 9 212ms FSC: OBJ17f1: LOOK | ||
315 | vsq - ffff880025036aa8 9 212ms FSC: OBJ17f2: LOOK | ||
316 | |||
317 | In the 'THR' column, executing items show the thread they're occupying and | ||
318 | queued items indicate which queue they're on. 'PID' shows the process ID of | ||
319 | a slow-work thread that's executing something. 'FL' shows the work item flags. | ||
320 | 'MARK' indicates how long since an item was queued or began executing. Lastly, | ||
321 | the 'DESC' column permits the owner of an item to give some information. | ||
322 | |||