diff options
Diffstat (limited to 'Documentation/slow-work.txt')
| -rw-r--r-- | Documentation/slow-work.txt | 160 |
1 files changed, 154 insertions, 6 deletions
diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index ebc50f808ea4..9dbf4470c7e1 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt | |||
| @@ -41,6 +41,13 @@ expand files, provided the time taken to do so isn't too long. | |||
| 41 | Operations of both types may sleep during execution, thus tying up the thread | 41 | Operations of both types may sleep during execution, thus tying up the thread |
| 42 | loaned to it. | 42 | loaned to it. |
| 43 | 43 | ||
| 44 | A further class of work item is available, based on the slow work item class: | ||
| 45 | |||
| 46 | (*) Delayed slow work items. | ||
| 47 | |||
| 48 | These are slow work items that have a timer to defer queueing of the item for | ||
| 49 | a while. | ||
| 50 | |||
| 44 | 51 | ||
| 45 | THREAD-TO-CLASS ALLOCATION | 52 | THREAD-TO-CLASS ALLOCATION |
| 46 | -------------------------- | 53 | -------------------------- |
| @@ -64,9 +71,11 @@ USING SLOW WORK ITEMS | |||
| 64 | Firstly, a module or subsystem wanting to make use of slow work items must | 71 | Firstly, a module or subsystem wanting to make use of slow work items must |
| 65 | register its interest: | 72 | register its interest: |
| 66 | 73 | ||
| 67 | int ret = slow_work_register_user(); | 74 | int ret = slow_work_register_user(struct module *module); |
| 68 | 75 | ||
| 69 | This will return 0 if successful, or a -ve error upon failure. | 76 | This will return 0 if successful, or a -ve error upon failure. The module |
| 77 | pointer should be the module interested in using this facility (almost | ||
| 78 | certainly THIS_MODULE). | ||
| 70 | 79 | ||
| 71 | 80 | ||
| 72 | Slow work items may then be set up by: | 81 | Slow work items may then be set up by: |
| @@ -93,6 +102,10 @@ Slow work items may then be set up by: | |||
| 93 | 102 | ||
| 94 | or: | 103 | or: |
| 95 | 104 | ||
| 105 | delayed_slow_work_init(&myitem, &myitem_ops); | ||
| 106 | |||
| 107 | or: | ||
| 108 | |||
| 96 | vslow_work_init(&myitem, &myitem_ops); | 109 | vslow_work_init(&myitem, &myitem_ops); |
| 97 | 110 | ||
| 98 | depending on its class. | 111 | depending on its class. |
| @@ -102,15 +115,92 @@ A suitably set up work item can then be enqueued for processing: | |||
| 102 | int ret = slow_work_enqueue(&myitem); | 115 | int ret = slow_work_enqueue(&myitem); |
| 103 | 116 | ||
| 104 | This will return a -ve error if the thread pool is unable to gain a reference | 117 | This will return a -ve error if the thread pool is unable to gain a reference |
| 105 | on the item, 0 otherwise. | 118 | on the item, 0 otherwise, or (for delayed work): |
| 119 | |||
| 120 | int ret = delayed_slow_work_enqueue(&myitem, my_jiffy_delay); | ||
| 106 | 121 | ||
| 107 | 122 | ||
| 108 | The items are reference counted, so there ought to be no need for a flush | 123 | The items are reference counted, so there ought to be no need for a flush |
| 109 | operation. When all a module's slow work items have been processed, and the | 124 | operation. But as the reference counting is optional, means to cancel |
| 125 | existing work items are also included: | ||
| 126 | |||
| 127 | cancel_slow_work(&myitem); | ||
| 128 | cancel_delayed_slow_work(&myitem); | ||
| 129 | |||
| 130 | can be used to cancel pending work. The above cancel functions wait for | ||
| 131 | existing work to have been executed (or prevent its execution, depending | ||
| 132 | on timing). | ||
| 133 | |||
| 134 | |||
| 135 | When all a module's slow work items have been processed, and the | ||
| 110 | module has no further interest in the facility, it should unregister its | 136 | module has no further interest in the facility, it should unregister its |
| 111 | interest: | 137 | interest: |
| 112 | 138 | ||
| 113 | slow_work_unregister_user(); | 139 | slow_work_unregister_user(struct module *module); |
| 140 | |||
| 141 | The module pointer is used to wait for all outstanding work items for that | ||
| 142 | module before completing the unregistration. This prevents the put_ref() code | ||
| 143 | from being taken away before it completes. module should almost certainly be | ||
| 144 | THIS_MODULE. | ||
| 145 | |||
| 146 | |||
| 147 | ================ | ||
| 148 | HELPER FUNCTIONS | ||
| 149 | ================ | ||
| 150 | |||
| 151 | The slow-work facility provides a function by which it can be determined | ||
| 152 | whether or not an item is queued for later execution: | ||
| 153 | |||
| 154 | bool queued = slow_work_is_queued(struct slow_work *work); | ||
| 155 | |||
| 156 | If it returns false, then the item is not on the queue (it may be executing | ||
| 157 | with a requeue pending). This can be used to work out whether an item on which | ||
| 158 | another depends is on the queue, thus allowing a dependent item to be queued | ||
| 159 | after it. | ||
| 160 | |||
| 161 | If the above shows an item on which another depends not to be queued, then the | ||
| 162 | owner of the dependent item might need to wait. However, to avoid locking up | ||
| 163 | the threads unnecessarily by sleeping in them, it can make sense under some | ||
| 164 | circumstances to return the work item to the queue, thus deferring it until | ||
| 165 | some other items have had a chance to make use of the yielded thread. | ||
| 166 | |||
| 167 | To yield a thread and defer an item, the work function should simply enqueue | ||
| 168 | the work item again and return. However, this doesn't work if there's nothing | ||
| 169 | actually on the queue, as the thread just vacated will jump straight back into | ||
| 170 | the item's work function, thus busy waiting on a CPU. | ||
| 171 | |||
| 172 | Instead, the item should use the thread to wait for the dependency to go away, | ||
| 173 | but rather than using schedule() or schedule_timeout() to sleep, it should use | ||
| 174 | the following function: | ||
| 175 | |||
| 176 | bool requeue = slow_work_sleep_till_thread_needed( | ||
| 177 | struct slow_work *work, | ||
| 178 | signed long *_timeout); | ||
| 179 | |||
| 180 | This will add a second wait and then sleep, such that it will be woken up if | ||
| 181 | either something appears on the queue that could usefully make use of the | ||
| 182 | thread - and behind which this item can be queued, or if the event the caller | ||
| 183 | set up to wait for happens. True will be returned if something else appeared | ||
| 184 | on the queue and this work function should perhaps return, or false if | ||
| 185 | something else woke it up. The timeout is as for schedule_timeout(). | ||
| 186 | |||
| 187 | For example: | ||
| 188 | |||
| 189 | wq = bit_waitqueue(&my_flags, MY_BIT); | ||
| 190 | init_wait(&wait); | ||
| 191 | requeue = false; | ||
| 192 | do { | ||
| 193 | prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); | ||
| 194 | if (!test_bit(MY_BIT, &my_flags)) | ||
| 195 | break; | ||
| 196 | requeue = slow_work_sleep_till_thread_needed(&my_work, | ||
| 197 | &timeout); | ||
| 198 | } while (timeout > 0 && !requeue); | ||
| 199 | finish_wait(wq, &wait); | ||
| 200 | if (!test_bit(MY_BIT, &my_flags)) | ||
| 201 | goto do_my_thing; | ||
| 202 | if (requeue) | ||
| 203 | return; // to slow_work | ||
| 114 | 204 | ||
| 115 | 205 | ||
| 116 | =============== | 206 | =============== |
| @@ -118,7 +208,8 @@ ITEM OPERATIONS | |||
| 118 | =============== | 208 | =============== |
| 119 | 209 | ||
| 120 | Each work item requires a table of operations of type struct slow_work_ops. | 210 | Each work item requires a table of operations of type struct slow_work_ops. |
| 121 | All members are required: | 211 | Only ->execute() is required; the getting and putting of a reference and the |
| 212 | describing of an item are all optional. | ||
| 122 | 213 | ||
| 123 | (*) Get a reference on an item: | 214 | (*) Get a reference on an item: |
| 124 | 215 | ||
| @@ -148,6 +239,16 @@ All members are required: | |||
| 148 | This should perform the work required of the item. It may sleep, it may | 239 | This should perform the work required of the item. It may sleep, it may |
| 149 | perform disk I/O and it may wait for locks. | 240 | perform disk I/O and it may wait for locks. |
| 150 | 241 | ||
| 242 | (*) View an item through debugfs: | ||
| 243 | |||
| 244 | void (*desc)(struct slow_work *work, struct seq_file *m); | ||
| 245 | |||
| 246 | If supplied, this should print to 'm' a small string describing the work | ||
| 247 | the item is to do. This should be no more than about 40 characters, and | ||
| 248 | shouldn't include a newline character. | ||
| 249 | |||
| 250 | See the 'Viewing executing and queued items' section below. | ||
| 251 | |||
| 151 | 252 | ||
| 152 | ================== | 253 | ================== |
| 153 | POOL CONFIGURATION | 254 | POOL CONFIGURATION |
| @@ -172,3 +273,50 @@ The slow-work thread pool has a number of configurables: | |||
| 172 | is bounded to between 1 and one fewer than the number of active threads. | 273 | is bounded to between 1 and one fewer than the number of active threads. |
| 173 | This ensures there is always at least one thread that can process very | 274 | This ensures there is always at least one thread that can process very |
| 174 | slow work items, and always at least one thread that won't. | 275 | slow work items, and always at least one thread that won't. |
| 276 | |||
| 277 | |||
| 278 | ================================== | ||
| 279 | VIEWING EXECUTING AND QUEUED ITEMS | ||
| 280 | ================================== | ||
| 281 | |||
| 282 | If CONFIG_SLOW_WORK_DEBUG is enabled, a debugfs file is made available: | ||
| 283 | |||
| 284 | /sys/kernel/debug/slow_work/runqueue | ||
| 285 | |||
| 286 | through which the list of work items being executed and the queues of items to | ||
| 287 | be executed may be viewed. The owner of a work item is given the chance to | ||
| 288 | add some information of its own. | ||
| 289 | |||
| 290 | The contents look something like the following: | ||
| 291 | |||
| 292 | THR PID ITEM ADDR FL MARK DESC | ||
| 293 | === ===== ================ == ===== ========== | ||
| 294 | 0 3005 ffff880023f52348 a 952ms FSC: OBJ17d3: LOOK | ||
| 295 | 1 3006 ffff880024e33668 2 160ms FSC: OBJ17e5 OP60d3b: Write1/Store fl=2 | ||
| 296 | 2 3165 ffff8800296dd180 a 424ms FSC: OBJ17e4: LOOK | ||
| 297 | 3 4089 ffff8800262c8d78 a 212ms FSC: OBJ17ea: CRTN | ||
| 298 | 4 4090 ffff88002792bed8 2 388ms FSC: OBJ17e8 OP60d36: Write1/Store fl=2 | ||
| 299 | 5 4092 ffff88002a0ef308 2 388ms FSC: OBJ17e7 OP60d2e: Write1/Store fl=2 | ||
| 300 | 6 4094 ffff88002abaf4b8 2 132ms FSC: OBJ17e2 OP60d4e: Write1/Store fl=2 | ||
| 301 | 7 4095 ffff88002bb188e0 a 388ms FSC: OBJ17e9: CRTN | ||
| 302 | vsq - ffff880023d99668 1 308ms FSC: OBJ17e0 OP60f91: Write1/EnQ fl=2 | ||
| 303 | vsq - ffff8800295d1740 1 212ms FSC: OBJ16be OP4d4b6: Write1/EnQ fl=2 | ||
| 304 | vsq - ffff880025ba3308 1 160ms FSC: OBJ179a OP58dec: Write1/EnQ fl=2 | ||
| 305 | vsq - ffff880024ec83e0 1 160ms FSC: OBJ17ae OP599f2: Write1/EnQ fl=2 | ||
| 306 | vsq - ffff880026618e00 1 160ms FSC: OBJ17e6 OP60d33: Write1/EnQ fl=2 | ||
| 307 | vsq - ffff880025a2a4b8 1 132ms FSC: OBJ16a2 OP4d583: Write1/EnQ fl=2 | ||
| 308 | vsq - ffff880023cbe6d8 9 212ms FSC: OBJ17eb: LOOK | ||
| 309 | vsq - ffff880024d37590 9 212ms FSC: OBJ17ec: LOOK | ||
| 310 | vsq - ffff880027746cb0 9 212ms FSC: OBJ17ed: LOOK | ||
| 311 | vsq - ffff880024d37ae8 9 212ms FSC: OBJ17ee: LOOK | ||
| 312 | vsq - ffff880024d37cb0 9 212ms FSC: OBJ17ef: LOOK | ||
| 313 | vsq - ffff880025036550 9 212ms FSC: OBJ17f0: LOOK | ||
| 314 | vsq - ffff8800250368e0 9 212ms FSC: OBJ17f1: LOOK | ||
| 315 | vsq - ffff880025036aa8 9 212ms FSC: OBJ17f2: LOOK | ||
| 316 | |||
| 317 | In the 'THR' column, executing items show the thread they're occupying and | ||
| 318 | queued items indicate which queue they're on. 'PID' shows the process ID of | ||
| 319 | a slow-work thread that's executing something. 'FL' shows the work item flags. | ||
| 320 | 'MARK' indicates how long since an item was queued or began executing. Lastly, | ||
| 321 | the 'DESC' column permits the owner of an item to give some information. | ||
| 322 | |||
