Diffstat (limited to 'drivers/block/elevator.c')

 drivers/block/elevator.c | 391
 1 file changed, 238 insertions(+), 153 deletions(-)
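Summary note (editorial, not part of the commit): the diff below converts elevators from the old elevator_next_req_fn/elevator_add_req_fn(q, rq, where) model to a generic dispatch queue. The block layer now owns q->queue_head as the dispatch list, refills it through the new elevator_dispatch_fn(q, force) hook, and elevators move requests onto it with the newly exported elv_dispatch_sort(); last_merge handling, REQ_SORTED marking and the in_flight accounting move into generic code. As a rough, hedged illustration of what an individual I/O scheduler would provide against this interface, here is a minimal noop-style dispatch function. It is a sketch only: the noop_data structure, its queue list head and the function name are assumptions made for the example, not part of this diff.

	/*
	 * Sketch of a noop-style ->elevator_dispatch_fn (illustrative only).
	 * It moves one queued request onto the block layer's dispatch list
	 * with elv_dispatch_sort() and returns non-zero if it moved anything.
	 * The 'force' argument is ignored here; dispatching one request per
	 * call is enough because callers loop until the hook returns 0.
	 */
	struct noop_data {
		struct list_head queue;		/* assumed per-queue private data */
	};

	static int example_noop_dispatch(request_queue_t *q, int force)
	{
		struct noop_data *nd = q->elevator->elevator_data;

		if (!list_empty(&nd->queue)) {
			struct request *rq;

			rq = list_entry(nd->queue.next, struct request, queuelist);
			list_del_init(&rq->queuelist);
			elv_dispatch_sort(q, rq);
			return 1;
		}

		return 0;
	}

As the __elv_add_request() and elevator_switch() hunks below show, the block layer drains a scheduler by looping on while (q->elevator->ops->elevator_dispatch_fn(q, 1)) ;, so per-call granularity is a scheduler implementation detail.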
diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c
index 98f0126a2deb..36f1057084b0 100644
--- a/drivers/block/elevator.c
+++ b/drivers/block/elevator.c
@@ -34,6 +34,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
+#include <linux/delay.h>
 
 #include <asm/uaccess.h>
 
@@ -83,21 +84,11 @@ inline int elv_try_merge(struct request *__rq, struct bio *bio)
 }
 EXPORT_SYMBOL(elv_try_merge);
 
-inline int elv_try_last_merge(request_queue_t *q, struct bio *bio)
-{
-	if (q->last_merge)
-		return elv_try_merge(q->last_merge, bio);
-
-	return ELEVATOR_NO_MERGE;
-}
-EXPORT_SYMBOL(elv_try_last_merge);
-
 static struct elevator_type *elevator_find(const char *name)
 {
 	struct elevator_type *e = NULL;
 	struct list_head *entry;
 
-	spin_lock_irq(&elv_list_lock);
 	list_for_each(entry, &elv_list) {
 		struct elevator_type *__e;
 
@@ -108,7 +99,6 @@ static struct elevator_type *elevator_find(const char *name)
 			break;
 		}
 	}
-	spin_unlock_irq(&elv_list_lock);
 
 	return e;
 }
@@ -120,12 +110,15 @@ static void elevator_put(struct elevator_type *e)
 
 static struct elevator_type *elevator_get(const char *name)
 {
-	struct elevator_type *e = elevator_find(name);
+	struct elevator_type *e;
 
-	if (!e)
-		return NULL;
-	if (!try_module_get(e->elevator_owner))
-		return NULL;
+	spin_lock_irq(&elv_list_lock);
+
+	e = elevator_find(name);
+	if (e && !try_module_get(e->elevator_owner))
+		e = NULL;
+
+	spin_unlock_irq(&elv_list_lock);
 
 	return e;
 }
@@ -139,8 +132,6 @@ static int elevator_attach(request_queue_t *q, struct elevator_type *e,
 	eq->ops = &e->ops;
 	eq->elevator_type = e;
 
-	INIT_LIST_HEAD(&q->queue_head);
-	q->last_merge = NULL;
 	q->elevator = eq;
 
 	if (eq->ops->elevator_init_fn)
@@ -153,23 +144,20 @@ static char chosen_elevator[16];
 
 static void elevator_setup_default(void)
 {
+	struct elevator_type *e;
+
 	/*
-	 * check if default is set and exists
+	 * If default has not been set, use the compiled-in selection.
 	 */
-	if (chosen_elevator[0] && elevator_find(chosen_elevator))
-		return;
+	if (!chosen_elevator[0])
+		strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED);
 
-#if defined(CONFIG_IOSCHED_AS)
-	strcpy(chosen_elevator, "anticipatory");
-#elif defined(CONFIG_IOSCHED_DEADLINE)
-	strcpy(chosen_elevator, "deadline");
-#elif defined(CONFIG_IOSCHED_CFQ)
-	strcpy(chosen_elevator, "cfq");
-#elif defined(CONFIG_IOSCHED_NOOP)
-	strcpy(chosen_elevator, "noop");
-#else
-#error "You must build at least 1 IO scheduler into the kernel"
-#endif
+	/*
+	 * If the given scheduler is not available, fall back to no-op.
+	 */
+	if (!(e = elevator_find(chosen_elevator)))
+		strcpy(chosen_elevator, "noop");
+	elevator_put(e);
 }
 
 static int __init elevator_setup(char *str)
@@ -186,6 +174,11 @@ int elevator_init(request_queue_t *q, char *name)
 	struct elevator_queue *eq;
 	int ret = 0;
 
+	INIT_LIST_HEAD(&q->queue_head);
+	q->last_merge = NULL;
+	q->end_sector = 0;
+	q->boundary_rq = NULL;
+
 	elevator_setup_default();
 
 	if (!name)
@@ -220,9 +213,52 @@ void elevator_exit(elevator_t *e)
 	kfree(e);
 }
 
+/*
+ * Insert rq into dispatch queue of q. Queue lock must be held on
+ * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be
+ * appended to the dispatch queue. To be used by specific elevators.
+ */
+void elv_dispatch_sort(request_queue_t *q, struct request *rq)
+{
+	sector_t boundary;
+	struct list_head *entry;
+
+	if (q->last_merge == rq)
+		q->last_merge = NULL;
+
+	boundary = q->end_sector;
+
+	list_for_each_prev(entry, &q->queue_head) {
+		struct request *pos = list_entry_rq(entry);
+
+		if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
+			break;
+		if (rq->sector >= boundary) {
+			if (pos->sector < boundary)
+				continue;
+		} else {
+			if (pos->sector >= boundary)
+				break;
+		}
+		if (rq->sector >= pos->sector)
+			break;
+	}
+
+	list_add(&rq->queuelist, entry);
+}
+
 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
 	elevator_t *e = q->elevator;
+	int ret;
+
+	if (q->last_merge) {
+		ret = elv_try_merge(q->last_merge, bio);
+		if (ret != ELEVATOR_NO_MERGE) {
+			*req = q->last_merge;
+			return ret;
+		}
+	}
 
 	if (e->ops->elevator_merge_fn)
 		return e->ops->elevator_merge_fn(q, req, bio);
@@ -236,6 +272,8 @@ void elv_merged_request(request_queue_t *q, struct request *rq)
 
 	if (e->ops->elevator_merged_fn)
 		e->ops->elevator_merged_fn(q, rq);
+
+	q->last_merge = rq;
 }
 
 void elv_merge_requests(request_queue_t *q, struct request *rq,
@@ -243,20 +281,13 @@ void elv_merge_requests(request_queue_t *q, struct request *rq,
 {
 	elevator_t *e = q->elevator;
 
-	if (q->last_merge == next)
-		q->last_merge = NULL;
-
 	if (e->ops->elevator_merge_req_fn)
 		e->ops->elevator_merge_req_fn(q, rq, next);
+
+	q->last_merge = rq;
 }
 
-/*
- * For careful internal use by the block layer. Essentially the same as
- * a requeue in that it tells the io scheduler that this request is not
- * active in the driver or hardware anymore, but we don't want the request
- * added back to the scheduler. Function is not exported.
- */
-void elv_deactivate_request(request_queue_t *q, struct request *rq)
+void elv_requeue_request(request_queue_t *q, struct request *rq)
 {
 	elevator_t *e = q->elevator;
 
@@ -264,19 +295,14 @@ void elv_deactivate_request(request_queue_t *q, struct request *rq)
 	 * it already went through dequeue, we need to decrement the
 	 * in_flight count again
 	 */
-	if (blk_account_rq(rq))
+	if (blk_account_rq(rq)) {
 		q->in_flight--;
+		if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
+			e->ops->elevator_deactivate_req_fn(q, rq);
+	}
 
 	rq->flags &= ~REQ_STARTED;
 
-	if (e->ops->elevator_deactivate_req_fn)
-		e->ops->elevator_deactivate_req_fn(q, rq);
-}
-
-void elv_requeue_request(request_queue_t *q, struct request *rq)
-{
-	elv_deactivate_request(q, rq);
-
 	/*
 	 * if this is the flush, requeue the original instead and drop the flush
 	 */
@@ -285,31 +311,27 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
 		rq = rq->end_io_data;
 	}
 
-	/*
-	 * the request is prepped and may have some resources allocated.
-	 * allowing unprepped requests to pass this one may cause resource
-	 * deadlock. turn on softbarrier.
-	 */
-	rq->flags |= REQ_SOFTBARRIER;
-
-	/*
-	 * if iosched has an explicit requeue hook, then use that. otherwise
-	 * just put the request at the front of the queue
-	 */
-	if (q->elevator->ops->elevator_requeue_req_fn)
-		q->elevator->ops->elevator_requeue_req_fn(q, rq);
-	else
-		__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
+	__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
 }
 
 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 		       int plug)
 {
-	/*
-	 * barriers implicitly indicate back insertion
-	 */
-	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER) &&
-	    where == ELEVATOR_INSERT_SORT)
+	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
+		/*
+		 * barriers implicitly indicate back insertion
+		 */
+		if (where == ELEVATOR_INSERT_SORT)
+			where = ELEVATOR_INSERT_BACK;
+
+		/*
+		 * this request is scheduling boundary, update end_sector
+		 */
+		if (blk_fs_request(rq)) {
+			q->end_sector = rq_end_sector(rq);
+			q->boundary_rq = rq;
+		}
+	} else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
 		where = ELEVATOR_INSERT_BACK;
 
 	if (plug)
@@ -317,23 +339,54 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 
 	rq->q = q;
 
-	if (!test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) {
-		q->elevator->ops->elevator_add_req_fn(q, rq, where);
+	switch (where) {
+	case ELEVATOR_INSERT_FRONT:
+		rq->flags |= REQ_SOFTBARRIER;
 
-		if (blk_queue_plugged(q)) {
-			int nrq = q->rq.count[READ] + q->rq.count[WRITE]
-				  - q->in_flight;
+		list_add(&rq->queuelist, &q->queue_head);
+		break;
 
-			if (nrq >= q->unplug_thresh)
-				__generic_unplug_device(q);
-		}
-	} else
+	case ELEVATOR_INSERT_BACK:
+		rq->flags |= REQ_SOFTBARRIER;
+
+		while (q->elevator->ops->elevator_dispatch_fn(q, 1))
+			;
+		list_add_tail(&rq->queuelist, &q->queue_head);
 		/*
-		 * if drain is set, store the request "locally". when the drain
-		 * is finished, the requests will be handed ordered to the io
-		 * scheduler
+		 * We kick the queue here for the following reasons.
+		 * - The elevator might have returned NULL previously
+		 *   to delay requests and returned them now. As the
+		 *   queue wasn't empty before this request, ll_rw_blk
+		 *   won't run the queue on return, resulting in hang.
+		 * - Usually, back inserted requests won't be merged
+		 *   with anything. There's no point in delaying queue
+		 *   processing.
 		 */
-		list_add_tail(&rq->queuelist, &q->drain_list);
+		blk_remove_plug(q);
+		q->request_fn(q);
+		break;
+
+	case ELEVATOR_INSERT_SORT:
+		BUG_ON(!blk_fs_request(rq));
+		rq->flags |= REQ_SORTED;
+		q->elevator->ops->elevator_add_req_fn(q, rq);
+		if (q->last_merge == NULL && rq_mergeable(rq))
+			q->last_merge = rq;
+		break;
+
+	default:
+		printk(KERN_ERR "%s: bad insertion point %d\n",
+		       __FUNCTION__, where);
+		BUG();
+	}
+
+	if (blk_queue_plugged(q)) {
+		int nrq = q->rq.count[READ] + q->rq.count[WRITE]
+			- q->in_flight;
+
+		if (nrq >= q->unplug_thresh)
+			__generic_unplug_device(q);
+	}
 }
 
 void elv_add_request(request_queue_t *q, struct request *rq, int where,
@@ -348,13 +401,19 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where,
 
 static inline struct request *__elv_next_request(request_queue_t *q)
 {
-	struct request *rq = q->elevator->ops->elevator_next_req_fn(q);
+	struct request *rq;
+
+	if (unlikely(list_empty(&q->queue_head) &&
+		     !q->elevator->ops->elevator_dispatch_fn(q, 0)))
+		return NULL;
+
+	rq = list_entry_rq(q->queue_head.next);
 
 	/*
 	 * if this is a barrier write and the device has to issue a
 	 * flush sequence to support it, check how far we are
 	 */
-	if (rq && blk_fs_request(rq) && blk_barrier_rq(rq)) {
+	if (blk_fs_request(rq) && blk_barrier_rq(rq)) {
 		BUG_ON(q->ordered == QUEUE_ORDERED_NONE);
 
 		if (q->ordered == QUEUE_ORDERED_FLUSH &&
@@ -371,15 +430,30 @@ struct request *elv_next_request(request_queue_t *q)
 	int ret;
 
 	while ((rq = __elv_next_request(q)) != NULL) {
-		/*
-		 * just mark as started even if we don't start it, a request
-		 * that has been delayed should not be passed by new incoming
-		 * requests
-		 */
-		rq->flags |= REQ_STARTED;
+		if (!(rq->flags & REQ_STARTED)) {
+			elevator_t *e = q->elevator;
+
+			/*
+			 * This is the first time the device driver
+			 * sees this request (possibly after
+			 * requeueing). Notify IO scheduler.
+			 */
+			if (blk_sorted_rq(rq) &&
+			    e->ops->elevator_activate_req_fn)
+				e->ops->elevator_activate_req_fn(q, rq);
+
+			/*
+			 * just mark as started even if we don't start
+			 * it, a request that has been delayed should
+			 * not be passed by new incoming requests
+			 */
+			rq->flags |= REQ_STARTED;
+		}
 
-		if (rq == q->last_merge)
-			q->last_merge = NULL;
+		if (!q->boundary_rq || q->boundary_rq == rq) {
+			q->end_sector = rq_end_sector(rq);
+			q->boundary_rq = NULL;
+		}
 
 		if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
 			break;
@@ -391,9 +465,9 @@ struct request *elv_next_request(request_queue_t *q)
 			/*
 			 * the request may have been (partially) prepped.
 			 * we need to keep this request in the front to
-			 * avoid resource deadlock. turn on softbarrier.
+			 * avoid resource deadlock. REQ_STARTED will
+			 * prevent other fs requests from passing this one.
 			 */
-			rq->flags |= REQ_SOFTBARRIER;
 			rq = NULL;
 			break;
 		} else if (ret == BLKPREP_KILL) {
@@ -416,42 +490,32 @@ struct request *elv_next_request(request_queue_t *q)
 	return rq;
 }
 
-void elv_remove_request(request_queue_t *q, struct request *rq)
+void elv_dequeue_request(request_queue_t *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	BUG_ON(list_empty(&rq->queuelist));
+
+	list_del_init(&rq->queuelist);
 
 	/*
 	 * the time frame between a request being removed from the lists
 	 * and to it is freed is accounted as io that is in progress at
-	 * the driver side. note that we only account requests that the
-	 * driver has seen (REQ_STARTED set), to avoid false accounting
-	 * for request-request merges
+	 * the driver side.
 	 */
 	if (blk_account_rq(rq))
 		q->in_flight++;
-
-	/*
-	 * the main clearing point for q->last_merge is on retrieval of
-	 * request by driver (it calls elv_next_request()), but it _can_
-	 * also happen here if a request is added to the queue but later
-	 * deleted without ever being given to driver (merged with another
-	 * request).
-	 */
-	if (rq == q->last_merge)
-		q->last_merge = NULL;
-
-	if (e->ops->elevator_remove_req_fn)
-		e->ops->elevator_remove_req_fn(q, rq);
 }
 
 int elv_queue_empty(request_queue_t *q)
 {
 	elevator_t *e = q->elevator;
 
+	if (!list_empty(&q->queue_head))
+		return 0;
+
 	if (e->ops->elevator_queue_empty_fn)
 		return e->ops->elevator_queue_empty_fn(q);
 
-	return list_empty(&q->queue_head);
+	return 1;
 }
 
 struct request *elv_latter_request(request_queue_t *q, struct request *rq)
@@ -487,7 +551,7 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq)
 }
 
 int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		    int gfp_mask)
+		    gfp_t gfp_mask)
 {
 	elevator_t *e = q->elevator;
 
@@ -523,11 +587,11 @@ void elv_completed_request(request_queue_t *q, struct request *rq)
 	/*
 	 * request is released from the driver, io must be done
 	 */
-	if (blk_account_rq(rq))
+	if (blk_account_rq(rq)) {
 		q->in_flight--;
-
-	if (e->ops->elevator_completed_req_fn)
-		e->ops->elevator_completed_req_fn(q, rq);
+		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
+			e->ops->elevator_completed_req_fn(q, rq);
+	}
 }
 
 int elv_register_queue(struct request_queue *q)
@@ -555,10 +619,9 @@ void elv_unregister_queue(struct request_queue *q)
 
 int elv_register(struct elevator_type *e)
 {
+	spin_lock_irq(&elv_list_lock);
 	if (elevator_find(e->elevator_name))
 		BUG();
-
-	spin_lock_irq(&elv_list_lock);
 	list_add_tail(&e->list, &elv_list);
 	spin_unlock_irq(&elv_list_lock);
 
@@ -572,6 +635,27 @@ EXPORT_SYMBOL_GPL(elv_register);
 
 void elv_unregister(struct elevator_type *e)
 {
+	struct task_struct *g, *p;
+
+	/*
+	 * Iterate every thread in the process to remove the io contexts.
+	 */
+	read_lock(&tasklist_lock);
+	do_each_thread(g, p) {
+		struct io_context *ioc = p->io_context;
+		if (ioc && ioc->cic) {
+			ioc->cic->exit(ioc->cic);
+			ioc->cic->dtor(ioc->cic);
+			ioc->cic = NULL;
+		}
+		if (ioc && ioc->aic) {
+			ioc->aic->exit(ioc->aic);
+			ioc->aic->dtor(ioc->aic);
+			ioc->aic = NULL;
+		}
+	} while_each_thread(g, p);
+	read_unlock(&tasklist_lock);
+
 	spin_lock_irq(&elv_list_lock);
 	list_del_init(&e->list);
 	spin_unlock_irq(&elv_list_lock);
@@ -582,25 +666,36 @@ EXPORT_SYMBOL_GPL(elv_unregister);
  * switch to new_e io scheduler. be careful not to introduce deadlocks -
  * we don't free the old io scheduler, before we have allocated what we
  * need for the new one. this way we have a chance of going back to the old
- * one, if the new one fails init for some reason. we also do an intermediate
- * switch to noop to ensure safety with stack-allocated requests, since they
- * don't originate from the block layer allocator. noop is safe here, because
- * it never needs to touch the elevator itself for completion events. DRAIN
- * flags will make sure we don't touch it for additions either.
+ * one, if the new one fails init for some reason.
  */
 static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 {
-	elevator_t *e = kmalloc(sizeof(elevator_t), GFP_KERNEL);
-	struct elevator_type *noop_elevator = NULL;
-	elevator_t *old_elevator;
+	elevator_t *old_elevator, *e;
 
+	/*
+	 * Allocate new elevator
+	 */
+	e = kmalloc(sizeof(elevator_t), GFP_KERNEL);
 	if (!e)
 		goto error;
 
 	/*
-	 * first step, drain requests from the block freelist
+	 * Turn on BYPASS and drain all requests w/ elevator private data
 	 */
-	blk_wait_queue_drained(q, 0);
+	spin_lock_irq(q->queue_lock);
+
+	set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+
+	while (q->elevator->ops->elevator_dispatch_fn(q, 1))
+		;
+
+	while (q->rq.elvpriv) {
+		spin_unlock_irq(q->queue_lock);
+		msleep(10);
+		spin_lock_irq(q->queue_lock);
+	}
+
+	spin_unlock_irq(q->queue_lock);
 
 	/*
 	 * unregister old elevator data
@@ -609,18 +704,6 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 	old_elevator = q->elevator;
 
 	/*
-	 * next step, switch to noop since it uses no private rq structures
-	 * and doesn't allocate any memory for anything. then wait for any
-	 * non-fs requests in-flight
-	 */
-	noop_elevator = elevator_get("noop");
-	spin_lock_irq(q->queue_lock);
-	elevator_attach(q, noop_elevator, e);
-	spin_unlock_irq(q->queue_lock);
-
-	blk_wait_queue_drained(q, 1);
-
-	/*
 	 * attach and start new elevator
 	 */
 	if (elevator_attach(q, new_e, e))
@@ -630,11 +713,10 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 		goto fail_register;
 
 	/*
-	 * finally exit old elevator and start queue again
+	 * finally exit old elevator and turn off BYPASS.
 	 */
 	elevator_exit(old_elevator);
-	blk_finish_queue_drain(q);
-	elevator_put(noop_elevator);
+	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
 	return;
 
 fail_register:
@@ -643,13 +725,13 @@ fail_register:
 	 * one again (along with re-adding the sysfs dir)
 	 */
 	elevator_exit(e);
+	e = NULL;
 fail:
 	q->elevator = old_elevator;
 	elv_register_queue(q);
-	blk_finish_queue_drain(q);
+	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+	kfree(e);
 error:
-	if (noop_elevator)
-		elevator_put(noop_elevator);
 	elevator_put(new_e);
 	printk(KERN_ERR "elevator: switch to %s failed\n",new_e->elevator_name);
 }
@@ -671,8 +753,10 @@ ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
 		return -EINVAL;
 	}
 
-	if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name))
+	if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
+		elevator_put(e);
 		return count;
+	}
 
 	elevator_switch(q, e);
 	return count;
@@ -701,11 +785,12 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name)
 	return len;
 }
 
+EXPORT_SYMBOL(elv_dispatch_sort);
 EXPORT_SYMBOL(elv_add_request);
 EXPORT_SYMBOL(__elv_add_request);
 EXPORT_SYMBOL(elv_requeue_request);
 EXPORT_SYMBOL(elv_next_request);
-EXPORT_SYMBOL(elv_remove_request);
+EXPORT_SYMBOL(elv_dequeue_request);
 EXPORT_SYMBOL(elv_queue_empty);
 EXPORT_SYMBOL(elv_completed_request);
 EXPORT_SYMBOL(elevator_exit);
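Consumer-side note (editorial, not part of the commit): the rename of elv_remove_request() to elv_dequeue_request() reflects that requests now come off the block layer's dispatch list rather than being removed from the elevator. A hedged sketch of the driver-side pattern this implies is below; the driver body is hypothetical, and blkdev_dequeue_request() is assumed here to be the usual wrapper around elv_dequeue_request().

	/*
	 * Hypothetical driver request_fn sketch. elv_next_request() now pulls
	 * from q->queue_head, refilling it via ->elevator_dispatch_fn when it
	 * runs empty; each request is detached from the dispatch list before
	 * being handed to hardware, and completion later goes through
	 * elv_completed_request() for the in_flight accounting shown above.
	 */
	static void example_request_fn(request_queue_t *q)
	{
		struct request *rq;

		while ((rq = elv_next_request(q)) != NULL) {
			/* take rq off the dispatch list (wraps elv_dequeue_request) */
			blkdev_dequeue_request(rq);

			/* ... start rq on the hardware ... */
		}
	}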