Diffstat (limited to 'block/elevator.c')

 -rw-r--r--  block/elevator.c  802
 1 files changed, 802 insertions, 0 deletions

diff --git a/block/elevator.c b/block/elevator.c
new file mode 100644
index 000000000000..d4a49a3df829
--- /dev/null
+++ b/block/elevator.c
@@ -0,0 +1,802 @@
/*
 *  linux/drivers/block/elevator.c
 *
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@suse.de> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 * when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/delay.h>

#include <asm/uaccess.h>

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * can we safely merge with this request?
 */
inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
        if (!rq_mergeable(rq))
                return 0;

        /*
         * different data direction or already started, don't merge
         */
        if (bio_data_dir(bio) != rq_data_dir(rq))
                return 0;

        /*
         * same device and no special stuff set, merge is ok
         */
        if (rq->rq_disk == bio->bi_bdev->bd_disk &&
            !rq->waiting && !rq->special)
                return 1;

        return 0;
}
EXPORT_SYMBOL(elv_rq_merge_ok);

inline int elv_try_merge(struct request *__rq, struct bio *bio)
{
        int ret = ELEVATOR_NO_MERGE;

        /*
         * we can merge and sequence is ok, check if it's possible
         */
        if (elv_rq_merge_ok(__rq, bio)) {
                if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
                        ret = ELEVATOR_BACK_MERGE;
                else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
                        ret = ELEVATOR_FRONT_MERGE;
        }

        return ret;
}
EXPORT_SYMBOL(elv_try_merge);
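/*
 * A sketch of the geometry the two checks above encode, with rq
 * covering sectors [rq->sector, rq->sector + rq->nr_sectors):
 *
 *	back merge:   |---- rq ----||-- bio --|
 *	              rq->sector + rq->nr_sectors == bio->bi_sector
 *
 *	front merge:  |-- bio --||---- rq ----|
 *	              bio->bi_sector + bio_sectors(bio) == rq->sector
 */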

static struct elevator_type *elevator_find(const char *name)
{
        struct elevator_type *e = NULL;
        struct list_head *entry;

        list_for_each(entry, &elv_list) {
                struct elevator_type *__e;

                __e = list_entry(entry, struct elevator_type, list);

                if (!strcmp(__e->elevator_name, name)) {
                        e = __e;
                        break;
                }
        }

        return e;
}

static void elevator_put(struct elevator_type *e)
{
        module_put(e->elevator_owner);
}

static struct elevator_type *elevator_get(const char *name)
{
        struct elevator_type *e;

        spin_lock_irq(&elv_list_lock);

        e = elevator_find(name);
        if (e && !try_module_get(e->elevator_owner))
                e = NULL;

        spin_unlock_irq(&elv_list_lock);

        return e;
}
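/*
 * Usage sketch (hypothetical caller): elevator_get() looks the type up
 * under elv_list_lock and pins the owning module, so a found scheduler
 * cannot be unloaded while referenced; every successful get must be
 * balanced by elevator_put().
 *
 *	struct elevator_type *e = elevator_get("deadline");
 *	if (e) {
 *		... use e->ops ...
 *		elevator_put(e);
 *	}
 */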

static int elevator_attach(request_queue_t *q, struct elevator_type *e,
                           struct elevator_queue *eq)
{
        int ret = 0;

        memset(eq, 0, sizeof(*eq));
        eq->ops = &e->ops;
        eq->elevator_type = e;

        q->elevator = eq;

        if (eq->ops->elevator_init_fn)
                ret = eq->ops->elevator_init_fn(q, eq);

        return ret;
}

static char chosen_elevator[16];

static void elevator_setup_default(void)
{
        struct elevator_type *e;

        /*
         * If default has not been set, use the compiled-in selection.
         */
        if (!chosen_elevator[0])
                strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED);

        /*
         * If the given scheduler is not available, fall back to no-op.
         * elevator_find() takes no module reference, so there is
         * nothing to put here (and e may legitimately be NULL).
         */
        if (!(e = elevator_find(chosen_elevator)))
                strcpy(chosen_elevator, "noop");
}

static int __init elevator_setup(char *str)
{
        strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
        return 1;	/* non-zero tells the boot code the option was handled */
}

__setup("elevator=", elevator_setup);
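/*
 * Example (illustrative): booting with "elevator=deadline" on the
 * kernel command line fills chosen_elevator before any queue is
 * initialized, so elevator_init() below picks that scheduler for
 * every queue, falling back as in elevator_setup_default() when the
 * named scheduler is not compiled in or loaded.
 */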

int elevator_init(request_queue_t *q, char *name)
{
        struct elevator_type *e = NULL;
        struct elevator_queue *eq;
        int ret = 0;

        INIT_LIST_HEAD(&q->queue_head);
        q->last_merge = NULL;
        q->end_sector = 0;
        q->boundary_rq = NULL;

        elevator_setup_default();

        if (!name)
                name = chosen_elevator;

        e = elevator_get(name);
        if (!e)
                return -EINVAL;

        eq = kmalloc(sizeof(struct elevator_queue), GFP_KERNEL);
        if (!eq) {
                elevator_put(e);
                return -ENOMEM;
        }

        ret = elevator_attach(q, e, eq);
        if (ret) {
                kfree(eq);
                elevator_put(e);
        }

        return ret;
}

void elevator_exit(elevator_t *e)
{
        if (e->ops->elevator_exit_fn)
                e->ops->elevator_exit_fn(e);

        elevator_put(e->elevator_type);
        e->elevator_type = NULL;
        kfree(e);
}

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is sort-inserted into the dispatch queue, honouring the
 * current dispatch boundary.  To be used by specific elevators.
 */
void elv_dispatch_sort(request_queue_t *q, struct request *rq)
{
        sector_t boundary;
        struct list_head *entry;

        if (q->last_merge == rq)
                q->last_merge = NULL;

        boundary = q->end_sector;

        list_for_each_prev(entry, &q->queue_head) {
                struct request *pos = list_entry_rq(entry);

                if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
                        break;
                if (rq->sector >= boundary) {
                        if (pos->sector < boundary)
                                continue;
                } else {
                        if (pos->sector >= boundary)
                                break;
                }
                if (rq->sector >= pos->sector)
                        break;
        }

        list_add(&rq->queuelist, entry);
}
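/*
 * A sketch of the ordering this produces: the dispatch list stays in
 * ascending sector order, except that requests at or above the current
 * boundary (end_sector) sort before requests that wrapped back below
 * it, e.g. with boundary == 100:
 *
 *	head -> 100, 120, 250, 10, 40, 70 <- tail
 *
 * which approximates one continuing sweep of the disk head.  Barrier
 * and already-started requests are never passed.
 */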

int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
        elevator_t *e = q->elevator;
        int ret;

        if (q->last_merge) {
                ret = elv_try_merge(q->last_merge, bio);
                if (ret != ELEVATOR_NO_MERGE) {
                        *req = q->last_merge;
                        return ret;
                }
        }

        if (e->ops->elevator_merge_fn)
                return e->ops->elevator_merge_fn(q, req, bio);

        return ELEVATOR_NO_MERGE;
}
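/*
 * Contract sketch for callers (__make_request() in ll_rw_blk): on
 * ELEVATOR_BACK_MERGE the bio is appended to *req, on
 * ELEVATOR_FRONT_MERGE it is prepended, and on ELEVATOR_NO_MERGE a
 * fresh request gets allocated instead.  last_merge is a one-entry
 * cache that skips the elevator lookup while the previous merge
 * target still matches.
 */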

void elv_merged_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_merged_fn)
                e->ops->elevator_merged_fn(q, rq);

        q->last_merge = rq;
}

void elv_merge_requests(request_queue_t *q, struct request *rq,
                        struct request *next)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_merge_req_fn)
                e->ops->elevator_merge_req_fn(q, rq, next);

        q->last_merge = rq;
}

void elv_requeue_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        /*
         * it already went through dequeue, we need to decrement the
         * in_flight count again
         */
        if (blk_account_rq(rq)) {
                q->in_flight--;
                if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
                        e->ops->elevator_deactivate_req_fn(q, rq);
        }

        rq->flags &= ~REQ_STARTED;

        /*
         * if this is the flush, requeue the original instead and drop the flush
         */
        if (rq->flags & REQ_BAR_FLUSH) {
                clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
                rq = rq->end_io_data;
        }

        __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
}

void __elv_add_request(request_queue_t *q, struct request *rq, int where,
                       int plug)
{
        if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
                /*
                 * barriers implicitly indicate back insertion
                 */
                if (where == ELEVATOR_INSERT_SORT)
                        where = ELEVATOR_INSERT_BACK;

                /*
                 * this request is scheduling boundary, update end_sector
                 */
                if (blk_fs_request(rq)) {
                        q->end_sector = rq_end_sector(rq);
                        q->boundary_rq = rq;
                }
        } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
                where = ELEVATOR_INSERT_BACK;

        if (plug)
                blk_plug_device(q);

        rq->q = q;

        switch (where) {
        case ELEVATOR_INSERT_FRONT:
                rq->flags |= REQ_SOFTBARRIER;

                list_add(&rq->queuelist, &q->queue_head);
                break;

        case ELEVATOR_INSERT_BACK:
                rq->flags |= REQ_SOFTBARRIER;

                while (q->elevator->ops->elevator_dispatch_fn(q, 1))
                        ;
                list_add_tail(&rq->queuelist, &q->queue_head);
                /*
                 * We kick the queue here for the following reasons.
                 * - The elevator might have returned NULL previously
                 *   to delay requests and returned them now.  As the
                 *   queue wasn't empty before this request, ll_rw_blk
                 *   won't run the queue on return, resulting in hang.
                 * - Usually, back inserted requests won't be merged
                 *   with anything.  There's no point in delaying queue
                 *   processing.
                 */
                blk_remove_plug(q);
                q->request_fn(q);
                break;

        case ELEVATOR_INSERT_SORT:
                BUG_ON(!blk_fs_request(rq));
                rq->flags |= REQ_SORTED;
                if (q->last_merge == NULL && rq_mergeable(rq))
                        q->last_merge = rq;
                /*
                 * Some ioscheds (cfq) run q->request_fn directly, so
                 * rq cannot be accessed after calling
                 * elevator_add_req_fn.
                 */
                q->elevator->ops->elevator_add_req_fn(q, rq);
                break;

        default:
                printk(KERN_ERR "%s: bad insertion point %d\n",
                       __FUNCTION__, where);
                BUG();
        }

        if (blk_queue_plugged(q)) {
                int nrq = q->rq.count[READ] + q->rq.count[WRITE]
                        - q->in_flight;

                if (nrq >= q->unplug_thresh)
                        __generic_unplug_device(q);
        }
}

void elv_add_request(request_queue_t *q, struct request *rq, int where,
                     int plug)
{
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        __elv_add_request(q, rq, where, plug);
        spin_unlock_irqrestore(q->queue_lock, flags);
}
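/*
 * Typical usage (sketch): normal file system requests are queued with
 *
 *	elv_add_request(q, rq, ELEVATOR_INSERT_SORT, 1);
 *
 * and left to the io scheduler to order, while requeues use
 * ELEVATOR_INSERT_FRONT and barriers fall back to
 * ELEVATOR_INSERT_BACK, as handled above.  Callers already holding
 * q->queue_lock call __elv_add_request() directly.
 */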

static inline struct request *__elv_next_request(request_queue_t *q)
{
        struct request *rq;

        if (unlikely(list_empty(&q->queue_head) &&
                     !q->elevator->ops->elevator_dispatch_fn(q, 0)))
                return NULL;

        rq = list_entry_rq(q->queue_head.next);

        /*
         * if this is a barrier write and the device has to issue a
         * flush sequence to support it, check how far we are
         */
        if (blk_fs_request(rq) && blk_barrier_rq(rq)) {
                BUG_ON(q->ordered == QUEUE_ORDERED_NONE);

                if (q->ordered == QUEUE_ORDERED_FLUSH &&
                    !blk_barrier_preflush(rq))
                        rq = blk_start_pre_flush(q, rq);
        }

        return rq;
}

struct request *elv_next_request(request_queue_t *q)
{
        struct request *rq;
        int ret;

        while ((rq = __elv_next_request(q)) != NULL) {
                if (!(rq->flags & REQ_STARTED)) {
                        elevator_t *e = q->elevator;

                        /*
                         * This is the first time the device driver
                         * sees this request (possibly after
                         * requeueing).  Notify IO scheduler.
                         */
                        if (blk_sorted_rq(rq) &&
                            e->ops->elevator_activate_req_fn)
                                e->ops->elevator_activate_req_fn(q, rq);

                        /*
                         * just mark as started even if we don't start
                         * it, a request that has been delayed should
                         * not be passed by new incoming requests
                         */
                        rq->flags |= REQ_STARTED;
                }

                if (!q->boundary_rq || q->boundary_rq == rq) {
                        q->end_sector = rq_end_sector(rq);
                        q->boundary_rq = NULL;
                }

                if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
                        break;

                ret = q->prep_rq_fn(q, rq);
                if (ret == BLKPREP_OK) {
                        break;
                } else if (ret == BLKPREP_DEFER) {
                        /*
                         * the request may have been (partially) prepped.
                         * we need to keep this request in the front to
                         * avoid resource deadlock.  REQ_STARTED will
                         * prevent other fs requests from passing this one.
                         */
                        rq = NULL;
                        break;
                } else if (ret == BLKPREP_KILL) {
                        int nr_bytes = rq->hard_nr_sectors << 9;

                        if (!nr_bytes)
                                nr_bytes = rq->data_len;

                        blkdev_dequeue_request(rq);
                        rq->flags |= REQ_QUIET;
                        end_that_request_chunk(rq, 0, nr_bytes);
                        end_that_request_last(rq);
                } else {
                        printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
                               ret);
                        break;
                }
        }

        return rq;
}
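/*
 * The canonical consumer is a driver's request_fn; a minimal sketch
 * (function name hypothetical):
 *
 *	static void example_request_fn(request_queue_t *q)
 *	{
 *		struct request *rq;
 *
 *		while ((rq = elv_next_request(q)) != NULL) {
 *			blkdev_dequeue_request(rq);
 *			... hand rq to the hardware ...
 *		}
 *	}
 *
 * A NULL return means the queue is empty or a request was deferred
 * with BLKPREP_DEFER above; a deferred request stays at the head until
 * the driver can take it.
 */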

void elv_dequeue_request(request_queue_t *q, struct request *rq)
{
        BUG_ON(list_empty(&rq->queuelist));

        list_del_init(&rq->queuelist);

        /*
         * the time frame between a request being removed from the lists
         * and when it is freed is accounted as io that is in progress
         * at the driver side.
         */
        if (blk_account_rq(rq))
                q->in_flight++;
}

int elv_queue_empty(request_queue_t *q)
{
        elevator_t *e = q->elevator;

        if (!list_empty(&q->queue_head))
                return 0;

        if (e->ops->elevator_queue_empty_fn)
                return e->ops->elevator_queue_empty_fn(q);

        return 1;
}

struct request *elv_latter_request(request_queue_t *q, struct request *rq)
{
        struct list_head *next;

        elevator_t *e = q->elevator;

        if (e->ops->elevator_latter_req_fn)
                return e->ops->elevator_latter_req_fn(q, rq);

        next = rq->queuelist.next;
        if (next != &q->queue_head && next != &rq->queuelist)
                return list_entry_rq(next);

        return NULL;
}

struct request *elv_former_request(request_queue_t *q, struct request *rq)
{
        struct list_head *prev;

        elevator_t *e = q->elevator;

        if (e->ops->elevator_former_req_fn)
                return e->ops->elevator_former_req_fn(q, rq);

        prev = rq->queuelist.prev;
        if (prev != &q->queue_head && prev != &rq->queuelist)
                return list_entry_rq(prev);

        return NULL;
}

int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
                    gfp_t gfp_mask)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_set_req_fn)
                return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);

        rq->elevator_private = NULL;
        return 0;
}

void elv_put_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_put_req_fn)
                e->ops->elevator_put_req_fn(q, rq);
}

int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_may_queue_fn)
                return e->ops->elevator_may_queue_fn(q, rw, bio);

        return ELV_MQUEUE_MAY;
}

void elv_completed_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        /*
         * request is released from the driver, io must be done
         */
        if (blk_account_rq(rq)) {
                q->in_flight--;
                if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
                        e->ops->elevator_completed_req_fn(q, rq);
        }
}
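/*
 * Accounting sketch: q->in_flight is incremented in
 * elv_dequeue_request() when the driver takes ownership of a request
 * and decremented either here on completion or in
 * elv_requeue_request() when the driver hands the request back, so it
 * always counts requests currently owned by the driver.
 */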

int elv_register_queue(struct request_queue *q)
{
        elevator_t *e = q->elevator;

        e->kobj.parent = kobject_get(&q->kobj);
        if (!e->kobj.parent)
                return -EBUSY;

        snprintf(e->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
        e->kobj.ktype = e->elevator_type->elevator_ktype;

        return kobject_register(&e->kobj);
}

void elv_unregister_queue(struct request_queue *q)
{
        if (q) {
                elevator_t *e = q->elevator;
                kobject_unregister(&e->kobj);
                kobject_put(&q->kobj);
        }
}

int elv_register(struct elevator_type *e)
{
        spin_lock_irq(&elv_list_lock);
        if (elevator_find(e->elevator_name))
                BUG();
        list_add_tail(&e->list, &elv_list);
        spin_unlock_irq(&elv_list_lock);

        printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
        if (!strcmp(e->elevator_name, chosen_elevator))
                printk(" (default)");
        printk("\n");
        return 0;
}
EXPORT_SYMBOL_GPL(elv_register);
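/*
 * Registration sketch (field values hypothetical, modelled on the
 * built-in schedulers): an io scheduler module fills in a
 * struct elevator_type and registers it from its init routine:
 *
 *	static struct elevator_type iosched_example = {
 *		.ops		= { ... },
 *		.elevator_name	= "example",
 *		.elevator_owner	= THIS_MODULE,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return elv_register(&iosched_example);
 *	}
 */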

void elv_unregister(struct elevator_type *e)
{
        struct task_struct *g, *p;

        /*
         * Iterate over every thread in the system and remove any io
         * contexts they still hold.
         */
        read_lock(&tasklist_lock);
        do_each_thread(g, p) {
                struct io_context *ioc = p->io_context;
                if (ioc && ioc->cic) {
                        ioc->cic->exit(ioc->cic);
                        ioc->cic->dtor(ioc->cic);
                        ioc->cic = NULL;
                }
                if (ioc && ioc->aic) {
                        ioc->aic->exit(ioc->aic);
                        ioc->aic->dtor(ioc->aic);
                        ioc->aic = NULL;
                }
        } while_each_thread(g, p);
        read_unlock(&tasklist_lock);

        spin_lock_irq(&elv_list_lock);
        list_del_init(&e->list);
        spin_unlock_irq(&elv_list_lock);
}
EXPORT_SYMBOL_GPL(elv_unregister);

/*
 * switch to new_e io scheduler.  be careful not to introduce deadlocks -
 * we don't free the old io scheduler before we have allocated what we
 * need for the new one; this way we have a chance of going back to the
 * old one, if the new one fails init for some reason.
 */
static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
{
        elevator_t *old_elevator, *e;

        /*
         * Allocate new elevator
         */
        e = kmalloc(sizeof(elevator_t), GFP_KERNEL);
        if (!e)
                goto error;

        /*
         * Turn on BYPASS and drain all requests w/ elevator private data
         */
        spin_lock_irq(q->queue_lock);

        set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

        while (q->elevator->ops->elevator_dispatch_fn(q, 1))
                ;

        while (q->rq.elvpriv) {
                spin_unlock_irq(q->queue_lock);
                msleep(10);
                spin_lock_irq(q->queue_lock);
        }

        spin_unlock_irq(q->queue_lock);

        /*
         * unregister old elevator data
         */
        elv_unregister_queue(q);
        old_elevator = q->elevator;

        /*
         * attach and start new elevator
         */
        if (elevator_attach(q, new_e, e))
                goto fail;

        if (elv_register_queue(q))
                goto fail_register;

        /*
         * finally exit old elevator and turn off BYPASS.
         */
        elevator_exit(old_elevator);
        clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
        return;

fail_register:
        /*
         * switch failed, exit the new io scheduler and reattach the old
         * one again (along with re-adding the sysfs dir)
         */
        elevator_exit(e);
        e = NULL;
fail:
        q->elevator = old_elevator;
        elv_register_queue(q);
        clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
        kfree(e);
error:
        elevator_put(new_e);
        printk(KERN_ERR "elevator: switch to %s failed\n", new_e->elevator_name);
}

ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
{
        char elevator_name[ELV_NAME_MAX];
        struct elevator_type *e;

        /*
         * leave room for the terminating NUL so the strlen() calls
         * below are always safe
         */
        memset(elevator_name, 0, sizeof(elevator_name));
        strncpy(elevator_name, name, sizeof(elevator_name) - 1);

        if (elevator_name[strlen(elevator_name) - 1] == '\n')
                elevator_name[strlen(elevator_name) - 1] = '\0';

        e = elevator_get(elevator_name);
        if (!e) {
                printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
                return -EINVAL;
        }

        if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
                elevator_put(e);
                return count;
        }

        elevator_switch(q, e);
        return count;
}

ssize_t elv_iosched_show(request_queue_t *q, char *name)
{
        elevator_t *e = q->elevator;
        struct elevator_type *elv = e->elevator_type;
        struct list_head *entry;
        int len = 0;

        spin_lock_irq(q->queue_lock);
        list_for_each(entry, &elv_list) {
                struct elevator_type *__e;

                __e = list_entry(entry, struct elevator_type, list);
                if (!strcmp(elv->elevator_name, __e->elevator_name))
                        len += sprintf(name+len, "[%s] ", elv->elevator_name);
                else
                        len += sprintf(name+len, "%s ", __e->elevator_name);
        }
        spin_unlock_irq(q->queue_lock);

        len += sprintf(name+len, "\n");
        return len;
}
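/*
 * These two back the queue's sysfs "scheduler" attribute; an
 * illustrative session (device name hypothetical):
 *
 *	# cat /sys/block/hda/queue/scheduler
 *	noop [as] deadline cfq
 *	# echo deadline > /sys/block/hda/queue/scheduler
 *
 * The show side brackets the active scheduler; the store side resolves
 * the name and triggers elevator_switch() above.
 */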

EXPORT_SYMBOL(elv_dispatch_sort);
EXPORT_SYMBOL(elv_add_request);
EXPORT_SYMBOL(__elv_add_request);
EXPORT_SYMBOL(elv_requeue_request);
EXPORT_SYMBOL(elv_next_request);
EXPORT_SYMBOL(elv_dequeue_request);
EXPORT_SYMBOL(elv_queue_empty);
EXPORT_SYMBOL(elv_completed_request);
EXPORT_SYMBOL(elevator_exit);
EXPORT_SYMBOL(elevator_init);
