diff options
author | Tejun Heo <htejun@gmail.com> | 2005-10-28 02:29:39 -0400 |
---|---|---|
committer | Jens Axboe <axboe@nelson.home.kernel.dk> | 2005-10-28 02:48:12 -0400 |
commit | cb98fc8bb9c141009e2bda99c0db39d387e142cf (patch) | |
tree | 8957f8a79f39c3e6633a0dbb165ced8b530aca0c /drivers/block/elevator.c | |
parent | cb19833dccb32f97cacbfff834b53523915f13f6 (diff) |
[BLOCK] Reimplement elevator switch
This patch reimplements elevator switch. This patch assumes generic
dispatch queue patchset is applied.
* Each request is tagged with REQ_ELVPRIV flag if it has its elevator
private data set.
* Requests which don't have the REQ_ELVPRIV flag set never enter the
iosched. They are always directly back inserted to the dispatch queue.
Of course, elevator_put_req_fn is called only for requests which
have REQ_ELVPRIV set.
* The request queue maintains the current number of requests which have
their elevator data set (elevator_set_req_fn called) in
q->rq.elvpriv.
* If a request queue has QUEUE_FLAG_BYPASS set, elevator private data
is not allocated for new requests.
To switch to another iosched, we set QUEUE_FLAG_BYPASS and wait until
elvpriv goes to zero; then, we attach the new iosched and clear
QUEUE_FLAG_BYPASS. The new implementation is much simpler and the main code
paths are less cluttered, IMHO.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jens Axboe <axboe@suse.de>
Diffstat (limited to 'drivers/block/elevator.c')
-rw-r--r-- | drivers/block/elevator.c | 78 |
1 files changed, 35 insertions, 43 deletions
diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index af2388e73f61..272d93946621 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/slab.h> | 34 | #include <linux/slab.h> |
35 | #include <linux/init.h> | 35 | #include <linux/init.h> |
36 | #include <linux/compiler.h> | 36 | #include <linux/compiler.h> |
37 | #include <linux/delay.h> | ||
37 | 38 | ||
38 | #include <asm/uaccess.h> | 39 | #include <asm/uaccess.h> |
39 | 40 | ||
@@ -131,11 +132,7 @@ static int elevator_attach(request_queue_t *q, struct elevator_type *e, | |||
131 | eq->ops = &e->ops; | 132 | eq->ops = &e->ops; |
132 | eq->elevator_type = e; | 133 | eq->elevator_type = e; |
133 | 134 | ||
134 | INIT_LIST_HEAD(&q->queue_head); | ||
135 | q->last_merge = NULL; | ||
136 | q->elevator = eq; | 135 | q->elevator = eq; |
137 | q->end_sector = 0; | ||
138 | q->boundary_rq = NULL; | ||
139 | 136 | ||
140 | if (eq->ops->elevator_init_fn) | 137 | if (eq->ops->elevator_init_fn) |
141 | ret = eq->ops->elevator_init_fn(q, eq); | 138 | ret = eq->ops->elevator_init_fn(q, eq); |
@@ -184,6 +181,12 @@ int elevator_init(request_queue_t *q, char *name) | |||
184 | struct elevator_queue *eq; | 181 | struct elevator_queue *eq; |
185 | int ret = 0; | 182 | int ret = 0; |
186 | 183 | ||
184 | INIT_LIST_HEAD(&q->queue_head); | ||
185 | q->last_merge = NULL; | ||
186 | q->end_sector = 0; | ||
187 | q->boundary_rq = NULL; | ||
188 | q->max_back_kb = 0; | ||
189 | |||
187 | elevator_setup_default(); | 190 | elevator_setup_default(); |
188 | 191 | ||
189 | if (!name) | 192 | if (!name) |
@@ -336,23 +339,14 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, | |||
336 | q->end_sector = rq_end_sector(rq); | 339 | q->end_sector = rq_end_sector(rq); |
337 | q->boundary_rq = rq; | 340 | q->boundary_rq = rq; |
338 | } | 341 | } |
339 | } | 342 | } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) |
343 | where = ELEVATOR_INSERT_BACK; | ||
340 | 344 | ||
341 | if (plug) | 345 | if (plug) |
342 | blk_plug_device(q); | 346 | blk_plug_device(q); |
343 | 347 | ||
344 | rq->q = q; | 348 | rq->q = q; |
345 | 349 | ||
346 | if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) { | ||
347 | /* | ||
348 | * if drain is set, store the request "locally". when the drain | ||
349 | * is finished, the requests will be handed ordered to the io | ||
350 | * scheduler | ||
351 | */ | ||
352 | list_add_tail(&rq->queuelist, &q->drain_list); | ||
353 | return; | ||
354 | } | ||
355 | |||
356 | switch (where) { | 350 | switch (where) { |
357 | case ELEVATOR_INSERT_FRONT: | 351 | case ELEVATOR_INSERT_FRONT: |
358 | rq->flags |= REQ_SOFTBARRIER; | 352 | rq->flags |= REQ_SOFTBARRIER; |
@@ -659,25 +653,36 @@ EXPORT_SYMBOL_GPL(elv_unregister); | |||
659 | * switch to new_e io scheduler. be careful not to introduce deadlocks - | 653 | * switch to new_e io scheduler. be careful not to introduce deadlocks - |
660 | * we don't free the old io scheduler, before we have allocated what we | 654 | * we don't free the old io scheduler, before we have allocated what we |
661 | * need for the new one. this way we have a chance of going back to the old | 655 | * need for the new one. this way we have a chance of going back to the old |
662 | * one, if the new one fails init for some reason. we also do an intermediate | 656 | * one, if the new one fails init for some reason. |
663 | * switch to noop to ensure safety with stack-allocated requests, since they | ||
664 | * don't originate from the block layer allocator. noop is safe here, because | ||
665 | * it never needs to touch the elevator itself for completion events. DRAIN | ||
666 | * flags will make sure we don't touch it for additions either. | ||
667 | */ | 657 | */ |
668 | static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) | 658 | static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) |
669 | { | 659 | { |
670 | elevator_t *e = kmalloc(sizeof(elevator_t), GFP_KERNEL); | 660 | elevator_t *old_elevator, *e; |
671 | struct elevator_type *noop_elevator = NULL; | ||
672 | elevator_t *old_elevator; | ||
673 | 661 | ||
662 | /* | ||
663 | * Allocate new elevator | ||
664 | */ | ||
665 | e = kmalloc(sizeof(elevator_t), GFP_KERNEL); | ||
674 | if (!e) | 666 | if (!e) |
675 | goto error; | 667 | goto error; |
676 | 668 | ||
677 | /* | 669 | /* |
678 | * first step, drain requests from the block freelist | 670 | * Turn on BYPASS and drain all requests w/ elevator private data |
679 | */ | 671 | */ |
680 | blk_wait_queue_drained(q, 0); | 672 | spin_lock_irq(q->queue_lock); |
673 | |||
674 | set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); | ||
675 | |||
676 | while (q->elevator->ops->elevator_dispatch_fn(q, 1)) | ||
677 | ; | ||
678 | |||
679 | while (q->rq.elvpriv) { | ||
680 | spin_unlock_irq(q->queue_lock); | ||
681 | msleep(100); | ||
682 | spin_lock_irq(q->queue_lock); | ||
683 | } | ||
684 | |||
685 | spin_unlock_irq(q->queue_lock); | ||
681 | 686 | ||
682 | /* | 687 | /* |
683 | * unregister old elevator data | 688 | * unregister old elevator data |
@@ -686,18 +691,6 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) | |||
686 | old_elevator = q->elevator; | 691 | old_elevator = q->elevator; |
687 | 692 | ||
688 | /* | 693 | /* |
689 | * next step, switch to noop since it uses no private rq structures | ||
690 | * and doesn't allocate any memory for anything. then wait for any | ||
691 | * non-fs requests in-flight | ||
692 | */ | ||
693 | noop_elevator = elevator_get("noop"); | ||
694 | spin_lock_irq(q->queue_lock); | ||
695 | elevator_attach(q, noop_elevator, e); | ||
696 | spin_unlock_irq(q->queue_lock); | ||
697 | |||
698 | blk_wait_queue_drained(q, 1); | ||
699 | |||
700 | /* | ||
701 | * attach and start new elevator | 694 | * attach and start new elevator |
702 | */ | 695 | */ |
703 | if (elevator_attach(q, new_e, e)) | 696 | if (elevator_attach(q, new_e, e)) |
@@ -707,11 +700,10 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) | |||
707 | goto fail_register; | 700 | goto fail_register; |
708 | 701 | ||
709 | /* | 702 | /* |
710 | * finally exit old elevator and start queue again | 703 | * finally exit old elevator and turn off BYPASS. |
711 | */ | 704 | */ |
712 | elevator_exit(old_elevator); | 705 | elevator_exit(old_elevator); |
713 | blk_finish_queue_drain(q); | 706 | clear_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); |
714 | elevator_put(noop_elevator); | ||
715 | return; | 707 | return; |
716 | 708 | ||
717 | fail_register: | 709 | fail_register: |
@@ -720,13 +712,13 @@ fail_register: | |||
720 | * one again (along with re-adding the sysfs dir) | 712 | * one again (along with re-adding the sysfs dir) |
721 | */ | 713 | */ |
722 | elevator_exit(e); | 714 | elevator_exit(e); |
715 | e = NULL; | ||
723 | fail: | 716 | fail: |
724 | q->elevator = old_elevator; | 717 | q->elevator = old_elevator; |
725 | elv_register_queue(q); | 718 | elv_register_queue(q); |
726 | blk_finish_queue_drain(q); | 719 | clear_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); |
720 | kfree(e); | ||
727 | error: | 721 | error: |
728 | if (noop_elevator) | ||
729 | elevator_put(noop_elevator); | ||
730 | elevator_put(new_e); | 722 | elevator_put(new_e); |
731 | printk(KERN_ERR "elevator: switch to %s failed\n",new_e->elevator_name); | 723 | printk(KERN_ERR "elevator: switch to %s failed\n",new_e->elevator_name); |
732 | } | 724 | } |