diff options
author | Tejun Heo <htejun@gmail.com> | 2005-10-28 02:29:39 -0400 |
---|---|---|
committer | Jens Axboe <axboe@nelson.home.kernel.dk> | 2005-10-28 02:48:12 -0400 |
commit | cb98fc8bb9c141009e2bda99c0db39d387e142cf (patch) | |
tree | 8957f8a79f39c3e6633a0dbb165ced8b530aca0c | |
parent | cb19833dccb32f97cacbfff834b53523915f13f6 (diff) |
[BLOCK] Reimplement elevator switch
This patch reimplements the elevator switch. It assumes the generic
dispatch queue patchset is applied.
* Each request is tagged with REQ_ELVPRIV flag if it has its elevator
private data set.
* Requests which don't have the REQ_ELVPRIV flag set never enter the
iosched. They are always back-inserted directly into the dispatch queue.
Of course, elevator_put_req_fn is called only for requests which
have REQ_ELVPRIV set.
* The request queue maintains the current number of requests which have
their elevator data set (elevator_set_req_fn called) in
q->rq.elvpriv.
* If a request queue has QUEUE_FLAG_BYPASS set, elevator private data
is not allocated for new requests.
To switch to another iosched, we set QUEUE_FLAG_BYPASS and wait until
elvpriv goes to zero; then, we attach the new iosched and clear
QUEUE_FLAG_BYPASS. The new implementation is much simpler and the main
code paths are less cluttered, IMHO.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jens Axboe <axboe@suse.de>
-rw-r--r-- | drivers/block/elevator.c | 78 | ||||
-rw-r--r-- | drivers/block/ll_rw_blk.c | 142 | ||||
-rw-r--r-- | include/linux/blkdev.h | 10 |
3 files changed, 66 insertions, 164 deletions
diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index af2388e73f61..272d93946621 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/slab.h> | 34 | #include <linux/slab.h> |
35 | #include <linux/init.h> | 35 | #include <linux/init.h> |
36 | #include <linux/compiler.h> | 36 | #include <linux/compiler.h> |
37 | #include <linux/delay.h> | ||
37 | 38 | ||
38 | #include <asm/uaccess.h> | 39 | #include <asm/uaccess.h> |
39 | 40 | ||
@@ -131,11 +132,7 @@ static int elevator_attach(request_queue_t *q, struct elevator_type *e, | |||
131 | eq->ops = &e->ops; | 132 | eq->ops = &e->ops; |
132 | eq->elevator_type = e; | 133 | eq->elevator_type = e; |
133 | 134 | ||
134 | INIT_LIST_HEAD(&q->queue_head); | ||
135 | q->last_merge = NULL; | ||
136 | q->elevator = eq; | 135 | q->elevator = eq; |
137 | q->end_sector = 0; | ||
138 | q->boundary_rq = NULL; | ||
139 | 136 | ||
140 | if (eq->ops->elevator_init_fn) | 137 | if (eq->ops->elevator_init_fn) |
141 | ret = eq->ops->elevator_init_fn(q, eq); | 138 | ret = eq->ops->elevator_init_fn(q, eq); |
@@ -184,6 +181,12 @@ int elevator_init(request_queue_t *q, char *name) | |||
184 | struct elevator_queue *eq; | 181 | struct elevator_queue *eq; |
185 | int ret = 0; | 182 | int ret = 0; |
186 | 183 | ||
184 | INIT_LIST_HEAD(&q->queue_head); | ||
185 | q->last_merge = NULL; | ||
186 | q->end_sector = 0; | ||
187 | q->boundary_rq = NULL; | ||
188 | q->max_back_kb = 0; | ||
189 | |||
187 | elevator_setup_default(); | 190 | elevator_setup_default(); |
188 | 191 | ||
189 | if (!name) | 192 | if (!name) |
@@ -336,23 +339,14 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, | |||
336 | q->end_sector = rq_end_sector(rq); | 339 | q->end_sector = rq_end_sector(rq); |
337 | q->boundary_rq = rq; | 340 | q->boundary_rq = rq; |
338 | } | 341 | } |
339 | } | 342 | } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) |
343 | where = ELEVATOR_INSERT_BACK; | ||
340 | 344 | ||
341 | if (plug) | 345 | if (plug) |
342 | blk_plug_device(q); | 346 | blk_plug_device(q); |
343 | 347 | ||
344 | rq->q = q; | 348 | rq->q = q; |
345 | 349 | ||
346 | if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) { | ||
347 | /* | ||
348 | * if drain is set, store the request "locally". when the drain | ||
349 | * is finished, the requests will be handed ordered to the io | ||
350 | * scheduler | ||
351 | */ | ||
352 | list_add_tail(&rq->queuelist, &q->drain_list); | ||
353 | return; | ||
354 | } | ||
355 | |||
356 | switch (where) { | 350 | switch (where) { |
357 | case ELEVATOR_INSERT_FRONT: | 351 | case ELEVATOR_INSERT_FRONT: |
358 | rq->flags |= REQ_SOFTBARRIER; | 352 | rq->flags |= REQ_SOFTBARRIER; |
@@ -659,25 +653,36 @@ EXPORT_SYMBOL_GPL(elv_unregister); | |||
659 | * switch to new_e io scheduler. be careful not to introduce deadlocks - | 653 | * switch to new_e io scheduler. be careful not to introduce deadlocks - |
660 | * we don't free the old io scheduler, before we have allocated what we | 654 | * we don't free the old io scheduler, before we have allocated what we |
661 | * need for the new one. this way we have a chance of going back to the old | 655 | * need for the new one. this way we have a chance of going back to the old |
662 | * one, if the new one fails init for some reason. we also do an intermediate | 656 | * one, if the new one fails init for some reason. |
663 | * switch to noop to ensure safety with stack-allocated requests, since they | ||
664 | * don't originate from the block layer allocator. noop is safe here, because | ||
665 | * it never needs to touch the elevator itself for completion events. DRAIN | ||
666 | * flags will make sure we don't touch it for additions either. | ||
667 | */ | 657 | */ |
668 | static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) | 658 | static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) |
669 | { | 659 | { |
670 | elevator_t *e = kmalloc(sizeof(elevator_t), GFP_KERNEL); | 660 | elevator_t *old_elevator, *e; |
671 | struct elevator_type *noop_elevator = NULL; | ||
672 | elevator_t *old_elevator; | ||
673 | 661 | ||
662 | /* | ||
663 | * Allocate new elevator | ||
664 | */ | ||
665 | e = kmalloc(sizeof(elevator_t), GFP_KERNEL); | ||
674 | if (!e) | 666 | if (!e) |
675 | goto error; | 667 | goto error; |
676 | 668 | ||
677 | /* | 669 | /* |
678 | * first step, drain requests from the block freelist | 670 | * Turn on BYPASS and drain all requests w/ elevator private data |
679 | */ | 671 | */ |
680 | blk_wait_queue_drained(q, 0); | 672 | spin_lock_irq(q->queue_lock); |
673 | |||
674 | set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); | ||
675 | |||
676 | while (q->elevator->ops->elevator_dispatch_fn(q, 1)) | ||
677 | ; | ||
678 | |||
679 | while (q->rq.elvpriv) { | ||
680 | spin_unlock_irq(q->queue_lock); | ||
681 | msleep(100); | ||
682 | spin_lock_irq(q->queue_lock); | ||
683 | } | ||
684 | |||
685 | spin_unlock_irq(q->queue_lock); | ||
681 | 686 | ||
682 | /* | 687 | /* |
683 | * unregister old elevator data | 688 | * unregister old elevator data |
@@ -686,18 +691,6 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) | |||
686 | old_elevator = q->elevator; | 691 | old_elevator = q->elevator; |
687 | 692 | ||
688 | /* | 693 | /* |
689 | * next step, switch to noop since it uses no private rq structures | ||
690 | * and doesn't allocate any memory for anything. then wait for any | ||
691 | * non-fs requests in-flight | ||
692 | */ | ||
693 | noop_elevator = elevator_get("noop"); | ||
694 | spin_lock_irq(q->queue_lock); | ||
695 | elevator_attach(q, noop_elevator, e); | ||
696 | spin_unlock_irq(q->queue_lock); | ||
697 | |||
698 | blk_wait_queue_drained(q, 1); | ||
699 | |||
700 | /* | ||
701 | * attach and start new elevator | 694 | * attach and start new elevator |
702 | */ | 695 | */ |
703 | if (elevator_attach(q, new_e, e)) | 696 | if (elevator_attach(q, new_e, e)) |
@@ -707,11 +700,10 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) | |||
707 | goto fail_register; | 700 | goto fail_register; |
708 | 701 | ||
709 | /* | 702 | /* |
710 | * finally exit old elevator and start queue again | 703 | * finally exit old elevator and turn off BYPASS. |
711 | */ | 704 | */ |
712 | elevator_exit(old_elevator); | 705 | elevator_exit(old_elevator); |
713 | blk_finish_queue_drain(q); | 706 | clear_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); |
714 | elevator_put(noop_elevator); | ||
715 | return; | 707 | return; |
716 | 708 | ||
717 | fail_register: | 709 | fail_register: |
@@ -720,13 +712,13 @@ fail_register: | |||
720 | * one again (along with re-adding the sysfs dir) | 712 | * one again (along with re-adding the sysfs dir) |
721 | */ | 713 | */ |
722 | elevator_exit(e); | 714 | elevator_exit(e); |
715 | e = NULL; | ||
723 | fail: | 716 | fail: |
724 | q->elevator = old_elevator; | 717 | q->elevator = old_elevator; |
725 | elv_register_queue(q); | 718 | elv_register_queue(q); |
726 | blk_finish_queue_drain(q); | 719 | clear_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); |
720 | kfree(e); | ||
727 | error: | 721 | error: |
728 | if (noop_elevator) | ||
729 | elevator_put(noop_elevator); | ||
730 | elevator_put(new_e); | 722 | elevator_put(new_e); |
731 | printk(KERN_ERR "elevator: switch to %s failed\n",new_e->elevator_name); | 723 | printk(KERN_ERR "elevator: switch to %s failed\n",new_e->elevator_name); |
732 | } | 724 | } |
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index d2a66fd309c3..f7c9931cb380 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c | |||
@@ -263,8 +263,6 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) | |||
263 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); | 263 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); |
264 | 264 | ||
265 | blk_queue_activity_fn(q, NULL, NULL); | 265 | blk_queue_activity_fn(q, NULL, NULL); |
266 | |||
267 | INIT_LIST_HEAD(&q->drain_list); | ||
268 | } | 266 | } |
269 | 267 | ||
270 | EXPORT_SYMBOL(blk_queue_make_request); | 268 | EXPORT_SYMBOL(blk_queue_make_request); |
@@ -1050,6 +1048,7 @@ static char *rq_flags[] = { | |||
1050 | "REQ_STARTED", | 1048 | "REQ_STARTED", |
1051 | "REQ_DONTPREP", | 1049 | "REQ_DONTPREP", |
1052 | "REQ_QUEUED", | 1050 | "REQ_QUEUED", |
1051 | "REQ_ELVPRIV", | ||
1053 | "REQ_PC", | 1052 | "REQ_PC", |
1054 | "REQ_BLOCK_PC", | 1053 | "REQ_BLOCK_PC", |
1055 | "REQ_SENSE", | 1054 | "REQ_SENSE", |
@@ -1640,9 +1639,9 @@ static int blk_init_free_list(request_queue_t *q) | |||
1640 | 1639 | ||
1641 | rl->count[READ] = rl->count[WRITE] = 0; | 1640 | rl->count[READ] = rl->count[WRITE] = 0; |
1642 | rl->starved[READ] = rl->starved[WRITE] = 0; | 1641 | rl->starved[READ] = rl->starved[WRITE] = 0; |
1642 | rl->elvpriv = 0; | ||
1643 | init_waitqueue_head(&rl->wait[READ]); | 1643 | init_waitqueue_head(&rl->wait[READ]); |
1644 | init_waitqueue_head(&rl->wait[WRITE]); | 1644 | init_waitqueue_head(&rl->wait[WRITE]); |
1645 | init_waitqueue_head(&rl->drain); | ||
1646 | 1645 | ||
1647 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, | 1646 | rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, |
1648 | mempool_free_slab, request_cachep, q->node); | 1647 | mempool_free_slab, request_cachep, q->node); |
@@ -1785,12 +1784,14 @@ EXPORT_SYMBOL(blk_get_queue); | |||
1785 | 1784 | ||
1786 | static inline void blk_free_request(request_queue_t *q, struct request *rq) | 1785 | static inline void blk_free_request(request_queue_t *q, struct request *rq) |
1787 | { | 1786 | { |
1788 | elv_put_request(q, rq); | 1787 | if (rq->flags & REQ_ELVPRIV) |
1788 | elv_put_request(q, rq); | ||
1789 | mempool_free(rq, q->rq.rq_pool); | 1789 | mempool_free(rq, q->rq.rq_pool); |
1790 | } | 1790 | } |
1791 | 1791 | ||
1792 | static inline struct request * | 1792 | static inline struct request * |
1793 | blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask) | 1793 | blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, |
1794 | int priv, int gfp_mask) | ||
1794 | { | 1795 | { |
1795 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); | 1796 | struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); |
1796 | 1797 | ||
@@ -1803,11 +1804,15 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask) | |||
1803 | */ | 1804 | */ |
1804 | rq->flags = rw; | 1805 | rq->flags = rw; |
1805 | 1806 | ||
1806 | if (!elv_set_request(q, rq, bio, gfp_mask)) | 1807 | if (priv) { |
1807 | return rq; | 1808 | if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { |
1809 | mempool_free(rq, q->rq.rq_pool); | ||
1810 | return NULL; | ||
1811 | } | ||
1812 | rq->flags |= REQ_ELVPRIV; | ||
1813 | } | ||
1808 | 1814 | ||
1809 | mempool_free(rq, q->rq.rq_pool); | 1815 | return rq; |
1810 | return NULL; | ||
1811 | } | 1816 | } |
1812 | 1817 | ||
1813 | /* | 1818 | /* |
@@ -1863,22 +1868,18 @@ static void __freed_request(request_queue_t *q, int rw) | |||
1863 | * A request has just been released. Account for it, update the full and | 1868 | * A request has just been released. Account for it, update the full and |
1864 | * congestion status, wake up any waiters. Called under q->queue_lock. | 1869 | * congestion status, wake up any waiters. Called under q->queue_lock. |
1865 | */ | 1870 | */ |
1866 | static void freed_request(request_queue_t *q, int rw) | 1871 | static void freed_request(request_queue_t *q, int rw, int priv) |
1867 | { | 1872 | { |
1868 | struct request_list *rl = &q->rq; | 1873 | struct request_list *rl = &q->rq; |
1869 | 1874 | ||
1870 | rl->count[rw]--; | 1875 | rl->count[rw]--; |
1876 | if (priv) | ||
1877 | rl->elvpriv--; | ||
1871 | 1878 | ||
1872 | __freed_request(q, rw); | 1879 | __freed_request(q, rw); |
1873 | 1880 | ||
1874 | if (unlikely(rl->starved[rw ^ 1])) | 1881 | if (unlikely(rl->starved[rw ^ 1])) |
1875 | __freed_request(q, rw ^ 1); | 1882 | __freed_request(q, rw ^ 1); |
1876 | |||
1877 | if (!rl->count[READ] && !rl->count[WRITE]) { | ||
1878 | smp_mb(); | ||
1879 | if (unlikely(waitqueue_active(&rl->drain))) | ||
1880 | wake_up(&rl->drain); | ||
1881 | } | ||
1882 | } | 1883 | } |
1883 | 1884 | ||
1884 | #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) | 1885 | #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) |
@@ -1893,9 +1894,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, | |||
1893 | struct request *rq = NULL; | 1894 | struct request *rq = NULL; |
1894 | struct request_list *rl = &q->rq; | 1895 | struct request_list *rl = &q->rq; |
1895 | struct io_context *ioc = current_io_context(GFP_ATOMIC); | 1896 | struct io_context *ioc = current_io_context(GFP_ATOMIC); |
1896 | 1897 | int priv; | |
1897 | if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) | ||
1898 | goto out; | ||
1899 | 1898 | ||
1900 | if (rl->count[rw]+1 >= q->nr_requests) { | 1899 | if (rl->count[rw]+1 >= q->nr_requests) { |
1901 | /* | 1900 | /* |
@@ -1940,9 +1939,14 @@ get_rq: | |||
1940 | rl->starved[rw] = 0; | 1939 | rl->starved[rw] = 0; |
1941 | if (rl->count[rw] >= queue_congestion_on_threshold(q)) | 1940 | if (rl->count[rw] >= queue_congestion_on_threshold(q)) |
1942 | set_queue_congested(q, rw); | 1941 | set_queue_congested(q, rw); |
1942 | |||
1943 | priv = !test_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); | ||
1944 | if (priv) | ||
1945 | rl->elvpriv++; | ||
1946 | |||
1943 | spin_unlock_irq(q->queue_lock); | 1947 | spin_unlock_irq(q->queue_lock); |
1944 | 1948 | ||
1945 | rq = blk_alloc_request(q, rw, bio, gfp_mask); | 1949 | rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); |
1946 | if (!rq) { | 1950 | if (!rq) { |
1947 | /* | 1951 | /* |
1948 | * Allocation failed presumably due to memory. Undo anything | 1952 | * Allocation failed presumably due to memory. Undo anything |
@@ -1952,7 +1956,7 @@ get_rq: | |||
1952 | * wait queue, but this is pretty rare. | 1956 | * wait queue, but this is pretty rare. |
1953 | */ | 1957 | */ |
1954 | spin_lock_irq(q->queue_lock); | 1958 | spin_lock_irq(q->queue_lock); |
1955 | freed_request(q, rw); | 1959 | freed_request(q, rw, priv); |
1956 | 1960 | ||
1957 | /* | 1961 | /* |
1958 | * in the very unlikely event that allocation failed and no | 1962 | * in the very unlikely event that allocation failed and no |
@@ -2470,11 +2474,12 @@ static void __blk_put_request(request_queue_t *q, struct request *req) | |||
2470 | */ | 2474 | */ |
2471 | if (rl) { | 2475 | if (rl) { |
2472 | int rw = rq_data_dir(req); | 2476 | int rw = rq_data_dir(req); |
2477 | int priv = req->flags & REQ_ELVPRIV; | ||
2473 | 2478 | ||
2474 | BUG_ON(!list_empty(&req->queuelist)); | 2479 | BUG_ON(!list_empty(&req->queuelist)); |
2475 | 2480 | ||
2476 | blk_free_request(q, req); | 2481 | blk_free_request(q, req); |
2477 | freed_request(q, rw); | 2482 | freed_request(q, rw, priv); |
2478 | } | 2483 | } |
2479 | } | 2484 | } |
2480 | 2485 | ||
@@ -2802,97 +2807,6 @@ static inline void blk_partition_remap(struct bio *bio) | |||
2802 | } | 2807 | } |
2803 | } | 2808 | } |
2804 | 2809 | ||
2805 | void blk_finish_queue_drain(request_queue_t *q) | ||
2806 | { | ||
2807 | struct request_list *rl = &q->rq; | ||
2808 | struct request *rq; | ||
2809 | int requeued = 0; | ||
2810 | |||
2811 | spin_lock_irq(q->queue_lock); | ||
2812 | clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); | ||
2813 | |||
2814 | while (!list_empty(&q->drain_list)) { | ||
2815 | rq = list_entry_rq(q->drain_list.next); | ||
2816 | |||
2817 | list_del_init(&rq->queuelist); | ||
2818 | elv_requeue_request(q, rq); | ||
2819 | requeued++; | ||
2820 | } | ||
2821 | |||
2822 | if (requeued) | ||
2823 | q->request_fn(q); | ||
2824 | |||
2825 | spin_unlock_irq(q->queue_lock); | ||
2826 | |||
2827 | wake_up(&rl->wait[0]); | ||
2828 | wake_up(&rl->wait[1]); | ||
2829 | wake_up(&rl->drain); | ||
2830 | } | ||
2831 | |||
2832 | static int wait_drain(request_queue_t *q, struct request_list *rl, int dispatch) | ||
2833 | { | ||
2834 | int wait = rl->count[READ] + rl->count[WRITE]; | ||
2835 | |||
2836 | if (dispatch) | ||
2837 | wait += !list_empty(&q->queue_head); | ||
2838 | |||
2839 | return wait; | ||
2840 | } | ||
2841 | |||
2842 | /* | ||
2843 | * We rely on the fact that only requests allocated through blk_alloc_request() | ||
2844 | * have io scheduler private data structures associated with them. Any other | ||
2845 | * type of request (allocated on stack or through kmalloc()) should not go | ||
2846 | * to the io scheduler core, but be attached to the queue head instead. | ||
2847 | */ | ||
2848 | void blk_wait_queue_drained(request_queue_t *q, int wait_dispatch) | ||
2849 | { | ||
2850 | struct request_list *rl = &q->rq; | ||
2851 | DEFINE_WAIT(wait); | ||
2852 | |||
2853 | spin_lock_irq(q->queue_lock); | ||
2854 | set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); | ||
2855 | |||
2856 | while (wait_drain(q, rl, wait_dispatch)) { | ||
2857 | prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE); | ||
2858 | |||
2859 | if (wait_drain(q, rl, wait_dispatch)) { | ||
2860 | __generic_unplug_device(q); | ||
2861 | spin_unlock_irq(q->queue_lock); | ||
2862 | io_schedule(); | ||
2863 | spin_lock_irq(q->queue_lock); | ||
2864 | } | ||
2865 | |||
2866 | finish_wait(&rl->drain, &wait); | ||
2867 | } | ||
2868 | |||
2869 | spin_unlock_irq(q->queue_lock); | ||
2870 | } | ||
2871 | |||
2872 | /* | ||
2873 | * block waiting for the io scheduler being started again. | ||
2874 | */ | ||
2875 | static inline void block_wait_queue_running(request_queue_t *q) | ||
2876 | { | ||
2877 | DEFINE_WAIT(wait); | ||
2878 | |||
2879 | while (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) { | ||
2880 | struct request_list *rl = &q->rq; | ||
2881 | |||
2882 | prepare_to_wait_exclusive(&rl->drain, &wait, | ||
2883 | TASK_UNINTERRUPTIBLE); | ||
2884 | |||
2885 | /* | ||
2886 | * re-check the condition. avoids using prepare_to_wait() | ||
2887 | * in the fast path (queue is running) | ||
2888 | */ | ||
2889 | if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) | ||
2890 | io_schedule(); | ||
2891 | |||
2892 | finish_wait(&rl->drain, &wait); | ||
2893 | } | ||
2894 | } | ||
2895 | |||
2896 | static void handle_bad_sector(struct bio *bio) | 2810 | static void handle_bad_sector(struct bio *bio) |
2897 | { | 2811 | { |
2898 | char b[BDEVNAME_SIZE]; | 2812 | char b[BDEVNAME_SIZE]; |
@@ -2988,8 +2902,6 @@ end_io: | |||
2988 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) | 2902 | if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) |
2989 | goto end_io; | 2903 | goto end_io; |
2990 | 2904 | ||
2991 | block_wait_queue_running(q); | ||
2992 | |||
2993 | /* | 2905 | /* |
2994 | * If this device has partitions, remap block n | 2906 | * If this device has partitions, remap block n |
2995 | * of partition p to block n+start(p) of the disk. | 2907 | * of partition p to block n+start(p) of the disk. |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 159dbcd2eb59..6186d5e2110f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -107,9 +107,9 @@ typedef void (rq_end_io_fn)(struct request *); | |||
107 | struct request_list { | 107 | struct request_list { |
108 | int count[2]; | 108 | int count[2]; |
109 | int starved[2]; | 109 | int starved[2]; |
110 | int elvpriv; | ||
110 | mempool_t *rq_pool; | 111 | mempool_t *rq_pool; |
111 | wait_queue_head_t wait[2]; | 112 | wait_queue_head_t wait[2]; |
112 | wait_queue_head_t drain; | ||
113 | }; | 113 | }; |
114 | 114 | ||
115 | #define BLK_MAX_CDB 16 | 115 | #define BLK_MAX_CDB 16 |
@@ -211,6 +211,7 @@ enum rq_flag_bits { | |||
211 | __REQ_STARTED, /* drive already may have started this one */ | 211 | __REQ_STARTED, /* drive already may have started this one */ |
212 | __REQ_DONTPREP, /* don't call prep for this one */ | 212 | __REQ_DONTPREP, /* don't call prep for this one */ |
213 | __REQ_QUEUED, /* uses queueing */ | 213 | __REQ_QUEUED, /* uses queueing */ |
214 | __REQ_ELVPRIV, /* elevator private data attached */ | ||
214 | /* | 215 | /* |
215 | * for ATA/ATAPI devices | 216 | * for ATA/ATAPI devices |
216 | */ | 217 | */ |
@@ -244,6 +245,7 @@ enum rq_flag_bits { | |||
244 | #define REQ_STARTED (1 << __REQ_STARTED) | 245 | #define REQ_STARTED (1 << __REQ_STARTED) |
245 | #define REQ_DONTPREP (1 << __REQ_DONTPREP) | 246 | #define REQ_DONTPREP (1 << __REQ_DONTPREP) |
246 | #define REQ_QUEUED (1 << __REQ_QUEUED) | 247 | #define REQ_QUEUED (1 << __REQ_QUEUED) |
248 | #define REQ_ELVPRIV (1 << __REQ_ELVPRIV) | ||
247 | #define REQ_PC (1 << __REQ_PC) | 249 | #define REQ_PC (1 << __REQ_PC) |
248 | #define REQ_BLOCK_PC (1 << __REQ_BLOCK_PC) | 250 | #define REQ_BLOCK_PC (1 << __REQ_BLOCK_PC) |
249 | #define REQ_SENSE (1 << __REQ_SENSE) | 251 | #define REQ_SENSE (1 << __REQ_SENSE) |
@@ -413,8 +415,6 @@ struct request_queue | |||
413 | unsigned int sg_reserved_size; | 415 | unsigned int sg_reserved_size; |
414 | int node; | 416 | int node; |
415 | 417 | ||
416 | struct list_head drain_list; | ||
417 | |||
418 | /* | 418 | /* |
419 | * reserved for flush operations | 419 | * reserved for flush operations |
420 | */ | 420 | */ |
@@ -442,7 +442,7 @@ enum { | |||
442 | #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ | 442 | #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ |
443 | #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ | 443 | #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ |
444 | #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ | 444 | #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ |
445 | #define QUEUE_FLAG_DRAIN 8 /* draining queue for sched switch */ | 445 | #define QUEUE_FLAG_BYPASS 8 /* don't use elevator, just do FIFO */ |
446 | #define QUEUE_FLAG_FLUSH 9 /* doing barrier flush sequence */ | 446 | #define QUEUE_FLAG_FLUSH 9 /* doing barrier flush sequence */ |
447 | 447 | ||
448 | #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) | 448 | #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) |
@@ -668,8 +668,6 @@ extern void blk_dump_rq_flags(struct request *, char *); | |||
668 | extern void generic_unplug_device(request_queue_t *); | 668 | extern void generic_unplug_device(request_queue_t *); |
669 | extern void __generic_unplug_device(request_queue_t *); | 669 | extern void __generic_unplug_device(request_queue_t *); |
670 | extern long nr_blockdev_pages(void); | 670 | extern long nr_blockdev_pages(void); |
671 | extern void blk_wait_queue_drained(request_queue_t *, int); | ||
672 | extern void blk_finish_queue_drain(request_queue_t *); | ||
673 | 671 | ||
674 | int blk_get_queue(request_queue_t *); | 672 | int blk_get_queue(request_queue_t *); |
675 | request_queue_t *blk_alloc_queue(int gfp_mask); | 673 | request_queue_t *blk_alloc_queue(int gfp_mask); |