author     Tejun Heo <htejun@gmail.com>               2005-10-28 02:29:39 -0400
committer  Jens Axboe <axboe@nelson.home.kernel.dk>   2005-10-28 02:48:12 -0400
commit     cb98fc8bb9c141009e2bda99c0db39d387e142cf (patch)
tree       8957f8a79f39c3e6633a0dbb165ced8b530aca0c  /drivers/block/ll_rw_blk.c
parent     cb19833dccb32f97cacbfff834b53523915f13f6 (diff)
[BLOCK] Reimplement elevator switch
This patch reimplements the elevator switch. It assumes the generic
dispatch queue patchset is applied.

* Each request is tagged with the REQ_ELVPRIV flag if it has its
  elevator private data set.
* Requests which don't have the REQ_ELVPRIV flag set never enter the
  iosched. They are always inserted directly back into the dispatch
  queue. Of course, elevator_put_req_fn is called only for requests
  which have REQ_ELVPRIV set.
* The request queue maintains the current number of requests which
  have their elevator data set (elevator_set_req_fn called) in
  q->rq.elvpriv.
* If a request queue has QUEUE_FLAG_BYPASS set, elevator private data
  is not allocated for new requests.

To switch to another iosched, we set QUEUE_FLAG_BYPASS and wait until
elvpriv goes to zero; then we attach the new iosched and clear
QUEUE_FLAG_BYPASS. The new implementation is much simpler and the
main code paths are less cluttered, IMHO.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jens Axboe <axboe@suse.de>
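
[Editor's note] For readers following the logic, here is a minimal userspace C model of
the bypass-and-drain protocol the message describes. The names (elv_model, model_alloc,
model_free, model_switch, attach_new_iosched) are invented for illustration and do not
exist in the kernel; the real code keeps the count in q->rq.elvpriv under q->queue_lock
and uses REQ_ELVPRIV and QUEUE_FLAG_BYPASS rather than plain booleans, and how the
elevator-switch path actually waits lives outside the file changed below.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct elv_model {
	pthread_mutex_t lock;		/* stands in for q->queue_lock */
	pthread_cond_t  drained;	/* signalled when elvpriv reaches zero */
	int  elvpriv;			/* requests still holding elevator private data */
	bool bypass;			/* stands in for QUEUE_FLAG_BYPASS */
};

/* Allocation side: take an elevator reference only when not bypassing. */
static bool model_alloc(struct elv_model *m)
{
	bool priv;

	pthread_mutex_lock(&m->lock);
	priv = !m->bypass;		/* priv <=> REQ_ELVPRIV would be set */
	if (priv)
		m->elvpriv++;
	pthread_mutex_unlock(&m->lock);
	return priv;
}

/* Completion side: drop the reference and wake a waiting switcher. */
static void model_free(struct elv_model *m, bool priv)
{
	pthread_mutex_lock(&m->lock);
	if (priv && --m->elvpriv == 0)
		pthread_cond_broadcast(&m->drained);
	pthread_mutex_unlock(&m->lock);
}

/* Switch side: set bypass, wait for elvpriv to drain, swap, clear bypass. */
static void model_switch(struct elv_model *m, void (*attach)(void))
{
	pthread_mutex_lock(&m->lock);
	m->bypass = true;		/* new requests now skip the iosched */
	while (m->elvpriv > 0)		/* wait for old elevator-private data to drain */
		pthread_cond_wait(&m->drained, &m->lock);
	attach();			/* stand-in for attaching the new iosched */
	m->bypass = false;
	pthread_mutex_unlock(&m->lock);
}

static void attach_new_iosched(void)
{
	puts("new iosched attached");
}

int main(void)
{
	struct elv_model m = {
		.lock    = PTHREAD_MUTEX_INITIALIZER,
		.drained = PTHREAD_COND_INITIALIZER,
	};
	bool priv = model_alloc(&m);		/* a request comes in with private data */
	model_free(&m, priv);			/* ...and completes */
	model_switch(&m, attach_new_iosched);	/* nothing left to drain at this point */
	return 0;
}

The model only illustrates why requests allocated while the bypass flag is set must not
contribute to elvpriv: if they did, the switcher could wait forever on its own traffic.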
Diffstat (limited to 'drivers/block/ll_rw_blk.c')
-rw-r--r--  drivers/block/ll_rw_blk.c | 142
1 file changed, 27 insertions, 115 deletions
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index d2a66fd309c3..f7c9931cb380 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -263,8 +263,6 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 
 	blk_queue_activity_fn(q, NULL, NULL);
-
-	INIT_LIST_HEAD(&q->drain_list);
 }
 
 EXPORT_SYMBOL(blk_queue_make_request);
@@ -1050,6 +1048,7 @@ static char *rq_flags[] = {
 	"REQ_STARTED",
 	"REQ_DONTPREP",
 	"REQ_QUEUED",
+	"REQ_ELVPRIV",
 	"REQ_PC",
 	"REQ_BLOCK_PC",
 	"REQ_SENSE",
@@ -1640,9 +1639,9 @@ static int blk_init_free_list(request_queue_t *q)
 
 	rl->count[READ] = rl->count[WRITE] = 0;
 	rl->starved[READ] = rl->starved[WRITE] = 0;
+	rl->elvpriv = 0;
 	init_waitqueue_head(&rl->wait[READ]);
 	init_waitqueue_head(&rl->wait[WRITE]);
-	init_waitqueue_head(&rl->drain);
 
 	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
 				mempool_free_slab, request_cachep, q->node);
@@ -1785,12 +1784,14 @@ EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(request_queue_t *q, struct request *rq)
 {
-	elv_put_request(q, rq);
+	if (rq->flags & REQ_ELVPRIV)
+		elv_put_request(q, rq);
 	mempool_free(rq, q->rq.rq_pool);
 }
 
 static inline struct request *
-blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask)
+blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
+		  int priv, int gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -1803,11 +1804,15 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask)
 	 */
 	rq->flags = rw;
 
-	if (!elv_set_request(q, rq, bio, gfp_mask))
-		return rq;
+	if (priv) {
+		if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
+			mempool_free(rq, q->rq.rq_pool);
+			return NULL;
+		}
+		rq->flags |= REQ_ELVPRIV;
+	}
 
-	mempool_free(rq, q->rq.rq_pool);
-	return NULL;
+	return rq;
 }
 
 /*
@@ -1863,22 +1868,18 @@ static void __freed_request(request_queue_t *q, int rw)
  * A request has just been released. Account for it, update the full and
  * congestion status, wake up any waiters. Called under q->queue_lock.
  */
-static void freed_request(request_queue_t *q, int rw)
+static void freed_request(request_queue_t *q, int rw, int priv)
 {
 	struct request_list *rl = &q->rq;
 
 	rl->count[rw]--;
+	if (priv)
+		rl->elvpriv--;
 
 	__freed_request(q, rw);
 
 	if (unlikely(rl->starved[rw ^ 1]))
 		__freed_request(q, rw ^ 1);
-
-	if (!rl->count[READ] && !rl->count[WRITE]) {
-		smp_mb();
-		if (unlikely(waitqueue_active(&rl->drain)))
-			wake_up(&rl->drain);
-	}
 }
 
 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
@@ -1893,9 +1894,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
 	struct io_context *ioc = current_io_context(GFP_ATOMIC);
-
-	if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)))
-		goto out;
+	int priv;
 
 	if (rl->count[rw]+1 >= q->nr_requests) {
 		/*
@@ -1940,9 +1939,14 @@ get_rq:
 	rl->starved[rw] = 0;
 	if (rl->count[rw] >= queue_congestion_on_threshold(q))
 		set_queue_congested(q, rw);
+
+	priv = !test_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
+	if (priv)
+		rl->elvpriv++;
+
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw, bio, gfp_mask);
+	rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
 	if (!rq) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
@@ -1952,7 +1956,7 @@ get_rq:
 		 * wait queue, but this is pretty rare.
 		 */
 		spin_lock_irq(q->queue_lock);
-		freed_request(q, rw);
+		freed_request(q, rw, priv);
 
 		/*
 		 * in the very unlikely event that allocation failed and no
@@ -2470,11 +2474,12 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
 	 */
 	if (rl) {
 		int rw = rq_data_dir(req);
+		int priv = req->flags & REQ_ELVPRIV;
 
 		BUG_ON(!list_empty(&req->queuelist));
 
 		blk_free_request(q, req);
-		freed_request(q, rw);
+		freed_request(q, rw, priv);
 	}
 }
 
@@ -2802,97 +2807,6 @@ static inline void blk_partition_remap(struct bio *bio)
 	}
 }
 
-void blk_finish_queue_drain(request_queue_t *q)
-{
-	struct request_list *rl = &q->rq;
-	struct request *rq;
-	int requeued = 0;
-
-	spin_lock_irq(q->queue_lock);
-	clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
-
-	while (!list_empty(&q->drain_list)) {
-		rq = list_entry_rq(q->drain_list.next);
-
-		list_del_init(&rq->queuelist);
-		elv_requeue_request(q, rq);
-		requeued++;
-	}
-
-	if (requeued)
-		q->request_fn(q);
-
-	spin_unlock_irq(q->queue_lock);
-
-	wake_up(&rl->wait[0]);
-	wake_up(&rl->wait[1]);
-	wake_up(&rl->drain);
-}
-
-static int wait_drain(request_queue_t *q, struct request_list *rl, int dispatch)
-{
-	int wait = rl->count[READ] + rl->count[WRITE];
-
-	if (dispatch)
-		wait += !list_empty(&q->queue_head);
-
-	return wait;
-}
-
-/*
- * We rely on the fact that only requests allocated through blk_alloc_request()
- * have io scheduler private data structures associated with them. Any other
- * type of request (allocated on stack or through kmalloc()) should not go
- * to the io scheduler core, but be attached to the queue head instead.
- */
-void blk_wait_queue_drained(request_queue_t *q, int wait_dispatch)
-{
-	struct request_list *rl = &q->rq;
-	DEFINE_WAIT(wait);
-
-	spin_lock_irq(q->queue_lock);
-	set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
-
-	while (wait_drain(q, rl, wait_dispatch)) {
-		prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE);
-
-		if (wait_drain(q, rl, wait_dispatch)) {
-			__generic_unplug_device(q);
-			spin_unlock_irq(q->queue_lock);
-			io_schedule();
-			spin_lock_irq(q->queue_lock);
-		}
-
-		finish_wait(&rl->drain, &wait);
-	}
-
-	spin_unlock_irq(q->queue_lock);
-}
-
-/*
- * block waiting for the io scheduler being started again.
- */
-static inline void block_wait_queue_running(request_queue_t *q)
-{
-	DEFINE_WAIT(wait);
-
-	while (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) {
-		struct request_list *rl = &q->rq;
-
-		prepare_to_wait_exclusive(&rl->drain, &wait,
-				TASK_UNINTERRUPTIBLE);
-
-		/*
-		 * re-check the condition. avoids using prepare_to_wait()
-		 * in the fast path (queue is running)
-		 */
-		if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))
-			io_schedule();
-
-		finish_wait(&rl->drain, &wait);
-	}
-}
-
 static void handle_bad_sector(struct bio *bio)
 {
 	char b[BDEVNAME_SIZE];
@@ -2988,8 +2902,6 @@ end_io:
 		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
 			goto end_io;
 
-		block_wait_queue_running(q);
-
 		/*
 		 * If this device has partitions, remap block n
 		 * of partition p to block n+start(p) of the disk.