aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/ll_rw_blk.c
diff options
context:
space:
mode:
author Tejun Heo <htejun@gmail.com> 2005-10-28 02:29:39 -0400
committer Jens Axboe <axboe@nelson.home.kernel.dk> 2005-10-28 02:48:12 -0400
commit cb98fc8bb9c141009e2bda99c0db39d387e142cf (patch)
tree 8957f8a79f39c3e6633a0dbb165ced8b530aca0c /drivers/block/ll_rw_blk.c
parent cb19833dccb32f97cacbfff834b53523915f13f6 (diff)
[BLOCK] Reimplement elevator switch
This patch reimplements the elevator switch. It assumes that the generic dispatch queue patchset is applied.

* Each request is tagged with the REQ_ELVPRIV flag if it has its elevator private data set.
* Requests which don't have the REQ_ELVPRIV flag set never enter the iosched. They are always directly back-inserted into the dispatch queue. Of course, elevator_put_req_fn is called only for requests which have REQ_ELVPRIV set.
* The request queue maintains the current number of requests which have their elevator data set (elevator_set_req_fn called) in q->rq.elvpriv.
* If a request queue has QUEUE_FLAG_BYPASS set, elevator private data is not allocated for new requests.

To switch to another iosched, we set QUEUE_FLAG_BYPASS and wait until elvpriv goes to zero; then we attach the new iosched and clear QUEUE_FLAG_BYPASS.

The new implementation is much simpler and the main code paths are less cluttered, IMHO.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jens Axboe <axboe@suse.de>
Diffstat (limited to 'drivers/block/ll_rw_blk.c')
-rw-r--r-- drivers/block/ll_rw_blk.c 142
1 files changed, 27 insertions, 115 deletions
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index d2a66fd309c3..f7c9931cb380 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -263,8 +263,6 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
263 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); 263 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
264 264
265 blk_queue_activity_fn(q, NULL, NULL); 265 blk_queue_activity_fn(q, NULL, NULL);
266
267 INIT_LIST_HEAD(&q->drain_list);
268} 266}
269 267
270EXPORT_SYMBOL(blk_queue_make_request); 268EXPORT_SYMBOL(blk_queue_make_request);
@@ -1050,6 +1048,7 @@ static char *rq_flags[] = {
1050 "REQ_STARTED", 1048 "REQ_STARTED",
1051 "REQ_DONTPREP", 1049 "REQ_DONTPREP",
1052 "REQ_QUEUED", 1050 "REQ_QUEUED",
1051 "REQ_ELVPRIV",
1053 "REQ_PC", 1052 "REQ_PC",
1054 "REQ_BLOCK_PC", 1053 "REQ_BLOCK_PC",
1055 "REQ_SENSE", 1054 "REQ_SENSE",
@@ -1640,9 +1639,9 @@ static int blk_init_free_list(request_queue_t *q)
1640 1639
1641 rl->count[READ] = rl->count[WRITE] = 0; 1640 rl->count[READ] = rl->count[WRITE] = 0;
1642 rl->starved[READ] = rl->starved[WRITE] = 0; 1641 rl->starved[READ] = rl->starved[WRITE] = 0;
1642 rl->elvpriv = 0;
1643 init_waitqueue_head(&rl->wait[READ]); 1643 init_waitqueue_head(&rl->wait[READ]);
1644 init_waitqueue_head(&rl->wait[WRITE]); 1644 init_waitqueue_head(&rl->wait[WRITE]);
1645 init_waitqueue_head(&rl->drain);
1646 1645
1647 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, 1646 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
1648 mempool_free_slab, request_cachep, q->node); 1647 mempool_free_slab, request_cachep, q->node);
@@ -1785,12 +1784,14 @@ EXPORT_SYMBOL(blk_get_queue);
1785 1784
1786static inline void blk_free_request(request_queue_t *q, struct request *rq) 1785static inline void blk_free_request(request_queue_t *q, struct request *rq)
1787{ 1786{
1788 elv_put_request(q, rq); 1787 if (rq->flags & REQ_ELVPRIV)
1788 elv_put_request(q, rq);
1789 mempool_free(rq, q->rq.rq_pool); 1789 mempool_free(rq, q->rq.rq_pool);
1790} 1790}
1791 1791
1792static inline struct request * 1792static inline struct request *
1793blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask) 1793blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
1794 int priv, int gfp_mask)
1794{ 1795{
1795 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); 1796 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
1796 1797
@@ -1803,11 +1804,15 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask)
1803 */ 1804 */
1804 rq->flags = rw; 1805 rq->flags = rw;
1805 1806
1806 if (!elv_set_request(q, rq, bio, gfp_mask)) 1807 if (priv) {
1807 return rq; 1808 if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
1809 mempool_free(rq, q->rq.rq_pool);
1810 return NULL;
1811 }
1812 rq->flags |= REQ_ELVPRIV;
1813 }
1808 1814
1809 mempool_free(rq, q->rq.rq_pool); 1815 return rq;
1810 return NULL;
1811} 1816}
1812 1817
1813/* 1818/*
@@ -1863,22 +1868,18 @@ static void __freed_request(request_queue_t *q, int rw)
1863 * A request has just been released. Account for it, update the full and 1868 * A request has just been released. Account for it, update the full and
1864 * congestion status, wake up any waiters. Called under q->queue_lock. 1869 * congestion status, wake up any waiters. Called under q->queue_lock.
1865 */ 1870 */
1866static void freed_request(request_queue_t *q, int rw) 1871static void freed_request(request_queue_t *q, int rw, int priv)
1867{ 1872{
1868 struct request_list *rl = &q->rq; 1873 struct request_list *rl = &q->rq;
1869 1874
1870 rl->count[rw]--; 1875 rl->count[rw]--;
1876 if (priv)
1877 rl->elvpriv--;
1871 1878
1872 __freed_request(q, rw); 1879 __freed_request(q, rw);
1873 1880
1874 if (unlikely(rl->starved[rw ^ 1])) 1881 if (unlikely(rl->starved[rw ^ 1]))
1875 __freed_request(q, rw ^ 1); 1882 __freed_request(q, rw ^ 1);
1876
1877 if (!rl->count[READ] && !rl->count[WRITE]) {
1878 smp_mb();
1879 if (unlikely(waitqueue_active(&rl->drain)))
1880 wake_up(&rl->drain);
1881 }
1882} 1883}
1883 1884
1884#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) 1885#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
@@ -1893,9 +1894,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
1893 struct request *rq = NULL; 1894 struct request *rq = NULL;
1894 struct request_list *rl = &q->rq; 1895 struct request_list *rl = &q->rq;
1895 struct io_context *ioc = current_io_context(GFP_ATOMIC); 1896 struct io_context *ioc = current_io_context(GFP_ATOMIC);
1896 1897 int priv;
1897 if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)))
1898 goto out;
1899 1898
1900 if (rl->count[rw]+1 >= q->nr_requests) { 1899 if (rl->count[rw]+1 >= q->nr_requests) {
1901 /* 1900 /*
@@ -1940,9 +1939,14 @@ get_rq:
1940 rl->starved[rw] = 0; 1939 rl->starved[rw] = 0;
1941 if (rl->count[rw] >= queue_congestion_on_threshold(q)) 1940 if (rl->count[rw] >= queue_congestion_on_threshold(q))
1942 set_queue_congested(q, rw); 1941 set_queue_congested(q, rw);
1942
1943 priv = !test_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
1944 if (priv)
1945 rl->elvpriv++;
1946
1943 spin_unlock_irq(q->queue_lock); 1947 spin_unlock_irq(q->queue_lock);
1944 1948
1945 rq = blk_alloc_request(q, rw, bio, gfp_mask); 1949 rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
1946 if (!rq) { 1950 if (!rq) {
1947 /* 1951 /*
1948 * Allocation failed presumably due to memory. Undo anything 1952 * Allocation failed presumably due to memory. Undo anything
@@ -1952,7 +1956,7 @@ get_rq:
1952 * wait queue, but this is pretty rare. 1956 * wait queue, but this is pretty rare.
1953 */ 1957 */
1954 spin_lock_irq(q->queue_lock); 1958 spin_lock_irq(q->queue_lock);
1955 freed_request(q, rw); 1959 freed_request(q, rw, priv);
1956 1960
1957 /* 1961 /*
1958 * in the very unlikely event that allocation failed and no 1962 * in the very unlikely event that allocation failed and no
@@ -2470,11 +2474,12 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
2470 */ 2474 */
2471 if (rl) { 2475 if (rl) {
2472 int rw = rq_data_dir(req); 2476 int rw = rq_data_dir(req);
2477 int priv = req->flags & REQ_ELVPRIV;
2473 2478
2474 BUG_ON(!list_empty(&req->queuelist)); 2479 BUG_ON(!list_empty(&req->queuelist));
2475 2480
2476 blk_free_request(q, req); 2481 blk_free_request(q, req);
2477 freed_request(q, rw); 2482 freed_request(q, rw, priv);
2478 } 2483 }
2479} 2484}
2480 2485
@@ -2802,97 +2807,6 @@ static inline void blk_partition_remap(struct bio *bio)
2802 } 2807 }
2803} 2808}
2804 2809
2805void blk_finish_queue_drain(request_queue_t *q)
2806{
2807 struct request_list *rl = &q->rq;
2808 struct request *rq;
2809 int requeued = 0;
2810
2811 spin_lock_irq(q->queue_lock);
2812 clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
2813
2814 while (!list_empty(&q->drain_list)) {
2815 rq = list_entry_rq(q->drain_list.next);
2816
2817 list_del_init(&rq->queuelist);
2818 elv_requeue_request(q, rq);
2819 requeued++;
2820 }
2821
2822 if (requeued)
2823 q->request_fn(q);
2824
2825 spin_unlock_irq(q->queue_lock);
2826
2827 wake_up(&rl->wait[0]);
2828 wake_up(&rl->wait[1]);
2829 wake_up(&rl->drain);
2830}
2831
2832static int wait_drain(request_queue_t *q, struct request_list *rl, int dispatch)
2833{
2834 int wait = rl->count[READ] + rl->count[WRITE];
2835
2836 if (dispatch)
2837 wait += !list_empty(&q->queue_head);
2838
2839 return wait;
2840}
2841
2842/*
2843 * We rely on the fact that only requests allocated through blk_alloc_request()
2844 * have io scheduler private data structures associated with them. Any other
2845 * type of request (allocated on stack or through kmalloc()) should not go
2846 * to the io scheduler core, but be attached to the queue head instead.
2847 */
2848void blk_wait_queue_drained(request_queue_t *q, int wait_dispatch)
2849{
2850 struct request_list *rl = &q->rq;
2851 DEFINE_WAIT(wait);
2852
2853 spin_lock_irq(q->queue_lock);
2854 set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
2855
2856 while (wait_drain(q, rl, wait_dispatch)) {
2857 prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE);
2858
2859 if (wait_drain(q, rl, wait_dispatch)) {
2860 __generic_unplug_device(q);
2861 spin_unlock_irq(q->queue_lock);
2862 io_schedule();
2863 spin_lock_irq(q->queue_lock);
2864 }
2865
2866 finish_wait(&rl->drain, &wait);
2867 }
2868
2869 spin_unlock_irq(q->queue_lock);
2870}
2871
2872/*
2873 * block waiting for the io scheduler being started again.
2874 */
2875static inline void block_wait_queue_running(request_queue_t *q)
2876{
2877 DEFINE_WAIT(wait);
2878
2879 while (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) {
2880 struct request_list *rl = &q->rq;
2881
2882 prepare_to_wait_exclusive(&rl->drain, &wait,
2883 TASK_UNINTERRUPTIBLE);
2884
2885 /*
2886 * re-check the condition. avoids using prepare_to_wait()
2887 * in the fast path (queue is running)
2888 */
2889 if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))
2890 io_schedule();
2891
2892 finish_wait(&rl->drain, &wait);
2893 }
2894}
2895
2896static void handle_bad_sector(struct bio *bio) 2810static void handle_bad_sector(struct bio *bio)
2897{ 2811{
2898 char b[BDEVNAME_SIZE]; 2812 char b[BDEVNAME_SIZE];
@@ -2988,8 +2902,6 @@ end_io:
2988 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) 2902 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
2989 goto end_io; 2903 goto end_io;
2990 2904
2991 block_wait_queue_running(q);
2992
2993 /* 2905 /*
2994 * If this device has partitions, remap block n 2906 * If this device has partitions, remap block n
2995 * of partition p to block n+start(p) of the disk. 2907 * of partition p to block n+start(p) of the disk.