about | summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
author Tejun Heo <htejun@gmail.com> 2005-10-28 02:29:39 -0400
committer Jens Axboe <axboe@nelson.home.kernel.dk> 2005-10-28 02:48:12 -0400
commit cb98fc8bb9c141009e2bda99c0db39d387e142cf (patch)
tree 8957f8a79f39c3e6633a0dbb165ced8b530aca0c /drivers
parent cb19833dccb32f97cacbfff834b53523915f13f6 (diff)
[BLOCK] Reimplement elevator switch
This patch reimplements elevator switch. This patch assumes the generic dispatch queue patchset is applied.

* Each request is tagged with the REQ_ELVPRIV flag if it has its elevator private data set.
* Requests which don't have the REQ_ELVPRIV flag set never enter the iosched. They are always directly back-inserted into the dispatch queue. Of course, elevator_put_req_fn is called only for requests which have REQ_ELVPRIV set.
* The request queue maintains the current number of requests which have their elevator data set (elevator_set_req_fn called) in q->rq.elvpriv.
* If a request queue has QUEUE_FLAG_BYPASS set, elevator private data is not allocated for new requests.

To switch to another iosched, we set QUEUE_FLAG_BYPASS and wait until elvpriv goes to zero; then we attach the new iosched and clear QUEUE_FLAG_BYPASS. The new implementation is much simpler and the main code paths are less cluttered, IMHO.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jens Axboe <axboe@suse.de>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/block/elevator.c 78
-rw-r--r-- drivers/block/ll_rw_blk.c 142
2 files changed, 62 insertions, 158 deletions
diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c
index af2388e73f61..272d93946621 100644
--- a/drivers/block/elevator.c
+++ b/drivers/block/elevator.c
@@ -34,6 +34,7 @@
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/init.h> 35#include <linux/init.h>
36#include <linux/compiler.h> 36#include <linux/compiler.h>
37#include <linux/delay.h>
37 38
38#include <asm/uaccess.h> 39#include <asm/uaccess.h>
39 40
@@ -131,11 +132,7 @@ static int elevator_attach(request_queue_t *q, struct elevator_type *e,
131 eq->ops = &e->ops; 132 eq->ops = &e->ops;
132 eq->elevator_type = e; 133 eq->elevator_type = e;
133 134
134 INIT_LIST_HEAD(&q->queue_head);
135 q->last_merge = NULL;
136 q->elevator = eq; 135 q->elevator = eq;
137 q->end_sector = 0;
138 q->boundary_rq = NULL;
139 136
140 if (eq->ops->elevator_init_fn) 137 if (eq->ops->elevator_init_fn)
141 ret = eq->ops->elevator_init_fn(q, eq); 138 ret = eq->ops->elevator_init_fn(q, eq);
@@ -184,6 +181,12 @@ int elevator_init(request_queue_t *q, char *name)
184 struct elevator_queue *eq; 181 struct elevator_queue *eq;
185 int ret = 0; 182 int ret = 0;
186 183
184 INIT_LIST_HEAD(&q->queue_head);
185 q->last_merge = NULL;
186 q->end_sector = 0;
187 q->boundary_rq = NULL;
188 q->max_back_kb = 0;
189
187 elevator_setup_default(); 190 elevator_setup_default();
188 191
189 if (!name) 192 if (!name)
@@ -336,23 +339,14 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
336 q->end_sector = rq_end_sector(rq); 339 q->end_sector = rq_end_sector(rq);
337 q->boundary_rq = rq; 340 q->boundary_rq = rq;
338 } 341 }
339 } 342 } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
343 where = ELEVATOR_INSERT_BACK;
340 344
341 if (plug) 345 if (plug)
342 blk_plug_device(q); 346 blk_plug_device(q);
343 347
344 rq->q = q; 348 rq->q = q;
345 349
346 if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) {
347 /*
348 * if drain is set, store the request "locally". when the drain
349 * is finished, the requests will be handed ordered to the io
350 * scheduler
351 */
352 list_add_tail(&rq->queuelist, &q->drain_list);
353 return;
354 }
355
356 switch (where) { 350 switch (where) {
357 case ELEVATOR_INSERT_FRONT: 351 case ELEVATOR_INSERT_FRONT:
358 rq->flags |= REQ_SOFTBARRIER; 352 rq->flags |= REQ_SOFTBARRIER;
@@ -659,25 +653,36 @@ EXPORT_SYMBOL_GPL(elv_unregister);
659 * switch to new_e io scheduler. be careful not to introduce deadlocks - 653 * switch to new_e io scheduler. be careful not to introduce deadlocks -
660 * we don't free the old io scheduler, before we have allocated what we 654 * we don't free the old io scheduler, before we have allocated what we
661 * need for the new one. this way we have a chance of going back to the old 655 * need for the new one. this way we have a chance of going back to the old
662 * one, if the new one fails init for some reason. we also do an intermediate 656 * one, if the new one fails init for some reason.
663 * switch to noop to ensure safety with stack-allocated requests, since they
664 * don't originate from the block layer allocator. noop is safe here, because
665 * it never needs to touch the elevator itself for completion events. DRAIN
666 * flags will make sure we don't touch it for additions either.
667 */ 657 */
668static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) 658static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
669{ 659{
670 elevator_t *e = kmalloc(sizeof(elevator_t), GFP_KERNEL); 660 elevator_t *old_elevator, *e;
671 struct elevator_type *noop_elevator = NULL;
672 elevator_t *old_elevator;
673 661
662 /*
663 * Allocate new elevator
664 */
665 e = kmalloc(sizeof(elevator_t), GFP_KERNEL);
674 if (!e) 666 if (!e)
675 goto error; 667 goto error;
676 668
677 /* 669 /*
678 * first step, drain requests from the block freelist 670 * Turn on BYPASS and drain all requests w/ elevator private data
679 */ 671 */
680 blk_wait_queue_drained(q, 0); 672 spin_lock_irq(q->queue_lock);
673
674 set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
675
676 while (q->elevator->ops->elevator_dispatch_fn(q, 1))
677 ;
678
679 while (q->rq.elvpriv) {
680 spin_unlock_irq(q->queue_lock);
681 msleep(100);
682 spin_lock_irq(q->queue_lock);
683 }
684
685 spin_unlock_irq(q->queue_lock);
681 686
682 /* 687 /*
683 * unregister old elevator data 688 * unregister old elevator data
@@ -686,18 +691,6 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
686 old_elevator = q->elevator; 691 old_elevator = q->elevator;
687 692
688 /* 693 /*
689 * next step, switch to noop since it uses no private rq structures
690 * and doesn't allocate any memory for anything. then wait for any
691 * non-fs requests in-flight
692 */
693 noop_elevator = elevator_get("noop");
694 spin_lock_irq(q->queue_lock);
695 elevator_attach(q, noop_elevator, e);
696 spin_unlock_irq(q->queue_lock);
697
698 blk_wait_queue_drained(q, 1);
699
700 /*
701 * attach and start new elevator 694 * attach and start new elevator
702 */ 695 */
703 if (elevator_attach(q, new_e, e)) 696 if (elevator_attach(q, new_e, e))
@@ -707,11 +700,10 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
707 goto fail_register; 700 goto fail_register;
708 701
709 /* 702 /*
710 * finally exit old elevator and start queue again 703 * finally exit old elevator and turn off BYPASS.
711 */ 704 */
712 elevator_exit(old_elevator); 705 elevator_exit(old_elevator);
713 blk_finish_queue_drain(q); 706 clear_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
714 elevator_put(noop_elevator);
715 return; 707 return;
716 708
717fail_register: 709fail_register:
@@ -720,13 +712,13 @@ fail_register:
720 * one again (along with re-adding the sysfs dir) 712 * one again (along with re-adding the sysfs dir)
721 */ 713 */
722 elevator_exit(e); 714 elevator_exit(e);
715 e = NULL;
723fail: 716fail:
724 q->elevator = old_elevator; 717 q->elevator = old_elevator;
725 elv_register_queue(q); 718 elv_register_queue(q);
726 blk_finish_queue_drain(q); 719 clear_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
720 kfree(e);
727error: 721error:
728 if (noop_elevator)
729 elevator_put(noop_elevator);
730 elevator_put(new_e); 722 elevator_put(new_e);
731 printk(KERN_ERR "elevator: switch to %s failed\n",new_e->elevator_name); 723 printk(KERN_ERR "elevator: switch to %s failed\n",new_e->elevator_name);
732} 724}
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index d2a66fd309c3..f7c9931cb380 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -263,8 +263,6 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
263 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); 263 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
264 264
265 blk_queue_activity_fn(q, NULL, NULL); 265 blk_queue_activity_fn(q, NULL, NULL);
266
267 INIT_LIST_HEAD(&q->drain_list);
268} 266}
269 267
270EXPORT_SYMBOL(blk_queue_make_request); 268EXPORT_SYMBOL(blk_queue_make_request);
@@ -1050,6 +1048,7 @@ static char *rq_flags[] = {
1050 "REQ_STARTED", 1048 "REQ_STARTED",
1051 "REQ_DONTPREP", 1049 "REQ_DONTPREP",
1052 "REQ_QUEUED", 1050 "REQ_QUEUED",
1051 "REQ_ELVPRIV",
1053 "REQ_PC", 1052 "REQ_PC",
1054 "REQ_BLOCK_PC", 1053 "REQ_BLOCK_PC",
1055 "REQ_SENSE", 1054 "REQ_SENSE",
@@ -1640,9 +1639,9 @@ static int blk_init_free_list(request_queue_t *q)
1640 1639
1641 rl->count[READ] = rl->count[WRITE] = 0; 1640 rl->count[READ] = rl->count[WRITE] = 0;
1642 rl->starved[READ] = rl->starved[WRITE] = 0; 1641 rl->starved[READ] = rl->starved[WRITE] = 0;
1642 rl->elvpriv = 0;
1643 init_waitqueue_head(&rl->wait[READ]); 1643 init_waitqueue_head(&rl->wait[READ]);
1644 init_waitqueue_head(&rl->wait[WRITE]); 1644 init_waitqueue_head(&rl->wait[WRITE]);
1645 init_waitqueue_head(&rl->drain);
1646 1645
1647 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, 1646 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
1648 mempool_free_slab, request_cachep, q->node); 1647 mempool_free_slab, request_cachep, q->node);
@@ -1785,12 +1784,14 @@ EXPORT_SYMBOL(blk_get_queue);
1785 1784
1786static inline void blk_free_request(request_queue_t *q, struct request *rq) 1785static inline void blk_free_request(request_queue_t *q, struct request *rq)
1787{ 1786{
1788 elv_put_request(q, rq); 1787 if (rq->flags & REQ_ELVPRIV)
1788 elv_put_request(q, rq);
1789 mempool_free(rq, q->rq.rq_pool); 1789 mempool_free(rq, q->rq.rq_pool);
1790} 1790}
1791 1791
1792static inline struct request * 1792static inline struct request *
1793blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask) 1793blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
1794 int priv, int gfp_mask)
1794{ 1795{
1795 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); 1796 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
1796 1797
@@ -1803,11 +1804,15 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask)
1803 */ 1804 */
1804 rq->flags = rw; 1805 rq->flags = rw;
1805 1806
1806 if (!elv_set_request(q, rq, bio, gfp_mask)) 1807 if (priv) {
1807 return rq; 1808 if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
1809 mempool_free(rq, q->rq.rq_pool);
1810 return NULL;
1811 }
1812 rq->flags |= REQ_ELVPRIV;
1813 }
1808 1814
1809 mempool_free(rq, q->rq.rq_pool); 1815 return rq;
1810 return NULL;
1811} 1816}
1812 1817
1813/* 1818/*
@@ -1863,22 +1868,18 @@ static void __freed_request(request_queue_t *q, int rw)
1863 * A request has just been released. Account for it, update the full and 1868 * A request has just been released. Account for it, update the full and
1864 * congestion status, wake up any waiters. Called under q->queue_lock. 1869 * congestion status, wake up any waiters. Called under q->queue_lock.
1865 */ 1870 */
1866static void freed_request(request_queue_t *q, int rw) 1871static void freed_request(request_queue_t *q, int rw, int priv)
1867{ 1872{
1868 struct request_list *rl = &q->rq; 1873 struct request_list *rl = &q->rq;
1869 1874
1870 rl->count[rw]--; 1875 rl->count[rw]--;
1876 if (priv)
1877 rl->elvpriv--;
1871 1878
1872 __freed_request(q, rw); 1879 __freed_request(q, rw);
1873 1880
1874 if (unlikely(rl->starved[rw ^ 1])) 1881 if (unlikely(rl->starved[rw ^ 1]))
1875 __freed_request(q, rw ^ 1); 1882 __freed_request(q, rw ^ 1);
1876
1877 if (!rl->count[READ] && !rl->count[WRITE]) {
1878 smp_mb();
1879 if (unlikely(waitqueue_active(&rl->drain)))
1880 wake_up(&rl->drain);
1881 }
1882} 1883}
1883 1884
1884#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) 1885#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
@@ -1893,9 +1894,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
1893 struct request *rq = NULL; 1894 struct request *rq = NULL;
1894 struct request_list *rl = &q->rq; 1895 struct request_list *rl = &q->rq;
1895 struct io_context *ioc = current_io_context(GFP_ATOMIC); 1896 struct io_context *ioc = current_io_context(GFP_ATOMIC);
1896 1897 int priv;
1897 if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)))
1898 goto out;
1899 1898
1900 if (rl->count[rw]+1 >= q->nr_requests) { 1899 if (rl->count[rw]+1 >= q->nr_requests) {
1901 /* 1900 /*
@@ -1940,9 +1939,14 @@ get_rq:
1940 rl->starved[rw] = 0; 1939 rl->starved[rw] = 0;
1941 if (rl->count[rw] >= queue_congestion_on_threshold(q)) 1940 if (rl->count[rw] >= queue_congestion_on_threshold(q))
1942 set_queue_congested(q, rw); 1941 set_queue_congested(q, rw);
1942
1943 priv = !test_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
1944 if (priv)
1945 rl->elvpriv++;
1946
1943 spin_unlock_irq(q->queue_lock); 1947 spin_unlock_irq(q->queue_lock);
1944 1948
1945 rq = blk_alloc_request(q, rw, bio, gfp_mask); 1949 rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
1946 if (!rq) { 1950 if (!rq) {
1947 /* 1951 /*
1948 * Allocation failed presumably due to memory. Undo anything 1952 * Allocation failed presumably due to memory. Undo anything
@@ -1952,7 +1956,7 @@ get_rq:
1952 * wait queue, but this is pretty rare. 1956 * wait queue, but this is pretty rare.
1953 */ 1957 */
1954 spin_lock_irq(q->queue_lock); 1958 spin_lock_irq(q->queue_lock);
1955 freed_request(q, rw); 1959 freed_request(q, rw, priv);
1956 1960
1957 /* 1961 /*
1958 * in the very unlikely event that allocation failed and no 1962 * in the very unlikely event that allocation failed and no
@@ -2470,11 +2474,12 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
2470 */ 2474 */
2471 if (rl) { 2475 if (rl) {
2472 int rw = rq_data_dir(req); 2476 int rw = rq_data_dir(req);
2477 int priv = req->flags & REQ_ELVPRIV;
2473 2478
2474 BUG_ON(!list_empty(&req->queuelist)); 2479 BUG_ON(!list_empty(&req->queuelist));
2475 2480
2476 blk_free_request(q, req); 2481 blk_free_request(q, req);
2477 freed_request(q, rw); 2482 freed_request(q, rw, priv);
2478 } 2483 }
2479} 2484}
2480 2485
@@ -2802,97 +2807,6 @@ static inline void blk_partition_remap(struct bio *bio)
2802 } 2807 }
2803} 2808}
2804 2809
2805void blk_finish_queue_drain(request_queue_t *q)
2806{
2807 struct request_list *rl = &q->rq;
2808 struct request *rq;
2809 int requeued = 0;
2810
2811 spin_lock_irq(q->queue_lock);
2812 clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
2813
2814 while (!list_empty(&q->drain_list)) {
2815 rq = list_entry_rq(q->drain_list.next);
2816
2817 list_del_init(&rq->queuelist);
2818 elv_requeue_request(q, rq);
2819 requeued++;
2820 }
2821
2822 if (requeued)
2823 q->request_fn(q);
2824
2825 spin_unlock_irq(q->queue_lock);
2826
2827 wake_up(&rl->wait[0]);
2828 wake_up(&rl->wait[1]);
2829 wake_up(&rl->drain);
2830}
2831
2832static int wait_drain(request_queue_t *q, struct request_list *rl, int dispatch)
2833{
2834 int wait = rl->count[READ] + rl->count[WRITE];
2835
2836 if (dispatch)
2837 wait += !list_empty(&q->queue_head);
2838
2839 return wait;
2840}
2841
2842/*
2843 * We rely on the fact that only requests allocated through blk_alloc_request()
2844 * have io scheduler private data structures associated with them. Any other
2845 * type of request (allocated on stack or through kmalloc()) should not go
2846 * to the io scheduler core, but be attached to the queue head instead.
2847 */
2848void blk_wait_queue_drained(request_queue_t *q, int wait_dispatch)
2849{
2850 struct request_list *rl = &q->rq;
2851 DEFINE_WAIT(wait);
2852
2853 spin_lock_irq(q->queue_lock);
2854 set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
2855
2856 while (wait_drain(q, rl, wait_dispatch)) {
2857 prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE);
2858
2859 if (wait_drain(q, rl, wait_dispatch)) {
2860 __generic_unplug_device(q);
2861 spin_unlock_irq(q->queue_lock);
2862 io_schedule();
2863 spin_lock_irq(q->queue_lock);
2864 }
2865
2866 finish_wait(&rl->drain, &wait);
2867 }
2868
2869 spin_unlock_irq(q->queue_lock);
2870}
2871
2872/*
2873 * block waiting for the io scheduler being started again.
2874 */
2875static inline void block_wait_queue_running(request_queue_t *q)
2876{
2877 DEFINE_WAIT(wait);
2878
2879 while (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) {
2880 struct request_list *rl = &q->rq;
2881
2882 prepare_to_wait_exclusive(&rl->drain, &wait,
2883 TASK_UNINTERRUPTIBLE);
2884
2885 /*
2886 * re-check the condition. avoids using prepare_to_wait()
2887 * in the fast path (queue is running)
2888 */
2889 if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))
2890 io_schedule();
2891
2892 finish_wait(&rl->drain, &wait);
2893 }
2894}
2895
2896static void handle_bad_sector(struct bio *bio) 2810static void handle_bad_sector(struct bio *bio)
2897{ 2811{
2898 char b[BDEVNAME_SIZE]; 2812 char b[BDEVNAME_SIZE];
@@ -2988,8 +2902,6 @@ end_io:
2988 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) 2902 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
2989 goto end_io; 2903 goto end_io;
2990 2904
2991 block_wait_queue_running(q);
2992
2993 /* 2905 /*
2994 * If this device has partitions, remap block n 2906 * If this device has partitions, remap block n
2995 * of partition p to block n+start(p) of the disk. 2907 * of partition p to block n+start(p) of the disk.