aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jens Axboe <axboe@suse.de> 2006-06-08 02:49:06 -0400
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-06-08 18:14:23 -0400
commit: bc1c116974a5c3f498112a6f175d3e4a8cd5bdbc (patch)
tree: 69ea68db91fb871cd24a0a5c5045abbe9c77bd3a
parent: 26e780e8ef1cc3ef581a07aafe2346bb5a07b4f9 (diff)
[PATCH] elevator switching race
There's a race between shutting down one io scheduler and firing up the next, in which a new io could enter and cause the io scheduler to be invoked with bad or NULL data. To fix this, we need to maintain the queue lock for a bit longer. Unfortunately we cannot do that, since the elevator init must be run without the lock held. This isn't easily fixable without also changing the mempool API. So split the initialization into two parts, an alloc-init operation and an attach operation. Then we can preallocate the io scheduler and related structures, and run the attach inside the lock after we detach the old one. This patch has survived 30 minutes of 1 second io scheduler switching with a very busy io load. Signed-off-by: Jens Axboe <axboe@suse.de> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- block/as-iosched.c 13
-rw-r--r-- block/cfq-iosched.c 10
-rw-r--r-- block/deadline-iosched.c 13
-rw-r--r-- block/elevator.c 55
-rw-r--r-- block/noop-iosched.c 7
-rw-r--r-- include/linux/elevator.h 2
6 files changed, 54 insertions, 46 deletions
diff --git a/block/as-iosched.c b/block/as-iosched.c
index e25a5d79ab27..a7caf35ca0c2 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1648,17 +1648,17 @@ static void as_exit_queue(elevator_t *e)
1648 * initialize elevator private data (as_data), and alloc a arq for 1648 * initialize elevator private data (as_data), and alloc a arq for
1649 * each request on the free lists 1649 * each request on the free lists
1650 */ 1650 */
1651static int as_init_queue(request_queue_t *q, elevator_t *e) 1651static void *as_init_queue(request_queue_t *q, elevator_t *e)
1652{ 1652{
1653 struct as_data *ad; 1653 struct as_data *ad;
1654 int i; 1654 int i;
1655 1655
1656 if (!arq_pool) 1656 if (!arq_pool)
1657 return -ENOMEM; 1657 return NULL;
1658 1658
1659 ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node); 1659 ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node);
1660 if (!ad) 1660 if (!ad)
1661 return -ENOMEM; 1661 return NULL;
1662 memset(ad, 0, sizeof(*ad)); 1662 memset(ad, 0, sizeof(*ad));
1663 1663
1664 ad->q = q; /* Identify what queue the data belongs to */ 1664 ad->q = q; /* Identify what queue the data belongs to */
@@ -1667,7 +1667,7 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
1667 GFP_KERNEL, q->node); 1667 GFP_KERNEL, q->node);
1668 if (!ad->hash) { 1668 if (!ad->hash) {
1669 kfree(ad); 1669 kfree(ad);
1670 return -ENOMEM; 1670 return NULL;
1671 } 1671 }
1672 1672
1673 ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, 1673 ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
@@ -1675,7 +1675,7 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
1675 if (!ad->arq_pool) { 1675 if (!ad->arq_pool) {
1676 kfree(ad->hash); 1676 kfree(ad->hash);
1677 kfree(ad); 1677 kfree(ad);
1678 return -ENOMEM; 1678 return NULL;
1679 } 1679 }
1680 1680
1681 /* anticipatory scheduling helpers */ 1681 /* anticipatory scheduling helpers */
@@ -1696,14 +1696,13 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
1696 ad->antic_expire = default_antic_expire; 1696 ad->antic_expire = default_antic_expire;
1697 ad->batch_expire[REQ_SYNC] = default_read_batch_expire; 1697 ad->batch_expire[REQ_SYNC] = default_read_batch_expire;
1698 ad->batch_expire[REQ_ASYNC] = default_write_batch_expire; 1698 ad->batch_expire[REQ_ASYNC] = default_write_batch_expire;
1699 e->elevator_data = ad;
1700 1699
1701 ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC]; 1700 ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC];
1702 ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10; 1701 ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10;
1703 if (ad->write_batch_count < 2) 1702 if (ad->write_batch_count < 2)
1704 ad->write_batch_count = 2; 1703 ad->write_batch_count = 2;
1705 1704
1706 return 0; 1705 return ad;
1707} 1706}
1708 1707
1709/* 1708/*
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 8e9d84825e1c..a46d030e092a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2251,14 +2251,14 @@ static void cfq_exit_queue(elevator_t *e)
2251 kfree(cfqd); 2251 kfree(cfqd);
2252} 2252}
2253 2253
2254static int cfq_init_queue(request_queue_t *q, elevator_t *e) 2254static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
2255{ 2255{
2256 struct cfq_data *cfqd; 2256 struct cfq_data *cfqd;
2257 int i; 2257 int i;
2258 2258
2259 cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); 2259 cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
2260 if (!cfqd) 2260 if (!cfqd)
2261 return -ENOMEM; 2261 return NULL;
2262 2262
2263 memset(cfqd, 0, sizeof(*cfqd)); 2263 memset(cfqd, 0, sizeof(*cfqd));
2264 2264
@@ -2288,8 +2288,6 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e)
2288 for (i = 0; i < CFQ_QHASH_ENTRIES; i++) 2288 for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
2289 INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); 2289 INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
2290 2290
2291 e->elevator_data = cfqd;
2292
2293 cfqd->queue = q; 2291 cfqd->queue = q;
2294 2292
2295 cfqd->max_queued = q->nr_requests / 4; 2293 cfqd->max_queued = q->nr_requests / 4;
@@ -2316,14 +2314,14 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e)
2316 cfqd->cfq_slice_async_rq = cfq_slice_async_rq; 2314 cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
2317 cfqd->cfq_slice_idle = cfq_slice_idle; 2315 cfqd->cfq_slice_idle = cfq_slice_idle;
2318 2316
2319 return 0; 2317 return cfqd;
2320out_crqpool: 2318out_crqpool:
2321 kfree(cfqd->cfq_hash); 2319 kfree(cfqd->cfq_hash);
2322out_cfqhash: 2320out_cfqhash:
2323 kfree(cfqd->crq_hash); 2321 kfree(cfqd->crq_hash);
2324out_crqhash: 2322out_crqhash:
2325 kfree(cfqd); 2323 kfree(cfqd);
2326 return -ENOMEM; 2324 return NULL;
2327} 2325}
2328 2326
2329static void cfq_slab_kill(void) 2327static void cfq_slab_kill(void)
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 399fa1e60e1f..3bd0415a9828 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -613,24 +613,24 @@ static void deadline_exit_queue(elevator_t *e)
613 * initialize elevator private data (deadline_data), and alloc a drq for 613 * initialize elevator private data (deadline_data), and alloc a drq for
614 * each request on the free lists 614 * each request on the free lists
615 */ 615 */
616static int deadline_init_queue(request_queue_t *q, elevator_t *e) 616static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
617{ 617{
618 struct deadline_data *dd; 618 struct deadline_data *dd;
619 int i; 619 int i;
620 620
621 if (!drq_pool) 621 if (!drq_pool)
622 return -ENOMEM; 622 return NULL;
623 623
624 dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node); 624 dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
625 if (!dd) 625 if (!dd)
626 return -ENOMEM; 626 return NULL;
627 memset(dd, 0, sizeof(*dd)); 627 memset(dd, 0, sizeof(*dd));
628 628
629 dd->hash = kmalloc_node(sizeof(struct list_head)*DL_HASH_ENTRIES, 629 dd->hash = kmalloc_node(sizeof(struct list_head)*DL_HASH_ENTRIES,
630 GFP_KERNEL, q->node); 630 GFP_KERNEL, q->node);
631 if (!dd->hash) { 631 if (!dd->hash) {
632 kfree(dd); 632 kfree(dd);
633 return -ENOMEM; 633 return NULL;
634 } 634 }
635 635
636 dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, 636 dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
@@ -638,7 +638,7 @@ static int deadline_init_queue(request_queue_t *q, elevator_t *e)
638 if (!dd->drq_pool) { 638 if (!dd->drq_pool) {
639 kfree(dd->hash); 639 kfree(dd->hash);
640 kfree(dd); 640 kfree(dd);
641 return -ENOMEM; 641 return NULL;
642 } 642 }
643 643
644 for (i = 0; i < DL_HASH_ENTRIES; i++) 644 for (i = 0; i < DL_HASH_ENTRIES; i++)
@@ -653,8 +653,7 @@ static int deadline_init_queue(request_queue_t *q, elevator_t *e)
653 dd->writes_starved = writes_starved; 653 dd->writes_starved = writes_starved;
654 dd->front_merges = 1; 654 dd->front_merges = 1;
655 dd->fifo_batch = fifo_batch; 655 dd->fifo_batch = fifo_batch;
656 e->elevator_data = dd; 656 return dd;
657 return 0;
658} 657}
659 658
660static void deadline_put_request(request_queue_t *q, struct request *rq) 659static void deadline_put_request(request_queue_t *q, struct request *rq)
diff --git a/block/elevator.c b/block/elevator.c
index 8768a367fdde..a0afdd317cef 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -121,16 +121,16 @@ static struct elevator_type *elevator_get(const char *name)
121 return e; 121 return e;
122} 122}
123 123
124static int elevator_attach(request_queue_t *q, struct elevator_queue *eq) 124static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
125{ 125{
126 int ret = 0; 126 return eq->ops->elevator_init_fn(q, eq);
127}
127 128
129static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
130 void *data)
131{
128 q->elevator = eq; 132 q->elevator = eq;
129 133 eq->elevator_data = data;
130 if (eq->ops->elevator_init_fn)
131 ret = eq->ops->elevator_init_fn(q, eq);
132
133 return ret;
134} 134}
135 135
136static char chosen_elevator[16]; 136static char chosen_elevator[16];
@@ -181,6 +181,7 @@ int elevator_init(request_queue_t *q, char *name)
181 struct elevator_type *e = NULL; 181 struct elevator_type *e = NULL;
182 struct elevator_queue *eq; 182 struct elevator_queue *eq;
183 int ret = 0; 183 int ret = 0;
184 void *data;
184 185
185 INIT_LIST_HEAD(&q->queue_head); 186 INIT_LIST_HEAD(&q->queue_head);
186 q->last_merge = NULL; 187 q->last_merge = NULL;
@@ -202,10 +203,13 @@ int elevator_init(request_queue_t *q, char *name)
202 if (!eq) 203 if (!eq)
203 return -ENOMEM; 204 return -ENOMEM;
204 205
205 ret = elevator_attach(q, eq); 206 data = elevator_init_queue(q, eq);
206 if (ret) 207 if (!data) {
207 kobject_put(&eq->kobj); 208 kobject_put(&eq->kobj);
209 return -ENOMEM;
210 }
208 211
212 elevator_attach(q, eq, data);
209 return ret; 213 return ret;
210} 214}
211 215
@@ -722,13 +726,16 @@ int elv_register_queue(struct request_queue *q)
722 return error; 726 return error;
723} 727}
724 728
729static void __elv_unregister_queue(elevator_t *e)
730{
731 kobject_uevent(&e->kobj, KOBJ_REMOVE);
732 kobject_del(&e->kobj);
733}
734
725void elv_unregister_queue(struct request_queue *q) 735void elv_unregister_queue(struct request_queue *q)
726{ 736{
727 if (q) { 737 if (q)
728 elevator_t *e = q->elevator; 738 __elv_unregister_queue(q->elevator);
729 kobject_uevent(&e->kobj, KOBJ_REMOVE);
730 kobject_del(&e->kobj);
731 }
732} 739}
733 740
734int elv_register(struct elevator_type *e) 741int elv_register(struct elevator_type *e)
@@ -780,6 +787,7 @@ EXPORT_SYMBOL_GPL(elv_unregister);
780static int elevator_switch(request_queue_t *q, struct elevator_type *new_e) 787static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
781{ 788{
782 elevator_t *old_elevator, *e; 789 elevator_t *old_elevator, *e;
790 void *data;
783 791
784 /* 792 /*
785 * Allocate new elevator 793 * Allocate new elevator
@@ -788,6 +796,12 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
788 if (!e) 796 if (!e)
789 return 0; 797 return 0;
790 798
799 data = elevator_init_queue(q, e);
800 if (!data) {
801 kobject_put(&e->kobj);
802 return 0;
803 }
804
791 /* 805 /*
792 * Turn on BYPASS and drain all requests w/ elevator private data 806 * Turn on BYPASS and drain all requests w/ elevator private data
793 */ 807 */
@@ -806,19 +820,19 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
806 elv_drain_elevator(q); 820 elv_drain_elevator(q);
807 } 821 }
808 822
809 spin_unlock_irq(q->queue_lock);
810
811 /* 823 /*
812 * unregister old elevator data 824 * Remember old elevator.
813 */ 825 */
814 elv_unregister_queue(q);
815 old_elevator = q->elevator; 826 old_elevator = q->elevator;
816 827
817 /* 828 /*
818 * attach and start new elevator 829 * attach and start new elevator
819 */ 830 */
820 if (elevator_attach(q, e)) 831 elevator_attach(q, e, data);
821 goto fail; 832
833 spin_unlock_irq(q->queue_lock);
834
835 __elv_unregister_queue(old_elevator);
822 836
823 if (elv_register_queue(q)) 837 if (elv_register_queue(q))
824 goto fail_register; 838 goto fail_register;
@@ -837,7 +851,6 @@ fail_register:
837 */ 851 */
838 elevator_exit(e); 852 elevator_exit(e);
839 e = NULL; 853 e = NULL;
840fail:
841 q->elevator = old_elevator; 854 q->elevator = old_elevator;
842 elv_register_queue(q); 855 elv_register_queue(q);
843 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 856 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index f370e4a7fe6d..56a7c620574f 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -65,16 +65,15 @@ noop_latter_request(request_queue_t *q, struct request *rq)
65 return list_entry(rq->queuelist.next, struct request, queuelist); 65 return list_entry(rq->queuelist.next, struct request, queuelist);
66} 66}
67 67
68static int noop_init_queue(request_queue_t *q, elevator_t *e) 68static void *noop_init_queue(request_queue_t *q, elevator_t *e)
69{ 69{
70 struct noop_data *nd; 70 struct noop_data *nd;
71 71
72 nd = kmalloc(sizeof(*nd), GFP_KERNEL); 72 nd = kmalloc(sizeof(*nd), GFP_KERNEL);
73 if (!nd) 73 if (!nd)
74 return -ENOMEM; 74 return NULL;
75 INIT_LIST_HEAD(&nd->queue); 75 INIT_LIST_HEAD(&nd->queue);
76 e->elevator_data = nd; 76 return nd;
77 return 0;
78} 77}
79 78
80static void noop_exit_queue(elevator_t *e) 79static void noop_exit_queue(elevator_t *e)
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index ad133fcfb239..1713ace808bf 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -21,7 +21,7 @@ typedef void (elevator_put_req_fn) (request_queue_t *, struct request *);
21typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *); 21typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *);
22typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *); 22typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *);
23 23
24typedef int (elevator_init_fn) (request_queue_t *, elevator_t *); 24typedef void *(elevator_init_fn) (request_queue_t *, elevator_t *);
25typedef void (elevator_exit_fn) (elevator_t *); 25typedef void (elevator_exit_fn) (elevator_t *);
26 26
27struct elevator_ops 27struct elevator_ops