author     Jens Axboe <axboe@suse.de>               2006-06-08 02:49:06 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>    2006-06-08 18:14:23 -0400
commit     bc1c116974a5c3f498112a6f175d3e4a8cd5bdbc (patch)
tree       69ea68db91fb871cd24a0a5c5045abbe9c77bd3a /block
parent     26e780e8ef1cc3ef581a07aafe2346bb5a07b4f9 (diff)
[PATCH] elevator switching race
There's a race between shutting down one io scheduler and firing up the
next, in which a new io could enter and cause the io scheduler to be
invoked with bad or NULL data.
To fix this, we need to hold the queue lock for a bit longer.
Unfortunately we cannot do that, since the elevator init must be run
without the lock held. This isn't easily fixable without also changing
the mempool API. So split the initialization into two parts: an
alloc-init operation and an attach operation. Then we can preallocate
the io scheduler and related structures, and run the attach inside the
lock after we detach the old one.
This patch has survived 30 minutes of io scheduler switching once per
second under a very busy io load.
Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
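The resulting elevator_switch() flow, condensed into a sketch (this is not the verbatim kernel code: alloc_new_elevator() is a hypothetical stand-in for the elevator allocation, and the BYPASS-flag bookkeeping and error unwinding are elided; see block/elevator.c in the diff below for the real sequence):

/*
 * Condensed sketch of the reworked switch sequence (illustrative only).
 */
static int elevator_switch_sketch(request_queue_t *q, struct elevator_type *new_e)
{
	elevator_t *old_elevator, *e;
	void *data;

	e = alloc_new_elevator(new_e);		/* hypothetical helper; may sleep */
	if (!e)
		return 0;

	/* alloc-init part: mempools etc., must run without the queue lock */
	data = elevator_init_queue(q, e);
	if (!data) {
		kobject_put(&e->kobj);
		return 0;
	}

	spin_lock_irq(q->queue_lock);
	elv_drain_elevator(q);			/* flush requests using old private data */
	old_elevator = q->elevator;
	elevator_attach(q, e, data);		/* attach part: only pointer stores */
	spin_unlock_irq(q->queue_lock);

	/* sysfs teardown/registration can sleep, so it happens unlocked */
	__elv_unregister_queue(old_elevator);
	elv_register_queue(q);
	elevator_exit(old_elevator);
	return 1;
}

The key point is that everything between the lock/unlock pair is non-sleeping pointer work, so no request can ever see a half-initialized scheduler.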
Diffstat (limited to 'block')
-rw-r--r--  block/as-iosched.c        | 13
-rw-r--r--  block/cfq-iosched.c       | 10
-rw-r--r--  block/deadline-iosched.c  | 13
-rw-r--r--  block/elevator.c          | 55
-rw-r--r--  block/noop-iosched.c      |  7
5 files changed, 53 insertions, 45 deletions
diff --git a/block/as-iosched.c b/block/as-iosched.c
index e25a5d79ab27..a7caf35ca0c2 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1648,17 +1648,17 @@ static void as_exit_queue(elevator_t *e)
  * initialize elevator private data (as_data), and alloc a arq for
  * each request on the free lists
  */
-static int as_init_queue(request_queue_t *q, elevator_t *e)
+static void *as_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct as_data *ad;
 	int i;
 
 	if (!arq_pool)
-		return -ENOMEM;
+		return NULL;
 
 	ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node);
 	if (!ad)
-		return -ENOMEM;
+		return NULL;
 	memset(ad, 0, sizeof(*ad));
 
 	ad->q = q;	/* Identify what queue the data belongs to */
@@ -1667,7 +1667,7 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
 			GFP_KERNEL, q->node);
 	if (!ad->hash) {
 		kfree(ad);
-		return -ENOMEM;
+		return NULL;
 	}
 
 	ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
@@ -1675,7 +1675,7 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
 	if (!ad->arq_pool) {
 		kfree(ad->hash);
 		kfree(ad);
-		return -ENOMEM;
+		return NULL;
 	}
 
 	/* anticipatory scheduling helpers */
@@ -1696,14 +1696,13 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
 	ad->antic_expire = default_antic_expire;
 	ad->batch_expire[REQ_SYNC] = default_read_batch_expire;
 	ad->batch_expire[REQ_ASYNC] = default_write_batch_expire;
-	e->elevator_data = ad;
 
 	ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC];
 	ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10;
 	if (ad->write_batch_count < 2)
 		ad->write_batch_count = 2;
 
-	return 0;
+	return ad;
 }
 
 /*
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 8e9d84825e1c..a46d030e092a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2251,14 +2251,14 @@ static void cfq_exit_queue(elevator_t *e)
 	kfree(cfqd);
 }
 
-static int cfq_init_queue(request_queue_t *q, elevator_t *e)
+static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct cfq_data *cfqd;
 	int i;
 
 	cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
 	if (!cfqd)
-		return -ENOMEM;
+		return NULL;
 
 	memset(cfqd, 0, sizeof(*cfqd));
 
@@ -2288,8 +2288,6 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e)
 	for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
 		INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
 
-	e->elevator_data = cfqd;
-
 	cfqd->queue = q;
 
 	cfqd->max_queued = q->nr_requests / 4;
@@ -2316,14 +2314,14 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e)
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;
 
-	return 0;
+	return cfqd;
 out_crqpool:
 	kfree(cfqd->cfq_hash);
 out_cfqhash:
 	kfree(cfqd->crq_hash);
 out_crqhash:
 	kfree(cfqd);
-	return -ENOMEM;
+	return NULL;
 }
 
 static void cfq_slab_kill(void)
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 399fa1e60e1f..3bd0415a9828 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -613,24 +613,24 @@ static void deadline_exit_queue(elevator_t *e)
  * initialize elevator private data (deadline_data), and alloc a drq for
  * each request on the free lists
  */
-static int deadline_init_queue(request_queue_t *q, elevator_t *e)
+static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct deadline_data *dd;
 	int i;
 
 	if (!drq_pool)
-		return -ENOMEM;
+		return NULL;
 
 	dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
 	if (!dd)
-		return -ENOMEM;
+		return NULL;
 	memset(dd, 0, sizeof(*dd));
 
 	dd->hash = kmalloc_node(sizeof(struct list_head)*DL_HASH_ENTRIES,
 				GFP_KERNEL, q->node);
 	if (!dd->hash) {
 		kfree(dd);
-		return -ENOMEM;
+		return NULL;
 	}
 
 	dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
@@ -638,7 +638,7 @@ static int deadline_init_queue(request_queue_t *q, elevator_t *e)
 	if (!dd->drq_pool) {
 		kfree(dd->hash);
 		kfree(dd);
-		return -ENOMEM;
+		return NULL;
 	}
 
 	for (i = 0; i < DL_HASH_ENTRIES; i++)
@@ -653,8 +653,7 @@ static int deadline_init_queue(request_queue_t *q, elevator_t *e)
 	dd->writes_starved = writes_starved;
 	dd->front_merges = 1;
 	dd->fifo_batch = fifo_batch;
-	e->elevator_data = dd;
-	return 0;
+	return dd;
 }
 
 static void deadline_put_request(request_queue_t *q, struct request *rq)
diff --git a/block/elevator.c b/block/elevator.c
index 8768a367fdde..a0afdd317cef 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -121,16 +121,16 @@ static struct elevator_type *elevator_get(const char *name)
 	return e;
 }
 
-static int elevator_attach(request_queue_t *q, struct elevator_queue *eq)
+static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
 {
-	int ret = 0;
+	return eq->ops->elevator_init_fn(q, eq);
+}
 
+static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
+			    void *data)
+{
 	q->elevator = eq;
-
-	if (eq->ops->elevator_init_fn)
-		ret = eq->ops->elevator_init_fn(q, eq);
-
-	return ret;
+	eq->elevator_data = data;
 }
 
 static char chosen_elevator[16];
@@ -181,6 +181,7 @@ int elevator_init(request_queue_t *q, char *name)
 	struct elevator_type *e = NULL;
 	struct elevator_queue *eq;
 	int ret = 0;
+	void *data;
 
 	INIT_LIST_HEAD(&q->queue_head);
 	q->last_merge = NULL;
@@ -202,10 +203,13 @@ int elevator_init(request_queue_t *q, char *name)
 	if (!eq)
 		return -ENOMEM;
 
-	ret = elevator_attach(q, eq);
-	if (ret)
+	data = elevator_init_queue(q, eq);
+	if (!data) {
 		kobject_put(&eq->kobj);
+		return -ENOMEM;
+	}
 
+	elevator_attach(q, eq, data);
 	return ret;
 }
 
@@ -722,13 +726,16 @@ int elv_register_queue(struct request_queue *q)
 	return error;
 }
 
+static void __elv_unregister_queue(elevator_t *e)
+{
+	kobject_uevent(&e->kobj, KOBJ_REMOVE);
+	kobject_del(&e->kobj);
+}
+
 void elv_unregister_queue(struct request_queue *q)
 {
-	if (q) {
-		elevator_t *e = q->elevator;
-		kobject_uevent(&e->kobj, KOBJ_REMOVE);
-		kobject_del(&e->kobj);
-	}
+	if (q)
+		__elv_unregister_queue(q->elevator);
 }
 
 int elv_register(struct elevator_type *e)
@@ -780,6 +787,7 @@ EXPORT_SYMBOL_GPL(elv_unregister);
 static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 {
 	elevator_t *old_elevator, *e;
+	void *data;
 
 	/*
 	 * Allocate new elevator
@@ -788,6 +796,12 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 	if (!e)
 		return 0;
 
+	data = elevator_init_queue(q, e);
+	if (!data) {
+		kobject_put(&e->kobj);
+		return 0;
+	}
+
 	/*
 	 * Turn on BYPASS and drain all requests w/ elevator private data
 	 */
@@ -806,19 +820,19 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 		elv_drain_elevator(q);
 	}
 
-	spin_unlock_irq(q->queue_lock);
-
 	/*
-	 * unregister old elevator data
+	 * Remember old elevator.
 	 */
-	elv_unregister_queue(q);
 	old_elevator = q->elevator;
 
 	/*
 	 * attach and start new elevator
 	 */
-	if (elevator_attach(q, e))
-		goto fail;
+	elevator_attach(q, e, data);
+
+	spin_unlock_irq(q->queue_lock);
+
+	__elv_unregister_queue(old_elevator);
 
 	if (elv_register_queue(q))
 		goto fail_register;
@@ -837,7 +851,6 @@ fail_register:
 	 */
 	elevator_exit(e);
 	e = NULL;
-fail:
 	q->elevator = old_elevator;
 	elv_register_queue(q);
 	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
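The net effect on the per-scheduler contract: an elevator_init_fn now allocates and returns its private data (NULL on failure) and no longer writes e->elevator_data itself; the core stores that pointer via elevator_attach() under the queue lock. A minimal hypothetical scheduler init under this contract (mini_data and mini_init_queue are illustrative names, modeled on the noop scheduler below):

struct mini_data {
	struct list_head queue;		/* illustrative private state */
};

static void *mini_init_queue(request_queue_t *q, elevator_t *e)
{
	struct mini_data *md;

	/* runs without q->queue_lock held, so GFP_KERNEL is fine */
	md = kmalloc(sizeof(*md), GFP_KERNEL);
	if (!md)
		return NULL;		/* core treats NULL as allocation failure */
	INIT_LIST_HEAD(&md->queue);
	return md;			/* core attaches this under the lock */
}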
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index f370e4a7fe6d..56a7c620574f 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -65,16 +65,15 @@ noop_latter_request(request_queue_t *q, struct request *rq)
 	return list_entry(rq->queuelist.next, struct request, queuelist);
 }
 
-static int noop_init_queue(request_queue_t *q, elevator_t *e)
+static void *noop_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct noop_data *nd;
 
 	nd = kmalloc(sizeof(*nd), GFP_KERNEL);
 	if (!nd)
-		return -ENOMEM;
+		return NULL;
 	INIT_LIST_HEAD(&nd->queue);
-	e->elevator_data = nd;
-	return 0;
+	return nd;
 }
 
 static void noop_exit_queue(elevator_t *e)