aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2010-10-26 03:31:13 -0400
committerNeilBrown <neilb@suse.de>2010-10-28 02:36:15 -0400
commita167f663243662aa9153c01086580a11cde9ffdc (patch)
tree886e64787421bbf17a1eab7853d67258b598f050
parent2b193363ef68667ad717a6723165e0dccf99470f (diff)
md: use separate bio pool for each md device.
bio_clone and bio_alloc allocate from a common bio pool. If an md device is stacked with other devices that use this pool, or sits under something like swap, which also uses the pool, then the multiple calls on the pool can cause deadlocks. So allocate a local bio pool for each md array and use that rather than the common pool. This pool is used both for regular I/O and metadata updates. Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--drivers/md/faulty.c2
-rw-r--r--drivers/md/md.c81
-rw-r--r--drivers/md/md.h6
-rw-r--r--drivers/md/raid1.c7
-rw-r--r--drivers/md/raid10.c7
-rw-r--r--drivers/md/raid5.c4
6 files changed, 95 insertions, 12 deletions
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 1a8987884614..339fdc670751 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -210,7 +210,7 @@ static int make_request(mddev_t *mddev, struct bio *bio)
210 } 210 }
211 } 211 }
212 if (failit) { 212 if (failit) {
213 struct bio *b = bio_clone(bio, GFP_NOIO); 213 struct bio *b = bio_clone_mddev(bio, GFP_NOIO, mddev);
214 b->bi_bdev = conf->rdev->bdev; 214 b->bi_bdev = conf->rdev->bdev;
215 b->bi_private = bio; 215 b->bi_private = bio;
216 b->bi_end_io = faulty_fail; 216 b->bi_end_io = faulty_fail;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 3149bf33f1c5..4e957f3140a8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -149,6 +149,72 @@ static const struct block_device_operations md_fops;
149 149
150static int start_readonly; 150static int start_readonly;
151 151
152/* bio_clone_mddev
153 * like bio_clone, but with a local bio set
154 */
155
156static void mddev_bio_destructor(struct bio *bio)
157{
158 mddev_t *mddev, **mddevp;
159
160 mddevp = (void*)bio;
161 mddev = mddevp[-1];
162
163 bio_free(bio, mddev->bio_set);
164}
165
166struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
167 mddev_t *mddev)
168{
169 struct bio *b;
170 mddev_t **mddevp;
171
172 if (!mddev || !mddev->bio_set)
173 return bio_alloc(gfp_mask, nr_iovecs);
174
175 b = bio_alloc_bioset(gfp_mask, nr_iovecs,
176 mddev->bio_set);
177 if (!b)
178 return NULL;
179 mddevp = (void*)b;
180 mddevp[-1] = mddev;
181 b->bi_destructor = mddev_bio_destructor;
182 return b;
183}
184EXPORT_SYMBOL_GPL(bio_alloc_mddev);
185
186struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
187 mddev_t *mddev)
188{
189 struct bio *b;
190 mddev_t **mddevp;
191
192 if (!mddev || !mddev->bio_set)
193 return bio_clone(bio, gfp_mask);
194
195 b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs,
196 mddev->bio_set);
197 if (!b)
198 return NULL;
199 mddevp = (void*)b;
200 mddevp[-1] = mddev;
201 b->bi_destructor = mddev_bio_destructor;
202 __bio_clone(b, bio);
203 if (bio_integrity(bio)) {
204 int ret;
205
206 ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set);
207
208 if (ret < 0) {
209 bio_put(b);
210 return NULL;
211 }
212 }
213
214 return b;
215}
216EXPORT_SYMBOL_GPL(bio_clone_mddev);
217
152/* 218/*
153 * We have a system wide 'event count' that is incremented 219 * We have a system wide 'event count' that is incremented
154 * on any 'interesting' event, and readers of /proc/mdstat 220 * on any 'interesting' event, and readers of /proc/mdstat
@@ -321,7 +387,7 @@ static void submit_flushes(mddev_t *mddev)
321 atomic_inc(&rdev->nr_pending); 387 atomic_inc(&rdev->nr_pending);
322 atomic_inc(&rdev->nr_pending); 388 atomic_inc(&rdev->nr_pending);
323 rcu_read_unlock(); 389 rcu_read_unlock();
324 bi = bio_alloc(GFP_KERNEL, 0); 390 bi = bio_alloc_mddev(GFP_KERNEL, 0, mddev);
325 bi->bi_end_io = md_end_flush; 391 bi->bi_end_io = md_end_flush;
326 bi->bi_private = rdev; 392 bi->bi_private = rdev;
327 bi->bi_bdev = rdev->bdev; 393 bi->bi_bdev = rdev->bdev;
@@ -428,6 +494,8 @@ static void mddev_delayed_delete(struct work_struct *ws);
428 494
429static void mddev_put(mddev_t *mddev) 495static void mddev_put(mddev_t *mddev)
430{ 496{
497 struct bio_set *bs = NULL;
498
431 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) 499 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
432 return; 500 return;
433 if (!mddev->raid_disks && list_empty(&mddev->disks) && 501 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
@@ -435,6 +503,8 @@ static void mddev_put(mddev_t *mddev)
435 /* Array is not configured at all, and not held active, 503 /* Array is not configured at all, and not held active,
436 * so destroy it */ 504 * so destroy it */
437 list_del(&mddev->all_mddevs); 505 list_del(&mddev->all_mddevs);
506 bs = mddev->bio_set;
507 mddev->bio_set = NULL;
438 if (mddev->gendisk) { 508 if (mddev->gendisk) {
439 /* We did a probe so need to clean up. Call 509 /* We did a probe so need to clean up. Call
440 * queue_work inside the spinlock so that 510 * queue_work inside the spinlock so that
@@ -447,6 +517,8 @@ static void mddev_put(mddev_t *mddev)
447 kfree(mddev); 517 kfree(mddev);
448 } 518 }
449 spin_unlock(&all_mddevs_lock); 519 spin_unlock(&all_mddevs_lock);
520 if (bs)
521 bioset_free(bs);
450} 522}
451 523
452void mddev_init(mddev_t *mddev) 524void mddev_init(mddev_t *mddev)
@@ -690,7 +762,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
690 * if zero is reached. 762 * if zero is reached.
691 * If an error occurred, call md_error 763 * If an error occurred, call md_error
692 */ 764 */
693 struct bio *bio = bio_alloc(GFP_NOIO, 1); 765 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
694 766
695 bio->bi_bdev = rdev->bdev; 767 bio->bi_bdev = rdev->bdev;
696 bio->bi_sector = sector; 768 bio->bi_sector = sector;
@@ -724,7 +796,7 @@ static void bi_complete(struct bio *bio, int error)
724int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size, 796int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
725 struct page *page, int rw) 797 struct page *page, int rw)
726{ 798{
727 struct bio *bio = bio_alloc(GFP_NOIO, 1); 799 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
728 struct completion event; 800 struct completion event;
729 int ret; 801 int ret;
730 802
@@ -4379,6 +4451,9 @@ int md_run(mddev_t *mddev)
4379 sysfs_notify_dirent_safe(rdev->sysfs_state); 4451 sysfs_notify_dirent_safe(rdev->sysfs_state);
4380 } 4452 }
4381 4453
4454 if (mddev->bio_set == NULL)
4455 mddev->bio_set = bioset_create(BIO_POOL_SIZE, sizeof(mddev));
4456
4382 spin_lock(&pers_lock); 4457 spin_lock(&pers_lock);
4383 pers = find_pers(mddev->level, mddev->clevel); 4458 pers = find_pers(mddev->level, mddev->clevel);
4384 if (!pers || !try_module_get(pers->owner)) { 4459 if (!pers || !try_module_get(pers->owner)) {
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 5ee537135553..d05bab55df4e 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -331,6 +331,8 @@ struct mddev_s
331 struct attribute_group *to_remove; 331 struct attribute_group *to_remove;
332 struct plug_handle *plug; /* if used by personality */ 332 struct plug_handle *plug; /* if used by personality */
333 333
334 struct bio_set *bio_set;
335
334 /* Generic flush handling. 336 /* Generic flush handling.
335 * The last to finish preflush schedules a worker to submit 337 * The last to finish preflush schedules a worker to submit
336 * the rest of the request (without the REQ_FLUSH flag). 338 * the rest of the request (without the REQ_FLUSH flag).
@@ -517,4 +519,8 @@ extern void md_rdev_init(mdk_rdev_t *rdev);
517 519
518extern void mddev_suspend(mddev_t *mddev); 520extern void mddev_suspend(mddev_t *mddev);
519extern void mddev_resume(mddev_t *mddev); 521extern void mddev_resume(mddev_t *mddev);
522extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
523 mddev_t *mddev);
524extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
525 mddev_t *mddev);
520#endif /* _MD_MD_H */ 526#endif /* _MD_MD_H */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 08c66afea494..54f60c9e5f85 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -861,7 +861,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
861 } 861 }
862 r1_bio->read_disk = rdisk; 862 r1_bio->read_disk = rdisk;
863 863
864 read_bio = bio_clone(bio, GFP_NOIO); 864 read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
865 865
866 r1_bio->bios[rdisk] = read_bio; 866 r1_bio->bios[rdisk] = read_bio;
867 867
@@ -950,7 +950,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
950 if (!r1_bio->bios[i]) 950 if (!r1_bio->bios[i])
951 continue; 951 continue;
952 952
953 mbio = bio_clone(bio, GFP_NOIO); 953 mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
954 r1_bio->bios[i] = mbio; 954 r1_bio->bios[i] = mbio;
955 955
956 mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; 956 mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
@@ -1640,7 +1640,8 @@ static void raid1d(mddev_t *mddev)
1640 mddev->ro ? IO_BLOCKED : NULL; 1640 mddev->ro ? IO_BLOCKED : NULL;
1641 r1_bio->read_disk = disk; 1641 r1_bio->read_disk = disk;
1642 bio_put(bio); 1642 bio_put(bio);
1643 bio = bio_clone(r1_bio->master_bio, GFP_NOIO); 1643 bio = bio_clone_mddev(r1_bio->master_bio,
1644 GFP_NOIO, mddev);
1644 r1_bio->bios[r1_bio->read_disk] = bio; 1645 r1_bio->bios[r1_bio->read_disk] = bio;
1645 rdev = conf->mirrors[disk].rdev; 1646 rdev = conf->mirrors[disk].rdev;
1646 if (printk_ratelimit()) 1647 if (printk_ratelimit())
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d54122b9927e..c67aa54694ae 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -889,7 +889,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
889 } 889 }
890 mirror = conf->mirrors + disk; 890 mirror = conf->mirrors + disk;
891 891
892 read_bio = bio_clone(bio, GFP_NOIO); 892 read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
893 893
894 r10_bio->devs[slot].bio = read_bio; 894 r10_bio->devs[slot].bio = read_bio;
895 895
@@ -958,7 +958,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
958 if (!r10_bio->devs[i].bio) 958 if (!r10_bio->devs[i].bio)
959 continue; 959 continue;
960 960
961 mbio = bio_clone(bio, GFP_NOIO); 961 mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
962 r10_bio->devs[i].bio = mbio; 962 r10_bio->devs[i].bio = mbio;
963 963
964 mbio->bi_sector = r10_bio->devs[i].addr+ 964 mbio->bi_sector = r10_bio->devs[i].addr+
@@ -1746,7 +1746,8 @@ static void raid10d(mddev_t *mddev)
1746 mdname(mddev), 1746 mdname(mddev),
1747 bdevname(rdev->bdev,b), 1747 bdevname(rdev->bdev,b),
1748 (unsigned long long)r10_bio->sector); 1748 (unsigned long long)r10_bio->sector);
1749 bio = bio_clone(r10_bio->master_bio, GFP_NOIO); 1749 bio = bio_clone_mddev(r10_bio->master_bio,
1750 GFP_NOIO, mddev);
1750 r10_bio->devs[r10_bio->read_slot].bio = bio; 1751 r10_bio->devs[r10_bio->read_slot].bio = bio;
1751 bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr 1752 bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
1752 + rdev->data_offset; 1753 + rdev->data_offset;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8abc159b377a..dc574f303f8b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3876,9 +3876,9 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio)
3876 return 0; 3876 return 0;
3877 } 3877 }
3878 /* 3878 /*
3879 * use bio_clone to make a copy of the bio 3879 * use bio_clone_mddev to make a copy of the bio
3880 */ 3880 */
3881 align_bi = bio_clone(raid_bio, GFP_NOIO); 3881 align_bi = bio_clone_mddev(raid_bio, GFP_NOIO, mddev);
3882 if (!align_bi) 3882 if (!align_bi)
3883 return 0; 3883 return 0;
3884 /* 3884 /*