aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2010-10-26 03:31:13 -0400
committerNeilBrown <neilb@suse.de>2010-10-28 02:36:15 -0400
commita167f663243662aa9153c01086580a11cde9ffdc (patch)
tree886e64787421bbf17a1eab7853d67258b598f050 /drivers/md/md.c
parent2b193363ef68667ad717a6723165e0dccf99470f (diff)
md: use separate bio pool for each md device.
bio_clone and bio_alloc allocate from a common bio pool. If an md device is stacked with other devices that use this pool, or under something like swap which uses the pool, then the multiple calls on the pool can cause deadlocks. So allocate a local bio pool for each md array and use that rather than the common pool. This pool is used both for regular IO and metadata updates. Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- drivers/md/md.c | 81
1 file changed, 78 insertions(+), 3 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 3149bf33f1c5..4e957f3140a8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -149,6 +149,72 @@ static const struct block_device_operations md_fops;
149 149
150static int start_readonly; 150static int start_readonly;
151 151
152/* bio_clone_mddev
153 * like bio_clone, but with a local bio set
154 */
155
/* Destructor for bios allocated from an mddev's private bio_set
 * (see bio_alloc_mddev / bio_clone_mddev below).  The owning mddev
 * pointer was stashed in the front-pad bytes immediately preceding
 * the bio, so recover it and return the bio to that device's pool
 * rather than the global one.
 */
156static void mddev_bio_destructor(struct bio *bio)
157{
158	mddev_t *mddev, **mddevp;
159
	/* The bio_set was created with sizeof(mddev) bytes of front pad,
	 * so the mddev pointer sits one pointer-width before the bio. */
160	mddevp = (void*)bio;
161	mddev = mddevp[-1];
162
163	bio_free(bio, mddev->bio_set);
164}
165
/* bio_alloc_mddev - allocate a bio from @mddev's private bio_set.
 * @gfp_mask:  allocation flags
 * @nr_iovecs: number of io vectors to reserve in the bio
 * @mddev:     md device whose pool to allocate from (may be NULL)
 *
 * Using a per-array pool avoids deadlocks that can occur when stacked
 * devices all allocate from the shared global bio pool.  If @mddev is
 * NULL or has no private set yet, fall back to plain bio_alloc().
 *
 * The private set is created with sizeof(mddev) bytes of front padding
 * (see md_run), so the mddev pointer is stored just before the bio;
 * mddev_bio_destructor() reads it back to free into the right pool.
 *
 * Returns the new bio, or NULL on allocation failure.
 */
166struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
167			    mddev_t *mddev)
168{
169	struct bio *b;
170	mddev_t **mddevp;
171
172	if (!mddev || !mddev->bio_set)
173		return bio_alloc(gfp_mask, nr_iovecs);
174
175	b = bio_alloc_bioset(gfp_mask, nr_iovecs,
176			     mddev->bio_set);
177	if (!b)
178		return NULL;
	/* Stash the owning mddev in the front-pad for the destructor. */
179	mddevp = (void*)b;
180	mddevp[-1] = mddev;
181	b->bi_destructor = mddev_bio_destructor;
182	return b;
183}
184EXPORT_SYMBOL_GPL(bio_alloc_mddev);
185
/* bio_clone_mddev - clone @bio, allocating the clone from @mddev's
 * private bio_set.
 * @bio:      bio to clone
 * @gfp_mask: allocation flags
 * @mddev:    md device whose pool to allocate from (may be NULL)
 *
 * Like bio_clone(), but draws from the per-array pool to avoid the
 * stacked-device deadlocks possible with the shared global pool.
 * Falls back to bio_clone() when @mddev is NULL or has no private set.
 *
 * If @bio carries an integrity payload, that is cloned too; on
 * integrity-clone failure the partially built clone is released with
 * bio_put() and NULL is returned.
 *
 * Returns the cloned bio, or NULL on allocation failure.
 */
186struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
187			    mddev_t *mddev)
188{
189	struct bio *b;
190	mddev_t **mddevp;
191
192	if (!mddev || !mddev->bio_set)
193		return bio_clone(bio, gfp_mask);
194
	/* Reserve as many iovecs as the original can hold. */
195	b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs,
196			     mddev->bio_set);
197	if (!b)
198		return NULL;
	/* Stash the owning mddev in the front-pad for the destructor. */
199	mddevp = (void*)b;
200	mddevp[-1] = mddev;
201	b->bi_destructor = mddev_bio_destructor;
202	__bio_clone(b, bio);
203	if (bio_integrity(bio)) {
204		int ret;
205
206		ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set);
207
208		if (ret < 0) {
209			bio_put(b);
210			return NULL;
211		}
212	}
213
214	return b;
215}
216EXPORT_SYMBOL_GPL(bio_clone_mddev);
217
152/* 218/*
153 * We have a system wide 'event count' that is incremented 219 * We have a system wide 'event count' that is incremented
154 * on any 'interesting' event, and readers of /proc/mdstat 220 * on any 'interesting' event, and readers of /proc/mdstat
@@ -321,7 +387,7 @@ static void submit_flushes(mddev_t *mddev)
321 atomic_inc(&rdev->nr_pending); 387 atomic_inc(&rdev->nr_pending);
322 atomic_inc(&rdev->nr_pending); 388 atomic_inc(&rdev->nr_pending);
323 rcu_read_unlock(); 389 rcu_read_unlock();
324 bi = bio_alloc(GFP_KERNEL, 0); 390 bi = bio_alloc_mddev(GFP_KERNEL, 0, mddev);
325 bi->bi_end_io = md_end_flush; 391 bi->bi_end_io = md_end_flush;
326 bi->bi_private = rdev; 392 bi->bi_private = rdev;
327 bi->bi_bdev = rdev->bdev; 393 bi->bi_bdev = rdev->bdev;
@@ -428,6 +494,8 @@ static void mddev_delayed_delete(struct work_struct *ws);
428 494
429static void mddev_put(mddev_t *mddev) 495static void mddev_put(mddev_t *mddev)
430{ 496{
497 struct bio_set *bs = NULL;
498
431 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) 499 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
432 return; 500 return;
433 if (!mddev->raid_disks && list_empty(&mddev->disks) && 501 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
@@ -435,6 +503,8 @@ static void mddev_put(mddev_t *mddev)
435 /* Array is not configured at all, and not held active, 503 /* Array is not configured at all, and not held active,
436 * so destroy it */ 504 * so destroy it */
437 list_del(&mddev->all_mddevs); 505 list_del(&mddev->all_mddevs);
506 bs = mddev->bio_set;
507 mddev->bio_set = NULL;
438 if (mddev->gendisk) { 508 if (mddev->gendisk) {
439 /* We did a probe so need to clean up. Call 509 /* We did a probe so need to clean up. Call
440 * queue_work inside the spinlock so that 510 * queue_work inside the spinlock so that
@@ -447,6 +517,8 @@ static void mddev_put(mddev_t *mddev)
447 kfree(mddev); 517 kfree(mddev);
448 } 518 }
449 spin_unlock(&all_mddevs_lock); 519 spin_unlock(&all_mddevs_lock);
520 if (bs)
521 bioset_free(bs);
450} 522}
451 523
452void mddev_init(mddev_t *mddev) 524void mddev_init(mddev_t *mddev)
@@ -690,7 +762,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
690 * if zero is reached. 762 * if zero is reached.
691 * If an error occurred, call md_error 763 * If an error occurred, call md_error
692 */ 764 */
693 struct bio *bio = bio_alloc(GFP_NOIO, 1); 765 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
694 766
695 bio->bi_bdev = rdev->bdev; 767 bio->bi_bdev = rdev->bdev;
696 bio->bi_sector = sector; 768 bio->bi_sector = sector;
@@ -724,7 +796,7 @@ static void bi_complete(struct bio *bio, int error)
724int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size, 796int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
725 struct page *page, int rw) 797 struct page *page, int rw)
726{ 798{
727 struct bio *bio = bio_alloc(GFP_NOIO, 1); 799 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
728 struct completion event; 800 struct completion event;
729 int ret; 801 int ret;
730 802
@@ -4379,6 +4451,9 @@ int md_run(mddev_t *mddev)
4379 sysfs_notify_dirent_safe(rdev->sysfs_state); 4451 sysfs_notify_dirent_safe(rdev->sysfs_state);
4380 } 4452 }
4381 4453
4454 if (mddev->bio_set == NULL)
4455 mddev->bio_set = bioset_create(BIO_POOL_SIZE, sizeof(mddev));
4456
4382 spin_lock(&pers_lock); 4457 spin_lock(&pers_lock);
4383 pers = find_pers(mddev->level, mddev->clevel); 4458 pers = find_pers(mddev->level, mddev->clevel);
4384 if (!pers || !try_module_get(pers->owner)) { 4459 if (!pers || !try_module_get(pers->owner)) {