Diffstat (limited to 'drivers/md/md.c')
 drivers/md/md.c | 162 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 127 insertions(+), 35 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 225815197a3d..4e957f3140a8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -57,8 +57,6 @@
 #define DEBUG 0
 #define dprintk(x...) ((void)(DEBUG && printk(x)))
 
-static DEFINE_MUTEX(md_mutex);
-
 #ifndef MODULE
 static void autostart_arrays(int part);
 #endif
@@ -69,6 +67,8 @@ static DEFINE_SPINLOCK(pers_lock);
 static void md_print_devices(void);
 
 static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
+static struct workqueue_struct *md_wq;
+static struct workqueue_struct *md_misc_wq;
 
 #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
 
@@ -149,6 +149,72 @@ static const struct block_device_operations md_fops;
 
 static int start_readonly;
 
+/* bio_clone_mddev
+ * like bio_clone, but with a local bio set
+ */
+
+static void mddev_bio_destructor(struct bio *bio)
+{
+	mddev_t *mddev, **mddevp;
+
+	mddevp = (void*)bio;
+	mddev = mddevp[-1];
+
+	bio_free(bio, mddev->bio_set);
+}
+
+struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
+			    mddev_t *mddev)
+{
+	struct bio *b;
+	mddev_t **mddevp;
+
+	if (!mddev || !mddev->bio_set)
+		return bio_alloc(gfp_mask, nr_iovecs);
+
+	b = bio_alloc_bioset(gfp_mask, nr_iovecs,
+			     mddev->bio_set);
+	if (!b)
+		return NULL;
+	mddevp = (void*)b;
+	mddevp[-1] = mddev;
+	b->bi_destructor = mddev_bio_destructor;
+	return b;
+}
+EXPORT_SYMBOL_GPL(bio_alloc_mddev);
+
+struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
+			    mddev_t *mddev)
+{
+	struct bio *b;
+	mddev_t **mddevp;
+
+	if (!mddev || !mddev->bio_set)
+		return bio_clone(bio, gfp_mask);
+
+	b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs,
+			     mddev->bio_set);
+	if (!b)
+		return NULL;
+	mddevp = (void*)b;
+	mddevp[-1] = mddev;
+	b->bi_destructor = mddev_bio_destructor;
+	__bio_clone(b, bio);
+	if (bio_integrity(bio)) {
+		int ret;
+
+		ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set);
+
+		if (ret < 0) {
+			bio_put(b);
+			return NULL;
+		}
+	}
+
+	return b;
+}
+EXPORT_SYMBOL_GPL(bio_clone_mddev);
+
 /*
  * We have a system wide 'event count' that is incremented
  * on any 'interesting' event, and readers of /proc/mdstat
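
The mddevp[-1] idiom above works because the per-array bio_set is created with a front pad of one pointer: see the bioset_create(BIO_POOL_SIZE, sizeof(mddev)) call added to md_run() below, where mddev is a pointer variable, so sizeof(mddev) is the size of one pointer. Every bio allocated from the set is therefore preceded in memory by a slot that bio_alloc_mddev() fills with the owning mddev, and that mddev_bio_destructor() reads back to find the right bio_set to free into. A minimal userspace sketch of the same layout trick, with invented names (this is not the kernel code):

#include <stdio.h>
#include <stdlib.h>

struct owner { const char *name; };	/* stands in for mddev_t */
struct object { int payload; };		/* stands in for struct bio */

/* Allocate an object with one pointer of "front pad" before it. */
static struct object *alloc_with_owner(struct owner *o)
{
	void *mem = malloc(sizeof(struct owner *) + sizeof(struct object));
	struct object *obj;

	if (!mem)
		return NULL;
	obj = (struct object *)((struct owner **)mem + 1);
	((struct owner **)obj)[-1] = o;	/* the mddevp[-1] = mddev idiom */
	return obj;
}

static struct owner *owner_of(struct object *obj)
{
	return ((struct owner **)obj)[-1];	/* the destructor's lookup */
}

static void free_with_owner(struct object *obj)
{
	free((struct owner **)obj - 1);	/* step back over the front pad */
}

int main(void)
{
	struct owner md = { "md0" };
	struct object *obj = alloc_with_owner(&md);

	if (!obj)
		return 1;
	printf("owner: %s\n", owner_of(obj)->name);
	free_with_owner(obj);
	return 0;
}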
@@ -300,7 +366,7 @@ static void md_end_flush(struct bio *bio, int err)
 
 	if (atomic_dec_and_test(&mddev->flush_pending)) {
 		/* The pre-request flush has finished */
-		schedule_work(&mddev->flush_work);
+		queue_work(md_wq, &mddev->flush_work);
 	}
 	bio_put(bio);
 }
@@ -321,7 +387,7 @@ static void submit_flushes(mddev_t *mddev)
 			atomic_inc(&rdev->nr_pending);
 			atomic_inc(&rdev->nr_pending);
 			rcu_read_unlock();
-			bi = bio_alloc(GFP_KERNEL, 0);
+			bi = bio_alloc_mddev(GFP_KERNEL, 0, mddev);
 			bi->bi_end_io = md_end_flush;
 			bi->bi_private = rdev;
 			bi->bi_bdev = rdev->bdev;
@@ -369,7 +435,7 @@ void md_flush_request(mddev_t *mddev, struct bio *bio)
 	submit_flushes(mddev);
 
 	if (atomic_dec_and_test(&mddev->flush_pending))
-		schedule_work(&mddev->flush_work);
+		queue_work(md_wq, &mddev->flush_work);
 }
 EXPORT_SYMBOL(md_flush_request);
 
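
These two hunks, together with the md_error() and del_work changes further down, move md's deferred work off the shared kernel-wide queue (schedule_work) and onto dedicated workqueues allocated in md_init(). The flush work sits in the I/O completion path, so md_wq is created with WQ_RESCUER (later renamed WQ_MEM_RECLAIM), which guarantees a rescuer thread and thus forward progress under memory pressure. A stand-alone module sketch of the same pattern, with illustrative names (not part of the patch):

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;
static struct work_struct example_work;

static void example_fn(struct work_struct *ws)
{
	pr_info("example: work item ran\n");
}

static int __init example_init(void)
{
	/* WQ_RESCUER keeps a rescuer thread around so queued work can
	 * still run when no new worker can be forked under pressure. */
	example_wq = alloc_workqueue("example", WQ_RESCUER, 0);
	if (!example_wq)
		return -ENOMEM;

	INIT_WORK(&example_work, example_fn);
	queue_work(example_wq, &example_work);
	return 0;
}

static void __exit example_exit(void)
{
	/* flush before destroy, mirroring flush_workqueue(md_misc_wq) */
	flush_workqueue(example_wq);
	destroy_workqueue(example_wq);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");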
@@ -428,6 +494,8 @@ static void mddev_delayed_delete(struct work_struct *ws);
 
 static void mddev_put(mddev_t *mddev)
 {
+	struct bio_set *bs = NULL;
+
 	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
 		return;
 	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
@@ -435,19 +503,22 @@ static void mddev_put(mddev_t *mddev)
 		/* Array is not configured at all, and not held active,
 		 * so destroy it */
 		list_del(&mddev->all_mddevs);
+		bs = mddev->bio_set;
+		mddev->bio_set = NULL;
 		if (mddev->gendisk) {
-			/* we did a probe so need to clean up.
-			 * Call schedule_work inside the spinlock
-			 * so that flush_scheduled_work() after
-			 * mddev_find will succeed in waiting for the
-			 * work to be done.
+			/* We did a probe so need to clean up.  Call
+			 * queue_work inside the spinlock so that
+			 * flush_workqueue() after mddev_find will
+			 * succeed in waiting for the work to be done.
 			 */
 			INIT_WORK(&mddev->del_work, mddev_delayed_delete);
-			schedule_work(&mddev->del_work);
+			queue_work(md_misc_wq, &mddev->del_work);
 		} else
 			kfree(mddev);
 	}
 	spin_unlock(&all_mddevs_lock);
+	if (bs)
+		bioset_free(bs);
 }
 
 void mddev_init(mddev_t *mddev)
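
Note the shape of the mddev_put() change: bs captures mddev->bio_set while all_mddevs_lock is held, but bioset_free(bs) runs only after spin_unlock(), keeping a potentially sleeping teardown out of the critical section. A userspace sketch of this capture-under-lock, free-after-unlock pattern (names are illustrative):

#include <pthread.h>
#include <stdlib.h>

struct resource { char payload[64]; };	/* stands in for struct bio_set */

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct resource *shared_res;

static void put_resource(void)
{
	struct resource *to_free;

	pthread_mutex_lock(&list_lock);
	to_free = shared_res;		/* detach while the lock is held */
	shared_res = NULL;
	pthread_mutex_unlock(&list_lock);

	if (to_free)			/* slow teardown runs outside the lock */
		free(to_free);
}

int main(void)
{
	shared_res = malloc(sizeof(*shared_res));
	put_resource();
	return 0;
}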
@@ -691,7 +762,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
 	 * if zero is reached.
 	 * If an error occurred, call md_error
 	 */
-	struct bio *bio = bio_alloc(GFP_NOIO, 1);
+	struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
 
 	bio->bi_bdev = rdev->bdev;
 	bio->bi_sector = sector;
@@ -722,16 +793,16 @@ static void bi_complete(struct bio *bio, int error)
 	complete((struct completion*)bio->bi_private);
 }
 
-int sync_page_io(struct block_device *bdev, sector_t sector, int size,
+int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
 		 struct page *page, int rw)
 {
-	struct bio *bio = bio_alloc(GFP_NOIO, 1);
+	struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
 	struct completion event;
 	int ret;
 
 	rw |= REQ_SYNC | REQ_UNPLUG;
 
-	bio->bi_bdev = bdev;
+	bio->bi_bdev = rdev->bdev;
 	bio->bi_sector = sector;
 	bio_add_page(bio, page, size, 0);
 	init_completion(&event);
@@ -757,7 +828,7 @@ static int read_disk_sb(mdk_rdev_t * rdev, int size)
 		return 0;
 
 
-	if (!sync_page_io(rdev->bdev, rdev->sb_start, size, rdev->sb_page, READ))
+	if (!sync_page_io(rdev, rdev->sb_start, size, rdev->sb_page, READ))
 		goto fail;
 	rdev->sb_loaded = 1;
 	return 0;
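
The sync_page_io() signature change ripples to every caller: they now pass the rdev itself rather than rdev->bdev, which lets the helper reach rdev->mddev and draw its bio from the per-array pool. A hypothetical extra caller, purely to illustrate the new convention (example_read_page is invented; only sync_page_io() comes from this patch):

/* Illustrative only -- not part of the patch.  Assumes the md.h
 * context (mdk_rdev_t, sync_page_io) that md.c already has.
 */
static int example_read_page(mdk_rdev_t *rdev, sector_t sector,
			     struct page *page)
{
	/* old convention: sync_page_io(rdev->bdev, sector, PAGE_SIZE, page, READ) */
	if (!sync_page_io(rdev, sector, PAGE_SIZE, page, READ))
		return -EIO;	/* sync_page_io() returns 0 on failure */
	return 0;
}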
@@ -1850,7 +1921,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
 	synchronize_rcu();
 	INIT_WORK(&rdev->del_work, md_delayed_delete);
 	kobject_get(&rdev->kobj);
-	schedule_work(&rdev->del_work);
+	queue_work(md_misc_wq, &rdev->del_work);
 }
 
 /*
@@ -2108,6 +2179,8 @@ repeat:
 	if (!mddev->persistent) {
 		clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
 		clear_bit(MD_CHANGE_DEVS, &mddev->flags);
+		if (!mddev->external)
+			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
 		wake_up(&mddev->sb_wait);
 		return;
 	}
@@ -4192,10 +4265,10 @@ static int md_alloc(dev_t dev, char *name)
 	shift = partitioned ? MdpMinorShift : 0;
 	unit = MINOR(mddev->unit) >> shift;
 
-	/* wait for any previous instance if this device
-	 * to be completed removed (mddev_delayed_delete).
+	/* wait for any previous instance of this device to be
+	 * completely removed (mddev_delayed_delete).
 	 */
-	flush_scheduled_work();
+	flush_workqueue(md_misc_wq);
 
 	mutex_lock(&disks_mutex);
 	error = -EEXIST;
@@ -4378,6 +4451,9 @@ int md_run(mddev_t *mddev)
 			sysfs_notify_dirent_safe(rdev->sysfs_state);
 	}
 
+	if (mddev->bio_set == NULL)
+		mddev->bio_set = bioset_create(BIO_POOL_SIZE, sizeof(mddev));
+
 	spin_lock(&pers_lock);
 	pers = find_pers(mddev->level, mddev->clevel);
 	if (!pers || !try_module_get(pers->owner)) {
@@ -5885,16 +5961,14 @@ static int md_open(struct block_device *bdev, fmode_t mode)
 	mddev_t *mddev = mddev_find(bdev->bd_dev);
 	int err;
 
-	mutex_lock(&md_mutex);
 	if (mddev->gendisk != bdev->bd_disk) {
 		/* we are racing with mddev_put which is discarding this
 		 * bd_disk.
 		 */
 		mddev_put(mddev);
 		/* Wait until bdev->bd_disk is definitely gone */
-		flush_scheduled_work();
+		flush_workqueue(md_misc_wq);
 		/* Then retry the open from the top */
-		mutex_unlock(&md_mutex);
 		return -ERESTARTSYS;
 	}
 	BUG_ON(mddev != bdev->bd_disk->private_data);
@@ -5908,7 +5982,6 @@ static int md_open(struct block_device *bdev, fmode_t mode)
 
 	check_disk_size_change(mddev->gendisk, bdev);
  out:
-	mutex_unlock(&md_mutex);
 	return err;
 }
 
@@ -5917,10 +5990,8 @@ static int md_release(struct gendisk *disk, fmode_t mode)
 	mddev_t *mddev = disk->private_data;
 
 	BUG_ON(!mddev);
-	mutex_lock(&md_mutex);
 	atomic_dec(&mddev->openers);
 	mddev_put(mddev);
-	mutex_unlock(&md_mutex);
 
 	return 0;
 }
@@ -6052,7 +6123,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
 	if (mddev->event_work.func)
-		schedule_work(&mddev->event_work);
+		queue_work(md_misc_wq, &mddev->event_work);
 	md_new_event_inintr(mddev);
 }
 
@@ -7212,12 +7283,23 @@ static void md_geninit(void)
 
 static int __init md_init(void)
 {
-	if (register_blkdev(MD_MAJOR, "md"))
-		return -1;
-	if ((mdp_major=register_blkdev(0, "mdp"))<=0) {
-		unregister_blkdev(MD_MAJOR, "md");
-		return -1;
-	}
+	int ret = -ENOMEM;
+
+	md_wq = alloc_workqueue("md", WQ_RESCUER, 0);
+	if (!md_wq)
+		goto err_wq;
+
+	md_misc_wq = alloc_workqueue("md_misc", 0, 0);
+	if (!md_misc_wq)
+		goto err_misc_wq;
+
+	if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
+		goto err_md;
+
+	if ((ret = register_blkdev(0, "mdp")) < 0)
+		goto err_mdp;
+	mdp_major = ret;
+
 	blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
 			    md_probe, NULL, NULL);
 	blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
@@ -7228,8 +7310,16 @@ static int __init md_init(void)
 
 	md_geninit();
 	return 0;
-}
 
+err_mdp:
+	unregister_blkdev(MD_MAJOR, "md");
+err_md:
+	destroy_workqueue(md_misc_wq);
+err_misc_wq:
+	destroy_workqueue(md_wq);
+err_wq:
+	return ret;
+}
 
 #ifndef MODULE
 
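
md_init() now fails with the usual kernel goto-ladder: each resource gets a matching label, and an error jumps to the label that releases everything acquired so far, in reverse order, so no failure path leaks a workqueue or a registered major number. A stand-alone userspace sketch of the pattern, with stand-in resources:

#include <stdlib.h>

/* Stand-ins for the real acquisitions (workqueues, blkdev majors). */
static void *acquire(void)   { return malloc(16); }
static void release(void *r) { free(r); }

static int init_example(void)
{
	int ret = -1;
	void *wq, *misc_wq, *blkdev;

	wq = acquire();
	if (!wq)
		goto err_wq;

	misc_wq = acquire();
	if (!misc_wq)
		goto err_misc_wq;

	blkdev = acquire();
	if (!blkdev)
		goto err_blkdev;

	return 0;			/* success: everything stays acquired */

err_blkdev:
	release(misc_wq);		/* undo in reverse order of acquisition */
err_misc_wq:
	release(wq);
err_wq:
	return ret;
}

int main(void)
{
	return init_example() ? EXIT_FAILURE : EXIT_SUCCESS;
}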
@@ -7316,6 +7406,8 @@ static __exit void md_exit(void)
 		export_array(mddev);
 		mddev->hold_active = 0;
 	}
+	destroy_workqueue(md_misc_wq);
+	destroy_workqueue(md_wq);
 }
 
 subsys_initcall(md_init);