diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 162 |
1 files changed, 127 insertions, 35 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 225815197a3d..4e957f3140a8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -57,8 +57,6 @@ | |||
57 | #define DEBUG 0 | 57 | #define DEBUG 0 |
58 | #define dprintk(x...) ((void)(DEBUG && printk(x))) | 58 | #define dprintk(x...) ((void)(DEBUG && printk(x))) |
59 | 59 | ||
60 | static DEFINE_MUTEX(md_mutex); | ||
61 | |||
62 | #ifndef MODULE | 60 | #ifndef MODULE |
63 | static void autostart_arrays(int part); | 61 | static void autostart_arrays(int part); |
64 | #endif | 62 | #endif |
@@ -69,6 +67,8 @@ static DEFINE_SPINLOCK(pers_lock); | |||
69 | static void md_print_devices(void); | 67 | static void md_print_devices(void); |
70 | 68 | ||
71 | static DECLARE_WAIT_QUEUE_HEAD(resync_wait); | 69 | static DECLARE_WAIT_QUEUE_HEAD(resync_wait); |
70 | static struct workqueue_struct *md_wq; | ||
71 | static struct workqueue_struct *md_misc_wq; | ||
72 | 72 | ||
73 | #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } | 73 | #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } |
74 | 74 | ||
@@ -149,6 +149,72 @@ static const struct block_device_operations md_fops; | |||
149 | 149 | ||
150 | static int start_readonly; | 150 | static int start_readonly; |
151 | 151 | ||
152 | /* bio_alloc_mddev / bio_clone_mddev | ||
153 | * like bio_alloc / bio_clone, but using the mddev's local bio set when it has one | ||
154 | */ | ||
155 | /* mddev_bio_destructor: bi_destructor installed by bio_alloc_mddev() and bio_clone_mddev(); frees the bio into its mddev's bio_set */ |||
156 | static void mddev_bio_destructor(struct bio *bio) | ||
157 | { | ||
158 | mddev_t *mddev, **mddevp; | ||
159 | |||
160 | mddevp = (void*)bio; /* treat the bio's address as an array of mddev_t pointers */ | ||
161 | mddev = mddevp[-1]; /* owner pointer that the allocating helper stored in the slot just before the bio */ | ||
162 | /* free the bio against the bio_set it was allocated from */ |||
163 | bio_free(bio, mddev->bio_set); | ||
164 | } | ||
165 | /* bio_alloc_mddev: allocate a bio from mddev->bio_set, or via plain bio_alloc() when mddev is NULL or has no bio_set; returns NULL if the bio_set allocation fails */ |||
166 | struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, | ||
167 | mddev_t *mddev) | ||
168 | { | ||
169 | struct bio *b; | ||
170 | mddev_t **mddevp; | ||
171 | /* no per-array bio_set available: fall back to bio_alloc(), which installs no md destructor */ |||
172 | if (!mddev || !mddev->bio_set) | ||
173 | return bio_alloc(gfp_mask, nr_iovecs); | ||
174 | |||
175 | b = bio_alloc_bioset(gfp_mask, nr_iovecs, | ||
176 | mddev->bio_set); | ||
177 | if (!b) | ||
178 | return NULL; | ||
179 | mddevp = (void*)b; /* NOTE(review): assumes the bio_set reserves one pointer-sized slot in front of each bio (see the bioset_create() call in md_run) - confirm */ | ||
180 | mddevp[-1] = mddev; /* remember the owner; mddev_bio_destructor reads it back from this slot */ | ||
181 | b->bi_destructor = mddev_bio_destructor; | ||
182 | return b; | ||
183 | } | ||
184 | EXPORT_SYMBOL_GPL(bio_alloc_mddev); | ||
185 | /* bio_clone_mddev: clone bio into a bio taken from mddev->bio_set, or via plain bio_clone() when mddev is NULL or has no bio_set; returns NULL if allocation or integrity cloning fails */ |||
186 | struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, | ||
187 | mddev_t *mddev) | ||
188 | { | ||
189 | struct bio *b; | ||
190 | mddev_t **mddevp; | ||
191 | /* no per-array bio_set available: fall back to bio_clone() */ |||
192 | if (!mddev || !mddev->bio_set) | ||
193 | return bio_clone(bio, gfp_mask); | ||
194 | |||
195 | b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, | ||
196 | mddev->bio_set); | ||
197 | if (!b) | ||
198 | return NULL; | ||
199 | mddevp = (void*)b; /* NOTE(review): assumes the bio_set reserves one pointer-sized slot in front of each bio (see the bioset_create() call in md_run) - confirm */ | ||
200 | mddevp[-1] = mddev; /* remember the owner; mddev_bio_destructor reads it back from this slot */ | ||
201 | b->bi_destructor = mddev_bio_destructor; | ||
202 | __bio_clone(b, bio); | ||
203 | if (bio_integrity(bio)) { | ||
204 | int ret; | ||
205 | /* the source bio has integrity data: clone it too, allocating from the same bio_set */ |||
206 | ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set); | ||
207 | /* on failure drop our reference to the new bio and report the clone as failed */ |||
208 | if (ret < 0) { | ||
209 | bio_put(b); | ||
210 | return NULL; | ||
211 | } | ||
212 | } | ||
213 | |||
214 | return b; | ||
215 | } | ||
216 | EXPORT_SYMBOL_GPL(bio_clone_mddev); | ||
217 | |||
152 | /* | 218 | /* |
153 | * We have a system wide 'event count' that is incremented | 219 | * We have a system wide 'event count' that is incremented |
154 | * on any 'interesting' event, and readers of /proc/mdstat | 220 | * on any 'interesting' event, and readers of /proc/mdstat |
@@ -300,7 +366,7 @@ static void md_end_flush(struct bio *bio, int err) | |||
300 | 366 | ||
301 | if (atomic_dec_and_test(&mddev->flush_pending)) { | 367 | if (atomic_dec_and_test(&mddev->flush_pending)) { |
302 | /* The pre-request flush has finished */ | 368 | /* The pre-request flush has finished */ |
303 | schedule_work(&mddev->flush_work); | 369 | queue_work(md_wq, &mddev->flush_work); |
304 | } | 370 | } |
305 | bio_put(bio); | 371 | bio_put(bio); |
306 | } | 372 | } |
@@ -321,7 +387,7 @@ static void submit_flushes(mddev_t *mddev) | |||
321 | atomic_inc(&rdev->nr_pending); | 387 | atomic_inc(&rdev->nr_pending); |
322 | atomic_inc(&rdev->nr_pending); | 388 | atomic_inc(&rdev->nr_pending); |
323 | rcu_read_unlock(); | 389 | rcu_read_unlock(); |
324 | bi = bio_alloc(GFP_KERNEL, 0); | 390 | bi = bio_alloc_mddev(GFP_KERNEL, 0, mddev); |
325 | bi->bi_end_io = md_end_flush; | 391 | bi->bi_end_io = md_end_flush; |
326 | bi->bi_private = rdev; | 392 | bi->bi_private = rdev; |
327 | bi->bi_bdev = rdev->bdev; | 393 | bi->bi_bdev = rdev->bdev; |
@@ -369,7 +435,7 @@ void md_flush_request(mddev_t *mddev, struct bio *bio) | |||
369 | submit_flushes(mddev); | 435 | submit_flushes(mddev); |
370 | 436 | ||
371 | if (atomic_dec_and_test(&mddev->flush_pending)) | 437 | if (atomic_dec_and_test(&mddev->flush_pending)) |
372 | schedule_work(&mddev->flush_work); | 438 | queue_work(md_wq, &mddev->flush_work); |
373 | } | 439 | } |
374 | EXPORT_SYMBOL(md_flush_request); | 440 | EXPORT_SYMBOL(md_flush_request); |
375 | 441 | ||
@@ -428,6 +494,8 @@ static void mddev_delayed_delete(struct work_struct *ws); | |||
428 | 494 | ||
429 | static void mddev_put(mddev_t *mddev) | 495 | static void mddev_put(mddev_t *mddev) |
430 | { | 496 | { |
497 | struct bio_set *bs = NULL; | ||
498 | |||
431 | if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) | 499 | if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) |
432 | return; | 500 | return; |
433 | if (!mddev->raid_disks && list_empty(&mddev->disks) && | 501 | if (!mddev->raid_disks && list_empty(&mddev->disks) && |
@@ -435,19 +503,22 @@ static void mddev_put(mddev_t *mddev) | |||
435 | /* Array is not configured at all, and not held active, | 503 | /* Array is not configured at all, and not held active, |
436 | * so destroy it */ | 504 | * so destroy it */ |
437 | list_del(&mddev->all_mddevs); | 505 | list_del(&mddev->all_mddevs); |
506 | bs = mddev->bio_set; | ||
507 | mddev->bio_set = NULL; | ||
438 | if (mddev->gendisk) { | 508 | if (mddev->gendisk) { |
439 | /* we did a probe so need to clean up. | 509 | /* We did a probe so need to clean up. Call |
440 | * Call schedule_work inside the spinlock | 510 | * queue_work inside the spinlock so that |
441 | * so that flush_scheduled_work() after | 511 | * flush_workqueue() after mddev_find will |
442 | * mddev_find will succeed in waiting for the | 512 | * succeed in waiting for the work to be done. |
443 | * work to be done. | ||
444 | */ | 513 | */ |
445 | INIT_WORK(&mddev->del_work, mddev_delayed_delete); | 514 | INIT_WORK(&mddev->del_work, mddev_delayed_delete); |
446 | schedule_work(&mddev->del_work); | 515 | queue_work(md_misc_wq, &mddev->del_work); |
447 | } else | 516 | } else |
448 | kfree(mddev); | 517 | kfree(mddev); |
449 | } | 518 | } |
450 | spin_unlock(&all_mddevs_lock); | 519 | spin_unlock(&all_mddevs_lock); |
520 | if (bs) | ||
521 | bioset_free(bs); | ||
451 | } | 522 | } |
452 | 523 | ||
453 | void mddev_init(mddev_t *mddev) | 524 | void mddev_init(mddev_t *mddev) |
@@ -691,7 +762,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, | |||
691 | * if zero is reached. | 762 | * if zero is reached. |
692 | * If an error occurred, call md_error | 763 | * If an error occurred, call md_error |
693 | */ | 764 | */ |
694 | struct bio *bio = bio_alloc(GFP_NOIO, 1); | 765 | struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev); |
695 | 766 | ||
696 | bio->bi_bdev = rdev->bdev; | 767 | bio->bi_bdev = rdev->bdev; |
697 | bio->bi_sector = sector; | 768 | bio->bi_sector = sector; |
@@ -722,16 +793,16 @@ static void bi_complete(struct bio *bio, int error) | |||
722 | complete((struct completion*)bio->bi_private); | 793 | complete((struct completion*)bio->bi_private); |
723 | } | 794 | } |
724 | 795 | ||
725 | int sync_page_io(struct block_device *bdev, sector_t sector, int size, | 796 | int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size, |
726 | struct page *page, int rw) | 797 | struct page *page, int rw) |
727 | { | 798 | { |
728 | struct bio *bio = bio_alloc(GFP_NOIO, 1); | 799 | struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev); |
729 | struct completion event; | 800 | struct completion event; |
730 | int ret; | 801 | int ret; |
731 | 802 | ||
732 | rw |= REQ_SYNC | REQ_UNPLUG; | 803 | rw |= REQ_SYNC | REQ_UNPLUG; |
733 | 804 | ||
734 | bio->bi_bdev = bdev; | 805 | bio->bi_bdev = rdev->bdev; |
735 | bio->bi_sector = sector; | 806 | bio->bi_sector = sector; |
736 | bio_add_page(bio, page, size, 0); | 807 | bio_add_page(bio, page, size, 0); |
737 | init_completion(&event); | 808 | init_completion(&event); |
@@ -757,7 +828,7 @@ static int read_disk_sb(mdk_rdev_t * rdev, int size) | |||
757 | return 0; | 828 | return 0; |
758 | 829 | ||
759 | 830 | ||
760 | if (!sync_page_io(rdev->bdev, rdev->sb_start, size, rdev->sb_page, READ)) | 831 | if (!sync_page_io(rdev, rdev->sb_start, size, rdev->sb_page, READ)) |
761 | goto fail; | 832 | goto fail; |
762 | rdev->sb_loaded = 1; | 833 | rdev->sb_loaded = 1; |
763 | return 0; | 834 | return 0; |
@@ -1850,7 +1921,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) | |||
1850 | synchronize_rcu(); | 1921 | synchronize_rcu(); |
1851 | INIT_WORK(&rdev->del_work, md_delayed_delete); | 1922 | INIT_WORK(&rdev->del_work, md_delayed_delete); |
1852 | kobject_get(&rdev->kobj); | 1923 | kobject_get(&rdev->kobj); |
1853 | schedule_work(&rdev->del_work); | 1924 | queue_work(md_misc_wq, &rdev->del_work); |
1854 | } | 1925 | } |
1855 | 1926 | ||
1856 | /* | 1927 | /* |
@@ -2108,6 +2179,8 @@ repeat: | |||
2108 | if (!mddev->persistent) { | 2179 | if (!mddev->persistent) { |
2109 | clear_bit(MD_CHANGE_CLEAN, &mddev->flags); | 2180 | clear_bit(MD_CHANGE_CLEAN, &mddev->flags); |
2110 | clear_bit(MD_CHANGE_DEVS, &mddev->flags); | 2181 | clear_bit(MD_CHANGE_DEVS, &mddev->flags); |
2182 | if (!mddev->external) | ||
2183 | clear_bit(MD_CHANGE_PENDING, &mddev->flags); | ||
2111 | wake_up(&mddev->sb_wait); | 2184 | wake_up(&mddev->sb_wait); |
2112 | return; | 2185 | return; |
2113 | } | 2186 | } |
@@ -4192,10 +4265,10 @@ static int md_alloc(dev_t dev, char *name) | |||
4192 | shift = partitioned ? MdpMinorShift : 0; | 4265 | shift = partitioned ? MdpMinorShift : 0; |
4193 | unit = MINOR(mddev->unit) >> shift; | 4266 | unit = MINOR(mddev->unit) >> shift; |
4194 | 4267 | ||
4195 | /* wait for any previous instance if this device | 4268 | /* wait for any previous instance of this device to be |
4196 | * to be completed removed (mddev_delayed_delete). | 4269 | * completely removed (mddev_delayed_delete). |
4197 | */ | 4270 | */ |
4198 | flush_scheduled_work(); | 4271 | flush_workqueue(md_misc_wq); |
4199 | 4272 | ||
4200 | mutex_lock(&disks_mutex); | 4273 | mutex_lock(&disks_mutex); |
4201 | error = -EEXIST; | 4274 | error = -EEXIST; |
@@ -4378,6 +4451,9 @@ int md_run(mddev_t *mddev) | |||
4378 | sysfs_notify_dirent_safe(rdev->sysfs_state); | 4451 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
4379 | } | 4452 | } |
4380 | 4453 | ||
4454 | if (mddev->bio_set == NULL) | ||
4455 | mddev->bio_set = bioset_create(BIO_POOL_SIZE, sizeof(mddev)); | ||
4456 | |||
4381 | spin_lock(&pers_lock); | 4457 | spin_lock(&pers_lock); |
4382 | pers = find_pers(mddev->level, mddev->clevel); | 4458 | pers = find_pers(mddev->level, mddev->clevel); |
4383 | if (!pers || !try_module_get(pers->owner)) { | 4459 | if (!pers || !try_module_get(pers->owner)) { |
@@ -5885,16 +5961,14 @@ static int md_open(struct block_device *bdev, fmode_t mode) | |||
5885 | mddev_t *mddev = mddev_find(bdev->bd_dev); | 5961 | mddev_t *mddev = mddev_find(bdev->bd_dev); |
5886 | int err; | 5962 | int err; |
5887 | 5963 | ||
5888 | mutex_lock(&md_mutex); | ||
5889 | if (mddev->gendisk != bdev->bd_disk) { | 5964 | if (mddev->gendisk != bdev->bd_disk) { |
5890 | /* we are racing with mddev_put which is discarding this | 5965 | /* we are racing with mddev_put which is discarding this |
5891 | * bd_disk. | 5966 | * bd_disk. |
5892 | */ | 5967 | */ |
5893 | mddev_put(mddev); | 5968 | mddev_put(mddev); |
5894 | /* Wait until bdev->bd_disk is definitely gone */ | 5969 | /* Wait until bdev->bd_disk is definitely gone */ |
5895 | flush_scheduled_work(); | 5970 | flush_workqueue(md_misc_wq); |
5896 | /* Then retry the open from the top */ | 5971 | /* Then retry the open from the top */ |
5897 | mutex_unlock(&md_mutex); | ||
5898 | return -ERESTARTSYS; | 5972 | return -ERESTARTSYS; |
5899 | } | 5973 | } |
5900 | BUG_ON(mddev != bdev->bd_disk->private_data); | 5974 | BUG_ON(mddev != bdev->bd_disk->private_data); |
@@ -5908,7 +5982,6 @@ static int md_open(struct block_device *bdev, fmode_t mode) | |||
5908 | 5982 | ||
5909 | check_disk_size_change(mddev->gendisk, bdev); | 5983 | check_disk_size_change(mddev->gendisk, bdev); |
5910 | out: | 5984 | out: |
5911 | mutex_unlock(&md_mutex); | ||
5912 | return err; | 5985 | return err; |
5913 | } | 5986 | } |
5914 | 5987 | ||
@@ -5917,10 +5990,8 @@ static int md_release(struct gendisk *disk, fmode_t mode) | |||
5917 | mddev_t *mddev = disk->private_data; | 5990 | mddev_t *mddev = disk->private_data; |
5918 | 5991 | ||
5919 | BUG_ON(!mddev); | 5992 | BUG_ON(!mddev); |
5920 | mutex_lock(&md_mutex); | ||
5921 | atomic_dec(&mddev->openers); | 5993 | atomic_dec(&mddev->openers); |
5922 | mddev_put(mddev); | 5994 | mddev_put(mddev); |
5923 | mutex_unlock(&md_mutex); | ||
5924 | 5995 | ||
5925 | return 0; | 5996 | return 0; |
5926 | } | 5997 | } |
@@ -6052,7 +6123,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
6052 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 6123 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
6053 | md_wakeup_thread(mddev->thread); | 6124 | md_wakeup_thread(mddev->thread); |
6054 | if (mddev->event_work.func) | 6125 | if (mddev->event_work.func) |
6055 | schedule_work(&mddev->event_work); | 6126 | queue_work(md_misc_wq, &mddev->event_work); |
6056 | md_new_event_inintr(mddev); | 6127 | md_new_event_inintr(mddev); |
6057 | } | 6128 | } |
6058 | 6129 | ||
@@ -7212,12 +7283,23 @@ static void md_geninit(void) | |||
7212 | 7283 | ||
7213 | static int __init md_init(void) | 7284 | static int __init md_init(void) |
7214 | { | 7285 | { |
7215 | if (register_blkdev(MD_MAJOR, "md")) | 7286 | int ret = -ENOMEM; |
7216 | return -1; | 7287 | |
7217 | if ((mdp_major=register_blkdev(0, "mdp"))<=0) { | 7288 | md_wq = alloc_workqueue("md", WQ_RESCUER, 0); |
7218 | unregister_blkdev(MD_MAJOR, "md"); | 7289 | if (!md_wq) |
7219 | return -1; | 7290 | goto err_wq; |
7220 | } | 7291 | |
7292 | md_misc_wq = alloc_workqueue("md_misc", 0, 0); | ||
7293 | if (!md_misc_wq) | ||
7294 | goto err_misc_wq; | ||
7295 | |||
7296 | if ((ret = register_blkdev(MD_MAJOR, "md")) < 0) | ||
7297 | goto err_md; | ||
7298 | |||
7299 | if ((ret = register_blkdev(0, "mdp")) < 0) | ||
7300 | goto err_mdp; | ||
7301 | mdp_major = ret; | ||
7302 | |||
7221 | blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE, | 7303 | blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE, |
7222 | md_probe, NULL, NULL); | 7304 | md_probe, NULL, NULL); |
7223 | blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE, | 7305 | blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE, |
@@ -7228,8 +7310,16 @@ static int __init md_init(void) | |||
7228 | 7310 | ||
7229 | md_geninit(); | 7311 | md_geninit(); |
7230 | return 0; | 7312 | return 0; |
7231 | } | ||
7232 | 7313 | ||
7314 | err_mdp: | ||
7315 | unregister_blkdev(MD_MAJOR, "md"); | ||
7316 | err_md: | ||
7317 | destroy_workqueue(md_misc_wq); | ||
7318 | err_misc_wq: | ||
7319 | destroy_workqueue(md_wq); | ||
7320 | err_wq: | ||
7321 | return ret; | ||
7322 | } | ||
7233 | 7323 | ||
7234 | #ifndef MODULE | 7324 | #ifndef MODULE |
7235 | 7325 | ||
@@ -7316,6 +7406,8 @@ static __exit void md_exit(void) | |||
7316 | export_array(mddev); | 7406 | export_array(mddev); |
7317 | mddev->hold_active = 0; | 7407 | mddev->hold_active = 0; |
7318 | } | 7408 | } |
7409 | destroy_workqueue(md_misc_wq); | ||
7410 | destroy_workqueue(md_wq); | ||
7319 | } | 7411 | } |
7320 | 7412 | ||
7321 | subsys_initcall(md_init); | 7413 | subsys_initcall(md_init); |