aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-08-10 18:38:19 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-08-10 18:38:19 -0400
commit3d30701b58970425e1d45994d6cb82f828924fdd (patch)
tree8b14cf462628bebf8548c1b8c205a674564052d1 /drivers/md/md.c
parent8cbd84f2dd4e52a8771b191030c374ba3e56d291 (diff)
parentfd8aa2c1811bf60ccb2d5de0579c6f62aec1772d (diff)
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (24 commits) md: clean up do_md_stop md: fix another deadlock with removing sysfs attributes. md: move revalidate_disk() back outside open_mutex md/raid10: fix deadlock with unaligned read during resync md/bitmap: separate out loading a bitmap from initialising the structures. md/bitmap: prepare for storing write-intent-bitmap via dm-dirty-log. md/bitmap: optimise scanning of empty bitmaps. md/bitmap: clean up plugging calls. md/bitmap: reduce dependence on sysfs. md/bitmap: white space clean up and similar. md/raid5: export raid5 unplugging interface. md/plug: optionally use plugger to unplug an array during resync/recovery. md/raid5: add simple plugging infrastructure. md/raid5: export is_congested test raid5: Don't set read-ahead when there is no queue md: add support for raising dm events. md: export various start/stop interfaces md: split out md_rdev_init md: be more careful setting MD_CHANGE_CLEAN md/raid5: ensure we create a unique name for kmem_cache when mddev has no gendisk ...
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c286
1 files changed, 176 insertions, 110 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 700c96edf9b2..11567c7999a2 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -262,7 +262,7 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
262 * Once ->stop is called and completes, the module will be completely 262 * Once ->stop is called and completes, the module will be completely
263 * unused. 263 * unused.
264 */ 264 */
265static void mddev_suspend(mddev_t *mddev) 265void mddev_suspend(mddev_t *mddev)
266{ 266{
267 BUG_ON(mddev->suspended); 267 BUG_ON(mddev->suspended);
268 mddev->suspended = 1; 268 mddev->suspended = 1;
@@ -270,13 +270,15 @@ static void mddev_suspend(mddev_t *mddev)
270 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); 270 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
271 mddev->pers->quiesce(mddev, 1); 271 mddev->pers->quiesce(mddev, 1);
272} 272}
273EXPORT_SYMBOL_GPL(mddev_suspend);
273 274
274static void mddev_resume(mddev_t *mddev) 275void mddev_resume(mddev_t *mddev)
275{ 276{
276 mddev->suspended = 0; 277 mddev->suspended = 0;
277 wake_up(&mddev->sb_wait); 278 wake_up(&mddev->sb_wait);
278 mddev->pers->quiesce(mddev, 0); 279 mddev->pers->quiesce(mddev, 0);
279} 280}
281EXPORT_SYMBOL_GPL(mddev_resume);
280 282
281int mddev_congested(mddev_t *mddev, int bits) 283int mddev_congested(mddev_t *mddev, int bits)
282{ 284{
@@ -385,6 +387,51 @@ void md_barrier_request(mddev_t *mddev, struct bio *bio)
385} 387}
386EXPORT_SYMBOL(md_barrier_request); 388EXPORT_SYMBOL(md_barrier_request);
387 389
390/* Support for plugging.
391 * This mirrors the plugging support in request_queue, but does not
392 * require having a whole queue
393 */
394static void plugger_work(struct work_struct *work)
395{
396 struct plug_handle *plug =
397 container_of(work, struct plug_handle, unplug_work);
398 plug->unplug_fn(plug);
399}
400static void plugger_timeout(unsigned long data)
401{
402 struct plug_handle *plug = (void *)data;
403 kblockd_schedule_work(NULL, &plug->unplug_work);
404}
405void plugger_init(struct plug_handle *plug,
406 void (*unplug_fn)(struct plug_handle *))
407{
408 plug->unplug_flag = 0;
409 plug->unplug_fn = unplug_fn;
410 init_timer(&plug->unplug_timer);
411 plug->unplug_timer.function = plugger_timeout;
412 plug->unplug_timer.data = (unsigned long)plug;
413 INIT_WORK(&plug->unplug_work, plugger_work);
414}
415EXPORT_SYMBOL_GPL(plugger_init);
416
417void plugger_set_plug(struct plug_handle *plug)
418{
419 if (!test_and_set_bit(PLUGGED_FLAG, &plug->unplug_flag))
420 mod_timer(&plug->unplug_timer, jiffies + msecs_to_jiffies(3)+1);
421}
422EXPORT_SYMBOL_GPL(plugger_set_plug);
423
424int plugger_remove_plug(struct plug_handle *plug)
425{
426 if (test_and_clear_bit(PLUGGED_FLAG, &plug->unplug_flag)) {
427 del_timer(&plug->unplug_timer);
428 return 1;
429 } else
430 return 0;
431}
432EXPORT_SYMBOL_GPL(plugger_remove_plug);
433
434
388static inline mddev_t *mddev_get(mddev_t *mddev) 435static inline mddev_t *mddev_get(mddev_t *mddev)
389{ 436{
390 atomic_inc(&mddev->active); 437 atomic_inc(&mddev->active);
@@ -417,7 +464,7 @@ static void mddev_put(mddev_t *mddev)
417 spin_unlock(&all_mddevs_lock); 464 spin_unlock(&all_mddevs_lock);
418} 465}
419 466
420static void mddev_init(mddev_t *mddev) 467void mddev_init(mddev_t *mddev)
421{ 468{
422 mutex_init(&mddev->open_mutex); 469 mutex_init(&mddev->open_mutex);
423 mutex_init(&mddev->reconfig_mutex); 470 mutex_init(&mddev->reconfig_mutex);
@@ -437,6 +484,7 @@ static void mddev_init(mddev_t *mddev)
437 mddev->resync_max = MaxSector; 484 mddev->resync_max = MaxSector;
438 mddev->level = LEVEL_NONE; 485 mddev->level = LEVEL_NONE;
439} 486}
487EXPORT_SYMBOL_GPL(mddev_init);
440 488
441static mddev_t * mddev_find(dev_t unit) 489static mddev_t * mddev_find(dev_t unit)
442{ 490{
@@ -533,25 +581,31 @@ static void mddev_unlock(mddev_t * mddev)
533 * an access to the files will try to take reconfig_mutex 581 * an access to the files will try to take reconfig_mutex
534 * while holding the file unremovable, which leads to 582 * while holding the file unremovable, which leads to
535 * a deadlock. 583 * a deadlock.
536 * So hold open_mutex instead - we are allowed to take 584 * So set sysfs_active while the remove is happening,
537 * it while holding reconfig_mutex, and md_run can 585 * and anything else which might set ->to_remove or may
538 * use it to wait for the remove to complete. 586 * otherwise change the sysfs namespace will fail with
587 * -EBUSY if sysfs_active is still set.
588 * We set sysfs_active under reconfig_mutex and elsewhere
589 * test it under the same mutex to ensure its correct value
590 * is seen.
539 */ 591 */
540 struct attribute_group *to_remove = mddev->to_remove; 592 struct attribute_group *to_remove = mddev->to_remove;
541 mddev->to_remove = NULL; 593 mddev->to_remove = NULL;
542 mutex_lock(&mddev->open_mutex); 594 mddev->sysfs_active = 1;
543 mutex_unlock(&mddev->reconfig_mutex); 595 mutex_unlock(&mddev->reconfig_mutex);
544 596
545 if (to_remove != &md_redundancy_group) 597 if (mddev->kobj.sd) {
546 sysfs_remove_group(&mddev->kobj, to_remove); 598 if (to_remove != &md_redundancy_group)
547 if (mddev->pers == NULL || 599 sysfs_remove_group(&mddev->kobj, to_remove);
548 mddev->pers->sync_request == NULL) { 600 if (mddev->pers == NULL ||
549 sysfs_remove_group(&mddev->kobj, &md_redundancy_group); 601 mddev->pers->sync_request == NULL) {
550 if (mddev->sysfs_action) 602 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
551 sysfs_put(mddev->sysfs_action); 603 if (mddev->sysfs_action)
552 mddev->sysfs_action = NULL; 604 sysfs_put(mddev->sysfs_action);
605 mddev->sysfs_action = NULL;
606 }
553 } 607 }
554 mutex_unlock(&mddev->open_mutex); 608 mddev->sysfs_active = 0;
555 } else 609 } else
556 mutex_unlock(&mddev->reconfig_mutex); 610 mutex_unlock(&mddev->reconfig_mutex);
557 611
@@ -1812,11 +1866,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1812 goto fail; 1866 goto fail;
1813 1867
1814 ko = &part_to_dev(rdev->bdev->bd_part)->kobj; 1868 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
1815 if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { 1869 if (sysfs_create_link(&rdev->kobj, ko, "block"))
1816 kobject_del(&rdev->kobj); 1870 /* failure here is OK */;
1817 goto fail; 1871 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
1818 }
1819 rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, NULL, "state");
1820 1872
1821 list_add_rcu(&rdev->same_set, &mddev->disks); 1873 list_add_rcu(&rdev->same_set, &mddev->disks);
1822 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); 1874 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
@@ -2335,8 +2387,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2335 set_bit(In_sync, &rdev->flags); 2387 set_bit(In_sync, &rdev->flags);
2336 err = 0; 2388 err = 0;
2337 } 2389 }
2338 if (!err && rdev->sysfs_state) 2390 if (!err)
2339 sysfs_notify_dirent(rdev->sysfs_state); 2391 sysfs_notify_dirent_safe(rdev->sysfs_state);
2340 return err ? err : len; 2392 return err ? err : len;
2341} 2393}
2342static struct rdev_sysfs_entry rdev_state = 2394static struct rdev_sysfs_entry rdev_state =
@@ -2431,14 +2483,10 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2431 rdev->raid_disk = -1; 2483 rdev->raid_disk = -1;
2432 return err; 2484 return err;
2433 } else 2485 } else
2434 sysfs_notify_dirent(rdev->sysfs_state); 2486 sysfs_notify_dirent_safe(rdev->sysfs_state);
2435 sprintf(nm, "rd%d", rdev->raid_disk); 2487 sprintf(nm, "rd%d", rdev->raid_disk);
2436 if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm)) 2488 if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
2437 printk(KERN_WARNING 2489 /* failure here is OK */;
2438 "md: cannot register "
2439 "%s for %s\n",
2440 nm, mdname(rdev->mddev));
2441
2442 /* don't wakeup anyone, leave that to userspace. */ 2490 /* don't wakeup anyone, leave that to userspace. */
2443 } else { 2491 } else {
2444 if (slot >= rdev->mddev->raid_disks) 2492 if (slot >= rdev->mddev->raid_disks)
@@ -2448,7 +2496,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2448 clear_bit(Faulty, &rdev->flags); 2496 clear_bit(Faulty, &rdev->flags);
2449 clear_bit(WriteMostly, &rdev->flags); 2497 clear_bit(WriteMostly, &rdev->flags);
2450 set_bit(In_sync, &rdev->flags); 2498 set_bit(In_sync, &rdev->flags);
2451 sysfs_notify_dirent(rdev->sysfs_state); 2499 sysfs_notify_dirent_safe(rdev->sysfs_state);
2452 } 2500 }
2453 return len; 2501 return len;
2454} 2502}
@@ -2696,6 +2744,24 @@ static struct kobj_type rdev_ktype = {
2696 .default_attrs = rdev_default_attrs, 2744 .default_attrs = rdev_default_attrs,
2697}; 2745};
2698 2746
2747void md_rdev_init(mdk_rdev_t *rdev)
2748{
2749 rdev->desc_nr = -1;
2750 rdev->saved_raid_disk = -1;
2751 rdev->raid_disk = -1;
2752 rdev->flags = 0;
2753 rdev->data_offset = 0;
2754 rdev->sb_events = 0;
2755 rdev->last_read_error.tv_sec = 0;
2756 rdev->last_read_error.tv_nsec = 0;
2757 atomic_set(&rdev->nr_pending, 0);
2758 atomic_set(&rdev->read_errors, 0);
2759 atomic_set(&rdev->corrected_errors, 0);
2760
2761 INIT_LIST_HEAD(&rdev->same_set);
2762 init_waitqueue_head(&rdev->blocked_wait);
2763}
2764EXPORT_SYMBOL_GPL(md_rdev_init);
2699/* 2765/*
2700 * Import a device. If 'super_format' >= 0, then sanity check the superblock 2766 * Import a device. If 'super_format' >= 0, then sanity check the superblock
2701 * 2767 *
@@ -2719,6 +2785,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
2719 return ERR_PTR(-ENOMEM); 2785 return ERR_PTR(-ENOMEM);
2720 } 2786 }
2721 2787
2788 md_rdev_init(rdev);
2722 if ((err = alloc_disk_sb(rdev))) 2789 if ((err = alloc_disk_sb(rdev)))
2723 goto abort_free; 2790 goto abort_free;
2724 2791
@@ -2728,18 +2795,6 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
2728 2795
2729 kobject_init(&rdev->kobj, &rdev_ktype); 2796 kobject_init(&rdev->kobj, &rdev_ktype);
2730 2797
2731 rdev->desc_nr = -1;
2732 rdev->saved_raid_disk = -1;
2733 rdev->raid_disk = -1;
2734 rdev->flags = 0;
2735 rdev->data_offset = 0;
2736 rdev->sb_events = 0;
2737 rdev->last_read_error.tv_sec = 0;
2738 rdev->last_read_error.tv_nsec = 0;
2739 atomic_set(&rdev->nr_pending, 0);
2740 atomic_set(&rdev->read_errors, 0);
2741 atomic_set(&rdev->corrected_errors, 0);
2742
2743 size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; 2798 size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
2744 if (!size) { 2799 if (!size) {
2745 printk(KERN_WARNING 2800 printk(KERN_WARNING
@@ -2768,9 +2823,6 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
2768 } 2823 }
2769 } 2824 }
2770 2825
2771 INIT_LIST_HEAD(&rdev->same_set);
2772 init_waitqueue_head(&rdev->blocked_wait);
2773
2774 return rdev; 2826 return rdev;
2775 2827
2776abort_free: 2828abort_free:
@@ -2961,7 +3013,9 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
2961 * - new personality will access other array. 3013 * - new personality will access other array.
2962 */ 3014 */
2963 3015
2964 if (mddev->sync_thread || mddev->reshape_position != MaxSector) 3016 if (mddev->sync_thread ||
3017 mddev->reshape_position != MaxSector ||
3018 mddev->sysfs_active)
2965 return -EBUSY; 3019 return -EBUSY;
2966 3020
2967 if (!mddev->pers->quiesce) { 3021 if (!mddev->pers->quiesce) {
@@ -3438,7 +3492,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
3438 if (err) 3492 if (err)
3439 return err; 3493 return err;
3440 else { 3494 else {
3441 sysfs_notify_dirent(mddev->sysfs_state); 3495 sysfs_notify_dirent_safe(mddev->sysfs_state);
3442 return len; 3496 return len;
3443 } 3497 }
3444} 3498}
@@ -3736,7 +3790,7 @@ action_store(mddev_t *mddev, const char *page, size_t len)
3736 } 3790 }
3737 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 3791 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3738 md_wakeup_thread(mddev->thread); 3792 md_wakeup_thread(mddev->thread);
3739 sysfs_notify_dirent(mddev->sysfs_action); 3793 sysfs_notify_dirent_safe(mddev->sysfs_action);
3740 return len; 3794 return len;
3741} 3795}
3742 3796
@@ -4282,13 +4336,14 @@ static int md_alloc(dev_t dev, char *name)
4282 disk->disk_name); 4336 disk->disk_name);
4283 error = 0; 4337 error = 0;
4284 } 4338 }
4285 if (sysfs_create_group(&mddev->kobj, &md_bitmap_group)) 4339 if (mddev->kobj.sd &&
4340 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
4286 printk(KERN_DEBUG "pointless warning\n"); 4341 printk(KERN_DEBUG "pointless warning\n");
4287 abort: 4342 abort:
4288 mutex_unlock(&disks_mutex); 4343 mutex_unlock(&disks_mutex);
4289 if (!error) { 4344 if (!error && mddev->kobj.sd) {
4290 kobject_uevent(&mddev->kobj, KOBJ_ADD); 4345 kobject_uevent(&mddev->kobj, KOBJ_ADD);
4291 mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, NULL, "array_state"); 4346 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
4292 } 4347 }
4293 mddev_put(mddev); 4348 mddev_put(mddev);
4294 return error; 4349 return error;
@@ -4326,14 +4381,14 @@ static void md_safemode_timeout(unsigned long data)
4326 if (!atomic_read(&mddev->writes_pending)) { 4381 if (!atomic_read(&mddev->writes_pending)) {
4327 mddev->safemode = 1; 4382 mddev->safemode = 1;
4328 if (mddev->external) 4383 if (mddev->external)
4329 sysfs_notify_dirent(mddev->sysfs_state); 4384 sysfs_notify_dirent_safe(mddev->sysfs_state);
4330 } 4385 }
4331 md_wakeup_thread(mddev->thread); 4386 md_wakeup_thread(mddev->thread);
4332} 4387}
4333 4388
4334static int start_dirty_degraded; 4389static int start_dirty_degraded;
4335 4390
4336static int md_run(mddev_t *mddev) 4391int md_run(mddev_t *mddev)
4337{ 4392{
4338 int err; 4393 int err;
4339 mdk_rdev_t *rdev; 4394 mdk_rdev_t *rdev;
@@ -4345,13 +4400,9 @@ static int md_run(mddev_t *mddev)
4345 4400
4346 if (mddev->pers) 4401 if (mddev->pers)
4347 return -EBUSY; 4402 return -EBUSY;
4348 4403 /* Cannot run until previous stop completes properly */
4349 /* These two calls synchronise us with the 4404 if (mddev->sysfs_active)
4350 * sysfs_remove_group calls in mddev_unlock, 4405 return -EBUSY;
4351 * so they must have completed.
4352 */
4353 mutex_lock(&mddev->open_mutex);
4354 mutex_unlock(&mddev->open_mutex);
4355 4406
4356 /* 4407 /*
4357 * Analyze all RAID superblock(s) 4408 * Analyze all RAID superblock(s)
@@ -4398,7 +4449,7 @@ static int md_run(mddev_t *mddev)
4398 return -EINVAL; 4449 return -EINVAL;
4399 } 4450 }
4400 } 4451 }
4401 sysfs_notify_dirent(rdev->sysfs_state); 4452 sysfs_notify_dirent_safe(rdev->sysfs_state);
4402 } 4453 }
4403 4454
4404 spin_lock(&pers_lock); 4455 spin_lock(&pers_lock);
@@ -4497,11 +4548,12 @@ static int md_run(mddev_t *mddev)
4497 return err; 4548 return err;
4498 } 4549 }
4499 if (mddev->pers->sync_request) { 4550 if (mddev->pers->sync_request) {
4500 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group)) 4551 if (mddev->kobj.sd &&
4552 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4501 printk(KERN_WARNING 4553 printk(KERN_WARNING
4502 "md: cannot register extra attributes for %s\n", 4554 "md: cannot register extra attributes for %s\n",
4503 mdname(mddev)); 4555 mdname(mddev));
4504 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action"); 4556 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
4505 } else if (mddev->ro == 2) /* auto-readonly not meaningful */ 4557 } else if (mddev->ro == 2) /* auto-readonly not meaningful */
4506 mddev->ro = 0; 4558 mddev->ro = 0;
4507 4559
@@ -4519,8 +4571,7 @@ static int md_run(mddev_t *mddev)
4519 char nm[20]; 4571 char nm[20];
4520 sprintf(nm, "rd%d", rdev->raid_disk); 4572 sprintf(nm, "rd%d", rdev->raid_disk);
4521 if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) 4573 if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
4522 printk("md: cannot register %s for %s\n", 4574 /* failure here is OK */;
4523 nm, mdname(mddev));
4524 } 4575 }
4525 4576
4526 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 4577 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -4532,12 +4583,12 @@ static int md_run(mddev_t *mddev)
4532 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ 4583 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
4533 4584
4534 md_new_event(mddev); 4585 md_new_event(mddev);
4535 sysfs_notify_dirent(mddev->sysfs_state); 4586 sysfs_notify_dirent_safe(mddev->sysfs_state);
4536 if (mddev->sysfs_action) 4587 sysfs_notify_dirent_safe(mddev->sysfs_action);
4537 sysfs_notify_dirent(mddev->sysfs_action);
4538 sysfs_notify(&mddev->kobj, NULL, "degraded"); 4588 sysfs_notify(&mddev->kobj, NULL, "degraded");
4539 return 0; 4589 return 0;
4540} 4590}
4591EXPORT_SYMBOL_GPL(md_run);
4541 4592
4542static int do_md_run(mddev_t *mddev) 4593static int do_md_run(mddev_t *mddev)
4543{ 4594{
@@ -4546,7 +4597,11 @@ static int do_md_run(mddev_t *mddev)
4546 err = md_run(mddev); 4597 err = md_run(mddev);
4547 if (err) 4598 if (err)
4548 goto out; 4599 goto out;
4549 4600 err = bitmap_load(mddev);
4601 if (err) {
4602 bitmap_destroy(mddev);
4603 goto out;
4604 }
4550 set_capacity(mddev->gendisk, mddev->array_sectors); 4605 set_capacity(mddev->gendisk, mddev->array_sectors);
4551 revalidate_disk(mddev->gendisk); 4606 revalidate_disk(mddev->gendisk);
4552 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); 4607 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
@@ -4574,7 +4629,7 @@ static int restart_array(mddev_t *mddev)
4574 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 4629 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4575 md_wakeup_thread(mddev->thread); 4630 md_wakeup_thread(mddev->thread);
4576 md_wakeup_thread(mddev->sync_thread); 4631 md_wakeup_thread(mddev->sync_thread);
4577 sysfs_notify_dirent(mddev->sysfs_state); 4632 sysfs_notify_dirent_safe(mddev->sysfs_state);
4578 return 0; 4633 return 0;
4579} 4634}
4580 4635
@@ -4645,9 +4700,10 @@ static void md_clean(mddev_t *mddev)
4645 mddev->bitmap_info.chunksize = 0; 4700 mddev->bitmap_info.chunksize = 0;
4646 mddev->bitmap_info.daemon_sleep = 0; 4701 mddev->bitmap_info.daemon_sleep = 0;
4647 mddev->bitmap_info.max_write_behind = 0; 4702 mddev->bitmap_info.max_write_behind = 0;
4703 mddev->plug = NULL;
4648} 4704}
4649 4705
4650static void md_stop_writes(mddev_t *mddev) 4706void md_stop_writes(mddev_t *mddev)
4651{ 4707{
4652 if (mddev->sync_thread) { 4708 if (mddev->sync_thread) {
4653 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4709 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4667,11 +4723,10 @@ static void md_stop_writes(mddev_t *mddev)
4667 md_update_sb(mddev, 1); 4723 md_update_sb(mddev, 1);
4668 } 4724 }
4669} 4725}
4726EXPORT_SYMBOL_GPL(md_stop_writes);
4670 4727
4671static void md_stop(mddev_t *mddev) 4728void md_stop(mddev_t *mddev)
4672{ 4729{
4673 md_stop_writes(mddev);
4674
4675 mddev->pers->stop(mddev); 4730 mddev->pers->stop(mddev);
4676 if (mddev->pers->sync_request && mddev->to_remove == NULL) 4731 if (mddev->pers->sync_request && mddev->to_remove == NULL)
4677 mddev->to_remove = &md_redundancy_group; 4732 mddev->to_remove = &md_redundancy_group;
@@ -4679,6 +4734,7 @@ static void md_stop(mddev_t *mddev)
4679 mddev->pers = NULL; 4734 mddev->pers = NULL;
4680 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4735 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4681} 4736}
4737EXPORT_SYMBOL_GPL(md_stop);
4682 4738
4683static int md_set_readonly(mddev_t *mddev, int is_open) 4739static int md_set_readonly(mddev_t *mddev, int is_open)
4684{ 4740{
@@ -4698,7 +4754,7 @@ static int md_set_readonly(mddev_t *mddev, int is_open)
4698 mddev->ro = 1; 4754 mddev->ro = 1;
4699 set_disk_ro(mddev->gendisk, 1); 4755 set_disk_ro(mddev->gendisk, 1);
4700 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4756 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4701 sysfs_notify_dirent(mddev->sysfs_state); 4757 sysfs_notify_dirent_safe(mddev->sysfs_state);
4702 err = 0; 4758 err = 0;
4703 } 4759 }
4704out: 4760out:
@@ -4712,26 +4768,29 @@ out:
4712 */ 4768 */
4713static int do_md_stop(mddev_t * mddev, int mode, int is_open) 4769static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4714{ 4770{
4715 int err = 0;
4716 struct gendisk *disk = mddev->gendisk; 4771 struct gendisk *disk = mddev->gendisk;
4717 mdk_rdev_t *rdev; 4772 mdk_rdev_t *rdev;
4718 4773
4719 mutex_lock(&mddev->open_mutex); 4774 mutex_lock(&mddev->open_mutex);
4720 if (atomic_read(&mddev->openers) > is_open) { 4775 if (atomic_read(&mddev->openers) > is_open ||
4776 mddev->sysfs_active) {
4721 printk("md: %s still in use.\n",mdname(mddev)); 4777 printk("md: %s still in use.\n",mdname(mddev));
4722 err = -EBUSY; 4778 mutex_unlock(&mddev->open_mutex);
4723 } else if (mddev->pers) { 4779 return -EBUSY;
4780 }
4724 4781
4782 if (mddev->pers) {
4725 if (mddev->ro) 4783 if (mddev->ro)
4726 set_disk_ro(disk, 0); 4784 set_disk_ro(disk, 0);
4727 4785
4786 md_stop_writes(mddev);
4728 md_stop(mddev); 4787 md_stop(mddev);
4729 mddev->queue->merge_bvec_fn = NULL; 4788 mddev->queue->merge_bvec_fn = NULL;
4730 mddev->queue->unplug_fn = NULL; 4789 mddev->queue->unplug_fn = NULL;
4731 mddev->queue->backing_dev_info.congested_fn = NULL; 4790 mddev->queue->backing_dev_info.congested_fn = NULL;
4732 4791
4733 /* tell userspace to handle 'inactive' */ 4792 /* tell userspace to handle 'inactive' */
4734 sysfs_notify_dirent(mddev->sysfs_state); 4793 sysfs_notify_dirent_safe(mddev->sysfs_state);
4735 4794
4736 list_for_each_entry(rdev, &mddev->disks, same_set) 4795 list_for_each_entry(rdev, &mddev->disks, same_set)
4737 if (rdev->raid_disk >= 0) { 4796 if (rdev->raid_disk >= 0) {
@@ -4741,21 +4800,17 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4741 } 4800 }
4742 4801
4743 set_capacity(disk, 0); 4802 set_capacity(disk, 0);
4803 mutex_unlock(&mddev->open_mutex);
4744 revalidate_disk(disk); 4804 revalidate_disk(disk);
4745 4805
4746 if (mddev->ro) 4806 if (mddev->ro)
4747 mddev->ro = 0; 4807 mddev->ro = 0;
4748 4808 } else
4749 err = 0; 4809 mutex_unlock(&mddev->open_mutex);
4750 }
4751 mutex_unlock(&mddev->open_mutex);
4752 if (err)
4753 return err;
4754 /* 4810 /*
4755 * Free resources if final stop 4811 * Free resources if final stop
4756 */ 4812 */
4757 if (mode == 0) { 4813 if (mode == 0) {
4758
4759 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); 4814 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
4760 4815
4761 bitmap_destroy(mddev); 4816 bitmap_destroy(mddev);
@@ -4772,13 +4827,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4772 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); 4827 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
4773 if (mddev->hold_active == UNTIL_STOP) 4828 if (mddev->hold_active == UNTIL_STOP)
4774 mddev->hold_active = 0; 4829 mddev->hold_active = 0;
4775
4776 } 4830 }
4777 err = 0;
4778 blk_integrity_unregister(disk); 4831 blk_integrity_unregister(disk);
4779 md_new_event(mddev); 4832 md_new_event(mddev);
4780 sysfs_notify_dirent(mddev->sysfs_state); 4833 sysfs_notify_dirent_safe(mddev->sysfs_state);
4781 return err; 4834 return 0;
4782} 4835}
4783 4836
4784#ifndef MODULE 4837#ifndef MODULE
@@ -5139,7 +5192,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
5139 if (err) 5192 if (err)
5140 export_rdev(rdev); 5193 export_rdev(rdev);
5141 else 5194 else
5142 sysfs_notify_dirent(rdev->sysfs_state); 5195 sysfs_notify_dirent_safe(rdev->sysfs_state);
5143 5196
5144 md_update_sb(mddev, 1); 5197 md_update_sb(mddev, 1);
5145 if (mddev->degraded) 5198 if (mddev->degraded)
@@ -5332,8 +5385,11 @@ static int set_bitmap_file(mddev_t *mddev, int fd)
5332 err = 0; 5385 err = 0;
5333 if (mddev->pers) { 5386 if (mddev->pers) {
5334 mddev->pers->quiesce(mddev, 1); 5387 mddev->pers->quiesce(mddev, 1);
5335 if (fd >= 0) 5388 if (fd >= 0) {
5336 err = bitmap_create(mddev); 5389 err = bitmap_create(mddev);
5390 if (!err)
5391 err = bitmap_load(mddev);
5392 }
5337 if (fd < 0 || err) { 5393 if (fd < 0 || err) {
5338 bitmap_destroy(mddev); 5394 bitmap_destroy(mddev);
5339 fd = -1; /* make sure to put the file */ 5395 fd = -1; /* make sure to put the file */
@@ -5582,6 +5638,8 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
5582 mddev->bitmap_info.default_offset; 5638 mddev->bitmap_info.default_offset;
5583 mddev->pers->quiesce(mddev, 1); 5639 mddev->pers->quiesce(mddev, 1);
5584 rv = bitmap_create(mddev); 5640 rv = bitmap_create(mddev);
5641 if (!rv)
5642 rv = bitmap_load(mddev);
5585 if (rv) 5643 if (rv)
5586 bitmap_destroy(mddev); 5644 bitmap_destroy(mddev);
5587 mddev->pers->quiesce(mddev, 0); 5645 mddev->pers->quiesce(mddev, 0);
@@ -5814,7 +5872,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
5814 if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) { 5872 if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
5815 if (mddev->ro == 2) { 5873 if (mddev->ro == 2) {
5816 mddev->ro = 0; 5874 mddev->ro = 0;
5817 sysfs_notify_dirent(mddev->sysfs_state); 5875 sysfs_notify_dirent_safe(mddev->sysfs_state);
5818 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 5876 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5819 md_wakeup_thread(mddev->thread); 5877 md_wakeup_thread(mddev->thread);
5820 } else { 5878 } else {
@@ -6065,10 +6123,12 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
6065 mddev->pers->error_handler(mddev,rdev); 6123 mddev->pers->error_handler(mddev,rdev);
6066 if (mddev->degraded) 6124 if (mddev->degraded)
6067 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); 6125 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6068 sysfs_notify_dirent(rdev->sysfs_state); 6126 sysfs_notify_dirent_safe(rdev->sysfs_state);
6069 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 6127 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6070 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 6128 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6071 md_wakeup_thread(mddev->thread); 6129 md_wakeup_thread(mddev->thread);
6130 if (mddev->event_work.func)
6131 schedule_work(&mddev->event_work);
6072 md_new_event_inintr(mddev); 6132 md_new_event_inintr(mddev);
6073} 6133}
6074 6134
@@ -6526,7 +6586,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
6526 spin_unlock_irq(&mddev->write_lock); 6586 spin_unlock_irq(&mddev->write_lock);
6527 } 6587 }
6528 if (did_change) 6588 if (did_change)
6529 sysfs_notify_dirent(mddev->sysfs_state); 6589 sysfs_notify_dirent_safe(mddev->sysfs_state);
6530 wait_event(mddev->sb_wait, 6590 wait_event(mddev->sb_wait,
6531 !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && 6591 !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
6532 !test_bit(MD_CHANGE_PENDING, &mddev->flags)); 6592 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
@@ -6569,7 +6629,7 @@ int md_allow_write(mddev_t *mddev)
6569 mddev->safemode = 1; 6629 mddev->safemode = 1;
6570 spin_unlock_irq(&mddev->write_lock); 6630 spin_unlock_irq(&mddev->write_lock);
6571 md_update_sb(mddev, 0); 6631 md_update_sb(mddev, 0);
6572 sysfs_notify_dirent(mddev->sysfs_state); 6632 sysfs_notify_dirent_safe(mddev->sysfs_state);
6573 } else 6633 } else
6574 spin_unlock_irq(&mddev->write_lock); 6634 spin_unlock_irq(&mddev->write_lock);
6575 6635
@@ -6580,6 +6640,14 @@ int md_allow_write(mddev_t *mddev)
6580} 6640}
6581EXPORT_SYMBOL_GPL(md_allow_write); 6641EXPORT_SYMBOL_GPL(md_allow_write);
6582 6642
6643void md_unplug(mddev_t *mddev)
6644{
6645 if (mddev->queue)
6646 blk_unplug(mddev->queue);
6647 if (mddev->plug)
6648 mddev->plug->unplug_fn(mddev->plug);
6649}
6650
6583#define SYNC_MARKS 10 6651#define SYNC_MARKS 10
6584#define SYNC_MARK_STEP (3*HZ) 6652#define SYNC_MARK_STEP (3*HZ)
6585void md_do_sync(mddev_t *mddev) 6653void md_do_sync(mddev_t *mddev)
@@ -6758,12 +6826,13 @@ void md_do_sync(mddev_t *mddev)
6758 >= mddev->resync_max - mddev->curr_resync_completed 6826 >= mddev->resync_max - mddev->curr_resync_completed
6759 )) { 6827 )) {
6760 /* time to update curr_resync_completed */ 6828 /* time to update curr_resync_completed */
6761 blk_unplug(mddev->queue); 6829 md_unplug(mddev);
6762 wait_event(mddev->recovery_wait, 6830 wait_event(mddev->recovery_wait,
6763 atomic_read(&mddev->recovery_active) == 0); 6831 atomic_read(&mddev->recovery_active) == 0);
6764 mddev->curr_resync_completed = 6832 mddev->curr_resync_completed =
6765 mddev->curr_resync; 6833 mddev->curr_resync;
6766 set_bit(MD_CHANGE_CLEAN, &mddev->flags); 6834 if (mddev->persistent)
6835 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
6767 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 6836 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
6768 } 6837 }
6769 6838
@@ -6835,7 +6904,7 @@ void md_do_sync(mddev_t *mddev)
6835 * about not overloading the IO subsystem. (things like an 6904 * about not overloading the IO subsystem. (things like an
6836 * e2fsck being done on the RAID array should execute fast) 6905 * e2fsck being done on the RAID array should execute fast)
6837 */ 6906 */
6838 blk_unplug(mddev->queue); 6907 md_unplug(mddev);
6839 cond_resched(); 6908 cond_resched();
6840 6909
6841 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 6910 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
@@ -6854,7 +6923,7 @@ void md_do_sync(mddev_t *mddev)
6854 * this also signals 'finished resyncing' to md_stop 6923 * this also signals 'finished resyncing' to md_stop
6855 */ 6924 */
6856 out: 6925 out:
6857 blk_unplug(mddev->queue); 6926 md_unplug(mddev);
6858 6927
6859 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); 6928 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
6860 6929
@@ -6956,10 +7025,7 @@ static int remove_and_add_spares(mddev_t *mddev)
6956 sprintf(nm, "rd%d", rdev->raid_disk); 7025 sprintf(nm, "rd%d", rdev->raid_disk);
6957 if (sysfs_create_link(&mddev->kobj, 7026 if (sysfs_create_link(&mddev->kobj,
6958 &rdev->kobj, nm)) 7027 &rdev->kobj, nm))
6959 printk(KERN_WARNING 7028 /* failure here is OK */;
6960 "md: cannot register "
6961 "%s for %s\n",
6962 nm, mdname(mddev));
6963 spares++; 7029 spares++;
6964 md_new_event(mddev); 7030 md_new_event(mddev);
6965 set_bit(MD_CHANGE_DEVS, &mddev->flags); 7031 set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -7052,7 +7118,7 @@ void md_check_recovery(mddev_t *mddev)
7052 mddev->safemode = 0; 7118 mddev->safemode = 0;
7053 spin_unlock_irq(&mddev->write_lock); 7119 spin_unlock_irq(&mddev->write_lock);
7054 if (did_change) 7120 if (did_change)
7055 sysfs_notify_dirent(mddev->sysfs_state); 7121 sysfs_notify_dirent_safe(mddev->sysfs_state);
7056 } 7122 }
7057 7123
7058 if (mddev->flags) 7124 if (mddev->flags)
@@ -7091,7 +7157,7 @@ void md_check_recovery(mddev_t *mddev)
7091 mddev->recovery = 0; 7157 mddev->recovery = 0;
7092 /* flag recovery needed just to double check */ 7158 /* flag recovery needed just to double check */
7093 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 7159 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7094 sysfs_notify_dirent(mddev->sysfs_action); 7160 sysfs_notify_dirent_safe(mddev->sysfs_action);
7095 md_new_event(mddev); 7161 md_new_event(mddev);
7096 goto unlock; 7162 goto unlock;
7097 } 7163 }
@@ -7153,7 +7219,7 @@ void md_check_recovery(mddev_t *mddev)
7153 mddev->recovery = 0; 7219 mddev->recovery = 0;
7154 } else 7220 } else
7155 md_wakeup_thread(mddev->sync_thread); 7221 md_wakeup_thread(mddev->sync_thread);
7156 sysfs_notify_dirent(mddev->sysfs_action); 7222 sysfs_notify_dirent_safe(mddev->sysfs_action);
7157 md_new_event(mddev); 7223 md_new_event(mddev);
7158 } 7224 }
7159 unlock: 7225 unlock:
@@ -7162,7 +7228,7 @@ void md_check_recovery(mddev_t *mddev)
7162 if (test_and_clear_bit(MD_RECOVERY_RECOVER, 7228 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
7163 &mddev->recovery)) 7229 &mddev->recovery))
7164 if (mddev->sysfs_action) 7230 if (mddev->sysfs_action)
7165 sysfs_notify_dirent(mddev->sysfs_action); 7231 sysfs_notify_dirent_safe(mddev->sysfs_action);
7166 } 7232 }
7167 mddev_unlock(mddev); 7233 mddev_unlock(mddev);
7168 } 7234 }
@@ -7170,7 +7236,7 @@ void md_check_recovery(mddev_t *mddev)
7170 7236
7171void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev) 7237void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
7172{ 7238{
7173 sysfs_notify_dirent(rdev->sysfs_state); 7239 sysfs_notify_dirent_safe(rdev->sysfs_state);
7174 wait_event_timeout(rdev->blocked_wait, 7240 wait_event_timeout(rdev->blocked_wait,
7175 !test_bit(Blocked, &rdev->flags), 7241 !test_bit(Blocked, &rdev->flags),
7176 msecs_to_jiffies(5000)); 7242 msecs_to_jiffies(5000));