aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2013-04-23 21:42:42 -0400
committerNeilBrown <neilb@suse.de>2013-04-23 21:42:42 -0400
commit7ceb17e87bde79d285a8b988cfed9eaeebe60b86 (patch)
tree72924d5eb98bf7740148db5e136f873680b79c97 /drivers/md
parent7e83ccbecd608b971f340e951c9e84cd0343002f (diff)
md: Allow devices to be re-added to a read-only array.
When assembling an array incrementally we might want to make the device available when "enough" devices are present, but maybe not "all" devices are present. If the remaining devices appear before the array is actually used, they should be added transparently. We do this by using the "read-auto" mode where the array acts like it is read-only until a write request arrives. Currently, an add-device request switches a read-auto array to active. This means that only one device can be added after the array is first made read-auto. This isn't a problem for RAID5, but is not ideal for RAID6 or RAID10. Also we don't really want to switch the array out of read-auto at all when re-adding a device as this doesn't really imply any change. So: - remove the "md_update_sb()" call from add_new_disk(). This isn't really needed as just adding a disk doesn't require a metadata update. Instead, just set MD_CHANGE_DEVS. This will effect a metadata update soon enough, once the array is not read-only. - Allow the ADD_NEW_DISK ioctl to succeed without activating a read-auto array, provided the MD_DISK_SYNC flag is set. In this case, the device will be rejected if it cannot be added with the correct device number, or has an incorrect event count. - Teach remove_and_add_spares() to be careful about adding spares when the array is read-only (or read-mostly) - only add devices that are thought to be in-sync, and only do it if the array is in-sync itself. - In md_check_recovery, use remove_and_add_spares in the read-only case, rather than open coding just the 'remove' part of it. Reported-by: Martin Wilck <mwilck@arcor.de> Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/md.c83
1 files changed, 57 insertions, 26 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 802c2a379d8f..491afda21fd9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5816,7 +5816,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
5816 else 5816 else
5817 sysfs_notify_dirent_safe(rdev->sysfs_state); 5817 sysfs_notify_dirent_safe(rdev->sysfs_state);
5818 5818
5819 md_update_sb(mddev, 1); 5819 set_bit(MD_CHANGE_DEVS, &mddev->flags);
5820 if (mddev->degraded) 5820 if (mddev->degraded)
5821 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); 5821 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5822 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 5822 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -6503,6 +6503,24 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
6503 err = hot_remove_disk(mddev, new_decode_dev(arg)); 6503 err = hot_remove_disk(mddev, new_decode_dev(arg));
6504 goto done_unlock; 6504 goto done_unlock;
6505 6505
6506 case ADD_NEW_DISK:
6507 /* We can support ADD_NEW_DISK on read-only arrays
6508 * only if we are re-adding a preexisting device.
6509 * So require mddev->pers and MD_DISK_SYNC.
6510 */
6511 if (mddev->pers) {
6512 mdu_disk_info_t info;
6513 if (copy_from_user(&info, argp, sizeof(info)))
6514 err = -EFAULT;
6515 else if (!(info.state & (1<<MD_DISK_SYNC)))
6516 /* Need to clear read-only for this */
6517 break;
6518 else
6519 err = add_new_disk(mddev, &info);
6520 goto done_unlock;
6521 }
6522 break;
6523
6506 case BLKROSET: 6524 case BLKROSET:
6507 if (get_user(ro, (int __user *)(arg))) { 6525 if (get_user(ro, (int __user *)(arg))) {
6508 err = -EFAULT; 6526 err = -EFAULT;
@@ -7685,17 +7703,36 @@ static int remove_and_add_spares(struct mddev *mddev,
7685 !test_bit(In_sync, &rdev->flags) && 7703 !test_bit(In_sync, &rdev->flags) &&
7686 !test_bit(Faulty, &rdev->flags)) 7704 !test_bit(Faulty, &rdev->flags))
7687 spares++; 7705 spares++;
7688 if (rdev->raid_disk < 0 7706 if (rdev->raid_disk >= 0)
7689 && !test_bit(Faulty, &rdev->flags)) { 7707 continue;
7690 rdev->recovery_offset = 0; 7708 if (test_bit(Faulty, &rdev->flags))
7691 if (mddev->pers-> 7709 continue;
7692 hot_add_disk(mddev, rdev) == 0) { 7710 if (mddev->ro &&
7693 if (sysfs_link_rdev(mddev, rdev)) 7711 rdev->saved_raid_disk < 0)
7694 /* failure here is OK */; 7712 continue;
7695 spares++; 7713
7696 md_new_event(mddev); 7714 rdev->recovery_offset = 0;
7697 set_bit(MD_CHANGE_DEVS, &mddev->flags); 7715 if (rdev->saved_raid_disk >= 0 && mddev->in_sync) {
7698 } 7716 spin_lock_irq(&mddev->write_lock);
7717 if (mddev->in_sync)
7718 /* OK, this device, which is in_sync,
7719 * will definitely be noticed before
7720 * the next write, so recovery isn't
7721 * needed.
7722 */
7723 rdev->recovery_offset = mddev->recovery_cp;
7724 spin_unlock_irq(&mddev->write_lock);
7725 }
7726 if (mddev->ro && rdev->recovery_offset != MaxSector)
7727 /* not safe to add this disk now */
7728 continue;
7729 if (mddev->pers->
7730 hot_add_disk(mddev, rdev) == 0) {
7731 if (sysfs_link_rdev(mddev, rdev))
7732 /* failure here is OK */;
7733 spares++;
7734 md_new_event(mddev);
7735 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7699 } 7736 }
7700 } 7737 }
7701no_add: 7738no_add:
@@ -7804,22 +7841,16 @@ void md_check_recovery(struct mddev *mddev)
7804 int spares = 0; 7841 int spares = 0;
7805 7842
7806 if (mddev->ro) { 7843 if (mddev->ro) {
7807 /* Only thing we do on a ro array is remove 7844 /* On a read-only array we can:
7808 * failed devices. 7845 * - remove failed devices
7846 * - add already-in_sync devices if the array itself
7847 * is in-sync.
7848 * As we only add devices that are already in-sync,
7849 * we can activate the spares immediately.
7809 */ 7850 */
7810 struct md_rdev *rdev;
7811 rdev_for_each(rdev, mddev)
7812 if (rdev->raid_disk >= 0 &&
7813 !test_bit(Blocked, &rdev->flags) &&
7814 test_bit(Faulty, &rdev->flags) &&
7815 atomic_read(&rdev->nr_pending)==0) {
7816 if (mddev->pers->hot_remove_disk(
7817 mddev, rdev) == 0) {
7818 sysfs_unlink_rdev(mddev, rdev);
7819 rdev->raid_disk = -1;
7820 }
7821 }
7822 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 7851 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7852 remove_and_add_spares(mddev, NULL);
7853 mddev->pers->spare_active(mddev);
7823 goto unlock; 7854 goto unlock;
7824 } 7855 }
7825 7856