aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2009-03-31 00:17:38 -0400
committer NeilBrown <neilb@suse.de> 2009-03-31 00:17:38 -0400
commit ec32a2bd35bd6b933a5db6542c48210ce069a376 (patch)
tree 32d30344202025cf1c6cd202394e65ff7e99753f /drivers/md
parent fef9c61fdfabf97a307c2cf3621a6949f0a4b995 (diff)
md: allow number of drives in raid5 to be reduced
When reshaping a raid5 to have fewer devices, we work from the end of the array to the beginning. md_do_sync gives addresses to sync_request that go from the beginning to the end. So we largely ignore them and use the internal state variable "reshape_progress" to keep track of what to do next. Never allow the size to be reduced below the minimum (4 for raid6, 3 otherwise). We require that the size of the array has already been reduced before the array is reshaped to a smaller size. This is because simply reducing the size is an easily reversible operation, while the reshape is immediately destructive and so is not reversible for the blocks at the ends of the devices. Thus, to reshape an array to have fewer devices, you must first write an appropriately small size to md/array_size. When the reshape has finished, we remove any drives that are no longer needed and fix up ->degraded. Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/raid5.c124
1 files changed, 87 insertions, 37 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 1023c4e48a91..76eed592371e 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3725,6 +3725,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3725 int i; 3725 int i;
3726 int dd_idx; 3726 int dd_idx;
3727 sector_t writepos, safepos, gap; 3727 sector_t writepos, safepos, gap;
3728 sector_t stripe_addr;
3728 3729
3729 if (sector_nr == 0) { 3730 if (sector_nr == 0) {
3730 /* If restarting in the middle, skip the initial sectors */ 3731 /* If restarting in the middle, skip the initial sectors */
@@ -3782,10 +3783,21 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3782 wake_up(&conf->wait_for_overlap); 3783 wake_up(&conf->wait_for_overlap);
3783 } 3784 }
3784 3785
3786 if (mddev->delta_disks < 0) {
3787 BUG_ON(conf->reshape_progress == 0);
3788 stripe_addr = writepos;
3789 BUG_ON((mddev->dev_sectors &
3790 ~((sector_t)mddev->chunk_size / 512 - 1))
3791 - (conf->chunk_size / 512) - stripe_addr
3792 != sector_nr);
3793 } else {
3794 BUG_ON(writepos != sector_nr + conf->chunk_size / 512);
3795 stripe_addr = sector_nr;
3796 }
3785 for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) { 3797 for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) {
3786 int j; 3798 int j;
3787 int skipped = 0; 3799 int skipped = 0;
3788 sh = get_active_stripe(conf, sector_nr+i, 0, 0); 3800 sh = get_active_stripe(conf, stripe_addr+i, 0, 0);
3789 set_bit(STRIPE_EXPANDING, &sh->state); 3801 set_bit(STRIPE_EXPANDING, &sh->state);
3790 atomic_inc(&conf->reshape_stripes); 3802 atomic_inc(&conf->reshape_stripes);
3791 /* If any of this stripe is beyond the end of the old 3803 /* If any of this stripe is beyond the end of the old
@@ -3825,10 +3837,10 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3825 * block on the destination stripes. 3837 * block on the destination stripes.
3826 */ 3838 */
3827 first_sector = 3839 first_sector =
3828 raid5_compute_sector(conf, sector_nr*(new_data_disks), 3840 raid5_compute_sector(conf, stripe_addr*(new_data_disks),
3829 1, &dd_idx, NULL); 3841 1, &dd_idx, NULL);
3830 last_sector = 3842 last_sector =
3831 raid5_compute_sector(conf, ((sector_nr+conf->chunk_size/512) 3843 raid5_compute_sector(conf, ((stripe_addr+conf->chunk_size/512)
3832 *(new_data_disks) - 1), 3844 *(new_data_disks) - 1),
3833 1, &dd_idx, NULL); 3845 1, &dd_idx, NULL);
3834 if (last_sector >= mddev->dev_sectors) 3846 if (last_sector >= mddev->dev_sectors)
@@ -4366,12 +4378,6 @@ static int run(mddev_t *mddev)
4366 mdname(mddev)); 4378 mdname(mddev));
4367 return -EINVAL; 4379 return -EINVAL;
4368 } 4380 }
4369 if (mddev->delta_disks <= 0) {
4370 printk(KERN_ERR "raid5: %s: unsupported reshape "
4371 "(reduce disks) required - aborting.\n",
4372 mdname(mddev));
4373 return -EINVAL;
4374 }
4375 old_disks = mddev->raid_disks - mddev->delta_disks; 4381 old_disks = mddev->raid_disks - mddev->delta_disks;
4376 /* reshape_position must be on a new-stripe boundary, and one 4382 /* reshape_position must be on a new-stripe boundary, and one
4377 * further up in new geometry must map after here in old 4383 * further up in new geometry must map after here in old
@@ -4648,6 +4654,10 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
4648 print_raid5_conf(conf); 4654 print_raid5_conf(conf);
4649 rdev = p->rdev; 4655 rdev = p->rdev;
4650 if (rdev) { 4656 if (rdev) {
4657 if (number >= conf->raid_disks &&
4658 conf->reshape_progress == MaxSector)
4659 clear_bit(In_sync, &rdev->flags);
4660
4651 if (test_bit(In_sync, &rdev->flags) || 4661 if (test_bit(In_sync, &rdev->flags) ||
4652 atomic_read(&rdev->nr_pending)) { 4662 atomic_read(&rdev->nr_pending)) {
4653 err = -EBUSY; 4663 err = -EBUSY;
@@ -4657,7 +4667,8 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
4657 * isn't possible. 4667 * isn't possible.
4658 */ 4668 */
4659 if (!test_bit(Faulty, &rdev->flags) && 4669 if (!test_bit(Faulty, &rdev->flags) &&
4660 mddev->degraded <= conf->max_degraded) { 4670 mddev->degraded <= conf->max_degraded &&
4671 number < conf->raid_disks) {
4661 err = -EBUSY; 4672 err = -EBUSY;
4662 goto abort; 4673 goto abort;
4663 } 4674 }
@@ -4745,16 +4756,26 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
4745static int raid5_check_reshape(mddev_t *mddev) 4756static int raid5_check_reshape(mddev_t *mddev)
4746{ 4757{
4747 raid5_conf_t *conf = mddev_to_conf(mddev); 4758 raid5_conf_t *conf = mddev_to_conf(mddev);
4748 int err;
4749 4759
4750 if (mddev->delta_disks < 0 ||
4751 mddev->new_level != mddev->level)
4752 return -EINVAL; /* Cannot shrink array or change level yet */
4753 if (mddev->delta_disks == 0) 4760 if (mddev->delta_disks == 0)
4754 return 0; /* nothing to do */ 4761 return 0; /* nothing to do */
4755 if (mddev->bitmap) 4762 if (mddev->bitmap)
4756 /* Cannot grow a bitmap yet */ 4763 /* Cannot grow a bitmap yet */
4757 return -EBUSY; 4764 return -EBUSY;
4765 if (mddev->degraded > conf->max_degraded)
4766 return -EINVAL;
4767 if (mddev->delta_disks < 0) {
4768 /* We might be able to shrink, but the devices must
4769 * be made bigger first.
4770 * For raid6, 4 is the minimum size.
4771 * Otherwise 2 is the minimum
4772 */
4773 int min = 2;
4774 if (mddev->level == 6)
4775 min = 4;
4776 if (mddev->raid_disks + mddev->delta_disks < min)
4777 return -EINVAL;
4778 }
4758 4779
4759 /* Can only proceed if there are plenty of stripe_heads. 4780 /* Can only proceed if there are plenty of stripe_heads.
4760 * We need a minimum of one full stripe,, and for sensible progress 4781 * We need a minimum of one full stripe,, and for sensible progress
@@ -4771,14 +4792,7 @@ static int raid5_check_reshape(mddev_t *mddev)
4771 return -ENOSPC; 4792 return -ENOSPC;
4772 } 4793 }
4773 4794
4774 err = resize_stripes(conf, conf->raid_disks + mddev->delta_disks); 4795 return resize_stripes(conf, conf->raid_disks + mddev->delta_disks);
4775 if (err)
4776 return err;
4777
4778 if (mddev->degraded > conf->max_degraded)
4779 return -EINVAL;
4780 /* looks like we might be able to manage this */
4781 return 0;
4782} 4796}
4783 4797
4784static int raid5_start_reshape(mddev_t *mddev) 4798static int raid5_start_reshape(mddev_t *mddev)
@@ -4803,6 +4817,17 @@ static int raid5_start_reshape(mddev_t *mddev)
4803 */ 4817 */
4804 return -EINVAL; 4818 return -EINVAL;
4805 4819
4820 /* Refuse to reduce size of the array. Any reductions in
4821 * array size must be through explicit setting of array_size
4822 * attribute.
4823 */
4824 if (raid5_size(mddev, 0, conf->raid_disks + mddev->delta_disks)
4825 < mddev->array_sectors) {
4826 printk(KERN_ERR "md: %s: array size must be reduced "
4827 "before number of disks\n", mdname(mddev));
4828 return -EINVAL;
4829 }
4830
4806 atomic_set(&conf->reshape_stripes, 0); 4831 atomic_set(&conf->reshape_stripes, 0);
4807 spin_lock_irq(&conf->device_lock); 4832 spin_lock_irq(&conf->device_lock);
4808 conf->previous_raid_disks = conf->raid_disks; 4833 conf->previous_raid_disks = conf->raid_disks;
@@ -4836,9 +4861,12 @@ static int raid5_start_reshape(mddev_t *mddev)
4836 break; 4861 break;
4837 } 4862 }
4838 4863
4839 spin_lock_irqsave(&conf->device_lock, flags); 4864 if (mddev->delta_disks > 0) {
4840 mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices; 4865 spin_lock_irqsave(&conf->device_lock, flags);
4841 spin_unlock_irqrestore(&conf->device_lock, flags); 4866 mddev->degraded = (conf->raid_disks - conf->previous_raid_disks)
4867 - added_devices;
4868 spin_unlock_irqrestore(&conf->device_lock, flags);
4869 }
4842 mddev->raid_disks = conf->raid_disks; 4870 mddev->raid_disks = conf->raid_disks;
4843 mddev->reshape_position = 0; 4871 mddev->reshape_position = 0;
4844 set_bit(MD_CHANGE_DEVS, &mddev->flags); 4872 set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -4863,6 +4891,9 @@ static int raid5_start_reshape(mddev_t *mddev)
4863} 4891}
4864#endif 4892#endif
4865 4893
4894/* This is called from the reshape thread and should make any
4895 * changes needed in 'conf'
4896 */
4866static void end_reshape(raid5_conf_t *conf) 4897static void end_reshape(raid5_conf_t *conf)
4867{ 4898{
4868 4899
@@ -4886,25 +4917,44 @@ static void end_reshape(raid5_conf_t *conf)
4886 } 4917 }
4887} 4918}
4888 4919
4920/* This is called from the raid5d thread with mddev_lock held.
4921 * It makes config changes to the device.
4922 */
4889static void raid5_finish_reshape(mddev_t *mddev) 4923static void raid5_finish_reshape(mddev_t *mddev)
4890{ 4924{
4891 struct block_device *bdev; 4925 struct block_device *bdev;
4892 4926
4893 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { 4927 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
4894 4928
4895 md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); 4929 if (mddev->delta_disks > 0) {
4896 set_capacity(mddev->gendisk, mddev->array_sectors); 4930 md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
4897 mddev->changed = 1; 4931 set_capacity(mddev->gendisk, mddev->array_sectors);
4898 mddev->reshape_position = MaxSector; 4932 mddev->changed = 1;
4899 4933
4900 bdev = bdget_disk(mddev->gendisk, 0); 4934 bdev = bdget_disk(mddev->gendisk, 0);
4901 if (bdev) { 4935 if (bdev) {
4902 mutex_lock(&bdev->bd_inode->i_mutex); 4936 mutex_lock(&bdev->bd_inode->i_mutex);
4903 i_size_write(bdev->bd_inode, 4937 i_size_write(bdev->bd_inode,
4904 (loff_t)mddev->array_sectors << 9); 4938 (loff_t)mddev->array_sectors << 9);
4905 mutex_unlock(&bdev->bd_inode->i_mutex); 4939 mutex_unlock(&bdev->bd_inode->i_mutex);
4906 bdput(bdev); 4940 bdput(bdev);
4941 }
4942 } else {
4943 int d;
4944 raid5_conf_t *conf = mddev_to_conf(mddev);
4945 mddev->degraded = conf->raid_disks;
4946 for (d = 0; d < conf->raid_disks ; d++)
4947 if (conf->disks[d].rdev &&
4948 test_bit(In_sync,
4949 &conf->disks[d].rdev->flags))
4950 mddev->degraded--;
4951 for (d = conf->raid_disks ;
4952 d < conf->raid_disks - mddev->delta_disks;
4953 d++)
4954 raid5_remove_disk(mddev, d);
4907 } 4955 }
4956 mddev->reshape_position = MaxSector;
4957 mddev->delta_disks = 0;
4908 } 4958 }
4909} 4959}
4910 4960