diff options
author | NeilBrown <neilb@suse.de> | 2009-03-31 00:16:46 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2009-03-31 00:16:46 -0400 |
commit | fef9c61fdfabf97a307c2cf3621a6949f0a4b995 (patch) | |
tree | 82b128341c12205db62fe092d692d32103a7ea9f /drivers | |
parent | cea9c22800773cecb1d41f4a6139f9eb6a95368b (diff) |
md/raid5: change reshape-progress measurement to cope with reshaping backwards.
When reducing the number of devices in a raid4/5/6, the reshape
process has to start at the end of the array and work down to the
beginning. So we need to handle expand_progress and expand_lo
differently.
This patch renames "expand_progress" and "expand_lo" to
"reshape_progress" and "reshape_safe" to avoid the implication that
anything is getting bigger (expand->reshape). Everywhere they are
used, we make sure that they are used the right way, depending on
whether delta_disks is positive or negative.
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/md/raid5.c | 94 | ||||
-rw-r--r-- | drivers/md/raid5.h | 15 |
2 files changed, 71 insertions, 38 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a0f22dd33234..1023c4e48a91 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -3593,24 +3593,28 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3593 | retry: | 3593 | retry: |
3594 | previous = 0; | 3594 | previous = 0; |
3595 | prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); | 3595 | prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); |
3596 | if (likely(conf->expand_progress == MaxSector)) | 3596 | if (likely(conf->reshape_progress == MaxSector)) |
3597 | disks = conf->raid_disks; | 3597 | disks = conf->raid_disks; |
3598 | else { | 3598 | else { |
3599 | /* spinlock is needed as expand_progress may be | 3599 | /* spinlock is needed as reshape_progress may be |
3600 | * 64bit on a 32bit platform, and so it might be | 3600 | * 64bit on a 32bit platform, and so it might be |
3601 | * possible to see a half-updated value | 3601 | * possible to see a half-updated value |
3602 | * Ofcourse expand_progress could change after | 3602 | * Ofcourse reshape_progress could change after |
3603 | * the lock is dropped, so once we get a reference | 3603 | * the lock is dropped, so once we get a reference |
3604 | * to the stripe that we think it is, we will have | 3604 | * to the stripe that we think it is, we will have |
3605 | * to check again. | 3605 | * to check again. |
3606 | */ | 3606 | */ |
3607 | spin_lock_irq(&conf->device_lock); | 3607 | spin_lock_irq(&conf->device_lock); |
3608 | disks = conf->raid_disks; | 3608 | disks = conf->raid_disks; |
3609 | if (logical_sector >= conf->expand_progress) { | 3609 | if (mddev->delta_disks < 0 |
3610 | ? logical_sector < conf->reshape_progress | ||
3611 | : logical_sector >= conf->reshape_progress) { | ||
3610 | disks = conf->previous_raid_disks; | 3612 | disks = conf->previous_raid_disks; |
3611 | previous = 1; | 3613 | previous = 1; |
3612 | } else { | 3614 | } else { |
3613 | if (logical_sector >= conf->expand_lo) { | 3615 | if (mddev->delta_disks < 0 |
3616 | ? logical_sector < conf->reshape_safe | ||
3617 | : logical_sector >= conf->reshape_safe) { | ||
3614 | spin_unlock_irq(&conf->device_lock); | 3618 | spin_unlock_irq(&conf->device_lock); |
3615 | schedule(); | 3619 | schedule(); |
3616 | goto retry; | 3620 | goto retry; |
@@ -3630,7 +3634,7 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3630 | sh = get_active_stripe(conf, new_sector, previous, | 3634 | sh = get_active_stripe(conf, new_sector, previous, |
3631 | (bi->bi_rw&RWA_MASK)); | 3635 | (bi->bi_rw&RWA_MASK)); |
3632 | if (sh) { | 3636 | if (sh) { |
3633 | if (unlikely(conf->expand_progress != MaxSector)) { | 3637 | if (unlikely(conf->reshape_progress != MaxSector)) { |
3634 | /* expansion might have moved on while waiting for a | 3638 | /* expansion might have moved on while waiting for a |
3635 | * stripe, so we must do the range check again. | 3639 | * stripe, so we must do the range check again. |
3636 | * Expansion could still move past after this | 3640 | * Expansion could still move past after this |
@@ -3641,8 +3645,10 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3641 | */ | 3645 | */ |
3642 | int must_retry = 0; | 3646 | int must_retry = 0; |
3643 | spin_lock_irq(&conf->device_lock); | 3647 | spin_lock_irq(&conf->device_lock); |
3644 | if (logical_sector < conf->expand_progress && | 3648 | if ((mddev->delta_disks < 0 |
3645 | disks == conf->previous_raid_disks) | 3649 | ? logical_sector >= conf->reshape_progress |
3650 | : logical_sector < conf->reshape_progress) | ||
3651 | && disks == conf->previous_raid_disks) | ||
3646 | /* mismatch, need to try again */ | 3652 | /* mismatch, need to try again */ |
3647 | must_retry = 1; | 3653 | must_retry = 1; |
3648 | spin_unlock_irq(&conf->device_lock); | 3654 | spin_unlock_irq(&conf->device_lock); |
@@ -3720,13 +3726,20 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3720 | int dd_idx; | 3726 | int dd_idx; |
3721 | sector_t writepos, safepos, gap; | 3727 | sector_t writepos, safepos, gap; |
3722 | 3728 | ||
3723 | if (sector_nr == 0 && | 3729 | if (sector_nr == 0) { |
3724 | conf->expand_progress != 0) { | 3730 | /* If restarting in the middle, skip the initial sectors */ |
3725 | /* restarting in the middle, skip the initial sectors */ | 3731 | if (mddev->delta_disks < 0 && |
3726 | sector_nr = conf->expand_progress; | 3732 | conf->reshape_progress < raid5_size(mddev, 0, 0)) { |
3733 | sector_nr = raid5_size(mddev, 0, 0) | ||
3734 | - conf->reshape_progress; | ||
3735 | } else if (mddev->delta_disks > 0 && | ||
3736 | conf->reshape_progress > 0) | ||
3737 | sector_nr = conf->reshape_progress; | ||
3727 | sector_div(sector_nr, new_data_disks); | 3738 | sector_div(sector_nr, new_data_disks); |
3728 | *skipped = 1; | 3739 | if (sector_nr) { |
3729 | return sector_nr; | 3740 | *skipped = 1; |
3741 | return sector_nr; | ||
3742 | } | ||
3730 | } | 3743 | } |
3731 | 3744 | ||
3732 | /* we update the metadata when there is more than 3Meg | 3745 | /* we update the metadata when there is more than 3Meg |
@@ -3734,28 +3747,37 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3734 | * probably be time based) or when the data about to be | 3747 | * probably be time based) or when the data about to be |
3735 | * copied would over-write the source of the data at | 3748 | * copied would over-write the source of the data at |
3736 | * the front of the range. | 3749 | * the front of the range. |
3737 | * i.e. one new_stripe forward from expand_progress new_maps | 3750 | * i.e. one new_stripe along from reshape_progress new_maps |
3738 | * to after where expand_lo old_maps to | 3751 | * to after where reshape_safe old_maps to |
3739 | */ | 3752 | */ |
3740 | writepos = conf->expand_progress + | 3753 | writepos = conf->reshape_progress; |
3741 | conf->chunk_size/512*(new_data_disks); | ||
3742 | sector_div(writepos, new_data_disks); | 3754 | sector_div(writepos, new_data_disks); |
3743 | safepos = conf->expand_lo; | 3755 | safepos = conf->reshape_safe; |
3744 | sector_div(safepos, data_disks); | 3756 | sector_div(safepos, data_disks); |
3745 | gap = conf->expand_progress - conf->expand_lo; | 3757 | if (mddev->delta_disks < 0) { |
3758 | writepos -= conf->chunk_size/512; | ||
3759 | safepos += conf->chunk_size/512; | ||
3760 | gap = conf->reshape_safe - conf->reshape_progress; | ||
3761 | } else { | ||
3762 | writepos += conf->chunk_size/512; | ||
3763 | safepos -= conf->chunk_size/512; | ||
3764 | gap = conf->reshape_progress - conf->reshape_safe; | ||
3765 | } | ||
3746 | 3766 | ||
3747 | if (writepos >= safepos || | 3767 | if ((mddev->delta_disks < 0 |
3768 | ? writepos < safepos | ||
3769 | : writepos > safepos) || | ||
3748 | gap > (new_data_disks)*3000*2 /*3Meg*/) { | 3770 | gap > (new_data_disks)*3000*2 /*3Meg*/) { |
3749 | /* Cannot proceed until we've updated the superblock... */ | 3771 | /* Cannot proceed until we've updated the superblock... */ |
3750 | wait_event(conf->wait_for_overlap, | 3772 | wait_event(conf->wait_for_overlap, |
3751 | atomic_read(&conf->reshape_stripes)==0); | 3773 | atomic_read(&conf->reshape_stripes)==0); |
3752 | mddev->reshape_position = conf->expand_progress; | 3774 | mddev->reshape_position = conf->reshape_progress; |
3753 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 3775 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
3754 | md_wakeup_thread(mddev->thread); | 3776 | md_wakeup_thread(mddev->thread); |
3755 | wait_event(mddev->sb_wait, mddev->flags == 0 || | 3777 | wait_event(mddev->sb_wait, mddev->flags == 0 || |
3756 | kthread_should_stop()); | 3778 | kthread_should_stop()); |
3757 | spin_lock_irq(&conf->device_lock); | 3779 | spin_lock_irq(&conf->device_lock); |
3758 | conf->expand_lo = mddev->reshape_position; | 3780 | conf->reshape_safe = mddev->reshape_position; |
3759 | spin_unlock_irq(&conf->device_lock); | 3781 | spin_unlock_irq(&conf->device_lock); |
3760 | wake_up(&conf->wait_for_overlap); | 3782 | wake_up(&conf->wait_for_overlap); |
3761 | } | 3783 | } |
@@ -3792,7 +3814,10 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3792 | release_stripe(sh); | 3814 | release_stripe(sh); |
3793 | } | 3815 | } |
3794 | spin_lock_irq(&conf->device_lock); | 3816 | spin_lock_irq(&conf->device_lock); |
3795 | conf->expand_progress = (sector_nr + i) * new_data_disks; | 3817 | if (mddev->delta_disks < 0) |
3818 | conf->reshape_progress -= i * new_data_disks; | ||
3819 | else | ||
3820 | conf->reshape_progress += i * new_data_disks; | ||
3796 | spin_unlock_irq(&conf->device_lock); | 3821 | spin_unlock_irq(&conf->device_lock); |
3797 | /* Ok, those stripe are ready. We can start scheduling | 3822 | /* Ok, those stripe are ready. We can start scheduling |
3798 | * reads on the source stripes. | 3823 | * reads on the source stripes. |
@@ -3823,14 +3848,14 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3823 | /* Cannot proceed until we've updated the superblock... */ | 3848 | /* Cannot proceed until we've updated the superblock... */ |
3824 | wait_event(conf->wait_for_overlap, | 3849 | wait_event(conf->wait_for_overlap, |
3825 | atomic_read(&conf->reshape_stripes) == 0); | 3850 | atomic_read(&conf->reshape_stripes) == 0); |
3826 | mddev->reshape_position = conf->expand_progress; | 3851 | mddev->reshape_position = conf->reshape_progress; |
3827 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 3852 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
3828 | md_wakeup_thread(mddev->thread); | 3853 | md_wakeup_thread(mddev->thread); |
3829 | wait_event(mddev->sb_wait, | 3854 | wait_event(mddev->sb_wait, |
3830 | !test_bit(MD_CHANGE_DEVS, &mddev->flags) | 3855 | !test_bit(MD_CHANGE_DEVS, &mddev->flags) |
3831 | || kthread_should_stop()); | 3856 | || kthread_should_stop()); |
3832 | spin_lock_irq(&conf->device_lock); | 3857 | spin_lock_irq(&conf->device_lock); |
3833 | conf->expand_lo = mddev->reshape_position; | 3858 | conf->reshape_safe = mddev->reshape_position; |
3834 | spin_unlock_irq(&conf->device_lock); | 3859 | spin_unlock_irq(&conf->device_lock); |
3835 | wake_up(&conf->wait_for_overlap); | 3860 | wake_up(&conf->wait_for_overlap); |
3836 | } | 3861 | } |
@@ -4283,7 +4308,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
4283 | conf->max_degraded = 1; | 4308 | conf->max_degraded = 1; |
4284 | conf->algorithm = mddev->new_layout; | 4309 | conf->algorithm = mddev->new_layout; |
4285 | conf->max_nr_stripes = NR_STRIPES; | 4310 | conf->max_nr_stripes = NR_STRIPES; |
4286 | conf->expand_progress = mddev->reshape_position; | 4311 | conf->reshape_progress = mddev->reshape_position; |
4287 | 4312 | ||
4288 | memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + | 4313 | memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + |
4289 | conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; | 4314 | conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; |
@@ -4441,9 +4466,9 @@ static int run(mddev_t *mddev) | |||
4441 | 4466 | ||
4442 | print_raid5_conf(conf); | 4467 | print_raid5_conf(conf); |
4443 | 4468 | ||
4444 | if (conf->expand_progress != MaxSector) { | 4469 | if (conf->reshape_progress != MaxSector) { |
4445 | printk("...ok start reshape thread\n"); | 4470 | printk("...ok start reshape thread\n"); |
4446 | conf->expand_lo = conf->expand_progress; | 4471 | conf->reshape_safe = conf->reshape_progress; |
4447 | atomic_set(&conf->reshape_stripes, 0); | 4472 | atomic_set(&conf->reshape_stripes, 0); |
4448 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | 4473 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
4449 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); | 4474 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); |
@@ -4782,8 +4807,11 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
4782 | spin_lock_irq(&conf->device_lock); | 4807 | spin_lock_irq(&conf->device_lock); |
4783 | conf->previous_raid_disks = conf->raid_disks; | 4808 | conf->previous_raid_disks = conf->raid_disks; |
4784 | conf->raid_disks += mddev->delta_disks; | 4809 | conf->raid_disks += mddev->delta_disks; |
4785 | conf->expand_progress = 0; | 4810 | if (mddev->delta_disks < 0) |
4786 | conf->expand_lo = 0; | 4811 | conf->reshape_progress = raid5_size(mddev, 0, 0); |
4812 | else | ||
4813 | conf->reshape_progress = 0; | ||
4814 | conf->reshape_safe = conf->reshape_progress; | ||
4787 | spin_unlock_irq(&conf->device_lock); | 4815 | spin_unlock_irq(&conf->device_lock); |
4788 | 4816 | ||
4789 | /* Add some new drives, as many as will fit. | 4817 | /* Add some new drives, as many as will fit. |
@@ -4825,7 +4853,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
4825 | mddev->recovery = 0; | 4853 | mddev->recovery = 0; |
4826 | spin_lock_irq(&conf->device_lock); | 4854 | spin_lock_irq(&conf->device_lock); |
4827 | mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; | 4855 | mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; |
4828 | conf->expand_progress = MaxSector; | 4856 | conf->reshape_progress = MaxSector; |
4829 | spin_unlock_irq(&conf->device_lock); | 4857 | spin_unlock_irq(&conf->device_lock); |
4830 | return -EAGAIN; | 4858 | return -EAGAIN; |
4831 | } | 4859 | } |
@@ -4842,7 +4870,7 @@ static void end_reshape(raid5_conf_t *conf) | |||
4842 | 4870 | ||
4843 | spin_lock_irq(&conf->device_lock); | 4871 | spin_lock_irq(&conf->device_lock); |
4844 | conf->previous_raid_disks = conf->raid_disks; | 4872 | conf->previous_raid_disks = conf->raid_disks; |
4845 | conf->expand_progress = MaxSector; | 4873 | conf->reshape_progress = MaxSector; |
4846 | spin_unlock_irq(&conf->device_lock); | 4874 | spin_unlock_irq(&conf->device_lock); |
4847 | 4875 | ||
4848 | /* read-ahead size must cover two whole stripes, which is | 4876 | /* read-ahead size must cover two whole stripes, which is |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index c2f37f25ef44..b2edcc434e41 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -337,11 +337,16 @@ struct raid5_private_data { | |||
337 | int raid_disks; | 337 | int raid_disks; |
338 | int max_nr_stripes; | 338 | int max_nr_stripes; |
339 | 339 | ||
340 | /* used during an expand */ | 340 | /* reshape_progress is the leading edge of a 'reshape' |
341 | sector_t expand_progress; /* MaxSector when no expand happening */ | 341 | * It has value MaxSector when no reshape is happening |
342 | sector_t expand_lo; /* from here up to expand_progress it out-of-bounds | 342 | * If delta_disks < 0, it is the last sector we started work on, |
343 | * as we haven't flushed the metadata yet | 343 | * else is it the next sector to work on. |
344 | */ | 344 | */ |
345 | sector_t reshape_progress; | ||
346 | /* reshape_safe is the trailing edge of a reshape. We know that | ||
347 | * before (or after) this address, all reshape has completed. | ||
348 | */ | ||
349 | sector_t reshape_safe; | ||
345 | int previous_raid_disks; | 350 | int previous_raid_disks; |
346 | 351 | ||
347 | struct list_head handle_list; /* stripes needing handling */ | 352 | struct list_head handle_list; /* stripes needing handling */ |