diff options
-rw-r--r-- | drivers/md/raid5.c | 34 | ||||
-rw-r--r-- | drivers/md/raid5.h | 2 |
2 files changed, 30 insertions, 6 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index bb4b12e370df..3bbc6d647044 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -3766,7 +3766,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3766 | int new_data_disks = conf->raid_disks - conf->max_degraded; | 3766 | int new_data_disks = conf->raid_disks - conf->max_degraded; |
3767 | int i; | 3767 | int i; |
3768 | int dd_idx; | 3768 | int dd_idx; |
3769 | sector_t writepos, safepos, gap; | 3769 | sector_t writepos, readpos, safepos; |
3770 | sector_t stripe_addr; | 3770 | sector_t stripe_addr; |
3771 | int reshape_sectors; | 3771 | int reshape_sectors; |
3772 | struct list_head stripes; | 3772 | struct list_head stripes; |
@@ -3806,26 +3806,46 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3806 | */ | 3806 | */ |
3807 | writepos = conf->reshape_progress; | 3807 | writepos = conf->reshape_progress; |
3808 | sector_div(writepos, new_data_disks); | 3808 | sector_div(writepos, new_data_disks); |
3809 | readpos = conf->reshape_progress; | ||
3810 | sector_div(readpos, data_disks); | ||
3809 | safepos = conf->reshape_safe; | 3811 | safepos = conf->reshape_safe; |
3810 | sector_div(safepos, data_disks); | 3812 | sector_div(safepos, data_disks); |
3811 | if (mddev->delta_disks < 0) { | 3813 | if (mddev->delta_disks < 0) { |
3812 | writepos -= reshape_sectors; | 3814 | writepos -= reshape_sectors; |
3815 | readpos += reshape_sectors; | ||
3813 | safepos += reshape_sectors; | 3816 | safepos += reshape_sectors; |
3814 | gap = conf->reshape_safe - conf->reshape_progress; | ||
3815 | } else { | 3817 | } else { |
3816 | writepos += reshape_sectors; | 3818 | writepos += reshape_sectors; |
3819 | readpos -= reshape_sectors; | ||
3817 | safepos -= reshape_sectors; | 3820 | safepos -= reshape_sectors; |
3818 | gap = conf->reshape_progress - conf->reshape_safe; | ||
3819 | } | 3821 | } |
3820 | 3822 | ||
3823 | /* 'writepos' is the most advanced device address we might write. | ||
3824 | * 'readpos' is the least advanced device address we might read. | ||
3825 | * 'safepos' is the least address recorded in the metadata as having | ||
3826 | * been reshaped. | ||
3827 | * If 'readpos' is behind 'writepos', then there is no way that we can | ||
3828 | * ensure safety in the face of a crash - that must be done by userspace | ||
3829 | * making a backup of the data. So in that case there is no particular | ||
3830 | * rush to update metadata. | ||
3831 | * Otherwise if 'safepos' is behind 'writepos', then we really need to | ||
3832 | * update the metadata to advance 'safepos' to match 'readpos' so that | ||
3833 | * we can be safe in the event of a crash. | ||
3834 | * So we insist on updating metadata if safepos is behind writepos and | ||
3835 | * readpos is beyond writepos. | ||
3836 | * In any case, update the metadata every 10 seconds. | ||
3837 | * Maybe that number should be configurable, but I'm not sure it is | ||
3838 | * worth it.... maybe it could be a multiple of safemode_delay??? | ||
3839 | */ | ||
3821 | if ((mddev->delta_disks < 0 | 3840 | if ((mddev->delta_disks < 0 |
3822 | ? writepos < safepos | 3841 | ? (safepos > writepos && readpos < writepos) |
3823 | : writepos > safepos) || | 3842 | : (safepos < writepos && readpos > writepos)) || |
3824 | gap > (new_data_disks)*3000*2 /*3Meg*/) { | 3843 | time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { |
3825 | /* Cannot proceed until we've updated the superblock... */ | 3844 | /* Cannot proceed until we've updated the superblock... */ |
3826 | wait_event(conf->wait_for_overlap, | 3845 | wait_event(conf->wait_for_overlap, |
3827 | atomic_read(&conf->reshape_stripes)==0); | 3846 | atomic_read(&conf->reshape_stripes)==0); |
3828 | mddev->reshape_position = conf->reshape_progress; | 3847 | mddev->reshape_position = conf->reshape_progress; |
3848 | conf->reshape_checkpoint = jiffies; | ||
3829 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 3849 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
3830 | md_wakeup_thread(mddev->thread); | 3850 | md_wakeup_thread(mddev->thread); |
3831 | wait_event(mddev->sb_wait, mddev->flags == 0 || | 3851 | wait_event(mddev->sb_wait, mddev->flags == 0 || |
@@ -3923,6 +3943,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3923 | wait_event(conf->wait_for_overlap, | 3943 | wait_event(conf->wait_for_overlap, |
3924 | atomic_read(&conf->reshape_stripes) == 0); | 3944 | atomic_read(&conf->reshape_stripes) == 0); |
3925 | mddev->reshape_position = conf->reshape_progress; | 3945 | mddev->reshape_position = conf->reshape_progress; |
3946 | conf->reshape_checkpoint = jiffies; | ||
3926 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 3947 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
3927 | md_wakeup_thread(mddev->thread); | 3948 | md_wakeup_thread(mddev->thread); |
3928 | wait_event(mddev->sb_wait, | 3949 | wait_event(mddev->sb_wait, |
@@ -4957,6 +4978,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
4957 | spin_unlock_irq(&conf->device_lock); | 4978 | spin_unlock_irq(&conf->device_lock); |
4958 | return -EAGAIN; | 4979 | return -EAGAIN; |
4959 | } | 4980 | } |
4981 | conf->reshape_checkpoint = jiffies; | ||
4960 | md_wakeup_thread(mddev->sync_thread); | 4982 | md_wakeup_thread(mddev->sync_thread); |
4961 | md_new_event(mddev); | 4983 | md_new_event(mddev); |
4962 | return 0; | 4984 | return 0; |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index cdd045681720..52ba99954dec 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -352,6 +352,8 @@ struct raid5_private_data { | |||
352 | int previous_raid_disks; | 352 | int previous_raid_disks; |
353 | int prev_chunk, prev_algo; | 353 | int prev_chunk, prev_algo; |
354 | short generation; /* increments with every reshape */ | 354 | short generation; /* increments with every reshape */ |
355 | unsigned long reshape_checkpoint; /* Time we last updated | ||
356 | * metadata */ | ||
355 | 357 | ||
356 | struct list_head handle_list; /* stripes needing handling */ | 358 | struct list_head handle_list; /* stripes needing handling */ |
357 | struct list_head hold_list; /* preread ready stripes */ | 359 | struct list_head hold_list; /* preread ready stripes */ |