about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/md/raid5.c 34
-rw-r--r-- drivers/md/raid5.h 2
2 files changed, 30 insertions, 6 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index bb4b12e370df..3bbc6d647044 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3766,7 +3766,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3766 int new_data_disks = conf->raid_disks - conf->max_degraded; 3766 int new_data_disks = conf->raid_disks - conf->max_degraded;
3767 int i; 3767 int i;
3768 int dd_idx; 3768 int dd_idx;
3769 sector_t writepos, safepos, gap; 3769 sector_t writepos, readpos, safepos;
3770 sector_t stripe_addr; 3770 sector_t stripe_addr;
3771 int reshape_sectors; 3771 int reshape_sectors;
3772 struct list_head stripes; 3772 struct list_head stripes;
@@ -3806,26 +3806,46 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3806 */ 3806 */
3807 writepos = conf->reshape_progress; 3807 writepos = conf->reshape_progress;
3808 sector_div(writepos, new_data_disks); 3808 sector_div(writepos, new_data_disks);
3809 readpos = conf->reshape_progress;
3810 sector_div(readpos, data_disks);
3809 safepos = conf->reshape_safe; 3811 safepos = conf->reshape_safe;
3810 sector_div(safepos, data_disks); 3812 sector_div(safepos, data_disks);
3811 if (mddev->delta_disks < 0) { 3813 if (mddev->delta_disks < 0) {
3812 writepos -= reshape_sectors; 3814 writepos -= reshape_sectors;
3815 readpos += reshape_sectors;
3813 safepos += reshape_sectors; 3816 safepos += reshape_sectors;
3814 gap = conf->reshape_safe - conf->reshape_progress;
3815 } else { 3817 } else {
3816 writepos += reshape_sectors; 3818 writepos += reshape_sectors;
3819 readpos -= reshape_sectors;
3817 safepos -= reshape_sectors; 3820 safepos -= reshape_sectors;
3818 gap = conf->reshape_progress - conf->reshape_safe;
3819 } 3821 }
3820 3822
3823 /* 'writepos' is the most advanced device address we might write.
3824 * 'readpos' is the least advanced device address we might read.
3825 * 'safepos' is the least address recorded in the metadata as having
3826 * been reshaped.
3827 * If 'readpos' is behind 'writepos', then there is no way that we can
3828 * ensure safety in the face of a crash - that must be done by userspace
3829 * making a backup of the data. So in that case there is no particular
3830 * rush to update metadata.
3831 * Otherwise if 'safepos' is behind 'writepos', then we really need to
3832 * update the metadata to advance 'safepos' to match 'readpos' so that
3833 * we can be safe in the event of a crash.
3834 * So we insist on updating metadata if safepos is behind writepos and
3835 * readpos is beyond writepos.
3836 * In any case, update the metadata every 10 seconds.
3837 * Maybe that number should be configurable, but I'm not sure it is
3838 * worth it.... maybe it could be a multiple of safemode_delay???
3839 */
3821 if ((mddev->delta_disks < 0 3840 if ((mddev->delta_disks < 0
3822 ? writepos < safepos 3841 ? (safepos > writepos && readpos < writepos)
3823 : writepos > safepos) || 3842 : (safepos < writepos && readpos > writepos)) ||
3824 gap > (new_data_disks)*3000*2 /*3Meg*/) { 3843 time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
3825 /* Cannot proceed until we've updated the superblock... */ 3844 /* Cannot proceed until we've updated the superblock... */
3826 wait_event(conf->wait_for_overlap, 3845 wait_event(conf->wait_for_overlap,
3827 atomic_read(&conf->reshape_stripes)==0); 3846 atomic_read(&conf->reshape_stripes)==0);
3828 mddev->reshape_position = conf->reshape_progress; 3847 mddev->reshape_position = conf->reshape_progress;
3848 conf->reshape_checkpoint = jiffies;
3829 set_bit(MD_CHANGE_DEVS, &mddev->flags); 3849 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3830 md_wakeup_thread(mddev->thread); 3850 md_wakeup_thread(mddev->thread);
3831 wait_event(mddev->sb_wait, mddev->flags == 0 || 3851 wait_event(mddev->sb_wait, mddev->flags == 0 ||
@@ -3923,6 +3943,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3923 wait_event(conf->wait_for_overlap, 3943 wait_event(conf->wait_for_overlap,
3924 atomic_read(&conf->reshape_stripes) == 0); 3944 atomic_read(&conf->reshape_stripes) == 0);
3925 mddev->reshape_position = conf->reshape_progress; 3945 mddev->reshape_position = conf->reshape_progress;
3946 conf->reshape_checkpoint = jiffies;
3926 set_bit(MD_CHANGE_DEVS, &mddev->flags); 3947 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3927 md_wakeup_thread(mddev->thread); 3948 md_wakeup_thread(mddev->thread);
3928 wait_event(mddev->sb_wait, 3949 wait_event(mddev->sb_wait,
@@ -4957,6 +4978,7 @@ static int raid5_start_reshape(mddev_t *mddev)
4957 spin_unlock_irq(&conf->device_lock); 4978 spin_unlock_irq(&conf->device_lock);
4958 return -EAGAIN; 4979 return -EAGAIN;
4959 } 4980 }
4981 conf->reshape_checkpoint = jiffies;
4960 md_wakeup_thread(mddev->sync_thread); 4982 md_wakeup_thread(mddev->sync_thread);
4961 md_new_event(mddev); 4983 md_new_event(mddev);
4962 return 0; 4984 return 0;
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index cdd045681720..52ba99954dec 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -352,6 +352,8 @@ struct raid5_private_data {
352 int previous_raid_disks; 352 int previous_raid_disks;
353 int prev_chunk, prev_algo; 353 int prev_chunk, prev_algo;
354 short generation; /* increments with every reshape */ 354 short generation; /* increments with every reshape */
355 unsigned long reshape_checkpoint; /* Time we last updated
356 * metadata */
355 357
356 struct list_head handle_list; /* stripes needing handling */ 358 struct list_head handle_list; /* stripes needing handling */
357 struct list_head hold_list; /* preread ready stripes */ 359 struct list_head hold_list; /* preread ready stripes */