aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2009-03-31 00:26:47 -0400
committerNeilBrown <neilb@suse.de>2009-03-31 00:26:47 -0400
commitab69ae12ceef7f23c578a3c230144e94a167a821 (patch)
tree01bee8e4c4a91a1714596c4f85769bfe72e328ce
parentd1a7c50369835f9ecbd7752016cd9302ecfae678 (diff)
md/raid5: be more careful about write ordering when reshaping.
When we are reshaping an array, it is very important that we read the data from a particular sector offset before writing new data at that offset. In most cases when growing or shrinking an array we read long before we even consider writing. But when restriping an array without changing its size, there is a small possibility that we might have some data available to write before the read has happened at the same location. This would require some stripes to be in cache already. To guard against this small possibility, we check, before writing, that the 'old' stripe at the same location is not in the process of being read. And we ensure that we mark all 'source' stripes as such before allowing new 'destination' stripes to proceed. Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--drivers/md/raid5.c49
1 files changed, 47 insertions, 2 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 4fdc6d02b447..062df846fd62 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -395,7 +395,8 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
395 init_stripe(sh, sector, previous); 395 init_stripe(sh, sector, previous);
396 } else { 396 } else {
397 if (atomic_read(&sh->count)) { 397 if (atomic_read(&sh->count)) {
398 BUG_ON(!list_empty(&sh->lru)); 398 BUG_ON(!list_empty(&sh->lru)
399 && !test_bit(STRIPE_EXPANDING, &sh->state));
399 } else { 400 } else {
400 if (!test_bit(STRIPE_HANDLE, &sh->state)) 401 if (!test_bit(STRIPE_HANDLE, &sh->state))
401 atomic_inc(&conf->active_stripes); 402 atomic_inc(&conf->active_stripes);
@@ -2944,6 +2945,23 @@ static bool handle_stripe5(struct stripe_head *sh)
2944 2945
2945 /* Finish reconstruct operations initiated by the expansion process */ 2946 /* Finish reconstruct operations initiated by the expansion process */
2946 if (sh->reconstruct_state == reconstruct_state_result) { 2947 if (sh->reconstruct_state == reconstruct_state_result) {
2948 struct stripe_head *sh2
2949 = get_active_stripe(conf, sh->sector, 1, 1);
2950 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
2951 /* sh cannot be written until sh2 has been read.
2952 * so arrange for sh to be delayed a little
2953 */
2954 set_bit(STRIPE_DELAYED, &sh->state);
2955 set_bit(STRIPE_HANDLE, &sh->state);
2956 if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
2957 &sh2->state))
2958 atomic_inc(&conf->preread_active_stripes);
2959 release_stripe(sh2);
2960 goto unlock;
2961 }
2962 if (sh2)
2963 release_stripe(sh2);
2964
2947 sh->reconstruct_state = reconstruct_state_idle; 2965 sh->reconstruct_state = reconstruct_state_idle;
2948 clear_bit(STRIPE_EXPANDING, &sh->state); 2966 clear_bit(STRIPE_EXPANDING, &sh->state);
2949 for (i = conf->raid_disks; i--; ) { 2967 for (i = conf->raid_disks; i--; ) {
@@ -3172,6 +3190,23 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3172 } 3190 }
3173 3191
3174 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { 3192 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
3193 struct stripe_head *sh2
3194 = get_active_stripe(conf, sh->sector, 1, 1);
3195 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
3196 /* sh cannot be written until sh2 has been read.
3197 * so arrange for sh to be delayed a little
3198 */
3199 set_bit(STRIPE_DELAYED, &sh->state);
3200 set_bit(STRIPE_HANDLE, &sh->state);
3201 if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
3202 &sh2->state))
3203 atomic_inc(&conf->preread_active_stripes);
3204 release_stripe(sh2);
3205 goto unlock;
3206 }
3207 if (sh2)
3208 release_stripe(sh2);
3209
3175 /* Need to write out all blocks after computing P&Q */ 3210 /* Need to write out all blocks after computing P&Q */
3176 sh->disks = conf->raid_disks; 3211 sh->disks = conf->raid_disks;
3177 stripe_set_idx(sh->sector, conf, 0, sh); 3212 stripe_set_idx(sh->sector, conf, 0, sh);
@@ -3739,6 +3774,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3739 sector_t writepos, safepos, gap; 3774 sector_t writepos, safepos, gap;
3740 sector_t stripe_addr; 3775 sector_t stripe_addr;
3741 int reshape_sectors; 3776 int reshape_sectors;
3777 struct list_head stripes;
3742 3778
3743 if (sector_nr == 0) { 3779 if (sector_nr == 0) {
3744 /* If restarting in the middle, skip the initial sectors */ 3780 /* If restarting in the middle, skip the initial sectors */
@@ -3816,6 +3852,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3816 BUG_ON(writepos != sector_nr + reshape_sectors); 3852 BUG_ON(writepos != sector_nr + reshape_sectors);
3817 stripe_addr = sector_nr; 3853 stripe_addr = sector_nr;
3818 } 3854 }
3855 INIT_LIST_HEAD(&stripes);
3819 for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) { 3856 for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
3820 int j; 3857 int j;
3821 int skipped = 0; 3858 int skipped = 0;
@@ -3845,7 +3882,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3845 set_bit(STRIPE_EXPAND_READY, &sh->state); 3882 set_bit(STRIPE_EXPAND_READY, &sh->state);
3846 set_bit(STRIPE_HANDLE, &sh->state); 3883 set_bit(STRIPE_HANDLE, &sh->state);
3847 } 3884 }
3848 release_stripe(sh); 3885 list_add(&sh->lru, &stripes);
3849 } 3886 }
3850 spin_lock_irq(&conf->device_lock); 3887 spin_lock_irq(&conf->device_lock);
3851 if (mddev->delta_disks < 0) 3888 if (mddev->delta_disks < 0)
@@ -3874,6 +3911,14 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3874 release_stripe(sh); 3911 release_stripe(sh);
3875 first_sector += STRIPE_SECTORS; 3912 first_sector += STRIPE_SECTORS;
3876 } 3913 }
3914 /* Now that the sources are clearly marked, we can release
3915 * the destination stripes
3916 */
3917 while (!list_empty(&stripes)) {
3918 sh = list_entry(stripes.next, struct stripe_head, lru);
3919 list_del_init(&sh->lru);
3920 release_stripe(sh);
3921 }
3877 /* If this takes us to the resync_max point where we have to pause, 3922 /* If this takes us to the resync_max point where we have to pause,
3878 * then we need to write out the superblock. 3923 * then we need to write out the superblock.
3879 */ 3924 */