aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2013-07-21 22:57:21 -0400
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2013-08-04 04:50:54 -0400
commit c1dadcc1086bbbac1b5eeb4d5e157c76f2d8dfba (patch)
tree b9a49a01b861c0d999d233cec8bf3340e4e2b17b /drivers/md
parent 8afb90da9f16abc5e577318544419bfcf3565391 (diff)
md/raid5: fix interaction of 'replace' and 'recovery'.
commit f94c0b6658c7edea8bc19d13be321e3860a3fa54 upstream. If a device in a RAID4/5/6 is being replaced while another is being recovered, then the writes to the replacement device currently don't happen, resulting in corruption when the replacement completes and the new drive takes over. This is because the replacement writes are only triggered when 's.replacing' is set and not when the similar 's.syncing' is set (which is the case during resync and recovery - it means all devices need to be read). So schedule those writes when s.syncing is set as well. In this case we cannot use "STRIPE_INSYNC" to record that the replacement has happened as that is needed for recording that any parity calculation is complete. So introduce STRIPE_REPLACED to record if the replacement has happened. For safety we should also check that STRIPE_COMPUTE_RUN is not set. This has a similar effect to the "s.locked == 0" test. The latter ensures that no IO has been flagged but not started. The former checks if any parity calculation has been flagged but not started. We must wait for both of these to complete before triggering the 'replace'. Add a similar test to the subsequent check for "are we finished yet". This possibly isn't needed (is subsumed in the STRIPE_INSYNC test), but it makes it more obvious that the REPLACE will happen before we think we are finished. Finally, if a NeedReplace device is not UPTODATE then that is an error. We really must trigger a warning. This bug was introduced in commit 9a3e1101b827a59ac9036a672f5fa8d5279d0fe2 (md/raid5: detect and handle replacements during recovery.) which introduced replacement for raid5. That was in 3.3-rc3, so any stable kernel since then would benefit from this fix. Reported-by: qindehua <13691222965@163.com> Tested-by: qindehua <qindehua@163.com> Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/raid5.c | 15
-rw-r--r-- drivers/md/raid5.h | 1
2 files changed, 11 insertions, 5 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 05e4a105b9c7..a35b846af4f8 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3462,6 +3462,7 @@ static void handle_stripe(struct stripe_head *sh)
3462 test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { 3462 test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
3463 set_bit(STRIPE_SYNCING, &sh->state); 3463 set_bit(STRIPE_SYNCING, &sh->state);
3464 clear_bit(STRIPE_INSYNC, &sh->state); 3464 clear_bit(STRIPE_INSYNC, &sh->state);
3465 clear_bit(STRIPE_REPLACED, &sh->state);
3465 } 3466 }
3466 spin_unlock(&sh->stripe_lock); 3467 spin_unlock(&sh->stripe_lock);
3467 } 3468 }
@@ -3607,19 +3608,23 @@ static void handle_stripe(struct stripe_head *sh)
3607 handle_parity_checks5(conf, sh, &s, disks); 3608 handle_parity_checks5(conf, sh, &s, disks);
3608 } 3609 }
3609 3610
3610 if (s.replacing && s.locked == 0 3611 if ((s.replacing || s.syncing) && s.locked == 0
3611 && !test_bit(STRIPE_INSYNC, &sh->state)) { 3612 && !test_bit(STRIPE_COMPUTE_RUN, &sh->state)
3613 && !test_bit(STRIPE_REPLACED, &sh->state)) {
3612 /* Write out to replacement devices where possible */ 3614 /* Write out to replacement devices where possible */
3613 for (i = 0; i < conf->raid_disks; i++) 3615 for (i = 0; i < conf->raid_disks; i++)
3614 if (test_bit(R5_UPTODATE, &sh->dev[i].flags) && 3616 if (test_bit(R5_NeedReplace, &sh->dev[i].flags)) {
3615 test_bit(R5_NeedReplace, &sh->dev[i].flags)) { 3617 WARN_ON(!test_bit(R5_UPTODATE, &sh->dev[i].flags));
3616 set_bit(R5_WantReplace, &sh->dev[i].flags); 3618 set_bit(R5_WantReplace, &sh->dev[i].flags);
3617 set_bit(R5_LOCKED, &sh->dev[i].flags); 3619 set_bit(R5_LOCKED, &sh->dev[i].flags);
3618 s.locked++; 3620 s.locked++;
3619 } 3621 }
3620 set_bit(STRIPE_INSYNC, &sh->state); 3622 if (s.replacing)
3623 set_bit(STRIPE_INSYNC, &sh->state);
3624 set_bit(STRIPE_REPLACED, &sh->state);
3621 } 3625 }
3622 if ((s.syncing || s.replacing) && s.locked == 0 && 3626 if ((s.syncing || s.replacing) && s.locked == 0 &&
3627 !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
3623 test_bit(STRIPE_INSYNC, &sh->state)) { 3628 test_bit(STRIPE_INSYNC, &sh->state)) {
3624 md_done_sync(conf->mddev, STRIPE_SECTORS, 1); 3629 md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
3625 clear_bit(STRIPE_SYNCING, &sh->state); 3630 clear_bit(STRIPE_SYNCING, &sh->state);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index b0b663b119a8..70c49329ca9a 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -306,6 +306,7 @@ enum {
306 STRIPE_SYNC_REQUESTED, 306 STRIPE_SYNC_REQUESTED,
307 STRIPE_SYNCING, 307 STRIPE_SYNCING,
308 STRIPE_INSYNC, 308 STRIPE_INSYNC,
309 STRIPE_REPLACED,
309 STRIPE_PREREAD_ACTIVE, 310 STRIPE_PREREAD_ACTIVE,
310 STRIPE_DELAYED, 311 STRIPE_DELAYED,
311 STRIPE_DEGRADED, 312 STRIPE_DEGRADED,