diff options
author | Dan Williams <dan.j.williams@intel.com> | 2008-05-12 17:02:12 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-05-13 11:02:24 -0400 |
commit | c8894419acf5e56851de9741c5047bebd78acd1f (patch) | |
tree | 6294b87ebf31d577cd48880a201ff4b1a6a50ffc | |
parent | 3f275ea3086054205795972b8e87f2046fd3de98 (diff) |
md: fix raid5 'repair' operations
commit bd2ab67030e9116f1e4aae1289220255412b37fd "md: close a livelock window
in handle_parity_checks5" introduced a bug in handling 'repair' operations.
After a repair operation completes we clear the state bits tracking this
operation. However, they are cleared too early and this results in the code
deciding to re-run the parity check operation. Since we have done the repair
in memory the second check does not find a mismatch and thus does not do a
writeback.
Test results:
$ echo repair > /sys/block/md0/md/sync_action
$ cat /sys/block/md0/md/mismatch_cnt
51072
$ echo repair > /sys/block/md0/md/sync_action
$ cat /sys/block/md0/md/mismatch_cnt
0
(also fix incorrect indentation)
Cc: <stable@kernel.org>
Tested-by: George Spelvin <linux@horizon.com>
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | drivers/md/raid5.c | 25 |
1 files changed, 13 insertions, 12 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 087eee0cb809..ee0ea9183080 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -2369,8 +2369,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, | |||
2369 | 2369 | ||
2370 | /* complete a check operation */ | 2370 | /* complete a check operation */ |
2371 | if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { | 2371 | if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { |
2372 | clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); | 2372 | clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); |
2373 | clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); | 2373 | clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); |
2374 | if (s->failed == 0) { | 2374 | if (s->failed == 0) { |
2375 | if (sh->ops.zero_sum_result == 0) | 2375 | if (sh->ops.zero_sum_result == 0) |
2376 | /* parity is correct (on disc, | 2376 | /* parity is correct (on disc, |
@@ -2400,16 +2400,6 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, | |||
2400 | canceled_check = 1; /* STRIPE_INSYNC is not set */ | 2400 | canceled_check = 1; /* STRIPE_INSYNC is not set */ |
2401 | } | 2401 | } |
2402 | 2402 | ||
2403 | /* check if we can clear a parity disk reconstruct */ | ||
2404 | if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && | ||
2405 | test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { | ||
2406 | |||
2407 | clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); | ||
2408 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); | ||
2409 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); | ||
2410 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); | ||
2411 | } | ||
2412 | |||
2413 | /* start a new check operation if there are no failures, the stripe is | 2403 | /* start a new check operation if there are no failures, the stripe is |
2414 | * not insync, and a repair is not in flight | 2404 | * not insync, and a repair is not in flight |
2415 | */ | 2405 | */ |
@@ -2424,6 +2414,17 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, | |||
2424 | } | 2414 | } |
2425 | } | 2415 | } |
2426 | 2416 | ||
2417 | /* check if we can clear a parity disk reconstruct */ | ||
2418 | if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && | ||
2419 | test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { | ||
2420 | |||
2421 | clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); | ||
2422 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); | ||
2423 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); | ||
2424 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); | ||
2425 | } | ||
2426 | |||
2427 | |||
2427 | /* Wait for check parity and compute block operations to complete | 2428 | /* Wait for check parity and compute block operations to complete |
2428 | * before write-back. If a failure occurred while the check operation | 2429 | * before write-back. If a failure occurred while the check operation |
2429 | * was in flight we need to cycle this stripe through handle_stripe | 2430 | * was in flight we need to cycle this stripe through handle_stripe |