aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Dan Williams <dan.j.williams@intel.com> 2008-05-12 17:02:12 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-05-13 11:02:24 -0400
commit c8894419acf5e56851de9741c5047bebd78acd1f (patch)
tree 6294b87ebf31d577cd48880a201ff4b1a6a50ffc
parent 3f275ea3086054205795972b8e87f2046fd3de98 (diff)
md: fix raid5 'repair' operations
commit bd2ab67030e9116f1e4aae1289220255412b37fd "md: close a livelock window in handle_parity_checks5" introduced a bug in handling 'repair' operations. After a repair operation completes we clear the state bits tracking this operation. However, they are cleared too early and this results in the code deciding to re-run the parity check operation. Since we have done the repair in memory the second check does not find a mismatch and thus does not do a writeback.

Test results:
$ echo repair > /sys/block/md0/md/sync_action
$ cat /sys/block/md0/md/mismatch_cnt
51072
$ echo repair > /sys/block/md0/md/sync_action
$ cat /sys/block/md0/md/mismatch_cnt
0

(also fix incorrect indentation)

Cc: <stable@kernel.org>
Tested-by: George Spelvin <linux@horizon.com>
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- drivers/md/raid5.c 25
1 files changed, 13 insertions, 12 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 087eee0cb809..ee0ea9183080 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2369,8 +2369,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2369 2369
2370 /* complete a check operation */ 2370 /* complete a check operation */
2371 if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { 2371 if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
2372 clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); 2372 clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
2373 clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); 2373 clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
2374 if (s->failed == 0) { 2374 if (s->failed == 0) {
2375 if (sh->ops.zero_sum_result == 0) 2375 if (sh->ops.zero_sum_result == 0)
2376 /* parity is correct (on disc, 2376 /* parity is correct (on disc,
@@ -2400,16 +2400,6 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2400 canceled_check = 1; /* STRIPE_INSYNC is not set */ 2400 canceled_check = 1; /* STRIPE_INSYNC is not set */
2401 } 2401 }
2402 2402
2403 /* check if we can clear a parity disk reconstruct */
2404 if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
2405 test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
2406
2407 clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
2408 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
2409 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
2410 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
2411 }
2412
2413 /* start a new check operation if there are no failures, the stripe is 2403 /* start a new check operation if there are no failures, the stripe is
2414 * not insync, and a repair is not in flight 2404 * not insync, and a repair is not in flight
2415 */ 2405 */
@@ -2424,6 +2414,17 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2424 } 2414 }
2425 } 2415 }
2426 2416
2417 /* check if we can clear a parity disk reconstruct */
2418 if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
2419 test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
2420
2421 clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
2422 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
2423 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
2424 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
2425 }
2426
2427
2427 /* Wait for check parity and compute block operations to complete 2428 /* Wait for check parity and compute block operations to complete
2428 * before write-back. If a failure occurred while the check operation 2429 * before write-back. If a failure occurred while the check operation
2429 * was in flight we need to cycle this stripe through handle_stripe 2430 * was in flight we need to cycle this stripe through handle_stripe