aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2008-04-11 00:29:27 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-04-11 11:06:44 -0400
commitbd2ab67030e9116f1e4aae1289220255412b37fd (patch)
treeda52cf8952a660f17ce45c8c510ded9a3487180a /drivers/md/raid5.c
parent231bc2a222411f43bfb0fbb6d64c0f34c7b1039f (diff)
md: close a livelock window in handle_parity_checks5
If a failure is detected after a parity check operation has been initiated, but before it completes handle_parity_checks5 will never quiesce operations on the stripe. Explicitly handle this case by "canceling" the parity check, i.e. clear the STRIPE_OP_CHECK flags and queue the stripe on the handle list again to refresh any non-uptodate blocks. Kernel versions >= 2.6.23 are susceptible. Cc: <stable@kernel.org> Cc: NeilBrown <neilb@suse.de> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--drivers/md/raid5.c51
1 files changed, 29 insertions, 22 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c574cf5efb5c..b162b839a662 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2348,25 +2348,15 @@ static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
2348static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, 2348static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2349 struct stripe_head_state *s, int disks) 2349 struct stripe_head_state *s, int disks)
2350{ 2350{
2351 int canceled_check = 0;
2352
2351 set_bit(STRIPE_HANDLE, &sh->state); 2353 set_bit(STRIPE_HANDLE, &sh->state);
2352 /* Take one of the following actions:
2353 * 1/ start a check parity operation if (uptodate == disks)
2354 * 2/ finish a check parity operation and act on the result
2355 * 3/ skip to the writeback section if we previously
2356 * initiated a recovery operation
2357 */
2358 if (s->failed == 0 &&
2359 !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
2360 if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
2361 BUG_ON(s->uptodate != disks);
2362 clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
2363 sh->ops.count++;
2364 s->uptodate--;
2365 } else if (
2366 test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
2367 clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
2368 clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
2369 2354
2355 /* complete a check operation */
2356 if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
2357 clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
2358 clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
2359 if (s->failed == 0) {
2370 if (sh->ops.zero_sum_result == 0) 2360 if (sh->ops.zero_sum_result == 0)
2371 /* parity is correct (on disc, 2361 /* parity is correct (on disc,
2372 * not in buffer any more) 2362 * not in buffer any more)
@@ -2391,7 +2381,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2391 s->uptodate++; 2381 s->uptodate++;
2392 } 2382 }
2393 } 2383 }
2394 } 2384 } else
2385 canceled_check = 1; /* STRIPE_INSYNC is not set */
2395 } 2386 }
2396 2387
2397 /* check if we can clear a parity disk reconstruct */ 2388 /* check if we can clear a parity disk reconstruct */
@@ -2404,12 +2395,28 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2404 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); 2395 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
2405 } 2396 }
2406 2397
2398 /* start a new check operation if there are no failures, the stripe is
2399 * not insync, and a repair is not in flight
2400 */
2401 if (s->failed == 0 &&
2402 !test_bit(STRIPE_INSYNC, &sh->state) &&
2403 !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
2404 if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
2405 BUG_ON(s->uptodate != disks);
2406 clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
2407 sh->ops.count++;
2408 s->uptodate--;
2409 }
2410 }
2411
2407 /* Wait for check parity and compute block operations to complete 2412 /* Wait for check parity and compute block operations to complete
2408 * before write-back 2413 * before write-back. If a failure occurred while the check operation
2414 * was in flight we need to cycle this stripe through handle_stripe
2415 * since the parity block may not be uptodate
2409 */ 2416 */
2410 if (!test_bit(STRIPE_INSYNC, &sh->state) && 2417 if (!canceled_check && !test_bit(STRIPE_INSYNC, &sh->state) &&
2411 !test_bit(STRIPE_OP_CHECK, &sh->ops.pending) && 2418 !test_bit(STRIPE_OP_CHECK, &sh->ops.pending) &&
2412 !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) { 2419 !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
2413 struct r5dev *dev; 2420 struct r5dev *dev;
2414 /* either failed parity check, or recovery is happening */ 2421 /* either failed parity check, or recovery is happening */
2415 if (s->failed == 0) 2422 if (s->failed == 0)