aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Dan Williams <dan.j.williams@intel.com> 2008-06-06 01:45:52 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-06-06 14:29:08 -0400
commit: e0a115e5aa554b93150a8dc1c3fe15467708abb2 (patch)
tree: c8d869cba362f3728c528d696e3985f1c30b0a7b
parent: b2c8daddcbe03a22402ecf943bb88302601c6835 (diff)
md: fix prexor vs sync_request race
During the initial array synchronization process there is a window between when a prexor operation is scheduled to a specific stripe and when it completes for a sync_request to be scheduled to the same stripe. When this happens the prexor completes and the stripe is unconditionally marked "insync", effectively canceling the sync_request for the stripe. Prior to 2.6.23 this was not a problem because the prexor operation was done under sh->lock. The effect in older kernels was that the prexor would still erroneously mark the stripe "insync", but sync_request would be held off and re-mark the stripe as "!in_sync".

Change the write completion logic to not mark the stripe "in_sync" if a prexor was performed. The effect of the change is to sometimes not set STRIPE_INSYNC. The worst this can do is cause the resync to stall waiting for STRIPE_INSYNC to be set. If this were happening, then STRIPE_SYNCING would be set and handle_issuing_new_read_requests would cause all available blocks to eventually be read, at which point prexor would never be used on that stripe any more and STRIPE_INSYNC would eventually be set.

Running

    echo repair > /sys/block/mdN/md/sync_action

will correct arrays that may have lost this race.

Cc: <stable@kernel.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- drivers/md/raid5.c | 5
1 files changed, 5 insertions, 0 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 425958a76b84..f0f0585c107e 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2645,6 +2645,7 @@ static void handle_stripe5(struct stripe_head *sh)
2645 struct r5dev *dev; 2645 struct r5dev *dev;
2646 unsigned long pending = 0; 2646 unsigned long pending = 0;
2647 mdk_rdev_t *blocked_rdev = NULL; 2647 mdk_rdev_t *blocked_rdev = NULL;
2648 int prexor;
2648 2649
2649 memset(&s, 0, sizeof(s)); 2650 memset(&s, 0, sizeof(s));
2650 pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " 2651 pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
@@ -2774,9 +2775,11 @@ static void handle_stripe5(struct stripe_head *sh)
2774 /* leave prexor set until postxor is done, allows us to distinguish 2775 /* leave prexor set until postxor is done, allows us to distinguish
2775 * a rmw from a rcw during biodrain 2776 * a rmw from a rcw during biodrain
2776 */ 2777 */
2778 prexor = 0;
2777 if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && 2779 if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
2778 test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { 2780 test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
2779 2781
2782 prexor = 1;
2780 clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); 2783 clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
2781 clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); 2784 clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
2782 clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); 2785 clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
@@ -2810,6 +2813,8 @@ static void handle_stripe5(struct stripe_head *sh)
2810 if (!test_and_set_bit( 2813 if (!test_and_set_bit(
2811 STRIPE_OP_IO, &sh->ops.pending)) 2814 STRIPE_OP_IO, &sh->ops.pending))
2812 sh->ops.count++; 2815 sh->ops.count++;
2816 if (prexor)
2817 continue;
2813 if (!test_bit(R5_Insync, &dev->flags) || 2818 if (!test_bit(R5_Insync, &dev->flags) ||
2814 (i == sh->pd_idx && s.failed == 0)) 2819 (i == sh->pd_idx && s.failed == 0))
2815 set_bit(STRIPE_INSYNC, &sh->state); 2820 set_bit(STRIPE_INSYNC, &sh->state);