diff options
author | Dan Williams <dan.j.williams@intel.com> | 2008-06-06 01:45:52 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-06-06 14:29:08 -0400 |
commit | e0a115e5aa554b93150a8dc1c3fe15467708abb2 (patch) | |
tree | c8d869cba362f3728c528d696e3985f1c30b0a7b | |
parent | b2c8daddcbe03a22402ecf943bb88302601c6835 (diff) |
md: fix prexor vs sync_request race
During the initial array synchronization process there is a window, between
when a prexor operation is scheduled to a specific stripe and when it
completes, in which a sync_request can be scheduled to the same stripe. When
this happens the prexor completes and the stripe is unconditionally marked
"insync", effectively canceling the sync_request for the stripe. Prior to
2.6.23 this was not a problem because the prexor operation was done under
sh->lock. In older kernels the prexor would still erroneously mark the
stripe "insync", but sync_request would be held off and would then re-mark
the stripe as "!insync".
Change the write completion logic so that it does not mark the stripe
"insync" if a prexor was performed. The effect of the change is that
STRIPE_INSYNC is sometimes not set. The worst this can do is cause the
resync to stall waiting for STRIPE_INSYNC to be set. If that happens, then
STRIPE_SYNCING will be set and handle_issuing_new_read_requests will cause
all available blocks to eventually be read, at which point prexor will no
longer be used on that stripe and STRIPE_INSYNC will eventually be set.
Running `echo repair > /sys/block/mdN/md/sync_action` will correct arrays
that may have lost this race.
Cc: <stable@kernel.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | drivers/md/raid5.c | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 425958a76b84..f0f0585c107e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -2645,6 +2645,7 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2645 | struct r5dev *dev; | 2645 | struct r5dev *dev; |
2646 | unsigned long pending = 0; | 2646 | unsigned long pending = 0; |
2647 | mdk_rdev_t *blocked_rdev = NULL; | 2647 | mdk_rdev_t *blocked_rdev = NULL; |
2648 | int prexor; | ||
2648 | 2649 | ||
2649 | memset(&s, 0, sizeof(s)); | 2650 | memset(&s, 0, sizeof(s)); |
2650 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " | 2651 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " |
@@ -2774,9 +2775,11 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2774 | /* leave prexor set until postxor is done, allows us to distinguish | 2775 | /* leave prexor set until postxor is done, allows us to distinguish |
2775 | * a rmw from a rcw during biodrain | 2776 | * a rmw from a rcw during biodrain |
2776 | */ | 2777 | */ |
2778 | prexor = 0; | ||
2777 | if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && | 2779 | if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && |
2778 | test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { | 2780 | test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { |
2779 | 2781 | ||
2782 | prexor = 1; | ||
2780 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); | 2783 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); |
2781 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); | 2784 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); |
2782 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); | 2785 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); |
@@ -2810,6 +2813,8 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2810 | if (!test_and_set_bit( | 2813 | if (!test_and_set_bit( |
2811 | STRIPE_OP_IO, &sh->ops.pending)) | 2814 | STRIPE_OP_IO, &sh->ops.pending)) |
2812 | sh->ops.count++; | 2815 | sh->ops.count++; |
2816 | if (prexor) | ||
2817 | continue; | ||
2813 | if (!test_bit(R5_Insync, &dev->flags) || | 2818 | if (!test_bit(R5_Insync, &dev->flags) || |
2814 | (i == sh->pd_idx && s.failed == 0)) | 2819 | (i == sh->pd_idx && s.failed == 0)) |
2815 | set_bit(STRIPE_INSYNC, &sh->state); | 2820 | set_bit(STRIPE_INSYNC, &sh->state); |