diff options
author | Dan Williams <dan.j.williams@intel.com> | 2007-11-14 19:59:35 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-11-14 21:45:39 -0500 |
commit | 6c55be8b962f1bdc592d579e81fc27b11ea53dfc (patch) | |
tree | 3413cc2c8e26afdbc498cbdbd96bc2fa9733b2a6 /drivers | |
parent | 5b23dbe8173c212d6a326e35347b038705603d39 (diff) |
raid5: fix unending write sequence
<debug output from Joel's system>
handling stripe 7629696, state=0x14 cnt=1, pd_idx=2 ops=0:0:0
check 5: state 0x6 toread 0000000000000000 read 0000000000000000 write fffff800ffcffcc0 written 0000000000000000
check 4: state 0x6 toread 0000000000000000 read 0000000000000000 write fffff800fdd4e360 written 0000000000000000
check 3: state 0x1 toread 0000000000000000 read 0000000000000000 write 0000000000000000 written 0000000000000000
check 2: state 0x1 toread 0000000000000000 read 0000000000000000 write 0000000000000000 written 0000000000000000
check 1: state 0x6 toread 0000000000000000 read 0000000000000000 write fffff800ff517e40 written 0000000000000000
check 0: state 0x6 toread 0000000000000000 read 0000000000000000 write fffff800fd4cae60 written 0000000000000000
locked=4 uptodate=2 to_read=0 to_write=4 failed=0 failed_num=0
for sector 7629696, rmw=0 rcw=0
</debug>
These blocks were prepared to be written out, but were never handled in
ops_run_biodrain(), so they remain locked forever. The operations flags
are all clear which means handle_stripe() thinks nothing else needs to be
done.
This state suggests that the STRIPE_OP_PREXOR bit was sampled 'set' when it
should not have been. This patch cleans up cases where the code looks at
sh->ops.pending when it should be looking at the consistent stack-based
snapshot of the operations flags.
Report from Joel:
Resync done. Patch fix this bug.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Joel Bertrand <joel.bertrand@systella.fr>
Cc: <stable@kernel.org>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/md/raid5.c | 16 |
1 files changed, 9 insertions, 7 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 1cfc984cc7b7..a5aad8cad843 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -688,7 +688,8 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
688 | } | 688 | } |
689 | 689 | ||
690 | static struct dma_async_tx_descriptor * | 690 | static struct dma_async_tx_descriptor * |
691 | ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | 691 | ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx, |
692 | unsigned long pending) | ||
692 | { | 693 | { |
693 | int disks = sh->disks; | 694 | int disks = sh->disks; |
694 | int pd_idx = sh->pd_idx, i; | 695 | int pd_idx = sh->pd_idx, i; |
@@ -696,7 +697,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
696 | /* check if prexor is active which means only process blocks | 697 | /* check if prexor is active which means only process blocks |
697 | * that are part of a read-modify-write (Wantprexor) | 698 | * that are part of a read-modify-write (Wantprexor) |
698 | */ | 699 | */ |
699 | int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending); | 700 | int prexor = test_bit(STRIPE_OP_PREXOR, &pending); |
700 | 701 | ||
701 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | 702 | pr_debug("%s: stripe %llu\n", __FUNCTION__, |
702 | (unsigned long long)sh->sector); | 703 | (unsigned long long)sh->sector); |
@@ -773,7 +774,8 @@ static void ops_complete_write(void *stripe_head_ref) | |||
773 | } | 774 | } |
774 | 775 | ||
775 | static void | 776 | static void |
776 | ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | 777 | ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx, |
778 | unsigned long pending) | ||
777 | { | 779 | { |
778 | /* kernel stack size limits the total number of disks */ | 780 | /* kernel stack size limits the total number of disks */ |
779 | int disks = sh->disks; | 781 | int disks = sh->disks; |
@@ -781,7 +783,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
781 | 783 | ||
782 | int count = 0, pd_idx = sh->pd_idx, i; | 784 | int count = 0, pd_idx = sh->pd_idx, i; |
783 | struct page *xor_dest; | 785 | struct page *xor_dest; |
784 | int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending); | 786 | int prexor = test_bit(STRIPE_OP_PREXOR, &pending); |
785 | unsigned long flags; | 787 | unsigned long flags; |
786 | dma_async_tx_callback callback; | 788 | dma_async_tx_callback callback; |
787 | 789 | ||
@@ -808,7 +810,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
808 | } | 810 | } |
809 | 811 | ||
810 | /* check whether this postxor is part of a write */ | 812 | /* check whether this postxor is part of a write */ |
811 | callback = test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending) ? | 813 | callback = test_bit(STRIPE_OP_BIODRAIN, &pending) ? |
812 | ops_complete_write : ops_complete_postxor; | 814 | ops_complete_write : ops_complete_postxor; |
813 | 815 | ||
814 | /* 1/ if we prexor'd then the dest is reused as a source | 816 | /* 1/ if we prexor'd then the dest is reused as a source |
@@ -896,12 +898,12 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long pending) | |||
896 | tx = ops_run_prexor(sh, tx); | 898 | tx = ops_run_prexor(sh, tx); |
897 | 899 | ||
898 | if (test_bit(STRIPE_OP_BIODRAIN, &pending)) { | 900 | if (test_bit(STRIPE_OP_BIODRAIN, &pending)) { |
899 | tx = ops_run_biodrain(sh, tx); | 901 | tx = ops_run_biodrain(sh, tx, pending); |
900 | overlap_clear++; | 902 | overlap_clear++; |
901 | } | 903 | } |
902 | 904 | ||
903 | if (test_bit(STRIPE_OP_POSTXOR, &pending)) | 905 | if (test_bit(STRIPE_OP_POSTXOR, &pending)) |
904 | ops_run_postxor(sh, tx); | 906 | ops_run_postxor(sh, tx, pending); |
905 | 907 | ||
906 | if (test_bit(STRIPE_OP_CHECK, &pending)) | 908 | if (test_bit(STRIPE_OP_CHECK, &pending)) |
907 | ops_run_check(sh); | 909 | ops_run_check(sh); |