about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/md/raid5.c 204
-rw-r--r-- include/linux/raid/raid5.h 9
2 files changed, 63 insertions, 150 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 835046bf384e..b9159367491a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -122,6 +122,13 @@ static void return_io(struct bio *return_bi)
122 122
123static void print_raid5_conf (raid5_conf_t *conf); 123static void print_raid5_conf (raid5_conf_t *conf);
124 124
125static int stripe_operations_active(struct stripe_head *sh)
126{
127 return sh->check_state || sh->reconstruct_state ||
128 test_bit(STRIPE_BIOFILL_RUN, &sh->state) ||
129 test_bit(STRIPE_COMPUTE_RUN, &sh->state);
130}
131
125static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) 132static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
126{ 133{
127 if (atomic_dec_and_test(&sh->count)) { 134 if (atomic_dec_and_test(&sh->count)) {
@@ -141,7 +148,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
141 } 148 }
142 md_wakeup_thread(conf->mddev->thread); 149 md_wakeup_thread(conf->mddev->thread);
143 } else { 150 } else {
144 BUG_ON(sh->ops.pending); 151 BUG_ON(stripe_operations_active(sh));
145 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 152 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
146 atomic_dec(&conf->preread_active_stripes); 153 atomic_dec(&conf->preread_active_stripes);
147 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) 154 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
@@ -243,7 +250,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int
243 250
244 BUG_ON(atomic_read(&sh->count) != 0); 251 BUG_ON(atomic_read(&sh->count) != 0);
245 BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); 252 BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
246 BUG_ON(sh->ops.pending || sh->ops.ack || sh->ops.complete); 253 BUG_ON(stripe_operations_active(sh));
247 254
248 CHECK_DEVLOCK(); 255 CHECK_DEVLOCK();
249 pr_debug("init_stripe called, stripe %llu\n", 256 pr_debug("init_stripe called, stripe %llu\n",
@@ -344,47 +351,6 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
344 return sh; 351 return sh;
345} 352}
346 353
347/* test_and_ack_op() ensures that we only dequeue an operation once */
348#define test_and_ack_op(op, pend) \
349do { \
350 if (test_bit(op, &sh->ops.pending) && \
351 !test_bit(op, &sh->ops.complete)) { \
352 if (test_and_set_bit(op, &sh->ops.ack)) \
353 clear_bit(op, &pend); \
354 else \
355 ack++; \
356 } else \
357 clear_bit(op, &pend); \
358} while (0)
359
360/* find new work to run, do not resubmit work that is already
361 * in flight
362 */
363static unsigned long get_stripe_work(struct stripe_head *sh)
364{
365 unsigned long pending;
366 int ack = 0;
367
368 pending = sh->ops.pending;
369
370 test_and_ack_op(STRIPE_OP_BIOFILL, pending);
371 test_and_ack_op(STRIPE_OP_COMPUTE_BLK, pending);
372 test_and_ack_op(STRIPE_OP_PREXOR, pending);
373 test_and_ack_op(STRIPE_OP_BIODRAIN, pending);
374 test_and_ack_op(STRIPE_OP_POSTXOR, pending);
375 test_and_ack_op(STRIPE_OP_CHECK, pending);
376
377 sh->ops.count -= ack;
378 if (unlikely(sh->ops.count < 0)) {
379 printk(KERN_ERR "pending: %#lx ops.pending: %#lx ops.ack: %#lx "
380 "ops.complete: %#lx\n", pending, sh->ops.pending,
381 sh->ops.ack, sh->ops.complete);
382 BUG();
383 }
384
385 return pending;
386}
387
388static void 354static void
389raid5_end_read_request(struct bio *bi, int error); 355raid5_end_read_request(struct bio *bi, int error);
390static void 356static void
@@ -609,7 +575,7 @@ static void ops_complete_compute5(void *stripe_head_ref)
609} 575}
610 576
611static struct dma_async_tx_descriptor * 577static struct dma_async_tx_descriptor *
612ops_run_compute5(struct stripe_head *sh, unsigned long pending) 578ops_run_compute5(struct stripe_head *sh, unsigned long ops_request)
613{ 579{
614 /* kernel stack size limits the total number of disks */ 580 /* kernel stack size limits the total number of disks */
615 int disks = sh->disks; 581 int disks = sh->disks;
@@ -640,7 +606,7 @@ ops_run_compute5(struct stripe_head *sh, unsigned long pending)
640 ops_complete_compute5, sh); 606 ops_complete_compute5, sh);
641 607
642 /* ack now if postxor is not set to be run */ 608 /* ack now if postxor is not set to be run */
643 if (tx && !test_bit(STRIPE_OP_POSTXOR, &pending)) 609 if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request))
644 async_tx_ack(tx); 610 async_tx_ack(tx);
645 611
646 return tx; 612 return tx;
@@ -652,8 +618,6 @@ static void ops_complete_prexor(void *stripe_head_ref)
652 618
653 pr_debug("%s: stripe %llu\n", __func__, 619 pr_debug("%s: stripe %llu\n", __func__,
654 (unsigned long long)sh->sector); 620 (unsigned long long)sh->sector);
655
656 set_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
657} 621}
658 622
659static struct dma_async_tx_descriptor * 623static struct dma_async_tx_descriptor *
@@ -686,7 +650,7 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
686 650
687static struct dma_async_tx_descriptor * 651static struct dma_async_tx_descriptor *
688ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx, 652ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
689 unsigned long pending) 653 unsigned long ops_request)
690{ 654{
691 int disks = sh->disks; 655 int disks = sh->disks;
692 int pd_idx = sh->pd_idx, i; 656 int pd_idx = sh->pd_idx, i;
@@ -694,7 +658,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
694 /* check if prexor is active which means only process blocks 658 /* check if prexor is active which means only process blocks
695 * that are part of a read-modify-write (Wantprexor) 659 * that are part of a read-modify-write (Wantprexor)
696 */ 660 */
697 int prexor = test_bit(STRIPE_OP_PREXOR, &pending); 661 int prexor = test_bit(STRIPE_OP_PREXOR, &ops_request);
698 662
699 pr_debug("%s: stripe %llu\n", __func__, 663 pr_debug("%s: stripe %llu\n", __func__,
700 (unsigned long long)sh->sector); 664 (unsigned long long)sh->sector);
@@ -744,7 +708,7 @@ static void ops_complete_postxor(void *stripe_head_ref)
744 pr_debug("%s: stripe %llu\n", __func__, 708 pr_debug("%s: stripe %llu\n", __func__,
745 (unsigned long long)sh->sector); 709 (unsigned long long)sh->sector);
746 710
747 set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); 711 sh->reconstruct_state = reconstruct_state_result;
748 set_bit(STRIPE_HANDLE, &sh->state); 712 set_bit(STRIPE_HANDLE, &sh->state);
749 release_stripe(sh); 713 release_stripe(sh);
750} 714}
@@ -763,16 +727,14 @@ static void ops_complete_write(void *stripe_head_ref)
763 set_bit(R5_UPTODATE, &dev->flags); 727 set_bit(R5_UPTODATE, &dev->flags);
764 } 728 }
765 729
766 set_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete); 730 sh->reconstruct_state = reconstruct_state_drain_result;
767 set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
768
769 set_bit(STRIPE_HANDLE, &sh->state); 731 set_bit(STRIPE_HANDLE, &sh->state);
770 release_stripe(sh); 732 release_stripe(sh);
771} 733}
772 734
773static void 735static void
774ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx, 736ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
775 unsigned long pending) 737 unsigned long ops_request)
776{ 738{
777 /* kernel stack size limits the total number of disks */ 739 /* kernel stack size limits the total number of disks */
778 int disks = sh->disks; 740 int disks = sh->disks;
@@ -780,7 +742,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
780 742
781 int count = 0, pd_idx = sh->pd_idx, i; 743 int count = 0, pd_idx = sh->pd_idx, i;
782 struct page *xor_dest; 744 struct page *xor_dest;
783 int prexor = test_bit(STRIPE_OP_PREXOR, &pending); 745 int prexor = test_bit(STRIPE_OP_PREXOR, &ops_request);
784 unsigned long flags; 746 unsigned long flags;
785 dma_async_tx_callback callback; 747 dma_async_tx_callback callback;
786 748
@@ -807,7 +769,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
807 } 769 }
808 770
809 /* check whether this postxor is part of a write */ 771 /* check whether this postxor is part of a write */
810 callback = test_bit(STRIPE_OP_BIODRAIN, &pending) ? 772 callback = test_bit(STRIPE_OP_BIODRAIN, &ops_request) ?
811 ops_complete_write : ops_complete_postxor; 773 ops_complete_write : ops_complete_postxor;
812 774
813 /* 1/ if we prexor'd then the dest is reused as a source 775 /* 1/ if we prexor'd then the dest is reused as a source
@@ -868,8 +830,7 @@ static void ops_run_check(struct stripe_head *sh)
868 ops_complete_check, sh); 830 ops_complete_check, sh);
869} 831}
870 832
871static void raid5_run_ops(struct stripe_head *sh, unsigned long pending, 833static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
872 unsigned long ops_request)
873{ 834{
874 int overlap_clear = 0, i, disks = sh->disks; 835 int overlap_clear = 0, i, disks = sh->disks;
875 struct dma_async_tx_descriptor *tx = NULL; 836 struct dma_async_tx_descriptor *tx = NULL;
@@ -880,18 +841,18 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long pending,
880 } 841 }
881 842
882 if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) 843 if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request))
883 tx = ops_run_compute5(sh, pending); 844 tx = ops_run_compute5(sh, ops_request);
884 845
885 if (test_bit(STRIPE_OP_PREXOR, &pending)) 846 if (test_bit(STRIPE_OP_PREXOR, &ops_request))
886 tx = ops_run_prexor(sh, tx); 847 tx = ops_run_prexor(sh, tx);
887 848
888 if (test_bit(STRIPE_OP_BIODRAIN, &pending)) { 849 if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
889 tx = ops_run_biodrain(sh, tx, pending); 850 tx = ops_run_biodrain(sh, tx, ops_request);
890 overlap_clear++; 851 overlap_clear++;
891 } 852 }
892 853
893 if (test_bit(STRIPE_OP_POSTXOR, &pending)) 854 if (test_bit(STRIPE_OP_POSTXOR, &ops_request))
894 ops_run_postxor(sh, tx, pending); 855 ops_run_postxor(sh, tx, ops_request);
895 856
896 if (test_bit(STRIPE_OP_CHECK, &ops_request)) 857 if (test_bit(STRIPE_OP_CHECK, &ops_request))
897 ops_run_check(sh); 858 ops_run_check(sh);
@@ -1684,11 +1645,11 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
1684 } 1645 }
1685} 1646}
1686 1647
1687static int 1648static void
1688handle_write_operations5(struct stripe_head *sh, int rcw, int expand) 1649handle_write_operations5(struct stripe_head *sh, struct stripe_head_state *s,
1650 int rcw, int expand)
1689{ 1651{
1690 int i, pd_idx = sh->pd_idx, disks = sh->disks; 1652 int i, pd_idx = sh->pd_idx, disks = sh->disks;
1691 int locked = 0;
1692 1653
1693 if (rcw) { 1654 if (rcw) {
1694 /* if we are not expanding this is a proper write request, and 1655 /* if we are not expanding this is a proper write request, and
@@ -1696,12 +1657,12 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
1696 * stripe cache 1657 * stripe cache
1697 */ 1658 */
1698 if (!expand) { 1659 if (!expand) {
1699 set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); 1660 sh->reconstruct_state = reconstruct_state_drain_run;
1700 sh->ops.count++; 1661 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
1701 } 1662 } else
1663 sh->reconstruct_state = reconstruct_state_run;
1702 1664
1703 set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); 1665 set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
1704 sh->ops.count++;
1705 1666
1706 for (i = disks; i--; ) { 1667 for (i = disks; i--; ) {
1707 struct r5dev *dev = &sh->dev[i]; 1668 struct r5dev *dev = &sh->dev[i];
@@ -1710,21 +1671,20 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
1710 set_bit(R5_LOCKED, &dev->flags); 1671 set_bit(R5_LOCKED, &dev->flags);
1711 if (!expand) 1672 if (!expand)
1712 clear_bit(R5_UPTODATE, &dev->flags); 1673 clear_bit(R5_UPTODATE, &dev->flags);
1713 locked++; 1674 s->locked++;
1714 } 1675 }
1715 } 1676 }
1716 if (locked + 1 == disks) 1677 if (s->locked + 1 == disks)
1717 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) 1678 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
1718 atomic_inc(&sh->raid_conf->pending_full_writes); 1679 atomic_inc(&sh->raid_conf->pending_full_writes);
1719 } else { 1680 } else {
1720 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || 1681 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
1721 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); 1682 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
1722 1683
1723 set_bit(STRIPE_OP_PREXOR, &sh->ops.pending); 1684 sh->reconstruct_state = reconstruct_state_drain_run;
1724 set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); 1685 set_bit(STRIPE_OP_PREXOR, &s->ops_request);
1725 set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); 1686 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
1726 1687 set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
1727 sh->ops.count += 3;
1728 1688
1729 for (i = disks; i--; ) { 1689 for (i = disks; i--; ) {
1730 struct r5dev *dev = &sh->dev[i]; 1690 struct r5dev *dev = &sh->dev[i];
@@ -1742,7 +1702,7 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
1742 set_bit(R5_Wantprexor, &dev->flags); 1702 set_bit(R5_Wantprexor, &dev->flags);
1743 set_bit(R5_LOCKED, &dev->flags); 1703 set_bit(R5_LOCKED, &dev->flags);
1744 clear_bit(R5_UPTODATE, &dev->flags); 1704 clear_bit(R5_UPTODATE, &dev->flags);
1745 locked++; 1705 s->locked++;
1746 } 1706 }
1747 } 1707 }
1748 } 1708 }
@@ -1752,13 +1712,11 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
1752 */ 1712 */
1753 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); 1713 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
1754 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); 1714 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1755 locked++; 1715 s->locked++;
1756 1716
1757 pr_debug("%s: stripe %llu locked: %d pending: %lx\n", 1717 pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
1758 __func__, (unsigned long long)sh->sector, 1718 __func__, (unsigned long long)sh->sector,
1759 locked, sh->ops.pending); 1719 s->locked, s->ops_request);
1760
1761 return locked;
1762} 1720}
1763 1721
1764/* 1722/*
@@ -2005,8 +1963,7 @@ static void handle_issuing_new_read_requests5(struct stripe_head *sh,
2005 * midst of changing due to a write 1963 * midst of changing due to a write
2006 */ 1964 */
2007 if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && 1965 if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
2008 !test_bit(STRIPE_OP_PREXOR, &sh->ops.pending) && 1966 !sh->reconstruct_state) {
2009 !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
2010 for (i = disks; i--; ) 1967 for (i = disks; i--; )
2011 if (__handle_issuing_new_read_requests5( 1968 if (__handle_issuing_new_read_requests5(
2012 sh, s, i, disks) == 0) 1969 sh, s, i, disks) == 0)
@@ -2211,7 +2168,7 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
2211 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && 2168 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
2212 (s->locked == 0 && (rcw == 0 || rmw == 0) && 2169 (s->locked == 0 && (rcw == 0 || rmw == 0) &&
2213 !test_bit(STRIPE_BIT_DELAY, &sh->state))) 2170 !test_bit(STRIPE_BIT_DELAY, &sh->state)))
2214 s->locked += handle_write_operations5(sh, rcw == 0, 0); 2171 handle_write_operations5(sh, s, rcw == 0, 0);
2215} 2172}
2216 2173
2217static void handle_issuing_new_write_requests6(raid5_conf_t *conf, 2174static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
@@ -2581,15 +2538,14 @@ static void handle_stripe5(struct stripe_head *sh)
2581 struct bio *return_bi = NULL; 2538 struct bio *return_bi = NULL;
2582 struct stripe_head_state s; 2539 struct stripe_head_state s;
2583 struct r5dev *dev; 2540 struct r5dev *dev;
2584 unsigned long pending = 0;
2585 mdk_rdev_t *blocked_rdev = NULL; 2541 mdk_rdev_t *blocked_rdev = NULL;
2586 int prexor; 2542 int prexor;
2587 2543
2588 memset(&s, 0, sizeof(s)); 2544 memset(&s, 0, sizeof(s));
2589 pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " 2545 pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d "
2590 "ops=%lx:%lx:%lx\n", (unsigned long long)sh->sector, sh->state, 2546 "reconstruct:%d\n", (unsigned long long)sh->sector, sh->state,
2591 atomic_read(&sh->count), sh->pd_idx, 2547 atomic_read(&sh->count), sh->pd_idx, sh->check_state,
2592 sh->ops.pending, sh->ops.ack, sh->ops.complete); 2548 sh->reconstruct_state);
2593 2549
2594 spin_lock(&sh->lock); 2550 spin_lock(&sh->lock);
2595 clear_bit(STRIPE_HANDLE, &sh->state); 2551 clear_bit(STRIPE_HANDLE, &sh->state);
@@ -2703,34 +2659,12 @@ static void handle_stripe5(struct stripe_head *sh)
2703 /* Now we check to see if any write operations have recently 2659 /* Now we check to see if any write operations have recently
2704 * completed 2660 * completed
2705 */ 2661 */
2706
2707 /* leave prexor set until postxor is done, allows us to distinguish
2708 * a rmw from a rcw during biodrain
2709 */
2710 prexor = 0; 2662 prexor = 0;
2711 if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && 2663 if (sh->reconstruct_state == reconstruct_state_drain_result) {
2712 test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { 2664 sh->reconstruct_state = reconstruct_state_idle;
2713
2714 prexor = 1;
2715 clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
2716 clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
2717 clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
2718
2719 for (i = disks; i--; ) 2665 for (i = disks; i--; )
2720 clear_bit(R5_Wantprexor, &sh->dev[i].flags); 2666 prexor += test_and_clear_bit(R5_Wantprexor,
2721 } 2667 &sh->dev[i].flags);
2722
2723 /* if only POSTXOR is set then this is an 'expand' postxor */
2724 if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) &&
2725 test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
2726
2727 clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
2728 clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack);
2729 clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
2730
2731 clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
2732 clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
2733 clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
2734 2668
2735 /* All the 'written' buffers and the parity block are ready to 2669 /* All the 'written' buffers and the parity block are ready to
2736 * be written back to disk 2670 * be written back to disk
@@ -2763,8 +2697,7 @@ static void handle_stripe5(struct stripe_head *sh)
2763 * 2/ A 'check' operation is in flight, as it may clobber the parity 2697 * 2/ A 'check' operation is in flight, as it may clobber the parity
2764 * block. 2698 * block.
2765 */ 2699 */
2766 if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) && 2700 if (s.to_write && !sh->reconstruct_state && !sh->check_state)
2767 !sh->check_state)
2768 handle_issuing_new_write_requests5(conf, sh, &s, disks); 2701 handle_issuing_new_write_requests5(conf, sh, &s, disks);
2769 2702
2770 /* maybe we need to check and possibly fix the parity for this stripe 2703 /* maybe we need to check and possibly fix the parity for this stripe
@@ -2805,18 +2738,10 @@ static void handle_stripe5(struct stripe_head *sh)
2805 } 2738 }
2806 } 2739 }
2807 2740
2808 /* Finish postxor operations initiated by the expansion 2741 /* Finish reconstruct operations initiated by the expansion process */
2809 * process 2742 if (sh->reconstruct_state == reconstruct_state_result) {
2810 */ 2743 sh->reconstruct_state = reconstruct_state_idle;
2811 if (test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete) &&
2812 !test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending)) {
2813
2814 clear_bit(STRIPE_EXPANDING, &sh->state); 2744 clear_bit(STRIPE_EXPANDING, &sh->state);
2815
2816 clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
2817 clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
2818 clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
2819
2820 for (i = conf->raid_disks; i--; ) 2745 for (i = conf->raid_disks; i--; )
2821 set_bit(R5_Wantwrite, &sh->dev[i].flags); 2746 set_bit(R5_Wantwrite, &sh->dev[i].flags);
2822 set_bit(R5_LOCKED, &dev->flags); 2747 set_bit(R5_LOCKED, &dev->flags);
@@ -2824,15 +2749,13 @@ static void handle_stripe5(struct stripe_head *sh)
2824 } 2749 }
2825 2750
2826 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && 2751 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
2827 !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { 2752 !sh->reconstruct_state) {
2828 /* Need to write out all blocks after computing parity */ 2753 /* Need to write out all blocks after computing parity */
2829 sh->disks = conf->raid_disks; 2754 sh->disks = conf->raid_disks;
2830 sh->pd_idx = stripe_to_pdidx(sh->sector, conf, 2755 sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
2831 conf->raid_disks); 2756 conf->raid_disks);
2832 s.locked += handle_write_operations5(sh, 1, 1); 2757 handle_write_operations5(sh, &s, 1, 1);
2833 } else if (s.expanded && 2758 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
2834 s.locked == 0 &&
2835 !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
2836 clear_bit(STRIPE_EXPAND_READY, &sh->state); 2759 clear_bit(STRIPE_EXPAND_READY, &sh->state);
2837 atomic_dec(&conf->reshape_stripes); 2760 atomic_dec(&conf->reshape_stripes);
2838 wake_up(&conf->wait_for_overlap); 2761 wake_up(&conf->wait_for_overlap);
@@ -2843,9 +2766,6 @@ static void handle_stripe5(struct stripe_head *sh)
2843 !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) 2766 !test_bit(STRIPE_COMPUTE_RUN, &sh->state))
2844 handle_stripe_expansion(conf, sh, NULL); 2767 handle_stripe_expansion(conf, sh, NULL);
2845 2768
2846 if (sh->ops.count)
2847 pending = get_stripe_work(sh);
2848
2849 unlock: 2769 unlock:
2850 spin_unlock(&sh->lock); 2770 spin_unlock(&sh->lock);
2851 2771
@@ -2853,8 +2773,8 @@ static void handle_stripe5(struct stripe_head *sh)
2853 if (unlikely(blocked_rdev)) 2773 if (unlikely(blocked_rdev))
2854 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); 2774 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
2855 2775
2856 if (pending || s.ops_request) 2776 if (s.ops_request)
2857 raid5_run_ops(sh, pending, s.ops_request); 2777 raid5_run_ops(sh, s.ops_request);
2858 2778
2859 ops_run_io(sh, &s); 2779 ops_run_io(sh, &s);
2860 2780
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 2c96d5fd54bf..5f3e674b87dd 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -205,19 +205,12 @@ struct stripe_head {
205 int bm_seq; /* sequence number for bitmap flushes */ 205 int bm_seq; /* sequence number for bitmap flushes */
206 int disks; /* disks in stripe */ 206 int disks; /* disks in stripe */
207 enum check_states check_state; 207 enum check_states check_state;
208 enum reconstruct_states reconstruct_state;
208 /* stripe_operations 209 /* stripe_operations
209 * @pending - pending ops flags (set for request->issue->complete)
210 * @ack - submitted ops flags (set for issue->complete)
211 * @complete - completed ops flags (set for complete)
212 * @target - STRIPE_OP_COMPUTE_BLK target 210 * @target - STRIPE_OP_COMPUTE_BLK target
213 * @count - raid5_runs_ops is set to run when this is non-zero
214 */ 211 */
215 struct stripe_operations { 212 struct stripe_operations {
216 unsigned long pending;
217 unsigned long ack;
218 unsigned long complete;
219 int target; 213 int target;
220 int count;
221 u32 zero_sum_result; 214 u32 zero_sum_result;
222 } ops; 215 } ops;
223 struct r5dev { 216 struct r5dev {