Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--  drivers/md/raid5.c | 376
1 file changed, 248 insertions(+), 128 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d26767246d26..adda94df5eb2 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -99,34 +99,40 @@ static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
99 * We maintain a biased count of active stripes in the bottom 16 bits of 99 * We maintain a biased count of active stripes in the bottom 16 bits of
100 * bi_phys_segments, and a count of processed stripes in the upper 16 bits 100 * bi_phys_segments, and a count of processed stripes in the upper 16 bits
101 */ 101 */
102static inline int raid5_bi_phys_segments(struct bio *bio) 102static inline int raid5_bi_processed_stripes(struct bio *bio)
103{ 103{
104 return bio->bi_phys_segments & 0xffff; 104 atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
105 return (atomic_read(segments) >> 16) & 0xffff;
105} 106}
106 107
107static inline int raid5_bi_hw_segments(struct bio *bio) 108static inline int raid5_dec_bi_active_stripes(struct bio *bio)
108{ 109{
109 return (bio->bi_phys_segments >> 16) & 0xffff; 110 atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
111 return atomic_sub_return(1, segments) & 0xffff;
110} 112}
111 113
112static inline int raid5_dec_bi_phys_segments(struct bio *bio) 114static inline void raid5_inc_bi_active_stripes(struct bio *bio)
113{ 115{
114 --bio->bi_phys_segments; 116 atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
115 return raid5_bi_phys_segments(bio); 117 atomic_inc(segments);
116} 118}
117 119
118static inline int raid5_dec_bi_hw_segments(struct bio *bio) 120static inline void raid5_set_bi_processed_stripes(struct bio *bio,
121 unsigned int cnt)
119{ 122{
120 unsigned short val = raid5_bi_hw_segments(bio); 123 atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
124 int old, new;
121 125
122 --val; 126 do {
123 bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio); 127 old = atomic_read(segments);
124 return val; 128 new = (old & 0xffff) | (cnt << 16);
129 } while (atomic_cmpxchg(segments, old, new) != old);
125} 130}
126 131
127static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) 132static inline void raid5_set_bi_stripes(struct bio *bio, unsigned int cnt)
128{ 133{
129 bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16); 134 atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
135 atomic_set(segments, cnt);
130} 136}
131 137
132/* Find first data disk in a raid6 stripe */ 138/* Find first data disk in a raid6 stripe */
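The helpers rewritten above pack two 16-bit counters into bio->bi_phys_segments (low half: biased count of active stripes, high half: processed stripes) and manipulate them through atomic_t casts instead of under conf->device_lock. What follows is a standalone C11 sketch of that packed-counter pattern, not the kernel code itself; the names (inc_active, set_processed, etc.) and the use of <stdatomic.h> are mine, chosen so the snippet compiles and runs in userspace.

#include <stdatomic.h>
#include <stdio.h>

/* Low 16 bits: biased count of active stripes.
 * High 16 bits: count of processed stripes.
 * Same layout as described in the comment above, on a plain C11 atomic. */
static atomic_uint segments;

static void inc_active(void)
{
	atomic_fetch_add(&segments, 1);
}

static unsigned int dec_active(void)
{
	/* fetch_sub returns the old value; subtract 1 to mimic atomic_sub_return() */
	return (atomic_fetch_sub(&segments, 1) - 1) & 0xffff;
}

static unsigned int processed(void)
{
	return (atomic_load(&segments) >> 16) & 0xffff;
}

static void set_processed(unsigned int cnt)
{
	unsigned int old = atomic_load(&segments), new;

	do {	/* keep the low half, replace the high half, like the cmpxchg loop above */
		new = (old & 0xffff) | (cnt << 16);
	} while (!atomic_compare_exchange_weak(&segments, &old, new));
}

int main(void)
{
	atomic_store(&segments, 1);	/* biased initial count, as in remove_bio_from_retry() */
	inc_active();
	set_processed(3);
	printf("processed=%u, active after dec=%u\n", processed(), dec_active());
	return 0;
}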
@@ -190,47 +196,56 @@ static int stripe_operations_active(struct stripe_head *sh)
190 test_bit(STRIPE_COMPUTE_RUN, &sh->state); 196 test_bit(STRIPE_COMPUTE_RUN, &sh->state);
191} 197}
192 198
193static void __release_stripe(struct r5conf *conf, struct stripe_head *sh) 199static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
194{ 200{
195 if (atomic_dec_and_test(&sh->count)) { 201 BUG_ON(!list_empty(&sh->lru));
196 BUG_ON(!list_empty(&sh->lru)); 202 BUG_ON(atomic_read(&conf->active_stripes)==0);
197 BUG_ON(atomic_read(&conf->active_stripes)==0); 203 if (test_bit(STRIPE_HANDLE, &sh->state)) {
198 if (test_bit(STRIPE_HANDLE, &sh->state)) { 204 if (test_bit(STRIPE_DELAYED, &sh->state) &&
199 if (test_bit(STRIPE_DELAYED, &sh->state)) 205 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
200 list_add_tail(&sh->lru, &conf->delayed_list); 206 list_add_tail(&sh->lru, &conf->delayed_list);
201 else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && 207 else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
202 sh->bm_seq - conf->seq_write > 0) 208 sh->bm_seq - conf->seq_write > 0)
203 list_add_tail(&sh->lru, &conf->bitmap_list); 209 list_add_tail(&sh->lru, &conf->bitmap_list);
204 else { 210 else {
205 clear_bit(STRIPE_BIT_DELAY, &sh->state); 211 clear_bit(STRIPE_DELAYED, &sh->state);
206 list_add_tail(&sh->lru, &conf->handle_list); 212 clear_bit(STRIPE_BIT_DELAY, &sh->state);
207 } 213 list_add_tail(&sh->lru, &conf->handle_list);
208 md_wakeup_thread(conf->mddev->thread); 214 }
209 } else { 215 md_wakeup_thread(conf->mddev->thread);
210 BUG_ON(stripe_operations_active(sh)); 216 } else {
211 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 217 BUG_ON(stripe_operations_active(sh));
212 if (atomic_dec_return(&conf->preread_active_stripes) 218 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
213 < IO_THRESHOLD) 219 if (atomic_dec_return(&conf->preread_active_stripes)
214 md_wakeup_thread(conf->mddev->thread); 220 < IO_THRESHOLD)
215 atomic_dec(&conf->active_stripes); 221 md_wakeup_thread(conf->mddev->thread);
216 if (!test_bit(STRIPE_EXPANDING, &sh->state)) { 222 atomic_dec(&conf->active_stripes);
217 list_add_tail(&sh->lru, &conf->inactive_list); 223 if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
218 wake_up(&conf->wait_for_stripe); 224 list_add_tail(&sh->lru, &conf->inactive_list);
219 if (conf->retry_read_aligned) 225 wake_up(&conf->wait_for_stripe);
220 md_wakeup_thread(conf->mddev->thread); 226 if (conf->retry_read_aligned)
221 } 227 md_wakeup_thread(conf->mddev->thread);
222 } 228 }
223 } 229 }
224} 230}
225 231
232static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
233{
234 if (atomic_dec_and_test(&sh->count))
235 do_release_stripe(conf, sh);
236}
237
226static void release_stripe(struct stripe_head *sh) 238static void release_stripe(struct stripe_head *sh)
227{ 239{
228 struct r5conf *conf = sh->raid_conf; 240 struct r5conf *conf = sh->raid_conf;
229 unsigned long flags; 241 unsigned long flags;
230 242
231 spin_lock_irqsave(&conf->device_lock, flags); 243 local_irq_save(flags);
232 __release_stripe(conf, sh); 244 if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
233 spin_unlock_irqrestore(&conf->device_lock, flags); 245 do_release_stripe(conf, sh);
246 spin_unlock(&conf->device_lock);
247 }
248 local_irq_restore(flags);
234} 249}
235 250
236static inline void remove_hash(struct stripe_head *sh) 251static inline void remove_hash(struct stripe_head *sh)
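release_stripe() above now goes through atomic_dec_and_lock(), so only the drop of the final reference pays for conf->device_lock. Below is a minimal userspace sketch of that dec-and-lock pattern; it is illustrative only, the names are mine, and the kernel's own atomic_dec_and_lock() additionally covers the irq handling that local_irq_save() provides in the patch.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Drop a reference locklessly unless it might be the last one; only then
 * take the lock, so the common release path never touches the shared lock. */
static bool dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
{
	int old = atomic_load(cnt);

	/* Fast path: clearly not the last reference. */
	while (old > 1)
		if (atomic_compare_exchange_weak(cnt, &old, old - 1))
			return false;

	/* Slow path: may be the last reference, take the lock before deciding. */
	pthread_mutex_lock(lock);
	if (atomic_fetch_sub(cnt, 1) == 1)
		return true;	/* caller tears the object down, then unlocks */
	pthread_mutex_unlock(lock);
	return false;
}

In the rewritten release_stripe() the true case is exactly where do_release_stripe() runs, with device_lock held.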
@@ -469,7 +484,8 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
469 } else { 484 } else {
470 if (atomic_read(&sh->count)) { 485 if (atomic_read(&sh->count)) {
471 BUG_ON(!list_empty(&sh->lru) 486 BUG_ON(!list_empty(&sh->lru)
472 && !test_bit(STRIPE_EXPANDING, &sh->state)); 487 && !test_bit(STRIPE_EXPANDING, &sh->state)
488 && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state));
473 } else { 489 } else {
474 if (!test_bit(STRIPE_HANDLE, &sh->state)) 490 if (!test_bit(STRIPE_HANDLE, &sh->state))
475 atomic_inc(&conf->active_stripes); 491 atomic_inc(&conf->active_stripes);
@@ -606,6 +622,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
606 * a chance*/ 622 * a chance*/
607 md_check_recovery(conf->mddev); 623 md_check_recovery(conf->mddev);
608 } 624 }
625 /*
626 * Because md_wait_for_blocked_rdev
627 * will dec nr_pending, we must
628 * increment it first.
629 */
630 atomic_inc(&rdev->nr_pending);
609 md_wait_for_blocked_rdev(rdev, conf->mddev); 631 md_wait_for_blocked_rdev(rdev, conf->mddev);
610 } else { 632 } else {
611 /* Acknowledged bad block - skip the write */ 633 /* Acknowledged bad block - skip the write */
@@ -632,6 +654,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
632 else 654 else
633 bi->bi_sector = (sh->sector 655 bi->bi_sector = (sh->sector
634 + rdev->data_offset); 656 + rdev->data_offset);
657 if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
658 bi->bi_rw |= REQ_FLUSH;
659
635 bi->bi_flags = 1 << BIO_UPTODATE; 660 bi->bi_flags = 1 << BIO_UPTODATE;
636 bi->bi_idx = 0; 661 bi->bi_idx = 0;
637 bi->bi_io_vec[0].bv_len = STRIPE_SIZE; 662 bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
@@ -741,14 +766,12 @@ static void ops_complete_biofill(void *stripe_head_ref)
741{ 766{
742 struct stripe_head *sh = stripe_head_ref; 767 struct stripe_head *sh = stripe_head_ref;
743 struct bio *return_bi = NULL; 768 struct bio *return_bi = NULL;
744 struct r5conf *conf = sh->raid_conf;
745 int i; 769 int i;
746 770
747 pr_debug("%s: stripe %llu\n", __func__, 771 pr_debug("%s: stripe %llu\n", __func__,
748 (unsigned long long)sh->sector); 772 (unsigned long long)sh->sector);
749 773
750 /* clear completed biofills */ 774 /* clear completed biofills */
751 spin_lock_irq(&conf->device_lock);
752 for (i = sh->disks; i--; ) { 775 for (i = sh->disks; i--; ) {
753 struct r5dev *dev = &sh->dev[i]; 776 struct r5dev *dev = &sh->dev[i];
754 777
@@ -766,7 +789,7 @@ static void ops_complete_biofill(void *stripe_head_ref)
766 while (rbi && rbi->bi_sector < 789 while (rbi && rbi->bi_sector <
767 dev->sector + STRIPE_SECTORS) { 790 dev->sector + STRIPE_SECTORS) {
768 rbi2 = r5_next_bio(rbi, dev->sector); 791 rbi2 = r5_next_bio(rbi, dev->sector);
769 if (!raid5_dec_bi_phys_segments(rbi)) { 792 if (!raid5_dec_bi_active_stripes(rbi)) {
770 rbi->bi_next = return_bi; 793 rbi->bi_next = return_bi;
771 return_bi = rbi; 794 return_bi = rbi;
772 } 795 }
@@ -774,7 +797,6 @@ static void ops_complete_biofill(void *stripe_head_ref)
774 } 797 }
775 } 798 }
776 } 799 }
777 spin_unlock_irq(&conf->device_lock);
778 clear_bit(STRIPE_BIOFILL_RUN, &sh->state); 800 clear_bit(STRIPE_BIOFILL_RUN, &sh->state);
779 801
780 return_io(return_bi); 802 return_io(return_bi);
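Dropping conf->device_lock from ops_complete_biofill() works because, after this patch, the per-bio count is dropped atomically and completed bios are only ever chained onto a list local to the call, so no lock is needed for the hand-off. A self-contained sketch of that lockless completion pattern, with all names invented here:

#include <stdatomic.h>
#include <stddef.h>

struct bio {
	atomic_int active;	/* stands in for the active-stripes half of bi_phys_segments */
	struct bio *next;
};

static void bio_endio(struct bio *b)
{
	(void)b;		/* completion stub */
}

/* Drop one reference on each bio; whoever drops the last reference owns the
 * bio and may complete it.  The "done" list is call-local, so no lock. */
static void complete_some(struct bio **bios, int n)
{
	struct bio *done = NULL;

	for (int i = 0; i < n; i++)
		if (atomic_fetch_sub(&bios[i]->active, 1) == 1) {
			bios[i]->next = done;
			done = bios[i];
		}

	while (done) {		/* analogous to return_io(return_bi) above */
		struct bio *b = done;
		done = done->next;
		bio_endio(b);
	}
}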
@@ -786,7 +808,6 @@ static void ops_complete_biofill(void *stripe_head_ref)
786static void ops_run_biofill(struct stripe_head *sh) 808static void ops_run_biofill(struct stripe_head *sh)
787{ 809{
788 struct dma_async_tx_descriptor *tx = NULL; 810 struct dma_async_tx_descriptor *tx = NULL;
789 struct r5conf *conf = sh->raid_conf;
790 struct async_submit_ctl submit; 811 struct async_submit_ctl submit;
791 int i; 812 int i;
792 813
@@ -797,10 +818,10 @@ static void ops_run_biofill(struct stripe_head *sh)
797 struct r5dev *dev = &sh->dev[i]; 818 struct r5dev *dev = &sh->dev[i];
798 if (test_bit(R5_Wantfill, &dev->flags)) { 819 if (test_bit(R5_Wantfill, &dev->flags)) {
799 struct bio *rbi; 820 struct bio *rbi;
800 spin_lock_irq(&conf->device_lock); 821 spin_lock_irq(&sh->stripe_lock);
801 dev->read = rbi = dev->toread; 822 dev->read = rbi = dev->toread;
802 dev->toread = NULL; 823 dev->toread = NULL;
803 spin_unlock_irq(&conf->device_lock); 824 spin_unlock_irq(&sh->stripe_lock);
804 while (rbi && rbi->bi_sector < 825 while (rbi && rbi->bi_sector <
805 dev->sector + STRIPE_SECTORS) { 826 dev->sector + STRIPE_SECTORS) {
806 tx = async_copy_data(0, rbi, dev->page, 827 tx = async_copy_data(0, rbi, dev->page,
@@ -1136,12 +1157,12 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
1136 if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) { 1157 if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) {
1137 struct bio *wbi; 1158 struct bio *wbi;
1138 1159
1139 spin_lock_irq(&sh->raid_conf->device_lock); 1160 spin_lock_irq(&sh->stripe_lock);
1140 chosen = dev->towrite; 1161 chosen = dev->towrite;
1141 dev->towrite = NULL; 1162 dev->towrite = NULL;
1142 BUG_ON(dev->written); 1163 BUG_ON(dev->written);
1143 wbi = dev->written = chosen; 1164 wbi = dev->written = chosen;
1144 spin_unlock_irq(&sh->raid_conf->device_lock); 1165 spin_unlock_irq(&sh->stripe_lock);
1145 1166
1146 while (wbi && wbi->bi_sector < 1167 while (wbi && wbi->bi_sector <
1147 dev->sector + STRIPE_SECTORS) { 1168 dev->sector + STRIPE_SECTORS) {
@@ -1446,6 +1467,8 @@ static int grow_one_stripe(struct r5conf *conf)
1446 init_waitqueue_head(&sh->ops.wait_for_ops); 1467 init_waitqueue_head(&sh->ops.wait_for_ops);
1447 #endif 1468 #endif
1448 1469
1470 spin_lock_init(&sh->stripe_lock);
1471
1449 if (grow_buffers(sh)) { 1472 if (grow_buffers(sh)) {
1450 shrink_buffers(sh); 1473 shrink_buffers(sh);
1451 kmem_cache_free(conf->slab_cache, sh); 1474 kmem_cache_free(conf->slab_cache, sh);
@@ -1731,12 +1754,15 @@ static void raid5_end_read_request(struct bio * bi, int error)
1731 atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); 1754 atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
1732 clear_bit(R5_ReadError, &sh->dev[i].flags); 1755 clear_bit(R5_ReadError, &sh->dev[i].flags);
1733 clear_bit(R5_ReWrite, &sh->dev[i].flags); 1756 clear_bit(R5_ReWrite, &sh->dev[i].flags);
1734 } 1757 } else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
1758 clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
1759
1735 if (atomic_read(&rdev->read_errors)) 1760 if (atomic_read(&rdev->read_errors))
1736 atomic_set(&rdev->read_errors, 0); 1761 atomic_set(&rdev->read_errors, 0);
1737 } else { 1762 } else {
1738 const char *bdn = bdevname(rdev->bdev, b); 1763 const char *bdn = bdevname(rdev->bdev, b);
1739 int retry = 0; 1764 int retry = 0;
1765 int set_bad = 0;
1740 1766
1741 clear_bit(R5_UPTODATE, &sh->dev[i].flags); 1767 clear_bit(R5_UPTODATE, &sh->dev[i].flags);
1742 atomic_inc(&rdev->read_errors); 1768 atomic_inc(&rdev->read_errors);
@@ -1748,7 +1774,8 @@ static void raid5_end_read_request(struct bio * bi, int error)
1748 mdname(conf->mddev), 1774 mdname(conf->mddev),
1749 (unsigned long long)s, 1775 (unsigned long long)s,
1750 bdn); 1776 bdn);
1751 else if (conf->mddev->degraded >= conf->max_degraded) 1777 else if (conf->mddev->degraded >= conf->max_degraded) {
1778 set_bad = 1;
1752 printk_ratelimited( 1779 printk_ratelimited(
1753 KERN_WARNING 1780 KERN_WARNING
1754 "md/raid:%s: read error not correctable " 1781 "md/raid:%s: read error not correctable "
@@ -1756,8 +1783,9 @@ static void raid5_end_read_request(struct bio * bi, int error)
1756 mdname(conf->mddev), 1783 mdname(conf->mddev),
1757 (unsigned long long)s, 1784 (unsigned long long)s,
1758 bdn); 1785 bdn);
1759 else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) 1786 } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) {
1760 /* Oh, no!!! */ 1787 /* Oh, no!!! */
1788 set_bad = 1;
1761 printk_ratelimited( 1789 printk_ratelimited(
1762 KERN_WARNING 1790 KERN_WARNING
1763 "md/raid:%s: read error NOT corrected!! " 1791 "md/raid:%s: read error NOT corrected!! "
@@ -1765,7 +1793,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
1765 mdname(conf->mddev), 1793 mdname(conf->mddev),
1766 (unsigned long long)s, 1794 (unsigned long long)s,
1767 bdn); 1795 bdn);
1768 else if (atomic_read(&rdev->read_errors) 1796 } else if (atomic_read(&rdev->read_errors)
1769 > conf->max_nr_stripes) 1797 > conf->max_nr_stripes)
1770 printk(KERN_WARNING 1798 printk(KERN_WARNING
1771 "md/raid:%s: Too many read errors, failing device %s.\n", 1799 "md/raid:%s: Too many read errors, failing device %s.\n",
@@ -1773,11 +1801,19 @@ static void raid5_end_read_request(struct bio * bi, int error)
1773 else 1801 else
1774 retry = 1; 1802 retry = 1;
1775 if (retry) 1803 if (retry)
1776 set_bit(R5_ReadError, &sh->dev[i].flags); 1804 if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
1805 set_bit(R5_ReadError, &sh->dev[i].flags);
1806 clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
1807 } else
1808 set_bit(R5_ReadNoMerge, &sh->dev[i].flags);
1777 else { 1809 else {
1778 clear_bit(R5_ReadError, &sh->dev[i].flags); 1810 clear_bit(R5_ReadError, &sh->dev[i].flags);
1779 clear_bit(R5_ReWrite, &sh->dev[i].flags); 1811 clear_bit(R5_ReWrite, &sh->dev[i].flags);
1780 md_error(conf->mddev, rdev); 1812 if (!(set_bad
1813 && test_bit(In_sync, &rdev->flags)
1814 && rdev_set_badblocks(
1815 rdev, sh->sector, STRIPE_SECTORS, 0)))
1816 md_error(conf->mddev, rdev);
1781 } 1817 }
1782 } 1818 }
1783 rdev_dec_pending(rdev, conf->mddev); 1819 rdev_dec_pending(rdev, conf->mddev);
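The retry branch above introduces a two-step escalation around R5_ReadNoMerge: the first failed read is retried as an unmergeable bio (ops_run_io sets REQ_FLUSH on it when the flag is set), and only a second failure marks the slot with R5_ReadError as before the patch. Distilled into a tiny state sketch, with flag names that are mine, not the kernel's:

enum {
	DEV_NOMERGE    = 1 << 0,	/* retry pending, submit unmerged */
	DEV_READ_ERROR = 1 << 1,	/* handle as a read error, as pre-patch code did */
};

static void note_read_failure(unsigned int *flags)
{
	if (*flags & DEV_NOMERGE) {
		*flags &= ~DEV_NOMERGE;
		*flags |= DEV_READ_ERROR;	/* second failure: give up on rereading */
	} else {
		*flags |= DEV_NOMERGE;		/* first failure: one unmerged retry */
	}
}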
@@ -2325,11 +2361,18 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
2325 (unsigned long long)bi->bi_sector, 2361 (unsigned long long)bi->bi_sector,
2326 (unsigned long long)sh->sector); 2362 (unsigned long long)sh->sector);
2327 2363
2328 2364 /*
2329 spin_lock_irq(&conf->device_lock); 2365 * If several bio share a stripe. The bio bi_phys_segments acts as a
2366 * reference count to avoid race. The reference count should already be
2367 * increased before this function is called (for example, in
2368 * make_request()), so other bio sharing this stripe will not free the
2369 * stripe. If a stripe is owned by one stripe, the stripe lock will
2370 * protect it.
2371 */
2372 spin_lock_irq(&sh->stripe_lock);
2330 if (forwrite) { 2373 if (forwrite) {
2331 bip = &sh->dev[dd_idx].towrite; 2374 bip = &sh->dev[dd_idx].towrite;
2332 if (*bip == NULL && sh->dev[dd_idx].written == NULL) 2375 if (*bip == NULL)
2333 firstwrite = 1; 2376 firstwrite = 1;
2334 } else 2377 } else
2335 bip = &sh->dev[dd_idx].toread; 2378 bip = &sh->dev[dd_idx].toread;
@@ -2345,7 +2388,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
2345 if (*bip) 2388 if (*bip)
2346 bi->bi_next = *bip; 2389 bi->bi_next = *bip;
2347 *bip = bi; 2390 *bip = bi;
2348 bi->bi_phys_segments++; 2391 raid5_inc_bi_active_stripes(bi);
2349 2392
2350 if (forwrite) { 2393 if (forwrite) {
2351 /* check if page is covered */ 2394 /* check if page is covered */
@@ -2360,7 +2403,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
2360 if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) 2403 if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
2361 set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); 2404 set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
2362 } 2405 }
2363 spin_unlock_irq(&conf->device_lock); 2406 spin_unlock_irq(&sh->stripe_lock);
2364 2407
2365 pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", 2408 pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
2366 (unsigned long long)(*bip)->bi_sector, 2409 (unsigned long long)(*bip)->bi_sector,
@@ -2376,7 +2419,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
2376 2419
2377 overlap: 2420 overlap:
2378 set_bit(R5_Overlap, &sh->dev[dd_idx].flags); 2421 set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
2379 spin_unlock_irq(&conf->device_lock); 2422 spin_unlock_irq(&sh->stripe_lock);
2380 return 0; 2423 return 0;
2381} 2424}
2382 2425
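The comment added to add_stripe_bio() above spells out the new ownership rules: the bio's active-stripe count keeps the bio alive across all stripes that reference it, while each stripe's own stripe_lock protects its toread/towrite lists instead of the global conf->device_lock. A toy userspace model of that split, with all structures and names invented here:

#include <pthread.h>
#include <stdatomic.h>

struct bio {
	atomic_int active_stripes;	/* like the low 16 bits of bi_phys_segments */
	struct bio *next;
};

struct stripe {
	pthread_mutex_t lock;		/* plays the role of sh->stripe_lock */
	struct bio *towrite;
};

static void add_stripe_bio(struct stripe *sh, struct bio *bi)
{
	/* Take a reference before linking, so another stripe finishing this
	 * bio cannot free it while we hold only the per-stripe lock. */
	atomic_fetch_add(&bi->active_stripes, 1);

	pthread_mutex_lock(&sh->lock);
	bi->next = sh->towrite;
	sh->towrite = bi;
	pthread_mutex_unlock(&sh->lock);
}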
@@ -2426,10 +2469,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
2426 rdev_dec_pending(rdev, conf->mddev); 2469 rdev_dec_pending(rdev, conf->mddev);
2427 } 2470 }
2428 } 2471 }
2429 spin_lock_irq(&conf->device_lock); 2472 spin_lock_irq(&sh->stripe_lock);
2430 /* fail all writes first */ 2473 /* fail all writes first */
2431 bi = sh->dev[i].towrite; 2474 bi = sh->dev[i].towrite;
2432 sh->dev[i].towrite = NULL; 2475 sh->dev[i].towrite = NULL;
2476 spin_unlock_irq(&sh->stripe_lock);
2433 if (bi) { 2477 if (bi) {
2434 s->to_write--; 2478 s->to_write--;
2435 bitmap_end = 1; 2479 bitmap_end = 1;
@@ -2442,13 +2486,17 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
2442 sh->dev[i].sector + STRIPE_SECTORS) { 2486 sh->dev[i].sector + STRIPE_SECTORS) {
2443 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); 2487 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
2444 clear_bit(BIO_UPTODATE, &bi->bi_flags); 2488 clear_bit(BIO_UPTODATE, &bi->bi_flags);
2445 if (!raid5_dec_bi_phys_segments(bi)) { 2489 if (!raid5_dec_bi_active_stripes(bi)) {
2446 md_write_end(conf->mddev); 2490 md_write_end(conf->mddev);
2447 bi->bi_next = *return_bi; 2491 bi->bi_next = *return_bi;
2448 *return_bi = bi; 2492 *return_bi = bi;
2449 } 2493 }
2450 bi = nextbi; 2494 bi = nextbi;
2451 } 2495 }
2496 if (bitmap_end)
2497 bitmap_endwrite(conf->mddev->bitmap, sh->sector,
2498 STRIPE_SECTORS, 0, 0);
2499 bitmap_end = 0;
2452 /* and fail all 'written' */ 2500 /* and fail all 'written' */
2453 bi = sh->dev[i].written; 2501 bi = sh->dev[i].written;
2454 sh->dev[i].written = NULL; 2502 sh->dev[i].written = NULL;
@@ -2457,7 +2505,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
2457 sh->dev[i].sector + STRIPE_SECTORS) { 2505 sh->dev[i].sector + STRIPE_SECTORS) {
2458 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); 2506 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
2459 clear_bit(BIO_UPTODATE, &bi->bi_flags); 2507 clear_bit(BIO_UPTODATE, &bi->bi_flags);
2460 if (!raid5_dec_bi_phys_segments(bi)) { 2508 if (!raid5_dec_bi_active_stripes(bi)) {
2461 md_write_end(conf->mddev); 2509 md_write_end(conf->mddev);
2462 bi->bi_next = *return_bi; 2510 bi->bi_next = *return_bi;
2463 *return_bi = bi; 2511 *return_bi = bi;
@@ -2481,14 +2529,13 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
2481 struct bio *nextbi = 2529 struct bio *nextbi =
2482 r5_next_bio(bi, sh->dev[i].sector); 2530 r5_next_bio(bi, sh->dev[i].sector);
2483 clear_bit(BIO_UPTODATE, &bi->bi_flags); 2531 clear_bit(BIO_UPTODATE, &bi->bi_flags);
2484 if (!raid5_dec_bi_phys_segments(bi)) { 2532 if (!raid5_dec_bi_active_stripes(bi)) {
2485 bi->bi_next = *return_bi; 2533 bi->bi_next = *return_bi;
2486 *return_bi = bi; 2534 *return_bi = bi;
2487 } 2535 }
2488 bi = nextbi; 2536 bi = nextbi;
2489 } 2537 }
2490 } 2538 }
2491 spin_unlock_irq(&conf->device_lock);
2492 if (bitmap_end) 2539 if (bitmap_end)
2493 bitmap_endwrite(conf->mddev->bitmap, sh->sector, 2540 bitmap_endwrite(conf->mddev->bitmap, sh->sector,
2494 STRIPE_SECTORS, 0, 0); 2541 STRIPE_SECTORS, 0, 0);
@@ -2692,30 +2739,23 @@ static void handle_stripe_clean_event(struct r5conf *conf,
2692 test_bit(R5_UPTODATE, &dev->flags)) { 2739 test_bit(R5_UPTODATE, &dev->flags)) {
2693 /* We can return any write requests */ 2740 /* We can return any write requests */
2694 struct bio *wbi, *wbi2; 2741 struct bio *wbi, *wbi2;
2695 int bitmap_end = 0;
2696 pr_debug("Return write for disc %d\n", i); 2742 pr_debug("Return write for disc %d\n", i);
2697 spin_lock_irq(&conf->device_lock);
2698 wbi = dev->written; 2743 wbi = dev->written;
2699 dev->written = NULL; 2744 dev->written = NULL;
2700 while (wbi && wbi->bi_sector < 2745 while (wbi && wbi->bi_sector <
2701 dev->sector + STRIPE_SECTORS) { 2746 dev->sector + STRIPE_SECTORS) {
2702 wbi2 = r5_next_bio(wbi, dev->sector); 2747 wbi2 = r5_next_bio(wbi, dev->sector);
2703 if (!raid5_dec_bi_phys_segments(wbi)) { 2748 if (!raid5_dec_bi_active_stripes(wbi)) {
2704 md_write_end(conf->mddev); 2749 md_write_end(conf->mddev);
2705 wbi->bi_next = *return_bi; 2750 wbi->bi_next = *return_bi;
2706 *return_bi = wbi; 2751 *return_bi = wbi;
2707 } 2752 }
2708 wbi = wbi2; 2753 wbi = wbi2;
2709 } 2754 }
2710 if (dev->towrite == NULL) 2755 bitmap_endwrite(conf->mddev->bitmap, sh->sector,
2711 bitmap_end = 1; 2756 STRIPE_SECTORS,
2712 spin_unlock_irq(&conf->device_lock);
2713 if (bitmap_end)
2714 bitmap_endwrite(conf->mddev->bitmap,
2715 sh->sector,
2716 STRIPE_SECTORS,
2717 !test_bit(STRIPE_DEGRADED, &sh->state), 2757 !test_bit(STRIPE_DEGRADED, &sh->state),
2718 0); 2758 0);
2719 } 2759 }
2720 } 2760 }
2721 2761
@@ -3167,7 +3207,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
3167 3207
3168 /* Now to look around and see what can be done */ 3208 /* Now to look around and see what can be done */
3169 rcu_read_lock(); 3209 rcu_read_lock();
3170 spin_lock_irq(&conf->device_lock);
3171 for (i=disks; i--; ) { 3210 for (i=disks; i--; ) {
3172 struct md_rdev *rdev; 3211 struct md_rdev *rdev;
3173 sector_t first_bad; 3212 sector_t first_bad;
@@ -3313,7 +3352,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
3313 do_recovery = 1; 3352 do_recovery = 1;
3314 } 3353 }
3315 } 3354 }
3316 spin_unlock_irq(&conf->device_lock);
3317 if (test_bit(STRIPE_SYNCING, &sh->state)) { 3355 if (test_bit(STRIPE_SYNCING, &sh->state)) {
3318 /* If there is a failed device being replaced, 3356 /* If there is a failed device being replaced,
3319 * we must be recovering. 3357 * we must be recovering.
@@ -3582,8 +3620,18 @@ static void handle_stripe(struct stripe_head *sh)
3582 3620
3583finish: 3621finish:
3584 /* wait for this device to become unblocked */ 3622 /* wait for this device to become unblocked */
3585 if (conf->mddev->external && unlikely(s.blocked_rdev)) 3623 if (unlikely(s.blocked_rdev)) {
3586 md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev); 3624 if (conf->mddev->external)
3625 md_wait_for_blocked_rdev(s.blocked_rdev,
3626 conf->mddev);
3627 else
3628 /* Internal metadata will immediately
3629 * be written by raid5d, so we don't
3630 * need to wait here.
3631 */
3632 rdev_dec_pending(s.blocked_rdev,
3633 conf->mddev);
3634 }
3587 3635
3588 if (s.handle_bad_blocks) 3636 if (s.handle_bad_blocks)
3589 for (i = disks; i--; ) { 3637 for (i = disks; i--; ) {
@@ -3766,7 +3814,7 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf)
3766 * this sets the active strip count to 1 and the processed 3814 * this sets the active strip count to 1 and the processed
3767 * strip count to zero (upper 8 bits) 3815 * strip count to zero (upper 8 bits)
3768 */ 3816 */
3769 bi->bi_phys_segments = 1; /* biased count of active stripes */ 3817 raid5_set_bi_stripes(bi, 1); /* biased count of active stripes */
3770 } 3818 }
3771 3819
3772 return bi; 3820 return bi;
@@ -3881,8 +3929,6 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
3881 raid_bio->bi_next = (void*)rdev; 3929 raid_bio->bi_next = (void*)rdev;
3882 align_bi->bi_bdev = rdev->bdev; 3930 align_bi->bi_bdev = rdev->bdev;
3883 align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); 3931 align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
3884 /* No reshape active, so we can trust rdev->data_offset */
3885 align_bi->bi_sector += rdev->data_offset;
3886 3932
3887 if (!bio_fits_rdev(align_bi) || 3933 if (!bio_fits_rdev(align_bi) ||
3888 is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9, 3934 is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9,
@@ -3893,6 +3939,9 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
3893 return 0; 3939 return 0;
3894 } 3940 }
3895 3941
3942 /* No reshape active, so we can trust rdev->data_offset */
3943 align_bi->bi_sector += rdev->data_offset;
3944
3896 spin_lock_irq(&conf->device_lock); 3945 spin_lock_irq(&conf->device_lock);
3897 wait_event_lock_irq(conf->wait_for_stripe, 3946 wait_event_lock_irq(conf->wait_for_stripe,
3898 conf->quiesce == 0, 3947 conf->quiesce == 0,
@@ -3962,6 +4011,62 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
3962 return sh; 4011 return sh;
3963} 4012}
3964 4013
4014struct raid5_plug_cb {
4015 struct blk_plug_cb cb;
4016 struct list_head list;
4017};
4018
4019static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
4020{
4021 struct raid5_plug_cb *cb = container_of(
4022 blk_cb, struct raid5_plug_cb, cb);
4023 struct stripe_head *sh;
4024 struct mddev *mddev = cb->cb.data;
4025 struct r5conf *conf = mddev->private;
4026
4027 if (cb->list.next && !list_empty(&cb->list)) {
4028 spin_lock_irq(&conf->device_lock);
4029 while (!list_empty(&cb->list)) {
4030 sh = list_first_entry(&cb->list, struct stripe_head, lru);
4031 list_del_init(&sh->lru);
4032 /*
4033 * avoid race release_stripe_plug() sees
4034 * STRIPE_ON_UNPLUG_LIST clear but the stripe
4035 * is still in our list
4036 */
4037 smp_mb__before_clear_bit();
4038 clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
4039 __release_stripe(conf, sh);
4040 }
4041 spin_unlock_irq(&conf->device_lock);
4042 }
4043 kfree(cb);
4044}
4045
4046static void release_stripe_plug(struct mddev *mddev,
4047 struct stripe_head *sh)
4048{
4049 struct blk_plug_cb *blk_cb = blk_check_plugged(
4050 raid5_unplug, mddev,
4051 sizeof(struct raid5_plug_cb));
4052 struct raid5_plug_cb *cb;
4053
4054 if (!blk_cb) {
4055 release_stripe(sh);
4056 return;
4057 }
4058
4059 cb = container_of(blk_cb, struct raid5_plug_cb, cb);
4060
4061 if (cb->list.next == NULL)
4062 INIT_LIST_HEAD(&cb->list);
4063
4064 if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state))
4065 list_add_tail(&sh->lru, &cb->list);
4066 else
4067 release_stripe(sh);
4068}
4069
3965static void make_request(struct mddev *mddev, struct bio * bi) 4070static void make_request(struct mddev *mddev, struct bio * bi)
3966{ 4071{
3967 struct r5conf *conf = mddev->private; 4072 struct r5conf *conf = mddev->private;
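raid5_unplug() and release_stripe_plug() above use blk_check_plugged() to park stripes on a per-plug list and hand them back in one batch, taking conf->device_lock once per flush instead of once per stripe; STRIPE_ON_UNPLUG_LIST keeps a stripe from being queued twice. The snippet below is a simplified, single-threaded model of that batching idea, not the block-layer API: all structures and names are mine, and the real flag test is an atomic test_and_set_bit.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct stripe {
	struct stripe *next;
	bool on_plug_list;	/* models STRIPE_ON_UNPLUG_LIST, minus the atomicity */
};

struct plug {
	struct stripe *head;	/* stripes parked until the caller unplugs */
};

static pthread_mutex_t device_lock = PTHREAD_MUTEX_INITIALIZER;

static void release_one_locked(struct stripe *sh)
{
	(void)sh;		/* stand-in for __release_stripe() */
}

static void release_stripe_plug(struct plug *plug, struct stripe *sh)
{
	if (plug && !sh->on_plug_list) {
		sh->on_plug_list = true;
		sh->next = plug->head;
		plug->head = sh;
		return;			/* deferred until unplug */
	}
	pthread_mutex_lock(&device_lock);
	release_one_locked(sh);		/* no plug: release immediately */
	pthread_mutex_unlock(&device_lock);
}

static void unplug(struct plug *plug)
{
	pthread_mutex_lock(&device_lock);	/* one lock round-trip for the whole batch */
	while (plug->head) {
		struct stripe *sh = plug->head;
		plug->head = sh->next;
		sh->on_plug_list = false;
		release_one_locked(sh);
	}
	pthread_mutex_unlock(&device_lock);
}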
@@ -3971,7 +4076,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
3971 struct stripe_head *sh; 4076 struct stripe_head *sh;
3972 const int rw = bio_data_dir(bi); 4077 const int rw = bio_data_dir(bi);
3973 int remaining; 4078 int remaining;
3974 int plugged;
3975 4079
3976 if (unlikely(bi->bi_rw & REQ_FLUSH)) { 4080 if (unlikely(bi->bi_rw & REQ_FLUSH)) {
3977 md_flush_request(mddev, bi); 4081 md_flush_request(mddev, bi);
@@ -3990,7 +4094,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
3990 bi->bi_next = NULL; 4094 bi->bi_next = NULL;
3991 bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ 4095 bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
3992 4096
3993 plugged = mddev_check_plugged(mddev);
3994 for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { 4097 for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
3995 DEFINE_WAIT(w); 4098 DEFINE_WAIT(w);
3996 int previous; 4099 int previous;
@@ -4089,24 +4192,19 @@ static void make_request(struct mddev *mddev, struct bio * bi)
4089 finish_wait(&conf->wait_for_overlap, &w); 4192 finish_wait(&conf->wait_for_overlap, &w);
4090 set_bit(STRIPE_HANDLE, &sh->state); 4193 set_bit(STRIPE_HANDLE, &sh->state);
4091 clear_bit(STRIPE_DELAYED, &sh->state); 4194 clear_bit(STRIPE_DELAYED, &sh->state);
4092 if ((bi->bi_rw & REQ_SYNC) && 4195 if ((bi->bi_rw & REQ_NOIDLE) &&
4093 !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 4196 !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
4094 atomic_inc(&conf->preread_active_stripes); 4197 atomic_inc(&conf->preread_active_stripes);
4095 release_stripe(sh); 4198 release_stripe_plug(mddev, sh);
4096 } else { 4199 } else {
4097 /* cannot get stripe for read-ahead, just give-up */ 4200 /* cannot get stripe for read-ahead, just give-up */
4098 clear_bit(BIO_UPTODATE, &bi->bi_flags); 4201 clear_bit(BIO_UPTODATE, &bi->bi_flags);
4099 finish_wait(&conf->wait_for_overlap, &w); 4202 finish_wait(&conf->wait_for_overlap, &w);
4100 break; 4203 break;
4101 } 4204 }
4102
4103 } 4205 }
4104 if (!plugged)
4105 md_wakeup_thread(mddev->thread);
4106 4206
4107 spin_lock_irq(&conf->device_lock); 4207 remaining = raid5_dec_bi_active_stripes(bi);
4108 remaining = raid5_dec_bi_phys_segments(bi);
4109 spin_unlock_irq(&conf->device_lock);
4110 if (remaining == 0) { 4208 if (remaining == 0) {
4111 4209
4112 if ( rw == WRITE ) 4210 if ( rw == WRITE )
@@ -4462,7 +4560,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
4462 sector += STRIPE_SECTORS, 4560 sector += STRIPE_SECTORS,
4463 scnt++) { 4561 scnt++) {
4464 4562
4465 if (scnt < raid5_bi_hw_segments(raid_bio)) 4563 if (scnt < raid5_bi_processed_stripes(raid_bio))
4466 /* already done this stripe */ 4564 /* already done this stripe */
4467 continue; 4565 continue;
4468 4566
@@ -4470,25 +4568,24 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
4470 4568
4471 if (!sh) { 4569 if (!sh) {
4472 /* failed to get a stripe - must wait */ 4570 /* failed to get a stripe - must wait */
4473 raid5_set_bi_hw_segments(raid_bio, scnt); 4571 raid5_set_bi_processed_stripes(raid_bio, scnt);
4474 conf->retry_read_aligned = raid_bio; 4572 conf->retry_read_aligned = raid_bio;
4475 return handled; 4573 return handled;
4476 } 4574 }
4477 4575
4478 if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { 4576 if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
4479 release_stripe(sh); 4577 release_stripe(sh);
4480 raid5_set_bi_hw_segments(raid_bio, scnt); 4578 raid5_set_bi_processed_stripes(raid_bio, scnt);
4481 conf->retry_read_aligned = raid_bio; 4579 conf->retry_read_aligned = raid_bio;
4482 return handled; 4580 return handled;
4483 } 4581 }
4484 4582
4583 set_bit(R5_ReadNoMerge, &sh->dev[dd_idx].flags);
4485 handle_stripe(sh); 4584 handle_stripe(sh);
4486 release_stripe(sh); 4585 release_stripe(sh);
4487 handled++; 4586 handled++;
4488 } 4587 }
4489 spin_lock_irq(&conf->device_lock); 4588 remaining = raid5_dec_bi_active_stripes(raid_bio);
4490 remaining = raid5_dec_bi_phys_segments(raid_bio);
4491 spin_unlock_irq(&conf->device_lock);
4492 if (remaining == 0) 4589 if (remaining == 0)
4493 bio_endio(raid_bio, 0); 4590 bio_endio(raid_bio, 0);
4494 if (atomic_dec_and_test(&conf->active_aligned_reads)) 4591 if (atomic_dec_and_test(&conf->active_aligned_reads))
@@ -4496,6 +4593,30 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
4496 return handled; 4593 return handled;
4497} 4594}
4498 4595
4596#define MAX_STRIPE_BATCH 8
4597static int handle_active_stripes(struct r5conf *conf)
4598{
4599 struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
4600 int i, batch_size = 0;
4601
4602 while (batch_size < MAX_STRIPE_BATCH &&
4603 (sh = __get_priority_stripe(conf)) != NULL)
4604 batch[batch_size++] = sh;
4605
4606 if (batch_size == 0)
4607 return batch_size;
4608 spin_unlock_irq(&conf->device_lock);
4609
4610 for (i = 0; i < batch_size; i++)
4611 handle_stripe(batch[i]);
4612
4613 cond_resched();
4614
4615 spin_lock_irq(&conf->device_lock);
4616 for (i = 0; i < batch_size; i++)
4617 __release_stripe(conf, batch[i]);
4618 return batch_size;
4619}
4499 4620
4500/* 4621/*
4501 * This is our raid5 kernel thread. 4622 * This is our raid5 kernel thread.
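handle_active_stripes() above is the classic "drain a bounded batch under the lock, process it unlocked" pattern, capped at MAX_STRIPE_BATCH so raid5d still yields and rechecks state regularly. A self-contained sketch of the same shape follows; the queue type and all names are mine.

#include <pthread.h>
#include <stddef.h>

#define MAX_BATCH 8

struct work {
	struct work *next;
};

struct queue {
	pthread_mutex_t lock;
	struct work *head;
};

static void process(struct work *w)
{
	(void)w;	/* stands in for handle_stripe() */
}

/* Called with q->lock held; returns with it held again, like the kernel code. */
static int handle_batch(struct queue *q)
{
	struct work *batch[MAX_BATCH];
	int i, n = 0;

	while (n < MAX_BATCH && q->head) {
		batch[n++] = q->head;
		q->head = q->head->next;
	}
	if (n == 0)
		return 0;

	pthread_mutex_unlock(&q->lock);
	for (i = 0; i < n; i++)
		process(batch[i]);	/* the expensive part runs without the lock */
	pthread_mutex_lock(&q->lock);

	return n;			/* the kernel version also re-releases the stripes here */
}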
@@ -4506,7 +4627,6 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
4506 */ 4627 */
4507static void raid5d(struct mddev *mddev) 4628static void raid5d(struct mddev *mddev)
4508{ 4629{
4509 struct stripe_head *sh;
4510 struct r5conf *conf = mddev->private; 4630 struct r5conf *conf = mddev->private;
4511 int handled; 4631 int handled;
4512 struct blk_plug plug; 4632 struct blk_plug plug;
@@ -4520,8 +4640,9 @@ static void raid5d(struct mddev *mddev)
4520 spin_lock_irq(&conf->device_lock); 4640 spin_lock_irq(&conf->device_lock);
4521 while (1) { 4641 while (1) {
4522 struct bio *bio; 4642 struct bio *bio;
4643 int batch_size;
4523 4644
4524 if (atomic_read(&mddev->plug_cnt) == 0 && 4645 if (
4525 !list_empty(&conf->bitmap_list)) { 4646 !list_empty(&conf->bitmap_list)) {
4526 /* Now is a good time to flush some bitmap updates */ 4647 /* Now is a good time to flush some bitmap updates */
4527 conf->seq_flush++; 4648 conf->seq_flush++;
@@ -4531,8 +4652,7 @@ static void raid5d(struct mddev *mddev)
4531 conf->seq_write = conf->seq_flush; 4652 conf->seq_write = conf->seq_flush;
4532 activate_bit_delay(conf); 4653 activate_bit_delay(conf);
4533 } 4654 }
4534 if (atomic_read(&mddev->plug_cnt) == 0) 4655 raid5_activate_delayed(conf);
4535 raid5_activate_delayed(conf);
4536 4656
4537 while ((bio = remove_bio_from_retry(conf))) { 4657 while ((bio = remove_bio_from_retry(conf))) {
4538 int ok; 4658 int ok;
@@ -4544,21 +4664,16 @@ static void raid5d(struct mddev *mddev)
4544 handled++; 4664 handled++;
4545 } 4665 }
4546 4666
4547 sh = __get_priority_stripe(conf); 4667 batch_size = handle_active_stripes(conf);
4548 4668 if (!batch_size)
4549 if (!sh)
4550 break; 4669 break;
4551 spin_unlock_irq(&conf->device_lock); 4670 handled += batch_size;
4552
4553 handled++;
4554 handle_stripe(sh);
4555 release_stripe(sh);
4556 cond_resched();
4557 4671
4558 if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) 4672 if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) {
4673 spin_unlock_irq(&conf->device_lock);
4559 md_check_recovery(mddev); 4674 md_check_recovery(mddev);
4560 4675 spin_lock_irq(&conf->device_lock);
4561 spin_lock_irq(&conf->device_lock); 4676 }
4562 } 4677 }
4563 pr_debug("%d stripes handled\n", handled); 4678 pr_debug("%d stripes handled\n", handled);
4564 4679
@@ -4823,6 +4938,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
4823 int raid_disk, memory, max_disks; 4938 int raid_disk, memory, max_disks;
4824 struct md_rdev *rdev; 4939 struct md_rdev *rdev;
4825 struct disk_info *disk; 4940 struct disk_info *disk;
4941 char pers_name[6];
4826 4942
4827 if (mddev->new_level != 5 4943 if (mddev->new_level != 5
4828 && mddev->new_level != 4 4944 && mddev->new_level != 4
@@ -4946,7 +5062,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
4946 printk(KERN_INFO "md/raid:%s: allocated %dkB\n", 5062 printk(KERN_INFO "md/raid:%s: allocated %dkB\n",
4947 mdname(mddev), memory); 5063 mdname(mddev), memory);
4948 5064
4949 conf->thread = md_register_thread(raid5d, mddev, NULL); 5065 sprintf(pers_name, "raid%d", mddev->new_level);
5066 conf->thread = md_register_thread(raid5d, mddev, pers_name);
4950 if (!conf->thread) { 5067 if (!conf->thread) {
4951 printk(KERN_ERR 5068 printk(KERN_ERR
4952 "md/raid:%s: couldn't allocate thread.\n", 5069 "md/raid:%s: couldn't allocate thread.\n",
@@ -5465,10 +5582,9 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
5465 if (rdev->saved_raid_disk >= 0 && 5582 if (rdev->saved_raid_disk >= 0 &&
5466 rdev->saved_raid_disk >= first && 5583 rdev->saved_raid_disk >= first &&
5467 conf->disks[rdev->saved_raid_disk].rdev == NULL) 5584 conf->disks[rdev->saved_raid_disk].rdev == NULL)
5468 disk = rdev->saved_raid_disk; 5585 first = rdev->saved_raid_disk;
5469 else 5586
5470 disk = first; 5587 for (disk = first; disk <= last; disk++) {
5471 for ( ; disk <= last ; disk++) {
5472 p = conf->disks + disk; 5588 p = conf->disks + disk;
5473 if (p->rdev == NULL) { 5589 if (p->rdev == NULL) {
5474 clear_bit(In_sync, &rdev->flags); 5590 clear_bit(In_sync, &rdev->flags);
@@ -5477,8 +5593,11 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
5477 if (rdev->saved_raid_disk != disk) 5593 if (rdev->saved_raid_disk != disk)
5478 conf->fullsync = 1; 5594 conf->fullsync = 1;
5479 rcu_assign_pointer(p->rdev, rdev); 5595 rcu_assign_pointer(p->rdev, rdev);
5480 break; 5596 goto out;
5481 } 5597 }
5598 }
5599 for (disk = first; disk <= last; disk++) {
5600 p = conf->disks + disk;
5482 if (test_bit(WantReplacement, &p->rdev->flags) && 5601 if (test_bit(WantReplacement, &p->rdev->flags) &&
5483 p->replacement == NULL) { 5602 p->replacement == NULL) {
5484 clear_bit(In_sync, &rdev->flags); 5603 clear_bit(In_sync, &rdev->flags);
@@ -5490,6 +5609,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
5490 break; 5609 break;
5491 } 5610 }
5492 } 5611 }
5612out:
5493 print_raid5_conf(conf); 5613 print_raid5_conf(conf);
5494 return err; 5614 return err;
5495} 5615}