Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 376 |
1 file changed, 248 insertions, 128 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d26767246d26..adda94df5eb2 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -99,34 +99,40 @@ static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector) | |||
99 | * We maintain a biased count of active stripes in the bottom 16 bits of | 99 | * We maintain a biased count of active stripes in the bottom 16 bits of |
100 | * bi_phys_segments, and a count of processed stripes in the upper 16 bits | 100 | * bi_phys_segments, and a count of processed stripes in the upper 16 bits |
101 | */ | 101 | */ |
102 | static inline int raid5_bi_phys_segments(struct bio *bio) | 102 | static inline int raid5_bi_processed_stripes(struct bio *bio) |
103 | { | 103 | { |
104 | return bio->bi_phys_segments & 0xffff; | 104 | atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; |
105 | return (atomic_read(segments) >> 16) & 0xffff; | ||
105 | } | 106 | } |
106 | 107 | ||
107 | static inline int raid5_bi_hw_segments(struct bio *bio) | 108 | static inline int raid5_dec_bi_active_stripes(struct bio *bio) |
108 | { | 109 | { |
109 | return (bio->bi_phys_segments >> 16) & 0xffff; | 110 | atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; |
111 | return atomic_sub_return(1, segments) & 0xffff; | ||
110 | } | 112 | } |
111 | 113 | ||
112 | static inline int raid5_dec_bi_phys_segments(struct bio *bio) | 114 | static inline void raid5_inc_bi_active_stripes(struct bio *bio) |
113 | { | 115 | { |
114 | --bio->bi_phys_segments; | 116 | atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; |
115 | return raid5_bi_phys_segments(bio); | 117 | atomic_inc(segments); |
116 | } | 118 | } |
117 | 119 | ||
118 | static inline int raid5_dec_bi_hw_segments(struct bio *bio) | 120 | static inline void raid5_set_bi_processed_stripes(struct bio *bio, |
121 | unsigned int cnt) | ||
119 | { | 122 | { |
120 | unsigned short val = raid5_bi_hw_segments(bio); | 123 | atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; |
124 | int old, new; | ||
121 | 125 | ||
122 | --val; | 126 | do { |
123 | bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio); | 127 | old = atomic_read(segments); |
124 | return val; | 128 | new = (old & 0xffff) | (cnt << 16); |
129 | } while (atomic_cmpxchg(segments, old, new) != old); | ||
125 | } | 130 | } |
126 | 131 | ||
127 | static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) | 132 | static inline void raid5_set_bi_stripes(struct bio *bio, unsigned int cnt) |
128 | { | 133 | { |
129 | bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16); | 134 | atomic_t *segments = (atomic_t *)&bio->bi_phys_segments; |
135 | atomic_set(segments, cnt); | ||
130 | } | 136 | } |
131 | 137 | ||
132 | /* Find first data disk in a raid6 stripe */ | 138 | /* Find first data disk in a raid6 stripe */ |
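Note: the rewritten helpers above cast &bio->bi_phys_segments to atomic_t and pack two independent 16-bit counters into it, active stripes in the low half and processed stripes in the high half, so both counts can be updated without holding device_lock. A standalone userspace sketch of the same packing, using C11 atomics instead of the kernel's atomic_t (the struct and function names are illustrative, not from the patch):

#include <stdatomic.h>
#include <stdio.h>

struct fake_bio {
        _Atomic unsigned int segments;  /* plays the role of bi_phys_segments */
};

static unsigned int processed_stripes(struct fake_bio *bio)
{
        return (atomic_load(&bio->segments) >> 16) & 0xffff;
}

static unsigned int dec_active_stripes(struct fake_bio *bio)
{
        /* fetch_sub returns the old value; subtract 1 to get the new count */
        return (atomic_fetch_sub(&bio->segments, 1) - 1) & 0xffff;
}

static void inc_active_stripes(struct fake_bio *bio)
{
        atomic_fetch_add(&bio->segments, 1);
}

static void set_processed_stripes(struct fake_bio *bio, unsigned int cnt)
{
        /* cmpxchg loop: replace only the high half, preserve the low half */
        unsigned int old = atomic_load(&bio->segments);
        unsigned int new;

        do {
                new = (old & 0xffff) | (cnt << 16);
        } while (!atomic_compare_exchange_weak(&bio->segments, &old, new));
}

int main(void)
{
        struct fake_bio bio = { .segments = 1 };        /* biased count */

        inc_active_stripes(&bio);
        set_processed_stripes(&bio, 3);
        printf("processed=%u, active after dec=%u\n",
               processed_stripes(&bio), dec_active_stripes(&bio));
        return 0;
}

The cmpxchg loop is what lets the processed count be rewritten while other CPUs are concurrently incrementing or decrementing the active count in the low bits.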
@@ -190,47 +196,56 @@ static int stripe_operations_active(struct stripe_head *sh) | |||
190 | test_bit(STRIPE_COMPUTE_RUN, &sh->state); | 196 | test_bit(STRIPE_COMPUTE_RUN, &sh->state); |
191 | } | 197 | } |
192 | 198 | ||
193 | static void __release_stripe(struct r5conf *conf, struct stripe_head *sh) | 199 | static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh) |
194 | { | 200 | { |
195 | if (atomic_dec_and_test(&sh->count)) { | 201 | BUG_ON(!list_empty(&sh->lru)); |
196 | BUG_ON(!list_empty(&sh->lru)); | 202 | BUG_ON(atomic_read(&conf->active_stripes)==0); |
197 | BUG_ON(atomic_read(&conf->active_stripes)==0); | 203 | if (test_bit(STRIPE_HANDLE, &sh->state)) { |
198 | if (test_bit(STRIPE_HANDLE, &sh->state)) { | 204 | if (test_bit(STRIPE_DELAYED, &sh->state) && |
199 | if (test_bit(STRIPE_DELAYED, &sh->state)) | 205 | !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
200 | list_add_tail(&sh->lru, &conf->delayed_list); | 206 | list_add_tail(&sh->lru, &conf->delayed_list); |
201 | else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | 207 | else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && |
202 | sh->bm_seq - conf->seq_write > 0) | 208 | sh->bm_seq - conf->seq_write > 0) |
203 | list_add_tail(&sh->lru, &conf->bitmap_list); | 209 | list_add_tail(&sh->lru, &conf->bitmap_list); |
204 | else { | 210 | else { |
205 | clear_bit(STRIPE_BIT_DELAY, &sh->state); | 211 | clear_bit(STRIPE_DELAYED, &sh->state); |
206 | list_add_tail(&sh->lru, &conf->handle_list); | 212 | clear_bit(STRIPE_BIT_DELAY, &sh->state); |
207 | } | 213 | list_add_tail(&sh->lru, &conf->handle_list); |
208 | md_wakeup_thread(conf->mddev->thread); | 214 | } |
209 | } else { | 215 | md_wakeup_thread(conf->mddev->thread); |
210 | BUG_ON(stripe_operations_active(sh)); | 216 | } else { |
211 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | 217 | BUG_ON(stripe_operations_active(sh)); |
212 | if (atomic_dec_return(&conf->preread_active_stripes) | 218 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
213 | < IO_THRESHOLD) | 219 | if (atomic_dec_return(&conf->preread_active_stripes) |
214 | md_wakeup_thread(conf->mddev->thread); | 220 | < IO_THRESHOLD) |
215 | atomic_dec(&conf->active_stripes); | 221 | md_wakeup_thread(conf->mddev->thread); |
216 | if (!test_bit(STRIPE_EXPANDING, &sh->state)) { | 222 | atomic_dec(&conf->active_stripes); |
217 | list_add_tail(&sh->lru, &conf->inactive_list); | 223 | if (!test_bit(STRIPE_EXPANDING, &sh->state)) { |
218 | wake_up(&conf->wait_for_stripe); | 224 | list_add_tail(&sh->lru, &conf->inactive_list); |
219 | if (conf->retry_read_aligned) | 225 | wake_up(&conf->wait_for_stripe); |
220 | md_wakeup_thread(conf->mddev->thread); | 226 | if (conf->retry_read_aligned) |
221 | } | 227 | md_wakeup_thread(conf->mddev->thread); |
222 | } | 228 | } |
223 | } | 229 | } |
224 | } | 230 | } |
225 | 231 | ||
232 | static void __release_stripe(struct r5conf *conf, struct stripe_head *sh) | ||
233 | { | ||
234 | if (atomic_dec_and_test(&sh->count)) | ||
235 | do_release_stripe(conf, sh); | ||
236 | } | ||
237 | |||
226 | static void release_stripe(struct stripe_head *sh) | 238 | static void release_stripe(struct stripe_head *sh) |
227 | { | 239 | { |
228 | struct r5conf *conf = sh->raid_conf; | 240 | struct r5conf *conf = sh->raid_conf; |
229 | unsigned long flags; | 241 | unsigned long flags; |
230 | 242 | ||
231 | spin_lock_irqsave(&conf->device_lock, flags); | 243 | local_irq_save(flags); |
232 | __release_stripe(conf, sh); | 244 | if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) { |
233 | spin_unlock_irqrestore(&conf->device_lock, flags); | 245 | do_release_stripe(conf, sh); |
246 | spin_unlock(&conf->device_lock); | ||
247 | } | ||
248 | local_irq_restore(flags); | ||
234 | } | 249 | } |
235 | 250 | ||
236 | static inline void remove_hash(struct stripe_head *sh) | 251 | static inline void remove_hash(struct stripe_head *sh) |
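Note: release_stripe() now uses atomic_dec_and_lock(), so device_lock is taken only when the reference being dropped may be the last one; callers that leave the count above zero never touch the lock. A rough userspace sketch of that pattern, assuming C11 atomics and pthreads (all names invented for illustration):

#include <pthread.h>
#include <stdatomic.h>

/* Drop a reference; take the lock only when the count is about to hit zero.
 * Returns 1 with the lock held if this was the final reference. */
static int dec_and_lock(_Atomic int *count, pthread_mutex_t *lock)
{
        int old = atomic_load(count);

        /* Fast path: count stays positive, decrement without the lock. */
        while (old > 1) {
                if (atomic_compare_exchange_weak(count, &old, old - 1))
                        return 0;
        }

        /* Slow path: we may be last, so serialize under the lock. */
        pthread_mutex_lock(lock);
        if (atomic_fetch_sub(count, 1) == 1)
                return 1;               /* count hit zero; caller holds the lock */
        pthread_mutex_unlock(lock);
        return 0;
}

int main(void)
{
        pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
        _Atomic int refs = 2;

        dec_and_lock(&refs, &lock);             /* fast path, no lock taken */
        if (dec_and_lock(&refs, &lock)) {       /* last reference: lock is held */
                /* ... do the equivalent of do_release_stripe() here ... */
                pthread_mutex_unlock(&lock);
        }
        return 0;
}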
@@ -469,7 +484,8 @@ get_active_stripe(struct r5conf *conf, sector_t sector, | |||
469 | } else { | 484 | } else { |
470 | if (atomic_read(&sh->count)) { | 485 | if (atomic_read(&sh->count)) { |
471 | BUG_ON(!list_empty(&sh->lru) | 486 | BUG_ON(!list_empty(&sh->lru) |
472 | && !test_bit(STRIPE_EXPANDING, &sh->state)); | 487 | && !test_bit(STRIPE_EXPANDING, &sh->state) |
488 | && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)); | ||
473 | } else { | 489 | } else { |
474 | if (!test_bit(STRIPE_HANDLE, &sh->state)) | 490 | if (!test_bit(STRIPE_HANDLE, &sh->state)) |
475 | atomic_inc(&conf->active_stripes); | 491 | atomic_inc(&conf->active_stripes); |
@@ -606,6 +622,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
606 | * a chance*/ | 622 | * a chance*/ |
607 | md_check_recovery(conf->mddev); | 623 | md_check_recovery(conf->mddev); |
608 | } | 624 | } |
625 | /* | ||
626 | * Because md_wait_for_blocked_rdev | ||
627 | * will dec nr_pending, we must | ||
628 | * increment it first. | ||
629 | */ | ||
630 | atomic_inc(&rdev->nr_pending); | ||
609 | md_wait_for_blocked_rdev(rdev, conf->mddev); | 631 | md_wait_for_blocked_rdev(rdev, conf->mddev); |
610 | } else { | 632 | } else { |
611 | /* Acknowledged bad block - skip the write */ | 633 | /* Acknowledged bad block - skip the write */ |
@@ -632,6 +654,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
632 | else | 654 | else |
633 | bi->bi_sector = (sh->sector | 655 | bi->bi_sector = (sh->sector |
634 | + rdev->data_offset); | 656 | + rdev->data_offset); |
657 | if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) | ||
658 | bi->bi_rw |= REQ_FLUSH; | ||
659 | |||
635 | bi->bi_flags = 1 << BIO_UPTODATE; | 660 | bi->bi_flags = 1 << BIO_UPTODATE; |
636 | bi->bi_idx = 0; | 661 | bi->bi_idx = 0; |
637 | bi->bi_io_vec[0].bv_len = STRIPE_SIZE; | 662 | bi->bi_io_vec[0].bv_len = STRIPE_SIZE; |
@@ -741,14 +766,12 @@ static void ops_complete_biofill(void *stripe_head_ref) | |||
741 | { | 766 | { |
742 | struct stripe_head *sh = stripe_head_ref; | 767 | struct stripe_head *sh = stripe_head_ref; |
743 | struct bio *return_bi = NULL; | 768 | struct bio *return_bi = NULL; |
744 | struct r5conf *conf = sh->raid_conf; | ||
745 | int i; | 769 | int i; |
746 | 770 | ||
747 | pr_debug("%s: stripe %llu\n", __func__, | 771 | pr_debug("%s: stripe %llu\n", __func__, |
748 | (unsigned long long)sh->sector); | 772 | (unsigned long long)sh->sector); |
749 | 773 | ||
750 | /* clear completed biofills */ | 774 | /* clear completed biofills */ |
751 | spin_lock_irq(&conf->device_lock); | ||
752 | for (i = sh->disks; i--; ) { | 775 | for (i = sh->disks; i--; ) { |
753 | struct r5dev *dev = &sh->dev[i]; | 776 | struct r5dev *dev = &sh->dev[i]; |
754 | 777 | ||
@@ -766,7 +789,7 @@ static void ops_complete_biofill(void *stripe_head_ref) | |||
766 | while (rbi && rbi->bi_sector < | 789 | while (rbi && rbi->bi_sector < |
767 | dev->sector + STRIPE_SECTORS) { | 790 | dev->sector + STRIPE_SECTORS) { |
768 | rbi2 = r5_next_bio(rbi, dev->sector); | 791 | rbi2 = r5_next_bio(rbi, dev->sector); |
769 | if (!raid5_dec_bi_phys_segments(rbi)) { | 792 | if (!raid5_dec_bi_active_stripes(rbi)) { |
770 | rbi->bi_next = return_bi; | 793 | rbi->bi_next = return_bi; |
771 | return_bi = rbi; | 794 | return_bi = rbi; |
772 | } | 795 | } |
@@ -774,7 +797,6 @@ static void ops_complete_biofill(void *stripe_head_ref) | |||
774 | } | 797 | } |
775 | } | 798 | } |
776 | } | 799 | } |
777 | spin_unlock_irq(&conf->device_lock); | ||
778 | clear_bit(STRIPE_BIOFILL_RUN, &sh->state); | 800 | clear_bit(STRIPE_BIOFILL_RUN, &sh->state); |
779 | 801 | ||
780 | return_io(return_bi); | 802 | return_io(return_bi); |
@@ -786,7 +808,6 @@ static void ops_complete_biofill(void *stripe_head_ref) | |||
786 | static void ops_run_biofill(struct stripe_head *sh) | 808 | static void ops_run_biofill(struct stripe_head *sh) |
787 | { | 809 | { |
788 | struct dma_async_tx_descriptor *tx = NULL; | 810 | struct dma_async_tx_descriptor *tx = NULL; |
789 | struct r5conf *conf = sh->raid_conf; | ||
790 | struct async_submit_ctl submit; | 811 | struct async_submit_ctl submit; |
791 | int i; | 812 | int i; |
792 | 813 | ||
@@ -797,10 +818,10 @@ static void ops_run_biofill(struct stripe_head *sh) | |||
797 | struct r5dev *dev = &sh->dev[i]; | 818 | struct r5dev *dev = &sh->dev[i]; |
798 | if (test_bit(R5_Wantfill, &dev->flags)) { | 819 | if (test_bit(R5_Wantfill, &dev->flags)) { |
799 | struct bio *rbi; | 820 | struct bio *rbi; |
800 | spin_lock_irq(&conf->device_lock); | 821 | spin_lock_irq(&sh->stripe_lock); |
801 | dev->read = rbi = dev->toread; | 822 | dev->read = rbi = dev->toread; |
802 | dev->toread = NULL; | 823 | dev->toread = NULL; |
803 | spin_unlock_irq(&conf->device_lock); | 824 | spin_unlock_irq(&sh->stripe_lock); |
804 | while (rbi && rbi->bi_sector < | 825 | while (rbi && rbi->bi_sector < |
805 | dev->sector + STRIPE_SECTORS) { | 826 | dev->sector + STRIPE_SECTORS) { |
806 | tx = async_copy_data(0, rbi, dev->page, | 827 | tx = async_copy_data(0, rbi, dev->page, |
@@ -1136,12 +1157,12 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
1136 | if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) { | 1157 | if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) { |
1137 | struct bio *wbi; | 1158 | struct bio *wbi; |
1138 | 1159 | ||
1139 | spin_lock_irq(&sh->raid_conf->device_lock); | 1160 | spin_lock_irq(&sh->stripe_lock); |
1140 | chosen = dev->towrite; | 1161 | chosen = dev->towrite; |
1141 | dev->towrite = NULL; | 1162 | dev->towrite = NULL; |
1142 | BUG_ON(dev->written); | 1163 | BUG_ON(dev->written); |
1143 | wbi = dev->written = chosen; | 1164 | wbi = dev->written = chosen; |
1144 | spin_unlock_irq(&sh->raid_conf->device_lock); | 1165 | spin_unlock_irq(&sh->stripe_lock); |
1145 | 1166 | ||
1146 | while (wbi && wbi->bi_sector < | 1167 | while (wbi && wbi->bi_sector < |
1147 | dev->sector + STRIPE_SECTORS) { | 1168 | dev->sector + STRIPE_SECTORS) { |
@@ -1446,6 +1467,8 @@ static int grow_one_stripe(struct r5conf *conf) | |||
1446 | init_waitqueue_head(&sh->ops.wait_for_ops); | 1467 | init_waitqueue_head(&sh->ops.wait_for_ops); |
1447 | #endif | 1468 | #endif |
1448 | 1469 | ||
1470 | spin_lock_init(&sh->stripe_lock); | ||
1471 | |||
1449 | if (grow_buffers(sh)) { | 1472 | if (grow_buffers(sh)) { |
1450 | shrink_buffers(sh); | 1473 | shrink_buffers(sh); |
1451 | kmem_cache_free(conf->slab_cache, sh); | 1474 | kmem_cache_free(conf->slab_cache, sh); |
@@ -1731,12 +1754,15 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1731 | atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); | 1754 | atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); |
1732 | clear_bit(R5_ReadError, &sh->dev[i].flags); | 1755 | clear_bit(R5_ReadError, &sh->dev[i].flags); |
1733 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | 1756 | clear_bit(R5_ReWrite, &sh->dev[i].flags); |
1734 | } | 1757 | } else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) |
1758 | clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); | ||
1759 | |||
1735 | if (atomic_read(&rdev->read_errors)) | 1760 | if (atomic_read(&rdev->read_errors)) |
1736 | atomic_set(&rdev->read_errors, 0); | 1761 | atomic_set(&rdev->read_errors, 0); |
1737 | } else { | 1762 | } else { |
1738 | const char *bdn = bdevname(rdev->bdev, b); | 1763 | const char *bdn = bdevname(rdev->bdev, b); |
1739 | int retry = 0; | 1764 | int retry = 0; |
1765 | int set_bad = 0; | ||
1740 | 1766 | ||
1741 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); | 1767 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); |
1742 | atomic_inc(&rdev->read_errors); | 1768 | atomic_inc(&rdev->read_errors); |
@@ -1748,7 +1774,8 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1748 | mdname(conf->mddev), | 1774 | mdname(conf->mddev), |
1749 | (unsigned long long)s, | 1775 | (unsigned long long)s, |
1750 | bdn); | 1776 | bdn); |
1751 | else if (conf->mddev->degraded >= conf->max_degraded) | 1777 | else if (conf->mddev->degraded >= conf->max_degraded) { |
1778 | set_bad = 1; | ||
1752 | printk_ratelimited( | 1779 | printk_ratelimited( |
1753 | KERN_WARNING | 1780 | KERN_WARNING |
1754 | "md/raid:%s: read error not correctable " | 1781 | "md/raid:%s: read error not correctable " |
@@ -1756,8 +1783,9 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1756 | mdname(conf->mddev), | 1783 | mdname(conf->mddev), |
1757 | (unsigned long long)s, | 1784 | (unsigned long long)s, |
1758 | bdn); | 1785 | bdn); |
1759 | else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) | 1786 | } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) { |
1760 | /* Oh, no!!! */ | 1787 | /* Oh, no!!! */ |
1788 | set_bad = 1; | ||
1761 | printk_ratelimited( | 1789 | printk_ratelimited( |
1762 | KERN_WARNING | 1790 | KERN_WARNING |
1763 | "md/raid:%s: read error NOT corrected!! " | 1791 | "md/raid:%s: read error NOT corrected!! " |
@@ -1765,7 +1793,7 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1765 | mdname(conf->mddev), | 1793 | mdname(conf->mddev), |
1766 | (unsigned long long)s, | 1794 | (unsigned long long)s, |
1767 | bdn); | 1795 | bdn); |
1768 | else if (atomic_read(&rdev->read_errors) | 1796 | } else if (atomic_read(&rdev->read_errors) |
1769 | > conf->max_nr_stripes) | 1797 | > conf->max_nr_stripes) |
1770 | printk(KERN_WARNING | 1798 | printk(KERN_WARNING |
1771 | "md/raid:%s: Too many read errors, failing device %s.\n", | 1799 | "md/raid:%s: Too many read errors, failing device %s.\n", |
@@ -1773,11 +1801,19 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1773 | else | 1801 | else |
1774 | retry = 1; | 1802 | retry = 1; |
1775 | if (retry) | 1803 | if (retry) |
1776 | set_bit(R5_ReadError, &sh->dev[i].flags); | 1804 | if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) { |
1805 | set_bit(R5_ReadError, &sh->dev[i].flags); | ||
1806 | clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); | ||
1807 | } else | ||
1808 | set_bit(R5_ReadNoMerge, &sh->dev[i].flags); | ||
1777 | else { | 1809 | else { |
1778 | clear_bit(R5_ReadError, &sh->dev[i].flags); | 1810 | clear_bit(R5_ReadError, &sh->dev[i].flags); |
1779 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | 1811 | clear_bit(R5_ReWrite, &sh->dev[i].flags); |
1780 | md_error(conf->mddev, rdev); | 1812 | if (!(set_bad |
1813 | && test_bit(In_sync, &rdev->flags) | ||
1814 | && rdev_set_badblocks( | ||
1815 | rdev, sh->sector, STRIPE_SECTORS, 0))) | ||
1816 | md_error(conf->mddev, rdev); | ||
1781 | } | 1817 | } |
1782 | } | 1818 | } |
1783 | rdev_dec_pending(rdev, conf->mddev); | 1819 | rdev_dec_pending(rdev, conf->mddev); |
@@ -2325,11 +2361,18 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
2325 | (unsigned long long)bi->bi_sector, | 2361 | (unsigned long long)bi->bi_sector, |
2326 | (unsigned long long)sh->sector); | 2362 | (unsigned long long)sh->sector); |
2327 | 2363 | ||
2328 | 2364 | /* | |
2329 | spin_lock_irq(&conf->device_lock); | 2365 | * If several bio share a stripe. The bio bi_phys_segments acts as a |
2366 | * reference count to avoid race. The reference count should already be | ||
2367 | * increased before this function is called (for example, in | ||
2368 | * make_request()), so other bio sharing this stripe will not free the | ||
2369 | * stripe. If a stripe is owned by one stripe, the stripe lock will | ||
2370 | * protect it. | ||
2371 | */ | ||
2372 | spin_lock_irq(&sh->stripe_lock); | ||
2330 | if (forwrite) { | 2373 | if (forwrite) { |
2331 | bip = &sh->dev[dd_idx].towrite; | 2374 | bip = &sh->dev[dd_idx].towrite; |
2332 | if (*bip == NULL && sh->dev[dd_idx].written == NULL) | 2375 | if (*bip == NULL) |
2333 | firstwrite = 1; | 2376 | firstwrite = 1; |
2334 | } else | 2377 | } else |
2335 | bip = &sh->dev[dd_idx].toread; | 2378 | bip = &sh->dev[dd_idx].toread; |
@@ -2345,7 +2388,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
2345 | if (*bip) | 2388 | if (*bip) |
2346 | bi->bi_next = *bip; | 2389 | bi->bi_next = *bip; |
2347 | *bip = bi; | 2390 | *bip = bi; |
2348 | bi->bi_phys_segments++; | 2391 | raid5_inc_bi_active_stripes(bi); |
2349 | 2392 | ||
2350 | if (forwrite) { | 2393 | if (forwrite) { |
2351 | /* check if page is covered */ | 2394 | /* check if page is covered */ |
@@ -2360,7 +2403,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
2360 | if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) | 2403 | if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) |
2361 | set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); | 2404 | set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); |
2362 | } | 2405 | } |
2363 | spin_unlock_irq(&conf->device_lock); | 2406 | spin_unlock_irq(&sh->stripe_lock); |
2364 | 2407 | ||
2365 | pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", | 2408 | pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", |
2366 | (unsigned long long)(*bip)->bi_sector, | 2409 | (unsigned long long)(*bip)->bi_sector, |
@@ -2376,7 +2419,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
2376 | 2419 | ||
2377 | overlap: | 2420 | overlap: |
2378 | set_bit(R5_Overlap, &sh->dev[dd_idx].flags); | 2421 | set_bit(R5_Overlap, &sh->dev[dd_idx].flags); |
2379 | spin_unlock_irq(&conf->device_lock); | 2422 | spin_unlock_irq(&sh->stripe_lock); |
2380 | return 0; | 2423 | return 0; |
2381 | } | 2424 | } |
2382 | 2425 | ||
@@ -2426,10 +2469,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
2426 | rdev_dec_pending(rdev, conf->mddev); | 2469 | rdev_dec_pending(rdev, conf->mddev); |
2427 | } | 2470 | } |
2428 | } | 2471 | } |
2429 | spin_lock_irq(&conf->device_lock); | 2472 | spin_lock_irq(&sh->stripe_lock); |
2430 | /* fail all writes first */ | 2473 | /* fail all writes first */ |
2431 | bi = sh->dev[i].towrite; | 2474 | bi = sh->dev[i].towrite; |
2432 | sh->dev[i].towrite = NULL; | 2475 | sh->dev[i].towrite = NULL; |
2476 | spin_unlock_irq(&sh->stripe_lock); | ||
2433 | if (bi) { | 2477 | if (bi) { |
2434 | s->to_write--; | 2478 | s->to_write--; |
2435 | bitmap_end = 1; | 2479 | bitmap_end = 1; |
@@ -2442,13 +2486,17 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
2442 | sh->dev[i].sector + STRIPE_SECTORS) { | 2486 | sh->dev[i].sector + STRIPE_SECTORS) { |
2443 | struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); | 2487 | struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); |
2444 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | 2488 | clear_bit(BIO_UPTODATE, &bi->bi_flags); |
2445 | if (!raid5_dec_bi_phys_segments(bi)) { | 2489 | if (!raid5_dec_bi_active_stripes(bi)) { |
2446 | md_write_end(conf->mddev); | 2490 | md_write_end(conf->mddev); |
2447 | bi->bi_next = *return_bi; | 2491 | bi->bi_next = *return_bi; |
2448 | *return_bi = bi; | 2492 | *return_bi = bi; |
2449 | } | 2493 | } |
2450 | bi = nextbi; | 2494 | bi = nextbi; |
2451 | } | 2495 | } |
2496 | if (bitmap_end) | ||
2497 | bitmap_endwrite(conf->mddev->bitmap, sh->sector, | ||
2498 | STRIPE_SECTORS, 0, 0); | ||
2499 | bitmap_end = 0; | ||
2452 | /* and fail all 'written' */ | 2500 | /* and fail all 'written' */ |
2453 | bi = sh->dev[i].written; | 2501 | bi = sh->dev[i].written; |
2454 | sh->dev[i].written = NULL; | 2502 | sh->dev[i].written = NULL; |
@@ -2457,7 +2505,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
2457 | sh->dev[i].sector + STRIPE_SECTORS) { | 2505 | sh->dev[i].sector + STRIPE_SECTORS) { |
2458 | struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); | 2506 | struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); |
2459 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | 2507 | clear_bit(BIO_UPTODATE, &bi->bi_flags); |
2460 | if (!raid5_dec_bi_phys_segments(bi)) { | 2508 | if (!raid5_dec_bi_active_stripes(bi)) { |
2461 | md_write_end(conf->mddev); | 2509 | md_write_end(conf->mddev); |
2462 | bi->bi_next = *return_bi; | 2510 | bi->bi_next = *return_bi; |
2463 | *return_bi = bi; | 2511 | *return_bi = bi; |
@@ -2481,14 +2529,13 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
2481 | struct bio *nextbi = | 2529 | struct bio *nextbi = |
2482 | r5_next_bio(bi, sh->dev[i].sector); | 2530 | r5_next_bio(bi, sh->dev[i].sector); |
2483 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | 2531 | clear_bit(BIO_UPTODATE, &bi->bi_flags); |
2484 | if (!raid5_dec_bi_phys_segments(bi)) { | 2532 | if (!raid5_dec_bi_active_stripes(bi)) { |
2485 | bi->bi_next = *return_bi; | 2533 | bi->bi_next = *return_bi; |
2486 | *return_bi = bi; | 2534 | *return_bi = bi; |
2487 | } | 2535 | } |
2488 | bi = nextbi; | 2536 | bi = nextbi; |
2489 | } | 2537 | } |
2490 | } | 2538 | } |
2491 | spin_unlock_irq(&conf->device_lock); | ||
2492 | if (bitmap_end) | 2539 | if (bitmap_end) |
2493 | bitmap_endwrite(conf->mddev->bitmap, sh->sector, | 2540 | bitmap_endwrite(conf->mddev->bitmap, sh->sector, |
2494 | STRIPE_SECTORS, 0, 0); | 2541 | STRIPE_SECTORS, 0, 0); |
@@ -2692,30 +2739,23 @@ static void handle_stripe_clean_event(struct r5conf *conf, | |||
2692 | test_bit(R5_UPTODATE, &dev->flags)) { | 2739 | test_bit(R5_UPTODATE, &dev->flags)) { |
2693 | /* We can return any write requests */ | 2740 | /* We can return any write requests */ |
2694 | struct bio *wbi, *wbi2; | 2741 | struct bio *wbi, *wbi2; |
2695 | int bitmap_end = 0; | ||
2696 | pr_debug("Return write for disc %d\n", i); | 2742 | pr_debug("Return write for disc %d\n", i); |
2697 | spin_lock_irq(&conf->device_lock); | ||
2698 | wbi = dev->written; | 2743 | wbi = dev->written; |
2699 | dev->written = NULL; | 2744 | dev->written = NULL; |
2700 | while (wbi && wbi->bi_sector < | 2745 | while (wbi && wbi->bi_sector < |
2701 | dev->sector + STRIPE_SECTORS) { | 2746 | dev->sector + STRIPE_SECTORS) { |
2702 | wbi2 = r5_next_bio(wbi, dev->sector); | 2747 | wbi2 = r5_next_bio(wbi, dev->sector); |
2703 | if (!raid5_dec_bi_phys_segments(wbi)) { | 2748 | if (!raid5_dec_bi_active_stripes(wbi)) { |
2704 | md_write_end(conf->mddev); | 2749 | md_write_end(conf->mddev); |
2705 | wbi->bi_next = *return_bi; | 2750 | wbi->bi_next = *return_bi; |
2706 | *return_bi = wbi; | 2751 | *return_bi = wbi; |
2707 | } | 2752 | } |
2708 | wbi = wbi2; | 2753 | wbi = wbi2; |
2709 | } | 2754 | } |
2710 | if (dev->towrite == NULL) | 2755 | bitmap_endwrite(conf->mddev->bitmap, sh->sector, |
2711 | bitmap_end = 1; | 2756 | STRIPE_SECTORS, |
2712 | spin_unlock_irq(&conf->device_lock); | ||
2713 | if (bitmap_end) | ||
2714 | bitmap_endwrite(conf->mddev->bitmap, | ||
2715 | sh->sector, | ||
2716 | STRIPE_SECTORS, | ||
2717 | !test_bit(STRIPE_DEGRADED, &sh->state), | 2757 | !test_bit(STRIPE_DEGRADED, &sh->state), |
2718 | 0); | 2758 | 0); |
2719 | } | 2759 | } |
2720 | } | 2760 | } |
2721 | 2761 | ||
@@ -3167,7 +3207,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) | |||
3167 | 3207 | ||
3168 | /* Now to look around and see what can be done */ | 3208 | /* Now to look around and see what can be done */ |
3169 | rcu_read_lock(); | 3209 | rcu_read_lock(); |
3170 | spin_lock_irq(&conf->device_lock); | ||
3171 | for (i=disks; i--; ) { | 3210 | for (i=disks; i--; ) { |
3172 | struct md_rdev *rdev; | 3211 | struct md_rdev *rdev; |
3173 | sector_t first_bad; | 3212 | sector_t first_bad; |
@@ -3313,7 +3352,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) | |||
3313 | do_recovery = 1; | 3352 | do_recovery = 1; |
3314 | } | 3353 | } |
3315 | } | 3354 | } |
3316 | spin_unlock_irq(&conf->device_lock); | ||
3317 | if (test_bit(STRIPE_SYNCING, &sh->state)) { | 3355 | if (test_bit(STRIPE_SYNCING, &sh->state)) { |
3318 | /* If there is a failed device being replaced, | 3356 | /* If there is a failed device being replaced, |
3319 | * we must be recovering. | 3357 | * we must be recovering. |
@@ -3582,8 +3620,18 @@ static void handle_stripe(struct stripe_head *sh) | |||
3582 | 3620 | ||
3583 | finish: | 3621 | finish: |
3584 | /* wait for this device to become unblocked */ | 3622 | /* wait for this device to become unblocked */ |
3585 | if (conf->mddev->external && unlikely(s.blocked_rdev)) | 3623 | if (unlikely(s.blocked_rdev)) { |
3586 | md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev); | 3624 | if (conf->mddev->external) |
3625 | md_wait_for_blocked_rdev(s.blocked_rdev, | ||
3626 | conf->mddev); | ||
3627 | else | ||
3628 | /* Internal metadata will immediately | ||
3629 | * be written by raid5d, so we don't | ||
3630 | * need to wait here. | ||
3631 | */ | ||
3632 | rdev_dec_pending(s.blocked_rdev, | ||
3633 | conf->mddev); | ||
3634 | } | ||
3587 | 3635 | ||
3588 | if (s.handle_bad_blocks) | 3636 | if (s.handle_bad_blocks) |
3589 | for (i = disks; i--; ) { | 3637 | for (i = disks; i--; ) { |
@@ -3766,7 +3814,7 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf) | |||
3766 | * this sets the active strip count to 1 and the processed | 3814 | * this sets the active strip count to 1 and the processed |
3767 | * strip count to zero (upper 8 bits) | 3815 | * strip count to zero (upper 8 bits) |
3768 | */ | 3816 | */ |
3769 | bi->bi_phys_segments = 1; /* biased count of active stripes */ | 3817 | raid5_set_bi_stripes(bi, 1); /* biased count of active stripes */ |
3770 | } | 3818 | } |
3771 | 3819 | ||
3772 | return bi; | 3820 | return bi; |
@@ -3881,8 +3929,6 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) | |||
3881 | raid_bio->bi_next = (void*)rdev; | 3929 | raid_bio->bi_next = (void*)rdev; |
3882 | align_bi->bi_bdev = rdev->bdev; | 3930 | align_bi->bi_bdev = rdev->bdev; |
3883 | align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); | 3931 | align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); |
3884 | /* No reshape active, so we can trust rdev->data_offset */ | ||
3885 | align_bi->bi_sector += rdev->data_offset; | ||
3886 | 3932 | ||
3887 | if (!bio_fits_rdev(align_bi) || | 3933 | if (!bio_fits_rdev(align_bi) || |
3888 | is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9, | 3934 | is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9, |
@@ -3893,6 +3939,9 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) | |||
3893 | return 0; | 3939 | return 0; |
3894 | } | 3940 | } |
3895 | 3941 | ||
3942 | /* No reshape active, so we can trust rdev->data_offset */ | ||
3943 | align_bi->bi_sector += rdev->data_offset; | ||
3944 | |||
3896 | spin_lock_irq(&conf->device_lock); | 3945 | spin_lock_irq(&conf->device_lock); |
3897 | wait_event_lock_irq(conf->wait_for_stripe, | 3946 | wait_event_lock_irq(conf->wait_for_stripe, |
3898 | conf->quiesce == 0, | 3947 | conf->quiesce == 0, |
@@ -3962,6 +4011,62 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf) | |||
3962 | return sh; | 4011 | return sh; |
3963 | } | 4012 | } |
3964 | 4013 | ||
4014 | struct raid5_plug_cb { | ||
4015 | struct blk_plug_cb cb; | ||
4016 | struct list_head list; | ||
4017 | }; | ||
4018 | |||
4019 | static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule) | ||
4020 | { | ||
4021 | struct raid5_plug_cb *cb = container_of( | ||
4022 | blk_cb, struct raid5_plug_cb, cb); | ||
4023 | struct stripe_head *sh; | ||
4024 | struct mddev *mddev = cb->cb.data; | ||
4025 | struct r5conf *conf = mddev->private; | ||
4026 | |||
4027 | if (cb->list.next && !list_empty(&cb->list)) { | ||
4028 | spin_lock_irq(&conf->device_lock); | ||
4029 | while (!list_empty(&cb->list)) { | ||
4030 | sh = list_first_entry(&cb->list, struct stripe_head, lru); | ||
4031 | list_del_init(&sh->lru); | ||
4032 | /* | ||
4033 | * avoid race release_stripe_plug() sees | ||
4034 | * STRIPE_ON_UNPLUG_LIST clear but the stripe | ||
4035 | * is still in our list | ||
4036 | */ | ||
4037 | smp_mb__before_clear_bit(); | ||
4038 | clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state); | ||
4039 | __release_stripe(conf, sh); | ||
4040 | } | ||
4041 | spin_unlock_irq(&conf->device_lock); | ||
4042 | } | ||
4043 | kfree(cb); | ||
4044 | } | ||
4045 | |||
4046 | static void release_stripe_plug(struct mddev *mddev, | ||
4047 | struct stripe_head *sh) | ||
4048 | { | ||
4049 | struct blk_plug_cb *blk_cb = blk_check_plugged( | ||
4050 | raid5_unplug, mddev, | ||
4051 | sizeof(struct raid5_plug_cb)); | ||
4052 | struct raid5_plug_cb *cb; | ||
4053 | |||
4054 | if (!blk_cb) { | ||
4055 | release_stripe(sh); | ||
4056 | return; | ||
4057 | } | ||
4058 | |||
4059 | cb = container_of(blk_cb, struct raid5_plug_cb, cb); | ||
4060 | |||
4061 | if (cb->list.next == NULL) | ||
4062 | INIT_LIST_HEAD(&cb->list); | ||
4063 | |||
4064 | if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)) | ||
4065 | list_add_tail(&sh->lru, &cb->list); | ||
4066 | else | ||
4067 | release_stripe(sh); | ||
4068 | } | ||
4069 | |||
3965 | static void make_request(struct mddev *mddev, struct bio * bi) | 4070 | static void make_request(struct mddev *mddev, struct bio * bi) |
3966 | { | 4071 | { |
3967 | struct r5conf *conf = mddev->private; | 4072 | struct r5conf *conf = mddev->private; |
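Note: release_stripe_plug() parks stripes on a per-submitter list obtained from blk_check_plugged(), and raid5_unplug() later drains that whole list under device_lock in one pass, replacing the old mddev plug counter. A simplified sketch of that queue-per-caller, flush-in-one-locked-pass shape, using plain pthreads (every name below is invented; none of this is the block-layer API):

#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

struct fake_stripe {
        struct fake_stripe *next;
        atomic_flag on_plug_list;       /* plays the role of STRIPE_ON_UNPLUG_LIST */
};

struct fake_plug {
        struct fake_stripe *head;       /* per-caller list, touched by one task only */
};

static pthread_mutex_t device_lock = PTHREAD_MUTEX_INITIALIZER;

static void release_locked(struct fake_stripe *sh)
{
        (void)sh;                       /* stands in for __release_stripe() */
}

/* Queue on the caller's plug when one exists, otherwise release right away. */
static void release_plugged(struct fake_plug *plug, struct fake_stripe *sh)
{
        if (plug && !atomic_flag_test_and_set(&sh->on_plug_list)) {
                sh->next = plug->head;  /* no lock needed: the list is ours alone */
                plug->head = sh;
                return;
        }
        pthread_mutex_lock(&device_lock);
        release_locked(sh);
        pthread_mutex_unlock(&device_lock);
}

/* Flush the whole batch with device_lock taken just once. */
static void unplug(struct fake_plug *plug)
{
        pthread_mutex_lock(&device_lock);
        while (plug->head) {
                struct fake_stripe *sh = plug->head;

                plug->head = sh->next;
                atomic_flag_clear(&sh->on_plug_list);
                release_locked(sh);
        }
        pthread_mutex_unlock(&device_lock);
}

The on-list flag mirrors STRIPE_ON_UNPLUG_LIST: it keeps a stripe from being queued on two plug lists at once, and get_active_stripe() above now tolerates a referenced stripe whose lru is non-empty while that flag is set.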
@@ -3971,7 +4076,6 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
3971 | struct stripe_head *sh; | 4076 | struct stripe_head *sh; |
3972 | const int rw = bio_data_dir(bi); | 4077 | const int rw = bio_data_dir(bi); |
3973 | int remaining; | 4078 | int remaining; |
3974 | int plugged; | ||
3975 | 4079 | ||
3976 | if (unlikely(bi->bi_rw & REQ_FLUSH)) { | 4080 | if (unlikely(bi->bi_rw & REQ_FLUSH)) { |
3977 | md_flush_request(mddev, bi); | 4081 | md_flush_request(mddev, bi); |
@@ -3990,7 +4094,6 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
3990 | bi->bi_next = NULL; | 4094 | bi->bi_next = NULL; |
3991 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ | 4095 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ |
3992 | 4096 | ||
3993 | plugged = mddev_check_plugged(mddev); | ||
3994 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { | 4097 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { |
3995 | DEFINE_WAIT(w); | 4098 | DEFINE_WAIT(w); |
3996 | int previous; | 4099 | int previous; |
@@ -4089,24 +4192,19 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
4089 | finish_wait(&conf->wait_for_overlap, &w); | 4192 | finish_wait(&conf->wait_for_overlap, &w); |
4090 | set_bit(STRIPE_HANDLE, &sh->state); | 4193 | set_bit(STRIPE_HANDLE, &sh->state); |
4091 | clear_bit(STRIPE_DELAYED, &sh->state); | 4194 | clear_bit(STRIPE_DELAYED, &sh->state); |
4092 | if ((bi->bi_rw & REQ_SYNC) && | 4195 | if ((bi->bi_rw & REQ_NOIDLE) && |
4093 | !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | 4196 | !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
4094 | atomic_inc(&conf->preread_active_stripes); | 4197 | atomic_inc(&conf->preread_active_stripes); |
4095 | release_stripe(sh); | 4198 | release_stripe_plug(mddev, sh); |
4096 | } else { | 4199 | } else { |
4097 | /* cannot get stripe for read-ahead, just give-up */ | 4200 | /* cannot get stripe for read-ahead, just give-up */ |
4098 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | 4201 | clear_bit(BIO_UPTODATE, &bi->bi_flags); |
4099 | finish_wait(&conf->wait_for_overlap, &w); | 4202 | finish_wait(&conf->wait_for_overlap, &w); |
4100 | break; | 4203 | break; |
4101 | } | 4204 | } |
4102 | |||
4103 | } | 4205 | } |
4104 | if (!plugged) | ||
4105 | md_wakeup_thread(mddev->thread); | ||
4106 | 4206 | ||
4107 | spin_lock_irq(&conf->device_lock); | 4207 | remaining = raid5_dec_bi_active_stripes(bi); |
4108 | remaining = raid5_dec_bi_phys_segments(bi); | ||
4109 | spin_unlock_irq(&conf->device_lock); | ||
4110 | if (remaining == 0) { | 4208 | if (remaining == 0) { |
4111 | 4209 | ||
4112 | if ( rw == WRITE ) | 4210 | if ( rw == WRITE ) |
@@ -4462,7 +4560,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) | |||
4462 | sector += STRIPE_SECTORS, | 4560 | sector += STRIPE_SECTORS, |
4463 | scnt++) { | 4561 | scnt++) { |
4464 | 4562 | ||
4465 | if (scnt < raid5_bi_hw_segments(raid_bio)) | 4563 | if (scnt < raid5_bi_processed_stripes(raid_bio)) |
4466 | /* already done this stripe */ | 4564 | /* already done this stripe */ |
4467 | continue; | 4565 | continue; |
4468 | 4566 | ||
@@ -4470,25 +4568,24 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) | |||
4470 | 4568 | ||
4471 | if (!sh) { | 4569 | if (!sh) { |
4472 | /* failed to get a stripe - must wait */ | 4570 | /* failed to get a stripe - must wait */ |
4473 | raid5_set_bi_hw_segments(raid_bio, scnt); | 4571 | raid5_set_bi_processed_stripes(raid_bio, scnt); |
4474 | conf->retry_read_aligned = raid_bio; | 4572 | conf->retry_read_aligned = raid_bio; |
4475 | return handled; | 4573 | return handled; |
4476 | } | 4574 | } |
4477 | 4575 | ||
4478 | if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { | 4576 | if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { |
4479 | release_stripe(sh); | 4577 | release_stripe(sh); |
4480 | raid5_set_bi_hw_segments(raid_bio, scnt); | 4578 | raid5_set_bi_processed_stripes(raid_bio, scnt); |
4481 | conf->retry_read_aligned = raid_bio; | 4579 | conf->retry_read_aligned = raid_bio; |
4482 | return handled; | 4580 | return handled; |
4483 | } | 4581 | } |
4484 | 4582 | ||
4583 | set_bit(R5_ReadNoMerge, &sh->dev[dd_idx].flags); | ||
4485 | handle_stripe(sh); | 4584 | handle_stripe(sh); |
4486 | release_stripe(sh); | 4585 | release_stripe(sh); |
4487 | handled++; | 4586 | handled++; |
4488 | } | 4587 | } |
4489 | spin_lock_irq(&conf->device_lock); | 4588 | remaining = raid5_dec_bi_active_stripes(raid_bio); |
4490 | remaining = raid5_dec_bi_phys_segments(raid_bio); | ||
4491 | spin_unlock_irq(&conf->device_lock); | ||
4492 | if (remaining == 0) | 4589 | if (remaining == 0) |
4493 | bio_endio(raid_bio, 0); | 4590 | bio_endio(raid_bio, 0); |
4494 | if (atomic_dec_and_test(&conf->active_aligned_reads)) | 4591 | if (atomic_dec_and_test(&conf->active_aligned_reads)) |
@@ -4496,6 +4593,30 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) | |||
4496 | return handled; | 4593 | return handled; |
4497 | } | 4594 | } |
4498 | 4595 | ||
4596 | #define MAX_STRIPE_BATCH 8 | ||
4597 | static int handle_active_stripes(struct r5conf *conf) | ||
4598 | { | ||
4599 | struct stripe_head *batch[MAX_STRIPE_BATCH], *sh; | ||
4600 | int i, batch_size = 0; | ||
4601 | |||
4602 | while (batch_size < MAX_STRIPE_BATCH && | ||
4603 | (sh = __get_priority_stripe(conf)) != NULL) | ||
4604 | batch[batch_size++] = sh; | ||
4605 | |||
4606 | if (batch_size == 0) | ||
4607 | return batch_size; | ||
4608 | spin_unlock_irq(&conf->device_lock); | ||
4609 | |||
4610 | for (i = 0; i < batch_size; i++) | ||
4611 | handle_stripe(batch[i]); | ||
4612 | |||
4613 | cond_resched(); | ||
4614 | |||
4615 | spin_lock_irq(&conf->device_lock); | ||
4616 | for (i = 0; i < batch_size; i++) | ||
4617 | __release_stripe(conf, batch[i]); | ||
4618 | return batch_size; | ||
4619 | } | ||
4499 | 4620 | ||
4500 | /* | 4621 | /* |
4501 | * This is our raid5 kernel thread. | 4622 | * This is our raid5 kernel thread. |
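Note: handle_active_stripes() pulls up to MAX_STRIPE_BATCH stripes off the priority list, drops device_lock for the expensive handle_stripe() calls, then re-takes the lock once to release the whole batch, so raid5d no longer bounces the lock per stripe. A minimal sketch of that grab-a-batch, work-unlocked, return-the-batch loop in plain C with pthreads (the queue and work functions are placeholder stubs, not the kernel's):

#include <pthread.h>
#include <stddef.h>

#define MAX_BATCH 8

struct work_item {
        struct work_item *next;
};

static struct work_item *queue_head;    /* protected by queue_lock */
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

static struct work_item *dequeue_locked(void)
{
        struct work_item *item = queue_head;

        if (item)
                queue_head = item->next;
        return item;
}

static void process(struct work_item *item)
{
        (void)item;             /* stands in for handle_stripe(): slow, no lock held */
}

static void release_locked(struct work_item *item)
{
        (void)item;             /* stands in for __release_stripe() */
}

/* Called with queue_lock held; returns how many items were handled. */
static int handle_batch(void)
{
        struct work_item *batch[MAX_BATCH];
        int i, n = 0;

        while (n < MAX_BATCH && (batch[n] = dequeue_locked()) != NULL)
                n++;
        if (n == 0)
                return 0;

        pthread_mutex_unlock(&queue_lock);      /* do the heavy work unlocked */
        for (i = 0; i < n; i++)
                process(batch[i]);
        pthread_mutex_lock(&queue_lock);        /* re-acquire before returning */

        for (i = 0; i < n; i++)
                release_locked(batch[i]);
        return n;
}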
@@ -4506,7 +4627,6 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) | |||
4506 | */ | 4627 | */ |
4507 | static void raid5d(struct mddev *mddev) | 4628 | static void raid5d(struct mddev *mddev) |
4508 | { | 4629 | { |
4509 | struct stripe_head *sh; | ||
4510 | struct r5conf *conf = mddev->private; | 4630 | struct r5conf *conf = mddev->private; |
4511 | int handled; | 4631 | int handled; |
4512 | struct blk_plug plug; | 4632 | struct blk_plug plug; |
@@ -4520,8 +4640,9 @@ static void raid5d(struct mddev *mddev) | |||
4520 | spin_lock_irq(&conf->device_lock); | 4640 | spin_lock_irq(&conf->device_lock); |
4521 | while (1) { | 4641 | while (1) { |
4522 | struct bio *bio; | 4642 | struct bio *bio; |
4643 | int batch_size; | ||
4523 | 4644 | ||
4524 | if (atomic_read(&mddev->plug_cnt) == 0 && | 4645 | if ( |
4525 | !list_empty(&conf->bitmap_list)) { | 4646 | !list_empty(&conf->bitmap_list)) { |
4526 | /* Now is a good time to flush some bitmap updates */ | 4647 | /* Now is a good time to flush some bitmap updates */ |
4527 | conf->seq_flush++; | 4648 | conf->seq_flush++; |
@@ -4531,8 +4652,7 @@ static void raid5d(struct mddev *mddev) | |||
4531 | conf->seq_write = conf->seq_flush; | 4652 | conf->seq_write = conf->seq_flush; |
4532 | activate_bit_delay(conf); | 4653 | activate_bit_delay(conf); |
4533 | } | 4654 | } |
4534 | if (atomic_read(&mddev->plug_cnt) == 0) | 4655 | raid5_activate_delayed(conf); |
4535 | raid5_activate_delayed(conf); | ||
4536 | 4656 | ||
4537 | while ((bio = remove_bio_from_retry(conf))) { | 4657 | while ((bio = remove_bio_from_retry(conf))) { |
4538 | int ok; | 4658 | int ok; |
@@ -4544,21 +4664,16 @@ static void raid5d(struct mddev *mddev) | |||
4544 | handled++; | 4664 | handled++; |
4545 | } | 4665 | } |
4546 | 4666 | ||
4547 | sh = __get_priority_stripe(conf); | 4667 | batch_size = handle_active_stripes(conf); |
4548 | 4668 | if (!batch_size) | |
4549 | if (!sh) | ||
4550 | break; | 4669 | break; |
4551 | spin_unlock_irq(&conf->device_lock); | 4670 | handled += batch_size; |
4552 | |||
4553 | handled++; | ||
4554 | handle_stripe(sh); | ||
4555 | release_stripe(sh); | ||
4556 | cond_resched(); | ||
4557 | 4671 | ||
4558 | if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) | 4672 | if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) { |
4673 | spin_unlock_irq(&conf->device_lock); | ||
4559 | md_check_recovery(mddev); | 4674 | md_check_recovery(mddev); |
4560 | 4675 | spin_lock_irq(&conf->device_lock); | |
4561 | spin_lock_irq(&conf->device_lock); | 4676 | } |
4562 | } | 4677 | } |
4563 | pr_debug("%d stripes handled\n", handled); | 4678 | pr_debug("%d stripes handled\n", handled); |
4564 | 4679 | ||
@@ -4823,6 +4938,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) | |||
4823 | int raid_disk, memory, max_disks; | 4938 | int raid_disk, memory, max_disks; |
4824 | struct md_rdev *rdev; | 4939 | struct md_rdev *rdev; |
4825 | struct disk_info *disk; | 4940 | struct disk_info *disk; |
4941 | char pers_name[6]; | ||
4826 | 4942 | ||
4827 | if (mddev->new_level != 5 | 4943 | if (mddev->new_level != 5 |
4828 | && mddev->new_level != 4 | 4944 | && mddev->new_level != 4 |
@@ -4946,7 +5062,8 @@ static struct r5conf *setup_conf(struct mddev *mddev) | |||
4946 | printk(KERN_INFO "md/raid:%s: allocated %dkB\n", | 5062 | printk(KERN_INFO "md/raid:%s: allocated %dkB\n", |
4947 | mdname(mddev), memory); | 5063 | mdname(mddev), memory); |
4948 | 5064 | ||
4949 | conf->thread = md_register_thread(raid5d, mddev, NULL); | 5065 | sprintf(pers_name, "raid%d", mddev->new_level); |
5066 | conf->thread = md_register_thread(raid5d, mddev, pers_name); | ||
4950 | if (!conf->thread) { | 5067 | if (!conf->thread) { |
4951 | printk(KERN_ERR | 5068 | printk(KERN_ERR |
4952 | "md/raid:%s: couldn't allocate thread.\n", | 5069 | "md/raid:%s: couldn't allocate thread.\n", |
@@ -5465,10 +5582,9 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
5465 | if (rdev->saved_raid_disk >= 0 && | 5582 | if (rdev->saved_raid_disk >= 0 && |
5466 | rdev->saved_raid_disk >= first && | 5583 | rdev->saved_raid_disk >= first && |
5467 | conf->disks[rdev->saved_raid_disk].rdev == NULL) | 5584 | conf->disks[rdev->saved_raid_disk].rdev == NULL) |
5468 | disk = rdev->saved_raid_disk; | 5585 | first = rdev->saved_raid_disk; |
5469 | else | 5586 | |
5470 | disk = first; | 5587 | for (disk = first; disk <= last; disk++) { |
5471 | for ( ; disk <= last ; disk++) { | ||
5472 | p = conf->disks + disk; | 5588 | p = conf->disks + disk; |
5473 | if (p->rdev == NULL) { | 5589 | if (p->rdev == NULL) { |
5474 | clear_bit(In_sync, &rdev->flags); | 5590 | clear_bit(In_sync, &rdev->flags); |
@@ -5477,8 +5593,11 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
5477 | if (rdev->saved_raid_disk != disk) | 5593 | if (rdev->saved_raid_disk != disk) |
5478 | conf->fullsync = 1; | 5594 | conf->fullsync = 1; |
5479 | rcu_assign_pointer(p->rdev, rdev); | 5595 | rcu_assign_pointer(p->rdev, rdev); |
5480 | break; | 5596 | goto out; |
5481 | } | 5597 | } |
5598 | } | ||
5599 | for (disk = first; disk <= last; disk++) { | ||
5600 | p = conf->disks + disk; | ||
5482 | if (test_bit(WantReplacement, &p->rdev->flags) && | 5601 | if (test_bit(WantReplacement, &p->rdev->flags) && |
5483 | p->replacement == NULL) { | 5602 | p->replacement == NULL) { |
5484 | clear_bit(In_sync, &rdev->flags); | 5603 | clear_bit(In_sync, &rdev->flags); |
@@ -5490,6 +5609,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
5490 | break; | 5609 | break; |
5491 | } | 5610 | } |
5492 | } | 5611 | } |
5612 | out: | ||
5493 | print_raid5_conf(conf); | 5613 | print_raid5_conf(conf); |
5494 | return err; | 5614 | return err; |
5495 | } | 5615 | } |