Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--	drivers/md/raid5.c | 191
1 file changed, 97 insertions(+), 94 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index cc055da02e2a..16f5c21963db 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -133,7 +133,7 @@ static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
 static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
 {
 	int sectors = bio_sectors(bio);
-	if (bio->bi_sector + sectors < sector + STRIPE_SECTORS)
+	if (bio->bi_iter.bi_sector + sectors < sector + STRIPE_SECTORS)
 		return bio->bi_next;
 	else
 		return NULL;
@@ -225,7 +225,7 @@ static void return_io(struct bio *return_bi)
 
 		return_bi = bi->bi_next;
 		bi->bi_next = NULL;
-		bi->bi_size = 0;
+		bi->bi_iter.bi_size = 0;
 		trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
 					 bi, 0);
 		bio_endio(bi, 0);
@@ -675,8 +675,10 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 					 || !conf->inactive_blocked),
 					*(conf->hash_locks + hash));
 			conf->inactive_blocked = 0;
-		} else
+		} else {
 			init_stripe(sh, sector, previous);
+			atomic_inc(&sh->count);
+		}
 	} else {
 		spin_lock(&conf->device_lock);
 		if (atomic_read(&sh->count)) {
@@ -687,20 +689,19 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 		} else {
 			if (!test_bit(STRIPE_HANDLE, &sh->state))
 				atomic_inc(&conf->active_stripes);
-			BUG_ON(list_empty(&sh->lru));
+			BUG_ON(list_empty(&sh->lru) &&
+			       !test_bit(STRIPE_EXPANDING, &sh->state));
 			list_del_init(&sh->lru);
 			if (sh->group) {
 				sh->group->stripes_cnt--;
 				sh->group = NULL;
 			}
 		}
+		atomic_inc(&sh->count);
 		spin_unlock(&conf->device_lock);
 	}
 	} while (sh == NULL);
 
-	if (sh)
-		atomic_inc(&sh->count);
-
 	spin_unlock_irq(conf->hash_locks + hash);
 	return sh;
 }
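
The get_active_stripe() hunks above move atomic_inc(&sh->count) inside the branches that still hold the hash lock or conf->device_lock, rather than taking the reference once after the retry loop, so no other CPU can observe the stripe with count == 0 after the lock is dropped. A minimal generic sketch of that rule, using a hypothetical struct cached_obj rather than raid5 code:

#include <linux/atomic.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct cached_obj {
	atomic_t count;			/* references held by users */
	struct list_head lru;		/* on an inactive list while count == 0 */
};

/* Take the reference while the lock that made the object findable is
 * still held; a racing release can then never see count == 0 and
 * recycle the object between lookup and first use. */
static void cached_obj_get_locked(spinlock_t *lock, struct cached_obj *obj)
{
	lockdep_assert_held(lock);		/* caller found obj under this lock */
	if (atomic_inc_return(&obj->count) == 1)
		list_del_init(&obj->lru);	/* first user: leave the inactive list */
}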
@@ -851,10 +852,10 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 				bi->bi_rw, i);
 			atomic_inc(&sh->count);
 			if (use_new_offset(conf, sh))
-				bi->bi_sector = (sh->sector
+				bi->bi_iter.bi_sector = (sh->sector
 						 + rdev->new_data_offset);
 			else
-				bi->bi_sector = (sh->sector
+				bi->bi_iter.bi_sector = (sh->sector
 						 + rdev->data_offset);
 			if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
 				bi->bi_rw |= REQ_NOMERGE;
@@ -862,7 +863,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			bi->bi_vcnt = 1;
 			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
 			bi->bi_io_vec[0].bv_offset = 0;
-			bi->bi_size = STRIPE_SIZE;
+			bi->bi_iter.bi_size = STRIPE_SIZE;
 			/*
 			 * If this is discard request, set bi_vcnt 0. We don't
 			 * want to confuse SCSI because SCSI will replace payload
@@ -898,15 +899,15 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 				rbi->bi_rw, i);
 			atomic_inc(&sh->count);
 			if (use_new_offset(conf, sh))
-				rbi->bi_sector = (sh->sector
+				rbi->bi_iter.bi_sector = (sh->sector
 						  + rrdev->new_data_offset);
 			else
-				rbi->bi_sector = (sh->sector
+				rbi->bi_iter.bi_sector = (sh->sector
 						  + rrdev->data_offset);
 			rbi->bi_vcnt = 1;
 			rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
 			rbi->bi_io_vec[0].bv_offset = 0;
-			rbi->bi_size = STRIPE_SIZE;
+			rbi->bi_iter.bi_size = STRIPE_SIZE;
 			/*
 			 * If this is discard request, set bi_vcnt 0. We don't
 			 * want to confuse SCSI because SCSI will replace payload
@@ -934,24 +935,24 @@ static struct dma_async_tx_descriptor *
 async_copy_data(int frombio, struct bio *bio, struct page *page,
 	sector_t sector, struct dma_async_tx_descriptor *tx)
 {
-	struct bio_vec *bvl;
+	struct bio_vec bvl;
+	struct bvec_iter iter;
 	struct page *bio_page;
-	int i;
 	int page_offset;
 	struct async_submit_ctl submit;
 	enum async_tx_flags flags = 0;
 
-	if (bio->bi_sector >= sector)
-		page_offset = (signed)(bio->bi_sector - sector) * 512;
+	if (bio->bi_iter.bi_sector >= sector)
+		page_offset = (signed)(bio->bi_iter.bi_sector - sector) * 512;
 	else
-		page_offset = (signed)(sector - bio->bi_sector) * -512;
+		page_offset = (signed)(sector - bio->bi_iter.bi_sector) * -512;
 
 	if (frombio)
 		flags |= ASYNC_TX_FENCE;
 	init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
 
-	bio_for_each_segment(bvl, bio, i) {
-		int len = bvl->bv_len;
+	bio_for_each_segment(bvl, bio, iter) {
+		int len = bvl.bv_len;
 		int clen;
 		int b_offset = 0;
 
@@ -967,8 +968,8 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
 			clen = len;
 
 		if (clen > 0) {
-			b_offset += bvl->bv_offset;
-			bio_page = bvl->bv_page;
+			b_offset += bvl.bv_offset;
+			bio_page = bvl.bv_page;
 			if (frombio)
 				tx = async_memcpy(page, bio_page, page_offset,
 						  b_offset, clen, &submit);
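
The async_copy_data() hunks above are part of the block layer's switch to struct bvec_iter: bi_sector, bi_size and bi_idx now live in bio->bi_iter, and bio_for_each_segment() walks struct bio_vec values driven by a bvec_iter cursor instead of an integer index into bi_io_vec. A minimal sketch of the new iteration idiom, as a hypothetical helper that is not part of this patch:

#include <linux/bio.h>

/* Sum a bio's remaining payload segment by segment with the bvec_iter API. */
static unsigned int bio_payload_bytes(struct bio *bio)
{
	struct bio_vec bvec;	/* each segment is copied out by value */
	struct bvec_iter iter;	/* iteration cursor, replaces the old index */
	unsigned int bytes = 0;

	bio_for_each_segment(bvec, bio, iter)
		bytes += bvec.bv_len;	/* note '.' rather than '->' */

	return bytes;		/* matches bio->bi_iter.bi_size */
}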
@@ -1011,7 +1012,7 @@ static void ops_complete_biofill(void *stripe_head_ref)
 			BUG_ON(!dev->read);
 			rbi = dev->read;
 			dev->read = NULL;
-			while (rbi && rbi->bi_sector <
+			while (rbi && rbi->bi_iter.bi_sector <
 				dev->sector + STRIPE_SECTORS) {
 				rbi2 = r5_next_bio(rbi, dev->sector);
 				if (!raid5_dec_bi_active_stripes(rbi)) {
@@ -1047,7 +1048,7 @@ static void ops_run_biofill(struct stripe_head *sh)
 			dev->read = rbi = dev->toread;
 			dev->toread = NULL;
 			spin_unlock_irq(&sh->stripe_lock);
-			while (rbi && rbi->bi_sector <
+			while (rbi && rbi->bi_iter.bi_sector <
 				dev->sector + STRIPE_SECTORS) {
 				tx = async_copy_data(0, rbi, dev->page,
 					dev->sector, tx);
@@ -1389,7 +1390,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 			wbi = dev->written = chosen;
 			spin_unlock_irq(&sh->stripe_lock);
 
-			while (wbi && wbi->bi_sector <
+			while (wbi && wbi->bi_iter.bi_sector <
 				dev->sector + STRIPE_SECTORS) {
 				if (wbi->bi_rw & REQ_FUA)
 					set_bit(R5_WantFUA, &dev->flags);
@@ -2110,6 +2111,7 @@ static void raid5_end_write_request(struct bio *bi, int error)
 			set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
 	} else {
 		if (!uptodate) {
+			set_bit(STRIPE_DEGRADED, &sh->state);
 			set_bit(WriteErrorSeen, &rdev->flags);
 			set_bit(R5_WriteError, &sh->dev[i].flags);
 			if (!test_and_set_bit(WantReplacement, &rdev->flags))
@@ -2613,7 +2615,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 	int firstwrite=0;
 
 	pr_debug("adding bi b#%llu to stripe s#%llu\n",
-		(unsigned long long)bi->bi_sector,
+		(unsigned long long)bi->bi_iter.bi_sector,
 		(unsigned long long)sh->sector);
 
 	/*
@@ -2631,12 +2633,12 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 		firstwrite = 1;
 	} else
 		bip = &sh->dev[dd_idx].toread;
-	while (*bip && (*bip)->bi_sector < bi->bi_sector) {
-		if (bio_end_sector(*bip) > bi->bi_sector)
+	while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector) {
+		if (bio_end_sector(*bip) > bi->bi_iter.bi_sector)
 			goto overlap;
 		bip = & (*bip)->bi_next;
 	}
-	if (*bip && (*bip)->bi_sector < bio_end_sector(bi))
+	if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi))
 		goto overlap;
 
 	BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
@@ -2650,7 +2652,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 		sector_t sector = sh->dev[dd_idx].sector;
 		for (bi=sh->dev[dd_idx].towrite;
 		     sector < sh->dev[dd_idx].sector + STRIPE_SECTORS &&
-		     bi && bi->bi_sector <= sector;
+		     bi && bi->bi_iter.bi_sector <= sector;
 		     bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) {
 			if (bio_end_sector(bi) >= sector)
 				sector = bio_end_sector(bi);
@@ -2660,7 +2662,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 	}
 
 	pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
-		(unsigned long long)(*bip)->bi_sector,
+		(unsigned long long)(*bip)->bi_iter.bi_sector,
 		(unsigned long long)sh->sector, dd_idx);
 	spin_unlock_irq(&sh->stripe_lock);
 
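
The add_stripe_bio() hunks above only rename fields; the overlap test itself is unchanged and still compares bi_iter.bi_sector against bio_end_sector(). A hypothetical stand-alone helper, not in the patch, showing the same half-open interval test:

#include <linux/bio.h>

/* Two bios overlap iff neither ends at or before the sector where the
 * other starts; bio_end_sector() is bi_iter.bi_sector plus the bio's
 * length in sectors, so the ranges are half-open. */
static bool bios_overlap(struct bio *a, struct bio *b)
{
	return a->bi_iter.bi_sector < bio_end_sector(b) &&
	       b->bi_iter.bi_sector < bio_end_sector(a);
}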
@@ -2735,7 +2737,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 			wake_up(&conf->wait_for_overlap);
 
-		while (bi && bi->bi_sector <
+		while (bi && bi->bi_iter.bi_sector <
 			sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -2754,7 +2756,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		bi = sh->dev[i].written;
 		sh->dev[i].written = NULL;
 		if (bi) bitmap_end = 1;
-		while (bi && bi->bi_sector <
+		while (bi && bi->bi_iter.bi_sector <
 		       sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -2778,7 +2780,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 			spin_unlock_irq(&sh->stripe_lock);
 			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 				wake_up(&conf->wait_for_overlap);
-			while (bi && bi->bi_sector <
+			while (bi && bi->bi_iter.bi_sector <
 			       sh->dev[i].sector + STRIPE_SECTORS) {
 				struct bio *nextbi =
 					r5_next_bio(bi, sh->dev[i].sector);
@@ -3002,7 +3004,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 				clear_bit(R5_UPTODATE, &dev->flags);
 				wbi = dev->written;
 				dev->written = NULL;
-				while (wbi && wbi->bi_sector <
+				while (wbi && wbi->bi_iter.bi_sector <
 					dev->sector + STRIPE_SECTORS) {
 					wbi2 = r5_next_bio(wbi, dev->sector);
 					if (!raid5_dec_bi_active_stripes(wbi)) {
@@ -3608,7 +3610,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 			 */
 			set_bit(R5_Insync, &dev->flags);
 
-		if (rdev && test_bit(R5_WriteError, &dev->flags)) {
+		if (test_bit(R5_WriteError, &dev->flags)) {
 			/* This flag does not apply to '.replacement'
 			 * only to .rdev, so make sure to check that*/
 			struct md_rdev *rdev2 = rcu_dereference(
@@ -3621,7 +3623,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 			} else
 				clear_bit(R5_WriteError, &dev->flags);
 		}
-		if (rdev && test_bit(R5_MadeGood, &dev->flags)) {
+		if (test_bit(R5_MadeGood, &dev->flags)) {
 			/* This flag does not apply to '.replacement'
 			 * only to .rdev, so make sure to check that*/
 			struct md_rdev *rdev2 = rcu_dereference(
@@ -4094,7 +4096,7 @@ static int raid5_mergeable_bvec(struct request_queue *q,
 
 static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
 {
-	sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
+	sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev);
 	unsigned int chunk_sectors = mddev->chunk_sectors;
 	unsigned int bio_sectors = bio_sectors(bio);
 
@@ -4231,9 +4233,9 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 	/*
 	 * compute position
 	 */
-	align_bi->bi_sector = raid5_compute_sector(conf, raid_bio->bi_sector,
-						   0,
-						   &dd_idx, NULL);
+	align_bi->bi_iter.bi_sector =
+		raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector,
+				     0, &dd_idx, NULL);
 
 	end_sector = bio_end_sector(align_bi);
 	rcu_read_lock();
@@ -4258,7 +4260,8 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
 
 		if (!bio_fits_rdev(align_bi) ||
-		    is_badblock(rdev, align_bi->bi_sector, bio_sectors(align_bi),
+		    is_badblock(rdev, align_bi->bi_iter.bi_sector,
+				bio_sectors(align_bi),
 				&first_bad, &bad_sectors)) {
 			/* too big in some way, or has a known bad block */
 			bio_put(align_bi);
@@ -4267,7 +4270,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		}
 
 		/* No reshape active, so we can trust rdev->data_offset */
-		align_bi->bi_sector += rdev->data_offset;
+		align_bi->bi_iter.bi_sector += rdev->data_offset;
 
 		spin_lock_irq(&conf->device_lock);
 		wait_event_lock_irq(conf->wait_for_stripe,
@@ -4279,7 +4282,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		if (mddev->gendisk)
 			trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
 					      align_bi, disk_devt(mddev->gendisk),
-					      raid_bio->bi_sector);
+					      raid_bio->bi_iter.bi_sector);
 		generic_make_request(align_bi);
 		return 1;
 	} else {
@@ -4462,8 +4465,8 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 		/* Skip discard while reshape is happening */
 		return;
 
-	logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
-	last_sector = bi->bi_sector + (bi->bi_size>>9);
+	logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+	last_sector = bi->bi_iter.bi_sector + (bi->bi_iter.bi_size>>9);
 
 	bi->bi_next = NULL;
 	bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
@@ -4567,7 +4570,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 		return;
 	}
 
-	logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+	logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
 	last_sector = bio_end_sector(bi);
 	bi->bi_next = NULL;
 	bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
@@ -5051,7 +5054,8 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 	int remaining;
 	int handled = 0;
 
-	logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+	logical_sector = raid_bio->bi_iter.bi_sector &
+		~((sector_t)STRIPE_SECTORS-1);
 	sector = raid5_compute_sector(conf, logical_sector,
 				      0, &dd_idx, NULL);
 	last_sector = bio_end_sector(raid_bio);
@@ -5510,23 +5514,43 @@ raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks)
 	return sectors * (raid_disks - conf->max_degraded);
 }
 
+static void free_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu)
+{
+	safe_put_page(percpu->spare_page);
+	kfree(percpu->scribble);
+	percpu->spare_page = NULL;
+	percpu->scribble = NULL;
+}
+
+static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu)
+{
+	if (conf->level == 6 && !percpu->spare_page)
+		percpu->spare_page = alloc_page(GFP_KERNEL);
+	if (!percpu->scribble)
+		percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
+
+	if (!percpu->scribble || (conf->level == 6 && !percpu->spare_page)) {
+		free_scratch_buffer(conf, percpu);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 static void raid5_free_percpu(struct r5conf *conf)
 {
-	struct raid5_percpu *percpu;
 	unsigned long cpu;
 
 	if (!conf->percpu)
 		return;
 
-	get_online_cpus();
-	for_each_possible_cpu(cpu) {
-		percpu = per_cpu_ptr(conf->percpu, cpu);
-		safe_put_page(percpu->spare_page);
-		kfree(percpu->scribble);
-	}
 #ifdef CONFIG_HOTPLUG_CPU
 	unregister_cpu_notifier(&conf->cpu_notify);
 #endif
+
+	get_online_cpus();
+	for_each_possible_cpu(cpu)
+		free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
 	put_online_cpus();
 
 	free_percpu(conf->percpu);
@@ -5553,15 +5577,7 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		if (conf->level == 6 && !percpu->spare_page)
-			percpu->spare_page = alloc_page(GFP_KERNEL);
-		if (!percpu->scribble)
-			percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
-
-		if (!percpu->scribble ||
-		    (conf->level == 6 && !percpu->spare_page)) {
-			safe_put_page(percpu->spare_page);
-			kfree(percpu->scribble);
+		if (alloc_scratch_buffer(conf, percpu)) {
 			pr_err("%s: failed memory allocation for cpu%ld\n",
 			       __func__, cpu);
 			return notifier_from_errno(-ENOMEM);
@@ -5569,10 +5585,7 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
-		safe_put_page(percpu->spare_page);
-		kfree(percpu->scribble);
-		percpu->spare_page = NULL;
-		percpu->scribble = NULL;
+		free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
 		break;
 	default:
 		break;
@@ -5584,40 +5597,29 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
 static int raid5_alloc_percpu(struct r5conf *conf)
 {
 	unsigned long cpu;
-	struct page *spare_page;
-	struct raid5_percpu __percpu *allcpus;
-	void *scribble;
-	int err;
+	int err = 0;
 
-	allcpus = alloc_percpu(struct raid5_percpu);
-	if (!allcpus)
+	conf->percpu = alloc_percpu(struct raid5_percpu);
+	if (!conf->percpu)
 		return -ENOMEM;
-	conf->percpu = allcpus;
+
+#ifdef CONFIG_HOTPLUG_CPU
+	conf->cpu_notify.notifier_call = raid456_cpu_notify;
+	conf->cpu_notify.priority = 0;
+	err = register_cpu_notifier(&conf->cpu_notify);
+	if (err)
+		return err;
+#endif
 
 	get_online_cpus();
-	err = 0;
 	for_each_present_cpu(cpu) {
-		if (conf->level == 6) {
-			spare_page = alloc_page(GFP_KERNEL);
-			if (!spare_page) {
-				err = -ENOMEM;
-				break;
-			}
-			per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
-		}
-		scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
-		if (!scribble) {
-			err = -ENOMEM;
+		err = alloc_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
+		if (err) {
+			pr_err("%s: failed memory allocation for cpu%ld\n",
+			       __func__, cpu);
 			break;
 		}
-		per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
 	}
-#ifdef CONFIG_HOTPLUG_CPU
-	conf->cpu_notify.notifier_call = raid456_cpu_notify;
-	conf->cpu_notify.priority = 0;
-	if (err == 0)
-		err = register_cpu_notifier(&conf->cpu_notify);
-#endif
 	put_online_cpus();
 
 	return err;
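
The raid5_alloc_percpu() rewrite above registers the hotplug notifier before walking the present CPUs, so a CPU coming online during the walk is caught by CPU_UP_PREPARE instead of being missed, and any allocation failure is cleaned up centrally by raid5_free_percpu(). A generic skeleton of that ordering, with hypothetical my_* names that are not part of the patch:

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/notifier.h>

struct my_ctx {
	struct notifier_block notifier;		/* handles future hotplug events */
};

static int my_cpu_alloc(struct my_ctx *ctx, unsigned long cpu)
{
	return 0;				/* per-CPU setup would go here */
}

static int my_percpu_init(struct my_ctx *ctx)
{
	unsigned long cpu;
	int err;

	err = register_cpu_notifier(&ctx->notifier);	/* future CPUs first */
	if (err)
		return err;

	get_online_cpus();				/* hold off hotplug */
	for_each_present_cpu(cpu) {			/* then existing CPUs */
		err = my_cpu_alloc(ctx, cpu);
		if (err)
			break;
	}
	put_online_cpus();

	return err;					/* caller unwinds on error */
}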
@@ -6099,6 +6101,7 @@ static int run(struct mddev *mddev)
 		blk_queue_io_min(mddev->queue, chunk_size);
 		blk_queue_io_opt(mddev->queue, chunk_size *
 				 (conf->raid_disks - conf->max_degraded));
+		mddev->queue->limits.raid_partial_stripes_expensive = 1;
 		/*
 		 * We can only discard a whole stripe. It doesn't make sense to
 		 * discard data disk but write parity disk