aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid1.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-09-02 16:10:25 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-02 16:10:25 -0400
commit1081230b748de8f03f37f80c53dfa89feda9b8de (patch)
tree7238d60e01f0843bad8f03b5d84e4220fbba5e76 /drivers/md/raid1.c
parentdf910390e2db07a76c87f258475f6c96253cee6c (diff)
parent2ca495ac27d245513c11fed70591b1838250e240 (diff)
Merge branch 'for-4.3/core' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe: "This first core part of the block IO changes contains: - Cleanup of the bio IO error signaling from Christoph. We used to rely on the uptodate bit and passing around of an error, now we store the error in the bio itself. - Improvement of the above from myself, by shrinking the bio size down again to fit in two cachelines on x86-64. - Revert of the max_hw_sectors cap removal from a revision again, from Jeff Moyer. This caused performance regressions in various tests. Reinstate the limit, bump it to a more reasonable size instead. - Make /sys/block/<dev>/queue/discard_max_bytes writeable, by me. Most devices have huge trim limits, which can cause nasty latencies when deleting files. Enable the admin to configure the size down. We will look into having a more sane default instead of UINT_MAX sectors. - Improvement of the SG gaps logic from Keith Busch. - Enable the block core to handle arbitrarily sized bios, which enables a nice simplification of bio_add_page() (which is an IO hot path). From Kent. - Improvements to the partition io stats accounting, making it faster. From Ming Lei. - Also from Ming Lei, a basic fixup for overflow of the sysfs pending file in blk-mq, as well as a fix for a blk-mq timeout race condition. - Ming Lin has been carrying Kent's above-mentioned patches forward for a while, and testing them. Ming also did a few fixes around that. - Sasha Levin found and fixed a use-after-free problem introduced by the bio->bi_error changes from Christoph. 
- Small blk cgroup cleanup from Viresh Kumar" * 'for-4.3/core' of git://git.kernel.dk/linux-block: (26 commits) blk: Fix bio_io_vec index when checking bvec gaps block: Replace SG_GAPS with new queue limits mask block: bump BLK_DEF_MAX_SECTORS to 2560 Revert "block: remove artifical max_hw_sectors cap" blk-mq: fix race between timeout and freeing request blk-mq: fix buffer overflow when reading sysfs file of 'pending' Documentation: update notes in biovecs about arbitrarily sized bios block: remove bio_get_nr_vecs() fs: use helper bio_add_page() instead of open coding on bi_io_vec block: kill merge_bvec_fn() completely md/raid5: get rid of bio_fits_rdev() md/raid5: split bio for chunk_aligned_read block: remove split code in blkdev_issue_{discard,write_same} btrfs: remove bio splitting and merge_bvec_fn() calls bcache: remove driver private bio splitting code block: simplify bio_add_page() block: make generic_make_request handle arbitrarily sized bios blk-cgroup: Drop unlikely before IS_ERR(_OR_NULL) block: don't access bio->bi_error after bio_put() block: shrink struct bio down to 2 cache lines again ...
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--drivers/md/raid1.c115
1 files changed, 29 insertions, 86 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 967a4ed73929..f39d69f884de 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -255,9 +255,10 @@ static void call_bio_endio(struct r1bio *r1_bio)
255 done = 1; 255 done = 1;
256 256
257 if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) 257 if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
258 clear_bit(BIO_UPTODATE, &bio->bi_flags); 258 bio->bi_error = -EIO;
259
259 if (done) { 260 if (done) {
260 bio_endio(bio, 0); 261 bio_endio(bio);
261 /* 262 /*
262 * Wake up any possible resync thread that waits for the device 263 * Wake up any possible resync thread that waits for the device
263 * to go idle. 264 * to go idle.
@@ -312,9 +313,9 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
312 return mirror; 313 return mirror;
313} 314}
314 315
315static void raid1_end_read_request(struct bio *bio, int error) 316static void raid1_end_read_request(struct bio *bio)
316{ 317{
317 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 318 int uptodate = !bio->bi_error;
318 struct r1bio *r1_bio = bio->bi_private; 319 struct r1bio *r1_bio = bio->bi_private;
319 int mirror; 320 int mirror;
320 struct r1conf *conf = r1_bio->mddev->private; 321 struct r1conf *conf = r1_bio->mddev->private;
@@ -397,9 +398,8 @@ static void r1_bio_write_done(struct r1bio *r1_bio)
397 } 398 }
398} 399}
399 400
400static void raid1_end_write_request(struct bio *bio, int error) 401static void raid1_end_write_request(struct bio *bio)
401{ 402{
402 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
403 struct r1bio *r1_bio = bio->bi_private; 403 struct r1bio *r1_bio = bio->bi_private;
404 int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state); 404 int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
405 struct r1conf *conf = r1_bio->mddev->private; 405 struct r1conf *conf = r1_bio->mddev->private;
@@ -410,7 +410,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
410 /* 410 /*
411 * 'one mirror IO has finished' event handler: 411 * 'one mirror IO has finished' event handler:
412 */ 412 */
413 if (!uptodate) { 413 if (bio->bi_error) {
414 set_bit(WriteErrorSeen, 414 set_bit(WriteErrorSeen,
415 &conf->mirrors[mirror].rdev->flags); 415 &conf->mirrors[mirror].rdev->flags);
416 if (!test_and_set_bit(WantReplacement, 416 if (!test_and_set_bit(WantReplacement,
@@ -557,7 +557,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
557 rdev = rcu_dereference(conf->mirrors[disk].rdev); 557 rdev = rcu_dereference(conf->mirrors[disk].rdev);
558 if (r1_bio->bios[disk] == IO_BLOCKED 558 if (r1_bio->bios[disk] == IO_BLOCKED
559 || rdev == NULL 559 || rdev == NULL
560 || test_bit(Unmerged, &rdev->flags)
561 || test_bit(Faulty, &rdev->flags)) 560 || test_bit(Faulty, &rdev->flags))
562 continue; 561 continue;
563 if (!test_bit(In_sync, &rdev->flags) && 562 if (!test_bit(In_sync, &rdev->flags) &&
@@ -708,38 +707,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
708 return best_disk; 707 return best_disk;
709} 708}
710 709
711static int raid1_mergeable_bvec(struct mddev *mddev,
712 struct bvec_merge_data *bvm,
713 struct bio_vec *biovec)
714{
715 struct r1conf *conf = mddev->private;
716 sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
717 int max = biovec->bv_len;
718
719 if (mddev->merge_check_needed) {
720 int disk;
721 rcu_read_lock();
722 for (disk = 0; disk < conf->raid_disks * 2; disk++) {
723 struct md_rdev *rdev = rcu_dereference(
724 conf->mirrors[disk].rdev);
725 if (rdev && !test_bit(Faulty, &rdev->flags)) {
726 struct request_queue *q =
727 bdev_get_queue(rdev->bdev);
728 if (q->merge_bvec_fn) {
729 bvm->bi_sector = sector +
730 rdev->data_offset;
731 bvm->bi_bdev = rdev->bdev;
732 max = min(max, q->merge_bvec_fn(
733 q, bvm, biovec));
734 }
735 }
736 }
737 rcu_read_unlock();
738 }
739 return max;
740
741}
742
743static int raid1_congested(struct mddev *mddev, int bits) 710static int raid1_congested(struct mddev *mddev, int bits)
744{ 711{
745 struct r1conf *conf = mddev->private; 712 struct r1conf *conf = mddev->private;
@@ -793,7 +760,7 @@ static void flush_pending_writes(struct r1conf *conf)
793 if (unlikely((bio->bi_rw & REQ_DISCARD) && 760 if (unlikely((bio->bi_rw & REQ_DISCARD) &&
794 !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) 761 !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
795 /* Just ignore it */ 762 /* Just ignore it */
796 bio_endio(bio, 0); 763 bio_endio(bio);
797 else 764 else
798 generic_make_request(bio); 765 generic_make_request(bio);
799 bio = next; 766 bio = next;
@@ -1068,7 +1035,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
1068 if (unlikely((bio->bi_rw & REQ_DISCARD) && 1035 if (unlikely((bio->bi_rw & REQ_DISCARD) &&
1069 !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) 1036 !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
1070 /* Just ignore it */ 1037 /* Just ignore it */
1071 bio_endio(bio, 0); 1038 bio_endio(bio);
1072 else 1039 else
1073 generic_make_request(bio); 1040 generic_make_request(bio);
1074 bio = next; 1041 bio = next;
@@ -1158,7 +1125,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
1158 * non-zero, then it is the number of not-completed requests. 1125 * non-zero, then it is the number of not-completed requests.
1159 */ 1126 */
1160 bio->bi_phys_segments = 0; 1127 bio->bi_phys_segments = 0;
1161 clear_bit(BIO_SEG_VALID, &bio->bi_flags); 1128 bio_clear_flag(bio, BIO_SEG_VALID);
1162 1129
1163 if (rw == READ) { 1130 if (rw == READ) {
1164 /* 1131 /*
@@ -1269,8 +1236,7 @@ read_again:
1269 break; 1236 break;
1270 } 1237 }
1271 r1_bio->bios[i] = NULL; 1238 r1_bio->bios[i] = NULL;
1272 if (!rdev || test_bit(Faulty, &rdev->flags) 1239 if (!rdev || test_bit(Faulty, &rdev->flags)) {
1273 || test_bit(Unmerged, &rdev->flags)) {
1274 if (i < conf->raid_disks) 1240 if (i < conf->raid_disks)
1275 set_bit(R1BIO_Degraded, &r1_bio->state); 1241 set_bit(R1BIO_Degraded, &r1_bio->state);
1276 continue; 1242 continue;
@@ -1617,7 +1583,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
1617 struct raid1_info *p; 1583 struct raid1_info *p;
1618 int first = 0; 1584 int first = 0;
1619 int last = conf->raid_disks - 1; 1585 int last = conf->raid_disks - 1;
1620 struct request_queue *q = bdev_get_queue(rdev->bdev);
1621 1586
1622 if (mddev->recovery_disabled == conf->recovery_disabled) 1587 if (mddev->recovery_disabled == conf->recovery_disabled)
1623 return -EBUSY; 1588 return -EBUSY;
@@ -1625,11 +1590,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
1625 if (rdev->raid_disk >= 0) 1590 if (rdev->raid_disk >= 0)
1626 first = last = rdev->raid_disk; 1591 first = last = rdev->raid_disk;
1627 1592
1628 if (q->merge_bvec_fn) {
1629 set_bit(Unmerged, &rdev->flags);
1630 mddev->merge_check_needed = 1;
1631 }
1632
1633 for (mirror = first; mirror <= last; mirror++) { 1593 for (mirror = first; mirror <= last; mirror++) {
1634 p = conf->mirrors+mirror; 1594 p = conf->mirrors+mirror;
1635 if (!p->rdev) { 1595 if (!p->rdev) {
@@ -1661,19 +1621,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
1661 break; 1621 break;
1662 } 1622 }
1663 } 1623 }
1664 if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
1665 /* Some requests might not have seen this new
1666 * merge_bvec_fn. We must wait for them to complete
1667 * before merging the device fully.
1668 * First we make sure any code which has tested
1669 * our function has submitted the request, then
1670 * we wait for all outstanding requests to complete.
1671 */
1672 synchronize_sched();
1673 freeze_array(conf, 0);
1674 unfreeze_array(conf);
1675 clear_bit(Unmerged, &rdev->flags);
1676 }
1677 md_integrity_add_rdev(rdev, mddev); 1624 md_integrity_add_rdev(rdev, mddev);
1678 if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev))) 1625 if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
1679 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); 1626 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
@@ -1737,7 +1684,7 @@ abort:
1737 return err; 1684 return err;
1738} 1685}
1739 1686
1740static void end_sync_read(struct bio *bio, int error) 1687static void end_sync_read(struct bio *bio)
1741{ 1688{
1742 struct r1bio *r1_bio = bio->bi_private; 1689 struct r1bio *r1_bio = bio->bi_private;
1743 1690
@@ -1748,16 +1695,16 @@ static void end_sync_read(struct bio *bio, int error)
1748 * or re-read if the read failed. 1695 * or re-read if the read failed.
1749 * We don't do much here, just schedule handling by raid1d 1696 * We don't do much here, just schedule handling by raid1d
1750 */ 1697 */
1751 if (test_bit(BIO_UPTODATE, &bio->bi_flags)) 1698 if (!bio->bi_error)
1752 set_bit(R1BIO_Uptodate, &r1_bio->state); 1699 set_bit(R1BIO_Uptodate, &r1_bio->state);
1753 1700
1754 if (atomic_dec_and_test(&r1_bio->remaining)) 1701 if (atomic_dec_and_test(&r1_bio->remaining))
1755 reschedule_retry(r1_bio); 1702 reschedule_retry(r1_bio);
1756} 1703}
1757 1704
1758static void end_sync_write(struct bio *bio, int error) 1705static void end_sync_write(struct bio *bio)
1759{ 1706{
1760 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 1707 int uptodate = !bio->bi_error;
1761 struct r1bio *r1_bio = bio->bi_private; 1708 struct r1bio *r1_bio = bio->bi_private;
1762 struct mddev *mddev = r1_bio->mddev; 1709 struct mddev *mddev = r1_bio->mddev;
1763 struct r1conf *conf = mddev->private; 1710 struct r1conf *conf = mddev->private;
@@ -1944,7 +1891,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
1944 idx ++; 1891 idx ++;
1945 } 1892 }
1946 set_bit(R1BIO_Uptodate, &r1_bio->state); 1893 set_bit(R1BIO_Uptodate, &r1_bio->state);
1947 set_bit(BIO_UPTODATE, &bio->bi_flags); 1894 bio->bi_error = 0;
1948 return 1; 1895 return 1;
1949} 1896}
1950 1897
@@ -1968,15 +1915,14 @@ static void process_checks(struct r1bio *r1_bio)
1968 for (i = 0; i < conf->raid_disks * 2; i++) { 1915 for (i = 0; i < conf->raid_disks * 2; i++) {
1969 int j; 1916 int j;
1970 int size; 1917 int size;
1971 int uptodate; 1918 int error;
1972 struct bio *b = r1_bio->bios[i]; 1919 struct bio *b = r1_bio->bios[i];
1973 if (b->bi_end_io != end_sync_read) 1920 if (b->bi_end_io != end_sync_read)
1974 continue; 1921 continue;
1975 /* fixup the bio for reuse, but preserve BIO_UPTODATE */ 1922 /* fixup the bio for reuse, but preserve errno */
1976 uptodate = test_bit(BIO_UPTODATE, &b->bi_flags); 1923 error = b->bi_error;
1977 bio_reset(b); 1924 bio_reset(b);
1978 if (!uptodate) 1925 b->bi_error = error;
1979 clear_bit(BIO_UPTODATE, &b->bi_flags);
1980 b->bi_vcnt = vcnt; 1926 b->bi_vcnt = vcnt;
1981 b->bi_iter.bi_size = r1_bio->sectors << 9; 1927 b->bi_iter.bi_size = r1_bio->sectors << 9;
1982 b->bi_iter.bi_sector = r1_bio->sector + 1928 b->bi_iter.bi_sector = r1_bio->sector +
@@ -1999,7 +1945,7 @@ static void process_checks(struct r1bio *r1_bio)
1999 } 1945 }
2000 for (primary = 0; primary < conf->raid_disks * 2; primary++) 1946 for (primary = 0; primary < conf->raid_disks * 2; primary++)
2001 if (r1_bio->bios[primary]->bi_end_io == end_sync_read && 1947 if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
2002 test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) { 1948 !r1_bio->bios[primary]->bi_error) {
2003 r1_bio->bios[primary]->bi_end_io = NULL; 1949 r1_bio->bios[primary]->bi_end_io = NULL;
2004 rdev_dec_pending(conf->mirrors[primary].rdev, mddev); 1950 rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
2005 break; 1951 break;
@@ -2009,14 +1955,14 @@ static void process_checks(struct r1bio *r1_bio)
2009 int j; 1955 int j;
2010 struct bio *pbio = r1_bio->bios[primary]; 1956 struct bio *pbio = r1_bio->bios[primary];
2011 struct bio *sbio = r1_bio->bios[i]; 1957 struct bio *sbio = r1_bio->bios[i];
2012 int uptodate = test_bit(BIO_UPTODATE, &sbio->bi_flags); 1958 int error = sbio->bi_error;
2013 1959
2014 if (sbio->bi_end_io != end_sync_read) 1960 if (sbio->bi_end_io != end_sync_read)
2015 continue; 1961 continue;
2016 /* Now we can 'fixup' the BIO_UPTODATE flag */ 1962 /* Now we can 'fixup' the error value */
2017 set_bit(BIO_UPTODATE, &sbio->bi_flags); 1963 sbio->bi_error = 0;
2018 1964
2019 if (uptodate) { 1965 if (!error) {
2020 for (j = vcnt; j-- ; ) { 1966 for (j = vcnt; j-- ; ) {
2021 struct page *p, *s; 1967 struct page *p, *s;
2022 p = pbio->bi_io_vec[j].bv_page; 1968 p = pbio->bi_io_vec[j].bv_page;
@@ -2031,7 +1977,7 @@ static void process_checks(struct r1bio *r1_bio)
2031 if (j >= 0) 1977 if (j >= 0)
2032 atomic64_add(r1_bio->sectors, &mddev->resync_mismatches); 1978 atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
2033 if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery) 1979 if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
2034 && uptodate)) { 1980 && !error)) {
2035 /* No need to write to this device. */ 1981 /* No need to write to this device. */
2036 sbio->bi_end_io = NULL; 1982 sbio->bi_end_io = NULL;
2037 rdev_dec_pending(conf->mirrors[i].rdev, mddev); 1983 rdev_dec_pending(conf->mirrors[i].rdev, mddev);
@@ -2272,11 +2218,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
2272 struct bio *bio = r1_bio->bios[m]; 2218 struct bio *bio = r1_bio->bios[m];
2273 if (bio->bi_end_io == NULL) 2219 if (bio->bi_end_io == NULL)
2274 continue; 2220 continue;
2275 if (test_bit(BIO_UPTODATE, &bio->bi_flags) && 2221 if (!bio->bi_error &&
2276 test_bit(R1BIO_MadeGood, &r1_bio->state)) { 2222 test_bit(R1BIO_MadeGood, &r1_bio->state)) {
2277 rdev_clear_badblocks(rdev, r1_bio->sector, s, 0); 2223 rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
2278 } 2224 }
2279 if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && 2225 if (bio->bi_error &&
2280 test_bit(R1BIO_WriteError, &r1_bio->state)) { 2226 test_bit(R1BIO_WriteError, &r1_bio->state)) {
2281 if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0)) 2227 if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
2282 md_error(conf->mddev, rdev); 2228 md_error(conf->mddev, rdev);
@@ -2715,7 +2661,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
2715 /* remove last page from this bio */ 2661 /* remove last page from this bio */
2716 bio->bi_vcnt--; 2662 bio->bi_vcnt--;
2717 bio->bi_iter.bi_size -= len; 2663 bio->bi_iter.bi_size -= len;
2718 __clear_bit(BIO_SEG_VALID, &bio->bi_flags); 2664 bio_clear_flag(bio, BIO_SEG_VALID);
2719 } 2665 }
2720 goto bio_full; 2666 goto bio_full;
2721 } 2667 }
@@ -2810,8 +2756,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
2810 goto abort; 2756 goto abort;
2811 disk->rdev = rdev; 2757 disk->rdev = rdev;
2812 q = bdev_get_queue(rdev->bdev); 2758 q = bdev_get_queue(rdev->bdev);
2813 if (q->merge_bvec_fn)
2814 mddev->merge_check_needed = 1;
2815 2759
2816 disk->head_position = 0; 2760 disk->head_position = 0;
2817 disk->seq_start = MaxSector; 2761 disk->seq_start = MaxSector;
@@ -3176,7 +3120,6 @@ static struct md_personality raid1_personality =
3176 .quiesce = raid1_quiesce, 3120 .quiesce = raid1_quiesce,
3177 .takeover = raid1_takeover, 3121 .takeover = raid1_takeover,
3178 .congested = raid1_congested, 3122 .congested = raid1_congested,
3179 .mergeable_bvec = raid1_mergeable_bvec,
3180}; 3123};
3181 3124
3182static int __init raid_init(void) 3125static int __init raid_init(void)