author     Jens Axboe <axboe@kernel.dk>    2013-03-24 23:38:59 -0400
committer  Jens Axboe <axboe@kernel.dk>    2013-03-24 23:38:59 -0400
commit     705cd0ea1cde2ce9225f1485c5a32c5841cacc5f (patch)
tree       72bc2c5eded0e04fc5e70ca2bd6df93a2e264112
parent     c8158819d506a8aedeca53c52dfb709a0aabe011 (diff)
parent     29ed7813ce5c4661261aeebddb1b8660e0860223 (diff)
Merge branch 'for-jens' of http://evilpiepirate.org/git/linux-bcache into for-3.10/core
This contains Kent's prep work for the immutable bio_vecs.
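
Most of the churn below is mechanical: open-coded arithmetic on bi_sector/bi_size is replaced with the helpers this series adds or converts to (bio_sectors(), bio_end_sector(), bio_advance(), bio_reset(), bio_copy_data(), bio_alloc_pages(), and the bio_for_each_segment_all() iterator). As a rough sketch of what the conversions rely on, reconstructed from the open-coded logic being deleted in blk-core.c and md.c (the authoritative definitions live in include/linux/bio.h and fs/bio.c in this series; bio_advance_sketch below is a simplified stand-in, since the real bio_advance() also steps through the biovec when the iterator has to move):

/* Sketch only: what the converted call sites used to do by hand. */
#define bio_sectors(bio)	((bio)->bi_size >> 9)
#define bio_end_sector(bio)	((bio)->bi_sector + bio_sectors(bio))

static inline void bio_advance_sketch(struct bio *bio, unsigned int nbytes)
{
	if (bio_integrity(bio))
		bio_integrity_advance(bio, nbytes);

	bio->bi_sector += nbytes >> 9;	/* 512-byte sectors */
	bio->bi_size -= nbytes;
}

For example, req_bio_endio()'s manual "bi_size -= nbytes; bi_sector += nbytes >> 9; bio_integrity_advance()" sequence in the first hunk collapses into a single bio_advance() call.
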
-rw-r--r--  block/blk-core.c                            82
-rw-r--r--  block/cfq-iosched.c                          7
-rw-r--r--  block/deadline-iosched.c                     2
-rw-r--r--  drivers/block/aoe/aoecmd.c                   2
-rw-r--r--  drivers/block/brd.c                          3
-rw-r--r--  drivers/block/floppy.c                       1
-rw-r--r--  drivers/block/pktcdvd.c                    102
-rw-r--r--  drivers/block/rbd.c                          2
-rw-r--r--  drivers/md/dm-crypt.c                        3
-rw-r--r--  drivers/md/dm-raid1.c                        2
-rw-r--r--  drivers/md/dm-stripe.c                       2
-rw-r--r--  drivers/md/dm-verity.c                       4
-rw-r--r--  drivers/md/faulty.c                          6
-rw-r--r--  drivers/md/linear.c                          3
-rw-r--r--  drivers/md/md.c                             17
-rw-r--r--  drivers/md/raid0.c                           9
-rw-r--r--  drivers/md/raid1.c                         133
-rw-r--r--  drivers/md/raid10.c                         78
-rw-r--r--  drivers/md/raid5.c                          49
-rw-r--r--  drivers/message/fusion/mptsas.c              6
-rw-r--r--  drivers/s390/block/dcssblk.c                 3
-rw-r--r--  drivers/scsi/libsas/sas_expander.c           6
-rw-r--r--  drivers/scsi/mpt2sas/mpt2sas_transport.c    10
-rw-r--r--  fs/bio-integrity.c                         144
-rw-r--r--  fs/bio.c                                   366
-rw-r--r--  fs/btrfs/extent_io.c                         3
-rw-r--r--  fs/btrfs/volumes.c                           2
-rw-r--r--  fs/buffer.c                                  1
-rw-r--r--  fs/direct-io.c                               8
-rw-r--r--  fs/exofs/ore.c                               2
-rw-r--r--  fs/exofs/ore_raid.c                          2
-rw-r--r--  fs/gfs2/lops.c                               2
-rw-r--r--  fs/jfs/jfs_logmgr.c                          2
-rw-r--r--  fs/logfs/dev_bdev.c                          5
-rw-r--r--  include/linux/bio.h                        115
-rw-r--r--  include/linux/blk_types.h                    3
-rw-r--r--  include/trace/events/block.h                12
-rw-r--r--  mm/bounce.c                                 75
-rw-r--r--  mm/page_io.c                                 1
39 files changed, 645 insertions, 630 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 441f3488a766..f224d1793ee5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -159,20 +159,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
159 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 159 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
160 error = -EIO; 160 error = -EIO;
161 161
162 if (unlikely(nbytes > bio->bi_size)) {
163 printk(KERN_ERR "%s: want %u bytes done, %u left\n",
164 __func__, nbytes, bio->bi_size);
165 nbytes = bio->bi_size;
166 }
167
168 if (unlikely(rq->cmd_flags & REQ_QUIET)) 162 if (unlikely(rq->cmd_flags & REQ_QUIET))
169 set_bit(BIO_QUIET, &bio->bi_flags); 163 set_bit(BIO_QUIET, &bio->bi_flags);
170 164
171 bio->bi_size -= nbytes; 165 bio_advance(bio, nbytes);
172 bio->bi_sector += (nbytes >> 9);
173
174 if (bio_integrity(bio))
175 bio_integrity_advance(bio, nbytes);
176 166
177 /* don't actually finish bio if it's part of flush sequence */ 167 /* don't actually finish bio if it's part of flush sequence */
178 if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) 168 if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
@@ -1609,7 +1599,7 @@ static void handle_bad_sector(struct bio *bio)
1609 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", 1599 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
1610 bdevname(bio->bi_bdev, b), 1600 bdevname(bio->bi_bdev, b),
1611 bio->bi_rw, 1601 bio->bi_rw,
1612 (unsigned long long)bio->bi_sector + bio_sectors(bio), 1602 (unsigned long long)bio_end_sector(bio),
1613 (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); 1603 (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
1614 1604
1615 set_bit(BIO_EOF, &bio->bi_flags); 1605 set_bit(BIO_EOF, &bio->bi_flags);
@@ -2292,8 +2282,7 @@ EXPORT_SYMBOL(blk_fetch_request);
2292 **/ 2282 **/
2293bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) 2283bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2294{ 2284{
2295 int total_bytes, bio_nbytes, next_idx = 0; 2285 int total_bytes;
2296 struct bio *bio;
2297 2286
2298 if (!req->bio) 2287 if (!req->bio)
2299 return false; 2288 return false;
@@ -2339,56 +2328,21 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2339 2328
2340 blk_account_io_completion(req, nr_bytes); 2329 blk_account_io_completion(req, nr_bytes);
2341 2330
2342 total_bytes = bio_nbytes = 0; 2331 total_bytes = 0;
2343 while ((bio = req->bio) != NULL) { 2332 while (req->bio) {
2344 int nbytes; 2333 struct bio *bio = req->bio;
2334 unsigned bio_bytes = min(bio->bi_size, nr_bytes);
2345 2335
2346 if (nr_bytes >= bio->bi_size) { 2336 if (bio_bytes == bio->bi_size)
2347 req->bio = bio->bi_next; 2337 req->bio = bio->bi_next;
2348 nbytes = bio->bi_size;
2349 req_bio_endio(req, bio, nbytes, error);
2350 next_idx = 0;
2351 bio_nbytes = 0;
2352 } else {
2353 int idx = bio->bi_idx + next_idx;
2354
2355 if (unlikely(idx >= bio->bi_vcnt)) {
2356 blk_dump_rq_flags(req, "__end_that");
2357 printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
2358 __func__, idx, bio->bi_vcnt);
2359 break;
2360 }
2361
2362 nbytes = bio_iovec_idx(bio, idx)->bv_len;
2363 BIO_BUG_ON(nbytes > bio->bi_size);
2364
2365 /*
2366 * not a complete bvec done
2367 */
2368 if (unlikely(nbytes > nr_bytes)) {
2369 bio_nbytes += nr_bytes;
2370 total_bytes += nr_bytes;
2371 break;
2372 }
2373 2338
2374 /* 2339 req_bio_endio(req, bio, bio_bytes, error);
2375 * advance to the next vector
2376 */
2377 next_idx++;
2378 bio_nbytes += nbytes;
2379 }
2380 2340
2381 total_bytes += nbytes; 2341 total_bytes += bio_bytes;
2382 nr_bytes -= nbytes; 2342 nr_bytes -= bio_bytes;
2383 2343
2384 bio = req->bio; 2344 if (!nr_bytes)
2385 if (bio) { 2345 break;
2386 /*
2387 * end more in this run, or just return 'not-done'
2388 */
2389 if (unlikely(nr_bytes <= 0))
2390 break;
2391 }
2392 } 2346 }
2393 2347
2394 /* 2348 /*
@@ -2404,16 +2358,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2404 return false; 2358 return false;
2405 } 2359 }
2406 2360
2407 /*
2408 * if the request wasn't completed, update state
2409 */
2410 if (bio_nbytes) {
2411 req_bio_endio(req, bio, bio_nbytes, error);
2412 bio->bi_idx += next_idx;
2413 bio_iovec(bio)->bv_offset += nr_bytes;
2414 bio_iovec(bio)->bv_len -= nr_bytes;
2415 }
2416
2417 req->__data_len -= total_bytes; 2361 req->__data_len -= total_bytes;
2418 req->buffer = bio_data(req->bio); 2362 req->buffer = bio_data(req->bio);
2419 2363
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 4f0ade74cfd0..d5cd3131c57a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2270,11 +2270,8 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
2270 return NULL; 2270 return NULL;
2271 2271
2272 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); 2272 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
2273 if (cfqq) { 2273 if (cfqq)
2274 sector_t sector = bio->bi_sector + bio_sectors(bio); 2274 return elv_rb_find(&cfqq->sort_list, bio_end_sector(bio));
2275
2276 return elv_rb_find(&cfqq->sort_list, sector);
2277 }
2278 2275
2279 return NULL; 2276 return NULL;
2280} 2277}
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 90037b5eb17f..ba19a3afab79 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -132,7 +132,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
132 * check for front merge 132 * check for front merge
133 */ 133 */
134 if (dd->front_merges) { 134 if (dd->front_merges) {
135 sector_t sector = bio->bi_sector + bio_sectors(bio); 135 sector_t sector = bio_end_sector(bio);
136 136
137 __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector); 137 __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
138 if (__rq) { 138 if (__rq) {
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 25ef5c014fca..8188ad131b97 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -927,7 +927,7 @@ bufinit(struct buf *buf, struct request *rq, struct bio *bio)
927 buf->resid = bio->bi_size; 927 buf->resid = bio->bi_size;
928 buf->sector = bio->bi_sector; 928 buf->sector = bio->bi_sector;
929 bio_pageinc(bio); 929 bio_pageinc(bio);
930 buf->bv = bv = &bio->bi_io_vec[bio->bi_idx]; 930 buf->bv = bio_iovec(bio);
931 buf->bv_resid = bv->bv_len; 931 buf->bv_resid = bv->bv_len;
932 WARN_ON(buf->bv_resid == 0); 932 WARN_ON(buf->bv_resid == 0);
933} 933}
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 531ceb31d0ff..f1a29f8e9d33 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -334,8 +334,7 @@ static void brd_make_request(struct request_queue *q, struct bio *bio)
334 int err = -EIO; 334 int err = -EIO;
335 335
336 sector = bio->bi_sector; 336 sector = bio->bi_sector;
337 if (sector + (bio->bi_size >> SECTOR_SHIFT) > 337 if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
338 get_capacity(bdev->bd_disk))
339 goto out; 338 goto out;
340 339
341 if (unlikely(bio->bi_rw & REQ_DISCARD)) { 340 if (unlikely(bio->bi_rw & REQ_DISCARD)) {
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 2ddd64a9ffde..83232639034e 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3777,7 +3777,6 @@ static int __floppy_read_block_0(struct block_device *bdev)
3777 bio_vec.bv_len = size; 3777 bio_vec.bv_len = size;
3778 bio_vec.bv_offset = 0; 3778 bio_vec.bv_offset = 0;
3779 bio.bi_vcnt = 1; 3779 bio.bi_vcnt = 1;
3780 bio.bi_idx = 0;
3781 bio.bi_size = size; 3780 bio.bi_size = size;
3782 bio.bi_bdev = bdev; 3781 bio.bi_bdev = bdev;
3783 bio.bi_sector = 0; 3782 bio.bi_sector = 0;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 2e7de7a59bfc..11190424536a 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -901,7 +901,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
901 pd->iosched.successive_reads += bio->bi_size >> 10; 901 pd->iosched.successive_reads += bio->bi_size >> 10;
902 else { 902 else {
903 pd->iosched.successive_reads = 0; 903 pd->iosched.successive_reads = 0;
904 pd->iosched.last_write = bio->bi_sector + bio_sectors(bio); 904 pd->iosched.last_write = bio_end_sector(bio);
905 } 905 }
906 if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) { 906 if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) {
907 if (pd->read_speed == pd->write_speed) { 907 if (pd->read_speed == pd->write_speed) {
@@ -948,31 +948,6 @@ static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_que
948} 948}
949 949
950/* 950/*
951 * Copy CD_FRAMESIZE bytes from src_bio into a destination page
952 */
953static void pkt_copy_bio_data(struct bio *src_bio, int seg, int offs, struct page *dst_page, int dst_offs)
954{
955 unsigned int copy_size = CD_FRAMESIZE;
956
957 while (copy_size > 0) {
958 struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg);
959 void *vfrom = kmap_atomic(src_bvl->bv_page) +
960 src_bvl->bv_offset + offs;
961 void *vto = page_address(dst_page) + dst_offs;
962 int len = min_t(int, copy_size, src_bvl->bv_len - offs);
963
964 BUG_ON(len < 0);
965 memcpy(vto, vfrom, len);
966 kunmap_atomic(vfrom);
967
968 seg++;
969 offs = 0;
970 dst_offs += len;
971 copy_size -= len;
972 }
973}
974
975/*
976 * Copy all data for this packet to pkt->pages[], so that 951 * Copy all data for this packet to pkt->pages[], so that
977 * a) The number of required segments for the write bio is minimized, which 952 * a) The number of required segments for the write bio is minimized, which
978 * is necessary for some scsi controllers. 953 * is necessary for some scsi controllers.
@@ -1181,16 +1156,15 @@ static int pkt_start_recovery(struct packet_data *pkt)
1181 new_sector = new_block * (CD_FRAMESIZE >> 9); 1156 new_sector = new_block * (CD_FRAMESIZE >> 9);
1182 pkt->sector = new_sector; 1157 pkt->sector = new_sector;
1183 1158
1159 bio_reset(pkt->bio);
1160 pkt->bio->bi_bdev = pd->bdev;
1161 pkt->bio->bi_rw = REQ_WRITE;
1184 pkt->bio->bi_sector = new_sector; 1162 pkt->bio->bi_sector = new_sector;
1185 pkt->bio->bi_next = NULL; 1163 pkt->bio->bi_size = pkt->frames * CD_FRAMESIZE;
1186 pkt->bio->bi_flags = 1 << BIO_UPTODATE; 1164 pkt->bio->bi_vcnt = pkt->frames;
1187 pkt->bio->bi_idx = 0;
1188 1165
1189 BUG_ON(pkt->bio->bi_rw != REQ_WRITE); 1166 pkt->bio->bi_end_io = pkt_end_io_packet_write;
1190 BUG_ON(pkt->bio->bi_vcnt != pkt->frames); 1167 pkt->bio->bi_private = pkt;
1191 BUG_ON(pkt->bio->bi_size != pkt->frames * CD_FRAMESIZE);
1192 BUG_ON(pkt->bio->bi_end_io != pkt_end_io_packet_write);
1193 BUG_ON(pkt->bio->bi_private != pkt);
1194 1168
1195 drop_super(sb); 1169 drop_super(sb);
1196 return 1; 1170 return 1;
@@ -1325,55 +1299,35 @@ try_next_bio:
1325 */ 1299 */
1326static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) 1300static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
1327{ 1301{
1328 struct bio *bio;
1329 int f; 1302 int f;
1330 int frames_write;
1331 struct bio_vec *bvec = pkt->w_bio->bi_io_vec; 1303 struct bio_vec *bvec = pkt->w_bio->bi_io_vec;
1332 1304
1305 bio_reset(pkt->w_bio);
1306 pkt->w_bio->bi_sector = pkt->sector;
1307 pkt->w_bio->bi_bdev = pd->bdev;
1308 pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
1309 pkt->w_bio->bi_private = pkt;
1310
1311 /* XXX: locking? */
1333 for (f = 0; f < pkt->frames; f++) { 1312 for (f = 0; f < pkt->frames; f++) {
1334 bvec[f].bv_page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE]; 1313 bvec[f].bv_page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE];
1335 bvec[f].bv_offset = (f * CD_FRAMESIZE) % PAGE_SIZE; 1314 bvec[f].bv_offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
1315 if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
1316 BUG();
1336 } 1317 }
1318 VPRINTK(DRIVER_NAME": vcnt=%d\n", pkt->w_bio->bi_vcnt);
1337 1319
1338 /* 1320 /*
1339 * Fill-in bvec with data from orig_bios. 1321 * Fill-in bvec with data from orig_bios.
1340 */ 1322 */
1341 frames_write = 0;
1342 spin_lock(&pkt->lock); 1323 spin_lock(&pkt->lock);
1343 bio_list_for_each(bio, &pkt->orig_bios) { 1324 bio_copy_data(pkt->w_bio, pkt->orig_bios.head);
1344 int segment = bio->bi_idx;
1345 int src_offs = 0;
1346 int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9);
1347 int num_frames = bio->bi_size / CD_FRAMESIZE;
1348 BUG_ON(first_frame < 0);
1349 BUG_ON(first_frame + num_frames > pkt->frames);
1350 for (f = first_frame; f < first_frame + num_frames; f++) {
1351 struct bio_vec *src_bvl = bio_iovec_idx(bio, segment);
1352
1353 while (src_offs >= src_bvl->bv_len) {
1354 src_offs -= src_bvl->bv_len;
1355 segment++;
1356 BUG_ON(segment >= bio->bi_vcnt);
1357 src_bvl = bio_iovec_idx(bio, segment);
1358 }
1359 1325
1360 if (src_bvl->bv_len - src_offs >= CD_FRAMESIZE) {
1361 bvec[f].bv_page = src_bvl->bv_page;
1362 bvec[f].bv_offset = src_bvl->bv_offset + src_offs;
1363 } else {
1364 pkt_copy_bio_data(bio, segment, src_offs,
1365 bvec[f].bv_page, bvec[f].bv_offset);
1366 }
1367 src_offs += CD_FRAMESIZE;
1368 frames_write++;
1369 }
1370 }
1371 pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE); 1326 pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
1372 spin_unlock(&pkt->lock); 1327 spin_unlock(&pkt->lock);
1373 1328
1374 VPRINTK("pkt_start_write: Writing %d frames for zone %llx\n", 1329 VPRINTK("pkt_start_write: Writing %d frames for zone %llx\n",
1375 frames_write, (unsigned long long)pkt->sector); 1330 pkt->write_size, (unsigned long long)pkt->sector);
1376 BUG_ON(frames_write != pkt->write_size);
1377 1331
1378 if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) { 1332 if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) {
1379 pkt_make_local_copy(pkt, bvec); 1333 pkt_make_local_copy(pkt, bvec);
@@ -1383,16 +1337,6 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
1383 } 1337 }
1384 1338
1385 /* Start the write request */ 1339 /* Start the write request */
1386 bio_reset(pkt->w_bio);
1387 pkt->w_bio->bi_sector = pkt->sector;
1388 pkt->w_bio->bi_bdev = pd->bdev;
1389 pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
1390 pkt->w_bio->bi_private = pkt;
1391 for (f = 0; f < pkt->frames; f++)
1392 if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
1393 BUG();
1394 VPRINTK(DRIVER_NAME": vcnt=%d\n", pkt->w_bio->bi_vcnt);
1395
1396 atomic_set(&pkt->io_wait, 1); 1340 atomic_set(&pkt->io_wait, 1);
1397 pkt->w_bio->bi_rw = WRITE; 1341 pkt->w_bio->bi_rw = WRITE;
1398 pkt_queue_bio(pd, pkt->w_bio); 1342 pkt_queue_bio(pd, pkt->w_bio);
@@ -2433,7 +2377,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
2433 cloned_bio->bi_bdev = pd->bdev; 2377 cloned_bio->bi_bdev = pd->bdev;
2434 cloned_bio->bi_private = psd; 2378 cloned_bio->bi_private = psd;
2435 cloned_bio->bi_end_io = pkt_end_io_read_cloned; 2379 cloned_bio->bi_end_io = pkt_end_io_read_cloned;
2436 pd->stats.secs_r += bio->bi_size >> 9; 2380 pd->stats.secs_r += bio_sectors(bio);
2437 pkt_queue_bio(pd, cloned_bio); 2381 pkt_queue_bio(pd, cloned_bio);
2438 return; 2382 return;
2439 } 2383 }
@@ -2454,7 +2398,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
2454 zone = ZONE(bio->bi_sector, pd); 2398 zone = ZONE(bio->bi_sector, pd);
2455 VPRINTK("pkt_make_request: start = %6llx stop = %6llx\n", 2399 VPRINTK("pkt_make_request: start = %6llx stop = %6llx\n",
2456 (unsigned long long)bio->bi_sector, 2400 (unsigned long long)bio->bi_sector,
2457 (unsigned long long)(bio->bi_sector + bio_sectors(bio))); 2401 (unsigned long long)bio_end_sector(bio));
2458 2402
2459 /* Check if we have to split the bio */ 2403 /* Check if we have to split the bio */
2460 { 2404 {
@@ -2462,7 +2406,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
2462 sector_t last_zone; 2406 sector_t last_zone;
2463 int first_sectors; 2407 int first_sectors;
2464 2408
2465 last_zone = ZONE(bio->bi_sector + bio_sectors(bio) - 1, pd); 2409 last_zone = ZONE(bio_end_sector(bio) - 1, pd);
2466 if (last_zone != zone) { 2410 if (last_zone != zone) {
2467 BUG_ON(last_zone != zone + pd->settings.size); 2411 BUG_ON(last_zone != zone + pd->settings.size);
2468 first_sectors = last_zone - bio->bi_sector; 2412 first_sectors = last_zone - bio->bi_sector;
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 6c81a4c040b9..11e179826b60 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -952,7 +952,7 @@ static struct bio *bio_clone_range(struct bio *bio_src,
952 /* Find first affected segment... */ 952 /* Find first affected segment... */
953 953
954 resid = offset; 954 resid = offset;
955 __bio_for_each_segment(bv, bio_src, idx, 0) { 955 bio_for_each_segment(bv, bio_src, idx) {
956 if (resid < bv->bv_len) 956 if (resid < bv->bv_len)
957 break; 957 break;
958 resid -= bv->bv_len; 958 resid -= bv->bv_len;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 13c15480d940..6d2d41ae9e32 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -858,8 +858,7 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
858 unsigned int i; 858 unsigned int i;
859 struct bio_vec *bv; 859 struct bio_vec *bv;
860 860
861 for (i = 0; i < clone->bi_vcnt; i++) { 861 bio_for_each_segment_all(bv, clone, i) {
862 bv = bio_iovec_idx(clone, i);
863 BUG_ON(!bv->bv_page); 862 BUG_ON(!bv->bv_page);
864 mempool_free(bv->bv_page, cc->page_pool); 863 mempool_free(bv->bv_page, cc->page_pool);
865 bv->bv_page = NULL; 864 bv->bv_page = NULL;
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index d053098c6a91..699b5be68d31 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -458,7 +458,7 @@ static void map_region(struct dm_io_region *io, struct mirror *m,
458{ 458{
459 io->bdev = m->dev->bdev; 459 io->bdev = m->dev->bdev;
460 io->sector = map_sector(m, bio); 460 io->sector = map_sector(m, bio);
461 io->count = bio->bi_size >> 9; 461 io->count = bio_sectors(bio);
462} 462}
463 463
464static void hold_bio(struct mirror_set *ms, struct bio *bio) 464static void hold_bio(struct mirror_set *ms, struct bio *bio)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index d8837d313f54..ea5e878a30b9 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -258,7 +258,7 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
258 sector_t begin, end; 258 sector_t begin, end;
259 259
260 stripe_map_range_sector(sc, bio->bi_sector, target_stripe, &begin); 260 stripe_map_range_sector(sc, bio->bi_sector, target_stripe, &begin);
261 stripe_map_range_sector(sc, bio->bi_sector + bio_sectors(bio), 261 stripe_map_range_sector(sc, bio_end_sector(bio),
262 target_stripe, &end); 262 target_stripe, &end);
263 if (begin < end) { 263 if (begin < end) {
264 bio->bi_bdev = sc->stripe[target_stripe].dev->bdev; 264 bio->bi_bdev = sc->stripe[target_stripe].dev->bdev;
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 6ad538375c3c..4f06d9adf1ed 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -472,7 +472,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
472 return -EIO; 472 return -EIO;
473 } 473 }
474 474
475 if ((bio->bi_sector + bio_sectors(bio)) >> 475 if (bio_end_sector(bio) >>
476 (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) { 476 (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
477 DMERR_LIMIT("io out of range"); 477 DMERR_LIMIT("io out of range");
478 return -EIO; 478 return -EIO;
@@ -490,7 +490,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
490 490
491 bio->bi_end_io = verity_end_io; 491 bio->bi_end_io = verity_end_io;
492 bio->bi_private = io; 492 bio->bi_private = io;
493 io->io_vec_size = bio->bi_vcnt - bio->bi_idx; 493 io->io_vec_size = bio_segments(bio);
494 if (io->io_vec_size < DM_VERITY_IO_VEC_INLINE) 494 if (io->io_vec_size < DM_VERITY_IO_VEC_INLINE)
495 io->io_vec = io->io_vec_inline; 495 io->io_vec = io->io_vec_inline;
496 else 496 else
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 5e7dc772f5de..3193aefe982b 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -185,8 +185,7 @@ static void make_request(struct mddev *mddev, struct bio *bio)
185 return; 185 return;
186 } 186 }
187 187
188 if (check_sector(conf, bio->bi_sector, bio->bi_sector+(bio->bi_size>>9), 188 if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), WRITE))
189 WRITE))
190 failit = 1; 189 failit = 1;
191 if (check_mode(conf, WritePersistent)) { 190 if (check_mode(conf, WritePersistent)) {
192 add_sector(conf, bio->bi_sector, WritePersistent); 191 add_sector(conf, bio->bi_sector, WritePersistent);
@@ -196,8 +195,7 @@ static void make_request(struct mddev *mddev, struct bio *bio)
196 failit = 1; 195 failit = 1;
197 } else { 196 } else {
198 /* read request */ 197 /* read request */
199 if (check_sector(conf, bio->bi_sector, bio->bi_sector + (bio->bi_size>>9), 198 if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), READ))
200 READ))
201 failit = 1; 199 failit = 1;
202 if (check_mode(conf, ReadTransient)) 200 if (check_mode(conf, ReadTransient))
203 failit = 1; 201 failit = 1;
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 21014836bdbf..f03fabd2b37b 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -317,8 +317,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
317 bio_io_error(bio); 317 bio_io_error(bio);
318 return; 318 return;
319 } 319 }
320 if (unlikely(bio->bi_sector + (bio->bi_size >> 9) > 320 if (unlikely(bio_end_sector(bio) > tmp_dev->end_sector)) {
321 tmp_dev->end_sector)) {
322 /* This bio crosses a device boundary, so we have to 321 /* This bio crosses a device boundary, so we have to
323 * split it. 322 * split it.
324 */ 323 */
diff --git a/drivers/md/md.c b/drivers/md/md.c
index fcb878f88796..d323676580a9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -194,21 +194,12 @@ void md_trim_bio(struct bio *bio, int offset, int size)
194 if (offset == 0 && size == bio->bi_size) 194 if (offset == 0 && size == bio->bi_size)
195 return; 195 return;
196 196
197 bio->bi_sector += offset;
198 bio->bi_size = size;
199 offset <<= 9;
200 clear_bit(BIO_SEG_VALID, &bio->bi_flags); 197 clear_bit(BIO_SEG_VALID, &bio->bi_flags);
201 198
202 while (bio->bi_idx < bio->bi_vcnt && 199 bio_advance(bio, offset << 9);
203 bio->bi_io_vec[bio->bi_idx].bv_len <= offset) { 200
204 /* remove this whole bio_vec */ 201 bio->bi_size = size;
205 offset -= bio->bi_io_vec[bio->bi_idx].bv_len; 202
206 bio->bi_idx++;
207 }
208 if (bio->bi_idx < bio->bi_vcnt) {
209 bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
210 bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
211 }
212 /* avoid any complications with bi_idx being non-zero*/ 203 /* avoid any complications with bi_idx being non-zero*/
213 if (bio->bi_idx) { 204 if (bio->bi_idx) {
214 memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx, 205 memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 0505452de8d6..fcf65e512cf5 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -502,11 +502,11 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
502{ 502{
503 if (likely(is_power_of_2(chunk_sects))) { 503 if (likely(is_power_of_2(chunk_sects))) {
504 return chunk_sects >= ((bio->bi_sector & (chunk_sects-1)) 504 return chunk_sects >= ((bio->bi_sector & (chunk_sects-1))
505 + (bio->bi_size >> 9)); 505 + bio_sectors(bio));
506 } else{ 506 } else{
507 sector_t sector = bio->bi_sector; 507 sector_t sector = bio->bi_sector;
508 return chunk_sects >= (sector_div(sector, chunk_sects) 508 return chunk_sects >= (sector_div(sector, chunk_sects)
509 + (bio->bi_size >> 9)); 509 + bio_sectors(bio));
510 } 510 }
511} 511}
512 512
@@ -527,8 +527,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
527 sector_t sector = bio->bi_sector; 527 sector_t sector = bio->bi_sector;
528 struct bio_pair *bp; 528 struct bio_pair *bp;
529 /* Sanity check -- queue functions should prevent this happening */ 529 /* Sanity check -- queue functions should prevent this happening */
530 if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) || 530 if (bio_segments(bio) > 1)
531 bio->bi_idx != 0)
532 goto bad_map; 531 goto bad_map;
533 /* This is a one page bio that upper layers 532 /* This is a one page bio that upper layers
534 * refuse to split for us, so we need to split it. 533 * refuse to split for us, so we need to split it.
@@ -567,7 +566,7 @@ bad_map:
567 printk("md/raid0:%s: make_request bug: can't convert block across chunks" 566 printk("md/raid0:%s: make_request bug: can't convert block across chunks"
568 " or bigger than %dk %llu %d\n", 567 " or bigger than %dk %llu %d\n",
569 mdname(mddev), chunk_sects / 2, 568 mdname(mddev), chunk_sects / 2,
570 (unsigned long long)bio->bi_sector, bio->bi_size >> 10); 569 (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
571 570
572 bio_io_error(bio); 571 bio_io_error(bio);
573 return; 572 return;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index fd86b372692d..aeb4e3f74791 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -92,7 +92,6 @@ static void r1bio_pool_free(void *r1_bio, void *data)
92static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) 92static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
93{ 93{
94 struct pool_info *pi = data; 94 struct pool_info *pi = data;
95 struct page *page;
96 struct r1bio *r1_bio; 95 struct r1bio *r1_bio;
97 struct bio *bio; 96 struct bio *bio;
98 int i, j; 97 int i, j;
@@ -122,14 +121,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
122 j = 1; 121 j = 1;
123 while(j--) { 122 while(j--) {
124 bio = r1_bio->bios[j]; 123 bio = r1_bio->bios[j];
125 for (i = 0; i < RESYNC_PAGES; i++) { 124 bio->bi_vcnt = RESYNC_PAGES;
126 page = alloc_page(gfp_flags);
127 if (unlikely(!page))
128 goto out_free_pages;
129 125
130 bio->bi_io_vec[i].bv_page = page; 126 if (bio_alloc_pages(bio, gfp_flags))
131 bio->bi_vcnt = i+1; 127 goto out_free_bio;
132 }
133 } 128 }
134 /* If not user-requests, copy the page pointers to all bios */ 129 /* If not user-requests, copy the page pointers to all bios */
135 if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) { 130 if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) {
@@ -143,11 +138,6 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
143 138
144 return r1_bio; 139 return r1_bio;
145 140
146out_free_pages:
147 for (j=0 ; j < pi->raid_disks; j++)
148 for (i=0; i < r1_bio->bios[j]->bi_vcnt ; i++)
149 put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
150 j = -1;
151out_free_bio: 141out_free_bio:
152 while (++j < pi->raid_disks) 142 while (++j < pi->raid_disks)
153 bio_put(r1_bio->bios[j]); 143 bio_put(r1_bio->bios[j]);
@@ -267,7 +257,7 @@ static void raid_end_bio_io(struct r1bio *r1_bio)
267 (bio_data_dir(bio) == WRITE) ? "write" : "read", 257 (bio_data_dir(bio) == WRITE) ? "write" : "read",
268 (unsigned long long) bio->bi_sector, 258 (unsigned long long) bio->bi_sector,
269 (unsigned long long) bio->bi_sector + 259 (unsigned long long) bio->bi_sector +
270 (bio->bi_size >> 9) - 1); 260 bio_sectors(bio) - 1);
271 261
272 call_bio_endio(r1_bio); 262 call_bio_endio(r1_bio);
273 } 263 }
@@ -458,7 +448,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
458 " %llu-%llu\n", 448 " %llu-%llu\n",
459 (unsigned long long) mbio->bi_sector, 449 (unsigned long long) mbio->bi_sector,
460 (unsigned long long) mbio->bi_sector + 450 (unsigned long long) mbio->bi_sector +
461 (mbio->bi_size >> 9) - 1); 451 bio_sectors(mbio) - 1);
462 call_bio_endio(r1_bio); 452 call_bio_endio(r1_bio);
463 } 453 }
464 } 454 }
@@ -925,7 +915,7 @@ static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio)
925 if (unlikely(!bvecs)) 915 if (unlikely(!bvecs))
926 return; 916 return;
927 917
928 bio_for_each_segment(bvec, bio, i) { 918 bio_for_each_segment_all(bvec, bio, i) {
929 bvecs[i] = *bvec; 919 bvecs[i] = *bvec;
930 bvecs[i].bv_page = alloc_page(GFP_NOIO); 920 bvecs[i].bv_page = alloc_page(GFP_NOIO);
931 if (unlikely(!bvecs[i].bv_page)) 921 if (unlikely(!bvecs[i].bv_page))
@@ -1018,7 +1008,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
1018 md_write_start(mddev, bio); /* wait on superblock update early */ 1008 md_write_start(mddev, bio); /* wait on superblock update early */
1019 1009
1020 if (bio_data_dir(bio) == WRITE && 1010 if (bio_data_dir(bio) == WRITE &&
1021 bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo && 1011 bio_end_sector(bio) > mddev->suspend_lo &&
1022 bio->bi_sector < mddev->suspend_hi) { 1012 bio->bi_sector < mddev->suspend_hi) {
1023 /* As the suspend_* range is controlled by 1013 /* As the suspend_* range is controlled by
1024 * userspace, we want an interruptible 1014 * userspace, we want an interruptible
@@ -1029,7 +1019,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
1029 flush_signals(current); 1019 flush_signals(current);
1030 prepare_to_wait(&conf->wait_barrier, 1020 prepare_to_wait(&conf->wait_barrier,
1031 &w, TASK_INTERRUPTIBLE); 1021 &w, TASK_INTERRUPTIBLE);
1032 if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo || 1022 if (bio_end_sector(bio) <= mddev->suspend_lo ||
1033 bio->bi_sector >= mddev->suspend_hi) 1023 bio->bi_sector >= mddev->suspend_hi)
1034 break; 1024 break;
1035 schedule(); 1025 schedule();
@@ -1049,7 +1039,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
1049 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); 1039 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
1050 1040
1051 r1_bio->master_bio = bio; 1041 r1_bio->master_bio = bio;
1052 r1_bio->sectors = bio->bi_size >> 9; 1042 r1_bio->sectors = bio_sectors(bio);
1053 r1_bio->state = 0; 1043 r1_bio->state = 0;
1054 r1_bio->mddev = mddev; 1044 r1_bio->mddev = mddev;
1055 r1_bio->sector = bio->bi_sector; 1045 r1_bio->sector = bio->bi_sector;
@@ -1127,7 +1117,7 @@ read_again:
1127 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); 1117 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
1128 1118
1129 r1_bio->master_bio = bio; 1119 r1_bio->master_bio = bio;
1130 r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled; 1120 r1_bio->sectors = bio_sectors(bio) - sectors_handled;
1131 r1_bio->state = 0; 1121 r1_bio->state = 0;
1132 r1_bio->mddev = mddev; 1122 r1_bio->mddev = mddev;
1133 r1_bio->sector = bio->bi_sector + sectors_handled; 1123 r1_bio->sector = bio->bi_sector + sectors_handled;
@@ -1284,14 +1274,10 @@ read_again:
1284 struct bio_vec *bvec; 1274 struct bio_vec *bvec;
1285 int j; 1275 int j;
1286 1276
1287 /* Yes, I really want the '__' version so that 1277 /*
1288 * we clear any unused pointer in the io_vec, rather 1278 * We trimmed the bio, so _all is legit
1289 * than leave them unchanged. This is important
1290 * because when we come to free the pages, we won't
1291 * know the original bi_idx, so we just free
1292 * them all
1293 */ 1279 */
1294 __bio_for_each_segment(bvec, mbio, j, 0) 1280 bio_for_each_segment_all(bvec, mbio, j)
1295 bvec->bv_page = r1_bio->behind_bvecs[j].bv_page; 1281 bvec->bv_page = r1_bio->behind_bvecs[j].bv_page;
1296 if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) 1282 if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
1297 atomic_inc(&r1_bio->behind_remaining); 1283 atomic_inc(&r1_bio->behind_remaining);
@@ -1329,14 +1315,14 @@ read_again:
1329 /* Mustn't call r1_bio_write_done before this next test, 1315 /* Mustn't call r1_bio_write_done before this next test,
1330 * as it could result in the bio being freed. 1316 * as it could result in the bio being freed.
1331 */ 1317 */
1332 if (sectors_handled < (bio->bi_size >> 9)) { 1318 if (sectors_handled < bio_sectors(bio)) {
1333 r1_bio_write_done(r1_bio); 1319 r1_bio_write_done(r1_bio);
1334 /* We need another r1_bio. It has already been counted 1320 /* We need another r1_bio. It has already been counted
1335 * in bio->bi_phys_segments 1321 * in bio->bi_phys_segments
1336 */ 1322 */
1337 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); 1323 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
1338 r1_bio->master_bio = bio; 1324 r1_bio->master_bio = bio;
1339 r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled; 1325 r1_bio->sectors = bio_sectors(bio) - sectors_handled;
1340 r1_bio->state = 0; 1326 r1_bio->state = 0;
1341 r1_bio->mddev = mddev; 1327 r1_bio->mddev = mddev;
1342 r1_bio->sector = bio->bi_sector + sectors_handled; 1328 r1_bio->sector = bio->bi_sector + sectors_handled;
@@ -1862,7 +1848,7 @@ static int process_checks(struct r1bio *r1_bio)
1862 struct bio *sbio = r1_bio->bios[i]; 1848 struct bio *sbio = r1_bio->bios[i];
1863 int size; 1849 int size;
1864 1850
1865 if (r1_bio->bios[i]->bi_end_io != end_sync_read) 1851 if (sbio->bi_end_io != end_sync_read)
1866 continue; 1852 continue;
1867 1853
1868 if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) { 1854 if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
@@ -1887,16 +1873,15 @@ static int process_checks(struct r1bio *r1_bio)
1887 continue; 1873 continue;
1888 } 1874 }
1889 /* fixup the bio for reuse */ 1875 /* fixup the bio for reuse */
1876 bio_reset(sbio);
1890 sbio->bi_vcnt = vcnt; 1877 sbio->bi_vcnt = vcnt;
1891 sbio->bi_size = r1_bio->sectors << 9; 1878 sbio->bi_size = r1_bio->sectors << 9;
1892 sbio->bi_idx = 0;
1893 sbio->bi_phys_segments = 0;
1894 sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
1895 sbio->bi_flags |= 1 << BIO_UPTODATE;
1896 sbio->bi_next = NULL;
1897 sbio->bi_sector = r1_bio->sector + 1879 sbio->bi_sector = r1_bio->sector +
1898 conf->mirrors[i].rdev->data_offset; 1880 conf->mirrors[i].rdev->data_offset;
1899 sbio->bi_bdev = conf->mirrors[i].rdev->bdev; 1881 sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
1882 sbio->bi_end_io = end_sync_read;
1883 sbio->bi_private = r1_bio;
1884
1900 size = sbio->bi_size; 1885 size = sbio->bi_size;
1901 for (j = 0; j < vcnt ; j++) { 1886 for (j = 0; j < vcnt ; j++) {
1902 struct bio_vec *bi; 1887 struct bio_vec *bi;
@@ -1907,10 +1892,9 @@ static int process_checks(struct r1bio *r1_bio)
1907 else 1892 else
1908 bi->bv_len = size; 1893 bi->bv_len = size;
1909 size -= PAGE_SIZE; 1894 size -= PAGE_SIZE;
1910 memcpy(page_address(bi->bv_page),
1911 page_address(pbio->bi_io_vec[j].bv_page),
1912 PAGE_SIZE);
1913 } 1895 }
1896
1897 bio_copy_data(sbio, pbio);
1914 } 1898 }
1915 return 0; 1899 return 0;
1916} 1900}
@@ -1947,7 +1931,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
1947 wbio->bi_rw = WRITE; 1931 wbio->bi_rw = WRITE;
1948 wbio->bi_end_io = end_sync_write; 1932 wbio->bi_end_io = end_sync_write;
1949 atomic_inc(&r1_bio->remaining); 1933 atomic_inc(&r1_bio->remaining);
1950 md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9); 1934 md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio));
1951 1935
1952 generic_make_request(wbio); 1936 generic_make_request(wbio);
1953 } 1937 }
@@ -2059,32 +2043,11 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
2059 } 2043 }
2060} 2044}
2061 2045
2062static void bi_complete(struct bio *bio, int error)
2063{
2064 complete((struct completion *)bio->bi_private);
2065}
2066
2067static int submit_bio_wait(int rw, struct bio *bio)
2068{
2069 struct completion event;
2070 rw |= REQ_SYNC;
2071
2072 init_completion(&event);
2073 bio->bi_private = &event;
2074 bio->bi_end_io = bi_complete;
2075 submit_bio(rw, bio);
2076 wait_for_completion(&event);
2077
2078 return test_bit(BIO_UPTODATE, &bio->bi_flags);
2079}
2080
2081static int narrow_write_error(struct r1bio *r1_bio, int i) 2046static int narrow_write_error(struct r1bio *r1_bio, int i)
2082{ 2047{
2083 struct mddev *mddev = r1_bio->mddev; 2048 struct mddev *mddev = r1_bio->mddev;
2084 struct r1conf *conf = mddev->private; 2049 struct r1conf *conf = mddev->private;
2085 struct md_rdev *rdev = conf->mirrors[i].rdev; 2050 struct md_rdev *rdev = conf->mirrors[i].rdev;
2086 int vcnt, idx;
2087 struct bio_vec *vec;
2088 2051
2089 /* bio has the data to be written to device 'i' where 2052 /* bio has the data to be written to device 'i' where
2090 * we just recently had a write error. 2053 * we just recently had a write error.
@@ -2112,30 +2075,32 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
2112 & ~(sector_t)(block_sectors - 1)) 2075 & ~(sector_t)(block_sectors - 1))
2113 - sector; 2076 - sector;
2114 2077
2115 if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
2116 vcnt = r1_bio->behind_page_count;
2117 vec = r1_bio->behind_bvecs;
2118 idx = 0;
2119 while (vec[idx].bv_page == NULL)
2120 idx++;
2121 } else {
2122 vcnt = r1_bio->master_bio->bi_vcnt;
2123 vec = r1_bio->master_bio->bi_io_vec;
2124 idx = r1_bio->master_bio->bi_idx;
2125 }
2126 while (sect_to_write) { 2078 while (sect_to_write) {
2127 struct bio *wbio; 2079 struct bio *wbio;
2128 if (sectors > sect_to_write) 2080 if (sectors > sect_to_write)
2129 sectors = sect_to_write; 2081 sectors = sect_to_write;
2130 /* Write at 'sector' for 'sectors'*/ 2082 /* Write at 'sector' for 'sectors'*/
2131 2083
2132 wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev); 2084 if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
2133 memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec)); 2085 unsigned vcnt = r1_bio->behind_page_count;
2134 wbio->bi_sector = r1_bio->sector; 2086 struct bio_vec *vec = r1_bio->behind_bvecs;
2087
2088 while (!vec->bv_page) {
2089 vec++;
2090 vcnt--;
2091 }
2092
2093 wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
2094 memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
2095
2096 wbio->bi_vcnt = vcnt;
2097 } else {
2098 wbio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
2099 }
2100
2135 wbio->bi_rw = WRITE; 2101 wbio->bi_rw = WRITE;
2136 wbio->bi_vcnt = vcnt; 2102 wbio->bi_sector = r1_bio->sector;
2137 wbio->bi_size = r1_bio->sectors << 9; 2103 wbio->bi_size = r1_bio->sectors << 9;
2138 wbio->bi_idx = idx;
2139 2104
2140 md_trim_bio(wbio, sector - r1_bio->sector, sectors); 2105 md_trim_bio(wbio, sector - r1_bio->sector, sectors);
2141 wbio->bi_sector += rdev->data_offset; 2106 wbio->bi_sector += rdev->data_offset;
@@ -2284,8 +2249,7 @@ read_more:
2284 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); 2249 r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
2285 2250
2286 r1_bio->master_bio = mbio; 2251 r1_bio->master_bio = mbio;
2287 r1_bio->sectors = (mbio->bi_size >> 9) 2252 r1_bio->sectors = bio_sectors(mbio) - sectors_handled;
2288 - sectors_handled;
2289 r1_bio->state = 0; 2253 r1_bio->state = 0;
2290 set_bit(R1BIO_ReadError, &r1_bio->state); 2254 set_bit(R1BIO_ReadError, &r1_bio->state);
2291 r1_bio->mddev = mddev; 2255 r1_bio->mddev = mddev;
@@ -2459,18 +2423,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
2459 for (i = 0; i < conf->raid_disks * 2; i++) { 2423 for (i = 0; i < conf->raid_disks * 2; i++) {
2460 struct md_rdev *rdev; 2424 struct md_rdev *rdev;
2461 bio = r1_bio->bios[i]; 2425 bio = r1_bio->bios[i];
2462 2426 bio_reset(bio);
2463 /* take from bio_init */
2464 bio->bi_next = NULL;
2465 bio->bi_flags &= ~(BIO_POOL_MASK-1);
2466 bio->bi_flags |= 1 << BIO_UPTODATE;
2467 bio->bi_rw = READ;
2468 bio->bi_vcnt = 0;
2469 bio->bi_idx = 0;
2470 bio->bi_phys_segments = 0;
2471 bio->bi_size = 0;
2472 bio->bi_end_io = NULL;
2473 bio->bi_private = NULL;
2474 2427
2475 rdev = rcu_dereference(conf->mirrors[i].rdev); 2428 rdev = rcu_dereference(conf->mirrors[i].rdev);
2476 if (rdev == NULL || 2429 if (rdev == NULL ||
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 77b562d18a90..e32e8b1042f8 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1169,14 +1169,13 @@ static void make_request(struct mddev *mddev, struct bio * bio)
1169 /* If this request crosses a chunk boundary, we need to 1169 /* If this request crosses a chunk boundary, we need to
1170 * split it. This will only happen for 1 PAGE (or less) requests. 1170 * split it. This will only happen for 1 PAGE (or less) requests.
1171 */ 1171 */
1172 if (unlikely((bio->bi_sector & chunk_mask) + (bio->bi_size >> 9) 1172 if (unlikely((bio->bi_sector & chunk_mask) + bio_sectors(bio)
1173 > chunk_sects 1173 > chunk_sects
1174 && (conf->geo.near_copies < conf->geo.raid_disks 1174 && (conf->geo.near_copies < conf->geo.raid_disks
1175 || conf->prev.near_copies < conf->prev.raid_disks))) { 1175 || conf->prev.near_copies < conf->prev.raid_disks))) {
1176 struct bio_pair *bp; 1176 struct bio_pair *bp;
1177 /* Sanity check -- queue functions should prevent this happening */ 1177 /* Sanity check -- queue functions should prevent this happening */
1178 if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) || 1178 if (bio_segments(bio) > 1)
1179 bio->bi_idx != 0)
1180 goto bad_map; 1179 goto bad_map;
1181 /* This is a one page bio that upper layers 1180 /* This is a one page bio that upper layers
1182 * refuse to split for us, so we need to split it. 1181 * refuse to split for us, so we need to split it.
@@ -1209,7 +1208,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
1209 bad_map: 1208 bad_map:
1210 printk("md/raid10:%s: make_request bug: can't convert block across chunks" 1209 printk("md/raid10:%s: make_request bug: can't convert block across chunks"
1211 " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2, 1210 " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
1212 (unsigned long long)bio->bi_sector, bio->bi_size >> 10); 1211 (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
1213 1212
1214 bio_io_error(bio); 1213 bio_io_error(bio);
1215 return; 1214 return;
@@ -1224,7 +1223,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
1224 */ 1223 */
1225 wait_barrier(conf); 1224 wait_barrier(conf);
1226 1225
1227 sectors = bio->bi_size >> 9; 1226 sectors = bio_sectors(bio);
1228 while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && 1227 while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
1229 bio->bi_sector < conf->reshape_progress && 1228 bio->bi_sector < conf->reshape_progress &&
1230 bio->bi_sector + sectors > conf->reshape_progress) { 1229 bio->bi_sector + sectors > conf->reshape_progress) {
@@ -1326,8 +1325,7 @@ read_again:
1326 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); 1325 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
1327 1326
1328 r10_bio->master_bio = bio; 1327 r10_bio->master_bio = bio;
1329 r10_bio->sectors = ((bio->bi_size >> 9) 1328 r10_bio->sectors = bio_sectors(bio) - sectors_handled;
1330 - sectors_handled);
1331 r10_bio->state = 0; 1329 r10_bio->state = 0;
1332 r10_bio->mddev = mddev; 1330 r10_bio->mddev = mddev;
1333 r10_bio->sector = bio->bi_sector + sectors_handled; 1331 r10_bio->sector = bio->bi_sector + sectors_handled;
@@ -1569,7 +1567,7 @@ retry_write:
1569 * after checking if we need to go around again. 1567 * after checking if we need to go around again.
1570 */ 1568 */
1571 1569
1572 if (sectors_handled < (bio->bi_size >> 9)) { 1570 if (sectors_handled < bio_sectors(bio)) {
1573 one_write_done(r10_bio); 1571 one_write_done(r10_bio);
1574 /* We need another r10_bio. It has already been counted 1572 /* We need another r10_bio. It has already been counted
1575 * in bio->bi_phys_segments. 1573 * in bio->bi_phys_segments.
@@ -1577,7 +1575,7 @@ retry_write:
1577 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); 1575 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
1578 1576
1579 r10_bio->master_bio = bio; 1577 r10_bio->master_bio = bio;
1580 r10_bio->sectors = (bio->bi_size >> 9) - sectors_handled; 1578 r10_bio->sectors = bio_sectors(bio) - sectors_handled;
1581 1579
1582 r10_bio->mddev = mddev; 1580 r10_bio->mddev = mddev;
1583 r10_bio->sector = bio->bi_sector + sectors_handled; 1581 r10_bio->sector = bio->bi_sector + sectors_handled;
@@ -2079,13 +2077,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
2079 * First we need to fixup bv_offset, bv_len and 2077 * First we need to fixup bv_offset, bv_len and
2080 * bi_vecs, as the read request might have corrupted these 2078 * bi_vecs, as the read request might have corrupted these
2081 */ 2079 */
2080 bio_reset(tbio);
2081
2082 tbio->bi_vcnt = vcnt; 2082 tbio->bi_vcnt = vcnt;
2083 tbio->bi_size = r10_bio->sectors << 9; 2083 tbio->bi_size = r10_bio->sectors << 9;
2084 tbio->bi_idx = 0;
2085 tbio->bi_phys_segments = 0;
2086 tbio->bi_flags &= ~(BIO_POOL_MASK - 1);
2087 tbio->bi_flags |= 1 << BIO_UPTODATE;
2088 tbio->bi_next = NULL;
2089 tbio->bi_rw = WRITE; 2084 tbio->bi_rw = WRITE;
2090 tbio->bi_private = r10_bio; 2085 tbio->bi_private = r10_bio;
2091 tbio->bi_sector = r10_bio->devs[i].addr; 2086 tbio->bi_sector = r10_bio->devs[i].addr;
@@ -2103,7 +2098,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
2103 d = r10_bio->devs[i].devnum; 2098 d = r10_bio->devs[i].devnum;
2104 atomic_inc(&conf->mirrors[d].rdev->nr_pending); 2099 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
2105 atomic_inc(&r10_bio->remaining); 2100 atomic_inc(&r10_bio->remaining);
2106 md_sync_acct(conf->mirrors[d].rdev->bdev, tbio->bi_size >> 9); 2101 md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
2107 2102
2108 tbio->bi_sector += conf->mirrors[d].rdev->data_offset; 2103 tbio->bi_sector += conf->mirrors[d].rdev->data_offset;
2109 tbio->bi_bdev = conf->mirrors[d].rdev->bdev; 2104 tbio->bi_bdev = conf->mirrors[d].rdev->bdev;
@@ -2128,7 +2123,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
2128 d = r10_bio->devs[i].devnum; 2123 d = r10_bio->devs[i].devnum;
2129 atomic_inc(&r10_bio->remaining); 2124 atomic_inc(&r10_bio->remaining);
2130 md_sync_acct(conf->mirrors[d].replacement->bdev, 2125 md_sync_acct(conf->mirrors[d].replacement->bdev,
2131 tbio->bi_size >> 9); 2126 bio_sectors(tbio));
2132 generic_make_request(tbio); 2127 generic_make_request(tbio);
2133 } 2128 }
2134 2129
@@ -2254,13 +2249,13 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
2254 wbio2 = r10_bio->devs[1].repl_bio; 2249 wbio2 = r10_bio->devs[1].repl_bio;
2255 if (wbio->bi_end_io) { 2250 if (wbio->bi_end_io) {
2256 atomic_inc(&conf->mirrors[d].rdev->nr_pending); 2251 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
2257 md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9); 2252 md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
2258 generic_make_request(wbio); 2253 generic_make_request(wbio);
2259 } 2254 }
2260 if (wbio2 && wbio2->bi_end_io) { 2255 if (wbio2 && wbio2->bi_end_io) {
2261 atomic_inc(&conf->mirrors[d].replacement->nr_pending); 2256 atomic_inc(&conf->mirrors[d].replacement->nr_pending);
2262 md_sync_acct(conf->mirrors[d].replacement->bdev, 2257 md_sync_acct(conf->mirrors[d].replacement->bdev,
2263 wbio2->bi_size >> 9); 2258 bio_sectors(wbio2));
2264 generic_make_request(wbio2); 2259 generic_make_request(wbio2);
2265 } 2260 }
2266} 2261}
@@ -2531,25 +2526,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
2531 } 2526 }
2532} 2527}
2533 2528
2534static void bi_complete(struct bio *bio, int error)
2535{
2536 complete((struct completion *)bio->bi_private);
2537}
2538
2539static int submit_bio_wait(int rw, struct bio *bio)
2540{
2541 struct completion event;
2542 rw |= REQ_SYNC;
2543
2544 init_completion(&event);
2545 bio->bi_private = &event;
2546 bio->bi_end_io = bi_complete;
2547 submit_bio(rw, bio);
2548 wait_for_completion(&event);
2549
2550 return test_bit(BIO_UPTODATE, &bio->bi_flags);
2551}
2552
2553static int narrow_write_error(struct r10bio *r10_bio, int i) 2529static int narrow_write_error(struct r10bio *r10_bio, int i)
2554{ 2530{
2555 struct bio *bio = r10_bio->master_bio; 2531 struct bio *bio = r10_bio->master_bio;
@@ -2690,8 +2666,7 @@ read_more:
2690 r10_bio = mempool_alloc(conf->r10bio_pool, 2666 r10_bio = mempool_alloc(conf->r10bio_pool,
2691 GFP_NOIO); 2667 GFP_NOIO);
2692 r10_bio->master_bio = mbio; 2668 r10_bio->master_bio = mbio;
2693 r10_bio->sectors = (mbio->bi_size >> 9) 2669 r10_bio->sectors = bio_sectors(mbio) - sectors_handled;
2694 - sectors_handled;
2695 r10_bio->state = 0; 2670 r10_bio->state = 0;
2696 set_bit(R10BIO_ReadError, 2671 set_bit(R10BIO_ReadError,
2697 &r10_bio->state); 2672 &r10_bio->state);
@@ -3112,6 +3087,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
3112 } 3087 }
3113 } 3088 }
3114 bio = r10_bio->devs[0].bio; 3089 bio = r10_bio->devs[0].bio;
3090 bio_reset(bio);
3115 bio->bi_next = biolist; 3091 bio->bi_next = biolist;
3116 biolist = bio; 3092 biolist = bio;
3117 bio->bi_private = r10_bio; 3093 bio->bi_private = r10_bio;
@@ -3136,6 +3112,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
3136 rdev = mirror->rdev; 3112 rdev = mirror->rdev;
3137 if (!test_bit(In_sync, &rdev->flags)) { 3113 if (!test_bit(In_sync, &rdev->flags)) {
3138 bio = r10_bio->devs[1].bio; 3114 bio = r10_bio->devs[1].bio;
3115 bio_reset(bio);
3139 bio->bi_next = biolist; 3116 bio->bi_next = biolist;
3140 biolist = bio; 3117 biolist = bio;
3141 bio->bi_private = r10_bio; 3118 bio->bi_private = r10_bio;
@@ -3164,6 +3141,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
3164 if (rdev == NULL || bio == NULL || 3141 if (rdev == NULL || bio == NULL ||
3165 test_bit(Faulty, &rdev->flags)) 3142 test_bit(Faulty, &rdev->flags))
3166 break; 3143 break;
3144 bio_reset(bio);
3167 bio->bi_next = biolist; 3145 bio->bi_next = biolist;
3168 biolist = bio; 3146 biolist = bio;
3169 bio->bi_private = r10_bio; 3147 bio->bi_private = r10_bio;
@@ -3262,7 +3240,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
3262 r10_bio->devs[i].repl_bio->bi_end_io = NULL; 3240 r10_bio->devs[i].repl_bio->bi_end_io = NULL;
3263 3241
3264 bio = r10_bio->devs[i].bio; 3242 bio = r10_bio->devs[i].bio;
3265 bio->bi_end_io = NULL; 3243 bio_reset(bio);
3266 clear_bit(BIO_UPTODATE, &bio->bi_flags); 3244 clear_bit(BIO_UPTODATE, &bio->bi_flags);
3267 if (conf->mirrors[d].rdev == NULL || 3245 if (conf->mirrors[d].rdev == NULL ||
3268 test_bit(Faulty, &conf->mirrors[d].rdev->flags)) 3246 test_bit(Faulty, &conf->mirrors[d].rdev->flags))
@@ -3299,6 +3277,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
3299 3277
3300 /* Need to set up for writing to the replacement */ 3278 /* Need to set up for writing to the replacement */
3301 bio = r10_bio->devs[i].repl_bio; 3279 bio = r10_bio->devs[i].repl_bio;
3280 bio_reset(bio);
3302 clear_bit(BIO_UPTODATE, &bio->bi_flags); 3281 clear_bit(BIO_UPTODATE, &bio->bi_flags);
3303 3282
3304 sector = r10_bio->devs[i].addr; 3283 sector = r10_bio->devs[i].addr;
@@ -3332,17 +3311,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
3332 } 3311 }
3333 } 3312 }
3334 3313
3335 for (bio = biolist; bio ; bio=bio->bi_next) {
3336
3337 bio->bi_flags &= ~(BIO_POOL_MASK - 1);
3338 if (bio->bi_end_io)
3339 bio->bi_flags |= 1 << BIO_UPTODATE;
3340 bio->bi_vcnt = 0;
3341 bio->bi_idx = 0;
3342 bio->bi_phys_segments = 0;
3343 bio->bi_size = 0;
3344 }
3345
3346 nr_sectors = 0; 3314 nr_sectors = 0;
3347 if (sector_nr + max_sync < max_sector) 3315 if (sector_nr + max_sync < max_sector)
3348 max_sector = sector_nr + max_sync; 3316 max_sector = sector_nr + max_sync;
@@ -4389,7 +4357,6 @@ read_more:
4389 read_bio->bi_flags &= ~(BIO_POOL_MASK - 1); 4357 read_bio->bi_flags &= ~(BIO_POOL_MASK - 1);
4390 read_bio->bi_flags |= 1 << BIO_UPTODATE; 4358 read_bio->bi_flags |= 1 << BIO_UPTODATE;
4391 read_bio->bi_vcnt = 0; 4359 read_bio->bi_vcnt = 0;
4392 read_bio->bi_idx = 0;
4393 read_bio->bi_size = 0; 4360 read_bio->bi_size = 0;
4394 r10_bio->master_bio = read_bio; 4361 r10_bio->master_bio = read_bio;
4395 r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum; 4362 r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
@@ -4413,17 +4380,14 @@ read_more:
4413 } 4380 }
4414 if (!rdev2 || test_bit(Faulty, &rdev2->flags)) 4381 if (!rdev2 || test_bit(Faulty, &rdev2->flags))
4415 continue; 4382 continue;
4383
4384 bio_reset(b);
4416 b->bi_bdev = rdev2->bdev; 4385 b->bi_bdev = rdev2->bdev;
4417 b->bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset; 4386 b->bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset;
4418 b->bi_private = r10_bio; 4387 b->bi_private = r10_bio;
4419 b->bi_end_io = end_reshape_write; 4388 b->bi_end_io = end_reshape_write;
4420 b->bi_rw = WRITE; 4389 b->bi_rw = WRITE;
4421 b->bi_flags &= ~(BIO_POOL_MASK - 1);
4422 b->bi_flags |= 1 << BIO_UPTODATE;
4423 b->bi_next = blist; 4390 b->bi_next = blist;
4424 b->bi_vcnt = 0;
4425 b->bi_idx = 0;
4426 b->bi_size = 0;
4427 blist = b; 4391 blist = b;
4428 } 4392 }
4429 4393
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3ee2912889e7..7bbd28546214 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -90,7 +90,7 @@ static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
90 */ 90 */
91static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector) 91static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
92{ 92{
93 int sectors = bio->bi_size >> 9; 93 int sectors = bio_sectors(bio);
94 if (bio->bi_sector + sectors < sector + STRIPE_SECTORS) 94 if (bio->bi_sector + sectors < sector + STRIPE_SECTORS)
95 return bio->bi_next; 95 return bio->bi_next;
96 else 96 else
@@ -567,14 +567,6 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
567 bi = &sh->dev[i].req; 567 bi = &sh->dev[i].req;
568 rbi = &sh->dev[i].rreq; /* For writing to replacement */ 568 rbi = &sh->dev[i].rreq; /* For writing to replacement */
569 569
570 bi->bi_rw = rw;
571 rbi->bi_rw = rw;
572 if (rw & WRITE) {
573 bi->bi_end_io = raid5_end_write_request;
574 rbi->bi_end_io = raid5_end_write_request;
575 } else
576 bi->bi_end_io = raid5_end_read_request;
577
578 rcu_read_lock(); 570 rcu_read_lock();
579 rrdev = rcu_dereference(conf->disks[i].replacement); 571 rrdev = rcu_dereference(conf->disks[i].replacement);
580 smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */ 572 smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */
@@ -649,7 +641,14 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
649 641
650 set_bit(STRIPE_IO_STARTED, &sh->state); 642 set_bit(STRIPE_IO_STARTED, &sh->state);
651 643
644 bio_reset(bi);
652 bi->bi_bdev = rdev->bdev; 645 bi->bi_bdev = rdev->bdev;
646 bi->bi_rw = rw;
647 bi->bi_end_io = (rw & WRITE)
648 ? raid5_end_write_request
649 : raid5_end_read_request;
650 bi->bi_private = sh;
651
653 pr_debug("%s: for %llu schedule op %ld on disc %d\n", 652 pr_debug("%s: for %llu schedule op %ld on disc %d\n",
654 __func__, (unsigned long long)sh->sector, 653 __func__, (unsigned long long)sh->sector,
655 bi->bi_rw, i); 654 bi->bi_rw, i);
@@ -663,12 +662,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
663 if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) 662 if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
664 bi->bi_rw |= REQ_FLUSH; 663 bi->bi_rw |= REQ_FLUSH;
665 664
666 bi->bi_flags = 1 << BIO_UPTODATE;
667 bi->bi_idx = 0;
668 bi->bi_io_vec[0].bv_len = STRIPE_SIZE; 665 bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
669 bi->bi_io_vec[0].bv_offset = 0; 666 bi->bi_io_vec[0].bv_offset = 0;
670 bi->bi_size = STRIPE_SIZE; 667 bi->bi_size = STRIPE_SIZE;
671 bi->bi_next = NULL;
672 if (rrdev) 668 if (rrdev)
673 set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); 669 set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
674 trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), 670 trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
@@ -683,7 +679,13 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
683 679
684 set_bit(STRIPE_IO_STARTED, &sh->state); 680 set_bit(STRIPE_IO_STARTED, &sh->state);
685 681
682 bio_reset(rbi);
686 rbi->bi_bdev = rrdev->bdev; 683 rbi->bi_bdev = rrdev->bdev;
684 rbi->bi_rw = rw;
685 BUG_ON(!(rw & WRITE));
686 rbi->bi_end_io = raid5_end_write_request;
687 rbi->bi_private = sh;
688
687 pr_debug("%s: for %llu schedule op %ld on " 689 pr_debug("%s: for %llu schedule op %ld on "
688 "replacement disc %d\n", 690 "replacement disc %d\n",
689 __func__, (unsigned long long)sh->sector, 691 __func__, (unsigned long long)sh->sector,
@@ -695,12 +697,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
695 else 697 else
696 rbi->bi_sector = (sh->sector 698 rbi->bi_sector = (sh->sector
697 + rrdev->data_offset); 699 + rrdev->data_offset);
698 rbi->bi_flags = 1 << BIO_UPTODATE;
699 rbi->bi_idx = 0;
700 rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; 700 rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
701 rbi->bi_io_vec[0].bv_offset = 0; 701 rbi->bi_io_vec[0].bv_offset = 0;
702 rbi->bi_size = STRIPE_SIZE; 702 rbi->bi_size = STRIPE_SIZE;
703 rbi->bi_next = NULL;
704 trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), 703 trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
705 rbi, disk_devt(conf->mddev->gendisk), 704 rbi, disk_devt(conf->mddev->gendisk),
706 sh->dev[i].sector); 705 sh->dev[i].sector);
@@ -2384,11 +2383,11 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
2384 } else 2383 } else
2385 bip = &sh->dev[dd_idx].toread; 2384 bip = &sh->dev[dd_idx].toread;
2386 while (*bip && (*bip)->bi_sector < bi->bi_sector) { 2385 while (*bip && (*bip)->bi_sector < bi->bi_sector) {
2387 if ((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector) 2386 if (bio_end_sector(*bip) > bi->bi_sector)
2388 goto overlap; 2387 goto overlap;
2389 bip = & (*bip)->bi_next; 2388 bip = & (*bip)->bi_next;
2390 } 2389 }
2391 if (*bip && (*bip)->bi_sector < bi->bi_sector + ((bi->bi_size)>>9)) 2390 if (*bip && (*bip)->bi_sector < bio_end_sector(bi))
2392 goto overlap; 2391 goto overlap;
2393 2392
2394 BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next); 2393 BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
@@ -2404,8 +2403,8 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
2404 sector < sh->dev[dd_idx].sector + STRIPE_SECTORS && 2403 sector < sh->dev[dd_idx].sector + STRIPE_SECTORS &&
2405 bi && bi->bi_sector <= sector; 2404 bi && bi->bi_sector <= sector;
2406 bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) { 2405 bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) {
2407 if (bi->bi_sector + (bi->bi_size>>9) >= sector) 2406 if (bio_end_sector(bi) >= sector)
2408 sector = bi->bi_sector + (bi->bi_size>>9); 2407 sector = bio_end_sector(bi);
2409 } 2408 }
2410 if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) 2409 if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
2411 set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); 2410 set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
@@ -3804,7 +3803,7 @@ static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
3804{ 3803{
3805 sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); 3804 sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
3806 unsigned int chunk_sectors = mddev->chunk_sectors; 3805 unsigned int chunk_sectors = mddev->chunk_sectors;
3807 unsigned int bio_sectors = bio->bi_size >> 9; 3806 unsigned int bio_sectors = bio_sectors(bio);
3808 3807
3809 if (mddev->new_chunk_sectors < mddev->chunk_sectors) 3808 if (mddev->new_chunk_sectors < mddev->chunk_sectors)
3810 chunk_sectors = mddev->new_chunk_sectors; 3809 chunk_sectors = mddev->new_chunk_sectors;
@@ -3894,7 +3893,7 @@ static int bio_fits_rdev(struct bio *bi)
3894{ 3893{
3895 struct request_queue *q = bdev_get_queue(bi->bi_bdev); 3894 struct request_queue *q = bdev_get_queue(bi->bi_bdev);
3896 3895
3897 if ((bi->bi_size>>9) > queue_max_sectors(q)) 3896 if (bio_sectors(bi) > queue_max_sectors(q))
3898 return 0; 3897 return 0;
3899 blk_recount_segments(q, bi); 3898 blk_recount_segments(q, bi);
3900 if (bi->bi_phys_segments > queue_max_segments(q)) 3899 if (bi->bi_phys_segments > queue_max_segments(q))
@@ -3941,7 +3940,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
3941 0, 3940 0,
3942 &dd_idx, NULL); 3941 &dd_idx, NULL);
3943 3942
3944 end_sector = align_bi->bi_sector + (align_bi->bi_size >> 9); 3943 end_sector = bio_end_sector(align_bi);
3945 rcu_read_lock(); 3944 rcu_read_lock();
3946 rdev = rcu_dereference(conf->disks[dd_idx].replacement); 3945 rdev = rcu_dereference(conf->disks[dd_idx].replacement);
3947 if (!rdev || test_bit(Faulty, &rdev->flags) || 3946 if (!rdev || test_bit(Faulty, &rdev->flags) ||
@@ -3964,7 +3963,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
3964 align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); 3963 align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
3965 3964
3966 if (!bio_fits_rdev(align_bi) || 3965 if (!bio_fits_rdev(align_bi) ||
3967 is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9, 3966 is_badblock(rdev, align_bi->bi_sector, bio_sectors(align_bi),
3968 &first_bad, &bad_sectors)) { 3967 &first_bad, &bad_sectors)) {
3969 /* too big in some way, or has a known bad block */ 3968 /* too big in some way, or has a known bad block */
3970 bio_put(align_bi); 3969 bio_put(align_bi);
@@ -4216,7 +4215,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
4216 } 4215 }
4217 4216
4218 logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); 4217 logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
4219 last_sector = bi->bi_sector + (bi->bi_size>>9); 4218 last_sector = bio_end_sector(bi);
4220 bi->bi_next = NULL; 4219 bi->bi_next = NULL;
4221 bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ 4220 bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
4222 4221
@@ -4679,7 +4678,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
4679 logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1); 4678 logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
4680 sector = raid5_compute_sector(conf, logical_sector, 4679 sector = raid5_compute_sector(conf, logical_sector,
4681 0, &dd_idx, NULL); 4680 0, &dd_idx, NULL);
4682 last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9); 4681 last_sector = bio_end_sector(raid_bio);
4683 4682
4684 for (; logical_sector < last_sector; 4683 for (; logical_sector < last_sector;
4685 logical_sector += STRIPE_SECTORS, 4684 logical_sector += STRIPE_SECTORS,
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index fa43c391c8ed..2bb01546df0b 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -2235,10 +2235,10 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
2235 } 2235 }
2236 2236
2237 /* do we need to support multiple segments? */ 2237 /* do we need to support multiple segments? */
2238 if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) { 2238 if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) {
2239 printk(MYIOC_s_ERR_FMT "%s: multiple segments req %u %u, rsp %u %u\n", 2239 printk(MYIOC_s_ERR_FMT "%s: multiple segments req %u %u, rsp %u %u\n",
2240 ioc->name, __func__, req->bio->bi_vcnt, blk_rq_bytes(req), 2240 ioc->name, __func__, bio_segments(req->bio), blk_rq_bytes(req),
2241 rsp->bio->bi_vcnt, blk_rq_bytes(rsp)); 2241 bio_segments(rsp->bio), blk_rq_bytes(rsp));
2242 return -EINVAL; 2242 return -EINVAL;
2243 } 2243 }
2244 2244
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index b6ad0de07930..12d08b4529e9 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -826,8 +826,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
826 if ((bio->bi_sector & 7) != 0 || (bio->bi_size & 4095) != 0) 826 if ((bio->bi_sector & 7) != 0 || (bio->bi_size & 4095) != 0)
827 /* Request is not page-aligned. */ 827 /* Request is not page-aligned. */
828 goto fail; 828 goto fail;
829 if (((bio->bi_size >> 9) + bio->bi_sector) 829 if (bio_end_sector(bio) > get_capacity(bio->bi_bdev->bd_disk)) {
830 > get_capacity(bio->bi_bdev->bd_disk)) {
831 /* Request beyond end of DCSS segment. */ 830 /* Request beyond end of DCSS segment. */
832 goto fail; 831 goto fail;
833 } 832 }
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index aec2e0da5016..7af776737b40 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -2151,10 +2151,10 @@ int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
2151 } 2151 }
2152 2152
2153 /* do we need to support multiple segments? */ 2153 /* do we need to support multiple segments? */
2154 if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) { 2154 if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) {
2155 printk("%s: multiple segments req %u %u, rsp %u %u\n", 2155 printk("%s: multiple segments req %u %u, rsp %u %u\n",
2156 __func__, req->bio->bi_vcnt, blk_rq_bytes(req), 2156 __func__, bio_segments(req->bio), blk_rq_bytes(req),
2157 rsp->bio->bi_vcnt, blk_rq_bytes(rsp)); 2157 bio_segments(rsp->bio), blk_rq_bytes(rsp));
2158 return -EINVAL; 2158 return -EINVAL;
2159 } 2159 }
2160 2160
diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c
index 8c2ffbe6af0f..193e7ae90c3b 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_transport.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c
@@ -1939,7 +1939,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
1939 ioc->transport_cmds.status = MPT2_CMD_PENDING; 1939 ioc->transport_cmds.status = MPT2_CMD_PENDING;
1940 1940
1941 /* Check if the request is split across multiple segments */ 1941 /* Check if the request is split across multiple segments */
1942 if (req->bio->bi_vcnt > 1) { 1942 if (bio_segments(req->bio) > 1) {
1943 u32 offset = 0; 1943 u32 offset = 0;
1944 1944
1945 /* Allocate memory and copy the request */ 1945 /* Allocate memory and copy the request */
@@ -1971,7 +1971,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
1971 1971
1972 /* Check if the response needs to be populated across 1972 /* Check if the response needs to be populated across
1973 * multiple segments */ 1973 * multiple segments */
1974 if (rsp->bio->bi_vcnt > 1) { 1974 if (bio_segments(rsp->bio) > 1) {
1975 pci_addr_in = pci_alloc_consistent(ioc->pdev, blk_rq_bytes(rsp), 1975 pci_addr_in = pci_alloc_consistent(ioc->pdev, blk_rq_bytes(rsp),
1976 &pci_dma_in); 1976 &pci_dma_in);
1977 if (!pci_addr_in) { 1977 if (!pci_addr_in) {
@@ -2038,7 +2038,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
2038 sgl_flags = (MPI2_SGE_FLAGS_SIMPLE_ELEMENT | 2038 sgl_flags = (MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
2039 MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC); 2039 MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC);
2040 sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; 2040 sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
2041 if (req->bio->bi_vcnt > 1) { 2041 if (bio_segments(req->bio) > 1) {
2042 ioc->base_add_sg_single(psge, sgl_flags | 2042 ioc->base_add_sg_single(psge, sgl_flags |
2043 (blk_rq_bytes(req) - 4), pci_dma_out); 2043 (blk_rq_bytes(req) - 4), pci_dma_out);
2044 } else { 2044 } else {
@@ -2054,7 +2054,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
2054 MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | 2054 MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER |
2055 MPI2_SGE_FLAGS_END_OF_LIST); 2055 MPI2_SGE_FLAGS_END_OF_LIST);
2056 sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; 2056 sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
2057 if (rsp->bio->bi_vcnt > 1) { 2057 if (bio_segments(rsp->bio) > 1) {
2058 ioc->base_add_sg_single(psge, sgl_flags | 2058 ioc->base_add_sg_single(psge, sgl_flags |
2059 (blk_rq_bytes(rsp) + 4), pci_dma_in); 2059 (blk_rq_bytes(rsp) + 4), pci_dma_in);
2060 } else { 2060 } else {
@@ -2099,7 +2099,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
2099 le16_to_cpu(mpi_reply->ResponseDataLength); 2099 le16_to_cpu(mpi_reply->ResponseDataLength);
2100 /* check if the resp needs to be copied from the allocated 2100 /* check if the resp needs to be copied from the allocated
2101 * pci mem */ 2101 * pci mem */
2102 if (rsp->bio->bi_vcnt > 1) { 2102 if (bio_segments(rsp->bio) > 1) {
2103 u32 offset = 0; 2103 u32 offset = 0;
2104 u32 bytes_to_copy = 2104 u32 bytes_to_copy =
2105 le16_to_cpu(mpi_reply->ResponseDataLength); 2105 le16_to_cpu(mpi_reply->ResponseDataLength);
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index a3f28f331b2b..8fb42916d8a2 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -27,48 +27,11 @@
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29 29
30struct integrity_slab { 30#define BIP_INLINE_VECS 4
31 struct kmem_cache *slab;
32 unsigned short nr_vecs;
33 char name[8];
34};
35
36#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) }
37struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = {
38 IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES),
39};
40#undef IS
41 31
32static struct kmem_cache *bip_slab;
42static struct workqueue_struct *kintegrityd_wq; 33static struct workqueue_struct *kintegrityd_wq;
43 34
44static inline unsigned int vecs_to_idx(unsigned int nr)
45{
46 switch (nr) {
47 case 1:
48 return 0;
49 case 2 ... 4:
50 return 1;
51 case 5 ... 16:
52 return 2;
53 case 17 ... 64:
54 return 3;
55 case 65 ... 128:
56 return 4;
57 case 129 ... BIO_MAX_PAGES:
58 return 5;
59 default:
60 BUG();
61 }
62}
63
64static inline int use_bip_pool(unsigned int idx)
65{
66 if (idx == BIOVEC_MAX_IDX)
67 return 1;
68
69 return 0;
70}
71
72/** 35/**
73 * bio_integrity_alloc - Allocate integrity payload and attach it to bio 36 * bio_integrity_alloc - Allocate integrity payload and attach it to bio
74 * @bio: bio to attach integrity metadata to 37 * @bio: bio to attach integrity metadata to
@@ -84,37 +47,41 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
84 unsigned int nr_vecs) 47 unsigned int nr_vecs)
85{ 48{
86 struct bio_integrity_payload *bip; 49 struct bio_integrity_payload *bip;
87 unsigned int idx = vecs_to_idx(nr_vecs);
88 struct bio_set *bs = bio->bi_pool; 50 struct bio_set *bs = bio->bi_pool;
89 51 unsigned long idx = BIO_POOL_NONE;
90 if (!bs) 52 unsigned inline_vecs;
91 bs = fs_bio_set; 53
92 54 if (!bs) {
93 BUG_ON(bio == NULL); 55 bip = kmalloc(sizeof(struct bio_integrity_payload) +
94 bip = NULL; 56 sizeof(struct bio_vec) * nr_vecs, gfp_mask);
95 57 inline_vecs = nr_vecs;
96 /* Lower order allocations come straight from slab */ 58 } else {
97 if (!use_bip_pool(idx))
98 bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask);
99
100 /* Use mempool if lower order alloc failed or max vecs were requested */
101 if (bip == NULL) {
102 idx = BIOVEC_MAX_IDX; /* so we free the payload properly later */
103 bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); 59 bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
104 60 inline_vecs = BIP_INLINE_VECS;
105 if (unlikely(bip == NULL)) {
106 printk(KERN_ERR "%s: could not alloc bip\n", __func__);
107 return NULL;
108 }
109 } 61 }
110 62
63 if (unlikely(!bip))
64 return NULL;
65
111 memset(bip, 0, sizeof(*bip)); 66 memset(bip, 0, sizeof(*bip));
112 67
68 if (nr_vecs > inline_vecs) {
69 bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx,
70 bs->bvec_integrity_pool);
71 if (!bip->bip_vec)
72 goto err;
73 } else {
74 bip->bip_vec = bip->bip_inline_vecs;
75 }
76
113 bip->bip_slab = idx; 77 bip->bip_slab = idx;
114 bip->bip_bio = bio; 78 bip->bip_bio = bio;
115 bio->bi_integrity = bip; 79 bio->bi_integrity = bip;
116 80
117 return bip; 81 return bip;
82err:
83 mempool_free(bip, bs->bio_integrity_pool);
84 return NULL;
118} 85}
119EXPORT_SYMBOL(bio_integrity_alloc); 86EXPORT_SYMBOL(bio_integrity_alloc);
120 87
@@ -130,20 +97,18 @@ void bio_integrity_free(struct bio *bio)
130 struct bio_integrity_payload *bip = bio->bi_integrity; 97 struct bio_integrity_payload *bip = bio->bi_integrity;
131 struct bio_set *bs = bio->bi_pool; 98 struct bio_set *bs = bio->bi_pool;
132 99
133 if (!bs) 100 if (bip->bip_owns_buf)
134 bs = fs_bio_set;
135
136 BUG_ON(bip == NULL);
137
138 /* A cloned bio doesn't own the integrity metadata */
139 if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY)
140 && bip->bip_buf != NULL)
141 kfree(bip->bip_buf); 101 kfree(bip->bip_buf);
142 102
143 if (use_bip_pool(bip->bip_slab)) 103 if (bs) {
104 if (bip->bip_slab != BIO_POOL_NONE)
105 bvec_free(bs->bvec_integrity_pool, bip->bip_vec,
106 bip->bip_slab);
107
144 mempool_free(bip, bs->bio_integrity_pool); 108 mempool_free(bip, bs->bio_integrity_pool);
145 else 109 } else {
146 kmem_cache_free(bip_slab[bip->bip_slab].slab, bip); 110 kfree(bip);
111 }
147 112
148 bio->bi_integrity = NULL; 113 bio->bi_integrity = NULL;
149} 114}
@@ -419,6 +384,7 @@ int bio_integrity_prep(struct bio *bio)
419 return -EIO; 384 return -EIO;
420 } 385 }
421 386
387 bip->bip_owns_buf = 1;
422 bip->bip_buf = buf; 388 bip->bip_buf = buf;
423 bip->bip_size = len; 389 bip->bip_size = len;
424 bip->bip_sector = bio->bi_sector; 390 bip->bip_sector = bio->bi_sector;
@@ -694,11 +660,11 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
694 bp->bio1.bi_integrity = &bp->bip1; 660 bp->bio1.bi_integrity = &bp->bip1;
695 bp->bio2.bi_integrity = &bp->bip2; 661 bp->bio2.bi_integrity = &bp->bip2;
696 662
697 bp->iv1 = bip->bip_vec[0]; 663 bp->iv1 = bip->bip_vec[bip->bip_idx];
698 bp->iv2 = bip->bip_vec[0]; 664 bp->iv2 = bip->bip_vec[bip->bip_idx];
699 665
700 bp->bip1.bip_vec[0] = bp->iv1; 666 bp->bip1.bip_vec = &bp->iv1;
701 bp->bip2.bip_vec[0] = bp->iv2; 667 bp->bip2.bip_vec = &bp->iv2;
702 668
703 bp->iv1.bv_len = sectors * bi->tuple_size; 669 bp->iv1.bv_len = sectors * bi->tuple_size;
704 bp->iv2.bv_offset += sectors * bi->tuple_size; 670 bp->iv2.bv_offset += sectors * bi->tuple_size;
@@ -746,13 +712,14 @@ EXPORT_SYMBOL(bio_integrity_clone);
746 712
747int bioset_integrity_create(struct bio_set *bs, int pool_size) 713int bioset_integrity_create(struct bio_set *bs, int pool_size)
748{ 714{
749 unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES);
750
751 if (bs->bio_integrity_pool) 715 if (bs->bio_integrity_pool)
752 return 0; 716 return 0;
753 717
754 bs->bio_integrity_pool = 718 bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab);
755 mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab); 719
720 bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size);
721 if (!bs->bvec_integrity_pool)
722 return -1;
756 723
757 if (!bs->bio_integrity_pool) 724 if (!bs->bio_integrity_pool)
758 return -1; 725 return -1;
@@ -765,13 +732,14 @@ void bioset_integrity_free(struct bio_set *bs)
765{ 732{
766 if (bs->bio_integrity_pool) 733 if (bs->bio_integrity_pool)
767 mempool_destroy(bs->bio_integrity_pool); 734 mempool_destroy(bs->bio_integrity_pool);
735
736 if (bs->bvec_integrity_pool)
 737 		mempool_destroy(bs->bvec_integrity_pool);
768} 738}
769EXPORT_SYMBOL(bioset_integrity_free); 739EXPORT_SYMBOL(bioset_integrity_free);
770 740
771void __init bio_integrity_init(void) 741void __init bio_integrity_init(void)
772{ 742{
773 unsigned int i;
774
775 /* 743 /*
776 * kintegrityd won't block much but may burn a lot of CPU cycles. 744 * kintegrityd won't block much but may burn a lot of CPU cycles.
777 * Make it highpri CPU intensive wq with max concurrency of 1. 745 * Make it highpri CPU intensive wq with max concurrency of 1.
@@ -781,14 +749,10 @@ void __init bio_integrity_init(void)
781 if (!kintegrityd_wq) 749 if (!kintegrityd_wq)
782 panic("Failed to create kintegrityd\n"); 750 panic("Failed to create kintegrityd\n");
783 751
784 for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) { 752 bip_slab = kmem_cache_create("bio_integrity_payload",
785 unsigned int size; 753 sizeof(struct bio_integrity_payload) +
786 754 sizeof(struct bio_vec) * BIP_INLINE_VECS,
787 size = sizeof(struct bio_integrity_payload) 755 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
788 + bip_slab[i].nr_vecs * sizeof(struct bio_vec); 756 if (!bip_slab)
789 757 panic("Failed to create slab\n");
790 bip_slab[i].slab =
791 kmem_cache_create(bip_slab[i].name, size, 0,
792 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
793 }
794} 758}
diff --git a/fs/bio.c b/fs/bio.c
index bb5768f59b32..9238a54b562c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -160,12 +160,12 @@ unsigned int bvec_nr_vecs(unsigned short idx)
160 return bvec_slabs[idx].nr_vecs; 160 return bvec_slabs[idx].nr_vecs;
161} 161}
162 162
163void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx) 163void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx)
164{ 164{
165 BIO_BUG_ON(idx >= BIOVEC_NR_POOLS); 165 BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
166 166
167 if (idx == BIOVEC_MAX_IDX) 167 if (idx == BIOVEC_MAX_IDX)
168 mempool_free(bv, bs->bvec_pool); 168 mempool_free(bv, pool);
169 else { 169 else {
170 struct biovec_slab *bvs = bvec_slabs + idx; 170 struct biovec_slab *bvs = bvec_slabs + idx;
171 171
@@ -173,8 +173,8 @@ void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
173 } 173 }
174} 174}
175 175
176struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, 176struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
177 struct bio_set *bs) 177 mempool_t *pool)
178{ 178{
179 struct bio_vec *bvl; 179 struct bio_vec *bvl;
180 180
@@ -210,7 +210,7 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
210 */ 210 */
211 if (*idx == BIOVEC_MAX_IDX) { 211 if (*idx == BIOVEC_MAX_IDX) {
212fallback: 212fallback:
213 bvl = mempool_alloc(bs->bvec_pool, gfp_mask); 213 bvl = mempool_alloc(pool, gfp_mask);
214 } else { 214 } else {
215 struct biovec_slab *bvs = bvec_slabs + *idx; 215 struct biovec_slab *bvs = bvec_slabs + *idx;
216 gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO); 216 gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
@@ -252,8 +252,8 @@ static void bio_free(struct bio *bio)
252 __bio_free(bio); 252 __bio_free(bio);
253 253
254 if (bs) { 254 if (bs) {
255 if (bio_has_allocated_vec(bio)) 255 if (bio_flagged(bio, BIO_OWNS_VEC))
256 bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); 256 bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio));
257 257
258 /* 258 /*
259 * If we have front padding, adjust the bio pointer before freeing 259 * If we have front padding, adjust the bio pointer before freeing
@@ -297,6 +297,54 @@ void bio_reset(struct bio *bio)
297} 297}
298EXPORT_SYMBOL(bio_reset); 298EXPORT_SYMBOL(bio_reset);
299 299
300static void bio_alloc_rescue(struct work_struct *work)
301{
302 struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
303 struct bio *bio;
304
305 while (1) {
306 spin_lock(&bs->rescue_lock);
307 bio = bio_list_pop(&bs->rescue_list);
308 spin_unlock(&bs->rescue_lock);
309
310 if (!bio)
311 break;
312
313 generic_make_request(bio);
314 }
315}
316
317static void punt_bios_to_rescuer(struct bio_set *bs)
318{
319 struct bio_list punt, nopunt;
320 struct bio *bio;
321
322 /*
323 * In order to guarantee forward progress we must punt only bios that
324 * were allocated from this bio_set; otherwise, if there was a bio on
325 * there for a stacking driver higher up in the stack, processing it
326 * could require allocating bios from this bio_set, and doing that from
327 * our own rescuer would be bad.
328 *
329 * Since bio lists are singly linked, pop them all instead of trying to
330 * remove from the middle of the list:
331 */
332
333 bio_list_init(&punt);
334 bio_list_init(&nopunt);
335
336 while ((bio = bio_list_pop(current->bio_list)))
337 bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
338
339 *current->bio_list = nopunt;
340
341 spin_lock(&bs->rescue_lock);
342 bio_list_merge(&bs->rescue_list, &punt);
343 spin_unlock(&bs->rescue_lock);
344
345 queue_work(bs->rescue_workqueue, &bs->rescue_work);
346}
347
300/** 348/**
301 * bio_alloc_bioset - allocate a bio for I/O 349 * bio_alloc_bioset - allocate a bio for I/O
302 * @gfp_mask: the GFP_ mask given to the slab allocator 350 * @gfp_mask: the GFP_ mask given to the slab allocator
@@ -314,11 +362,27 @@ EXPORT_SYMBOL(bio_reset);
314 * previously allocated bio for IO before attempting to allocate a new one. 362 * previously allocated bio for IO before attempting to allocate a new one.
315 * Failure to do so can cause deadlocks under memory pressure. 363 * Failure to do so can cause deadlocks under memory pressure.
316 * 364 *
365 * Note that when running under generic_make_request() (i.e. any block
366 * driver), bios are not submitted until after you return - see the code in
367 * generic_make_request() that converts recursion into iteration, to prevent
368 * stack overflows.
369 *
370 * This would normally mean allocating multiple bios under
371 * generic_make_request() would be susceptible to deadlocks, but we have
372 * deadlock avoidance code that resubmits any blocked bios from a rescuer
373 * thread.
374 *
375 * However, we do not guarantee forward progress for allocations from other
376 * mempools. Doing multiple allocations from the same mempool under
377 * generic_make_request() should be avoided - instead, use bio_set's front_pad
378 * for per bio allocations.
379 *
317 * RETURNS: 380 * RETURNS:
318 * Pointer to new bio on success, NULL on failure. 381 * Pointer to new bio on success, NULL on failure.
319 */ 382 */
320struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) 383struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
321{ 384{
385 gfp_t saved_gfp = gfp_mask;
322 unsigned front_pad; 386 unsigned front_pad;
323 unsigned inline_vecs; 387 unsigned inline_vecs;
324 unsigned long idx = BIO_POOL_NONE; 388 unsigned long idx = BIO_POOL_NONE;
@@ -336,7 +400,37 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
336 front_pad = 0; 400 front_pad = 0;
337 inline_vecs = nr_iovecs; 401 inline_vecs = nr_iovecs;
338 } else { 402 } else {
403 /*
404 * generic_make_request() converts recursion to iteration; this
405 * means if we're running beneath it, any bios we allocate and
406 * submit will not be submitted (and thus freed) until after we
407 * return.
408 *
409 * This exposes us to a potential deadlock if we allocate
410 * multiple bios from the same bio_set() while running
411 * underneath generic_make_request(). If we were to allocate
412 * multiple bios (say a stacking block driver that was splitting
413 * bios), we would deadlock if we exhausted the mempool's
414 * reserve.
415 *
416 * We solve this, and guarantee forward progress, with a rescuer
417 * workqueue per bio_set. If we go to allocate and there are
418 * bios on current->bio_list, we first try the allocation
419 * without __GFP_WAIT; if that fails, we punt those bios we
420 * would be blocking to the rescuer workqueue before we retry
421 * with the original gfp_flags.
422 */
423
424 if (current->bio_list && !bio_list_empty(current->bio_list))
425 gfp_mask &= ~__GFP_WAIT;
426
339 p = mempool_alloc(bs->bio_pool, gfp_mask); 427 p = mempool_alloc(bs->bio_pool, gfp_mask);
428 if (!p && gfp_mask != saved_gfp) {
429 punt_bios_to_rescuer(bs);
430 gfp_mask = saved_gfp;
431 p = mempool_alloc(bs->bio_pool, gfp_mask);
432 }
433
340 front_pad = bs->front_pad; 434 front_pad = bs->front_pad;
341 inline_vecs = BIO_INLINE_VECS; 435 inline_vecs = BIO_INLINE_VECS;
342 } 436 }
@@ -348,9 +442,17 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
348 bio_init(bio); 442 bio_init(bio);
349 443
350 if (nr_iovecs > inline_vecs) { 444 if (nr_iovecs > inline_vecs) {
351 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); 445 bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
446 if (!bvl && gfp_mask != saved_gfp) {
447 punt_bios_to_rescuer(bs);
448 gfp_mask = saved_gfp;
449 bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
450 }
451
352 if (unlikely(!bvl)) 452 if (unlikely(!bvl))
353 goto err_free; 453 goto err_free;
454
455 bio->bi_flags |= 1 << BIO_OWNS_VEC;
354 } else if (nr_iovecs) { 456 } else if (nr_iovecs) {
355 bvl = bio->bi_inline_vecs; 457 bvl = bio->bi_inline_vecs;
356 } 458 }
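
A note on the front_pad guidance in the comment above: a stacking driver that needs per-bio state should reserve it in front of every bio allocated from its bio_set rather than doing a second mempool allocation underneath generic_make_request(). A minimal sketch of that pattern, with purely illustrative names (my_io_ctx, my_bio_set and my_alloc_io are not part of this patch):

	/* Per-bio context carried in the bio_set's front_pad; the bio must be
	 * the last member so the pointer returned by bio_alloc_bioset() can
	 * be converted back with container_of(). */
	struct my_io_ctx {
		void		*driver_data;
		struct bio	bio;
	};

	static struct bio_set *my_bio_set;

	static int __init my_driver_init(void)
	{
		my_bio_set = bioset_create(BIO_POOL_SIZE,
					   offsetof(struct my_io_ctx, bio));
		return my_bio_set ? 0 : -ENOMEM;
	}

	static struct my_io_ctx *my_alloc_io(gfp_t gfp, int nr_vecs)
	{
		struct bio *bio = bio_alloc_bioset(gfp, nr_vecs, my_bio_set);

		return bio ? container_of(bio, struct my_io_ctx, bio) : NULL;
	}

Because the context and the bio come from a single allocation, only one mempool is involved per bio and the forward-progress guarantee described above still holds.
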
@@ -652,6 +754,181 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
652} 754}
653EXPORT_SYMBOL(bio_add_page); 755EXPORT_SYMBOL(bio_add_page);
654 756
757struct submit_bio_ret {
758 struct completion event;
759 int error;
760};
761
762static void submit_bio_wait_endio(struct bio *bio, int error)
763{
764 struct submit_bio_ret *ret = bio->bi_private;
765
766 ret->error = error;
767 complete(&ret->event);
768}
769
770/**
771 * submit_bio_wait - submit a bio, and wait until it completes
772 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
773 * @bio: The &struct bio which describes the I/O
774 *
775 * Simple wrapper around submit_bio(). Returns 0 on success, or the error from
776 * bio_endio() on failure.
777 */
778int submit_bio_wait(int rw, struct bio *bio)
779{
780 struct submit_bio_ret ret;
781
782 rw |= REQ_SYNC;
783 init_completion(&ret.event);
784 bio->bi_private = &ret;
785 bio->bi_end_io = submit_bio_wait_endio;
786 submit_bio(rw, bio);
787 wait_for_completion(&ret.event);
788
789 return ret.error;
790}
791EXPORT_SYMBOL(submit_bio_wait);
792
793/**
794 * bio_advance - increment/complete a bio by some number of bytes
795 * @bio: bio to advance
796 * @bytes: number of bytes to complete
797 *
798 * This updates bi_sector, bi_size and bi_idx; if the number of bytes to
799 * complete doesn't align with a bvec boundary, then bv_len and bv_offset will
800 * be updated on the last bvec as well.
801 *
802 * @bio will then represent the remaining, uncompleted portion of the io.
803 */
804void bio_advance(struct bio *bio, unsigned bytes)
805{
806 if (bio_integrity(bio))
807 bio_integrity_advance(bio, bytes);
808
809 bio->bi_sector += bytes >> 9;
810 bio->bi_size -= bytes;
811
812 if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK)
813 return;
814
815 while (bytes) {
816 if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
817 WARN_ONCE(1, "bio idx %d >= vcnt %d\n",
818 bio->bi_idx, bio->bi_vcnt);
819 break;
820 }
821
822 if (bytes >= bio_iovec(bio)->bv_len) {
823 bytes -= bio_iovec(bio)->bv_len;
824 bio->bi_idx++;
825 } else {
826 bio_iovec(bio)->bv_len -= bytes;
827 bio_iovec(bio)->bv_offset += bytes;
828 bytes = 0;
829 }
830 }
831}
832EXPORT_SYMBOL(bio_advance);
833
834/**
835 * bio_alloc_pages - allocates a single page for each bvec in a bio
836 * @bio: bio to allocate pages for
837 * @gfp_mask: flags for allocation
838 *
839 * Allocates pages up to @bio->bi_vcnt.
840 *
841 * Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are
842 * freed.
843 */
844int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
845{
846 int i;
847 struct bio_vec *bv;
848
849 bio_for_each_segment_all(bv, bio, i) {
850 bv->bv_page = alloc_page(gfp_mask);
851 if (!bv->bv_page) {
852 while (--bv >= bio->bi_io_vec)
853 __free_page(bv->bv_page);
854 return -ENOMEM;
855 }
856 }
857
858 return 0;
859}
860EXPORT_SYMBOL(bio_alloc_pages);
861
862/**
863 * bio_copy_data - copy contents of data buffers from one chain of bios to
864 * another
865 * @src: source bio list
866 * @dst: destination bio list
867 *
868 * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
869 * @src and @dst as linked lists of bios.
870 *
871 * Stops when it reaches the end of either @src or @dst - that is, copies
872 * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
873 */
874void bio_copy_data(struct bio *dst, struct bio *src)
875{
876 struct bio_vec *src_bv, *dst_bv;
877 unsigned src_offset, dst_offset, bytes;
878 void *src_p, *dst_p;
879
880 src_bv = bio_iovec(src);
881 dst_bv = bio_iovec(dst);
882
883 src_offset = src_bv->bv_offset;
884 dst_offset = dst_bv->bv_offset;
885
886 while (1) {
887 if (src_offset == src_bv->bv_offset + src_bv->bv_len) {
888 src_bv++;
889 if (src_bv == bio_iovec_idx(src, src->bi_vcnt)) {
890 src = src->bi_next;
891 if (!src)
892 break;
893
894 src_bv = bio_iovec(src);
895 }
896
897 src_offset = src_bv->bv_offset;
898 }
899
900 if (dst_offset == dst_bv->bv_offset + dst_bv->bv_len) {
901 dst_bv++;
902 if (dst_bv == bio_iovec_idx(dst, dst->bi_vcnt)) {
903 dst = dst->bi_next;
904 if (!dst)
905 break;
906
907 dst_bv = bio_iovec(dst);
908 }
909
910 dst_offset = dst_bv->bv_offset;
911 }
912
913 bytes = min(dst_bv->bv_offset + dst_bv->bv_len - dst_offset,
914 src_bv->bv_offset + src_bv->bv_len - src_offset);
915
916 src_p = kmap_atomic(src_bv->bv_page);
917 dst_p = kmap_atomic(dst_bv->bv_page);
918
 919 		memcpy(dst_p + dst_offset,
 920 		       src_p + src_offset,
 921 		       bytes);
922
923 kunmap_atomic(dst_p);
924 kunmap_atomic(src_p);
925
926 src_offset += bytes;
927 dst_offset += bytes;
928 }
929}
930EXPORT_SYMBOL(bio_copy_data);
931
655struct bio_map_data { 932struct bio_map_data {
656 struct bio_vec *iovecs; 933 struct bio_vec *iovecs;
657 struct sg_iovec *sgvecs; 934 struct sg_iovec *sgvecs;
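
The submit_bio_wait() helper added above removes the usual completion boilerplate from trivial synchronous I/O. A hedged sketch of a caller, assuming a single lowmem page and illustrative names only (read_page_sync is not part of this patch):

	static int read_page_sync(struct block_device *bdev, sector_t sector,
				  struct page *page)
	{
		struct bio *bio = bio_alloc(GFP_NOIO, 1);
		int ret;

		if (!bio)
			return -ENOMEM;

		bio->bi_bdev = bdev;
		bio->bi_sector = sector;
		bio_add_page(bio, page, PAGE_SIZE, 0);

		/* Sleeps until the bio's end_io fires and returns its error. */
		ret = submit_bio_wait(READ, bio);
		bio_put(bio);
		return ret;
	}

Note that submit_bio_wait() ORs in REQ_SYNC itself, so the caller does not need to.
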
@@ -714,7 +991,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
714 int iov_idx = 0; 991 int iov_idx = 0;
715 unsigned int iov_off = 0; 992 unsigned int iov_off = 0;
716 993
717 __bio_for_each_segment(bvec, bio, i, 0) { 994 bio_for_each_segment_all(bvec, bio, i) {
718 char *bv_addr = page_address(bvec->bv_page); 995 char *bv_addr = page_address(bvec->bv_page);
719 unsigned int bv_len = iovecs[i].bv_len; 996 unsigned int bv_len = iovecs[i].bv_len;
720 997
@@ -896,7 +1173,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
896 return bio; 1173 return bio;
897cleanup: 1174cleanup:
898 if (!map_data) 1175 if (!map_data)
899 bio_for_each_segment(bvec, bio, i) 1176 bio_for_each_segment_all(bvec, bio, i)
900 __free_page(bvec->bv_page); 1177 __free_page(bvec->bv_page);
901 1178
902 bio_put(bio); 1179 bio_put(bio);
@@ -1110,7 +1387,7 @@ static void __bio_unmap_user(struct bio *bio)
1110 /* 1387 /*
1111 * make sure we dirty pages we wrote to 1388 * make sure we dirty pages we wrote to
1112 */ 1389 */
1113 __bio_for_each_segment(bvec, bio, i, 0) { 1390 bio_for_each_segment_all(bvec, bio, i) {
1114 if (bio_data_dir(bio) == READ) 1391 if (bio_data_dir(bio) == READ)
1115 set_page_dirty_lock(bvec->bv_page); 1392 set_page_dirty_lock(bvec->bv_page);
1116 1393
@@ -1216,7 +1493,7 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
1216 int i; 1493 int i;
1217 char *p = bmd->sgvecs[0].iov_base; 1494 char *p = bmd->sgvecs[0].iov_base;
1218 1495
1219 __bio_for_each_segment(bvec, bio, i, 0) { 1496 bio_for_each_segment_all(bvec, bio, i) {
1220 char *addr = page_address(bvec->bv_page); 1497 char *addr = page_address(bvec->bv_page);
1221 int len = bmd->iovecs[i].bv_len; 1498 int len = bmd->iovecs[i].bv_len;
1222 1499
@@ -1256,7 +1533,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
1256 if (!reading) { 1533 if (!reading) {
1257 void *p = data; 1534 void *p = data;
1258 1535
1259 bio_for_each_segment(bvec, bio, i) { 1536 bio_for_each_segment_all(bvec, bio, i) {
1260 char *addr = page_address(bvec->bv_page); 1537 char *addr = page_address(bvec->bv_page);
1261 1538
1262 memcpy(addr, p, bvec->bv_len); 1539 memcpy(addr, p, bvec->bv_len);
@@ -1301,11 +1578,11 @@ EXPORT_SYMBOL(bio_copy_kern);
1301 */ 1578 */
1302void bio_set_pages_dirty(struct bio *bio) 1579void bio_set_pages_dirty(struct bio *bio)
1303{ 1580{
1304 struct bio_vec *bvec = bio->bi_io_vec; 1581 struct bio_vec *bvec;
1305 int i; 1582 int i;
1306 1583
1307 for (i = 0; i < bio->bi_vcnt; i++) { 1584 bio_for_each_segment_all(bvec, bio, i) {
1308 struct page *page = bvec[i].bv_page; 1585 struct page *page = bvec->bv_page;
1309 1586
1310 if (page && !PageCompound(page)) 1587 if (page && !PageCompound(page))
1311 set_page_dirty_lock(page); 1588 set_page_dirty_lock(page);
@@ -1314,11 +1591,11 @@ void bio_set_pages_dirty(struct bio *bio)
1314 1591
1315static void bio_release_pages(struct bio *bio) 1592static void bio_release_pages(struct bio *bio)
1316{ 1593{
1317 struct bio_vec *bvec = bio->bi_io_vec; 1594 struct bio_vec *bvec;
1318 int i; 1595 int i;
1319 1596
1320 for (i = 0; i < bio->bi_vcnt; i++) { 1597 bio_for_each_segment_all(bvec, bio, i) {
1321 struct page *page = bvec[i].bv_page; 1598 struct page *page = bvec->bv_page;
1322 1599
1323 if (page) 1600 if (page)
1324 put_page(page); 1601 put_page(page);
@@ -1367,16 +1644,16 @@ static void bio_dirty_fn(struct work_struct *work)
1367 1644
1368void bio_check_pages_dirty(struct bio *bio) 1645void bio_check_pages_dirty(struct bio *bio)
1369{ 1646{
1370 struct bio_vec *bvec = bio->bi_io_vec; 1647 struct bio_vec *bvec;
1371 int nr_clean_pages = 0; 1648 int nr_clean_pages = 0;
1372 int i; 1649 int i;
1373 1650
1374 for (i = 0; i < bio->bi_vcnt; i++) { 1651 bio_for_each_segment_all(bvec, bio, i) {
1375 struct page *page = bvec[i].bv_page; 1652 struct page *page = bvec->bv_page;
1376 1653
1377 if (PageDirty(page) || PageCompound(page)) { 1654 if (PageDirty(page) || PageCompound(page)) {
1378 page_cache_release(page); 1655 page_cache_release(page);
1379 bvec[i].bv_page = NULL; 1656 bvec->bv_page = NULL;
1380 } else { 1657 } else {
1381 nr_clean_pages++; 1658 nr_clean_pages++;
1382 } 1659 }
@@ -1479,8 +1756,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
1479 trace_block_split(bdev_get_queue(bi->bi_bdev), bi, 1756 trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
1480 bi->bi_sector + first_sectors); 1757 bi->bi_sector + first_sectors);
1481 1758
1482 BUG_ON(bi->bi_vcnt != 1 && bi->bi_vcnt != 0); 1759 BUG_ON(bio_segments(bi) > 1);
1483 BUG_ON(bi->bi_idx != 0);
1484 atomic_set(&bp->cnt, 3); 1760 atomic_set(&bp->cnt, 3);
1485 bp->error = 0; 1761 bp->error = 0;
1486 bp->bio1 = *bi; 1762 bp->bio1 = *bi;
@@ -1490,8 +1766,8 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
1490 bp->bio1.bi_size = first_sectors << 9; 1766 bp->bio1.bi_size = first_sectors << 9;
1491 1767
1492 if (bi->bi_vcnt != 0) { 1768 if (bi->bi_vcnt != 0) {
1493 bp->bv1 = bi->bi_io_vec[0]; 1769 bp->bv1 = *bio_iovec(bi);
1494 bp->bv2 = bi->bi_io_vec[0]; 1770 bp->bv2 = *bio_iovec(bi);
1495 1771
1496 if (bio_is_rw(bi)) { 1772 if (bio_is_rw(bi)) {
1497 bp->bv2.bv_offset += first_sectors << 9; 1773 bp->bv2.bv_offset += first_sectors << 9;
@@ -1543,7 +1819,7 @@ sector_t bio_sector_offset(struct bio *bio, unsigned short index,
1543 if (index >= bio->bi_idx) 1819 if (index >= bio->bi_idx)
1544 index = bio->bi_vcnt - 1; 1820 index = bio->bi_vcnt - 1;
1545 1821
1546 __bio_for_each_segment(bv, bio, i, 0) { 1822 bio_for_each_segment_all(bv, bio, i) {
1547 if (i == index) { 1823 if (i == index) {
1548 if (offset > bv->bv_offset) 1824 if (offset > bv->bv_offset)
1549 sectors += (offset - bv->bv_offset) / sector_sz; 1825 sectors += (offset - bv->bv_offset) / sector_sz;
@@ -1561,29 +1837,25 @@ EXPORT_SYMBOL(bio_sector_offset);
1561 * create memory pools for biovec's in a bio_set. 1837 * create memory pools for biovec's in a bio_set.
1562 * use the global biovec slabs created for general use. 1838 * use the global biovec slabs created for general use.
1563 */ 1839 */
1564static int biovec_create_pools(struct bio_set *bs, int pool_entries) 1840mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries)
1565{ 1841{
1566 struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; 1842 struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
1567 1843
1568 bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab); 1844 return mempool_create_slab_pool(pool_entries, bp->slab);
1569 if (!bs->bvec_pool)
1570 return -ENOMEM;
1571
1572 return 0;
1573}
1574
1575static void biovec_free_pools(struct bio_set *bs)
1576{
1577 mempool_destroy(bs->bvec_pool);
1578} 1845}
1579 1846
1580void bioset_free(struct bio_set *bs) 1847void bioset_free(struct bio_set *bs)
1581{ 1848{
1849 if (bs->rescue_workqueue)
1850 destroy_workqueue(bs->rescue_workqueue);
1851
1582 if (bs->bio_pool) 1852 if (bs->bio_pool)
1583 mempool_destroy(bs->bio_pool); 1853 mempool_destroy(bs->bio_pool);
1584 1854
1855 if (bs->bvec_pool)
1856 mempool_destroy(bs->bvec_pool);
1857
1585 bioset_integrity_free(bs); 1858 bioset_integrity_free(bs);
1586 biovec_free_pools(bs);
1587 bio_put_slab(bs); 1859 bio_put_slab(bs);
1588 1860
1589 kfree(bs); 1861 kfree(bs);
@@ -1614,6 +1886,10 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1614 1886
1615 bs->front_pad = front_pad; 1887 bs->front_pad = front_pad;
1616 1888
1889 spin_lock_init(&bs->rescue_lock);
1890 bio_list_init(&bs->rescue_list);
1891 INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
1892
1617 bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad); 1893 bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
1618 if (!bs->bio_slab) { 1894 if (!bs->bio_slab) {
1619 kfree(bs); 1895 kfree(bs);
@@ -1624,9 +1900,15 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1624 if (!bs->bio_pool) 1900 if (!bs->bio_pool)
1625 goto bad; 1901 goto bad;
1626 1902
1627 if (!biovec_create_pools(bs, pool_size)) 1903 bs->bvec_pool = biovec_create_pool(bs, pool_size);
1628 return bs; 1904 if (!bs->bvec_pool)
1905 goto bad;
1906
1907 bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
1908 if (!bs->rescue_workqueue)
1909 goto bad;
1629 1910
1911 return bs;
1630bad: 1912bad:
1631 bioset_free(bs); 1913 bioset_free(bs);
1632 return NULL; 1914 return NULL;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f173c5af6461..bed072aa461f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2527,8 +2527,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
2527 if (old_compressed) 2527 if (old_compressed)
2528 contig = bio->bi_sector == sector; 2528 contig = bio->bi_sector == sector;
2529 else 2529 else
2530 contig = bio->bi_sector + (bio->bi_size >> 9) == 2530 contig = bio_end_sector(bio) == sector;
2531 sector;
2532 2531
2533 if (prev_bio_flags != bio_flags || !contig || 2532 if (prev_bio_flags != bio_flags || !contig ||
2534 merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || 2533 merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5989a92236f7..d90e0485e01b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5166,7 +5166,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio,
5166 } 5166 }
5167 5167
5168 prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; 5168 prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
5169 if ((bio->bi_size >> 9) > max_sectors) 5169 if (bio_sectors(bio) > max_sectors)
5170 return 0; 5170 return 0;
5171 5171
5172 if (!q->merge_bvec_fn) 5172 if (!q->merge_bvec_fn)
diff --git a/fs/buffer.c b/fs/buffer.c
index b4dcb34c9635..ecd3792ae0e9 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2979,7 +2979,6 @@ int submit_bh(int rw, struct buffer_head * bh)
2979 bio->bi_io_vec[0].bv_offset = bh_offset(bh); 2979 bio->bi_io_vec[0].bv_offset = bh_offset(bh);
2980 2980
2981 bio->bi_vcnt = 1; 2981 bio->bi_vcnt = 1;
2982 bio->bi_idx = 0;
2983 bio->bi_size = bh->b_size; 2982 bio->bi_size = bh->b_size;
2984 2983
2985 bio->bi_end_io = end_bio_bh_io_sync; 2984 bio->bi_end_io = end_bio_bh_io_sync;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index f853263cf74f..38484b08a39a 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -441,8 +441,8 @@ static struct bio *dio_await_one(struct dio *dio)
441static int dio_bio_complete(struct dio *dio, struct bio *bio) 441static int dio_bio_complete(struct dio *dio, struct bio *bio)
442{ 442{
443 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 443 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
444 struct bio_vec *bvec = bio->bi_io_vec; 444 struct bio_vec *bvec;
445 int page_no; 445 unsigned i;
446 446
447 if (!uptodate) 447 if (!uptodate)
448 dio->io_error = -EIO; 448 dio->io_error = -EIO;
@@ -450,8 +450,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
450 if (dio->is_async && dio->rw == READ) { 450 if (dio->is_async && dio->rw == READ) {
451 bio_check_pages_dirty(bio); /* transfers ownership */ 451 bio_check_pages_dirty(bio); /* transfers ownership */
452 } else { 452 } else {
453 for (page_no = 0; page_no < bio->bi_vcnt; page_no++) { 453 bio_for_each_segment_all(bvec, bio, i) {
454 struct page *page = bvec[page_no].bv_page; 454 struct page *page = bvec->bv_page;
455 455
456 if (dio->rw == READ && !PageCompound(page)) 456 if (dio->rw == READ && !PageCompound(page))
457 set_page_dirty_lock(page); 457 set_page_dirty_lock(page);
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index f936cb50dc0d..b74422888604 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -401,7 +401,7 @@ static void _clear_bio(struct bio *bio)
401 struct bio_vec *bv; 401 struct bio_vec *bv;
402 unsigned i; 402 unsigned i;
403 403
404 __bio_for_each_segment(bv, bio, i, 0) { 404 bio_for_each_segment_all(bv, bio, i) {
405 unsigned this_count = bv->bv_len; 405 unsigned this_count = bv->bv_len;
406 406
407 if (likely(PAGE_SIZE == this_count)) 407 if (likely(PAGE_SIZE == this_count))
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index b963f38ac298..7682b970d0f1 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -432,7 +432,7 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
432 if (!bio) 432 if (!bio)
433 continue; 433 continue;
434 434
435 __bio_for_each_segment(bv, bio, i, 0) { 435 bio_for_each_segment_all(bv, bio, i) {
436 struct page *page = bv->bv_page; 436 struct page *page = bv->bv_page;
437 437
438 SetPageUptodate(page); 438 SetPageUptodate(page);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index a5055977a214..5c37ef982390 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -300,7 +300,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno)
300 u64 nblk; 300 u64 nblk;
301 301
302 if (bio) { 302 if (bio) {
303 nblk = bio->bi_sector + bio_sectors(bio); 303 nblk = bio_end_sector(bio);
304 nblk >>= sdp->sd_fsb2bb_shift; 304 nblk >>= sdp->sd_fsb2bb_shift;
305 if (blkno == nblk) 305 if (blkno == nblk)
306 return bio; 306 return bio;
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 2eb952c41a69..8ae5e350da43 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2004,7 +2004,6 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
2004 bio->bi_io_vec[0].bv_offset = bp->l_offset; 2004 bio->bi_io_vec[0].bv_offset = bp->l_offset;
2005 2005
2006 bio->bi_vcnt = 1; 2006 bio->bi_vcnt = 1;
2007 bio->bi_idx = 0;
2008 bio->bi_size = LOGPSIZE; 2007 bio->bi_size = LOGPSIZE;
2009 2008
2010 bio->bi_end_io = lbmIODone; 2009 bio->bi_end_io = lbmIODone;
@@ -2145,7 +2144,6 @@ static void lbmStartIO(struct lbuf * bp)
2145 bio->bi_io_vec[0].bv_offset = bp->l_offset; 2144 bio->bi_io_vec[0].bv_offset = bp->l_offset;
2146 2145
2147 bio->bi_vcnt = 1; 2146 bio->bi_vcnt = 1;
2148 bio->bi_idx = 0;
2149 bio->bi_size = LOGPSIZE; 2147 bio->bi_size = LOGPSIZE;
2150 2148
2151 bio->bi_end_io = lbmIODone; 2149 bio->bi_end_io = lbmIODone;
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index e784a217b500..550475ca6a0e 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -32,7 +32,6 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
32 bio_vec.bv_len = PAGE_SIZE; 32 bio_vec.bv_len = PAGE_SIZE;
33 bio_vec.bv_offset = 0; 33 bio_vec.bv_offset = 0;
34 bio.bi_vcnt = 1; 34 bio.bi_vcnt = 1;
35 bio.bi_idx = 0;
36 bio.bi_size = PAGE_SIZE; 35 bio.bi_size = PAGE_SIZE;
37 bio.bi_bdev = bdev; 36 bio.bi_bdev = bdev;
38 bio.bi_sector = page->index * (PAGE_SIZE >> 9); 37 bio.bi_sector = page->index * (PAGE_SIZE >> 9);
@@ -108,7 +107,6 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
108 if (i >= max_pages) { 107 if (i >= max_pages) {
109 /* Block layer cannot split bios :( */ 108 /* Block layer cannot split bios :( */
110 bio->bi_vcnt = i; 109 bio->bi_vcnt = i;
111 bio->bi_idx = 0;
112 bio->bi_size = i * PAGE_SIZE; 110 bio->bi_size = i * PAGE_SIZE;
113 bio->bi_bdev = super->s_bdev; 111 bio->bi_bdev = super->s_bdev;
114 bio->bi_sector = ofs >> 9; 112 bio->bi_sector = ofs >> 9;
@@ -136,7 +134,6 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
136 unlock_page(page); 134 unlock_page(page);
137 } 135 }
138 bio->bi_vcnt = nr_pages; 136 bio->bi_vcnt = nr_pages;
139 bio->bi_idx = 0;
140 bio->bi_size = nr_pages * PAGE_SIZE; 137 bio->bi_size = nr_pages * PAGE_SIZE;
141 bio->bi_bdev = super->s_bdev; 138 bio->bi_bdev = super->s_bdev;
142 bio->bi_sector = ofs >> 9; 139 bio->bi_sector = ofs >> 9;
@@ -202,7 +199,6 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
202 if (i >= max_pages) { 199 if (i >= max_pages) {
203 /* Block layer cannot split bios :( */ 200 /* Block layer cannot split bios :( */
204 bio->bi_vcnt = i; 201 bio->bi_vcnt = i;
205 bio->bi_idx = 0;
206 bio->bi_size = i * PAGE_SIZE; 202 bio->bi_size = i * PAGE_SIZE;
207 bio->bi_bdev = super->s_bdev; 203 bio->bi_bdev = super->s_bdev;
208 bio->bi_sector = ofs >> 9; 204 bio->bi_sector = ofs >> 9;
@@ -224,7 +220,6 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
224 bio->bi_io_vec[i].bv_offset = 0; 220 bio->bi_io_vec[i].bv_offset = 0;
225 } 221 }
226 bio->bi_vcnt = nr_pages; 222 bio->bi_vcnt = nr_pages;
227 bio->bi_idx = 0;
228 bio->bi_size = nr_pages * PAGE_SIZE; 223 bio->bi_size = nr_pages * PAGE_SIZE;
229 bio->bi_bdev = super->s_bdev; 224 bio->bi_bdev = super->s_bdev;
230 bio->bi_sector = ofs >> 9; 225 bio->bi_sector = ofs >> 9;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 820e7aaad4fd..ef24466d8f82 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -67,6 +67,7 @@
67#define bio_offset(bio) bio_iovec((bio))->bv_offset 67#define bio_offset(bio) bio_iovec((bio))->bv_offset
68#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) 68#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx)
69#define bio_sectors(bio) ((bio)->bi_size >> 9) 69#define bio_sectors(bio) ((bio)->bi_size >> 9)
70#define bio_end_sector(bio) ((bio)->bi_sector + bio_sectors((bio)))
70 71
71static inline unsigned int bio_cur_bytes(struct bio *bio) 72static inline unsigned int bio_cur_bytes(struct bio *bio)
72{ 73{
@@ -84,11 +85,6 @@ static inline void *bio_data(struct bio *bio)
84 return NULL; 85 return NULL;
85} 86}
86 87
87static inline int bio_has_allocated_vec(struct bio *bio)
88{
89 return bio->bi_io_vec && bio->bi_io_vec != bio->bi_inline_vecs;
90}
91
92/* 88/*
93 * will die 89 * will die
94 */ 90 */
@@ -136,16 +132,27 @@ static inline int bio_has_allocated_vec(struct bio *bio)
136#define bio_io_error(bio) bio_endio((bio), -EIO) 132#define bio_io_error(bio) bio_endio((bio), -EIO)
137 133
138/* 134/*
139 * drivers should not use the __ version unless they _really_ want to 135 * drivers should not use the __ version unless they _really_ know what
140 * run through the entire bio and not just pending pieces 136 * they're doing
141 */ 137 */
142#define __bio_for_each_segment(bvl, bio, i, start_idx) \ 138#define __bio_for_each_segment(bvl, bio, i, start_idx) \
143 for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx); \ 139 for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx); \
144 i < (bio)->bi_vcnt; \ 140 i < (bio)->bi_vcnt; \
145 bvl++, i++) 141 bvl++, i++)
146 142
143/*
144 * drivers should _never_ use the all version - the bio may have been split
145 * before it got to the driver and the driver won't own all of it
146 */
147#define bio_for_each_segment_all(bvl, bio, i) \
148 for (i = 0; \
149 bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \
150 i++)
151
147#define bio_for_each_segment(bvl, bio, i) \ 152#define bio_for_each_segment(bvl, bio, i) \
148 __bio_for_each_segment(bvl, bio, i, (bio)->bi_idx) 153 for (i = (bio)->bi_idx; \
154 bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \
155 i++)
149 156
150/* 157/*
151 * get a reference to a bio, so it won't disappear. the intended use is 158 * get a reference to a bio, so it won't disappear. the intended use is
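
The difference between the two iterators is the point of this hunk: bio_for_each_segment() now starts at bi_idx and walks only the not-yet-completed portion, while bio_for_each_segment_all() walks every bvec and is only safe for code that built the bio and owns all of its pages. An illustrative sketch, not taken from this patch (zero_remaining and zero_owned are made-up names):

	/* Zero whatever has not been completed yet - safe for any holder. */
	static void zero_remaining(struct bio *bio)
	{
		struct bio_vec *bv;
		int i;

		bio_for_each_segment(bv, bio, i) {
			char *p = kmap_atomic(bv->bv_page);

			memset(p + bv->bv_offset, 0, bv->bv_len);
			kunmap_atomic(p);
		}
	}

	/* Zero every page the bio carries - only valid for the bio's owner,
	 * e.g. right after bio_alloc_pages(); never in a driver that might
	 * be handed a split bio. */
	static void zero_owned(struct bio *bio)
	{
		struct bio_vec *bv;
		int i;

		bio_for_each_segment_all(bv, bio, i) {
			char *p = kmap_atomic(bv->bv_page);

			memset(p + bv->bv_offset, 0, bv->bv_len);
			kunmap_atomic(p);
		}
	}
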
@@ -180,9 +187,12 @@ struct bio_integrity_payload {
180 unsigned short bip_slab; /* slab the bip came from */ 187 unsigned short bip_slab; /* slab the bip came from */
181 unsigned short bip_vcnt; /* # of integrity bio_vecs */ 188 unsigned short bip_vcnt; /* # of integrity bio_vecs */
182 unsigned short bip_idx; /* current bip_vec index */ 189 unsigned short bip_idx; /* current bip_vec index */
190 unsigned bip_owns_buf:1; /* should free bip_buf */
183 191
184 struct work_struct bip_work; /* I/O completion */ 192 struct work_struct bip_work; /* I/O completion */
185 struct bio_vec bip_vec[0]; /* embedded bvec array */ 193
194 struct bio_vec *bip_vec;
195 struct bio_vec bip_inline_vecs[0];/* embedded bvec array */
186}; 196};
187#endif /* CONFIG_BLK_DEV_INTEGRITY */ 197#endif /* CONFIG_BLK_DEV_INTEGRITY */
188 198
@@ -211,6 +221,7 @@ extern void bio_pair_release(struct bio_pair *dbio);
211 221
212extern struct bio_set *bioset_create(unsigned int, unsigned int); 222extern struct bio_set *bioset_create(unsigned int, unsigned int);
213extern void bioset_free(struct bio_set *); 223extern void bioset_free(struct bio_set *);
224extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries);
214 225
215extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); 226extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
216extern void bio_put(struct bio *); 227extern void bio_put(struct bio *);
@@ -245,6 +256,9 @@ extern void bio_endio(struct bio *, int);
245struct request_queue; 256struct request_queue;
246extern int bio_phys_segments(struct request_queue *, struct bio *); 257extern int bio_phys_segments(struct request_queue *, struct bio *);
247 258
259extern int submit_bio_wait(int rw, struct bio *bio);
260extern void bio_advance(struct bio *, unsigned);
261
248extern void bio_init(struct bio *); 262extern void bio_init(struct bio *);
249extern void bio_reset(struct bio *); 263extern void bio_reset(struct bio *);
250 264
@@ -279,6 +293,9 @@ static inline void bio_flush_dcache_pages(struct bio *bi)
279} 293}
280#endif 294#endif
281 295
296extern void bio_copy_data(struct bio *dst, struct bio *src);
297extern int bio_alloc_pages(struct bio *bio, gfp_t gfp);
298
282extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, 299extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *,
283 unsigned long, unsigned int, int, gfp_t); 300 unsigned long, unsigned int, int, gfp_t);
284extern struct bio *bio_copy_user_iov(struct request_queue *, 301extern struct bio *bio_copy_user_iov(struct request_queue *,
@@ -286,8 +303,8 @@ extern struct bio *bio_copy_user_iov(struct request_queue *,
286 int, int, gfp_t); 303 int, int, gfp_t);
287extern int bio_uncopy_user(struct bio *); 304extern int bio_uncopy_user(struct bio *);
288void zero_fill_bio(struct bio *bio); 305void zero_fill_bio(struct bio *bio);
289extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); 306extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *);
290extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int); 307extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int);
291extern unsigned int bvec_nr_vecs(unsigned short idx); 308extern unsigned int bvec_nr_vecs(unsigned short idx);
292 309
293#ifdef CONFIG_BLK_CGROUP 310#ifdef CONFIG_BLK_CGROUP
@@ -298,39 +315,6 @@ static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
298static inline void bio_disassociate_task(struct bio *bio) { } 315static inline void bio_disassociate_task(struct bio *bio) { }
299#endif /* CONFIG_BLK_CGROUP */ 316#endif /* CONFIG_BLK_CGROUP */
300 317
301/*
302 * bio_set is used to allow other portions of the IO system to
303 * allocate their own private memory pools for bio and iovec structures.
304 * These memory pools in turn all allocate from the bio_slab
305 * and the bvec_slabs[].
306 */
307#define BIO_POOL_SIZE 2
308#define BIOVEC_NR_POOLS 6
309#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1)
310
311struct bio_set {
312 struct kmem_cache *bio_slab;
313 unsigned int front_pad;
314
315 mempool_t *bio_pool;
316#if defined(CONFIG_BLK_DEV_INTEGRITY)
317 mempool_t *bio_integrity_pool;
318#endif
319 mempool_t *bvec_pool;
320};
321
322struct biovec_slab {
323 int nr_vecs;
324 char *name;
325 struct kmem_cache *slab;
326};
327
328/*
329 * a small number of entries is fine, not going to be performance critical.
330 * basically we just need to survive
331 */
332#define BIO_SPLIT_ENTRIES 2
333
334#ifdef CONFIG_HIGHMEM 318#ifdef CONFIG_HIGHMEM
335/* 319/*
336 * remember never ever reenable interrupts between a bvec_kmap_irq and 320 * remember never ever reenable interrupts between a bvec_kmap_irq and
@@ -527,6 +511,49 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
527 return bio; 511 return bio;
528} 512}
529 513
514/*
515 * bio_set is used to allow other portions of the IO system to
516 * allocate their own private memory pools for bio and iovec structures.
517 * These memory pools in turn all allocate from the bio_slab
518 * and the bvec_slabs[].
519 */
520#define BIO_POOL_SIZE 2
521#define BIOVEC_NR_POOLS 6
522#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1)
523
524struct bio_set {
525 struct kmem_cache *bio_slab;
526 unsigned int front_pad;
527
528 mempool_t *bio_pool;
529 mempool_t *bvec_pool;
530#if defined(CONFIG_BLK_DEV_INTEGRITY)
531 mempool_t *bio_integrity_pool;
532 mempool_t *bvec_integrity_pool;
533#endif
534
535 /*
536 * Deadlock avoidance for stacking block drivers: see comments in
537 * bio_alloc_bioset() for details
538 */
539 spinlock_t rescue_lock;
540 struct bio_list rescue_list;
541 struct work_struct rescue_work;
542 struct workqueue_struct *rescue_workqueue;
543};
544
545struct biovec_slab {
546 int nr_vecs;
547 char *name;
548 struct kmem_cache *slab;
549};
550
551/*
552 * a small number of entries is fine, not going to be performance critical.
553 * basically we just need to survive
554 */
555#define BIO_SPLIT_ENTRIES 2
556
530#if defined(CONFIG_BLK_DEV_INTEGRITY) 557#if defined(CONFIG_BLK_DEV_INTEGRITY)
531 558
532#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) 559#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)]))
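[Annotation] struct bio_set moves below the bio_list helpers so that its new rescue_list member can use that type, and it grows a per-set rescuer (lock, list, work item, workqueue) for the stacking-driver deadlock the embedded comment points at: a make_request_fn that blocks in its own mempool can strand bios already queued on current->bio_list. A heavily simplified sketch of the punt step that machinery implies, using only the members declared above; illustrative, not the actual fs/bio.c code:

	#include <linux/bio.h>
	#include <linux/workqueue.h>

	static void punt_bios_to_rescuer(struct bio_set *bs, struct bio_list *stuck)
	{
		unsigned long flags;

		spin_lock_irqsave(&bs->rescue_lock, flags);
		bio_list_merge(&bs->rescue_list, stuck);	/* hand the bios to the rescuer */
		bio_list_init(stuck);
		spin_unlock_irqrestore(&bs->rescue_lock, flags);

		queue_work(bs->rescue_workqueue, &bs->rescue_work);
	}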
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index fcc1ce28d5ca..e8de67053cd4 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -117,6 +117,7 @@ struct bio {
117 * BIO_POOL_IDX() 117 * BIO_POOL_IDX()
118 */ 118 */
119#define BIO_RESET_BITS 12 119#define BIO_RESET_BITS 12
120#define BIO_OWNS_VEC 12 /* bio_free() should free bvec */
120 121
121#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) 122#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
122 123
@@ -198,6 +199,8 @@ enum rq_flag_bits {
198 REQ_SECURE) 199 REQ_SECURE)
199#define REQ_CLONE_MASK REQ_COMMON_MASK 200#define REQ_CLONE_MASK REQ_COMMON_MASK
200 201
202#define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME)
203
201/* This mask is used for both bio and request merge checking */ 204/* This mask is used for both bio and request merge checking */
202#define REQ_NOMERGE_FLAGS \ 205#define REQ_NOMERGE_FLAGS \
203 (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) 206 (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
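[Annotation] BIO_NO_ADVANCE_ITER_MASK groups REQ_DISCARD and REQ_WRITE_SAME, the request types whose bi_size describes the affected range rather than payload held in bi_io_vec, so iteration code must not walk the bvec array by bi_size for them. A sketch of the kind of predicate this mask enables; the helper name is hypothetical, the real consumers arrive with the immutable-biovec iteration code:

	#include <linux/blk_types.h>

	static inline bool bio_no_advance_iter(struct bio *bio)
	{
		/* discard/write-same: advance only bi_sector/bi_size, not the bvecs */
		return bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK;
	}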
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 9961726523d0..5a28843725df 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -244,7 +244,7 @@ TRACE_EVENT(block_bio_bounce,
244 __entry->dev = bio->bi_bdev ? 244 __entry->dev = bio->bi_bdev ?
245 bio->bi_bdev->bd_dev : 0; 245 bio->bi_bdev->bd_dev : 0;
246 __entry->sector = bio->bi_sector; 246 __entry->sector = bio->bi_sector;
247 __entry->nr_sector = bio->bi_size >> 9; 247 __entry->nr_sector = bio_sectors(bio);
248 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 248 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
249 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 249 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
250 ), 250 ),
@@ -281,7 +281,7 @@ TRACE_EVENT(block_bio_complete,
281 __entry->dev = bio->bi_bdev ? 281 __entry->dev = bio->bi_bdev ?
282 bio->bi_bdev->bd_dev : 0; 282 bio->bi_bdev->bd_dev : 0;
283 __entry->sector = bio->bi_sector; 283 __entry->sector = bio->bi_sector;
284 __entry->nr_sector = bio->bi_size >> 9; 284 __entry->nr_sector = bio_sectors(bio);
285 __entry->error = error; 285 __entry->error = error;
286 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 286 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
287 ), 287 ),
@@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(block_bio_merge,
309 TP_fast_assign( 309 TP_fast_assign(
310 __entry->dev = bio->bi_bdev->bd_dev; 310 __entry->dev = bio->bi_bdev->bd_dev;
311 __entry->sector = bio->bi_sector; 311 __entry->sector = bio->bi_sector;
312 __entry->nr_sector = bio->bi_size >> 9; 312 __entry->nr_sector = bio_sectors(bio);
313 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 313 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
314 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 314 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
315 ), 315 ),
@@ -376,7 +376,7 @@ TRACE_EVENT(block_bio_queue,
376 TP_fast_assign( 376 TP_fast_assign(
377 __entry->dev = bio->bi_bdev->bd_dev; 377 __entry->dev = bio->bi_bdev->bd_dev;
378 __entry->sector = bio->bi_sector; 378 __entry->sector = bio->bi_sector;
379 __entry->nr_sector = bio->bi_size >> 9; 379 __entry->nr_sector = bio_sectors(bio);
380 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 380 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
381 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 381 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
382 ), 382 ),
@@ -404,7 +404,7 @@ DECLARE_EVENT_CLASS(block_get_rq,
404 TP_fast_assign( 404 TP_fast_assign(
405 __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; 405 __entry->dev = bio ? bio->bi_bdev->bd_dev : 0;
406 __entry->sector = bio ? bio->bi_sector : 0; 406 __entry->sector = bio ? bio->bi_sector : 0;
407 __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; 407 __entry->nr_sector = bio ? bio_sectors(bio) : 0;
408 blk_fill_rwbs(__entry->rwbs, 408 blk_fill_rwbs(__entry->rwbs,
409 bio ? bio->bi_rw : 0, __entry->nr_sector); 409 bio ? bio->bi_rw : 0, __entry->nr_sector);
410 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 410 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
@@ -580,7 +580,7 @@ TRACE_EVENT(block_bio_remap,
580 TP_fast_assign( 580 TP_fast_assign(
581 __entry->dev = bio->bi_bdev->bd_dev; 581 __entry->dev = bio->bi_bdev->bd_dev;
582 __entry->sector = bio->bi_sector; 582 __entry->sector = bio->bi_sector;
583 __entry->nr_sector = bio->bi_size >> 9; 583 __entry->nr_sector = bio_sectors(bio);
584 __entry->old_dev = dev; 584 __entry->old_dev = dev;
585 __entry->old_sector = from; 585 __entry->old_sector = from;
586 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); 586 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
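[Annotation] The tracepoint changes are mechanical: every open-coded bi_size >> 9 becomes bio_sectors(bio), the existing accessor in <linux/bio.h>, which in this kernel generation expands to exactly that shift. Shown only to make the substitution explicit; the wrapper function is hypothetical:

	#include <linux/bio.h>

	static inline unsigned int bio_nr_sectors(struct bio *bio)
	{
		return bio_sectors(bio);	/* same value as bio->bi_size >> 9 */
	}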
diff --git a/mm/bounce.c b/mm/bounce.c
index 5f8901768602..f5326b24d65d 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -101,7 +101,7 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
101 struct bio_vec *tovec, *fromvec; 101 struct bio_vec *tovec, *fromvec;
102 int i; 102 int i;
103 103
104 __bio_for_each_segment(tovec, to, i, 0) { 104 bio_for_each_segment(tovec, to, i) {
105 fromvec = from->bi_io_vec + i; 105 fromvec = from->bi_io_vec + i;
106 106
107 /* 107 /*
@@ -134,7 +134,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
134 /* 134 /*
135 * free up bounce indirect pages used 135 * free up bounce indirect pages used
136 */ 136 */
137 __bio_for_each_segment(bvec, bio, i, 0) { 137 bio_for_each_segment_all(bvec, bio, i) {
138 org_vec = bio_orig->bi_io_vec + i; 138 org_vec = bio_orig->bi_io_vec + i;
139 if (bvec->bv_page == org_vec->bv_page) 139 if (bvec->bv_page == org_vec->bv_page)
140 continue; 140 continue;
@@ -218,78 +218,43 @@ static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio)
218static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, 218static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
219 mempool_t *pool, int force) 219 mempool_t *pool, int force)
220{ 220{
221 struct page *page; 221 struct bio *bio;
222 struct bio *bio = NULL; 222 int rw = bio_data_dir(*bio_orig);
223 int i, rw = bio_data_dir(*bio_orig);
224 struct bio_vec *to, *from; 223 struct bio_vec *to, *from;
224 unsigned i;
225 225
226 bio_for_each_segment(from, *bio_orig, i) { 226 bio_for_each_segment(from, *bio_orig, i)
227 page = from->bv_page; 227 if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q))
228 goto bounce;
228 229
229 /* 230 return;
230 * is destination page below bounce pfn? 231bounce:
231 */ 232 bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set);
232 if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
233 continue;
234 233
235 /* 234 bio_for_each_segment_all(to, bio, i) {
236 * irk, bounce it 235 struct page *page = to->bv_page;
237 */
238 if (!bio) {
239 unsigned int cnt = (*bio_orig)->bi_vcnt;
240
241 bio = bio_alloc(GFP_NOIO, cnt);
242 memset(bio->bi_io_vec, 0, cnt * sizeof(struct bio_vec));
243 }
244
245 236
246 to = bio->bi_io_vec + i; 237 if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
238 continue;
247 239
248 to->bv_page = mempool_alloc(pool, q->bounce_gfp);
249 to->bv_len = from->bv_len;
250 to->bv_offset = from->bv_offset;
251 inc_zone_page_state(to->bv_page, NR_BOUNCE); 240 inc_zone_page_state(to->bv_page, NR_BOUNCE);
241 to->bv_page = mempool_alloc(pool, q->bounce_gfp);
252 242
253 if (rw == WRITE) { 243 if (rw == WRITE) {
254 char *vto, *vfrom; 244 char *vto, *vfrom;
255 245
256 flush_dcache_page(from->bv_page); 246 flush_dcache_page(page);
247
257 vto = page_address(to->bv_page) + to->bv_offset; 248 vto = page_address(to->bv_page) + to->bv_offset;
258 vfrom = kmap(from->bv_page) + from->bv_offset; 249 vfrom = kmap_atomic(page) + to->bv_offset;
259 memcpy(vto, vfrom, to->bv_len); 250 memcpy(vto, vfrom, to->bv_len);
260 kunmap(from->bv_page); 251 kunmap_atomic(vfrom);
261 } 252 }
262 } 253 }
263 254
264 /*
265 * no pages bounced
266 */
267 if (!bio)
268 return;
269
270 trace_block_bio_bounce(q, *bio_orig); 255 trace_block_bio_bounce(q, *bio_orig);
271 256
272 /*
273 * at least one page was bounced, fill in possible non-highmem
274 * pages
275 */
276 __bio_for_each_segment(from, *bio_orig, i, 0) {
277 to = bio_iovec_idx(bio, i);
278 if (!to->bv_page) {
279 to->bv_page = from->bv_page;
280 to->bv_len = from->bv_len;
281 to->bv_offset = from->bv_offset;
282 }
283 }
284
285 bio->bi_bdev = (*bio_orig)->bi_bdev;
286 bio->bi_flags |= (1 << BIO_BOUNCED); 257 bio->bi_flags |= (1 << BIO_BOUNCED);
287 bio->bi_sector = (*bio_orig)->bi_sector;
288 bio->bi_rw = (*bio_orig)->bi_rw;
289
290 bio->bi_vcnt = (*bio_orig)->bi_vcnt;
291 bio->bi_idx = (*bio_orig)->bi_idx;
292 bio->bi_size = (*bio_orig)->bi_size;
293 258
294 if (pool == page_pool) { 259 if (pool == page_pool) {
295 bio->bi_end_io = bounce_end_io_write; 260 bio->bi_end_io = bounce_end_io_write;
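[Annotation] The rewritten bounce path first scans the original bio and bails out if no page sits above the bounce pfn, then clones it with bio_clone_bioset() so the bi_bdev/bi_sector/bi_rw/bi_vcnt/bi_idx/bi_size copying the old code did by hand comes for free; only the offending pages are swapped for bounce pages. The write-side copy now uses kmap_atomic()/kunmap_atomic(), which pair on the mapped address rather than the page and must not sleep between map and unmap. A minimal, self-contained sketch of that atomic-kmap copy pattern; the helper name is made up:

	#include <linux/highmem.h>
	#include <linux/string.h>

	static void copy_from_highmem_page(struct page *page, void *dst,
					   unsigned int offset, unsigned int len)
	{
		char *vfrom = kmap_atomic(page);

		/* preemption is disabled across the mapping, so no sleeping here */
		memcpy(dst, vfrom + offset, len);
		kunmap_atomic(vfrom);
	}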
diff --git a/mm/page_io.c b/mm/page_io.c
index 78eee32ee486..8d3c0c088105 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -35,7 +35,6 @@ static struct bio *get_swap_bio(gfp_t gfp_flags,
35 bio->bi_io_vec[0].bv_len = PAGE_SIZE; 35 bio->bi_io_vec[0].bv_len = PAGE_SIZE;
36 bio->bi_io_vec[0].bv_offset = 0; 36 bio->bi_io_vec[0].bv_offset = 0;
37 bio->bi_vcnt = 1; 37 bio->bi_vcnt = 1;
38 bio->bi_idx = 0;
39 bio->bi_size = PAGE_SIZE; 38 bio->bi_size = PAGE_SIZE;
40 bio->bi_end_io = end_io; 39 bio->bi_end_io = end_io;
41 } 40 }