author		Kent Overstreet <kmo@daterainc.com>	2013-11-23 21:21:01 -0500
committer	Kent Overstreet <kmo@daterainc.com>	2013-11-24 01:33:57 -0500
commit		20d0189b1012a37d2533a87fb451f7852f2418d1 (patch)
tree		5ceaa6cfc0e1f1cec423c6c9f5de72d49f2d63a1
parent		ee67891bf132612feb7b999ee1f3350b40867cb4 (diff)
block: Introduce new bio_split()
The new bio_split() can split arbitrary bios - it's not restricted to
single page bios, like the old bio_split() (previously renamed to
bio_pair_split()). It also has different semantics - it doesn't allocate
a struct bio_pair, leaving it up to the caller to handle completions.

Then convert the existing bio_pair_split() users to the new bio_split()
- and also nvme, which was open coding bio splitting.

(We have to take that BUG_ON() out of bio_integrity_trim() because this
bio_split() needs to use it, and there's no reason it has to be used on
bios marked as cloned; BIO_CLONED doesn't seem to have clearly
documented semantics anyways.)

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Matthew Wilcox <matthew.r.wilcox@intel.com>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Neil Brown <neilb@suse.de>
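[Editor's note, not part of the commit: the caller-driven pattern the new interface enables looks roughly like the sketch below. my_make_request() and my_max_sectors() are hypothetical placeholders; bio_split(), bio_chain(), generic_make_request() and fs_bio_set are the interfaces this patch introduces or uses.]

	/* Hypothetical sketch - not part of the patch */
	static void my_make_request(struct request_queue *q, struct bio *bio)
	{
		struct bio *split;

		do {
			/* my_max_sectors() stands in for a device-specific limit */
			unsigned max = my_max_sectors(bio);

			if (max < bio_sectors(bio)) {
				/* carve off the first 'max' sectors; @bio keeps the rest */
				split = bio_split(bio, max, GFP_NOIO, fs_bio_set);
				/* chain completion of 'split' to 'bio' */
				bio_chain(split, bio);
			} else {
				split = bio;
			}

			generic_make_request(split);
		} while (split != bio);
	}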
-rw-r--r--	drivers/block/nvme-core.c	106
-rw-r--r--	drivers/block/pktcdvd.c	136
-rw-r--r--	drivers/md/bcache/bcache.h	1
-rw-r--r--	drivers/md/bcache/io.c	82
-rw-r--r--	drivers/md/bcache/request.c	12
-rw-r--r--	drivers/md/linear.c	96
-rw-r--r--	drivers/md/raid0.c	77
-rw-r--r--	drivers/md/raid10.c	113
-rw-r--r--	fs/bio.c	36
-rw-r--r--	include/linux/bio.h	22
10 files changed, 272 insertions(+), 409 deletions(-)
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 5539d2920872..1f14ac403945 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -441,104 +441,19 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
 	return total_len;
 }
 
-struct nvme_bio_pair {
-	struct bio b1, b2, *parent;
-	struct bio_vec *bv1, *bv2;
-	int err;
-	atomic_t cnt;
-};
-
-static void nvme_bio_pair_endio(struct bio *bio, int err)
-{
-	struct nvme_bio_pair *bp = bio->bi_private;
-
-	if (err)
-		bp->err = err;
-
-	if (atomic_dec_and_test(&bp->cnt)) {
-		bio_endio(bp->parent, bp->err);
-		kfree(bp->bv1);
-		kfree(bp->bv2);
-		kfree(bp);
-	}
-}
-
-static struct nvme_bio_pair *nvme_bio_split(struct bio *bio, int idx,
-					    int len, int offset)
-{
-	struct nvme_bio_pair *bp;
-
-	BUG_ON(len > bio->bi_iter.bi_size);
-	BUG_ON(idx > bio->bi_vcnt);
-
-	bp = kmalloc(sizeof(*bp), GFP_ATOMIC);
-	if (!bp)
-		return NULL;
-	bp->err = 0;
-
-	bp->b1 = *bio;
-	bp->b2 = *bio;
-
-	bp->b1.bi_iter.bi_size = len;
-	bp->b2.bi_iter.bi_size -= len;
-	bp->b1.bi_vcnt = idx;
-	bp->b2.bi_iter.bi_idx = idx;
-	bp->b2.bi_iter.bi_sector += len >> 9;
-
-	if (offset) {
-		bp->bv1 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
-					GFP_ATOMIC);
-		if (!bp->bv1)
-			goto split_fail_1;
-
-		bp->bv2 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
-					GFP_ATOMIC);
-		if (!bp->bv2)
-			goto split_fail_2;
-
-		memcpy(bp->bv1, bio->bi_io_vec,
-			bio->bi_max_vecs * sizeof(struct bio_vec));
-		memcpy(bp->bv2, bio->bi_io_vec,
-			bio->bi_max_vecs * sizeof(struct bio_vec));
-
-		bp->b1.bi_io_vec = bp->bv1;
-		bp->b2.bi_io_vec = bp->bv2;
-		bp->b2.bi_io_vec[idx].bv_offset += offset;
-		bp->b2.bi_io_vec[idx].bv_len -= offset;
-		bp->b1.bi_io_vec[idx].bv_len = offset;
-		bp->b1.bi_vcnt++;
-	} else
-		bp->bv1 = bp->bv2 = NULL;
-
-	bp->b1.bi_private = bp;
-	bp->b2.bi_private = bp;
-
-	bp->b1.bi_end_io = nvme_bio_pair_endio;
-	bp->b2.bi_end_io = nvme_bio_pair_endio;
-
-	bp->parent = bio;
-	atomic_set(&bp->cnt, 2);
-
-	return bp;
-
- split_fail_2:
-	kfree(bp->bv1);
- split_fail_1:
-	kfree(bp);
-	return NULL;
-}
-
 static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
-				 int idx, int len, int offset)
+				 int len)
 {
-	struct nvme_bio_pair *bp = nvme_bio_split(bio, idx, len, offset);
-	if (!bp)
+	struct bio *split = bio_split(bio, len >> 9, GFP_ATOMIC, NULL);
+	if (!split)
 		return -ENOMEM;
 
+	bio_chain(split, bio);
+
 	if (bio_list_empty(&nvmeq->sq_cong))
 		add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
-	bio_list_add(&nvmeq->sq_cong, &bp->b1);
-	bio_list_add(&nvmeq->sq_cong, &bp->b2);
+	bio_list_add(&nvmeq->sq_cong, split);
+	bio_list_add(&nvmeq->sq_cong, bio);
 
 	return 0;
 }
@@ -568,8 +483,7 @@ static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 	} else {
 		if (!first && BIOVEC_NOT_VIRT_MERGEABLE(&bvprv, &bvec))
 			return nvme_split_and_submit(bio, nvmeq,
-						     iter.bi_idx,
-						     length, 0);
+						     length);
 
 		sg = sg ? sg + 1 : iod->sg;
 		sg_set_page(sg, bvec.bv_page,
@@ -578,9 +492,7 @@ static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 	}
 
 	if (split_len - length < bvec.bv_len)
-		return nvme_split_and_submit(bio, nvmeq, iter.bi_idx,
-					     split_len,
-					     split_len - length);
+		return nvme_split_and_submit(bio, nvmeq, split_len);
 	length += bvec.bv_len;
 	bvprv = bvec;
 	first = 0;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 28789b82ae7d..3dda09a5ec41 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2338,75 +2338,29 @@ static void pkt_end_io_read_cloned(struct bio *bio, int err)
 	pkt_bio_finished(pd);
 }
 
-static void pkt_make_request(struct request_queue *q, struct bio *bio)
+static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
 {
-	struct pktcdvd_device *pd;
-	char b[BDEVNAME_SIZE];
+	struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
+	struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
+
+	psd->pd = pd;
+	psd->bio = bio;
+	cloned_bio->bi_bdev = pd->bdev;
+	cloned_bio->bi_private = psd;
+	cloned_bio->bi_end_io = pkt_end_io_read_cloned;
+	pd->stats.secs_r += bio_sectors(bio);
+	pkt_queue_bio(pd, cloned_bio);
+}
+
+static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
+{
+	struct pktcdvd_device *pd = q->queuedata;
 	sector_t zone;
 	struct packet_data *pkt;
 	int was_empty, blocked_bio;
 	struct pkt_rb_node *node;
 
-	pd = q->queuedata;
-	if (!pd) {
-		pr_err("%s incorrect request queue\n",
-		       bdevname(bio->bi_bdev, b));
-		goto end_io;
-	}
-
-	/*
-	 * Clone READ bios so we can have our own bi_end_io callback.
-	 */
-	if (bio_data_dir(bio) == READ) {
-		struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
-		struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
-
-		psd->pd = pd;
-		psd->bio = bio;
-		cloned_bio->bi_bdev = pd->bdev;
-		cloned_bio->bi_private = psd;
-		cloned_bio->bi_end_io = pkt_end_io_read_cloned;
-		pd->stats.secs_r += bio_sectors(bio);
-		pkt_queue_bio(pd, cloned_bio);
-		return;
-	}
-
-	if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
-		pkt_notice(pd, "WRITE for ro device (%llu)\n",
-			   (unsigned long long)bio->bi_iter.bi_sector);
-		goto end_io;
-	}
-
-	if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
-		pkt_err(pd, "wrong bio size\n");
-		goto end_io;
-	}
-
-	blk_queue_bounce(q, &bio);
-
 	zone = get_zone(bio->bi_iter.bi_sector, pd);
-	pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
-		(unsigned long long)bio->bi_iter.bi_sector,
-		(unsigned long long)bio_end_sector(bio));
-
-	/* Check if we have to split the bio */
-	{
-		struct bio_pair *bp;
-		sector_t last_zone;
-		int first_sectors;
-
-		last_zone = get_zone(bio_end_sector(bio) - 1, pd);
-		if (last_zone != zone) {
-			BUG_ON(last_zone != zone + pd->settings.size);
-			first_sectors = last_zone - bio->bi_iter.bi_sector;
-			bp = bio_pair_split(bio, first_sectors);
-			BUG_ON(!bp);
-			pkt_make_request(q, &bp->bio1);
-			pkt_make_request(q, &bp->bio2);
-			bio_pair_release(bp);
-			return;
-		}
-	}
 
 	/*
 	 * If we find a matching packet in state WAITING or READ_WAIT, we can
@@ -2480,6 +2434,64 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
 	 */
 	wake_up(&pd->wqueue);
 	}
+}
+
+static void pkt_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct pktcdvd_device *pd;
+	char b[BDEVNAME_SIZE];
+	struct bio *split;
+
+	pd = q->queuedata;
+	if (!pd) {
+		pr_err("%s incorrect request queue\n",
+		       bdevname(bio->bi_bdev, b));
+		goto end_io;
+	}
+
+	pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
+		(unsigned long long)bio->bi_iter.bi_sector,
+		(unsigned long long)bio_end_sector(bio));
+
+	/*
+	 * Clone READ bios so we can have our own bi_end_io callback.
+	 */
+	if (bio_data_dir(bio) == READ) {
+		pkt_make_request_read(pd, bio);
+		return;
+	}
+
+	if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
+		pkt_notice(pd, "WRITE for ro device (%llu)\n",
+			   (unsigned long long)bio->bi_iter.bi_sector);
+		goto end_io;
+	}
+
+	if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
+		pkt_err(pd, "wrong bio size\n");
+		goto end_io;
+	}
+
+	blk_queue_bounce(q, &bio);
+
+	do {
+		sector_t zone = get_zone(bio->bi_iter.bi_sector, pd);
+		sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd);
+
+		if (last_zone != zone) {
+			BUG_ON(last_zone != zone + pd->settings.size);
+
+			split = bio_split(bio, last_zone -
+					  bio->bi_iter.bi_sector,
+					  GFP_NOIO, fs_bio_set);
+			bio_chain(split, bio);
+		} else {
+			split = bio;
+		}
+
+		pkt_make_request_write(q, split);
+	} while (split != bio);
+
 	return;
 end_io:
 	bio_io_error(bio);
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 6b6fe935be73..964353c5329d 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -901,7 +901,6 @@ void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *);
 void bch_bbio_free(struct bio *, struct cache_set *);
 struct bio *bch_bbio_alloc(struct cache_set *);
 
-struct bio *bch_bio_split(struct bio *, int, gfp_t, struct bio_set *);
 void bch_generic_make_request(struct bio *, struct bio_split_pool *);
 void __bch_submit_bbio(struct bio *, struct cache_set *);
 void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 522f95778443..fa028fa82df4 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -11,84 +11,6 @@
 
 #include <linux/blkdev.h>
 
-/**
- * bch_bio_split - split a bio
- * @bio:	bio to split
- * @sectors:	number of sectors to split from the front of @bio
- * @gfp:	gfp mask
- * @bs:	bio set to allocate from
- *
- * Allocates and returns a new bio which represents @sectors from the start of
- * @bio, and updates @bio to represent the remaining sectors.
- *
- * If bio_sectors(@bio) was less than or equal to @sectors, returns @bio
- * unchanged.
- *
- * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a
- * bvec boundry; it is the caller's responsibility to ensure that @bio is not
- * freed before the split.
- */
-struct bio *bch_bio_split(struct bio *bio, int sectors,
-			  gfp_t gfp, struct bio_set *bs)
-{
-	unsigned vcnt = 0, nbytes = sectors << 9;
-	struct bio_vec bv;
-	struct bvec_iter iter;
-	struct bio *ret = NULL;
-
-	BUG_ON(sectors <= 0);
-
-	if (sectors >= bio_sectors(bio))
-		return bio;
-
-	if (bio->bi_rw & REQ_DISCARD) {
-		ret = bio_alloc_bioset(gfp, 1, bs);
-		if (!ret)
-			return NULL;
-		goto out;
-	}
-
-	bio_for_each_segment(bv, bio, iter) {
-		vcnt++;
-
-		if (nbytes <= bv.bv_len)
-			break;
-
-		nbytes -= bv.bv_len;
-	}
-
-	ret = bio_alloc_bioset(gfp, vcnt, bs);
-	if (!ret)
-		return NULL;
-
-	bio_for_each_segment(bv, bio, iter) {
-		ret->bi_io_vec[ret->bi_vcnt++] = bv;
-
-		if (ret->bi_vcnt == vcnt)
-			break;
-	}
-
-	ret->bi_io_vec[ret->bi_vcnt - 1].bv_len = nbytes;
-out:
-	ret->bi_bdev = bio->bi_bdev;
-	ret->bi_iter.bi_sector = bio->bi_iter.bi_sector;
-	ret->bi_iter.bi_size = sectors << 9;
-	ret->bi_rw = bio->bi_rw;
-
-	if (bio_integrity(bio)) {
-		if (bio_integrity_clone(ret, bio, gfp)) {
-			bio_put(ret);
-			return NULL;
-		}
-
-		bio_integrity_trim(ret, 0, bio_sectors(ret));
-	}
-
-	bio_advance(bio, ret->bi_iter.bi_size);
-
-	return ret;
-}
-
 static unsigned bch_bio_max_sectors(struct bio *bio)
 {
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
@@ -172,8 +94,8 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p)
 	bio_get(bio);
 
 	do {
-		n = bch_bio_split(bio, bch_bio_max_sectors(bio),
-				  GFP_NOIO, s->p->bio_split);
+		n = bio_next_split(bio, bch_bio_max_sectors(bio),
+				   GFP_NOIO, s->p->bio_split);
 
 		n->bi_end_io = bch_bio_submit_split_endio;
 		n->bi_private = &s->cl;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 63451c724781..5878cdb39529 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -371,7 +371,7 @@ static void bch_data_insert_start(struct closure *cl)
 				       op->writeback))
 			goto err;
 
-		n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split);
+		n = bio_next_split(bio, KEY_SIZE(k), GFP_NOIO, split);
 
 		n->bi_end_io = bch_data_insert_endio;
 		n->bi_private = cl;
@@ -679,9 +679,9 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
 	if (KEY_DIRTY(k))
 		s->read_dirty_data = true;
 
-	n = bch_bio_split(bio, min_t(uint64_t, INT_MAX,
-			  KEY_OFFSET(k) - bio->bi_iter.bi_sector),
-			  GFP_NOIO, s->d->bio_split);
+	n = bio_next_split(bio, min_t(uint64_t, INT_MAX,
+			   KEY_OFFSET(k) - bio->bi_iter.bi_sector),
+			   GFP_NOIO, s->d->bio_split);
 
 	bio_key = &container_of(n, struct bbio, bio)->key;
 	bch_bkey_copy_single_ptr(bio_key, k, ptr);
@@ -920,7 +920,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 	struct bio *miss, *cache_bio;
 
 	if (s->cache_miss || s->iop.bypass) {
-		miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
+		miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
 		ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
 		goto out_submit;
 	}
@@ -943,7 +943,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 
 	s->iop.replace = true;
 
-	miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
+	miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
 
 	/* btree_search_recurse()'s btree iterator is no good anymore */
 	ret = miss == bio ? MAP_DONE : -EINTR;
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index e9b53e9793bf..56f534b4a2d2 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -288,65 +288,65 @@ static int linear_stop (struct mddev *mddev)
 
 static void linear_make_request(struct mddev *mddev, struct bio *bio)
 {
+	char b[BDEVNAME_SIZE];
 	struct dev_info *tmp_dev;
-	sector_t start_sector;
+	struct bio *split;
+	sector_t start_sector, end_sector, data_offset;
 
 	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 		md_flush_request(mddev, bio);
 		return;
 	}
 
-	rcu_read_lock();
-	tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector);
-	start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
-
-
-	if (unlikely(bio->bi_iter.bi_sector >= (tmp_dev->end_sector)
-		     || (bio->bi_iter.bi_sector < start_sector))) {
-		char b[BDEVNAME_SIZE];
-
-		printk(KERN_ERR
-		       "md/linear:%s: make_request: Sector %llu out of bounds on "
-		       "dev %s: %llu sectors, offset %llu\n",
-		       mdname(mddev),
-		       (unsigned long long)bio->bi_iter.bi_sector,
-		       bdevname(tmp_dev->rdev->bdev, b),
-		       (unsigned long long)tmp_dev->rdev->sectors,
-		       (unsigned long long)start_sector);
-		rcu_read_unlock();
-		bio_io_error(bio);
-		return;
-	}
-	if (unlikely(bio_end_sector(bio) > tmp_dev->end_sector)) {
-		/* This bio crosses a device boundary, so we have to
-		 * split it.
-		 */
-		struct bio_pair *bp;
-		sector_t end_sector = tmp_dev->end_sector;
+	do {
+		rcu_read_lock();
 
-		rcu_read_unlock();
-
-		bp = bio_pair_split(bio, end_sector - bio->bi_iter.bi_sector);
+		tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector);
+		start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
+		end_sector = tmp_dev->end_sector;
+		data_offset = tmp_dev->rdev->data_offset;
+		bio->bi_bdev = tmp_dev->rdev->bdev;
 
-		linear_make_request(mddev, &bp->bio1);
-		linear_make_request(mddev, &bp->bio2);
-		bio_pair_release(bp);
-		return;
-	}
-
-	bio->bi_bdev = tmp_dev->rdev->bdev;
-	bio->bi_iter.bi_sector = bio->bi_iter.bi_sector - start_sector
-		+ tmp_dev->rdev->data_offset;
-	rcu_read_unlock();
+		rcu_read_unlock();
 
-	if (unlikely((bio->bi_rw & REQ_DISCARD) &&
-		     !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
-		/* Just ignore it */
-		bio_endio(bio, 0);
-		return;
-	}
+		if (unlikely(bio->bi_iter.bi_sector >= end_sector ||
+			     bio->bi_iter.bi_sector < start_sector))
+			goto out_of_bounds;
+
+		if (unlikely(bio_end_sector(bio) > end_sector)) {
+			/* This bio crosses a device boundary, so we have to
+			 * split it.
+			 */
+			split = bio_split(bio, end_sector -
+					  bio->bi_iter.bi_sector,
+					  GFP_NOIO, fs_bio_set);
+			bio_chain(split, bio);
+		} else {
+			split = bio;
+		}
 
-	generic_make_request(bio);
+		split->bi_iter.bi_sector = split->bi_iter.bi_sector -
+			start_sector + data_offset;
+
+		if (unlikely((split->bi_rw & REQ_DISCARD) &&
+			     !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
+			/* Just ignore it */
+			bio_endio(split, 0);
+		} else
+			generic_make_request(split);
+	} while (split != bio);
+	return;
+
+out_of_bounds:
+	printk(KERN_ERR
+	       "md/linear:%s: make_request: Sector %llu out of bounds on "
+	       "dev %s: %llu sectors, offset %llu\n",
+	       mdname(mddev),
+	       (unsigned long long)bio->bi_iter.bi_sector,
+	       bdevname(tmp_dev->rdev->bdev, b),
+	       (unsigned long long)tmp_dev->rdev->sectors,
+	       (unsigned long long)start_sector);
+	bio_io_error(bio);
 }
 
 static void linear_status (struct seq_file *seq, struct mddev *mddev)
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index ea754dd1a5f5..407a99e46f69 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -513,65 +513,44 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
 
 static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 {
-	unsigned int chunk_sects;
-	sector_t sector_offset;
 	struct strip_zone *zone;
 	struct md_rdev *tmp_dev;
+	struct bio *split;
 
 	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 		md_flush_request(mddev, bio);
 		return;
 	}
 
-	chunk_sects = mddev->chunk_sectors;
-	if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) {
+	do {
 		sector_t sector = bio->bi_iter.bi_sector;
-		struct bio_pair *bp;
-		/* Sanity check -- queue functions should prevent this happening */
-		if (bio_multiple_segments(bio))
-			goto bad_map;
-		/* This is a one page bio that upper layers
-		 * refuse to split for us, so we need to split it.
-		 */
-		if (likely(is_power_of_2(chunk_sects)))
-			bp = bio_pair_split(bio, chunk_sects - (sector &
-							       (chunk_sects-1)));
-		else
-			bp = bio_pair_split(bio, chunk_sects -
-					    sector_div(sector, chunk_sects));
-		raid0_make_request(mddev, &bp->bio1);
-		raid0_make_request(mddev, &bp->bio2);
-		bio_pair_release(bp);
-		return;
-	}
-
-	sector_offset = bio->bi_iter.bi_sector;
-	zone = find_zone(mddev->private, &sector_offset);
-	tmp_dev = map_sector(mddev, zone, bio->bi_iter.bi_sector,
-			     &sector_offset);
-	bio->bi_bdev = tmp_dev->bdev;
-	bio->bi_iter.bi_sector = sector_offset + zone->dev_start +
-		tmp_dev->data_offset;
-
-	if (unlikely((bio->bi_rw & REQ_DISCARD) &&
-		     !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
-		/* Just ignore it */
-		bio_endio(bio, 0);
-		return;
-	}
-
-	generic_make_request(bio);
-	return;
-
-bad_map:
-	printk("md/raid0:%s: make_request bug: can't convert block across chunks"
-	       " or bigger than %dk %llu %d\n",
-	       mdname(mddev), chunk_sects / 2,
-	       (unsigned long long)bio->bi_iter.bi_sector,
-	       bio_sectors(bio) / 2);
+		unsigned chunk_sects = mddev->chunk_sectors;
+
+		unsigned sectors = chunk_sects -
+			(likely(is_power_of_2(chunk_sects))
+			 ? (sector & (chunk_sects-1))
+			 : sector_div(sector, chunk_sects));
+
+		if (sectors < bio_sectors(bio)) {
+			split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);
+			bio_chain(split, bio);
+		} else {
+			split = bio;
+		}
 
-	bio_io_error(bio);
-	return;
+		zone = find_zone(mddev->private, &sector);
+		tmp_dev = map_sector(mddev, zone, sector, &sector);
+		split->bi_bdev = tmp_dev->bdev;
+		split->bi_iter.bi_sector = sector + zone->dev_start +
+			tmp_dev->data_offset;
+
+		if (unlikely((split->bi_rw & REQ_DISCARD) &&
+			     !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
+			/* Just ignore it */
+			bio_endio(split, 0);
+		} else
+			generic_make_request(split);
+	} while (split != bio);
 }
 
 static void raid0_status(struct seq_file *seq, struct mddev *mddev)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 69c1bc8da88f..6d43d88657aa 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1152,14 +1152,12 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 		kfree(plug);
 }
 
-static void make_request(struct mddev *mddev, struct bio * bio)
+static void __make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct r10conf *conf = mddev->private;
 	struct r10bio *r10_bio;
 	struct bio *read_bio;
 	int i;
-	sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
-	int chunk_sects = chunk_mask + 1;
 	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
@@ -1174,69 +1172,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	int max_sectors;
 	int sectors;
 
-	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
-		md_flush_request(mddev, bio);
-		return;
-	}
-
-	/* If this request crosses a chunk boundary, we need to
-	 * split it.  This will only happen for 1 PAGE (or less) requests.
-	 */
-	if (unlikely((bio->bi_iter.bi_sector & chunk_mask) + bio_sectors(bio)
-		     > chunk_sects
-		     && (conf->geo.near_copies < conf->geo.raid_disks
-			 || conf->prev.near_copies < conf->prev.raid_disks))) {
-		struct bio_pair *bp;
-		/* Sanity check -- queue functions should prevent this happening */
-		if (bio_multiple_segments(bio))
-			goto bad_map;
-		/* This is a one page bio that upper layers
-		 * refuse to split for us, so we need to split it.
-		 */
-		bp = bio_pair_split(bio, chunk_sects -
-				    (bio->bi_iter.bi_sector & (chunk_sects - 1)));
-
-		/* Each of these 'make_request' calls will call 'wait_barrier'.
-		 * If the first succeeds but the second blocks due to the resync
-		 * thread raising the barrier, we will deadlock because the
-		 * IO to the underlying device will be queued in generic_make_request
-		 * and will never complete, so will never reduce nr_pending.
-		 * So increment nr_waiting here so no new raise_barriers will
-		 * succeed, and so the second wait_barrier cannot block.
-		 */
-		spin_lock_irq(&conf->resync_lock);
-		conf->nr_waiting++;
-		spin_unlock_irq(&conf->resync_lock);
-
-		make_request(mddev, &bp->bio1);
-		make_request(mddev, &bp->bio2);
-
-		spin_lock_irq(&conf->resync_lock);
-		conf->nr_waiting--;
-		wake_up(&conf->wait_barrier);
-		spin_unlock_irq(&conf->resync_lock);
-
-		bio_pair_release(bp);
-		return;
-	bad_map:
-		printk("md/raid10:%s: make_request bug: can't convert block across chunks"
-		       " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
-		       (unsigned long long)bio->bi_iter.bi_sector,
-		       bio_sectors(bio) / 2);
-
-		bio_io_error(bio);
-		return;
-	}
-
-	md_write_start(mddev, bio);
-
-	/*
-	 * Register the new request and wait if the reconstruction
-	 * thread has put up a bar for new requests.
-	 * Continue immediately if no resync is active currently.
-	 */
-	wait_barrier(conf);
-
 	sectors = bio_sectors(bio);
 	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    bio->bi_iter.bi_sector < conf->reshape_progress &&
@@ -1600,6 +1535,52 @@ retry_write:
 		goto retry_write;
 	}
 	one_write_done(r10_bio);
+}
+
+static void make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r10conf *conf = mddev->private;
+	sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
+	int chunk_sects = chunk_mask + 1;
+
+	struct bio *split;
+
+	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
+		md_flush_request(mddev, bio);
+		return;
+	}
+
+	md_write_start(mddev, bio);
+
+	/*
+	 * Register the new request and wait if the reconstruction
+	 * thread has put up a bar for new requests.
+	 * Continue immediately if no resync is active currently.
+	 */
+	wait_barrier(conf);
+
+	do {
+
+		/*
+		 * If this request crosses a chunk boundary, we need to split
+		 * it.
+		 */
+		if (unlikely((bio->bi_iter.bi_sector & chunk_mask) +
+			     bio_sectors(bio) > chunk_sects
+			     && (conf->geo.near_copies < conf->geo.raid_disks
+				 || conf->prev.near_copies <
+				 conf->prev.raid_disks))) {
+			split = bio_split(bio, chunk_sects -
+					  (bio->bi_iter.bi_sector &
+					   (chunk_sects - 1)),
+					  GFP_NOIO, fs_bio_set);
+			bio_chain(split, bio);
+		} else {
+			split = bio;
+		}
+
+		__make_request(mddev, split);
+	} while (split != bio);
 
 	/* In case raid10d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
diff --git a/fs/bio.c b/fs/bio.c
index a3e753f4d5a6..7b062befac82 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1793,6 +1793,42 @@ void bio_endio_nodec(struct bio *bio, int error)
 }
 EXPORT_SYMBOL(bio_endio_nodec);
 
+/**
+ * bio_split - split a bio
+ * @bio:	bio to split
+ * @sectors:	number of sectors to split from the front of @bio
+ * @gfp:	gfp mask
+ * @bs:	bio set to allocate from
+ *
+ * Allocates and returns a new bio which represents @sectors from the start of
+ * @bio, and updates @bio to represent the remaining sectors.
+ *
+ * The newly allocated bio will point to @bio's bi_io_vec; it is the caller's
+ * responsibility to ensure that @bio is not freed before the split.
+ */
+struct bio *bio_split(struct bio *bio, int sectors,
+		      gfp_t gfp, struct bio_set *bs)
+{
+	struct bio *split = NULL;
+
+	BUG_ON(sectors <= 0);
+	BUG_ON(sectors >= bio_sectors(bio));
+
+	split = bio_clone_fast(bio, gfp, bs);
+	if (!split)
+		return NULL;
+
+	split->bi_iter.bi_size = sectors << 9;
+
+	if (bio_integrity(split))
+		bio_integrity_trim(split, 0, sectors);
+
+	bio_advance(bio, split->bi_iter.bi_size);
+
+	return split;
+}
+EXPORT_SYMBOL(bio_split);
+
 void bio_pair_release(struct bio_pair *bp)
 {
 	if (atomic_dec_and_test(&bp->cnt)) {
diff --git a/include/linux/bio.h b/include/linux/bio.h
index aa67af0b31ac..19e31b2f5b2c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -321,6 +321,28 @@ extern struct bio_pair *bio_pair_split(struct bio *bi, int first_sectors);
 extern void bio_pair_release(struct bio_pair *dbio);
 extern void bio_trim(struct bio *bio, int offset, int size);
 
+extern struct bio *bio_split(struct bio *bio, int sectors,
+			     gfp_t gfp, struct bio_set *bs);
+
+/**
+ * bio_next_split - get next @sectors from a bio, splitting if necessary
+ * @bio:	bio to split
+ * @sectors:	number of sectors to split from the front of @bio
+ * @gfp:	gfp mask
+ * @bs:	bio set to allocate from
+ *
+ * Returns a bio representing the next @sectors of @bio - if the bio is smaller
+ * than @sectors, returns the original bio unchanged.
+ */
+static inline struct bio *bio_next_split(struct bio *bio, int sectors,
+					 gfp_t gfp, struct bio_set *bs)
+{
+	if (sectors >= bio_sectors(bio))
+		return bio;
+
+	return bio_split(bio, sectors, gfp, bs);
+}
+
 extern struct bio_set *bioset_create(unsigned int, unsigned int);
 extern void bioset_free(struct bio_set *);
 extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries);
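[Editor's note, not part of the commit: bio_next_split() is what lets per-device code walk a bio in device-sized chunks without special-casing the final piece. A minimal sketch of such a loop is below; CHUNK_SECTORS and bs are hypothetical placeholders, and bio_chain() is used here for completion handling, whereas bcache wires up its own endio instead - the new interface leaves that choice to the caller.]

	/* Hypothetical sketch - not part of the patch */
	do {
		/* returns at most CHUNK_SECTORS; the final chunk is bio itself */
		n = bio_next_split(bio, CHUNK_SECTORS, GFP_NOIO, bs);

		if (n != bio)
			/* a real split happened; chain its completion to bio */
			bio_chain(n, bio);

		generic_make_request(n);
	} while (n != bio);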