Diffstat (limited to 'fs/bio.c')
-rw-r--r--  fs/bio.c | 408
1 file changed, 248 insertions(+), 160 deletions(-)
diff --git a/fs/bio.c b/fs/bio.c
index 78562574cb52..77a55bcceedb 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -28,24 +28,9 @@
 #include <linux/blktrace_api.h>
 #include <scsi/sg.h>		/* for struct sg_iovec */
 
-#define BIO_POOL_SIZE 2
-
 static struct kmem_cache *bio_slab __read_mostly;
 
-#define BIOVEC_NR_POOLS 6
-
-/*
- * a small number of entries is fine, not going to be performance critical.
- * basically we just need to survive
- */
-#define BIO_SPLIT_ENTRIES 2
-mempool_t *bio_split_pool __read_mostly;
-
-struct biovec_slab {
-	int nr_vecs;
-	char *name;
-	struct kmem_cache *slab;
-};
+static mempool_t *bio_split_pool __read_mostly;
 
 /*
  * if you change this list, also change bvec_alloc or things will
@@ -60,49 +45,61 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
 #undef BV
 
 /*
- * bio_set is used to allow other portions of the IO system to
- * allocate their own private memory pools for bio and iovec structures.
- * These memory pools in turn all allocate from the bio_slab
- * and the bvec_slabs[].
- */
-struct bio_set {
-	mempool_t *bio_pool;
-	mempool_t *bvec_pools[BIOVEC_NR_POOLS];
-};
-
-/*
  * fs_bio_set is the bio_set containing bio and iovec memory pools used by
  * IO code that does not need private memory pools.
  */
-static struct bio_set *fs_bio_set;
+struct bio_set *fs_bio_set;
+
+unsigned int bvec_nr_vecs(unsigned short idx)
+{
+	return bvec_slabs[idx].nr_vecs;
+}
 
-static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
+struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
 {
 	struct bio_vec *bvl;
 
 	/*
-	 * see comment near bvec_array define!
+	 * If 'bs' is given, lookup the pool and do the mempool alloc.
+	 * If not, this is a bio_kmalloc() allocation and just do a
+	 * kzalloc() for the exact number of vecs right away.
 	 */
-	switch (nr) {
-	case 1		: *idx = 0; break;
-	case 2 ... 4: *idx = 1; break;
-	case 5 ... 16: *idx = 2; break;
-	case 17 ... 64: *idx = 3; break;
-	case 65 ... 128: *idx = 4; break;
-	case 129 ... BIO_MAX_PAGES: *idx = 5; break;
+	if (bs) {
+		/*
+		 * see comment near bvec_array define!
+		 */
+		switch (nr) {
+		case 1:
+			*idx = 0;
+			break;
+		case 2 ... 4:
+			*idx = 1;
+			break;
+		case 5 ... 16:
+			*idx = 2;
+			break;
+		case 17 ... 64:
+			*idx = 3;
+			break;
+		case 65 ... 128:
+			*idx = 4;
+			break;
+		case 129 ... BIO_MAX_PAGES:
+			*idx = 5;
+			break;
 		default:
 			return NULL;
 		}
-	/*
-	 * idx now points to the pool we want to allocate from
-	 */
-
-	bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
-	if (bvl) {
-		struct biovec_slab *bp = bvec_slabs + *idx;
 
-		memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));
-	}
+		/*
+		 * idx now points to the pool we want to allocate from
+		 */
+		bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
+		if (bvl)
+			memset(bvl, 0,
+				bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
+	} else
+		bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask);
 
 	return bvl;
 }
@@ -117,6 +114,9 @@ void bio_free(struct bio *bio, struct bio_set *bio_set)
 		mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
 	}
 
+	if (bio_integrity(bio))
+		bio_integrity_free(bio, bio_set);
+
 	mempool_free(bio, bio_set->bio_pool);
 }
 
@@ -128,10 +128,17 @@ static void bio_fs_destructor(struct bio *bio)
 	bio_free(bio, fs_bio_set);
 }
 
+static void bio_kmalloc_destructor(struct bio *bio)
+{
+	kfree(bio->bi_io_vec);
+	kfree(bio);
+}
+
 void bio_init(struct bio *bio)
 {
 	memset(bio, 0, sizeof(*bio));
 	bio->bi_flags = 1 << BIO_UPTODATE;
+	bio->bi_comp_cpu = -1;
 	atomic_set(&bio->bi_cnt, 1);
 }
 
@@ -139,19 +146,25 @@ void bio_init(struct bio *bio)
  * bio_alloc_bioset - allocate a bio for I/O
  * @gfp_mask:   the GFP_ mask given to the slab allocator
  * @nr_iovecs:  number of iovecs to pre-allocate
- * @bs:		the bio_set to allocate from
+ * @bs:		the bio_set to allocate from. If %NULL, just use kmalloc
  *
  * Description:
- *   bio_alloc_bioset will first try it's on mempool to satisfy the allocation.
+ *   bio_alloc_bioset will first try its own mempool to satisfy the allocation.
  *   If %__GFP_WAIT is set then we will block on the internal pool waiting
- *   for a &struct bio to become free.
+ *   for a &struct bio to become free. If a %NULL @bs is passed in, we will
+ *   fall back to just using @kmalloc to allocate the required memory.
  *
  *   allocate bio and iovecs from the memory pools specified by the
- *   bio_set structure.
+ *   bio_set structure, or @kmalloc if none given.
  **/
 struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 {
-	struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask);
+	struct bio *bio;
+
+	if (bs)
+		bio = mempool_alloc(bs->bio_pool, gfp_mask);
+	else
+		bio = kmalloc(sizeof(*bio), gfp_mask);
 
 	if (likely(bio)) {
 		struct bio_vec *bvl = NULL;
@@ -162,12 +175,15 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 
 			bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
 			if (unlikely(!bvl)) {
-				mempool_free(bio, bs->bio_pool);
+				if (bs)
+					mempool_free(bio, bs->bio_pool);
+				else
+					kfree(bio);
 				bio = NULL;
 				goto out;
 			}
 			bio->bi_flags |= idx << BIO_POOL_OFFSET;
-			bio->bi_max_vecs = bvec_slabs[idx].nr_vecs;
+			bio->bi_max_vecs = bvec_nr_vecs(idx);
 		}
 		bio->bi_io_vec = bvl;
 	}
@@ -185,6 +201,23 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
 	return bio;
 }
 
+/*
+ * Like bio_alloc(), but doesn't use a mempool backing. This means that
+ * it CAN fail, but while bio_alloc() can only be used for allocations
+ * that have a short (finite) life span, bio_kmalloc() should be used
+ * for more permanent bio allocations (like allocating some bio's for
+ * initalization or setup purposes).
+ */
+struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
+{
+	struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
+
+	if (bio)
+		bio->bi_destructor = bio_kmalloc_destructor;
+
+	return bio;
+}
+
 void zero_fill_bio(struct bio *bio)
 {
 	unsigned long flags;
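Usage sketch for the two allocation flavours above (illustration only, not part of the patch; the example_* name is hypothetical). bio_alloc()/bio_alloc_bioset() are mempool backed and, with __GFP_WAIT, effectively cannot fail, while bio_kmalloc() has no pool to fall back on and must always be checked:

#include <linux/bio.h>

/* hypothetical helper: allocate a long-lived bio for setup purposes */
static struct bio *example_alloc_setup_bio(int nr_vecs)
{
	struct bio *bio = bio_kmalloc(GFP_KERNEL, nr_vecs);

	if (!bio)
		return NULL;	/* no mempool behind this, it may really fail */

	/*
	 * bi_destructor is already bio_kmalloc_destructor, so the final
	 * bio_put() will kfree() both the bio and its bio_vec array.
	 */
	return bio;
}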
@@ -229,14 +262,6 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
 	return bio->bi_phys_segments;
 }
 
-inline int bio_hw_segments(struct request_queue *q, struct bio *bio)
-{
-	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
-		blk_recount_segments(q, bio);
-
-	return bio->bi_hw_segments;
-}
-
 /**
  * __bio_clone - clone a bio
  * @bio: destination bio
@@ -275,9 +300,19 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
 {
 	struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);
 
-	if (b) {
-		b->bi_destructor = bio_fs_destructor;
-		__bio_clone(b, bio);
+	if (!b)
+		return NULL;
+
+	b->bi_destructor = bio_fs_destructor;
+	__bio_clone(b, bio);
+
+	if (bio_integrity(bio)) {
+		int ret;
+
+		ret = bio_integrity_clone(b, bio, fs_bio_set);
+
+		if (ret < 0)
+			return NULL;
 	}
 
 	return b;
@@ -333,10 +368,19 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 		if (page == prev->bv_page &&
 		    offset == prev->bv_offset + prev->bv_len) {
 			prev->bv_len += len;
-			if (q->merge_bvec_fn &&
-			    q->merge_bvec_fn(q, bio, prev) < len) {
-				prev->bv_len -= len;
-				return 0;
+
+			if (q->merge_bvec_fn) {
+				struct bvec_merge_data bvm = {
+					.bi_bdev = bio->bi_bdev,
+					.bi_sector = bio->bi_sector,
+					.bi_size = bio->bi_size,
+					.bi_rw = bio->bi_rw,
+				};
+
+				if (q->merge_bvec_fn(q, &bvm, prev) < len) {
+					prev->bv_len -= len;
+					return 0;
+				}
 			}
 
 			goto done;
@@ -352,8 +396,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 	 */
 
 	while (bio->bi_phys_segments >= q->max_phys_segments
-	    || bio->bi_hw_segments >= q->max_hw_segments
-	    || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) {
+	    || bio->bi_phys_segments >= q->max_hw_segments) {
 
 		if (retried_segments)
 			return 0;
@@ -377,11 +420,18 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 	 * queue to get further control
 	 */
 	if (q->merge_bvec_fn) {
+		struct bvec_merge_data bvm = {
+			.bi_bdev = bio->bi_bdev,
+			.bi_sector = bio->bi_sector,
+			.bi_size = bio->bi_size,
+			.bi_rw = bio->bi_rw,
+		};
+
 		/*
 		 * merge_bvec_fn() returns number of bytes it can accept
 		 * at this offset
 		 */
-		if (q->merge_bvec_fn(q, bio, bvec) < len) {
+		if (q->merge_bvec_fn(q, &bvm, bvec) < len) {
 			bvec->bv_page = NULL;
 			bvec->bv_len = 0;
 			bvec->bv_offset = 0;
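For driver authors, the change above means a merge_bvec_fn now receives the geometry of the bio being built in a struct bvec_merge_data instead of the bio itself. A hedged sketch of a callback under the new convention (not from this patch; the example_* name and the 64KB limit are made up):

#include <linux/blkdev.h>
#include <linux/bio.h>

/* hypothetical callback: cap any single bio at 64KB for this device */
static int example_merge_bvec(struct request_queue *q,
			      struct bvec_merge_data *bvm,
			      struct bio_vec *biovec)
{
	if (bvm->bi_size + biovec->bv_len > 64 * 1024)
		return 0;		/* refuse the new segment entirely */

	return biovec->bv_len;		/* accept the whole bio_vec */
}

Such a callback would be registered on the queue with blk_queue_merge_bvec().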
@@ -390,13 +440,11 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 		}
 
 	/* If we may be able to merge these biovecs, force a recount */
-	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) ||
-	    BIOVEC_VIRT_MERGEABLE(bvec-1, bvec)))
+	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
 		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
 
 	bio->bi_vcnt++;
 	bio->bi_phys_segments++;
-	bio->bi_hw_segments++;
  done:
 	bio->bi_size += len;
 	return len;
@@ -444,16 +492,19 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
 
 struct bio_map_data {
 	struct bio_vec *iovecs;
-	int nr_sgvecs;
 	struct sg_iovec *sgvecs;
+	int nr_sgvecs;
+	int is_our_pages;
 };
 
 static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
-			     struct sg_iovec *iov, int iov_count)
+			     struct sg_iovec *iov, int iov_count,
+			     int is_our_pages)
 {
 	memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
 	memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
 	bmd->nr_sgvecs = iov_count;
+	bmd->is_our_pages = is_our_pages;
 	bio->bi_private = bmd;
 }
 
@@ -464,20 +515,21 @@ static void bio_free_map_data(struct bio_map_data *bmd)
 	kfree(bmd);
 }
 
-static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count)
+static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
+					       gfp_t gfp_mask)
 {
-	struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL);
+	struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask);
 
 	if (!bmd)
 		return NULL;
 
-	bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL);
+	bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
 	if (!bmd->iovecs) {
 		kfree(bmd);
 		return NULL;
 	}
 
-	bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, GFP_KERNEL);
+	bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
 	if (bmd->sgvecs)
 		return bmd;
 
@@ -486,8 +538,9 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count)
 	return NULL;
 }
 
-static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
-			  int uncopy)
+static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
+			  struct sg_iovec *iov, int iov_count, int uncopy,
+			  int do_free_page)
 {
 	int ret = 0, i;
 	struct bio_vec *bvec;
@@ -497,7 +550,7 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
 
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		char *bv_addr = page_address(bvec->bv_page);
-		unsigned int bv_len = bvec->bv_len;
+		unsigned int bv_len = iovecs[i].bv_len;
 
 		while (bv_len && iov_idx < iov_count) {
 			unsigned int bytes;
@@ -530,7 +583,7 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
 			}
 		}
 
-		if (uncopy)
+		if (do_free_page)
 			__free_page(bvec->bv_page);
 	}
 
@@ -547,10 +600,11 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
 int bio_uncopy_user(struct bio *bio)
 {
 	struct bio_map_data *bmd = bio->bi_private;
-	int ret;
-
-	ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1);
+	int ret = 0;
 
+	if (!bio_flagged(bio, BIO_NULL_MAPPED))
+		ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
+				     bmd->nr_sgvecs, 1, bmd->is_our_pages);
 	bio_free_map_data(bmd);
 	bio_put(bio);
 	return ret;
@@ -559,16 +613,20 @@ int bio_uncopy_user(struct bio *bio)
 /**
  * bio_copy_user_iov - copy user data to bio
  * @q: destination block queue
+ * @map_data: pointer to the rq_map_data holding pages (if necessary)
  * @iov: the iovec.
  * @iov_count: number of elements in the iovec
  * @write_to_vm: bool indicating writing to pages or not
+ * @gfp_mask: memory allocation flags
  *
  * Prepares and returns a bio for indirect user io, bouncing data
  * to/from kernel pages as necessary. Must be paired with
  * call bio_uncopy_user() on io completion.
  */
-struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
-			      int iov_count, int write_to_vm)
+struct bio *bio_copy_user_iov(struct request_queue *q,
			      struct rq_map_data *map_data,
+			      struct sg_iovec *iov, int iov_count,
+			      int write_to_vm, gfp_t gfp_mask)
 {
 	struct bio_map_data *bmd;
 	struct bio_vec *bvec;
@@ -591,25 +649,38 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
 		len += iov[i].iov_len;
 	}
 
-	bmd = bio_alloc_map_data(nr_pages, iov_count);
+	bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
 	if (!bmd)
 		return ERR_PTR(-ENOMEM);
 
 	ret = -ENOMEM;
-	bio = bio_alloc(GFP_KERNEL, nr_pages);
+	bio = bio_alloc(gfp_mask, nr_pages);
 	if (!bio)
 		goto out_bmd;
 
 	bio->bi_rw |= (!write_to_vm << BIO_RW);
 
 	ret = 0;
+	i = 0;
 	while (len) {
-		unsigned int bytes = PAGE_SIZE;
+		unsigned int bytes;
+
+		if (map_data)
+			bytes = 1U << (PAGE_SHIFT + map_data->page_order);
+		else
+			bytes = PAGE_SIZE;
 
 		if (bytes > len)
 			bytes = len;
 
-		page = alloc_page(q->bounce_gfp | GFP_KERNEL);
+		if (map_data) {
+			if (i == map_data->nr_entries) {
+				ret = -ENOMEM;
+				break;
+			}
+			page = map_data->pages[i++];
+		} else
+			page = alloc_page(q->bounce_gfp | gfp_mask);
 		if (!page) {
 			ret = -ENOMEM;
 			break;
@@ -628,16 +699,17 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
 	 * success
 	 */
 	if (!write_to_vm) {
-		ret = __bio_copy_iov(bio, iov, iov_count, 0);
+		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0);
 		if (ret)
 			goto cleanup;
 	}
 
-	bio_set_map_data(bmd, bio, iov, iov_count);
+	bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
 	return bio;
 cleanup:
-	bio_for_each_segment(bvec, bio, i)
-		__free_page(bvec->bv_page);
+	if (!map_data)
+		bio_for_each_segment(bvec, bio, i)
+			__free_page(bvec->bv_page);
 
 	bio_put(bio);
 out_bmd:
@@ -648,29 +720,32 @@ out_bmd:
 /**
  * bio_copy_user - copy user data to bio
  * @q: destination block queue
+ * @map_data: pointer to the rq_map_data holding pages (if necessary)
  * @uaddr: start of user address
  * @len: length in bytes
 * @write_to_vm: bool indicating writing to pages or not
+ * @gfp_mask: memory allocation flags
  *
 * Prepares and returns a bio for indirect user io, bouncing data
 * to/from kernel pages as necessary. Must be paired with
 * call bio_uncopy_user() on io completion.
 */
-struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr,
-			  unsigned int len, int write_to_vm)
+struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
+			  unsigned long uaddr, unsigned int len,
+			  int write_to_vm, gfp_t gfp_mask)
 {
 	struct sg_iovec iov;
 
 	iov.iov_base = (void __user *)uaddr;
 	iov.iov_len = len;
 
-	return bio_copy_user_iov(q, &iov, 1, write_to_vm);
+	return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
 }
 
 static struct bio *__bio_map_user_iov(struct request_queue *q,
 				      struct block_device *bdev,
 				      struct sg_iovec *iov, int iov_count,
-				      int write_to_vm)
+				      int write_to_vm, gfp_t gfp_mask)
 {
 	int i, j;
 	int nr_pages = 0;
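Caller-side sketch of the reworked copy interface (illustration only, not part of this diff; the example_* name is hypothetical). Passing a NULL rq_map_data keeps the old behaviour of letting the bio code allocate, and later free, its own bounce pages:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/err.h>

/* hypothetical helper: bounce a user buffer into the kernel for a write */
static struct bio *example_bounce_user_write(struct request_queue *q,
					     unsigned long uaddr,
					     unsigned int len)
{
	/* write_to_vm == 0: data flows from user memory towards the device */
	struct bio *bio = bio_copy_user(q, NULL, uaddr, len, 0, GFP_KERNEL);

	if (IS_ERR(bio))
		return bio;

	/* ... submit the bio ...; bio_uncopy_user() must run on completion */
	return bio;
}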
@@ -696,12 +771,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
 	if (!nr_pages)
 		return ERR_PTR(-EINVAL);
 
-	bio = bio_alloc(GFP_KERNEL, nr_pages);
+	bio = bio_alloc(gfp_mask, nr_pages);
 	if (!bio)
 		return ERR_PTR(-ENOMEM);
 
 	ret = -ENOMEM;
-	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
+	pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
 	if (!pages)
 		goto out;
 
@@ -713,12 +788,8 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
 		const int local_nr_pages = end - start;
 		const int page_limit = cur_page + local_nr_pages;
 
-		down_read(&current->mm->mmap_sem);
-		ret = get_user_pages(current, current->mm, uaddr,
-				     local_nr_pages,
-				     write_to_vm, 0, &pages[cur_page], NULL);
-		up_read(&current->mm->mmap_sem);
-
+		ret = get_user_pages_fast(uaddr, local_nr_pages,
+				write_to_vm, &pages[cur_page]);
 		if (ret < local_nr_pages) {
 			ret = -EFAULT;
 			goto out_unmap;
@@ -784,19 +855,21 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
  * @uaddr: start of user address
  * @len: length in bytes
 * @write_to_vm: bool indicating writing to pages or not
+ * @gfp_mask: memory allocation flags
  *
 * Map the user space address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
 struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
-			 unsigned long uaddr, unsigned int len, int write_to_vm)
+			 unsigned long uaddr, unsigned int len, int write_to_vm,
+			 gfp_t gfp_mask)
 {
 	struct sg_iovec iov;
 
 	iov.iov_base = (void __user *)uaddr;
 	iov.iov_len = len;
 
-	return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm);
+	return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
 }
 
 /**
@@ -806,18 +879,19 @@ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
  * @iov: the iovec.
  * @iov_count: number of elements in the iovec
 * @write_to_vm: bool indicating writing to pages or not
+ * @gfp_mask: memory allocation flags
  *
 * Map the user space address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
 struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
 			     struct sg_iovec *iov, int iov_count,
-			     int write_to_vm)
+			     int write_to_vm, gfp_t gfp_mask)
 {
 	struct bio *bio;
 
-	bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm);
-
+	bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
+				 gfp_mask);
 	if (IS_ERR(bio))
 		return bio;
 
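A minimal sketch of the direct-mapping path with its new gfp_mask argument (not part of the patch; the example_* name is hypothetical):

#include <linux/bio.h>
#include <linux/blkdev.h>

/* hypothetical helper: pin user pages for a read from the device */
static struct bio *example_map_user_read(struct request_queue *q,
					 struct block_device *bdev,
					 unsigned long uaddr, unsigned int len)
{
	/* write_to_vm == 1: the device will write into the user buffer */
	return bio_map_user(q, bdev, uaddr, len, 1, GFP_KERNEL);
}

The returned bio is paired with bio_unmap_user() once the I/O completes.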
@@ -941,19 +1015,22 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
 {
 	struct bio_vec *bvec;
 	const int read = bio_data_dir(bio) == READ;
-	char *p = bio->bi_private;
+	struct bio_map_data *bmd = bio->bi_private;
 	int i;
+	char *p = bmd->sgvecs[0].iov_base;
 
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		char *addr = page_address(bvec->bv_page);
+		int len = bmd->iovecs[i].bv_len;
 
 		if (read && !err)
-			memcpy(p, addr, bvec->bv_len);
+			memcpy(p, addr, len);
 
 		__free_page(bvec->bv_page);
-		p += bvec->bv_len;
+		p += len;
 	}
 
+	bio_free_map_data(bmd);
 	bio_put(bio);
 }
 
@@ -971,38 +1048,13 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
 struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
 			  gfp_t gfp_mask, int reading)
 {
-	unsigned long kaddr = (unsigned long)data;
-	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	unsigned long start = kaddr >> PAGE_SHIFT;
-	const int nr_pages = end - start;
 	struct bio *bio;
 	struct bio_vec *bvec;
-	int i, ret;
-
-	bio = bio_alloc(gfp_mask, nr_pages);
-	if (!bio)
-		return ERR_PTR(-ENOMEM);
-
-	while (len) {
-		struct page *page;
-		unsigned int bytes = PAGE_SIZE;
-
-		if (bytes > len)
-			bytes = len;
-
-		page = alloc_page(q->bounce_gfp | gfp_mask);
-		if (!page) {
-			ret = -ENOMEM;
-			goto cleanup;
-		}
-
-		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
-			ret = -EINVAL;
-			goto cleanup;
-		}
+	int i;
 
-		len -= bytes;
-	}
+	bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
+	if (IS_ERR(bio))
+		return bio;
 
 	if (!reading) {
 		void *p = data;
@@ -1015,16 +1067,9 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
 		}
 	}
 
-	bio->bi_private = data;
 	bio->bi_end_io = bio_copy_kern_endio;
-	return bio;
-cleanup:
-	bio_for_each_segment(bvec, bio, i)
-		__free_page(bvec->bv_page);
-
-	bio_put(bio);
 
-	return ERR_PTR(ret);
+	return bio;
 }
 
 /*
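bio_copy_kern() is now a thin wrapper over bio_copy_user(); a usage sketch under that assumption (illustration only; the example_* name is hypothetical):

#include <linux/bio.h>
#include <linux/blkdev.h>

/* hypothetical helper: bounce a kernel buffer for a passthrough request */
static struct bio *example_copy_kernel_buf(struct request_queue *q, void *buf,
					   unsigned int len, int reading)
{
	/*
	 * For reads, bio_copy_kern_endio() copies the data back into 'buf'
	 * and frees the bounce pages when the I/O completes.
	 */
	return bio_copy_kern(q, buf, len, GFP_KERNEL, reading);
}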
@@ -1211,9 +1256,9 @@ static void bio_pair_end_2(struct bio *bi, int err)
 * split a bio - only worry about a bio with a single page
 * in it's iovec
 */
-struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
+struct bio_pair *bio_split(struct bio *bi, int first_sectors)
 {
-	struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO);
+	struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
 
 	if (!bp)
 		return bp;
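With the pool argument gone, callers split against the global bio_split_pool. A hedged sketch of the usual split-and-resubmit pattern (the example_* name is hypothetical; a real driver would retarget each half before submitting it):

#include <linux/bio.h>
#include <linux/blkdev.h>

/* hypothetical helper: split a bio at 'first_sectors' and submit both halves */
static void example_split_and_submit(struct bio *bio, int first_sectors)
{
	struct bio_pair *bp = bio_split(bio, first_sectors);

	if (!bp)
		return;

	/* bp->bio1 covers first_sectors, bp->bio2 covers the remainder */
	generic_make_request(&bp->bio1);
	generic_make_request(&bp->bio2);

	/* drop the reference taken by bio_split() */
	bio_pair_release(bp);
}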
@@ -1247,11 +1292,50 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
 	bp->bio2.bi_end_io = bio_pair_end_2;
 
 	bp->bio1.bi_private = bi;
-	bp->bio2.bi_private = pool;
+	bp->bio2.bi_private = bio_split_pool;
+
+	if (bio_integrity(bi))
+		bio_integrity_split(bi, bp, first_sectors);
 
 	return bp;
 }
 
+/**
+ * bio_sector_offset - Find hardware sector offset in bio
+ * @bio:         bio to inspect
+ * @index:       bio_vec index
+ * @offset:      offset in bv_page
+ *
+ * Return the number of hardware sectors between beginning of bio
+ * and an end point indicated by a bio_vec index and an offset
+ * within that vector's page.
+ */
+sector_t bio_sector_offset(struct bio *bio, unsigned short index,
+			   unsigned int offset)
+{
+	unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue);
+	struct bio_vec *bv;
+	sector_t sectors;
+	int i;
+
+	sectors = 0;
+
+	if (index >= bio->bi_idx)
+		index = bio->bi_vcnt - 1;
+
+	__bio_for_each_segment(bv, bio, i, 0) {
+		if (i == index) {
+			if (offset > bv->bv_offset)
+				sectors += (offset - bv->bv_offset) / sector_sz;
+			break;
+		}
+
+		sectors += bv->bv_len / sector_sz;
+	}
+
+	return sectors;
+}
+EXPORT_SYMBOL(bio_sector_offset);
 
 /*
  * create memory pools for biovec's in a bio_set.
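A worked example of the arithmetic in bio_sector_offset(), assuming 512-byte hardware sectors and page-sized vecs (values and the example_* name are hypothetical):

#include <linux/bio.h>

/*
 * If bio_vec 0 of 'bio' covers one full 4096-byte page, the start of
 * bio_vec 1 lies 4096 / 512 = 8 hardware sectors into the bio.
 */
static sector_t example_sectors_before_vec1(struct bio *bio)
{
	return bio_sector_offset(bio, 1, 0);	/* == 8 in the case above */
}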
@@ -1290,6 +1374,7 @@ void bioset_free(struct bio_set *bs)
 	if (bs->bio_pool)
 		mempool_destroy(bs->bio_pool);
 
+	bioset_integrity_free(bs);
 	biovec_free_pools(bs);
 
 	kfree(bs);
@@ -1306,6 +1391,9 @@ struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size)
 	if (!bs->bio_pool)
 		goto bad;
 
+	if (bioset_integrity_create(bs, bio_pool_size))
+		goto bad;
+
 	if (!biovec_create_pools(bs, bvec_pool_size))
 		return bs;
 
@@ -1332,6 +1420,7 @@ static int __init init_bio(void)
 {
 	bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
+	bio_integrity_init_slab();
 	biovec_init_slabs();
 
 	fs_bio_set = bioset_create(BIO_POOL_SIZE, 2);
@@ -1349,6 +1438,7 @@ static int __init init_bio(void)
 subsys_initcall(init_bio);
 
 EXPORT_SYMBOL(bio_alloc);
+EXPORT_SYMBOL(bio_kmalloc);
 EXPORT_SYMBOL(bio_put);
 EXPORT_SYMBOL(bio_free);
 EXPORT_SYMBOL(bio_endio);
@@ -1356,7 +1446,6 @@ EXPORT_SYMBOL(bio_init);
 EXPORT_SYMBOL(__bio_clone);
 EXPORT_SYMBOL(bio_clone);
 EXPORT_SYMBOL(bio_phys_segments);
-EXPORT_SYMBOL(bio_hw_segments);
 EXPORT_SYMBOL(bio_add_page);
 EXPORT_SYMBOL(bio_add_pc_page);
 EXPORT_SYMBOL(bio_get_nr_vecs);
@@ -1366,7 +1455,6 @@ EXPORT_SYMBOL(bio_map_kern);
 EXPORT_SYMBOL(bio_copy_kern);
 EXPORT_SYMBOL(bio_pair_release);
 EXPORT_SYMBOL(bio_split);
-EXPORT_SYMBOL(bio_split_pool);
 EXPORT_SYMBOL(bio_copy_user);
 EXPORT_SYMBOL(bio_uncopy_user);
 EXPORT_SYMBOL(bioset_create);