diff options
Diffstat (limited to 'fs/bio.c')
| -rw-r--r-- | fs/bio.c | 270 |
1 files changed, 207 insertions, 63 deletions
| @@ -25,6 +25,7 @@ | |||
| 25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
| 26 | #include <linux/mempool.h> | 26 | #include <linux/mempool.h> |
| 27 | #include <linux/workqueue.h> | 27 | #include <linux/workqueue.h> |
| 28 | #include <scsi/sg.h> /* for struct sg_iovec */ | ||
| 28 | 29 | ||
| 29 | #define BIO_POOL_SIZE 256 | 30 | #define BIO_POOL_SIZE 256 |
| 30 | 31 | ||
| @@ -52,7 +53,7 @@ struct biovec_slab { | |||
| 52 | */ | 53 | */ |
| 53 | 54 | ||
| 54 | #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } | 55 | #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } |
| 55 | static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] = { | 56 | static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { |
| 56 | BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), | 57 | BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), |
| 57 | }; | 58 | }; |
| 58 | #undef BV | 59 | #undef BV |
| @@ -104,18 +105,22 @@ static inline struct bio_vec *bvec_alloc_bs(unsigned int __nocast gfp_mask, int | |||
| 104 | return bvl; | 105 | return bvl; |
| 105 | } | 106 | } |
| 106 | 107 | ||
| 107 | /* | 108 | void bio_free(struct bio *bio, struct bio_set *bio_set) |
| 108 | * default destructor for a bio allocated with bio_alloc_bioset() | ||
| 109 | */ | ||
| 110 | static void bio_destructor(struct bio *bio) | ||
| 111 | { | 109 | { |
| 112 | const int pool_idx = BIO_POOL_IDX(bio); | 110 | const int pool_idx = BIO_POOL_IDX(bio); |
| 113 | struct bio_set *bs = bio->bi_set; | ||
| 114 | 111 | ||
| 115 | BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); | 112 | BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); |
| 116 | 113 | ||
| 117 | mempool_free(bio->bi_io_vec, bs->bvec_pools[pool_idx]); | 114 | mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); |
| 118 | mempool_free(bio, bs->bio_pool); | 115 | mempool_free(bio, bio_set->bio_pool); |
| 116 | } | ||
| 117 | |||
| 118 | /* | ||
| 119 | * default destructor for a bio allocated with bio_alloc_bioset() | ||
| 120 | */ | ||
| 121 | static void bio_fs_destructor(struct bio *bio) | ||
| 122 | { | ||
| 123 | bio_free(bio, fs_bio_set); | ||
| 119 | } | 124 | } |
| 120 | 125 | ||
| 121 | inline void bio_init(struct bio *bio) | 126 | inline void bio_init(struct bio *bio) |
| @@ -171,8 +176,6 @@ struct bio *bio_alloc_bioset(unsigned int __nocast gfp_mask, int nr_iovecs, stru | |||
| 171 | bio->bi_max_vecs = bvec_slabs[idx].nr_vecs; | 176 | bio->bi_max_vecs = bvec_slabs[idx].nr_vecs; |
| 172 | } | 177 | } |
| 173 | bio->bi_io_vec = bvl; | 178 | bio->bi_io_vec = bvl; |
| 174 | bio->bi_destructor = bio_destructor; | ||
| 175 | bio->bi_set = bs; | ||
| 176 | } | 179 | } |
| 177 | out: | 180 | out: |
| 178 | return bio; | 181 | return bio; |
| @@ -180,7 +183,12 @@ out: | |||
| 180 | 183 | ||
| 181 | struct bio *bio_alloc(unsigned int __nocast gfp_mask, int nr_iovecs) | 184 | struct bio *bio_alloc(unsigned int __nocast gfp_mask, int nr_iovecs) |
| 182 | { | 185 | { |
| 183 | return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); | 186 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); |
| 187 | |||
| 188 | if (bio) | ||
| 189 | bio->bi_destructor = bio_fs_destructor; | ||
| 190 | |||
| 191 | return bio; | ||
| 184 | } | 192 | } |
| 185 | 193 | ||
| 186 | void zero_fill_bio(struct bio *bio) | 194 | void zero_fill_bio(struct bio *bio) |
| @@ -248,19 +256,16 @@ inline void __bio_clone(struct bio *bio, struct bio *bio_src) | |||
| 248 | { | 256 | { |
| 249 | request_queue_t *q = bdev_get_queue(bio_src->bi_bdev); | 257 | request_queue_t *q = bdev_get_queue(bio_src->bi_bdev); |
| 250 | 258 | ||
| 251 | memcpy(bio->bi_io_vec, bio_src->bi_io_vec, bio_src->bi_max_vecs * sizeof(struct bio_vec)); | 259 | memcpy(bio->bi_io_vec, bio_src->bi_io_vec, |
| 260 | bio_src->bi_max_vecs * sizeof(struct bio_vec)); | ||
| 252 | 261 | ||
| 253 | bio->bi_sector = bio_src->bi_sector; | 262 | bio->bi_sector = bio_src->bi_sector; |
| 254 | bio->bi_bdev = bio_src->bi_bdev; | 263 | bio->bi_bdev = bio_src->bi_bdev; |
| 255 | bio->bi_flags |= 1 << BIO_CLONED; | 264 | bio->bi_flags |= 1 << BIO_CLONED; |
| 256 | bio->bi_rw = bio_src->bi_rw; | 265 | bio->bi_rw = bio_src->bi_rw; |
| 257 | |||
| 258 | /* | ||
| 259 | * notes -- maybe just leave bi_idx alone. assume identical mapping | ||
| 260 | * for the clone | ||
| 261 | */ | ||
| 262 | bio->bi_vcnt = bio_src->bi_vcnt; | 266 | bio->bi_vcnt = bio_src->bi_vcnt; |
| 263 | bio->bi_size = bio_src->bi_size; | 267 | bio->bi_size = bio_src->bi_size; |
| 268 | bio->bi_idx = bio_src->bi_idx; | ||
| 264 | bio_phys_segments(q, bio); | 269 | bio_phys_segments(q, bio); |
| 265 | bio_hw_segments(q, bio); | 270 | bio_hw_segments(q, bio); |
| 266 | } | 271 | } |
| @@ -276,8 +281,10 @@ struct bio *bio_clone(struct bio *bio, unsigned int __nocast gfp_mask) | |||
| 276 | { | 281 | { |
| 277 | struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); | 282 | struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); |
| 278 | 283 | ||
| 279 | if (b) | 284 | if (b) { |
| 285 | b->bi_destructor = bio_fs_destructor; | ||
| 280 | __bio_clone(b, bio); | 286 | __bio_clone(b, bio); |
| 287 | } | ||
| 281 | 288 | ||
| 282 | return b; | 289 | return b; |
| 283 | } | 290 | } |
| @@ -549,22 +556,34 @@ out_bmd: | |||
| 549 | return ERR_PTR(ret); | 556 | return ERR_PTR(ret); |
| 550 | } | 557 | } |
| 551 | 558 | ||
| 552 | static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, | 559 | static struct bio *__bio_map_user_iov(request_queue_t *q, |
| 553 | unsigned long uaddr, unsigned int len, | 560 | struct block_device *bdev, |
| 554 | int write_to_vm) | 561 | struct sg_iovec *iov, int iov_count, |
| 562 | int write_to_vm) | ||
| 555 | { | 563 | { |
| 556 | unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 564 | int i, j; |
| 557 | unsigned long start = uaddr >> PAGE_SHIFT; | 565 | int nr_pages = 0; |
| 558 | const int nr_pages = end - start; | ||
| 559 | int ret, offset, i; | ||
| 560 | struct page **pages; | 566 | struct page **pages; |
| 561 | struct bio *bio; | 567 | struct bio *bio; |
| 568 | int cur_page = 0; | ||
| 569 | int ret, offset; | ||
| 562 | 570 | ||
| 563 | /* | 571 | for (i = 0; i < iov_count; i++) { |
| 564 | * transfer and buffer must be aligned to at least hardsector | 572 | unsigned long uaddr = (unsigned long)iov[i].iov_base; |
| 565 | * size for now, in the future we can relax this restriction | 573 | unsigned long len = iov[i].iov_len; |
| 566 | */ | 574 | unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
| 567 | if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q))) | 575 | unsigned long start = uaddr >> PAGE_SHIFT; |
| 576 | |||
| 577 | nr_pages += end - start; | ||
| 578 | /* | ||
| 579 | * transfer and buffer must be aligned to at least hardsector | ||
| 580 | * size for now, in the future we can relax this restriction | ||
| 581 | */ | ||
| 582 | if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q))) | ||
| 583 | return ERR_PTR(-EINVAL); | ||
| 584 | } | ||
| 585 | |||
| 586 | if (!nr_pages) | ||
| 568 | return ERR_PTR(-EINVAL); | 587 | return ERR_PTR(-EINVAL); |
| 569 | 588 | ||
| 570 | bio = bio_alloc(GFP_KERNEL, nr_pages); | 589 | bio = bio_alloc(GFP_KERNEL, nr_pages); |
| @@ -576,42 +595,54 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, | |||
| 576 | if (!pages) | 595 | if (!pages) |
| 577 | goto out; | 596 | goto out; |
| 578 | 597 | ||
| 579 | down_read(&current->mm->mmap_sem); | 598 | memset(pages, 0, nr_pages * sizeof(struct page *)); |
| 580 | ret = get_user_pages(current, current->mm, uaddr, nr_pages, | 599 | |
| 581 | write_to_vm, 0, pages, NULL); | 600 | for (i = 0; i < iov_count; i++) { |
| 582 | up_read(&current->mm->mmap_sem); | 601 | unsigned long uaddr = (unsigned long)iov[i].iov_base; |
| 583 | 602 | unsigned long len = iov[i].iov_len; | |
| 584 | if (ret < nr_pages) | 603 | unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
| 585 | goto out; | 604 | unsigned long start = uaddr >> PAGE_SHIFT; |
| 586 | 605 | const int local_nr_pages = end - start; | |
| 587 | bio->bi_bdev = bdev; | 606 | const int page_limit = cur_page + local_nr_pages; |
| 588 | 607 | ||
| 589 | offset = uaddr & ~PAGE_MASK; | 608 | down_read(&current->mm->mmap_sem); |
| 590 | for (i = 0; i < nr_pages; i++) { | 609 | ret = get_user_pages(current, current->mm, uaddr, |
| 591 | unsigned int bytes = PAGE_SIZE - offset; | 610 | local_nr_pages, |
| 592 | 611 | write_to_vm, 0, &pages[cur_page], NULL); | |
| 593 | if (len <= 0) | 612 | up_read(&current->mm->mmap_sem); |
| 594 | break; | 613 | |
| 595 | 614 | if (ret < local_nr_pages) | |
| 596 | if (bytes > len) | 615 | goto out_unmap; |
| 597 | bytes = len; | 616 | |
| 617 | |||
| 618 | offset = uaddr & ~PAGE_MASK; | ||
| 619 | for (j = cur_page; j < page_limit; j++) { | ||
| 620 | unsigned int bytes = PAGE_SIZE - offset; | ||
| 621 | |||
| 622 | if (len <= 0) | ||
| 623 | break; | ||
| 624 | |||
| 625 | if (bytes > len) | ||
| 626 | bytes = len; | ||
| 627 | |||
| 628 | /* | ||
| 629 | * sorry... | ||
| 630 | */ | ||
| 631 | if (__bio_add_page(q, bio, pages[j], bytes, offset) < bytes) | ||
| 632 | break; | ||
| 633 | |||
| 634 | len -= bytes; | ||
| 635 | offset = 0; | ||
| 636 | } | ||
| 598 | 637 | ||
| 638 | cur_page = j; | ||
| 599 | /* | 639 | /* |
| 600 | * sorry... | 640 | * release the pages we didn't map into the bio, if any |
| 601 | */ | 641 | */ |
| 602 | if (__bio_add_page(q, bio, pages[i], bytes, offset) < bytes) | 642 | while (j < page_limit) |
| 603 | break; | 643 | page_cache_release(pages[j++]); |
| 604 | |||
| 605 | len -= bytes; | ||
| 606 | offset = 0; | ||
| 607 | } | 644 | } |
| 608 | 645 | ||
| 609 | /* | ||
| 610 | * release the pages we didn't map into the bio, if any | ||
| 611 | */ | ||
| 612 | while (i < nr_pages) | ||
| 613 | page_cache_release(pages[i++]); | ||
| 614 | |||
| 615 | kfree(pages); | 646 | kfree(pages); |
| 616 | 647 | ||
| 617 | /* | 648 | /* |
| @@ -620,9 +651,17 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, | |||
| 620 | if (!write_to_vm) | 651 | if (!write_to_vm) |
| 621 | bio->bi_rw |= (1 << BIO_RW); | 652 | bio->bi_rw |= (1 << BIO_RW); |
| 622 | 653 | ||
| 654 | bio->bi_bdev = bdev; | ||
| 623 | bio->bi_flags |= (1 << BIO_USER_MAPPED); | 655 | bio->bi_flags |= (1 << BIO_USER_MAPPED); |
| 624 | return bio; | 656 | return bio; |
| 625 | out: | 657 | |
| 658 | out_unmap: | ||
| 659 | for (i = 0; i < nr_pages; i++) { | ||
| 660 | if(!pages[i]) | ||
| 661 | break; | ||
| 662 | page_cache_release(pages[i]); | ||
| 663 | } | ||
| 664 | out: | ||
| 626 | kfree(pages); | 665 | kfree(pages); |
| 627 | bio_put(bio); | 666 | bio_put(bio); |
| 628 | return ERR_PTR(ret); | 667 | return ERR_PTR(ret); |
| @@ -642,9 +681,33 @@ out: | |||
| 642 | struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, | 681 | struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, |
| 643 | unsigned long uaddr, unsigned int len, int write_to_vm) | 682 | unsigned long uaddr, unsigned int len, int write_to_vm) |
| 644 | { | 683 | { |
| 684 | struct sg_iovec iov; | ||
| 685 | |||
| 686 | iov.iov_base = (void __user *)uaddr; | ||
| 687 | iov.iov_len = len; | ||
| 688 | |||
| 689 | return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); | ||
| 690 | } | ||
| 691 | |||
| 692 | /** | ||
| 693 | * bio_map_user_iov - map user sg_iovec table into bio | ||
| 694 | * @q: the request_queue_t for the bio | ||
| 695 | * @bdev: destination block device | ||
| 696 | * @iov: the iovec. | ||
| 697 | * @iov_count: number of elements in the iovec | ||
| 698 | * @write_to_vm: bool indicating writing to pages or not | ||
| 699 | * | ||
| 700 | * Map the user space address into a bio suitable for io to a block | ||
| 701 | * device. Returns an error pointer in case of error. | ||
| 702 | */ | ||
| 703 | struct bio *bio_map_user_iov(request_queue_t *q, struct block_device *bdev, | ||
| 704 | struct sg_iovec *iov, int iov_count, | ||
| 705 | int write_to_vm) | ||
| 706 | { | ||
| 645 | struct bio *bio; | 707 | struct bio *bio; |
| 708 | int len = 0, i; | ||
| 646 | 709 | ||
| 647 | bio = __bio_map_user(q, bdev, uaddr, len, write_to_vm); | 710 | bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); |
| 648 | 711 | ||
| 649 | if (IS_ERR(bio)) | 712 | if (IS_ERR(bio)) |
| 650 | return bio; | 713 | return bio; |
| @@ -657,6 +720,9 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, | |||
| 657 | */ | 720 | */ |
| 658 | bio_get(bio); | 721 | bio_get(bio); |
| 659 | 722 | ||
| 723 | for (i = 0; i < iov_count; i++) | ||
| 724 | len += iov[i].iov_len; | ||
| 725 | |||
| 660 | if (bio->bi_size == len) | 726 | if (bio->bi_size == len) |
| 661 | return bio; | 727 | return bio; |
| 662 | 728 | ||
| @@ -701,6 +767,82 @@ void bio_unmap_user(struct bio *bio) | |||
| 701 | bio_put(bio); | 767 | bio_put(bio); |
| 702 | } | 768 | } |
| 703 | 769 | ||
| 770 | static int bio_map_kern_endio(struct bio *bio, unsigned int bytes_done, int err) | ||
| 771 | { | ||
| 772 | if (bio->bi_size) | ||
| 773 | return 1; | ||
| 774 | |||
| 775 | bio_put(bio); | ||
| 776 | return 0; | ||
| 777 | } | ||
| 778 | |||
| 779 | |||
| 780 | static struct bio *__bio_map_kern(request_queue_t *q, void *data, | ||
| 781 | unsigned int len, unsigned int gfp_mask) | ||
| 782 | { | ||
| 783 | unsigned long kaddr = (unsigned long)data; | ||
| 784 | unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
| 785 | unsigned long start = kaddr >> PAGE_SHIFT; | ||
| 786 | const int nr_pages = end - start; | ||
| 787 | int offset, i; | ||
| 788 | struct bio *bio; | ||
| 789 | |||
| 790 | bio = bio_alloc(gfp_mask, nr_pages); | ||
| 791 | if (!bio) | ||
| 792 | return ERR_PTR(-ENOMEM); | ||
| 793 | |||
| 794 | offset = offset_in_page(kaddr); | ||
| 795 | for (i = 0; i < nr_pages; i++) { | ||
| 796 | unsigned int bytes = PAGE_SIZE - offset; | ||
| 797 | |||
| 798 | if (len <= 0) | ||
| 799 | break; | ||
| 800 | |||
| 801 | if (bytes > len) | ||
| 802 | bytes = len; | ||
| 803 | |||
| 804 | if (__bio_add_page(q, bio, virt_to_page(data), bytes, | ||
| 805 | offset) < bytes) | ||
| 806 | break; | ||
| 807 | |||
| 808 | data += bytes; | ||
| 809 | len -= bytes; | ||
| 810 | offset = 0; | ||
| 811 | } | ||
| 812 | |||
| 813 | bio->bi_end_io = bio_map_kern_endio; | ||
| 814 | return bio; | ||
| 815 | } | ||
| 816 | |||
| 817 | /** | ||
| 818 | * bio_map_kern - map kernel address into bio | ||
| 819 | * @q: the request_queue_t for the bio | ||
| 820 | * @data: pointer to buffer to map | ||
| 821 | * @len: length in bytes | ||
| 822 | * @gfp_mask: allocation flags for bio allocation | ||
| 823 | * | ||
| 824 | * Map the kernel address into a bio suitable for io to a block | ||
| 825 | * device. Returns an error pointer in case of error. | ||
| 826 | */ | ||
| 827 | struct bio *bio_map_kern(request_queue_t *q, void *data, unsigned int len, | ||
| 828 | unsigned int gfp_mask) | ||
| 829 | { | ||
| 830 | struct bio *bio; | ||
| 831 | |||
| 832 | bio = __bio_map_kern(q, data, len, gfp_mask); | ||
| 833 | if (IS_ERR(bio)) | ||
| 834 | return bio; | ||
| 835 | |||
| 836 | if (bio->bi_size == len) | ||
| 837 | return bio; | ||
| 838 | |||
| 839 | /* | ||
| 840 | * Don't support partial mappings. | ||
| 841 | */ | ||
| 842 | bio_put(bio); | ||
| 843 | return ERR_PTR(-EINVAL); | ||
| 844 | } | ||
| 845 | |||
| 704 | /* | 846 | /* |
| 705 | * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions | 847 | * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions |
| 706 | * for performing direct-IO in BIOs. | 848 | * for performing direct-IO in BIOs. |
| @@ -1078,6 +1220,7 @@ subsys_initcall(init_bio); | |||
| 1078 | 1220 | ||
| 1079 | EXPORT_SYMBOL(bio_alloc); | 1221 | EXPORT_SYMBOL(bio_alloc); |
| 1080 | EXPORT_SYMBOL(bio_put); | 1222 | EXPORT_SYMBOL(bio_put); |
| 1223 | EXPORT_SYMBOL(bio_free); | ||
| 1081 | EXPORT_SYMBOL(bio_endio); | 1224 | EXPORT_SYMBOL(bio_endio); |
| 1082 | EXPORT_SYMBOL(bio_init); | 1225 | EXPORT_SYMBOL(bio_init); |
| 1083 | EXPORT_SYMBOL(__bio_clone); | 1226 | EXPORT_SYMBOL(__bio_clone); |
| @@ -1088,6 +1231,7 @@ EXPORT_SYMBOL(bio_add_page); | |||
| 1088 | EXPORT_SYMBOL(bio_get_nr_vecs); | 1231 | EXPORT_SYMBOL(bio_get_nr_vecs); |
| 1089 | EXPORT_SYMBOL(bio_map_user); | 1232 | EXPORT_SYMBOL(bio_map_user); |
| 1090 | EXPORT_SYMBOL(bio_unmap_user); | 1233 | EXPORT_SYMBOL(bio_unmap_user); |
| 1234 | EXPORT_SYMBOL(bio_map_kern); | ||
| 1091 | EXPORT_SYMBOL(bio_pair_release); | 1235 | EXPORT_SYMBOL(bio_pair_release); |
| 1092 | EXPORT_SYMBOL(bio_split); | 1236 | EXPORT_SYMBOL(bio_split); |
| 1093 | EXPORT_SYMBOL(bio_split_pool); | 1237 | EXPORT_SYMBOL(bio_split_pool); |
