diff options
| author | Jens Axboe <axboe@kernel.dk> | 2018-11-30 10:47:03 -0500 |
|---|---|---|
| committer | Jens Axboe <axboe@kernel.dk> | 2019-02-28 10:24:23 -0500 |
| commit | 6d0c48aede85e38316d0251564cab39cbc2422f6 (patch) | |
| tree | 4aa1374cc7c486a68d666e6dd120a2e66e48231d | |
| parent | 2579f913d41a086563bb81762c519f3d62ddee37 (diff) | |
block: implement bio helper to add iter bvec pages to bio
For an ITER_BVEC, we can just iterate the iov and add the pages
to the bio directly. For now, we grab a reference to those pages,
and release them normally on IO completion. This isn't really needed
for the normal case of O_DIRECT from/to a file, but some of the more
esoteric use cases (like splice(2)) will unconditionally put the
pipe buffer pages when the buffers are released. Until we can manage
that case properly, ITER_BVEC pages are treated like normal pages
in terms of reference counting.
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
| -rw-r--r-- | block/bio.c | 62 |
1 file changed, 54 insertions, 8 deletions
diff --git a/block/bio.c b/block/bio.c index 83a2dfa417ca..71a78d9fb8b7 100644 --- a/block/bio.c +++ b/block/bio.c | |||
| @@ -836,6 +836,40 @@ int bio_add_page(struct bio *bio, struct page *page, | |||
| 836 | } | 836 | } |
| 837 | EXPORT_SYMBOL(bio_add_page); | 837 | EXPORT_SYMBOL(bio_add_page); |
| 838 | 838 | ||
| 839 | static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter) | ||
| 840 | { | ||
| 841 | const struct bio_vec *bv = iter->bvec; | ||
| 842 | unsigned int len; | ||
| 843 | size_t size; | ||
| 844 | |||
| 845 | if (WARN_ON_ONCE(iter->iov_offset > bv->bv_len)) | ||
| 846 | return -EINVAL; | ||
| 847 | |||
| 848 | len = min_t(size_t, bv->bv_len - iter->iov_offset, iter->count); | ||
| 849 | size = bio_add_page(bio, bv->bv_page, len, | ||
| 850 | bv->bv_offset + iter->iov_offset); | ||
| 851 | if (size == len) { | ||
| 852 | struct page *page; | ||
| 853 | int i; | ||
| 854 | |||
| 855 | /* | ||
| 856 | * For the normal O_DIRECT case, we could skip grabbing this | ||
| 857 | * reference and then not have to put them again when IO | ||
| 858 | * completes. But this breaks some in-kernel users, like | ||
| 859 | * splicing to/from a loop device, where we release the pipe | ||
| 860 | * pages unconditionally. If we can fix that case, we can | ||
| 861 | * get rid of the get here and the need to call | ||
| 862 | * bio_release_pages() at IO completion time. | ||
| 863 | */ | ||
| 864 | mp_bvec_for_each_page(page, bv, i) | ||
| 865 | get_page(page); | ||
| 866 | iov_iter_advance(iter, size); | ||
| 867 | return 0; | ||
| 868 | } | ||
| 869 | |||
| 870 | return -EINVAL; | ||
| 871 | } | ||
| 872 | |||
| 839 | #define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) | 873 | #define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) |
| 840 | 874 | ||
| 841 | /** | 875 | /** |
| @@ -884,23 +918,35 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) | |||
| 884 | } | 918 | } |
| 885 | 919 | ||
| 886 | /** | 920 | /** |
| 887 | * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio | 921 | * bio_iov_iter_get_pages - add user or kernel pages to a bio |
| 888 | * @bio: bio to add pages to | 922 | * @bio: bio to add pages to |
| 889 | * @iter: iov iterator describing the region to be mapped | 923 | * @iter: iov iterator describing the region to be added |
| 924 | * | ||
| 925 | * This takes either an iterator pointing to user memory, or one pointing to | ||
| 926 | * kernel pages (BVEC iterator). If we're adding user pages, we pin them and | ||
| 927 | * map them into the kernel. On IO completion, the caller should put those | ||
| 928 | * pages. For now, when adding kernel pages, we still grab a reference to the | ||
| 929 | * page. This isn't strictly needed for the common case, but some call paths | ||
| 930 | * end up releasing pages from eg a pipe and we can't easily control these. | ||
| 931 | * See comment in __bio_iov_bvec_add_pages(). | ||
| 890 | * | 932 | * |
| 891 | * Pins pages from *iter and appends them to @bio's bvec array. The | ||
| 892 | * pages will have to be released using put_page() when done. | ||
| 893 | * The function tries, but does not guarantee, to pin as many pages as | 933 | * The function tries, but does not guarantee, to pin as many pages as |
| 894 | * fit into the bio, or are requested in *iter, whatever is smaller. | 934 | * fit into the bio, or are requested in *iter, whatever is smaller. If |
| 895 | * If MM encounters an error pinning the requested pages, it stops. | 935 | * MM encounters an error pinning the requested pages, it stops. Error |
| 896 | * Error is returned only if 0 pages could be pinned. | 936 | * is returned only if 0 pages could be pinned. |
| 897 | */ | 937 | */ |
| 898 | int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) | 938 | int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) |
| 899 | { | 939 | { |
| 940 | const bool is_bvec = iov_iter_is_bvec(iter); | ||
| 900 | unsigned short orig_vcnt = bio->bi_vcnt; | 941 | unsigned short orig_vcnt = bio->bi_vcnt; |
| 901 | 942 | ||
| 902 | do { | 943 | do { |
| 903 | int ret = __bio_iov_iter_get_pages(bio, iter); | 944 | int ret; |
| 945 | |||
| 946 | if (is_bvec) | ||
| 947 | ret = __bio_iov_bvec_add_pages(bio, iter); | ||
| 948 | else | ||
| 949 | ret = __bio_iov_iter_get_pages(bio, iter); | ||
| 904 | 950 | ||
| 905 | if (unlikely(ret)) | 951 | if (unlikely(ret)) |
| 906 | return bio->bi_vcnt > orig_vcnt ? 0 : ret; | 952 | return bio->bi_vcnt > orig_vcnt ? 0 : ret; |
