diff options
author | Jens Axboe <axboe@kernel.dk> | 2019-02-27 15:05:25 -0500 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2019-03-18 12:44:48 -0400 |
commit | 875f1d0769cdcfe1596ff0ca609b453359e42ec9 (patch) | |
tree | 7a62a8236e2412bc136376e0c22114b5e631f98a | |
parent | bf33a7699e992b12d4c7d39dc3f0b61f6b26c5c2 (diff) |
iov_iter: add ITER_BVEC_FLAG_NO_REF flag
For ITER_BVEC, if we're holding on to kernel pages, the caller
doesn't need to grab a reference to the bvec pages, and drop that
same reference on IO completion. This is essentially safe for any
ITER_BVEC, but some use cases end up reusing pages and unconditionally
dropping a page reference on completion. An example of that is
sendfile(2), which ends up being a splice_in + splice_out on the
pipe pages.
Add a flag that tells us it's fine to not grab a page reference
to the bvec pages, since that caller knows not to drop a reference
when it's done with the pages.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- | fs/io_uring.c | 3 | ||||
-rw-r--r-- | include/linux/uio.h | 24 |
2 files changed, 22 insertions, 5 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c index 4c6a5e60ddbe..c592a0933b0d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c | |||
@@ -855,6 +855,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, | |||
855 | iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); | 855 | iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); |
856 | if (offset) | 856 | if (offset) |
857 | iov_iter_advance(iter, offset); | 857 | iov_iter_advance(iter, offset); |
858 | |||
859 | /* don't drop a reference to these pages */ | ||
860 | iter->type |= ITER_BVEC_FLAG_NO_REF; | ||
858 | return 0; | 861 | return 0; |
859 | } | 862 | } |
860 | 863 | ||
diff --git a/include/linux/uio.h b/include/linux/uio.h index ecf584f6b82d..4e926641fa80 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h | |||
@@ -23,14 +23,23 @@ struct kvec { | |||
23 | }; | 23 | }; |
24 | 24 | ||
25 | enum iter_type { | 25 | enum iter_type { |
26 | ITER_IOVEC = 0, | 26 | /* set if ITER_BVEC doesn't hold a bv_page ref */ |
27 | ITER_KVEC = 2, | 27 | ITER_BVEC_FLAG_NO_REF = 2, |
28 | ITER_BVEC = 4, | 28 | |
29 | ITER_PIPE = 8, | 29 | /* iter types */ |
30 | ITER_DISCARD = 16, | 30 | ITER_IOVEC = 4, |
31 | ITER_KVEC = 8, | ||
32 | ITER_BVEC = 16, | ||
33 | ITER_PIPE = 32, | ||
34 | ITER_DISCARD = 64, | ||
31 | }; | 35 | }; |
32 | 36 | ||
33 | struct iov_iter { | 37 | struct iov_iter { |
38 | /* | ||
39 | * Bit 0 is the read/write bit, set if we're writing. | ||
40 | * Bit 1 is the BVEC_FLAG_NO_REF bit, set if type is a bvec and | ||
41 | * the caller isn't expecting to drop a page reference when done. | ||
42 | */ | ||
34 | unsigned int type; | 43 | unsigned int type; |
35 | size_t iov_offset; | 44 | size_t iov_offset; |
36 | size_t count; | 45 | size_t count; |
@@ -84,6 +93,11 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i) | |||
84 | return i->type & (READ | WRITE); | 93 | return i->type & (READ | WRITE); |
85 | } | 94 | } |
86 | 95 | ||
96 | static inline bool iov_iter_bvec_no_ref(const struct iov_iter *i) | ||
97 | { | ||
98 | return (i->type & ITER_BVEC_FLAG_NO_REF) != 0; | ||
99 | } | ||
100 | |||
87 | /* | 101 | /* |
88 | * Total number of bytes covered by an iovec. | 102 | * Total number of bytes covered by an iovec. |
89 | * | 103 | * |