summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jens Axboe <axboe@kernel.dk> 2019-02-27 15:05:25 -0500
committer: Jens Axboe <axboe@kernel.dk> 2019-03-18 12:44:48 -0400
commit: 875f1d0769cdcfe1596ff0ca609b453359e42ec9 (patch)
tree: 7a62a8236e2412bc136376e0c22114b5e631f98a
parent: bf33a7699e992b12d4c7d39dc3f0b61f6b26c5c2 (diff)
iov_iter: add ITER_BVEC_FLAG_NO_REF flag
For ITER_BVEC, if we're holding on to kernel pages, the caller doesn't need to grab a reference to the bvec pages, and drop that same reference on IO completion. This is essentially safe for any ITER_BVEC, but some use cases end up reusing pages and unconditionally dropping a page reference on completion. An example of that is sendfile(2), which ends up being a splice_in + splice_out on the pipe pages. Add a flag that tells us it's fine to not grab a page reference to the bvec pages, since that caller knows not to drop a reference when it's done with the pages. Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- fs/io_uring.c 3
-rw-r--r-- include/linux/uio.h 24
2 files changed, 22 insertions, 5 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4c6a5e60ddbe..c592a0933b0d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -855,6 +855,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
855 iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); 855 iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
856 if (offset) 856 if (offset)
857 iov_iter_advance(iter, offset); 857 iov_iter_advance(iter, offset);
858
859 /* don't drop a reference to these pages */
860 iter->type |= ITER_BVEC_FLAG_NO_REF;
858 return 0; 861 return 0;
859} 862}
860 863
diff --git a/include/linux/uio.h b/include/linux/uio.h
index ecf584f6b82d..4e926641fa80 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -23,14 +23,23 @@ struct kvec {
23}; 23};
24 24
25enum iter_type { 25enum iter_type {
26 ITER_IOVEC = 0, 26 /* set if ITER_BVEC doesn't hold a bv_page ref */
27 ITER_KVEC = 2, 27 ITER_BVEC_FLAG_NO_REF = 2,
28 ITER_BVEC = 4, 28
29 ITER_PIPE = 8, 29 /* iter types */
30 ITER_DISCARD = 16, 30 ITER_IOVEC = 4,
31 ITER_KVEC = 8,
32 ITER_BVEC = 16,
33 ITER_PIPE = 32,
34 ITER_DISCARD = 64,
31}; 35};
32 36
33struct iov_iter { 37struct iov_iter {
38 /*
39 * Bit 0 is the read/write bit, set if we're writing.
40 * Bit 1 is the BVEC_FLAG_NO_REF bit, set if type is a bvec and
41 * the caller isn't expecting to drop a page reference when done.
42 */
34 unsigned int type; 43 unsigned int type;
35 size_t iov_offset; 44 size_t iov_offset;
36 size_t count; 45 size_t count;
@@ -84,6 +93,11 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i)
84 return i->type & (READ | WRITE); 93 return i->type & (READ | WRITE);
85} 94}
86 95
96static inline bool iov_iter_bvec_no_ref(const struct iov_iter *i)
97{
98 return (i->type & ITER_BVEC_FLAG_NO_REF) != 0;
99}
100
87/* 101/*
88 * Total number of bytes covered by an iovec. 102 * Total number of bytes covered by an iovec.
89 * 103 *