aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-10-07 18:36:58 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-10-07 18:36:58 -0400
commit: d1f5323370fceaed43a7ee38f4c7bfc7e70f28d0 (patch)
tree: cadb1dc22207a4e1838b7af31ac3fc15363e809b
parent: 2eee010d092903ee95716b6c2fbd9d3289839aa4 (diff)
parent: a949e63992469fed87aef197347960ced31701b8 (diff)
Merge branch 'work.splice_read' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull VFS splice updates from Al Viro:
 "There's a bunch of branches this cycle, both mine and from other folks
  and I'd rather send pull requests separately.

  This one is the conversion of ->splice_read() to ITER_PIPE iov_iter
  (and introduction of such). Gets rid of a lot of code in fs/splice.c
  and elsewhere; there will be followups, but these are for the next
  cycle... Some pipe/splice-related cleanups from Miklos in the same
  branch as well"

* 'work.splice_read' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  pipe: fix comment in pipe_buf_operations
  pipe: add pipe_buf_steal() helper
  pipe: add pipe_buf_confirm() helper
  pipe: add pipe_buf_release() helper
  pipe: add pipe_buf_get() helper
  relay: simplify relay_file_read()
  switch default_file_splice_read() to use of pipe-backed iov_iter
  switch generic_file_splice_read() to use of ->read_iter()
  new iov_iter flavour: pipe-backed
  fuse_dev_splice_read(): switch to add_to_pipe()
  skb_splice_bits(): get rid of callback
  new helper: add_to_pipe()
  splice: lift pipe_lock out of splice_to_pipe()
  splice: switch get_iovec_page_array() to iov_iter
  splice_to_pipe(): don't open-code wakeup_pipe_readers()
  consistent treatment of EFAULT on O_DIRECT read/write
-rw-r--r--drivers/char/virtio_console.c2
-rw-r--r--drivers/staging/lustre/lustre/llite/file.c89
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_internal.h15
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_internal.h14
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_io.c45
-rw-r--r--fs/coda/file.c23
-rw-r--r--fs/direct-io.c3
-rw-r--r--fs/fuse/dev.c63
-rw-r--r--fs/gfs2/file.c28
-rw-r--r--fs/nfs/file.c25
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/ocfs2/file.c34
-rw-r--r--fs/ocfs2/ocfs2_trace.h2
-rw-r--r--fs/pipe.c13
-rw-r--r--fs/splice.c683
-rw-r--r--fs/xfs/xfs_file.c41
-rw-r--r--fs/xfs/xfs_trace.h1
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/pipe_fs_i.h59
-rw-r--r--include/linux/skbuff.h8
-rw-r--r--include/linux/splice.h3
-rw-r--r--include/linux/uio.h14
-rw-r--r--kernel/relay.c78
-rw-r--r--lib/iov_iter.c395
-rw-r--r--mm/shmem.c115
-rw-r--r--net/core/skbuff.c28
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/kcm/kcmsock.c16
-rw-r--r--net/unix/af_unix.c17
30 files changed, 746 insertions, 1077 deletions
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 5da47e26a012..8114744bf30c 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -889,7 +889,7 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
889 return 0; 889 return 0;
890 890
891 /* Try lock this page */ 891 /* Try lock this page */
892 if (buf->ops->steal(pipe, buf) == 0) { 892 if (pipe_buf_steal(pipe, buf) == 0) {
893 /* Get reference and unlock page for moving */ 893 /* Get reference and unlock page for moving */
894 get_page(buf->page); 894 get_page(buf->page);
895 unlock_page(buf->page); 895 unlock_page(buf->page);
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index 6e3a188baaae..d56863ff5866 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -1138,45 +1138,31 @@ restart:
1138 range_lock_init(&range, *ppos, *ppos + count - 1); 1138 range_lock_init(&range, *ppos, *ppos + count - 1);
1139 1139
1140 vio->vui_fd = LUSTRE_FPRIVATE(file); 1140 vio->vui_fd = LUSTRE_FPRIVATE(file);
1141 vio->vui_io_subtype = args->via_io_subtype; 1141 vio->vui_iter = args->u.normal.via_iter;
1142 vio->vui_iocb = args->u.normal.via_iocb;
1143 /*
1144 * Direct IO reads must also take range lock,
1145 * or multiple reads will try to work on the same pages
1146 * See LU-6227 for details.
1147 */
1148 if (((iot == CIT_WRITE) ||
1149 (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
1150 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1151 CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n",
1152 range.rl_node.in_extent.start,
1153 range.rl_node.in_extent.end);
1154 result = range_lock(&lli->lli_write_tree,
1155 &range);
1156 if (result < 0)
1157 goto out;
1142 1158
1143 switch (vio->vui_io_subtype) { 1159 range_locked = true;
1144 case IO_NORMAL:
1145 vio->vui_iter = args->u.normal.via_iter;
1146 vio->vui_iocb = args->u.normal.via_iocb;
1147 /*
1148 * Direct IO reads must also take range lock,
1149 * or multiple reads will try to work on the same pages
1150 * See LU-6227 for details.
1151 */
1152 if (((iot == CIT_WRITE) ||
1153 (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
1154 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1155 CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n",
1156 range.rl_node.in_extent.start,
1157 range.rl_node.in_extent.end);
1158 result = range_lock(&lli->lli_write_tree,
1159 &range);
1160 if (result < 0)
1161 goto out;
1162
1163 range_locked = true;
1164 }
1165 down_read(&lli->lli_trunc_sem);
1166 break;
1167 case IO_SPLICE:
1168 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1169 vio->u.splice.vui_flags = args->u.splice.via_flags;
1170 break;
1171 default:
1172 CERROR("Unknown IO type - %u\n", vio->vui_io_subtype);
1173 LBUG();
1174 } 1160 }
1161 down_read(&lli->lli_trunc_sem);
1175 ll_cl_add(file, env, io); 1162 ll_cl_add(file, env, io);
1176 result = cl_io_loop(env, io); 1163 result = cl_io_loop(env, io);
1177 ll_cl_remove(file, env); 1164 ll_cl_remove(file, env);
1178 if (args->via_io_subtype == IO_NORMAL) 1165 up_read(&lli->lli_trunc_sem);
1179 up_read(&lli->lli_trunc_sem);
1180 if (range_locked) { 1166 if (range_locked) {
1181 CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n", 1167 CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n",
1182 range.rl_node.in_extent.start, 1168 range.rl_node.in_extent.start,
@@ -1235,7 +1221,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
1235 if (IS_ERR(env)) 1221 if (IS_ERR(env))
1236 return PTR_ERR(env); 1222 return PTR_ERR(env);
1237 1223
1238 args = ll_env_args(env, IO_NORMAL); 1224 args = ll_env_args(env);
1239 args->u.normal.via_iter = to; 1225 args->u.normal.via_iter = to;
1240 args->u.normal.via_iocb = iocb; 1226 args->u.normal.via_iocb = iocb;
1241 1227
@@ -1259,7 +1245,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1259 if (IS_ERR(env)) 1245 if (IS_ERR(env))
1260 return PTR_ERR(env); 1246 return PTR_ERR(env);
1261 1247
1262 args = ll_env_args(env, IO_NORMAL); 1248 args = ll_env_args(env);
1263 args->u.normal.via_iter = from; 1249 args->u.normal.via_iter = from;
1264 args->u.normal.via_iocb = iocb; 1250 args->u.normal.via_iocb = iocb;
1265 1251
@@ -1269,31 +1255,6 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1269 return result; 1255 return result;
1270} 1256}
1271 1257
1272/*
1273 * Send file content (through pagecache) somewhere with helper
1274 */
1275static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1276 struct pipe_inode_info *pipe, size_t count,
1277 unsigned int flags)
1278{
1279 struct lu_env *env;
1280 struct vvp_io_args *args;
1281 ssize_t result;
1282 int refcheck;
1283
1284 env = cl_env_get(&refcheck);
1285 if (IS_ERR(env))
1286 return PTR_ERR(env);
1287
1288 args = ll_env_args(env, IO_SPLICE);
1289 args->u.splice.via_pipe = pipe;
1290 args->u.splice.via_flags = flags;
1291
1292 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1293 cl_env_put(env, &refcheck);
1294 return result;
1295}
1296
1297int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, 1258int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
1298 __u64 flags, struct lov_user_md *lum, 1259 __u64 flags, struct lov_user_md *lum,
1299 int lum_size) 1260 int lum_size)
@@ -3267,7 +3228,7 @@ struct file_operations ll_file_operations = {
3267 .release = ll_file_release, 3228 .release = ll_file_release,
3268 .mmap = ll_file_mmap, 3229 .mmap = ll_file_mmap,
3269 .llseek = ll_file_seek, 3230 .llseek = ll_file_seek,
3270 .splice_read = ll_file_splice_read, 3231 .splice_read = generic_file_splice_read,
3271 .fsync = ll_fsync, 3232 .fsync = ll_fsync,
3272 .flush = ll_flush 3233 .flush = ll_flush
3273}; 3234};
@@ -3280,7 +3241,7 @@ struct file_operations ll_file_operations_flock = {
3280 .release = ll_file_release, 3241 .release = ll_file_release,
3281 .mmap = ll_file_mmap, 3242 .mmap = ll_file_mmap,
3282 .llseek = ll_file_seek, 3243 .llseek = ll_file_seek,
3283 .splice_read = ll_file_splice_read, 3244 .splice_read = generic_file_splice_read,
3284 .fsync = ll_fsync, 3245 .fsync = ll_fsync,
3285 .flush = ll_flush, 3246 .flush = ll_flush,
3286 .flock = ll_file_flock, 3247 .flock = ll_file_flock,
@@ -3296,7 +3257,7 @@ struct file_operations ll_file_operations_noflock = {
3296 .release = ll_file_release, 3257 .release = ll_file_release,
3297 .mmap = ll_file_mmap, 3258 .mmap = ll_file_mmap,
3298 .llseek = ll_file_seek, 3259 .llseek = ll_file_seek,
3299 .splice_read = ll_file_splice_read, 3260 .splice_read = generic_file_splice_read,
3300 .fsync = ll_fsync, 3261 .fsync = ll_fsync,
3301 .flush = ll_flush, 3262 .flush = ll_flush,
3302 .flock = ll_file_noflock, 3263 .flock = ll_file_noflock,
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index 3e98bd685061..4bc551279aa4 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -908,17 +908,11 @@ void vvp_write_complete(struct vvp_object *club, struct vvp_page *page);
908 */ 908 */
909struct vvp_io_args { 909struct vvp_io_args {
910 /** normal/splice */ 910 /** normal/splice */
911 enum vvp_io_subtype via_io_subtype;
912
913 union { 911 union {
914 struct { 912 struct {
915 struct kiocb *via_iocb; 913 struct kiocb *via_iocb;
916 struct iov_iter *via_iter; 914 struct iov_iter *via_iter;
917 } normal; 915 } normal;
918 struct {
919 struct pipe_inode_info *via_pipe;
920 unsigned int via_flags;
921 } splice;
922 } u; 916 } u;
923}; 917};
924 918
@@ -946,14 +940,9 @@ static inline struct ll_thread_info *ll_env_info(const struct lu_env *env)
946 return lti; 940 return lti;
947} 941}
948 942
949static inline struct vvp_io_args *ll_env_args(const struct lu_env *env, 943static inline struct vvp_io_args *ll_env_args(const struct lu_env *env)
950 enum vvp_io_subtype type)
951{ 944{
952 struct vvp_io_args *via = &ll_env_info(env)->lti_args; 945 return &ll_env_info(env)->lti_args;
953
954 via->via_io_subtype = type;
955
956 return via;
957} 946}
958 947
959void ll_queue_done_writing(struct inode *inode, unsigned long flags); 948void ll_queue_done_writing(struct inode *inode, unsigned long flags);
diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h
index 5802da81cd0e..4464ad258387 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_internal.h
+++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h
@@ -49,14 +49,6 @@ struct obd_device;
49struct obd_export; 49struct obd_export;
50struct page; 50struct page;
51 51
52/* specific architecture can implement only part of this list */
53enum vvp_io_subtype {
54 /** normal IO */
55 IO_NORMAL,
56 /** io started from splice_{read|write} */
57 IO_SPLICE
58};
59
60/** 52/**
61 * IO state private to IO state private to VVP layer. 53 * IO state private to IO state private to VVP layer.
62 */ 54 */
@@ -99,10 +91,6 @@ struct vvp_io {
99 bool ft_flags_valid; 91 bool ft_flags_valid;
100 } fault; 92 } fault;
101 struct { 93 struct {
102 struct pipe_inode_info *vui_pipe;
103 unsigned int vui_flags;
104 } splice;
105 struct {
106 struct cl_page_list vui_queue; 94 struct cl_page_list vui_queue;
107 unsigned long vui_written; 95 unsigned long vui_written;
108 int vui_from; 96 int vui_from;
@@ -110,8 +98,6 @@ struct vvp_io {
110 } write; 98 } write;
111 } u; 99 } u;
112 100
113 enum vvp_io_subtype vui_io_subtype;
114
115 /** 101 /**
116 * Layout version when this IO is initialized 102 * Layout version when this IO is initialized
117 */ 103 */
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index 2ab450359b6d..2b7f182a15e2 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -54,18 +54,6 @@ static struct vvp_io *cl2vvp_io(const struct lu_env *env,
54} 54}
55 55
56/** 56/**
57 * True, if \a io is a normal io, False for splice_{read,write}
58 */
59static int cl_is_normalio(const struct lu_env *env, const struct cl_io *io)
60{
61 struct vvp_io *vio = vvp_env_io(env);
62
63 LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
64
65 return vio->vui_io_subtype == IO_NORMAL;
66}
67
68/**
69 * For swapping layout. The file's layout may have changed. 57 * For swapping layout. The file's layout may have changed.
70 * To avoid populating pages to a wrong stripe, we have to verify the 58 * To avoid populating pages to a wrong stripe, we have to verify the
71 * correctness of layout. It works because swapping layout processes 59 * correctness of layout. It works because swapping layout processes
@@ -390,9 +378,6 @@ static int vvp_mmap_locks(const struct lu_env *env,
390 378
391 LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); 379 LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
392 380
393 if (!cl_is_normalio(env, io))
394 return 0;
395
396 if (!vio->vui_iter) /* nfs or loop back device write */ 381 if (!vio->vui_iter) /* nfs or loop back device write */
397 return 0; 382 return 0;
398 383
@@ -461,15 +446,10 @@ static void vvp_io_advance(const struct lu_env *env,
461 const struct cl_io_slice *ios, 446 const struct cl_io_slice *ios,
462 size_t nob) 447 size_t nob)
463{ 448{
464 struct vvp_io *vio = cl2vvp_io(env, ios);
465 struct cl_io *io = ios->cis_io;
466 struct cl_object *obj = ios->cis_io->ci_obj; 449 struct cl_object *obj = ios->cis_io->ci_obj;
467 450 struct vvp_io *vio = cl2vvp_io(env, ios);
468 CLOBINVRNT(env, obj, vvp_object_invariant(obj)); 451 CLOBINVRNT(env, obj, vvp_object_invariant(obj));
469 452
470 if (!cl_is_normalio(env, io))
471 return;
472
473 iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count -= nob); 453 iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count -= nob);
474} 454}
475 455
@@ -478,7 +458,7 @@ static void vvp_io_update_iov(const struct lu_env *env,
478{ 458{
479 size_t size = io->u.ci_rw.crw_count; 459 size_t size = io->u.ci_rw.crw_count;
480 460
481 if (!cl_is_normalio(env, io) || !vio->vui_iter) 461 if (!vio->vui_iter)
482 return; 462 return;
483 463
484 iov_iter_truncate(vio->vui_iter, size); 464 iov_iter_truncate(vio->vui_iter, size);
@@ -715,25 +695,8 @@ static int vvp_io_read_start(const struct lu_env *env,
715 695
716 /* BUG: 5972 */ 696 /* BUG: 5972 */
717 file_accessed(file); 697 file_accessed(file);
718 switch (vio->vui_io_subtype) { 698 LASSERT(vio->vui_iocb->ki_pos == pos);
719 case IO_NORMAL: 699 result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);
720 LASSERT(vio->vui_iocb->ki_pos == pos);
721 result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);
722 break;
723 case IO_SPLICE:
724 result = generic_file_splice_read(file, &pos,
725 vio->u.splice.vui_pipe, cnt,
726 vio->u.splice.vui_flags);
727 /* LU-1109: do splice read stripe by stripe otherwise if it
728 * may make nfsd stuck if this read occupied all internal pipe
729 * buffers.
730 */
731 io->ci_continue = 0;
732 break;
733 default:
734 CERROR("Wrong IO type %u\n", vio->vui_io_subtype);
735 LBUG();
736 }
737 700
738out: 701out:
739 if (result >= 0) { 702 if (result >= 0) {
diff --git a/fs/coda/file.c b/fs/coda/file.c
index f47c7483863b..8415d4f8d1a1 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -38,27 +38,6 @@ coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
38} 38}
39 39
40static ssize_t 40static ssize_t
41coda_file_splice_read(struct file *coda_file, loff_t *ppos,
42 struct pipe_inode_info *pipe, size_t count,
43 unsigned int flags)
44{
45 ssize_t (*splice_read)(struct file *, loff_t *,
46 struct pipe_inode_info *, size_t, unsigned int);
47 struct coda_file_info *cfi;
48 struct file *host_file;
49
50 cfi = CODA_FTOC(coda_file);
51 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
52 host_file = cfi->cfi_container;
53
54 splice_read = host_file->f_op->splice_read;
55 if (!splice_read)
56 splice_read = default_file_splice_read;
57
58 return splice_read(host_file, ppos, pipe, count, flags);
59}
60
61static ssize_t
62coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to) 41coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
63{ 42{
64 struct file *coda_file = iocb->ki_filp; 43 struct file *coda_file = iocb->ki_filp;
@@ -225,6 +204,6 @@ const struct file_operations coda_file_operations = {
225 .open = coda_open, 204 .open = coda_open,
226 .release = coda_release, 205 .release = coda_release,
227 .fsync = coda_fsync, 206 .fsync = coda_fsync,
228 .splice_read = coda_file_splice_read, 207 .splice_read = generic_file_splice_read,
229}; 208};
230 209
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 7c3ce73cb617..fb9aa16a7727 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -246,6 +246,9 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
246 if ((dio->op == REQ_OP_READ) && 246 if ((dio->op == REQ_OP_READ) &&
247 ((offset + transferred) > dio->i_size)) 247 ((offset + transferred) > dio->i_size))
248 transferred = dio->i_size - offset; 248 transferred = dio->i_size - offset;
249 /* ignore EFAULT if some IO has been done */
250 if (unlikely(ret == -EFAULT) && transferred)
251 ret = 0;
249 } 252 }
250 253
251 if (ret == 0) 254 if (ret == 0)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index c41bde26c338..70ea57c7b6bb 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -728,7 +728,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
728 struct pipe_buffer *buf = cs->pipebufs; 728 struct pipe_buffer *buf = cs->pipebufs;
729 729
730 if (!cs->write) { 730 if (!cs->write) {
731 err = buf->ops->confirm(cs->pipe, buf); 731 err = pipe_buf_confirm(cs->pipe, buf);
732 if (err) 732 if (err)
733 return err; 733 return err;
734 734
@@ -827,7 +827,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
827 827
828 fuse_copy_finish(cs); 828 fuse_copy_finish(cs);
829 829
830 err = buf->ops->confirm(cs->pipe, buf); 830 err = pipe_buf_confirm(cs->pipe, buf);
831 if (err) 831 if (err)
832 return err; 832 return err;
833 833
@@ -840,7 +840,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
840 if (cs->len != PAGE_SIZE) 840 if (cs->len != PAGE_SIZE)
841 goto out_fallback; 841 goto out_fallback;
842 842
843 if (buf->ops->steal(cs->pipe, buf) != 0) 843 if (pipe_buf_steal(cs->pipe, buf) != 0)
844 goto out_fallback; 844 goto out_fallback;
845 845
846 newpage = buf->page; 846 newpage = buf->page;
@@ -1341,9 +1341,8 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1341 struct pipe_inode_info *pipe, 1341 struct pipe_inode_info *pipe,
1342 size_t len, unsigned int flags) 1342 size_t len, unsigned int flags)
1343{ 1343{
1344 int ret; 1344 int total, ret;
1345 int page_nr = 0; 1345 int page_nr = 0;
1346 int do_wakeup = 0;
1347 struct pipe_buffer *bufs; 1346 struct pipe_buffer *bufs;
1348 struct fuse_copy_state cs; 1347 struct fuse_copy_state cs;
1349 struct fuse_dev *fud = fuse_get_dev(in); 1348 struct fuse_dev *fud = fuse_get_dev(in);
@@ -1362,52 +1361,23 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1362 if (ret < 0) 1361 if (ret < 0)
1363 goto out; 1362 goto out;
1364 1363
1365 ret = 0;
1366 pipe_lock(pipe);
1367
1368 if (!pipe->readers) {
1369 send_sig(SIGPIPE, current, 0);
1370 if (!ret)
1371 ret = -EPIPE;
1372 goto out_unlock;
1373 }
1374
1375 if (pipe->nrbufs + cs.nr_segs > pipe->buffers) { 1364 if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
1376 ret = -EIO; 1365 ret = -EIO;
1377 goto out_unlock; 1366 goto out;
1378 } 1367 }
1379 1368
1380 while (page_nr < cs.nr_segs) { 1369 for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
1381 int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1382 struct pipe_buffer *buf = pipe->bufs + newbuf;
1383
1384 buf->page = bufs[page_nr].page;
1385 buf->offset = bufs[page_nr].offset;
1386 buf->len = bufs[page_nr].len;
1387 /* 1370 /*
1388 * Need to be careful about this. Having buf->ops in module 1371 * Need to be careful about this. Having buf->ops in module
1389 * code can Oops if the buffer persists after module unload. 1372 * code can Oops if the buffer persists after module unload.
1390 */ 1373 */
1391 buf->ops = &nosteal_pipe_buf_ops; 1374 bufs[page_nr].ops = &nosteal_pipe_buf_ops;
1392 1375 ret = add_to_pipe(pipe, &bufs[page_nr++]);
1393 pipe->nrbufs++; 1376 if (unlikely(ret < 0))
1394 page_nr++; 1377 break;
1395 ret += buf->len;
1396
1397 if (pipe->files)
1398 do_wakeup = 1;
1399 }
1400
1401out_unlock:
1402 pipe_unlock(pipe);
1403
1404 if (do_wakeup) {
1405 smp_mb();
1406 if (waitqueue_active(&pipe->wait))
1407 wake_up_interruptible(&pipe->wait);
1408 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
1409 } 1378 }
1410 1379 if (total)
1380 ret = total;
1411out: 1381out:
1412 for (; page_nr < cs.nr_segs; page_nr++) 1382 for (; page_nr < cs.nr_segs; page_nr++)
1413 put_page(bufs[page_nr].page); 1383 put_page(bufs[page_nr].page);
@@ -1992,7 +1962,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1992 pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); 1962 pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1993 pipe->nrbufs--; 1963 pipe->nrbufs--;
1994 } else { 1964 } else {
1995 ibuf->ops->get(pipe, ibuf); 1965 pipe_buf_get(pipe, ibuf);
1996 *obuf = *ibuf; 1966 *obuf = *ibuf;
1997 obuf->flags &= ~PIPE_BUF_FLAG_GIFT; 1967 obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1998 obuf->len = rem; 1968 obuf->len = rem;
@@ -2014,10 +1984,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
2014 1984
2015 ret = fuse_dev_do_write(fud, &cs, len); 1985 ret = fuse_dev_do_write(fud, &cs, len);
2016 1986
2017 for (idx = 0; idx < nbuf; idx++) { 1987 for (idx = 0; idx < nbuf; idx++)
2018 struct pipe_buffer *buf = &bufs[idx]; 1988 pipe_buf_release(pipe, &bufs[idx]);
2019 buf->ops->release(pipe, buf); 1989
2020 }
2021out: 1990out:
2022 kfree(bufs); 1991 kfree(bufs);
2023 return ret; 1992 return ret;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 360188f162bd..e23ff70b3435 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -954,30 +954,6 @@ out_uninit:
954 return ret; 954 return ret;
955} 955}
956 956
957static ssize_t gfs2_file_splice_read(struct file *in, loff_t *ppos,
958 struct pipe_inode_info *pipe, size_t len,
959 unsigned int flags)
960{
961 struct inode *inode = in->f_mapping->host;
962 struct gfs2_inode *ip = GFS2_I(inode);
963 struct gfs2_holder gh;
964 int ret;
965
966 inode_lock(inode);
967
968 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
969 if (ret) {
970 inode_unlock(inode);
971 return ret;
972 }
973
974 gfs2_glock_dq_uninit(&gh);
975 inode_unlock(inode);
976
977 return generic_file_splice_read(in, ppos, pipe, len, flags);
978}
979
980
981static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, 957static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
982 struct file *out, loff_t *ppos, 958 struct file *out, loff_t *ppos,
983 size_t len, unsigned int flags) 959 size_t len, unsigned int flags)
@@ -1140,7 +1116,7 @@ const struct file_operations gfs2_file_fops = {
1140 .fsync = gfs2_fsync, 1116 .fsync = gfs2_fsync,
1141 .lock = gfs2_lock, 1117 .lock = gfs2_lock,
1142 .flock = gfs2_flock, 1118 .flock = gfs2_flock,
1143 .splice_read = gfs2_file_splice_read, 1119 .splice_read = generic_file_splice_read,
1144 .splice_write = gfs2_file_splice_write, 1120 .splice_write = gfs2_file_splice_write,
1145 .setlease = simple_nosetlease, 1121 .setlease = simple_nosetlease,
1146 .fallocate = gfs2_fallocate, 1122 .fallocate = gfs2_fallocate,
@@ -1168,7 +1144,7 @@ const struct file_operations gfs2_file_fops_nolock = {
1168 .open = gfs2_open, 1144 .open = gfs2_open,
1169 .release = gfs2_release, 1145 .release = gfs2_release,
1170 .fsync = gfs2_fsync, 1146 .fsync = gfs2_fsync,
1171 .splice_read = gfs2_file_splice_read, 1147 .splice_read = generic_file_splice_read,
1172 .splice_write = gfs2_file_splice_write, 1148 .splice_write = gfs2_file_splice_write,
1173 .setlease = generic_setlease, 1149 .setlease = generic_setlease,
1174 .fallocate = gfs2_fallocate, 1150 .fallocate = gfs2_fallocate,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index ca699ddc11c1..2efbdde36c3e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -182,29 +182,6 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
182} 182}
183EXPORT_SYMBOL_GPL(nfs_file_read); 183EXPORT_SYMBOL_GPL(nfs_file_read);
184 184
185ssize_t
186nfs_file_splice_read(struct file *filp, loff_t *ppos,
187 struct pipe_inode_info *pipe, size_t count,
188 unsigned int flags)
189{
190 struct inode *inode = file_inode(filp);
191 ssize_t res;
192
193 dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
194 filp, (unsigned long) count, (unsigned long long) *ppos);
195
196 nfs_start_io_read(inode);
197 res = nfs_revalidate_mapping(inode, filp->f_mapping);
198 if (!res) {
199 res = generic_file_splice_read(filp, ppos, pipe, count, flags);
200 if (res > 0)
201 nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res);
202 }
203 nfs_end_io_read(inode);
204 return res;
205}
206EXPORT_SYMBOL_GPL(nfs_file_splice_read);
207
208int 185int
209nfs_file_mmap(struct file * file, struct vm_area_struct * vma) 186nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
210{ 187{
@@ -871,7 +848,7 @@ const struct file_operations nfs_file_operations = {
871 .fsync = nfs_file_fsync, 848 .fsync = nfs_file_fsync,
872 .lock = nfs_lock, 849 .lock = nfs_lock,
873 .flock = nfs_flock, 850 .flock = nfs_flock,
874 .splice_read = nfs_file_splice_read, 851 .splice_read = generic_file_splice_read,
875 .splice_write = iter_file_splice_write, 852 .splice_write = iter_file_splice_write,
876 .check_flags = nfs_check_flags, 853 .check_flags = nfs_check_flags,
877 .setlease = simple_nosetlease, 854 .setlease = simple_nosetlease,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 74935a19e4bf..d7b062bdc504 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -365,8 +365,6 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *)
365int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); 365int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
366loff_t nfs_file_llseek(struct file *, loff_t, int); 366loff_t nfs_file_llseek(struct file *, loff_t, int);
367ssize_t nfs_file_read(struct kiocb *, struct iov_iter *); 367ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
368ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
369 size_t, unsigned int);
370int nfs_file_mmap(struct file *, struct vm_area_struct *); 368int nfs_file_mmap(struct file *, struct vm_area_struct *);
371ssize_t nfs_file_write(struct kiocb *, struct iov_iter *); 369ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);
372int nfs_file_release(struct inode *, struct file *); 370int nfs_file_release(struct inode *, struct file *);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index d085ad794884..89a77950e0b0 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -248,7 +248,7 @@ const struct file_operations nfs4_file_operations = {
248 .fsync = nfs_file_fsync, 248 .fsync = nfs_file_fsync,
249 .lock = nfs_lock, 249 .lock = nfs_lock,
250 .flock = nfs_flock, 250 .flock = nfs_flock,
251 .splice_read = nfs_file_splice_read, 251 .splice_read = generic_file_splice_read,
252 .splice_write = iter_file_splice_write, 252 .splice_write = iter_file_splice_write,
253 .check_flags = nfs_check_flags, 253 .check_flags = nfs_check_flags,
254 .setlease = simple_nosetlease, 254 .setlease = simple_nosetlease,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 0b055bfb8e86..8f91639f8364 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2321,36 +2321,6 @@ out_mutex:
2321 return ret; 2321 return ret;
2322} 2322}
2323 2323
2324static ssize_t ocfs2_file_splice_read(struct file *in,
2325 loff_t *ppos,
2326 struct pipe_inode_info *pipe,
2327 size_t len,
2328 unsigned int flags)
2329{
2330 int ret = 0, lock_level = 0;
2331 struct inode *inode = file_inode(in);
2332
2333 trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry,
2334 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2335 in->f_path.dentry->d_name.len,
2336 in->f_path.dentry->d_name.name, len);
2337
2338 /*
2339 * See the comment in ocfs2_file_read_iter()
2340 */
2341 ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level);
2342 if (ret < 0) {
2343 mlog_errno(ret);
2344 goto bail;
2345 }
2346 ocfs2_inode_unlock(inode, lock_level);
2347
2348 ret = generic_file_splice_read(in, ppos, pipe, len, flags);
2349
2350bail:
2351 return ret;
2352}
2353
2354static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, 2324static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
2355 struct iov_iter *to) 2325 struct iov_iter *to)
2356{ 2326{
@@ -2509,7 +2479,7 @@ const struct file_operations ocfs2_fops = {
2509#endif 2479#endif
2510 .lock = ocfs2_lock, 2480 .lock = ocfs2_lock,
2511 .flock = ocfs2_flock, 2481 .flock = ocfs2_flock,
2512 .splice_read = ocfs2_file_splice_read, 2482 .splice_read = generic_file_splice_read,
2513 .splice_write = iter_file_splice_write, 2483 .splice_write = iter_file_splice_write,
2514 .fallocate = ocfs2_fallocate, 2484 .fallocate = ocfs2_fallocate,
2515}; 2485};
@@ -2554,7 +2524,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
2554 .compat_ioctl = ocfs2_compat_ioctl, 2524 .compat_ioctl = ocfs2_compat_ioctl,
2555#endif 2525#endif
2556 .flock = ocfs2_flock, 2526 .flock = ocfs2_flock,
2557 .splice_read = ocfs2_file_splice_read, 2527 .splice_read = generic_file_splice_read,
2558 .splice_write = iter_file_splice_write, 2528 .splice_write = iter_file_splice_write,
2559 .fallocate = ocfs2_fallocate, 2529 .fallocate = ocfs2_fallocate,
2560}; 2530};
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index f8f5fc5e6c05..0b58abcf1c6d 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -1314,8 +1314,6 @@ DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_write);
1314 1314
1315DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write); 1315DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write);
1316 1316
1317DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read);
1318
1319DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read); 1317DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read);
1320 1318
1321DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file); 1319DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file);
diff --git a/fs/pipe.c b/fs/pipe.c
index 4ebe6b2e5217..4fc422f0dea8 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -267,7 +267,6 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
267 if (bufs) { 267 if (bufs) {
268 int curbuf = pipe->curbuf; 268 int curbuf = pipe->curbuf;
269 struct pipe_buffer *buf = pipe->bufs + curbuf; 269 struct pipe_buffer *buf = pipe->bufs + curbuf;
270 const struct pipe_buf_operations *ops = buf->ops;
271 size_t chars = buf->len; 270 size_t chars = buf->len;
272 size_t written; 271 size_t written;
273 int error; 272 int error;
@@ -275,7 +274,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
275 if (chars > total_len) 274 if (chars > total_len)
276 chars = total_len; 275 chars = total_len;
277 276
278 error = ops->confirm(pipe, buf); 277 error = pipe_buf_confirm(pipe, buf);
279 if (error) { 278 if (error) {
280 if (!ret) 279 if (!ret)
281 ret = error; 280 ret = error;
@@ -299,8 +298,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
299 } 298 }
300 299
301 if (!buf->len) { 300 if (!buf->len) {
302 buf->ops = NULL; 301 pipe_buf_release(pipe, buf);
303 ops->release(pipe, buf);
304 curbuf = (curbuf + 1) & (pipe->buffers - 1); 302 curbuf = (curbuf + 1) & (pipe->buffers - 1);
305 pipe->curbuf = curbuf; 303 pipe->curbuf = curbuf;
306 pipe->nrbufs = --bufs; 304 pipe->nrbufs = --bufs;
@@ -383,11 +381,10 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
383 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) & 381 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
384 (pipe->buffers - 1); 382 (pipe->buffers - 1);
385 struct pipe_buffer *buf = pipe->bufs + lastbuf; 383 struct pipe_buffer *buf = pipe->bufs + lastbuf;
386 const struct pipe_buf_operations *ops = buf->ops;
387 int offset = buf->offset + buf->len; 384 int offset = buf->offset + buf->len;
388 385
389 if (ops->can_merge && offset + chars <= PAGE_SIZE) { 386 if (buf->ops->can_merge && offset + chars <= PAGE_SIZE) {
390 ret = ops->confirm(pipe, buf); 387 ret = pipe_buf_confirm(pipe, buf);
391 if (ret) 388 if (ret)
392 goto out; 389 goto out;
393 390
@@ -664,7 +661,7 @@ void free_pipe_info(struct pipe_inode_info *pipe)
664 for (i = 0; i < pipe->buffers; i++) { 661 for (i = 0; i < pipe->buffers; i++) {
665 struct pipe_buffer *buf = pipe->bufs + i; 662 struct pipe_buffer *buf = pipe->bufs + i;
666 if (buf->ops) 663 if (buf->ops)
667 buf->ops->release(pipe, buf); 664 pipe_buf_release(pipe, buf);
668 } 665 }
669 if (pipe->tmp_page) 666 if (pipe->tmp_page)
670 __free_page(pipe->tmp_page); 667 __free_page(pipe->tmp_page);
diff --git a/fs/splice.c b/fs/splice.c
index dd9bf7e410d2..aa38901a4f10 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -183,82 +183,39 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
183 struct splice_pipe_desc *spd) 183 struct splice_pipe_desc *spd)
184{ 184{
185 unsigned int spd_pages = spd->nr_pages; 185 unsigned int spd_pages = spd->nr_pages;
186 int ret, do_wakeup, page_nr; 186 int ret = 0, page_nr = 0;
187 187
188 if (!spd_pages) 188 if (!spd_pages)
189 return 0; 189 return 0;
190 190
191 ret = 0; 191 if (unlikely(!pipe->readers)) {
192 do_wakeup = 0; 192 send_sig(SIGPIPE, current, 0);
193 page_nr = 0; 193 ret = -EPIPE;
194 194 goto out;
195 pipe_lock(pipe); 195 }
196
197 for (;;) {
198 if (!pipe->readers) {
199 send_sig(SIGPIPE, current, 0);
200 if (!ret)
201 ret = -EPIPE;
202 break;
203 }
204
205 if (pipe->nrbufs < pipe->buffers) {
206 int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
207 struct pipe_buffer *buf = pipe->bufs + newbuf;
208
209 buf->page = spd->pages[page_nr];
210 buf->offset = spd->partial[page_nr].offset;
211 buf->len = spd->partial[page_nr].len;
212 buf->private = spd->partial[page_nr].private;
213 buf->ops = spd->ops;
214 if (spd->flags & SPLICE_F_GIFT)
215 buf->flags |= PIPE_BUF_FLAG_GIFT;
216
217 pipe->nrbufs++;
218 page_nr++;
219 ret += buf->len;
220
221 if (pipe->files)
222 do_wakeup = 1;
223 196
224 if (!--spd->nr_pages) 197 while (pipe->nrbufs < pipe->buffers) {
225 break; 198 int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
226 if (pipe->nrbufs < pipe->buffers) 199 struct pipe_buffer *buf = pipe->bufs + newbuf;
227 continue;
228 200
229 break; 201 buf->page = spd->pages[page_nr];
230 } 202 buf->offset = spd->partial[page_nr].offset;
203 buf->len = spd->partial[page_nr].len;
204 buf->private = spd->partial[page_nr].private;
205 buf->ops = spd->ops;
231 206
232 if (spd->flags & SPLICE_F_NONBLOCK) { 207 pipe->nrbufs++;
233 if (!ret) 208 page_nr++;
234 ret = -EAGAIN; 209 ret += buf->len;
235 break;
236 }
237 210
238 if (signal_pending(current)) { 211 if (!--spd->nr_pages)
239 if (!ret)
240 ret = -ERESTARTSYS;
241 break; 212 break;
242 }
243
244 if (do_wakeup) {
245 smp_mb();
246 if (waitqueue_active(&pipe->wait))
247 wake_up_interruptible_sync(&pipe->wait);
248 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
249 do_wakeup = 0;
250 }
251
252 pipe->waiting_writers++;
253 pipe_wait(pipe);
254 pipe->waiting_writers--;
255 } 213 }
256 214
257 pipe_unlock(pipe); 215 if (!ret)
258 216 ret = -EAGAIN;
259 if (do_wakeup)
260 wakeup_pipe_readers(pipe);
261 217
218out:
262 while (page_nr < spd_pages) 219 while (page_nr < spd_pages)
263 spd->spd_release(spd, page_nr++); 220 spd->spd_release(spd, page_nr++);
264 221
@@ -266,6 +223,26 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
266} 223}
267EXPORT_SYMBOL_GPL(splice_to_pipe); 224EXPORT_SYMBOL_GPL(splice_to_pipe);
268 225
226ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
227{
228 int ret;
229
230 if (unlikely(!pipe->readers)) {
231 send_sig(SIGPIPE, current, 0);
232 ret = -EPIPE;
233 } else if (pipe->nrbufs == pipe->buffers) {
234 ret = -EAGAIN;
235 } else {
236 int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
237 pipe->bufs[newbuf] = *buf;
238 pipe->nrbufs++;
239 return buf->len;
240 }
241 pipe_buf_release(pipe, buf);
242 return ret;
243}
244EXPORT_SYMBOL(add_to_pipe);
245
269void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) 246void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
270{ 247{
271 put_page(spd->pages[i]); 248 put_page(spd->pages[i]);
@@ -303,207 +280,6 @@ void splice_shrink_spd(struct splice_pipe_desc *spd)
303 kfree(spd->partial); 280 kfree(spd->partial);
304} 281}
305 282
306static int
307__generic_file_splice_read(struct file *in, loff_t *ppos,
308 struct pipe_inode_info *pipe, size_t len,
309 unsigned int flags)
310{
311 struct address_space *mapping = in->f_mapping;
312 unsigned int loff, nr_pages, req_pages;
313 struct page *pages[PIPE_DEF_BUFFERS];
314 struct partial_page partial[PIPE_DEF_BUFFERS];
315 struct page *page;
316 pgoff_t index, end_index;
317 loff_t isize;
318 int error, page_nr;
319 struct splice_pipe_desc spd = {
320 .pages = pages,
321 .partial = partial,
322 .nr_pages_max = PIPE_DEF_BUFFERS,
323 .flags = flags,
324 .ops = &page_cache_pipe_buf_ops,
325 .spd_release = spd_release_page,
326 };
327
328 if (splice_grow_spd(pipe, &spd))
329 return -ENOMEM;
330
331 index = *ppos >> PAGE_SHIFT;
332 loff = *ppos & ~PAGE_MASK;
333 req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT;
334 nr_pages = min(req_pages, spd.nr_pages_max);
335
336 /*
337 * Lookup the (hopefully) full range of pages we need.
338 */
339 spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages);
340 index += spd.nr_pages;
341
342 /*
343 * If find_get_pages_contig() returned fewer pages than we needed,
344 * readahead/allocate the rest and fill in the holes.
345 */
346 if (spd.nr_pages < nr_pages)
347 page_cache_sync_readahead(mapping, &in->f_ra, in,
348 index, req_pages - spd.nr_pages);
349
350 error = 0;
351 while (spd.nr_pages < nr_pages) {
352 /*
353 * Page could be there, find_get_pages_contig() breaks on
354 * the first hole.
355 */
356 page = find_get_page(mapping, index);
357 if (!page) {
358 /*
359 * page didn't exist, allocate one.
360 */
361 page = page_cache_alloc_cold(mapping);
362 if (!page)
363 break;
364
365 error = add_to_page_cache_lru(page, mapping, index,
366 mapping_gfp_constraint(mapping, GFP_KERNEL));
367 if (unlikely(error)) {
368 put_page(page);
369 if (error == -EEXIST)
370 continue;
371 break;
372 }
373 /*
374 * add_to_page_cache() locks the page, unlock it
375 * to avoid convoluting the logic below even more.
376 */
377 unlock_page(page);
378 }
379
380 spd.pages[spd.nr_pages++] = page;
381 index++;
382 }
383
384 /*
385 * Now loop over the map and see if we need to start IO on any
386 * pages, fill in the partial map, etc.
387 */
388 index = *ppos >> PAGE_SHIFT;
389 nr_pages = spd.nr_pages;
390 spd.nr_pages = 0;
391 for (page_nr = 0; page_nr < nr_pages; page_nr++) {
392 unsigned int this_len;
393
394 if (!len)
395 break;
396
397 /*
398 * this_len is the max we'll use from this page
399 */
400 this_len = min_t(unsigned long, len, PAGE_SIZE - loff);
401 page = spd.pages[page_nr];
402
403 if (PageReadahead(page))
404 page_cache_async_readahead(mapping, &in->f_ra, in,
405 page, index, req_pages - page_nr);
406
407 /*
408 * If the page isn't uptodate, we may need to start io on it
409 */
410 if (!PageUptodate(page)) {
411 lock_page(page);
412
413 /*
414 * Page was truncated, or invalidated by the
415 * filesystem. Redo the find/create, but this time the
416 * page is kept locked, so there's no chance of another
417 * race with truncate/invalidate.
418 */
419 if (!page->mapping) {
420 unlock_page(page);
421retry_lookup:
422 page = find_or_create_page(mapping, index,
423 mapping_gfp_mask(mapping));
424
425 if (!page) {
426 error = -ENOMEM;
427 break;
428 }
429 put_page(spd.pages[page_nr]);
430 spd.pages[page_nr] = page;
431 }
432 /*
433 * page was already under io and is now done, great
434 */
435 if (PageUptodate(page)) {
436 unlock_page(page);
437 goto fill_it;
438 }
439
440 /*
441 * need to read in the page
442 */
443 error = mapping->a_ops->readpage(in, page);
444 if (unlikely(error)) {
445 /*
446 * Re-lookup the page
447 */
448 if (error == AOP_TRUNCATED_PAGE)
449 goto retry_lookup;
450
451 break;
452 }
453 }
454fill_it:
455 /*
456 * i_size must be checked after PageUptodate.
457 */
458 isize = i_size_read(mapping->host);
459 end_index = (isize - 1) >> PAGE_SHIFT;
460 if (unlikely(!isize || index > end_index))
461 break;
462
463 /*
464 * if this is the last page, see if we need to shrink
465 * the length and stop
466 */
467 if (end_index == index) {
468 unsigned int plen;
469
470 /*
471 * max good bytes in this page
472 */
473 plen = ((isize - 1) & ~PAGE_MASK) + 1;
474 if (plen <= loff)
475 break;
476
477 /*
478 * force quit after adding this page
479 */
480 this_len = min(this_len, plen - loff);
481 len = this_len;
482 }
483
484 spd.partial[page_nr].offset = loff;
485 spd.partial[page_nr].len = this_len;
486 len -= this_len;
487 loff = 0;
488 spd.nr_pages++;
489 index++;
490 }
491
492 /*
493 * Release any pages at the end, if we quit early. 'page_nr' is how far
494 * we got, 'nr_pages' is how many pages are in the map.
495 */
496 while (page_nr < nr_pages)
497 put_page(spd.pages[page_nr++]);
498 in->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
499
500 if (spd.nr_pages)
501 error = splice_to_pipe(pipe, &spd);
502
503 splice_shrink_spd(&spd);
504 return error;
505}
506
507/** 283/**
508 * generic_file_splice_read - splice data from file to a pipe 284 * generic_file_splice_read - splice data from file to a pipe
509 * @in: file to splice from 285 * @in: file to splice from
@@ -514,39 +290,53 @@ fill_it:
514 * 290 *
515 * Description: 291 * Description:
516 * Will read pages from given file and fill them into a pipe. Can be 292 * Will read pages from given file and fill them into a pipe. Can be
517 * used as long as the address_space operations for the source implements 293 * used as long as it has more or less sane ->read_iter().
518 * a readpage() hook.
519 * 294 *
520 */ 295 */
521ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, 296ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
522 struct pipe_inode_info *pipe, size_t len, 297 struct pipe_inode_info *pipe, size_t len,
523 unsigned int flags) 298 unsigned int flags)
524{ 299{
525 loff_t isize, left; 300 struct iov_iter to;
526 int ret; 301 struct kiocb kiocb;
527 302 loff_t isize;
528 if (IS_DAX(in->f_mapping->host)) 303 int idx, ret;
529 return default_file_splice_read(in, ppos, pipe, len, flags);
530 304
531 isize = i_size_read(in->f_mapping->host); 305 isize = i_size_read(in->f_mapping->host);
532 if (unlikely(*ppos >= isize)) 306 if (unlikely(*ppos >= isize))
533 return 0; 307 return 0;
534 308
535 left = isize - *ppos; 309 iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
536 if (unlikely(left < len)) 310 idx = to.idx;
537 len = left; 311 init_sync_kiocb(&kiocb, in);
538 312 kiocb.ki_pos = *ppos;
539 ret = __generic_file_splice_read(in, ppos, pipe, len, flags); 313 ret = in->f_op->read_iter(&kiocb, &to);
540 if (ret > 0) { 314 if (ret > 0) {
541 *ppos += ret; 315 *ppos = kiocb.ki_pos;
542 file_accessed(in); 316 file_accessed(in);
317 } else if (ret < 0) {
318 if (WARN_ON(to.idx != idx || to.iov_offset)) {
319 /*
320 * a bogus ->read_iter() has copied something and still
321 * returned an error instead of a short read.
322 */
323 to.idx = idx;
324 to.iov_offset = 0;
325 iov_iter_advance(&to, 0); /* to free what was emitted */
326 }
327 /*
328 * callers of ->splice_read() expect -EAGAIN on
329 * "can't put anything in there", rather than -EFAULT.
330 */
331 if (ret == -EFAULT)
332 ret = -EAGAIN;
543 } 333 }
544 334
545 return ret; 335 return ret;
546} 336}
547EXPORT_SYMBOL(generic_file_splice_read); 337EXPORT_SYMBOL(generic_file_splice_read);
548 338
549static const struct pipe_buf_operations default_pipe_buf_ops = { 339const struct pipe_buf_operations default_pipe_buf_ops = {
550 .can_merge = 0, 340 .can_merge = 0,
551 .confirm = generic_pipe_buf_confirm, 341 .confirm = generic_pipe_buf_confirm,
552 .release = generic_pipe_buf_release, 342 .release = generic_pipe_buf_release,
@@ -570,7 +360,7 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = {
570}; 360};
571EXPORT_SYMBOL(nosteal_pipe_buf_ops); 361EXPORT_SYMBOL(nosteal_pipe_buf_ops);
572 362
573static ssize_t kernel_readv(struct file *file, const struct iovec *vec, 363static ssize_t kernel_readv(struct file *file, const struct kvec *vec,
574 unsigned long vlen, loff_t offset) 364 unsigned long vlen, loff_t offset)
575{ 365{
576 mm_segment_t old_fs; 366 mm_segment_t old_fs;
@@ -602,102 +392,70 @@ ssize_t kernel_write(struct file *file, const char *buf, size_t count,
602} 392}
603EXPORT_SYMBOL(kernel_write); 393EXPORT_SYMBOL(kernel_write);
604 394
605ssize_t default_file_splice_read(struct file *in, loff_t *ppos, 395static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
606 struct pipe_inode_info *pipe, size_t len, 396 struct pipe_inode_info *pipe, size_t len,
607 unsigned int flags) 397 unsigned int flags)
608{ 398{
399 struct kvec *vec, __vec[PIPE_DEF_BUFFERS];
400 struct iov_iter to;
401 struct page **pages;
609 unsigned int nr_pages; 402 unsigned int nr_pages;
610 unsigned int nr_freed; 403 size_t offset, dummy, copied = 0;
611 size_t offset;
612 struct page *pages[PIPE_DEF_BUFFERS];
613 struct partial_page partial[PIPE_DEF_BUFFERS];
614 struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
615 ssize_t res; 404 ssize_t res;
616 size_t this_len;
617 int error;
618 int i; 405 int i;
619 struct splice_pipe_desc spd = {
620 .pages = pages,
621 .partial = partial,
622 .nr_pages_max = PIPE_DEF_BUFFERS,
623 .flags = flags,
624 .ops = &default_pipe_buf_ops,
625 .spd_release = spd_release_page,
626 };
627 406
628 if (splice_grow_spd(pipe, &spd)) 407 if (pipe->nrbufs == pipe->buffers)
408 return -EAGAIN;
409
410 /*
411 * Try to keep page boundaries matching to source pagecache ones -
412 * it probably won't be much help, but...
413 */
414 offset = *ppos & ~PAGE_MASK;
415
416 iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset);
417
418 res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &dummy);
419 if (res <= 0)
629 return -ENOMEM; 420 return -ENOMEM;
630 421
631 res = -ENOMEM; 422 nr_pages = res / PAGE_SIZE;
423
632 vec = __vec; 424 vec = __vec;
633 if (spd.nr_pages_max > PIPE_DEF_BUFFERS) { 425 if (nr_pages > PIPE_DEF_BUFFERS) {
634 vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL); 426 vec = kmalloc(nr_pages * sizeof(struct kvec), GFP_KERNEL);
635 if (!vec) 427 if (unlikely(!vec)) {
636 goto shrink_ret; 428 res = -ENOMEM;
429 goto out;
430 }
637 } 431 }
638 432
639 offset = *ppos & ~PAGE_MASK; 433 pipe->bufs[to.idx].offset = offset;
640 nr_pages = (len + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; 434 pipe->bufs[to.idx].len -= offset;
641
642 for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) {
643 struct page *page;
644 435
645 page = alloc_page(GFP_USER); 436 for (i = 0; i < nr_pages; i++) {
646 error = -ENOMEM; 437 size_t this_len = min_t(size_t, len, PAGE_SIZE - offset);
647 if (!page) 438 vec[i].iov_base = page_address(pages[i]) + offset;
648 goto err;
649
650 this_len = min_t(size_t, len, PAGE_SIZE - offset);
651 vec[i].iov_base = (void __user *) page_address(page);
652 vec[i].iov_len = this_len; 439 vec[i].iov_len = this_len;
653 spd.pages[i] = page;
654 spd.nr_pages++;
655 len -= this_len; 440 len -= this_len;
656 offset = 0; 441 offset = 0;
657 } 442 }
658 443
659 res = kernel_readv(in, vec, spd.nr_pages, *ppos); 444 res = kernel_readv(in, vec, nr_pages, *ppos);
660 if (res < 0) { 445 if (res > 0) {
661 error = res; 446 copied = res;
662 goto err;
663 }
664
665 error = 0;
666 if (!res)
667 goto err;
668
669 nr_freed = 0;
670 for (i = 0; i < spd.nr_pages; i++) {
671 this_len = min_t(size_t, vec[i].iov_len, res);
672 spd.partial[i].offset = 0;
673 spd.partial[i].len = this_len;
674 if (!this_len) {
675 __free_page(spd.pages[i]);
676 spd.pages[i] = NULL;
677 nr_freed++;
678 }
679 res -= this_len;
680 }
681 spd.nr_pages -= nr_freed;
682
683 res = splice_to_pipe(pipe, &spd);
684 if (res > 0)
685 *ppos += res; 447 *ppos += res;
448 }
686 449
687shrink_ret:
688 if (vec != __vec) 450 if (vec != __vec)
689 kfree(vec); 451 kfree(vec);
690 splice_shrink_spd(&spd); 452out:
453 for (i = 0; i < nr_pages; i++)
454 put_page(pages[i]);
455 kvfree(pages);
456 iov_iter_advance(&to, copied); /* truncates and discards */
691 return res; 457 return res;
692
693err:
694 for (i = 0; i < spd.nr_pages; i++)
695 __free_page(spd.pages[i]);
696
697 res = error;
698 goto shrink_ret;
699} 458}
700EXPORT_SYMBOL(default_file_splice_read);
701 459
702/* 460/*
703 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' 461 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
@@ -757,13 +515,12 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
757 515
758 while (pipe->nrbufs) { 516 while (pipe->nrbufs) {
759 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; 517 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
760 const struct pipe_buf_operations *ops = buf->ops;
761 518
762 sd->len = buf->len; 519 sd->len = buf->len;
763 if (sd->len > sd->total_len) 520 if (sd->len > sd->total_len)
764 sd->len = sd->total_len; 521 sd->len = sd->total_len;
765 522
766 ret = buf->ops->confirm(pipe, buf); 523 ret = pipe_buf_confirm(pipe, buf);
767 if (unlikely(ret)) { 524 if (unlikely(ret)) {
768 if (ret == -ENODATA) 525 if (ret == -ENODATA)
769 ret = 0; 526 ret = 0;
@@ -783,8 +540,7 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
783 sd->total_len -= ret; 540 sd->total_len -= ret;
784 541
785 if (!buf->len) { 542 if (!buf->len) {
786 buf->ops = NULL; 543 pipe_buf_release(pipe, buf);
787 ops->release(pipe, buf);
788 pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); 544 pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
789 pipe->nrbufs--; 545 pipe->nrbufs--;
790 if (pipe->files) 546 if (pipe->files)
@@ -1003,7 +759,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
1003 if (idx == pipe->buffers - 1) 759 if (idx == pipe->buffers - 1)
1004 idx = -1; 760 idx = -1;
1005 761
1006 ret = buf->ops->confirm(pipe, buf); 762 ret = pipe_buf_confirm(pipe, buf);
1007 if (unlikely(ret)) { 763 if (unlikely(ret)) {
1008 if (ret == -ENODATA) 764 if (ret == -ENODATA)
1009 ret = 0; 765 ret = 0;
@@ -1030,11 +786,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
1030 while (ret) { 786 while (ret) {
1031 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; 787 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
1032 if (ret >= buf->len) { 788 if (ret >= buf->len) {
1033 const struct pipe_buf_operations *ops = buf->ops;
1034 ret -= buf->len; 789 ret -= buf->len;
1035 buf->len = 0; 790 buf->len = 0;
1036 buf->ops = NULL; 791 pipe_buf_release(pipe, buf);
1037 ops->release(pipe, buf);
1038 pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); 792 pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1039 pipe->nrbufs--; 793 pipe->nrbufs--;
1040 if (pipe->files) 794 if (pipe->files)
@@ -1273,10 +1027,8 @@ out_release:
1273 for (i = 0; i < pipe->buffers; i++) { 1027 for (i = 0; i < pipe->buffers; i++) {
1274 struct pipe_buffer *buf = pipe->bufs + i; 1028 struct pipe_buffer *buf = pipe->bufs + i;
1275 1029
1276 if (buf->ops) { 1030 if (buf->ops)
1277 buf->ops->release(pipe, buf); 1031 pipe_buf_release(pipe, buf);
1278 buf->ops = NULL;
1279 }
1280 } 1032 }
1281 1033
1282 if (!bytes) 1034 if (!bytes)
@@ -1342,6 +1094,20 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1342} 1094}
1343EXPORT_SYMBOL(do_splice_direct); 1095EXPORT_SYMBOL(do_splice_direct);
1344 1096
1097static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
1098{
1099 while (pipe->nrbufs == pipe->buffers) {
1100 if (flags & SPLICE_F_NONBLOCK)
1101 return -EAGAIN;
1102 if (signal_pending(current))
1103 return -ERESTARTSYS;
1104 pipe->waiting_writers++;
1105 pipe_wait(pipe);
1106 pipe->waiting_writers--;
1107 }
1108 return 0;
1109}
1110
1345static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, 1111static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
1346 struct pipe_inode_info *opipe, 1112 struct pipe_inode_info *opipe,
1347 size_t len, unsigned int flags); 1113 size_t len, unsigned int flags);
@@ -1424,8 +1190,13 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1424 offset = in->f_pos; 1190 offset = in->f_pos;
1425 } 1191 }
1426 1192
1427 ret = do_splice_to(in, &offset, opipe, len, flags); 1193 pipe_lock(opipe);
1428 1194 ret = wait_for_space(opipe, flags);
1195 if (!ret)
1196 ret = do_splice_to(in, &offset, opipe, len, flags);
1197 pipe_unlock(opipe);
1198 if (ret > 0)
1199 wakeup_pipe_readers(opipe);
1429 if (!off_in) 1200 if (!off_in)
1430 in->f_pos = offset; 1201 in->f_pos = offset;
1431 else if (copy_to_user(off_in, &offset, sizeof(loff_t))) 1202 else if (copy_to_user(off_in, &offset, sizeof(loff_t)))
@@ -1437,106 +1208,50 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1437 return -EINVAL; 1208 return -EINVAL;
1438} 1209}
1439 1210
1440/* 1211static int iter_to_pipe(struct iov_iter *from,
1441 * Map an iov into an array of pages and offset/length tupples. With the 1212 struct pipe_inode_info *pipe,
1442 * partial_page structure, we can map several non-contiguous ranges into 1213 unsigned flags)
1443 * our ones pages[] map instead of splitting that operation into pieces.
1444 * Could easily be exported as a generic helper for other users, in which
1445 * case one would probably want to add a 'max_nr_pages' parameter as well.
1446 */
1447static int get_iovec_page_array(const struct iovec __user *iov,
1448 unsigned int nr_vecs, struct page **pages,
1449 struct partial_page *partial, bool aligned,
1450 unsigned int pipe_buffers)
1451{ 1214{
1452 int buffers = 0, error = 0; 1215 struct pipe_buffer buf = {
1453 1216 .ops = &user_page_pipe_buf_ops,
1454 while (nr_vecs) { 1217 .flags = flags
1455 unsigned long off, npages; 1218 };
1456 struct iovec entry; 1219 size_t total = 0;
1457 void __user *base; 1220 int ret = 0;
1458 size_t len; 1221 bool failed = false;
1459 int i; 1222
1460 1223 while (iov_iter_count(from) && !failed) {
1461 error = -EFAULT; 1224 struct page *pages[16];
1462 if (copy_from_user(&entry, iov, sizeof(entry))) 1225 ssize_t copied;
1463 break; 1226 size_t start;
1464 1227 int n;
1465 base = entry.iov_base; 1228
1466 len = entry.iov_len; 1229 copied = iov_iter_get_pages(from, pages, ~0UL, 16, &start);
1467 1230 if (copied <= 0) {
1468 /* 1231 ret = copied;
1469 * Sanity check this iovec. 0 read succeeds.
1470 */
1471 error = 0;
1472 if (unlikely(!len))
1473 break;
1474 error = -EFAULT;
1475 if (!access_ok(VERIFY_READ, base, len))
1476 break;
1477
1478 /*
1479 * Get this base offset and number of pages, then map
1480 * in the user pages.
1481 */
1482 off = (unsigned long) base & ~PAGE_MASK;
1483
1484 /*
1485 * If asked for alignment, the offset must be zero and the
1486 * length a multiple of the PAGE_SIZE.
1487 */
1488 error = -EINVAL;
1489 if (aligned && (off || len & ~PAGE_MASK))
1490 break;
1491
1492 npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1493 if (npages > pipe_buffers - buffers)
1494 npages = pipe_buffers - buffers;
1495
1496 error = get_user_pages_fast((unsigned long)base, npages,
1497 0, &pages[buffers]);
1498
1499 if (unlikely(error <= 0))
1500 break; 1232 break;
1501
1502 /*
1503 * Fill this contiguous range into the partial page map.
1504 */
1505 for (i = 0; i < error; i++) {
1506 const int plen = min_t(size_t, len, PAGE_SIZE - off);
1507
1508 partial[buffers].offset = off;
1509 partial[buffers].len = plen;
1510
1511 off = 0;
1512 len -= plen;
1513 buffers++;
1514 } 1233 }
1515 1234
1516 /* 1235 for (n = 0; copied; n++, start = 0) {
1517 * We didn't complete this iov, stop here since it probably 1236 int size = min_t(int, copied, PAGE_SIZE - start);
1518 * means we have to move some of this into a pipe to 1237 if (!failed) {
1519 * be able to continue. 1238 buf.page = pages[n];
1520 */ 1239 buf.offset = start;
1521 if (len) 1240 buf.len = size;
1522 break; 1241 ret = add_to_pipe(pipe, &buf);
1523 1242 if (unlikely(ret < 0)) {
1524 /* 1243 failed = true;
1525 * Don't continue if we mapped fewer pages than we asked for, 1244 } else {
1526 * or if we mapped the max number of pages that we have 1245 iov_iter_advance(from, ret);
1527 * room for. 1246 total += ret;
1528 */ 1247 }
1529 if (error < npages || buffers == pipe_buffers) 1248 } else {
1530 break; 1249 put_page(pages[n]);
1531 1250 }
1532 nr_vecs--; 1251 copied -= size;
1533 iov++; 1252 }
1534 } 1253 }
1535 1254 return total ? total : ret;
1536 if (buffers)
1537 return buffers;
1538
1539 return error;
1540} 1255}
1541 1256
1542static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 1257static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
@@ -1590,38 +1305,36 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
1590 * as splice-from-memory, where the regular splice is splice-from-file (or 1305 * as splice-from-memory, where the regular splice is splice-from-file (or
1591 * to file). In both cases the output is a pipe, naturally. 1306 * to file). In both cases the output is a pipe, naturally.
1592 */ 1307 */
1593static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, 1308static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov,
1594 unsigned long nr_segs, unsigned int flags) 1309 unsigned long nr_segs, unsigned int flags)
1595{ 1310{
1596 struct pipe_inode_info *pipe; 1311 struct pipe_inode_info *pipe;
1597 struct page *pages[PIPE_DEF_BUFFERS]; 1312 struct iovec iovstack[UIO_FASTIOV];
1598 struct partial_page partial[PIPE_DEF_BUFFERS]; 1313 struct iovec *iov = iovstack;
1599 struct splice_pipe_desc spd = { 1314 struct iov_iter from;
1600 .pages = pages,
1601 .partial = partial,
1602 .nr_pages_max = PIPE_DEF_BUFFERS,
1603 .flags = flags,
1604 .ops = &user_page_pipe_buf_ops,
1605 .spd_release = spd_release_page,
1606 };
1607 long ret; 1315 long ret;
1316 unsigned buf_flag = 0;
1317
1318 if (flags & SPLICE_F_GIFT)
1319 buf_flag = PIPE_BUF_FLAG_GIFT;
1608 1320
1609 pipe = get_pipe_info(file); 1321 pipe = get_pipe_info(file);
1610 if (!pipe) 1322 if (!pipe)
1611 return -EBADF; 1323 return -EBADF;
1612 1324
1613 if (splice_grow_spd(pipe, &spd)) 1325 ret = import_iovec(WRITE, uiov, nr_segs,
1614 return -ENOMEM; 1326 ARRAY_SIZE(iovstack), &iov, &from);
1615 1327 if (ret < 0)
1616 spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages, 1328 return ret;
1617 spd.partial, false,
1618 spd.nr_pages_max);
1619 if (spd.nr_pages <= 0)
1620 ret = spd.nr_pages;
1621 else
1622 ret = splice_to_pipe(pipe, &spd);
1623 1329
1624 splice_shrink_spd(&spd); 1330 pipe_lock(pipe);
1331 ret = wait_for_space(pipe, flags);
1332 if (!ret)
1333 ret = iter_to_pipe(&from, pipe, buf_flag);
1334 pipe_unlock(pipe);
1335 if (ret > 0)
1336 wakeup_pipe_readers(pipe);
1337 kfree(iov);
1625 return ret; 1338 return ret;
1626} 1339}
1627 1340
@@ -1876,7 +1589,7 @@ retry:
1876 * Get a reference to this pipe buffer, 1589 * Get a reference to this pipe buffer,
1877 * so we can copy the contents over. 1590 * so we can copy the contents over.
1878 */ 1591 */
1879 ibuf->ops->get(ipipe, ibuf); 1592 pipe_buf_get(ipipe, ibuf);
1880 *obuf = *ibuf; 1593 *obuf = *ibuf;
1881 1594
1882 /* 1595 /*
@@ -1948,7 +1661,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1948 * Get a reference to this pipe buffer, 1661 * Get a reference to this pipe buffer,
1949 * so we can copy the contents over. 1662 * so we can copy the contents over.
1950 */ 1663 */
1951 ibuf->ops->get(ipipe, ibuf); 1664 pipe_buf_get(ipipe, ibuf);
1952 1665
1953 obuf = opipe->bufs + nbuf; 1666 obuf = opipe->bufs + nbuf;
1954 *obuf = *ibuf; 1667 *obuf = *ibuf;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index c68517b0f248..f46b2929c64d 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -393,45 +393,6 @@ xfs_file_read_iter(
393 return ret; 393 return ret;
394} 394}
395 395
396STATIC ssize_t
397xfs_file_splice_read(
398 struct file *infilp,
399 loff_t *ppos,
400 struct pipe_inode_info *pipe,
401 size_t count,
402 unsigned int flags)
403{
404 struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
405 ssize_t ret;
406
407 XFS_STATS_INC(ip->i_mount, xs_read_calls);
408
409 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
410 return -EIO;
411
412 trace_xfs_file_splice_read(ip, count, *ppos);
413
414 /*
415 * DAX inodes cannot ues the page cache for splice, so we have to push
416 * them through the VFS IO path. This means it goes through
417 * ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we
418 * cannot lock the splice operation at this level for DAX inodes.
419 */
420 if (IS_DAX(VFS_I(ip))) {
421 ret = default_file_splice_read(infilp, ppos, pipe, count,
422 flags);
423 goto out;
424 }
425
426 xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
427 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
428 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
429out:
430 if (ret > 0)
431 XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
432 return ret;
433}
434
435/* 396/*
436 * Zero any on disk space between the current EOF and the new, larger EOF. 397 * Zero any on disk space between the current EOF and the new, larger EOF.
437 * 398 *
@@ -1608,7 +1569,7 @@ const struct file_operations xfs_file_operations = {
1608 .llseek = xfs_file_llseek, 1569 .llseek = xfs_file_llseek,
1609 .read_iter = xfs_file_read_iter, 1570 .read_iter = xfs_file_read_iter,
1610 .write_iter = xfs_file_write_iter, 1571 .write_iter = xfs_file_write_iter,
1611 .splice_read = xfs_file_splice_read, 1572 .splice_read = generic_file_splice_read,
1612 .splice_write = iter_file_splice_write, 1573 .splice_write = iter_file_splice_write,
1613 .unlocked_ioctl = xfs_file_ioctl, 1574 .unlocked_ioctl = xfs_file_ioctl,
1614#ifdef CONFIG_COMPAT 1575#ifdef CONFIG_COMPAT
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index c6b2b1dcde75..16093c7dacde 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1170,7 +1170,6 @@ DEFINE_RW_EVENT(xfs_file_dax_read);
1170DEFINE_RW_EVENT(xfs_file_buffered_write); 1170DEFINE_RW_EVENT(xfs_file_buffered_write);
1171DEFINE_RW_EVENT(xfs_file_direct_write); 1171DEFINE_RW_EVENT(xfs_file_direct_write);
1172DEFINE_RW_EVENT(xfs_file_dax_write); 1172DEFINE_RW_EVENT(xfs_file_dax_write);
1173DEFINE_RW_EVENT(xfs_file_splice_read);
1174 1173
1175DECLARE_EVENT_CLASS(xfs_page_class, 1174DECLARE_EVENT_CLASS(xfs_page_class,
1176 TP_PROTO(struct inode *inode, struct page *page, unsigned long off, 1175 TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 901e25d495cc..b04883e74579 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2794,8 +2794,6 @@ extern void block_sync_page(struct page *page);
2794/* fs/splice.c */ 2794/* fs/splice.c */
2795extern ssize_t generic_file_splice_read(struct file *, loff_t *, 2795extern ssize_t generic_file_splice_read(struct file *, loff_t *,
2796 struct pipe_inode_info *, size_t, unsigned int); 2796 struct pipe_inode_info *, size_t, unsigned int);
2797extern ssize_t default_file_splice_read(struct file *, loff_t *,
2798 struct pipe_inode_info *, size_t, unsigned int);
2799extern ssize_t iter_file_splice_write(struct pipe_inode_info *, 2797extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
2800 struct file *, loff_t *, size_t, unsigned int); 2798 struct file *, loff_t *, size_t, unsigned int);
2801extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, 2799extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 24f5470d3944..e7497c9dde7f 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -66,15 +66,10 @@ struct pipe_inode_info {
66 * 66 *
67 * ->confirm() 67 * ->confirm()
68 * ->steal() 68 * ->steal()
69 * ...
70 * ->map()
71 * ...
72 * ->unmap()
73 * 69 *
74 * That is, ->map() must be called on a confirmed buffer, 70 * That is, ->steal() must be called on a confirmed buffer.
75 * same goes for ->steal(). See below for the meaning of each 71 * See below for the meaning of each operation. Also see kerneldoc
76 * operation. Also see kerneldoc in fs/pipe.c for the pipe 72 * in fs/pipe.c for the pipe and generic variants of these hooks.
77 * and generic variants of these hooks.
78 */ 73 */
79struct pipe_buf_operations { 74struct pipe_buf_operations {
80 /* 75 /*
@@ -115,6 +110,53 @@ struct pipe_buf_operations {
115 void (*get)(struct pipe_inode_info *, struct pipe_buffer *); 110 void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
116}; 111};
117 112
113/**
114 * pipe_buf_get - get a reference to a pipe_buffer
115 * @pipe: the pipe that the buffer belongs to
116 * @buf: the buffer to get a reference to
117 */
118static inline void pipe_buf_get(struct pipe_inode_info *pipe,
119 struct pipe_buffer *buf)
120{
121 buf->ops->get(pipe, buf);
122}
123
124/**
125 * pipe_buf_release - put a reference to a pipe_buffer
126 * @pipe: the pipe that the buffer belongs to
127 * @buf: the buffer to put a reference to
128 */
129static inline void pipe_buf_release(struct pipe_inode_info *pipe,
130 struct pipe_buffer *buf)
131{
132 const struct pipe_buf_operations *ops = buf->ops;
133
134 buf->ops = NULL;
135 ops->release(pipe, buf);
136}
137
138/**
139 * pipe_buf_confirm - verify contents of the pipe buffer
140 * @pipe: the pipe that the buffer belongs to
141 * @buf: the buffer to confirm
142 */
143static inline int pipe_buf_confirm(struct pipe_inode_info *pipe,
144 struct pipe_buffer *buf)
145{
146 return buf->ops->confirm(pipe, buf);
147}
148
149/**
150 * pipe_buf_steal - attempt to take ownership of a pipe_buffer
151 * @pipe: the pipe that the buffer belongs to
152 * @buf: the buffer to attempt to steal
153 */
154static inline int pipe_buf_steal(struct pipe_inode_info *pipe,
155 struct pipe_buffer *buf)
156{
157 return buf->ops->steal(pipe, buf);
158}
159
118/* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual 160/* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
119 memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ 161 memory allocation, whereas PIPE_BUF makes atomicity guarantees. */
120#define PIPE_SIZE PAGE_SIZE 162#define PIPE_SIZE PAGE_SIZE
@@ -129,7 +171,6 @@ extern unsigned long pipe_user_pages_hard;
129extern unsigned long pipe_user_pages_soft; 171extern unsigned long pipe_user_pages_soft;
130int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *); 172int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *);
131 173
132
133/* Drop the inode semaphore and wait for a pipe event, atomically */ 174/* Drop the inode semaphore and wait for a pipe event, atomically */
134void pipe_wait(struct pipe_inode_info *pipe); 175void pipe_wait(struct pipe_inode_info *pipe);
135 176
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9bf60b556bd2..601258f6e621 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3064,15 +3064,9 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
3064int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); 3064int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
3065__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, 3065__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
3066 int len, __wsum csum); 3066 int len, __wsum csum);
3067ssize_t skb_socket_splice(struct sock *sk,
3068 struct pipe_inode_info *pipe,
3069 struct splice_pipe_desc *spd);
3070int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, 3067int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
3071 struct pipe_inode_info *pipe, unsigned int len, 3068 struct pipe_inode_info *pipe, unsigned int len,
3072 unsigned int flags, 3069 unsigned int flags);
3073 ssize_t (*splice_cb)(struct sock *,
3074 struct pipe_inode_info *,
3075 struct splice_pipe_desc *));
3076void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); 3070void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
3077unsigned int skb_zerocopy_headlen(const struct sk_buff *from); 3071unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
3078int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, 3072int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
diff --git a/include/linux/splice.h b/include/linux/splice.h
index da2751d3b93d..00a21166e268 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -72,6 +72,8 @@ extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
72 struct splice_desc *, splice_actor *); 72 struct splice_desc *, splice_actor *);
73extern ssize_t splice_to_pipe(struct pipe_inode_info *, 73extern ssize_t splice_to_pipe(struct pipe_inode_info *,
74 struct splice_pipe_desc *); 74 struct splice_pipe_desc *);
75extern ssize_t add_to_pipe(struct pipe_inode_info *,
76 struct pipe_buffer *);
75extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, 77extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
76 splice_direct_actor *); 78 splice_direct_actor *);
77 79
@@ -83,4 +85,5 @@ extern void splice_shrink_spd(struct splice_pipe_desc *);
83extern void spd_release_page(struct splice_pipe_desc *, unsigned int); 85extern void spd_release_page(struct splice_pipe_desc *, unsigned int);
84 86
85extern const struct pipe_buf_operations page_cache_pipe_buf_ops; 87extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
88extern const struct pipe_buf_operations default_pipe_buf_ops;
86#endif 89#endif
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 75b4aaf31a9d..b5ebe6dca404 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -13,6 +13,7 @@
13#include <uapi/linux/uio.h> 13#include <uapi/linux/uio.h>
14 14
15struct page; 15struct page;
16struct pipe_inode_info;
16 17
17struct kvec { 18struct kvec {
18 void *iov_base; /* and that should *never* hold a userland pointer */ 19 void *iov_base; /* and that should *never* hold a userland pointer */
@@ -23,6 +24,7 @@ enum {
23 ITER_IOVEC = 0, 24 ITER_IOVEC = 0,
24 ITER_KVEC = 2, 25 ITER_KVEC = 2,
25 ITER_BVEC = 4, 26 ITER_BVEC = 4,
27 ITER_PIPE = 8,
26}; 28};
27 29
28struct iov_iter { 30struct iov_iter {
@@ -33,8 +35,12 @@ struct iov_iter {
33 const struct iovec *iov; 35 const struct iovec *iov;
34 const struct kvec *kvec; 36 const struct kvec *kvec;
35 const struct bio_vec *bvec; 37 const struct bio_vec *bvec;
38 struct pipe_inode_info *pipe;
39 };
40 union {
41 unsigned long nr_segs;
42 int idx;
36 }; 43 };
37 unsigned long nr_segs;
38}; 44};
39 45
40/* 46/*
@@ -64,7 +70,7 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
64} 70}
65 71
66#define iov_for_each(iov, iter, start) \ 72#define iov_for_each(iov, iter, start) \
67 if (!((start).type & ITER_BVEC)) \ 73 if (!((start).type & (ITER_BVEC | ITER_PIPE))) \
68 for (iter = (start); \ 74 for (iter = (start); \
69 (iter).count && \ 75 (iter).count && \
70 ((iov = iov_iter_iovec(&(iter))), 1); \ 76 ((iov = iov_iter_iovec(&(iter))), 1); \
@@ -94,6 +100,8 @@ void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
94 unsigned long nr_segs, size_t count); 100 unsigned long nr_segs, size_t count);
95void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec, 101void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec,
96 unsigned long nr_segs, size_t count); 102 unsigned long nr_segs, size_t count);
103void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe,
104 size_t count);
97ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, 105ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
98 size_t maxsize, unsigned maxpages, size_t *start); 106 size_t maxsize, unsigned maxpages, size_t *start);
99ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, 107ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
@@ -109,7 +117,7 @@ static inline size_t iov_iter_count(struct iov_iter *i)
109 117
110static inline bool iter_is_iovec(struct iov_iter *i) 118static inline bool iter_is_iovec(struct iov_iter *i)
111{ 119{
112 return !(i->type & (ITER_BVEC | ITER_KVEC)); 120 return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE));
113} 121}
114 122
115/* 123/*
diff --git a/kernel/relay.c b/kernel/relay.c
index fc9b4a4af463..9988f5cc2d46 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1108,51 +1108,23 @@ static size_t relay_file_read_end_pos(struct rchan_buf *buf,
1108 return end_pos; 1108 return end_pos;
1109} 1109}
1110 1110
1111/* 1111static ssize_t relay_file_read(struct file *filp,
1112 * subbuf_read_actor - read up to one subbuf's worth of data 1112 char __user *buffer,
1113 */ 1113 size_t count,
1114static int subbuf_read_actor(size_t read_start, 1114 loff_t *ppos)
1115 struct rchan_buf *buf,
1116 size_t avail,
1117 read_descriptor_t *desc)
1118{
1119 void *from;
1120 int ret = 0;
1121
1122 from = buf->start + read_start;
1123 ret = avail;
1124 if (copy_to_user(desc->arg.buf, from, avail)) {
1125 desc->error = -EFAULT;
1126 ret = 0;
1127 }
1128 desc->arg.data += ret;
1129 desc->written += ret;
1130 desc->count -= ret;
1131
1132 return ret;
1133}
1134
1135typedef int (*subbuf_actor_t) (size_t read_start,
1136 struct rchan_buf *buf,
1137 size_t avail,
1138 read_descriptor_t *desc);
1139
1140/*
1141 * relay_file_read_subbufs - read count bytes, bridging subbuf boundaries
1142 */
1143static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
1144 subbuf_actor_t subbuf_actor,
1145 read_descriptor_t *desc)
1146{ 1115{
1147 struct rchan_buf *buf = filp->private_data; 1116 struct rchan_buf *buf = filp->private_data;
1148 size_t read_start, avail; 1117 size_t read_start, avail;
1118 size_t written = 0;
1149 int ret; 1119 int ret;
1150 1120
1151 if (!desc->count) 1121 if (!count)
1152 return 0; 1122 return 0;
1153 1123
1154 inode_lock(file_inode(filp)); 1124 inode_lock(file_inode(filp));
1155 do { 1125 do {
1126 void *from;
1127
1156 if (!relay_file_read_avail(buf, *ppos)) 1128 if (!relay_file_read_avail(buf, *ppos))
1157 break; 1129 break;
1158 1130
@@ -1161,32 +1133,22 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
1161 if (!avail) 1133 if (!avail)
1162 break; 1134 break;
1163 1135
1164 avail = min(desc->count, avail); 1136 avail = min(count, avail);
1165 ret = subbuf_actor(read_start, buf, avail, desc); 1137 from = buf->start + read_start;
1166 if (desc->error < 0) 1138 ret = avail;
1139 if (copy_to_user(buffer, from, avail))
1167 break; 1140 break;
1168 1141
1169 if (ret) { 1142 buffer += ret;
1170 relay_file_read_consume(buf, read_start, ret); 1143 written += ret;
1171 *ppos = relay_file_read_end_pos(buf, read_start, ret); 1144 count -= ret;
1172 }
1173 } while (desc->count && ret);
1174 inode_unlock(file_inode(filp));
1175 1145
1176 return desc->written; 1146 relay_file_read_consume(buf, read_start, ret);
1177} 1147 *ppos = relay_file_read_end_pos(buf, read_start, ret);
1148 } while (count);
1149 inode_unlock(file_inode(filp));
1178 1150
1179static ssize_t relay_file_read(struct file *filp, 1151 return written;
1180 char __user *buffer,
1181 size_t count,
1182 loff_t *ppos)
1183{
1184 read_descriptor_t desc;
1185 desc.written = 0;
1186 desc.count = count;
1187 desc.arg.buf = buffer;
1188 desc.error = 0;
1189 return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, &desc);
1190} 1152}
1191 1153
1192static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed) 1154static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 7e3138cfc8c9..48b8c27acabb 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -3,8 +3,11 @@
3#include <linux/pagemap.h> 3#include <linux/pagemap.h>
4#include <linux/slab.h> 4#include <linux/slab.h>
5#include <linux/vmalloc.h> 5#include <linux/vmalloc.h>
6#include <linux/splice.h>
6#include <net/checksum.h> 7#include <net/checksum.h>
7 8
9#define PIPE_PARANOIA /* for now */
10
8#define iterate_iovec(i, n, __v, __p, skip, STEP) { \ 11#define iterate_iovec(i, n, __v, __p, skip, STEP) { \
9 size_t left; \ 12 size_t left; \
10 size_t wanted = n; \ 13 size_t wanted = n; \
@@ -290,6 +293,93 @@ done:
290 return wanted - bytes; 293 return wanted - bytes;
291} 294}
292 295
296#ifdef PIPE_PARANOIA
297static bool sanity(const struct iov_iter *i)
298{
299 struct pipe_inode_info *pipe = i->pipe;
300 int idx = i->idx;
301 int next = pipe->curbuf + pipe->nrbufs;
302 if (i->iov_offset) {
303 struct pipe_buffer *p;
304 if (unlikely(!pipe->nrbufs))
305 goto Bad; // pipe must be non-empty
306 if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
307 goto Bad; // must be at the last buffer...
308
309 p = &pipe->bufs[idx];
310 if (unlikely(p->offset + p->len != i->iov_offset))
311 goto Bad; // ... at the end of segment
312 } else {
313 if (idx != (next & (pipe->buffers - 1)))
314 goto Bad; // must be right after the last buffer
315 }
316 return true;
317Bad:
318 printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
319 printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
320 pipe->curbuf, pipe->nrbufs, pipe->buffers);
321 for (idx = 0; idx < pipe->buffers; idx++)
322 printk(KERN_ERR "[%p %p %d %d]\n",
323 pipe->bufs[idx].ops,
324 pipe->bufs[idx].page,
325 pipe->bufs[idx].offset,
326 pipe->bufs[idx].len);
327 WARN_ON(1);
328 return false;
329}
330#else
331#define sanity(i) true
332#endif
333
334static inline int next_idx(int idx, struct pipe_inode_info *pipe)
335{
336 return (idx + 1) & (pipe->buffers - 1);
337}
338
339static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
340 struct iov_iter *i)
341{
342 struct pipe_inode_info *pipe = i->pipe;
343 struct pipe_buffer *buf;
344 size_t off;
345 int idx;
346
347 if (unlikely(bytes > i->count))
348 bytes = i->count;
349
350 if (unlikely(!bytes))
351 return 0;
352
353 if (!sanity(i))
354 return 0;
355
356 off = i->iov_offset;
357 idx = i->idx;
358 buf = &pipe->bufs[idx];
359 if (off) {
360 if (offset == off && buf->page == page) {
361 /* merge with the last one */
362 buf->len += bytes;
363 i->iov_offset += bytes;
364 goto out;
365 }
366 idx = next_idx(idx, pipe);
367 buf = &pipe->bufs[idx];
368 }
369 if (idx == pipe->curbuf && pipe->nrbufs)
370 return 0;
371 pipe->nrbufs++;
372 buf->ops = &page_cache_pipe_buf_ops;
373 get_page(buf->page = page);
374 buf->offset = offset;
375 buf->len = bytes;
376 i->iov_offset = offset + bytes;
377 i->idx = idx;
378out:
379 i->count -= bytes;
380 return bytes;
381}
382
293/* 383/*
294 * Fault in one or more iovecs of the given iov_iter, to a maximum length of 384 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
295 * bytes. For each iovec, fault in each page that constitutes the iovec. 385 * bytes. For each iovec, fault in each page that constitutes the iovec.
@@ -356,9 +446,98 @@ static void memzero_page(struct page *page, size_t offset, size_t len)
356 kunmap_atomic(addr); 446 kunmap_atomic(addr);
357} 447}
358 448
449static inline bool allocated(struct pipe_buffer *buf)
450{
451 return buf->ops == &default_pipe_buf_ops;
452}
453
454static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
455{
456 size_t off = i->iov_offset;
457 int idx = i->idx;
458 if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
459 idx = next_idx(idx, i->pipe);
460 off = 0;
461 }
462 *idxp = idx;
463 *offp = off;
464}
465
466static size_t push_pipe(struct iov_iter *i, size_t size,
467 int *idxp, size_t *offp)
468{
469 struct pipe_inode_info *pipe = i->pipe;
470 size_t off;
471 int idx;
472 ssize_t left;
473
474 if (unlikely(size > i->count))
475 size = i->count;
476 if (unlikely(!size))
477 return 0;
478
479 left = size;
480 data_start(i, &idx, &off);
481 *idxp = idx;
482 *offp = off;
483 if (off) {
484 left -= PAGE_SIZE - off;
485 if (left <= 0) {
486 pipe->bufs[idx].len += size;
487 return size;
488 }
489 pipe->bufs[idx].len = PAGE_SIZE;
490 idx = next_idx(idx, pipe);
491 }
492 while (idx != pipe->curbuf || !pipe->nrbufs) {
493 struct page *page = alloc_page(GFP_USER);
494 if (!page)
495 break;
496 pipe->nrbufs++;
497 pipe->bufs[idx].ops = &default_pipe_buf_ops;
498 pipe->bufs[idx].page = page;
499 pipe->bufs[idx].offset = 0;
500 if (left <= PAGE_SIZE) {
501 pipe->bufs[idx].len = left;
502 return size;
503 }
504 pipe->bufs[idx].len = PAGE_SIZE;
505 left -= PAGE_SIZE;
506 idx = next_idx(idx, pipe);
507 }
508 return size - left;
509}
510
511static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
512 struct iov_iter *i)
513{
514 struct pipe_inode_info *pipe = i->pipe;
515 size_t n, off;
516 int idx;
517
518 if (!sanity(i))
519 return 0;
520
521 bytes = n = push_pipe(i, bytes, &idx, &off);
522 if (unlikely(!n))
523 return 0;
524 for ( ; n; idx = next_idx(idx, pipe), off = 0) {
525 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
526 memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
527 i->idx = idx;
528 i->iov_offset = off + chunk;
529 n -= chunk;
530 addr += chunk;
531 }
532 i->count -= bytes;
533 return bytes;
534}
535
359size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) 536size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
360{ 537{
361 const char *from = addr; 538 const char *from = addr;
539 if (unlikely(i->type & ITER_PIPE))
540 return copy_pipe_to_iter(addr, bytes, i);
362 iterate_and_advance(i, bytes, v, 541 iterate_and_advance(i, bytes, v,
363 __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, 542 __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
364 v.iov_len), 543 v.iov_len),
@@ -374,6 +553,10 @@ EXPORT_SYMBOL(copy_to_iter);
374size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) 553size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
375{ 554{
376 char *to = addr; 555 char *to = addr;
556 if (unlikely(i->type & ITER_PIPE)) {
557 WARN_ON(1);
558 return 0;
559 }
377 iterate_and_advance(i, bytes, v, 560 iterate_and_advance(i, bytes, v,
378 __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, 561 __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
379 v.iov_len), 562 v.iov_len),
@@ -389,6 +572,10 @@ EXPORT_SYMBOL(copy_from_iter);
389size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) 572size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
390{ 573{
391 char *to = addr; 574 char *to = addr;
575 if (unlikely(i->type & ITER_PIPE)) {
576 WARN_ON(1);
577 return 0;
578 }
392 iterate_and_advance(i, bytes, v, 579 iterate_and_advance(i, bytes, v,
393 __copy_from_user_nocache((to += v.iov_len) - v.iov_len, 580 __copy_from_user_nocache((to += v.iov_len) - v.iov_len,
394 v.iov_base, v.iov_len), 581 v.iov_base, v.iov_len),
@@ -409,14 +596,20 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
409 size_t wanted = copy_to_iter(kaddr + offset, bytes, i); 596 size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
410 kunmap_atomic(kaddr); 597 kunmap_atomic(kaddr);
411 return wanted; 598 return wanted;
412 } else 599 } else if (likely(!(i->type & ITER_PIPE)))
413 return copy_page_to_iter_iovec(page, offset, bytes, i); 600 return copy_page_to_iter_iovec(page, offset, bytes, i);
601 else
602 return copy_page_to_iter_pipe(page, offset, bytes, i);
414} 603}
415EXPORT_SYMBOL(copy_page_to_iter); 604EXPORT_SYMBOL(copy_page_to_iter);
416 605
417size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, 606size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
418 struct iov_iter *i) 607 struct iov_iter *i)
419{ 608{
609 if (unlikely(i->type & ITER_PIPE)) {
610 WARN_ON(1);
611 return 0;
612 }
420 if (i->type & (ITER_BVEC|ITER_KVEC)) { 613 if (i->type & (ITER_BVEC|ITER_KVEC)) {
421 void *kaddr = kmap_atomic(page); 614 void *kaddr = kmap_atomic(page);
422 size_t wanted = copy_from_iter(kaddr + offset, bytes, i); 615 size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
@@ -427,8 +620,34 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
427} 620}
428EXPORT_SYMBOL(copy_page_from_iter); 621EXPORT_SYMBOL(copy_page_from_iter);
429 622
623static size_t pipe_zero(size_t bytes, struct iov_iter *i)
624{
625 struct pipe_inode_info *pipe = i->pipe;
626 size_t n, off;
627 int idx;
628
629 if (!sanity(i))
630 return 0;
631
632 bytes = n = push_pipe(i, bytes, &idx, &off);
633 if (unlikely(!n))
634 return 0;
635
636 for ( ; n; idx = next_idx(idx, pipe), off = 0) {
637 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
638 memzero_page(pipe->bufs[idx].page, off, chunk);
639 i->idx = idx;
640 i->iov_offset = off + chunk;
641 n -= chunk;
642 }
643 i->count -= bytes;
644 return bytes;
645}
646
430size_t iov_iter_zero(size_t bytes, struct iov_iter *i) 647size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
431{ 648{
649 if (unlikely(i->type & ITER_PIPE))
650 return pipe_zero(bytes, i);
432 iterate_and_advance(i, bytes, v, 651 iterate_and_advance(i, bytes, v,
433 __clear_user(v.iov_base, v.iov_len), 652 __clear_user(v.iov_base, v.iov_len),
434 memzero_page(v.bv_page, v.bv_offset, v.bv_len), 653 memzero_page(v.bv_page, v.bv_offset, v.bv_len),
@@ -443,6 +662,11 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
443 struct iov_iter *i, unsigned long offset, size_t bytes) 662 struct iov_iter *i, unsigned long offset, size_t bytes)
444{ 663{
445 char *kaddr = kmap_atomic(page), *p = kaddr + offset; 664 char *kaddr = kmap_atomic(page), *p = kaddr + offset;
665 if (unlikely(i->type & ITER_PIPE)) {
666 kunmap_atomic(kaddr);
667 WARN_ON(1);
668 return 0;
669 }
446 iterate_all_kinds(i, bytes, v, 670 iterate_all_kinds(i, bytes, v,
447 __copy_from_user_inatomic((p += v.iov_len) - v.iov_len, 671 __copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
448 v.iov_base, v.iov_len), 672 v.iov_base, v.iov_len),
@@ -455,8 +679,49 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
455} 679}
456EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); 680EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
457 681
682static void pipe_advance(struct iov_iter *i, size_t size)
683{
684 struct pipe_inode_info *pipe = i->pipe;
685 struct pipe_buffer *buf;
686 int idx = i->idx;
687 size_t off = i->iov_offset;
688
689 if (unlikely(i->count < size))
690 size = i->count;
691
692 if (size) {
693 if (off) /* make it relative to the beginning of buffer */
694 size += off - pipe->bufs[idx].offset;
695 while (1) {
696 buf = &pipe->bufs[idx];
697 if (size <= buf->len)
698 break;
699 size -= buf->len;
700 idx = next_idx(idx, pipe);
701 }
702 buf->len = size;
703 i->idx = idx;
704 off = i->iov_offset = buf->offset + size;
705 }
706 if (off)
707 idx = next_idx(idx, pipe);
708 if (pipe->nrbufs) {
709 int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
710 /* [curbuf,unused) is in use. Free [idx,unused) */
711 while (idx != unused) {
712 pipe_buf_release(pipe, &pipe->bufs[idx]);
713 idx = next_idx(idx, pipe);
714 pipe->nrbufs--;
715 }
716 }
717}
718
458void iov_iter_advance(struct iov_iter *i, size_t size) 719void iov_iter_advance(struct iov_iter *i, size_t size)
459{ 720{
721 if (unlikely(i->type & ITER_PIPE)) {
722 pipe_advance(i, size);
723 return;
724 }
460 iterate_and_advance(i, size, v, 0, 0, 0) 725 iterate_and_advance(i, size, v, 0, 0, 0)
461} 726}
462EXPORT_SYMBOL(iov_iter_advance); 727EXPORT_SYMBOL(iov_iter_advance);
@@ -466,6 +731,8 @@ EXPORT_SYMBOL(iov_iter_advance);
466 */ 731 */
467size_t iov_iter_single_seg_count(const struct iov_iter *i) 732size_t iov_iter_single_seg_count(const struct iov_iter *i)
468{ 733{
734 if (unlikely(i->type & ITER_PIPE))
735 return i->count; // it is a silly place, anyway
469 if (i->nr_segs == 1) 736 if (i->nr_segs == 1)
470 return i->count; 737 return i->count;
471 else if (i->type & ITER_BVEC) 738 else if (i->type & ITER_BVEC)
@@ -501,6 +768,19 @@ void iov_iter_bvec(struct iov_iter *i, int direction,
501} 768}
502EXPORT_SYMBOL(iov_iter_bvec); 769EXPORT_SYMBOL(iov_iter_bvec);
503 770
771void iov_iter_pipe(struct iov_iter *i, int direction,
772 struct pipe_inode_info *pipe,
773 size_t count)
774{
775 BUG_ON(direction != ITER_PIPE);
776 i->type = direction;
777 i->pipe = pipe;
778 i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
779 i->iov_offset = 0;
780 i->count = count;
781}
782EXPORT_SYMBOL(iov_iter_pipe);
783
504unsigned long iov_iter_alignment(const struct iov_iter *i) 784unsigned long iov_iter_alignment(const struct iov_iter *i)
505{ 785{
506 unsigned long res = 0; 786 unsigned long res = 0;
@@ -509,6 +789,11 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
509 if (!size) 789 if (!size)
510 return 0; 790 return 0;
511 791
792 if (unlikely(i->type & ITER_PIPE)) {
793 if (i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
794 return size | i->iov_offset;
795 return size;
796 }
512 iterate_all_kinds(i, size, v, 797 iterate_all_kinds(i, size, v,
513 (res |= (unsigned long)v.iov_base | v.iov_len, 0), 798 (res |= (unsigned long)v.iov_base | v.iov_len, 0),
514 res |= v.bv_offset | v.bv_len, 799 res |= v.bv_offset | v.bv_len,
@@ -525,6 +810,11 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
525 if (!size) 810 if (!size)
526 return 0; 811 return 0;
527 812
813 if (unlikely(i->type & ITER_PIPE)) {
814 WARN_ON(1);
815 return ~0U;
816 }
817
528 iterate_all_kinds(i, size, v, 818 iterate_all_kinds(i, size, v,
529 (res |= (!res ? 0 : (unsigned long)v.iov_base) | 819 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
530 (size != v.iov_len ? size : 0), 0), 820 (size != v.iov_len ? size : 0), 0),
@@ -537,6 +827,47 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
537} 827}
538EXPORT_SYMBOL(iov_iter_gap_alignment); 828EXPORT_SYMBOL(iov_iter_gap_alignment);
539 829
830static inline size_t __pipe_get_pages(struct iov_iter *i,
831 size_t maxsize,
832 struct page **pages,
833 int idx,
834 size_t *start)
835{
836 struct pipe_inode_info *pipe = i->pipe;
837 size_t n = push_pipe(i, maxsize, &idx, start);
838 if (!n)
839 return -EFAULT;
840
841 maxsize = n;
842 n += *start;
843 while (n >= PAGE_SIZE) {
844 get_page(*pages++ = pipe->bufs[idx].page);
845 idx = next_idx(idx, pipe);
846 n -= PAGE_SIZE;
847 }
848
849 return maxsize;
850}
851
852static ssize_t pipe_get_pages(struct iov_iter *i,
853 struct page **pages, size_t maxsize, unsigned maxpages,
854 size_t *start)
855{
856 unsigned npages;
857 size_t capacity;
858 int idx;
859
860 if (!sanity(i))
861 return -EFAULT;
862
863 data_start(i, &idx, start);
864 /* some of this one + all after this one */
865 npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
866 capacity = min(npages,maxpages) * PAGE_SIZE - *start;
867
868 return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
869}
870
540ssize_t iov_iter_get_pages(struct iov_iter *i, 871ssize_t iov_iter_get_pages(struct iov_iter *i,
541 struct page **pages, size_t maxsize, unsigned maxpages, 872 struct page **pages, size_t maxsize, unsigned maxpages,
542 size_t *start) 873 size_t *start)
@@ -547,6 +878,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
547 if (!maxsize) 878 if (!maxsize)
548 return 0; 879 return 0;
549 880
881 if (unlikely(i->type & ITER_PIPE))
882 return pipe_get_pages(i, pages, maxsize, maxpages, start);
550 iterate_all_kinds(i, maxsize, v, ({ 883 iterate_all_kinds(i, maxsize, v, ({
551 unsigned long addr = (unsigned long)v.iov_base; 884 unsigned long addr = (unsigned long)v.iov_base;
552 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); 885 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -582,6 +915,37 @@ static struct page **get_pages_array(size_t n)
582 return p; 915 return p;
583} 916}
584 917
918static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
919 struct page ***pages, size_t maxsize,
920 size_t *start)
921{
922 struct page **p;
923 size_t n;
924 int idx;
925 int npages;
926
927 if (!sanity(i))
928 return -EFAULT;
929
930 data_start(i, &idx, start);
931 /* some of this one + all after this one */
932 npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
933 n = npages * PAGE_SIZE - *start;
934 if (maxsize > n)
935 maxsize = n;
936 else
937 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
938 p = get_pages_array(npages);
939 if (!p)
940 return -ENOMEM;
941 n = __pipe_get_pages(i, maxsize, p, idx, start);
942 if (n > 0)
943 *pages = p;
944 else
945 kvfree(p);
946 return n;
947}
948
585ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, 949ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
586 struct page ***pages, size_t maxsize, 950 struct page ***pages, size_t maxsize,
587 size_t *start) 951 size_t *start)
@@ -594,6 +958,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
594 if (!maxsize) 958 if (!maxsize)
595 return 0; 959 return 0;
596 960
961 if (unlikely(i->type & ITER_PIPE))
962 return pipe_get_pages_alloc(i, pages, maxsize, start);
597 iterate_all_kinds(i, maxsize, v, ({ 963 iterate_all_kinds(i, maxsize, v, ({
598 unsigned long addr = (unsigned long)v.iov_base; 964 unsigned long addr = (unsigned long)v.iov_base;
599 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); 965 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -635,6 +1001,10 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
635 __wsum sum, next; 1001 __wsum sum, next;
636 size_t off = 0; 1002 size_t off = 0;
637 sum = *csum; 1003 sum = *csum;
1004 if (unlikely(i->type & ITER_PIPE)) {
1005 WARN_ON(1);
1006 return 0;
1007 }
638 iterate_and_advance(i, bytes, v, ({ 1008 iterate_and_advance(i, bytes, v, ({
639 int err = 0; 1009 int err = 0;
640 next = csum_and_copy_from_user(v.iov_base, 1010 next = csum_and_copy_from_user(v.iov_base,
@@ -673,6 +1043,10 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
673 __wsum sum, next; 1043 __wsum sum, next;
674 size_t off = 0; 1044 size_t off = 0;
675 sum = *csum; 1045 sum = *csum;
1046 if (unlikely(i->type & ITER_PIPE)) {
1047 WARN_ON(1); /* for now */
1048 return 0;
1049 }
676 iterate_and_advance(i, bytes, v, ({ 1050 iterate_and_advance(i, bytes, v, ({
677 int err = 0; 1051 int err = 0;
678 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, 1052 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
@@ -712,7 +1086,20 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
712 if (!size) 1086 if (!size)
713 return 0; 1087 return 0;
714 1088
715 iterate_all_kinds(i, size, v, ({ 1089 if (unlikely(i->type & ITER_PIPE)) {
1090 struct pipe_inode_info *pipe = i->pipe;
1091 size_t off;
1092 int idx;
1093
1094 if (!sanity(i))
1095 return 0;
1096
1097 data_start(i, &idx, &off);
1098 /* some of this one + all after this one */
1099 npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
1100 if (npages >= maxpages)
1101 return maxpages;
1102 } else iterate_all_kinds(i, size, v, ({
716 unsigned long p = (unsigned long)v.iov_base; 1103 unsigned long p = (unsigned long)v.iov_base;
717 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) 1104 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
718 - p / PAGE_SIZE; 1105 - p / PAGE_SIZE;
@@ -737,6 +1124,10 @@ EXPORT_SYMBOL(iov_iter_npages);
737const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) 1124const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
738{ 1125{
739 *new = *old; 1126 *new = *old;
1127 if (unlikely(new->type & ITER_PIPE)) {
1128 WARN_ON(1);
1129 return NULL;
1130 }
740 if (new->type & ITER_BVEC) 1131 if (new->type & ITER_BVEC)
741 return new->bvec = kmemdup(new->bvec, 1132 return new->bvec = kmemdup(new->bvec,
742 new->nr_segs * sizeof(struct bio_vec), 1133 new->nr_segs * sizeof(struct bio_vec),
diff --git a/mm/shmem.c b/mm/shmem.c
index 971fc83e6402..d86b5e455fef 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2311,119 +2311,6 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
2311 return retval ? retval : error; 2311 return retval ? retval : error;
2312} 2312}
2313 2313
2314static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
2315 struct pipe_inode_info *pipe, size_t len,
2316 unsigned int flags)
2317{
2318 struct address_space *mapping = in->f_mapping;
2319 struct inode *inode = mapping->host;
2320 unsigned int loff, nr_pages, req_pages;
2321 struct page *pages[PIPE_DEF_BUFFERS];
2322 struct partial_page partial[PIPE_DEF_BUFFERS];
2323 struct page *page;
2324 pgoff_t index, end_index;
2325 loff_t isize, left;
2326 int error, page_nr;
2327 struct splice_pipe_desc spd = {
2328 .pages = pages,
2329 .partial = partial,
2330 .nr_pages_max = PIPE_DEF_BUFFERS,
2331 .flags = flags,
2332 .ops = &page_cache_pipe_buf_ops,
2333 .spd_release = spd_release_page,
2334 };
2335
2336 isize = i_size_read(inode);
2337 if (unlikely(*ppos >= isize))
2338 return 0;
2339
2340 left = isize - *ppos;
2341 if (unlikely(left < len))
2342 len = left;
2343
2344 if (splice_grow_spd(pipe, &spd))
2345 return -ENOMEM;
2346
2347 index = *ppos >> PAGE_SHIFT;
2348 loff = *ppos & ~PAGE_MASK;
2349 req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT;
2350 nr_pages = min(req_pages, spd.nr_pages_max);
2351
2352 spd.nr_pages = find_get_pages_contig(mapping, index,
2353 nr_pages, spd.pages);
2354 index += spd.nr_pages;
2355 error = 0;
2356
2357 while (spd.nr_pages < nr_pages) {
2358 error = shmem_getpage(inode, index, &page, SGP_CACHE);
2359 if (error)
2360 break;
2361 unlock_page(page);
2362 spd.pages[spd.nr_pages++] = page;
2363 index++;
2364 }
2365
2366 index = *ppos >> PAGE_SHIFT;
2367 nr_pages = spd.nr_pages;
2368 spd.nr_pages = 0;
2369
2370 for (page_nr = 0; page_nr < nr_pages; page_nr++) {
2371 unsigned int this_len;
2372
2373 if (!len)
2374 break;
2375
2376 this_len = min_t(unsigned long, len, PAGE_SIZE - loff);
2377 page = spd.pages[page_nr];
2378
2379 if (!PageUptodate(page) || page->mapping != mapping) {
2380 error = shmem_getpage(inode, index, &page, SGP_CACHE);
2381 if (error)
2382 break;
2383 unlock_page(page);
2384 put_page(spd.pages[page_nr]);
2385 spd.pages[page_nr] = page;
2386 }
2387
2388 isize = i_size_read(inode);
2389 end_index = (isize - 1) >> PAGE_SHIFT;
2390 if (unlikely(!isize || index > end_index))
2391 break;
2392
2393 if (end_index == index) {
2394 unsigned int plen;
2395
2396 plen = ((isize - 1) & ~PAGE_MASK) + 1;
2397 if (plen <= loff)
2398 break;
2399
2400 this_len = min(this_len, plen - loff);
2401 len = this_len;
2402 }
2403
2404 spd.partial[page_nr].offset = loff;
2405 spd.partial[page_nr].len = this_len;
2406 len -= this_len;
2407 loff = 0;
2408 spd.nr_pages++;
2409 index++;
2410 }
2411
2412 while (page_nr < nr_pages)
2413 put_page(spd.pages[page_nr++]);
2414
2415 if (spd.nr_pages)
2416 error = splice_to_pipe(pipe, &spd);
2417
2418 splice_shrink_spd(&spd);
2419
2420 if (error > 0) {
2421 *ppos += error;
2422 file_accessed(in);
2423 }
2424 return error;
2425}
2426
2427/* 2314/*
2428 * llseek SEEK_DATA or SEEK_HOLE through the radix_tree. 2315 * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
2429 */ 2316 */
@@ -3786,7 +3673,7 @@ static const struct file_operations shmem_file_operations = {
3786 .read_iter = shmem_file_read_iter, 3673 .read_iter = shmem_file_read_iter,
3787 .write_iter = generic_file_write_iter, 3674 .write_iter = generic_file_write_iter,
3788 .fsync = noop_fsync, 3675 .fsync = noop_fsync,
3789 .splice_read = shmem_file_splice_read, 3676 .splice_read = generic_file_splice_read,
3790 .splice_write = iter_file_splice_write, 3677 .splice_write = iter_file_splice_write,
3791 .fallocate = shmem_fallocate, 3678 .fallocate = shmem_fallocate,
3792#endif 3679#endif
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cbd19d250947..1e3e0087245b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1962,37 +1962,13 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
1962 return false; 1962 return false;
1963} 1963}
1964 1964
1965ssize_t skb_socket_splice(struct sock *sk,
1966 struct pipe_inode_info *pipe,
1967 struct splice_pipe_desc *spd)
1968{
1969 int ret;
1970
1971 /* Drop the socket lock, otherwise we have reverse
1972 * locking dependencies between sk_lock and i_mutex
1973 * here as compared to sendfile(). We enter here
1974 * with the socket lock held, and splice_to_pipe() will
1975 * grab the pipe inode lock. For sendfile() emulation,
1976 * we call into ->sendpage() with the i_mutex lock held
1977 * and networking will grab the socket lock.
1978 */
1979 release_sock(sk);
1980 ret = splice_to_pipe(pipe, spd);
1981 lock_sock(sk);
1982
1983 return ret;
1984}
1985
1986/* 1965/*
1987 * Map data from the skb to a pipe. Should handle both the linear part, 1966 * Map data from the skb to a pipe. Should handle both the linear part,
1988 * the fragments, and the frag list. 1967 * the fragments, and the frag list.
1989 */ 1968 */
1990int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, 1969int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
1991 struct pipe_inode_info *pipe, unsigned int tlen, 1970 struct pipe_inode_info *pipe, unsigned int tlen,
1992 unsigned int flags, 1971 unsigned int flags)
1993 ssize_t (*splice_cb)(struct sock *,
1994 struct pipe_inode_info *,
1995 struct splice_pipe_desc *))
1996{ 1972{
1997 struct partial_page partial[MAX_SKB_FRAGS]; 1973 struct partial_page partial[MAX_SKB_FRAGS];
1998 struct page *pages[MAX_SKB_FRAGS]; 1974 struct page *pages[MAX_SKB_FRAGS];
@@ -2009,7 +1985,7 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
2009 __skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk); 1985 __skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk);
2010 1986
2011 if (spd.nr_pages) 1987 if (spd.nr_pages)
2012 ret = splice_cb(sk, pipe, &spd); 1988 ret = splice_to_pipe(pipe, &spd);
2013 1989
2014 return ret; 1990 return ret;
2015} 1991}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f253e5019d22..2414b7c80b87 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -691,8 +691,7 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
691 int ret; 691 int ret;
692 692
693 ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe, 693 ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe,
694 min(rd_desc->count, len), tss->flags, 694 min(rd_desc->count, len), tss->flags);
695 skb_socket_splice);
696 if (ret > 0) 695 if (ret > 0)
697 rd_desc->count -= ret; 696 rd_desc->count -= ret;
698 return ret; 697 return ret;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index b7f869a85ab7..7e08a4d3d77d 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1160,19 +1160,6 @@ out:
1160 return copied ? : err; 1160 return copied ? : err;
1161} 1161}
1162 1162
1163static ssize_t kcm_sock_splice(struct sock *sk,
1164 struct pipe_inode_info *pipe,
1165 struct splice_pipe_desc *spd)
1166{
1167 int ret;
1168
1169 release_sock(sk);
1170 ret = splice_to_pipe(pipe, spd);
1171 lock_sock(sk);
1172
1173 return ret;
1174}
1175
1176static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, 1163static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
1177 struct pipe_inode_info *pipe, size_t len, 1164 struct pipe_inode_info *pipe, size_t len,
1178 unsigned int flags) 1165 unsigned int flags)
@@ -1202,8 +1189,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
1202 if (len > rxm->full_len) 1189 if (len > rxm->full_len)
1203 len = rxm->full_len; 1190 len = rxm->full_len;
1204 1191
1205 copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags, 1192 copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags);
1206 kcm_sock_splice);
1207 if (copied < 0) { 1193 if (copied < 0) {
1208 err = copied; 1194 err = copied;
1209 goto err_out; 1195 goto err_out;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 8309687a56b0..145082e2ba36 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2475,28 +2475,13 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2475 return unix_stream_read_generic(&state); 2475 return unix_stream_read_generic(&state);
2476} 2476}
2477 2477
2478static ssize_t skb_unix_socket_splice(struct sock *sk,
2479 struct pipe_inode_info *pipe,
2480 struct splice_pipe_desc *spd)
2481{
2482 int ret;
2483 struct unix_sock *u = unix_sk(sk);
2484
2485 mutex_unlock(&u->iolock);
2486 ret = splice_to_pipe(pipe, spd);
2487 mutex_lock(&u->iolock);
2488
2489 return ret;
2490}
2491
2492static int unix_stream_splice_actor(struct sk_buff *skb, 2478static int unix_stream_splice_actor(struct sk_buff *skb,
2493 int skip, int chunk, 2479 int skip, int chunk,
2494 struct unix_stream_read_state *state) 2480 struct unix_stream_read_state *state)
2495{ 2481{
2496 return skb_splice_bits(skb, state->socket->sk, 2482 return skb_splice_bits(skb, state->socket->sk,
2497 UNIXCB(skb).consumed + skip, 2483 UNIXCB(skb).consumed + skip,
2498 state->pipe, chunk, state->splice_flags, 2484 state->pipe, chunk, state->splice_flags);
2499 skb_unix_socket_splice);
2500} 2485}
2501 2486
2502static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, 2487static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,