diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-07 18:36:58 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-07 18:36:58 -0400 |
commit | d1f5323370fceaed43a7ee38f4c7bfc7e70f28d0 (patch) | |
tree | cadb1dc22207a4e1838b7af31ac3fc15363e809b | |
parent | 2eee010d092903ee95716b6c2fbd9d3289839aa4 (diff) | |
parent | a949e63992469fed87aef197347960ced31701b8 (diff) |
Merge branch 'work.splice_read' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull VFS splice updates from Al Viro:
"There's a bunch of branches this cycle, both mine and from other folks
and I'd rather send pull requests separately.
This one is the conversion of ->splice_read() to ITER_PIPE iov_iter
(and introduction of such). Gets rid of a lot of code in fs/splice.c
and elsewhere; there will be followups, but these are for the next
cycle... Some pipe/splice-related cleanups from Miklos in the same
branch as well"
* 'work.splice_read' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
pipe: fix comment in pipe_buf_operations
pipe: add pipe_buf_steal() helper
pipe: add pipe_buf_confirm() helper
pipe: add pipe_buf_release() helper
pipe: add pipe_buf_get() helper
relay: simplify relay_file_read()
switch default_file_splice_read() to use of pipe-backed iov_iter
switch generic_file_splice_read() to use of ->read_iter()
new iov_iter flavour: pipe-backed
fuse_dev_splice_read(): switch to add_to_pipe()
skb_splice_bits(): get rid of callback
new helper: add_to_pipe()
splice: lift pipe_lock out of splice_to_pipe()
splice: switch get_iovec_page_array() to iov_iter
splice_to_pipe(): don't open-code wakeup_pipe_readers()
consistent treatment of EFAULT on O_DIRECT read/write
30 files changed, 746 insertions, 1077 deletions
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 5da47e26a012..8114744bf30c 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c | |||
@@ -889,7 +889,7 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | |||
889 | return 0; | 889 | return 0; |
890 | 890 | ||
891 | /* Try lock this page */ | 891 | /* Try lock this page */ |
892 | if (buf->ops->steal(pipe, buf) == 0) { | 892 | if (pipe_buf_steal(pipe, buf) == 0) { |
893 | /* Get reference and unlock page for moving */ | 893 | /* Get reference and unlock page for moving */ |
894 | get_page(buf->page); | 894 | get_page(buf->page); |
895 | unlock_page(buf->page); | 895 | unlock_page(buf->page); |
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index 6e3a188baaae..d56863ff5866 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c | |||
@@ -1138,45 +1138,31 @@ restart: | |||
1138 | range_lock_init(&range, *ppos, *ppos + count - 1); | 1138 | range_lock_init(&range, *ppos, *ppos + count - 1); |
1139 | 1139 | ||
1140 | vio->vui_fd = LUSTRE_FPRIVATE(file); | 1140 | vio->vui_fd = LUSTRE_FPRIVATE(file); |
1141 | vio->vui_io_subtype = args->via_io_subtype; | 1141 | vio->vui_iter = args->u.normal.via_iter; |
1142 | vio->vui_iocb = args->u.normal.via_iocb; | ||
1143 | /* | ||
1144 | * Direct IO reads must also take range lock, | ||
1145 | * or multiple reads will try to work on the same pages | ||
1146 | * See LU-6227 for details. | ||
1147 | */ | ||
1148 | if (((iot == CIT_WRITE) || | ||
1149 | (iot == CIT_READ && (file->f_flags & O_DIRECT))) && | ||
1150 | !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { | ||
1151 | CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n", | ||
1152 | range.rl_node.in_extent.start, | ||
1153 | range.rl_node.in_extent.end); | ||
1154 | result = range_lock(&lli->lli_write_tree, | ||
1155 | &range); | ||
1156 | if (result < 0) | ||
1157 | goto out; | ||
1142 | 1158 | ||
1143 | switch (vio->vui_io_subtype) { | 1159 | range_locked = true; |
1144 | case IO_NORMAL: | ||
1145 | vio->vui_iter = args->u.normal.via_iter; | ||
1146 | vio->vui_iocb = args->u.normal.via_iocb; | ||
1147 | /* | ||
1148 | * Direct IO reads must also take range lock, | ||
1149 | * or multiple reads will try to work on the same pages | ||
1150 | * See LU-6227 for details. | ||
1151 | */ | ||
1152 | if (((iot == CIT_WRITE) || | ||
1153 | (iot == CIT_READ && (file->f_flags & O_DIRECT))) && | ||
1154 | !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { | ||
1155 | CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n", | ||
1156 | range.rl_node.in_extent.start, | ||
1157 | range.rl_node.in_extent.end); | ||
1158 | result = range_lock(&lli->lli_write_tree, | ||
1159 | &range); | ||
1160 | if (result < 0) | ||
1161 | goto out; | ||
1162 | |||
1163 | range_locked = true; | ||
1164 | } | ||
1165 | down_read(&lli->lli_trunc_sem); | ||
1166 | break; | ||
1167 | case IO_SPLICE: | ||
1168 | vio->u.splice.vui_pipe = args->u.splice.via_pipe; | ||
1169 | vio->u.splice.vui_flags = args->u.splice.via_flags; | ||
1170 | break; | ||
1171 | default: | ||
1172 | CERROR("Unknown IO type - %u\n", vio->vui_io_subtype); | ||
1173 | LBUG(); | ||
1174 | } | 1160 | } |
1161 | down_read(&lli->lli_trunc_sem); | ||
1175 | ll_cl_add(file, env, io); | 1162 | ll_cl_add(file, env, io); |
1176 | result = cl_io_loop(env, io); | 1163 | result = cl_io_loop(env, io); |
1177 | ll_cl_remove(file, env); | 1164 | ll_cl_remove(file, env); |
1178 | if (args->via_io_subtype == IO_NORMAL) | 1165 | up_read(&lli->lli_trunc_sem); |
1179 | up_read(&lli->lli_trunc_sem); | ||
1180 | if (range_locked) { | 1166 | if (range_locked) { |
1181 | CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n", | 1167 | CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n", |
1182 | range.rl_node.in_extent.start, | 1168 | range.rl_node.in_extent.start, |
@@ -1235,7 +1221,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) | |||
1235 | if (IS_ERR(env)) | 1221 | if (IS_ERR(env)) |
1236 | return PTR_ERR(env); | 1222 | return PTR_ERR(env); |
1237 | 1223 | ||
1238 | args = ll_env_args(env, IO_NORMAL); | 1224 | args = ll_env_args(env); |
1239 | args->u.normal.via_iter = to; | 1225 | args->u.normal.via_iter = to; |
1240 | args->u.normal.via_iocb = iocb; | 1226 | args->u.normal.via_iocb = iocb; |
1241 | 1227 | ||
@@ -1259,7 +1245,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
1259 | if (IS_ERR(env)) | 1245 | if (IS_ERR(env)) |
1260 | return PTR_ERR(env); | 1246 | return PTR_ERR(env); |
1261 | 1247 | ||
1262 | args = ll_env_args(env, IO_NORMAL); | 1248 | args = ll_env_args(env); |
1263 | args->u.normal.via_iter = from; | 1249 | args->u.normal.via_iter = from; |
1264 | args->u.normal.via_iocb = iocb; | 1250 | args->u.normal.via_iocb = iocb; |
1265 | 1251 | ||
@@ -1269,31 +1255,6 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
1269 | return result; | 1255 | return result; |
1270 | } | 1256 | } |
1271 | 1257 | ||
1272 | /* | ||
1273 | * Send file content (through pagecache) somewhere with helper | ||
1274 | */ | ||
1275 | static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos, | ||
1276 | struct pipe_inode_info *pipe, size_t count, | ||
1277 | unsigned int flags) | ||
1278 | { | ||
1279 | struct lu_env *env; | ||
1280 | struct vvp_io_args *args; | ||
1281 | ssize_t result; | ||
1282 | int refcheck; | ||
1283 | |||
1284 | env = cl_env_get(&refcheck); | ||
1285 | if (IS_ERR(env)) | ||
1286 | return PTR_ERR(env); | ||
1287 | |||
1288 | args = ll_env_args(env, IO_SPLICE); | ||
1289 | args->u.splice.via_pipe = pipe; | ||
1290 | args->u.splice.via_flags = flags; | ||
1291 | |||
1292 | result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count); | ||
1293 | cl_env_put(env, &refcheck); | ||
1294 | return result; | ||
1295 | } | ||
1296 | |||
1297 | int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, | 1258 | int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, |
1298 | __u64 flags, struct lov_user_md *lum, | 1259 | __u64 flags, struct lov_user_md *lum, |
1299 | int lum_size) | 1260 | int lum_size) |
@@ -3267,7 +3228,7 @@ struct file_operations ll_file_operations = { | |||
3267 | .release = ll_file_release, | 3228 | .release = ll_file_release, |
3268 | .mmap = ll_file_mmap, | 3229 | .mmap = ll_file_mmap, |
3269 | .llseek = ll_file_seek, | 3230 | .llseek = ll_file_seek, |
3270 | .splice_read = ll_file_splice_read, | 3231 | .splice_read = generic_file_splice_read, |
3271 | .fsync = ll_fsync, | 3232 | .fsync = ll_fsync, |
3272 | .flush = ll_flush | 3233 | .flush = ll_flush |
3273 | }; | 3234 | }; |
@@ -3280,7 +3241,7 @@ struct file_operations ll_file_operations_flock = { | |||
3280 | .release = ll_file_release, | 3241 | .release = ll_file_release, |
3281 | .mmap = ll_file_mmap, | 3242 | .mmap = ll_file_mmap, |
3282 | .llseek = ll_file_seek, | 3243 | .llseek = ll_file_seek, |
3283 | .splice_read = ll_file_splice_read, | 3244 | .splice_read = generic_file_splice_read, |
3284 | .fsync = ll_fsync, | 3245 | .fsync = ll_fsync, |
3285 | .flush = ll_flush, | 3246 | .flush = ll_flush, |
3286 | .flock = ll_file_flock, | 3247 | .flock = ll_file_flock, |
@@ -3296,7 +3257,7 @@ struct file_operations ll_file_operations_noflock = { | |||
3296 | .release = ll_file_release, | 3257 | .release = ll_file_release, |
3297 | .mmap = ll_file_mmap, | 3258 | .mmap = ll_file_mmap, |
3298 | .llseek = ll_file_seek, | 3259 | .llseek = ll_file_seek, |
3299 | .splice_read = ll_file_splice_read, | 3260 | .splice_read = generic_file_splice_read, |
3300 | .fsync = ll_fsync, | 3261 | .fsync = ll_fsync, |
3301 | .flush = ll_flush, | 3262 | .flush = ll_flush, |
3302 | .flock = ll_file_noflock, | 3263 | .flock = ll_file_noflock, |
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index 3e98bd685061..4bc551279aa4 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h | |||
@@ -908,17 +908,11 @@ void vvp_write_complete(struct vvp_object *club, struct vvp_page *page); | |||
908 | */ | 908 | */ |
909 | struct vvp_io_args { | 909 | struct vvp_io_args { |
910 | /** normal/splice */ | 910 | /** normal/splice */ |
911 | enum vvp_io_subtype via_io_subtype; | ||
912 | |||
913 | union { | 911 | union { |
914 | struct { | 912 | struct { |
915 | struct kiocb *via_iocb; | 913 | struct kiocb *via_iocb; |
916 | struct iov_iter *via_iter; | 914 | struct iov_iter *via_iter; |
917 | } normal; | 915 | } normal; |
918 | struct { | ||
919 | struct pipe_inode_info *via_pipe; | ||
920 | unsigned int via_flags; | ||
921 | } splice; | ||
922 | } u; | 916 | } u; |
923 | }; | 917 | }; |
924 | 918 | ||
@@ -946,14 +940,9 @@ static inline struct ll_thread_info *ll_env_info(const struct lu_env *env) | |||
946 | return lti; | 940 | return lti; |
947 | } | 941 | } |
948 | 942 | ||
949 | static inline struct vvp_io_args *ll_env_args(const struct lu_env *env, | 943 | static inline struct vvp_io_args *ll_env_args(const struct lu_env *env) |
950 | enum vvp_io_subtype type) | ||
951 | { | 944 | { |
952 | struct vvp_io_args *via = &ll_env_info(env)->lti_args; | 945 | return &ll_env_info(env)->lti_args; |
953 | |||
954 | via->via_io_subtype = type; | ||
955 | |||
956 | return via; | ||
957 | } | 946 | } |
958 | 947 | ||
959 | void ll_queue_done_writing(struct inode *inode, unsigned long flags); | 948 | void ll_queue_done_writing(struct inode *inode, unsigned long flags); |
diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h index 5802da81cd0e..4464ad258387 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_internal.h +++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h | |||
@@ -49,14 +49,6 @@ struct obd_device; | |||
49 | struct obd_export; | 49 | struct obd_export; |
50 | struct page; | 50 | struct page; |
51 | 51 | ||
52 | /* specific architecture can implement only part of this list */ | ||
53 | enum vvp_io_subtype { | ||
54 | /** normal IO */ | ||
55 | IO_NORMAL, | ||
56 | /** io started from splice_{read|write} */ | ||
57 | IO_SPLICE | ||
58 | }; | ||
59 | |||
60 | /** | 52 | /** |
61 | * IO state private to IO state private to VVP layer. | 53 | * IO state private to IO state private to VVP layer. |
62 | */ | 54 | */ |
@@ -99,10 +91,6 @@ struct vvp_io { | |||
99 | bool ft_flags_valid; | 91 | bool ft_flags_valid; |
100 | } fault; | 92 | } fault; |
101 | struct { | 93 | struct { |
102 | struct pipe_inode_info *vui_pipe; | ||
103 | unsigned int vui_flags; | ||
104 | } splice; | ||
105 | struct { | ||
106 | struct cl_page_list vui_queue; | 94 | struct cl_page_list vui_queue; |
107 | unsigned long vui_written; | 95 | unsigned long vui_written; |
108 | int vui_from; | 96 | int vui_from; |
@@ -110,8 +98,6 @@ struct vvp_io { | |||
110 | } write; | 98 | } write; |
111 | } u; | 99 | } u; |
112 | 100 | ||
113 | enum vvp_io_subtype vui_io_subtype; | ||
114 | |||
115 | /** | 101 | /** |
116 | * Layout version when this IO is initialized | 102 | * Layout version when this IO is initialized |
117 | */ | 103 | */ |
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c index 2ab450359b6d..2b7f182a15e2 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_io.c +++ b/drivers/staging/lustre/lustre/llite/vvp_io.c | |||
@@ -54,18 +54,6 @@ static struct vvp_io *cl2vvp_io(const struct lu_env *env, | |||
54 | } | 54 | } |
55 | 55 | ||
56 | /** | 56 | /** |
57 | * True, if \a io is a normal io, False for splice_{read,write} | ||
58 | */ | ||
59 | static int cl_is_normalio(const struct lu_env *env, const struct cl_io *io) | ||
60 | { | ||
61 | struct vvp_io *vio = vvp_env_io(env); | ||
62 | |||
63 | LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); | ||
64 | |||
65 | return vio->vui_io_subtype == IO_NORMAL; | ||
66 | } | ||
67 | |||
68 | /** | ||
69 | * For swapping layout. The file's layout may have changed. | 57 | * For swapping layout. The file's layout may have changed. |
70 | * To avoid populating pages to a wrong stripe, we have to verify the | 58 | * To avoid populating pages to a wrong stripe, we have to verify the |
71 | * correctness of layout. It works because swapping layout processes | 59 | * correctness of layout. It works because swapping layout processes |
@@ -390,9 +378,6 @@ static int vvp_mmap_locks(const struct lu_env *env, | |||
390 | 378 | ||
391 | LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); | 379 | LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); |
392 | 380 | ||
393 | if (!cl_is_normalio(env, io)) | ||
394 | return 0; | ||
395 | |||
396 | if (!vio->vui_iter) /* nfs or loop back device write */ | 381 | if (!vio->vui_iter) /* nfs or loop back device write */ |
397 | return 0; | 382 | return 0; |
398 | 383 | ||
@@ -461,15 +446,10 @@ static void vvp_io_advance(const struct lu_env *env, | |||
461 | const struct cl_io_slice *ios, | 446 | const struct cl_io_slice *ios, |
462 | size_t nob) | 447 | size_t nob) |
463 | { | 448 | { |
464 | struct vvp_io *vio = cl2vvp_io(env, ios); | ||
465 | struct cl_io *io = ios->cis_io; | ||
466 | struct cl_object *obj = ios->cis_io->ci_obj; | 449 | struct cl_object *obj = ios->cis_io->ci_obj; |
467 | 450 | struct vvp_io *vio = cl2vvp_io(env, ios); | |
468 | CLOBINVRNT(env, obj, vvp_object_invariant(obj)); | 451 | CLOBINVRNT(env, obj, vvp_object_invariant(obj)); |
469 | 452 | ||
470 | if (!cl_is_normalio(env, io)) | ||
471 | return; | ||
472 | |||
473 | iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count -= nob); | 453 | iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count -= nob); |
474 | } | 454 | } |
475 | 455 | ||
@@ -478,7 +458,7 @@ static void vvp_io_update_iov(const struct lu_env *env, | |||
478 | { | 458 | { |
479 | size_t size = io->u.ci_rw.crw_count; | 459 | size_t size = io->u.ci_rw.crw_count; |
480 | 460 | ||
481 | if (!cl_is_normalio(env, io) || !vio->vui_iter) | 461 | if (!vio->vui_iter) |
482 | return; | 462 | return; |
483 | 463 | ||
484 | iov_iter_truncate(vio->vui_iter, size); | 464 | iov_iter_truncate(vio->vui_iter, size); |
@@ -715,25 +695,8 @@ static int vvp_io_read_start(const struct lu_env *env, | |||
715 | 695 | ||
716 | /* BUG: 5972 */ | 696 | /* BUG: 5972 */ |
717 | file_accessed(file); | 697 | file_accessed(file); |
718 | switch (vio->vui_io_subtype) { | 698 | LASSERT(vio->vui_iocb->ki_pos == pos); |
719 | case IO_NORMAL: | 699 | result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter); |
720 | LASSERT(vio->vui_iocb->ki_pos == pos); | ||
721 | result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter); | ||
722 | break; | ||
723 | case IO_SPLICE: | ||
724 | result = generic_file_splice_read(file, &pos, | ||
725 | vio->u.splice.vui_pipe, cnt, | ||
726 | vio->u.splice.vui_flags); | ||
727 | /* LU-1109: do splice read stripe by stripe otherwise if it | ||
728 | * may make nfsd stuck if this read occupied all internal pipe | ||
729 | * buffers. | ||
730 | */ | ||
731 | io->ci_continue = 0; | ||
732 | break; | ||
733 | default: | ||
734 | CERROR("Wrong IO type %u\n", vio->vui_io_subtype); | ||
735 | LBUG(); | ||
736 | } | ||
737 | 700 | ||
738 | out: | 701 | out: |
739 | if (result >= 0) { | 702 | if (result >= 0) { |
diff --git a/fs/coda/file.c b/fs/coda/file.c index f47c7483863b..8415d4f8d1a1 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c | |||
@@ -38,27 +38,6 @@ coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to) | |||
38 | } | 38 | } |
39 | 39 | ||
40 | static ssize_t | 40 | static ssize_t |
41 | coda_file_splice_read(struct file *coda_file, loff_t *ppos, | ||
42 | struct pipe_inode_info *pipe, size_t count, | ||
43 | unsigned int flags) | ||
44 | { | ||
45 | ssize_t (*splice_read)(struct file *, loff_t *, | ||
46 | struct pipe_inode_info *, size_t, unsigned int); | ||
47 | struct coda_file_info *cfi; | ||
48 | struct file *host_file; | ||
49 | |||
50 | cfi = CODA_FTOC(coda_file); | ||
51 | BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); | ||
52 | host_file = cfi->cfi_container; | ||
53 | |||
54 | splice_read = host_file->f_op->splice_read; | ||
55 | if (!splice_read) | ||
56 | splice_read = default_file_splice_read; | ||
57 | |||
58 | return splice_read(host_file, ppos, pipe, count, flags); | ||
59 | } | ||
60 | |||
61 | static ssize_t | ||
62 | coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to) | 41 | coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to) |
63 | { | 42 | { |
64 | struct file *coda_file = iocb->ki_filp; | 43 | struct file *coda_file = iocb->ki_filp; |
@@ -225,6 +204,6 @@ const struct file_operations coda_file_operations = { | |||
225 | .open = coda_open, | 204 | .open = coda_open, |
226 | .release = coda_release, | 205 | .release = coda_release, |
227 | .fsync = coda_fsync, | 206 | .fsync = coda_fsync, |
228 | .splice_read = coda_file_splice_read, | 207 | .splice_read = generic_file_splice_read, |
229 | }; | 208 | }; |
230 | 209 | ||
diff --git a/fs/direct-io.c b/fs/direct-io.c index 7c3ce73cb617..fb9aa16a7727 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -246,6 +246,9 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) | |||
246 | if ((dio->op == REQ_OP_READ) && | 246 | if ((dio->op == REQ_OP_READ) && |
247 | ((offset + transferred) > dio->i_size)) | 247 | ((offset + transferred) > dio->i_size)) |
248 | transferred = dio->i_size - offset; | 248 | transferred = dio->i_size - offset; |
249 | /* ignore EFAULT if some IO has been done */ | ||
250 | if (unlikely(ret == -EFAULT) && transferred) | ||
251 | ret = 0; | ||
249 | } | 252 | } |
250 | 253 | ||
251 | if (ret == 0) | 254 | if (ret == 0) |
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c41bde26c338..70ea57c7b6bb 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -728,7 +728,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) | |||
728 | struct pipe_buffer *buf = cs->pipebufs; | 728 | struct pipe_buffer *buf = cs->pipebufs; |
729 | 729 | ||
730 | if (!cs->write) { | 730 | if (!cs->write) { |
731 | err = buf->ops->confirm(cs->pipe, buf); | 731 | err = pipe_buf_confirm(cs->pipe, buf); |
732 | if (err) | 732 | if (err) |
733 | return err; | 733 | return err; |
734 | 734 | ||
@@ -827,7 +827,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) | |||
827 | 827 | ||
828 | fuse_copy_finish(cs); | 828 | fuse_copy_finish(cs); |
829 | 829 | ||
830 | err = buf->ops->confirm(cs->pipe, buf); | 830 | err = pipe_buf_confirm(cs->pipe, buf); |
831 | if (err) | 831 | if (err) |
832 | return err; | 832 | return err; |
833 | 833 | ||
@@ -840,7 +840,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) | |||
840 | if (cs->len != PAGE_SIZE) | 840 | if (cs->len != PAGE_SIZE) |
841 | goto out_fallback; | 841 | goto out_fallback; |
842 | 842 | ||
843 | if (buf->ops->steal(cs->pipe, buf) != 0) | 843 | if (pipe_buf_steal(cs->pipe, buf) != 0) |
844 | goto out_fallback; | 844 | goto out_fallback; |
845 | 845 | ||
846 | newpage = buf->page; | 846 | newpage = buf->page; |
@@ -1341,9 +1341,8 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, | |||
1341 | struct pipe_inode_info *pipe, | 1341 | struct pipe_inode_info *pipe, |
1342 | size_t len, unsigned int flags) | 1342 | size_t len, unsigned int flags) |
1343 | { | 1343 | { |
1344 | int ret; | 1344 | int total, ret; |
1345 | int page_nr = 0; | 1345 | int page_nr = 0; |
1346 | int do_wakeup = 0; | ||
1347 | struct pipe_buffer *bufs; | 1346 | struct pipe_buffer *bufs; |
1348 | struct fuse_copy_state cs; | 1347 | struct fuse_copy_state cs; |
1349 | struct fuse_dev *fud = fuse_get_dev(in); | 1348 | struct fuse_dev *fud = fuse_get_dev(in); |
@@ -1362,52 +1361,23 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, | |||
1362 | if (ret < 0) | 1361 | if (ret < 0) |
1363 | goto out; | 1362 | goto out; |
1364 | 1363 | ||
1365 | ret = 0; | ||
1366 | pipe_lock(pipe); | ||
1367 | |||
1368 | if (!pipe->readers) { | ||
1369 | send_sig(SIGPIPE, current, 0); | ||
1370 | if (!ret) | ||
1371 | ret = -EPIPE; | ||
1372 | goto out_unlock; | ||
1373 | } | ||
1374 | |||
1375 | if (pipe->nrbufs + cs.nr_segs > pipe->buffers) { | 1364 | if (pipe->nrbufs + cs.nr_segs > pipe->buffers) { |
1376 | ret = -EIO; | 1365 | ret = -EIO; |
1377 | goto out_unlock; | 1366 | goto out; |
1378 | } | 1367 | } |
1379 | 1368 | ||
1380 | while (page_nr < cs.nr_segs) { | 1369 | for (ret = total = 0; page_nr < cs.nr_segs; total += ret) { |
1381 | int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); | ||
1382 | struct pipe_buffer *buf = pipe->bufs + newbuf; | ||
1383 | |||
1384 | buf->page = bufs[page_nr].page; | ||
1385 | buf->offset = bufs[page_nr].offset; | ||
1386 | buf->len = bufs[page_nr].len; | ||
1387 | /* | 1370 | /* |
1388 | * Need to be careful about this. Having buf->ops in module | 1371 | * Need to be careful about this. Having buf->ops in module |
1389 | * code can Oops if the buffer persists after module unload. | 1372 | * code can Oops if the buffer persists after module unload. |
1390 | */ | 1373 | */ |
1391 | buf->ops = &nosteal_pipe_buf_ops; | 1374 | bufs[page_nr].ops = &nosteal_pipe_buf_ops; |
1392 | 1375 | ret = add_to_pipe(pipe, &bufs[page_nr++]); | |
1393 | pipe->nrbufs++; | 1376 | if (unlikely(ret < 0)) |
1394 | page_nr++; | 1377 | break; |
1395 | ret += buf->len; | ||
1396 | |||
1397 | if (pipe->files) | ||
1398 | do_wakeup = 1; | ||
1399 | } | ||
1400 | |||
1401 | out_unlock: | ||
1402 | pipe_unlock(pipe); | ||
1403 | |||
1404 | if (do_wakeup) { | ||
1405 | smp_mb(); | ||
1406 | if (waitqueue_active(&pipe->wait)) | ||
1407 | wake_up_interruptible(&pipe->wait); | ||
1408 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | ||
1409 | } | 1378 | } |
1410 | 1379 | if (total) | |
1380 | ret = total; | ||
1411 | out: | 1381 | out: |
1412 | for (; page_nr < cs.nr_segs; page_nr++) | 1382 | for (; page_nr < cs.nr_segs; page_nr++) |
1413 | put_page(bufs[page_nr].page); | 1383 | put_page(bufs[page_nr].page); |
@@ -1992,7 +1962,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, | |||
1992 | pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); | 1962 | pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); |
1993 | pipe->nrbufs--; | 1963 | pipe->nrbufs--; |
1994 | } else { | 1964 | } else { |
1995 | ibuf->ops->get(pipe, ibuf); | 1965 | pipe_buf_get(pipe, ibuf); |
1996 | *obuf = *ibuf; | 1966 | *obuf = *ibuf; |
1997 | obuf->flags &= ~PIPE_BUF_FLAG_GIFT; | 1967 | obuf->flags &= ~PIPE_BUF_FLAG_GIFT; |
1998 | obuf->len = rem; | 1968 | obuf->len = rem; |
@@ -2014,10 +1984,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, | |||
2014 | 1984 | ||
2015 | ret = fuse_dev_do_write(fud, &cs, len); | 1985 | ret = fuse_dev_do_write(fud, &cs, len); |
2016 | 1986 | ||
2017 | for (idx = 0; idx < nbuf; idx++) { | 1987 | for (idx = 0; idx < nbuf; idx++) |
2018 | struct pipe_buffer *buf = &bufs[idx]; | 1988 | pipe_buf_release(pipe, &bufs[idx]); |
2019 | buf->ops->release(pipe, buf); | 1989 | |
2020 | } | ||
2021 | out: | 1990 | out: |
2022 | kfree(bufs); | 1991 | kfree(bufs); |
2023 | return ret; | 1992 | return ret; |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 360188f162bd..e23ff70b3435 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -954,30 +954,6 @@ out_uninit: | |||
954 | return ret; | 954 | return ret; |
955 | } | 955 | } |
956 | 956 | ||
957 | static ssize_t gfs2_file_splice_read(struct file *in, loff_t *ppos, | ||
958 | struct pipe_inode_info *pipe, size_t len, | ||
959 | unsigned int flags) | ||
960 | { | ||
961 | struct inode *inode = in->f_mapping->host; | ||
962 | struct gfs2_inode *ip = GFS2_I(inode); | ||
963 | struct gfs2_holder gh; | ||
964 | int ret; | ||
965 | |||
966 | inode_lock(inode); | ||
967 | |||
968 | ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); | ||
969 | if (ret) { | ||
970 | inode_unlock(inode); | ||
971 | return ret; | ||
972 | } | ||
973 | |||
974 | gfs2_glock_dq_uninit(&gh); | ||
975 | inode_unlock(inode); | ||
976 | |||
977 | return generic_file_splice_read(in, ppos, pipe, len, flags); | ||
978 | } | ||
979 | |||
980 | |||
981 | static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, | 957 | static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, |
982 | struct file *out, loff_t *ppos, | 958 | struct file *out, loff_t *ppos, |
983 | size_t len, unsigned int flags) | 959 | size_t len, unsigned int flags) |
@@ -1140,7 +1116,7 @@ const struct file_operations gfs2_file_fops = { | |||
1140 | .fsync = gfs2_fsync, | 1116 | .fsync = gfs2_fsync, |
1141 | .lock = gfs2_lock, | 1117 | .lock = gfs2_lock, |
1142 | .flock = gfs2_flock, | 1118 | .flock = gfs2_flock, |
1143 | .splice_read = gfs2_file_splice_read, | 1119 | .splice_read = generic_file_splice_read, |
1144 | .splice_write = gfs2_file_splice_write, | 1120 | .splice_write = gfs2_file_splice_write, |
1145 | .setlease = simple_nosetlease, | 1121 | .setlease = simple_nosetlease, |
1146 | .fallocate = gfs2_fallocate, | 1122 | .fallocate = gfs2_fallocate, |
@@ -1168,7 +1144,7 @@ const struct file_operations gfs2_file_fops_nolock = { | |||
1168 | .open = gfs2_open, | 1144 | .open = gfs2_open, |
1169 | .release = gfs2_release, | 1145 | .release = gfs2_release, |
1170 | .fsync = gfs2_fsync, | 1146 | .fsync = gfs2_fsync, |
1171 | .splice_read = gfs2_file_splice_read, | 1147 | .splice_read = generic_file_splice_read, |
1172 | .splice_write = gfs2_file_splice_write, | 1148 | .splice_write = gfs2_file_splice_write, |
1173 | .setlease = generic_setlease, | 1149 | .setlease = generic_setlease, |
1174 | .fallocate = gfs2_fallocate, | 1150 | .fallocate = gfs2_fallocate, |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index ca699ddc11c1..2efbdde36c3e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -182,29 +182,6 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) | |||
182 | } | 182 | } |
183 | EXPORT_SYMBOL_GPL(nfs_file_read); | 183 | EXPORT_SYMBOL_GPL(nfs_file_read); |
184 | 184 | ||
185 | ssize_t | ||
186 | nfs_file_splice_read(struct file *filp, loff_t *ppos, | ||
187 | struct pipe_inode_info *pipe, size_t count, | ||
188 | unsigned int flags) | ||
189 | { | ||
190 | struct inode *inode = file_inode(filp); | ||
191 | ssize_t res; | ||
192 | |||
193 | dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n", | ||
194 | filp, (unsigned long) count, (unsigned long long) *ppos); | ||
195 | |||
196 | nfs_start_io_read(inode); | ||
197 | res = nfs_revalidate_mapping(inode, filp->f_mapping); | ||
198 | if (!res) { | ||
199 | res = generic_file_splice_read(filp, ppos, pipe, count, flags); | ||
200 | if (res > 0) | ||
201 | nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res); | ||
202 | } | ||
203 | nfs_end_io_read(inode); | ||
204 | return res; | ||
205 | } | ||
206 | EXPORT_SYMBOL_GPL(nfs_file_splice_read); | ||
207 | |||
208 | int | 185 | int |
209 | nfs_file_mmap(struct file * file, struct vm_area_struct * vma) | 186 | nfs_file_mmap(struct file * file, struct vm_area_struct * vma) |
210 | { | 187 | { |
@@ -871,7 +848,7 @@ const struct file_operations nfs_file_operations = { | |||
871 | .fsync = nfs_file_fsync, | 848 | .fsync = nfs_file_fsync, |
872 | .lock = nfs_lock, | 849 | .lock = nfs_lock, |
873 | .flock = nfs_flock, | 850 | .flock = nfs_flock, |
874 | .splice_read = nfs_file_splice_read, | 851 | .splice_read = generic_file_splice_read, |
875 | .splice_write = iter_file_splice_write, | 852 | .splice_write = iter_file_splice_write, |
876 | .check_flags = nfs_check_flags, | 853 | .check_flags = nfs_check_flags, |
877 | .setlease = simple_nosetlease, | 854 | .setlease = simple_nosetlease, |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 74935a19e4bf..d7b062bdc504 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -365,8 +365,6 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *) | |||
365 | int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); | 365 | int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); |
366 | loff_t nfs_file_llseek(struct file *, loff_t, int); | 366 | loff_t nfs_file_llseek(struct file *, loff_t, int); |
367 | ssize_t nfs_file_read(struct kiocb *, struct iov_iter *); | 367 | ssize_t nfs_file_read(struct kiocb *, struct iov_iter *); |
368 | ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, | ||
369 | size_t, unsigned int); | ||
370 | int nfs_file_mmap(struct file *, struct vm_area_struct *); | 368 | int nfs_file_mmap(struct file *, struct vm_area_struct *); |
371 | ssize_t nfs_file_write(struct kiocb *, struct iov_iter *); | 369 | ssize_t nfs_file_write(struct kiocb *, struct iov_iter *); |
372 | int nfs_file_release(struct inode *, struct file *); | 370 | int nfs_file_release(struct inode *, struct file *); |
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index d085ad794884..89a77950e0b0 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c | |||
@@ -248,7 +248,7 @@ const struct file_operations nfs4_file_operations = { | |||
248 | .fsync = nfs_file_fsync, | 248 | .fsync = nfs_file_fsync, |
249 | .lock = nfs_lock, | 249 | .lock = nfs_lock, |
250 | .flock = nfs_flock, | 250 | .flock = nfs_flock, |
251 | .splice_read = nfs_file_splice_read, | 251 | .splice_read = generic_file_splice_read, |
252 | .splice_write = iter_file_splice_write, | 252 | .splice_write = iter_file_splice_write, |
253 | .check_flags = nfs_check_flags, | 253 | .check_flags = nfs_check_flags, |
254 | .setlease = simple_nosetlease, | 254 | .setlease = simple_nosetlease, |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 0b055bfb8e86..8f91639f8364 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -2321,36 +2321,6 @@ out_mutex: | |||
2321 | return ret; | 2321 | return ret; |
2322 | } | 2322 | } |
2323 | 2323 | ||
2324 | static ssize_t ocfs2_file_splice_read(struct file *in, | ||
2325 | loff_t *ppos, | ||
2326 | struct pipe_inode_info *pipe, | ||
2327 | size_t len, | ||
2328 | unsigned int flags) | ||
2329 | { | ||
2330 | int ret = 0, lock_level = 0; | ||
2331 | struct inode *inode = file_inode(in); | ||
2332 | |||
2333 | trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry, | ||
2334 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
2335 | in->f_path.dentry->d_name.len, | ||
2336 | in->f_path.dentry->d_name.name, len); | ||
2337 | |||
2338 | /* | ||
2339 | * See the comment in ocfs2_file_read_iter() | ||
2340 | */ | ||
2341 | ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level); | ||
2342 | if (ret < 0) { | ||
2343 | mlog_errno(ret); | ||
2344 | goto bail; | ||
2345 | } | ||
2346 | ocfs2_inode_unlock(inode, lock_level); | ||
2347 | |||
2348 | ret = generic_file_splice_read(in, ppos, pipe, len, flags); | ||
2349 | |||
2350 | bail: | ||
2351 | return ret; | ||
2352 | } | ||
2353 | |||
2354 | static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, | 2324 | static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, |
2355 | struct iov_iter *to) | 2325 | struct iov_iter *to) |
2356 | { | 2326 | { |
@@ -2509,7 +2479,7 @@ const struct file_operations ocfs2_fops = { | |||
2509 | #endif | 2479 | #endif |
2510 | .lock = ocfs2_lock, | 2480 | .lock = ocfs2_lock, |
2511 | .flock = ocfs2_flock, | 2481 | .flock = ocfs2_flock, |
2512 | .splice_read = ocfs2_file_splice_read, | 2482 | .splice_read = generic_file_splice_read, |
2513 | .splice_write = iter_file_splice_write, | 2483 | .splice_write = iter_file_splice_write, |
2514 | .fallocate = ocfs2_fallocate, | 2484 | .fallocate = ocfs2_fallocate, |
2515 | }; | 2485 | }; |
@@ -2554,7 +2524,7 @@ const struct file_operations ocfs2_fops_no_plocks = { | |||
2554 | .compat_ioctl = ocfs2_compat_ioctl, | 2524 | .compat_ioctl = ocfs2_compat_ioctl, |
2555 | #endif | 2525 | #endif |
2556 | .flock = ocfs2_flock, | 2526 | .flock = ocfs2_flock, |
2557 | .splice_read = ocfs2_file_splice_read, | 2527 | .splice_read = generic_file_splice_read, |
2558 | .splice_write = iter_file_splice_write, | 2528 | .splice_write = iter_file_splice_write, |
2559 | .fallocate = ocfs2_fallocate, | 2529 | .fallocate = ocfs2_fallocate, |
2560 | }; | 2530 | }; |
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index f8f5fc5e6c05..0b58abcf1c6d 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h | |||
@@ -1314,8 +1314,6 @@ DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_write); | |||
1314 | 1314 | ||
1315 | DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write); | 1315 | DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write); |
1316 | 1316 | ||
1317 | DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read); | ||
1318 | |||
1319 | DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read); | 1317 | DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read); |
1320 | 1318 | ||
1321 | DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file); | 1319 | DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file); |
@@ -267,7 +267,6 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) | |||
267 | if (bufs) { | 267 | if (bufs) { |
268 | int curbuf = pipe->curbuf; | 268 | int curbuf = pipe->curbuf; |
269 | struct pipe_buffer *buf = pipe->bufs + curbuf; | 269 | struct pipe_buffer *buf = pipe->bufs + curbuf; |
270 | const struct pipe_buf_operations *ops = buf->ops; | ||
271 | size_t chars = buf->len; | 270 | size_t chars = buf->len; |
272 | size_t written; | 271 | size_t written; |
273 | int error; | 272 | int error; |
@@ -275,7 +274,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) | |||
275 | if (chars > total_len) | 274 | if (chars > total_len) |
276 | chars = total_len; | 275 | chars = total_len; |
277 | 276 | ||
278 | error = ops->confirm(pipe, buf); | 277 | error = pipe_buf_confirm(pipe, buf); |
279 | if (error) { | 278 | if (error) { |
280 | if (!ret) | 279 | if (!ret) |
281 | ret = error; | 280 | ret = error; |
@@ -299,8 +298,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) | |||
299 | } | 298 | } |
300 | 299 | ||
301 | if (!buf->len) { | 300 | if (!buf->len) { |
302 | buf->ops = NULL; | 301 | pipe_buf_release(pipe, buf); |
303 | ops->release(pipe, buf); | ||
304 | curbuf = (curbuf + 1) & (pipe->buffers - 1); | 302 | curbuf = (curbuf + 1) & (pipe->buffers - 1); |
305 | pipe->curbuf = curbuf; | 303 | pipe->curbuf = curbuf; |
306 | pipe->nrbufs = --bufs; | 304 | pipe->nrbufs = --bufs; |
@@ -383,11 +381,10 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) | |||
383 | int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) & | 381 | int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) & |
384 | (pipe->buffers - 1); | 382 | (pipe->buffers - 1); |
385 | struct pipe_buffer *buf = pipe->bufs + lastbuf; | 383 | struct pipe_buffer *buf = pipe->bufs + lastbuf; |
386 | const struct pipe_buf_operations *ops = buf->ops; | ||
387 | int offset = buf->offset + buf->len; | 384 | int offset = buf->offset + buf->len; |
388 | 385 | ||
389 | if (ops->can_merge && offset + chars <= PAGE_SIZE) { | 386 | if (buf->ops->can_merge && offset + chars <= PAGE_SIZE) { |
390 | ret = ops->confirm(pipe, buf); | 387 | ret = pipe_buf_confirm(pipe, buf); |
391 | if (ret) | 388 | if (ret) |
392 | goto out; | 389 | goto out; |
393 | 390 | ||
@@ -664,7 +661,7 @@ void free_pipe_info(struct pipe_inode_info *pipe) | |||
664 | for (i = 0; i < pipe->buffers; i++) { | 661 | for (i = 0; i < pipe->buffers; i++) { |
665 | struct pipe_buffer *buf = pipe->bufs + i; | 662 | struct pipe_buffer *buf = pipe->bufs + i; |
666 | if (buf->ops) | 663 | if (buf->ops) |
667 | buf->ops->release(pipe, buf); | 664 | pipe_buf_release(pipe, buf); |
668 | } | 665 | } |
669 | if (pipe->tmp_page) | 666 | if (pipe->tmp_page) |
670 | __free_page(pipe->tmp_page); | 667 | __free_page(pipe->tmp_page); |
diff --git a/fs/splice.c b/fs/splice.c index dd9bf7e410d2..aa38901a4f10 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -183,82 +183,39 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
183 | struct splice_pipe_desc *spd) | 183 | struct splice_pipe_desc *spd) |
184 | { | 184 | { |
185 | unsigned int spd_pages = spd->nr_pages; | 185 | unsigned int spd_pages = spd->nr_pages; |
186 | int ret, do_wakeup, page_nr; | 186 | int ret = 0, page_nr = 0; |
187 | 187 | ||
188 | if (!spd_pages) | 188 | if (!spd_pages) |
189 | return 0; | 189 | return 0; |
190 | 190 | ||
191 | ret = 0; | 191 | if (unlikely(!pipe->readers)) { |
192 | do_wakeup = 0; | 192 | send_sig(SIGPIPE, current, 0); |
193 | page_nr = 0; | 193 | ret = -EPIPE; |
194 | 194 | goto out; | |
195 | pipe_lock(pipe); | 195 | } |
196 | |||
197 | for (;;) { | ||
198 | if (!pipe->readers) { | ||
199 | send_sig(SIGPIPE, current, 0); | ||
200 | if (!ret) | ||
201 | ret = -EPIPE; | ||
202 | break; | ||
203 | } | ||
204 | |||
205 | if (pipe->nrbufs < pipe->buffers) { | ||
206 | int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); | ||
207 | struct pipe_buffer *buf = pipe->bufs + newbuf; | ||
208 | |||
209 | buf->page = spd->pages[page_nr]; | ||
210 | buf->offset = spd->partial[page_nr].offset; | ||
211 | buf->len = spd->partial[page_nr].len; | ||
212 | buf->private = spd->partial[page_nr].private; | ||
213 | buf->ops = spd->ops; | ||
214 | if (spd->flags & SPLICE_F_GIFT) | ||
215 | buf->flags |= PIPE_BUF_FLAG_GIFT; | ||
216 | |||
217 | pipe->nrbufs++; | ||
218 | page_nr++; | ||
219 | ret += buf->len; | ||
220 | |||
221 | if (pipe->files) | ||
222 | do_wakeup = 1; | ||
223 | 196 | ||
224 | if (!--spd->nr_pages) | 197 | while (pipe->nrbufs < pipe->buffers) { |
225 | break; | 198 | int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); |
226 | if (pipe->nrbufs < pipe->buffers) | 199 | struct pipe_buffer *buf = pipe->bufs + newbuf; |
227 | continue; | ||
228 | 200 | ||
229 | break; | 201 | buf->page = spd->pages[page_nr]; |
230 | } | 202 | buf->offset = spd->partial[page_nr].offset; |
203 | buf->len = spd->partial[page_nr].len; | ||
204 | buf->private = spd->partial[page_nr].private; | ||
205 | buf->ops = spd->ops; | ||
231 | 206 | ||
232 | if (spd->flags & SPLICE_F_NONBLOCK) { | 207 | pipe->nrbufs++; |
233 | if (!ret) | 208 | page_nr++; |
234 | ret = -EAGAIN; | 209 | ret += buf->len; |
235 | break; | ||
236 | } | ||
237 | 210 | ||
238 | if (signal_pending(current)) { | 211 | if (!--spd->nr_pages) |
239 | if (!ret) | ||
240 | ret = -ERESTARTSYS; | ||
241 | break; | 212 | break; |
242 | } | ||
243 | |||
244 | if (do_wakeup) { | ||
245 | smp_mb(); | ||
246 | if (waitqueue_active(&pipe->wait)) | ||
247 | wake_up_interruptible_sync(&pipe->wait); | ||
248 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | ||
249 | do_wakeup = 0; | ||
250 | } | ||
251 | |||
252 | pipe->waiting_writers++; | ||
253 | pipe_wait(pipe); | ||
254 | pipe->waiting_writers--; | ||
255 | } | 213 | } |
256 | 214 | ||
257 | pipe_unlock(pipe); | 215 | if (!ret) |
258 | 216 | ret = -EAGAIN; | |
259 | if (do_wakeup) | ||
260 | wakeup_pipe_readers(pipe); | ||
261 | 217 | ||
218 | out: | ||
262 | while (page_nr < spd_pages) | 219 | while (page_nr < spd_pages) |
263 | spd->spd_release(spd, page_nr++); | 220 | spd->spd_release(spd, page_nr++); |
264 | 221 | ||
@@ -266,6 +223,26 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
266 | } | 223 | } |
267 | EXPORT_SYMBOL_GPL(splice_to_pipe); | 224 | EXPORT_SYMBOL_GPL(splice_to_pipe); |
268 | 225 | ||
226 | ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf) | ||
227 | { | ||
228 | int ret; | ||
229 | |||
230 | if (unlikely(!pipe->readers)) { | ||
231 | send_sig(SIGPIPE, current, 0); | ||
232 | ret = -EPIPE; | ||
233 | } else if (pipe->nrbufs == pipe->buffers) { | ||
234 | ret = -EAGAIN; | ||
235 | } else { | ||
236 | int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); | ||
237 | pipe->bufs[newbuf] = *buf; | ||
238 | pipe->nrbufs++; | ||
239 | return buf->len; | ||
240 | } | ||
241 | pipe_buf_release(pipe, buf); | ||
242 | return ret; | ||
243 | } | ||
244 | EXPORT_SYMBOL(add_to_pipe); | ||
245 | |||
269 | void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) | 246 | void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) |
270 | { | 247 | { |
271 | put_page(spd->pages[i]); | 248 | put_page(spd->pages[i]); |
@@ -303,207 +280,6 @@ void splice_shrink_spd(struct splice_pipe_desc *spd) | |||
303 | kfree(spd->partial); | 280 | kfree(spd->partial); |
304 | } | 281 | } |
305 | 282 | ||
306 | static int | ||
307 | __generic_file_splice_read(struct file *in, loff_t *ppos, | ||
308 | struct pipe_inode_info *pipe, size_t len, | ||
309 | unsigned int flags) | ||
310 | { | ||
311 | struct address_space *mapping = in->f_mapping; | ||
312 | unsigned int loff, nr_pages, req_pages; | ||
313 | struct page *pages[PIPE_DEF_BUFFERS]; | ||
314 | struct partial_page partial[PIPE_DEF_BUFFERS]; | ||
315 | struct page *page; | ||
316 | pgoff_t index, end_index; | ||
317 | loff_t isize; | ||
318 | int error, page_nr; | ||
319 | struct splice_pipe_desc spd = { | ||
320 | .pages = pages, | ||
321 | .partial = partial, | ||
322 | .nr_pages_max = PIPE_DEF_BUFFERS, | ||
323 | .flags = flags, | ||
324 | .ops = &page_cache_pipe_buf_ops, | ||
325 | .spd_release = spd_release_page, | ||
326 | }; | ||
327 | |||
328 | if (splice_grow_spd(pipe, &spd)) | ||
329 | return -ENOMEM; | ||
330 | |||
331 | index = *ppos >> PAGE_SHIFT; | ||
332 | loff = *ppos & ~PAGE_MASK; | ||
333 | req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
334 | nr_pages = min(req_pages, spd.nr_pages_max); | ||
335 | |||
336 | /* | ||
337 | * Lookup the (hopefully) full range of pages we need. | ||
338 | */ | ||
339 | spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages); | ||
340 | index += spd.nr_pages; | ||
341 | |||
342 | /* | ||
343 | * If find_get_pages_contig() returned fewer pages than we needed, | ||
344 | * readahead/allocate the rest and fill in the holes. | ||
345 | */ | ||
346 | if (spd.nr_pages < nr_pages) | ||
347 | page_cache_sync_readahead(mapping, &in->f_ra, in, | ||
348 | index, req_pages - spd.nr_pages); | ||
349 | |||
350 | error = 0; | ||
351 | while (spd.nr_pages < nr_pages) { | ||
352 | /* | ||
353 | * Page could be there, find_get_pages_contig() breaks on | ||
354 | * the first hole. | ||
355 | */ | ||
356 | page = find_get_page(mapping, index); | ||
357 | if (!page) { | ||
358 | /* | ||
359 | * page didn't exist, allocate one. | ||
360 | */ | ||
361 | page = page_cache_alloc_cold(mapping); | ||
362 | if (!page) | ||
363 | break; | ||
364 | |||
365 | error = add_to_page_cache_lru(page, mapping, index, | ||
366 | mapping_gfp_constraint(mapping, GFP_KERNEL)); | ||
367 | if (unlikely(error)) { | ||
368 | put_page(page); | ||
369 | if (error == -EEXIST) | ||
370 | continue; | ||
371 | break; | ||
372 | } | ||
373 | /* | ||
374 | * add_to_page_cache() locks the page, unlock it | ||
375 | * to avoid convoluting the logic below even more. | ||
376 | */ | ||
377 | unlock_page(page); | ||
378 | } | ||
379 | |||
380 | spd.pages[spd.nr_pages++] = page; | ||
381 | index++; | ||
382 | } | ||
383 | |||
384 | /* | ||
385 | * Now loop over the map and see if we need to start IO on any | ||
386 | * pages, fill in the partial map, etc. | ||
387 | */ | ||
388 | index = *ppos >> PAGE_SHIFT; | ||
389 | nr_pages = spd.nr_pages; | ||
390 | spd.nr_pages = 0; | ||
391 | for (page_nr = 0; page_nr < nr_pages; page_nr++) { | ||
392 | unsigned int this_len; | ||
393 | |||
394 | if (!len) | ||
395 | break; | ||
396 | |||
397 | /* | ||
398 | * this_len is the max we'll use from this page | ||
399 | */ | ||
400 | this_len = min_t(unsigned long, len, PAGE_SIZE - loff); | ||
401 | page = spd.pages[page_nr]; | ||
402 | |||
403 | if (PageReadahead(page)) | ||
404 | page_cache_async_readahead(mapping, &in->f_ra, in, | ||
405 | page, index, req_pages - page_nr); | ||
406 | |||
407 | /* | ||
408 | * If the page isn't uptodate, we may need to start io on it | ||
409 | */ | ||
410 | if (!PageUptodate(page)) { | ||
411 | lock_page(page); | ||
412 | |||
413 | /* | ||
414 | * Page was truncated, or invalidated by the | ||
415 | * filesystem. Redo the find/create, but this time the | ||
416 | * page is kept locked, so there's no chance of another | ||
417 | * race with truncate/invalidate. | ||
418 | */ | ||
419 | if (!page->mapping) { | ||
420 | unlock_page(page); | ||
421 | retry_lookup: | ||
422 | page = find_or_create_page(mapping, index, | ||
423 | mapping_gfp_mask(mapping)); | ||
424 | |||
425 | if (!page) { | ||
426 | error = -ENOMEM; | ||
427 | break; | ||
428 | } | ||
429 | put_page(spd.pages[page_nr]); | ||
430 | spd.pages[page_nr] = page; | ||
431 | } | ||
432 | /* | ||
433 | * page was already under io and is now done, great | ||
434 | */ | ||
435 | if (PageUptodate(page)) { | ||
436 | unlock_page(page); | ||
437 | goto fill_it; | ||
438 | } | ||
439 | |||
440 | /* | ||
441 | * need to read in the page | ||
442 | */ | ||
443 | error = mapping->a_ops->readpage(in, page); | ||
444 | if (unlikely(error)) { | ||
445 | /* | ||
446 | * Re-lookup the page | ||
447 | */ | ||
448 | if (error == AOP_TRUNCATED_PAGE) | ||
449 | goto retry_lookup; | ||
450 | |||
451 | break; | ||
452 | } | ||
453 | } | ||
454 | fill_it: | ||
455 | /* | ||
456 | * i_size must be checked after PageUptodate. | ||
457 | */ | ||
458 | isize = i_size_read(mapping->host); | ||
459 | end_index = (isize - 1) >> PAGE_SHIFT; | ||
460 | if (unlikely(!isize || index > end_index)) | ||
461 | break; | ||
462 | |||
463 | /* | ||
464 | * if this is the last page, see if we need to shrink | ||
465 | * the length and stop | ||
466 | */ | ||
467 | if (end_index == index) { | ||
468 | unsigned int plen; | ||
469 | |||
470 | /* | ||
471 | * max good bytes in this page | ||
472 | */ | ||
473 | plen = ((isize - 1) & ~PAGE_MASK) + 1; | ||
474 | if (plen <= loff) | ||
475 | break; | ||
476 | |||
477 | /* | ||
478 | * force quit after adding this page | ||
479 | */ | ||
480 | this_len = min(this_len, plen - loff); | ||
481 | len = this_len; | ||
482 | } | ||
483 | |||
484 | spd.partial[page_nr].offset = loff; | ||
485 | spd.partial[page_nr].len = this_len; | ||
486 | len -= this_len; | ||
487 | loff = 0; | ||
488 | spd.nr_pages++; | ||
489 | index++; | ||
490 | } | ||
491 | |||
492 | /* | ||
493 | * Release any pages at the end, if we quit early. 'page_nr' is how far | ||
494 | * we got, 'nr_pages' is how many pages are in the map. | ||
495 | */ | ||
496 | while (page_nr < nr_pages) | ||
497 | put_page(spd.pages[page_nr++]); | ||
498 | in->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT; | ||
499 | |||
500 | if (spd.nr_pages) | ||
501 | error = splice_to_pipe(pipe, &spd); | ||
502 | |||
503 | splice_shrink_spd(&spd); | ||
504 | return error; | ||
505 | } | ||
506 | |||
507 | /** | 283 | /** |
508 | * generic_file_splice_read - splice data from file to a pipe | 284 | * generic_file_splice_read - splice data from file to a pipe |
509 | * @in: file to splice from | 285 | * @in: file to splice from |
@@ -514,39 +290,53 @@ fill_it: | |||
514 | * | 290 | * |
515 | * Description: | 291 | * Description: |
516 | * Will read pages from given file and fill them into a pipe. Can be | 292 | * Will read pages from given file and fill them into a pipe. Can be |
517 | * used as long as the address_space operations for the source implements | 293 | * used as long as it has more or less sane ->read_iter(). |
518 | * a readpage() hook. | ||
519 | * | 294 | * |
520 | */ | 295 | */ |
521 | ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, | 296 | ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, |
522 | struct pipe_inode_info *pipe, size_t len, | 297 | struct pipe_inode_info *pipe, size_t len, |
523 | unsigned int flags) | 298 | unsigned int flags) |
524 | { | 299 | { |
525 | loff_t isize, left; | 300 | struct iov_iter to; |
526 | int ret; | 301 | struct kiocb kiocb; |
527 | 302 | loff_t isize; | |
528 | if (IS_DAX(in->f_mapping->host)) | 303 | int idx, ret; |
529 | return default_file_splice_read(in, ppos, pipe, len, flags); | ||
530 | 304 | ||
531 | isize = i_size_read(in->f_mapping->host); | 305 | isize = i_size_read(in->f_mapping->host); |
532 | if (unlikely(*ppos >= isize)) | 306 | if (unlikely(*ppos >= isize)) |
533 | return 0; | 307 | return 0; |
534 | 308 | ||
535 | left = isize - *ppos; | 309 | iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len); |
536 | if (unlikely(left < len)) | 310 | idx = to.idx; |
537 | len = left; | 311 | init_sync_kiocb(&kiocb, in); |
538 | 312 | kiocb.ki_pos = *ppos; | |
539 | ret = __generic_file_splice_read(in, ppos, pipe, len, flags); | 313 | ret = in->f_op->read_iter(&kiocb, &to); |
540 | if (ret > 0) { | 314 | if (ret > 0) { |
541 | *ppos += ret; | 315 | *ppos = kiocb.ki_pos; |
542 | file_accessed(in); | 316 | file_accessed(in); |
317 | } else if (ret < 0) { | ||
318 | if (WARN_ON(to.idx != idx || to.iov_offset)) { | ||
319 | /* | ||
320 | * a bogus ->read_iter() has copied something and still | ||
321 | * returned an error instead of a short read. | ||
322 | */ | ||
323 | to.idx = idx; | ||
324 | to.iov_offset = 0; | ||
325 | iov_iter_advance(&to, 0); /* to free what was emitted */ | ||
326 | } | ||
327 | /* | ||
328 | * callers of ->splice_read() expect -EAGAIN on | ||
329 | * "can't put anything in there", rather than -EFAULT. | ||
330 | */ | ||
331 | if (ret == -EFAULT) | ||
332 | ret = -EAGAIN; | ||
543 | } | 333 | } |
544 | 334 | ||
545 | return ret; | 335 | return ret; |
546 | } | 336 | } |
547 | EXPORT_SYMBOL(generic_file_splice_read); | 337 | EXPORT_SYMBOL(generic_file_splice_read); |
548 | 338 | ||
549 | static const struct pipe_buf_operations default_pipe_buf_ops = { | 339 | const struct pipe_buf_operations default_pipe_buf_ops = { |
550 | .can_merge = 0, | 340 | .can_merge = 0, |
551 | .confirm = generic_pipe_buf_confirm, | 341 | .confirm = generic_pipe_buf_confirm, |
552 | .release = generic_pipe_buf_release, | 342 | .release = generic_pipe_buf_release, |
@@ -570,7 +360,7 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = { | |||
570 | }; | 360 | }; |
571 | EXPORT_SYMBOL(nosteal_pipe_buf_ops); | 361 | EXPORT_SYMBOL(nosteal_pipe_buf_ops); |
572 | 362 | ||
573 | static ssize_t kernel_readv(struct file *file, const struct iovec *vec, | 363 | static ssize_t kernel_readv(struct file *file, const struct kvec *vec, |
574 | unsigned long vlen, loff_t offset) | 364 | unsigned long vlen, loff_t offset) |
575 | { | 365 | { |
576 | mm_segment_t old_fs; | 366 | mm_segment_t old_fs; |
@@ -602,102 +392,70 @@ ssize_t kernel_write(struct file *file, const char *buf, size_t count, | |||
602 | } | 392 | } |
603 | EXPORT_SYMBOL(kernel_write); | 393 | EXPORT_SYMBOL(kernel_write); |
604 | 394 | ||
605 | ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | 395 | static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, |
606 | struct pipe_inode_info *pipe, size_t len, | 396 | struct pipe_inode_info *pipe, size_t len, |
607 | unsigned int flags) | 397 | unsigned int flags) |
608 | { | 398 | { |
399 | struct kvec *vec, __vec[PIPE_DEF_BUFFERS]; | ||
400 | struct iov_iter to; | ||
401 | struct page **pages; | ||
609 | unsigned int nr_pages; | 402 | unsigned int nr_pages; |
610 | unsigned int nr_freed; | 403 | size_t offset, dummy, copied = 0; |
611 | size_t offset; | ||
612 | struct page *pages[PIPE_DEF_BUFFERS]; | ||
613 | struct partial_page partial[PIPE_DEF_BUFFERS]; | ||
614 | struct iovec *vec, __vec[PIPE_DEF_BUFFERS]; | ||
615 | ssize_t res; | 404 | ssize_t res; |
616 | size_t this_len; | ||
617 | int error; | ||
618 | int i; | 405 | int i; |
619 | struct splice_pipe_desc spd = { | ||
620 | .pages = pages, | ||
621 | .partial = partial, | ||
622 | .nr_pages_max = PIPE_DEF_BUFFERS, | ||
623 | .flags = flags, | ||
624 | .ops = &default_pipe_buf_ops, | ||
625 | .spd_release = spd_release_page, | ||
626 | }; | ||
627 | 406 | ||
628 | if (splice_grow_spd(pipe, &spd)) | 407 | if (pipe->nrbufs == pipe->buffers) |
408 | return -EAGAIN; | ||
409 | |||
410 | /* | ||
411 | * Try to keep page boundaries matching to source pagecache ones - | ||
412 | * it probably won't be much help, but... | ||
413 | */ | ||
414 | offset = *ppos & ~PAGE_MASK; | ||
415 | |||
416 | iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset); | ||
417 | |||
418 | res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &dummy); | ||
419 | if (res <= 0) | ||
629 | return -ENOMEM; | 420 | return -ENOMEM; |
630 | 421 | ||
631 | res = -ENOMEM; | 422 | nr_pages = res / PAGE_SIZE; |
423 | |||
632 | vec = __vec; | 424 | vec = __vec; |
633 | if (spd.nr_pages_max > PIPE_DEF_BUFFERS) { | 425 | if (nr_pages > PIPE_DEF_BUFFERS) { |
634 | vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL); | 426 | vec = kmalloc(nr_pages * sizeof(struct kvec), GFP_KERNEL); |
635 | if (!vec) | 427 | if (unlikely(!vec)) { |
636 | goto shrink_ret; | 428 | res = -ENOMEM; |
429 | goto out; | ||
430 | } | ||
637 | } | 431 | } |
638 | 432 | ||
639 | offset = *ppos & ~PAGE_MASK; | 433 | pipe->bufs[to.idx].offset = offset; |
640 | nr_pages = (len + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; | 434 | pipe->bufs[to.idx].len -= offset; |
641 | |||
642 | for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) { | ||
643 | struct page *page; | ||
644 | 435 | ||
645 | page = alloc_page(GFP_USER); | 436 | for (i = 0; i < nr_pages; i++) { |
646 | error = -ENOMEM; | 437 | size_t this_len = min_t(size_t, len, PAGE_SIZE - offset); |
647 | if (!page) | 438 | vec[i].iov_base = page_address(pages[i]) + offset; |
648 | goto err; | ||
649 | |||
650 | this_len = min_t(size_t, len, PAGE_SIZE - offset); | ||
651 | vec[i].iov_base = (void __user *) page_address(page); | ||
652 | vec[i].iov_len = this_len; | 439 | vec[i].iov_len = this_len; |
653 | spd.pages[i] = page; | ||
654 | spd.nr_pages++; | ||
655 | len -= this_len; | 440 | len -= this_len; |
656 | offset = 0; | 441 | offset = 0; |
657 | } | 442 | } |
658 | 443 | ||
659 | res = kernel_readv(in, vec, spd.nr_pages, *ppos); | 444 | res = kernel_readv(in, vec, nr_pages, *ppos); |
660 | if (res < 0) { | 445 | if (res > 0) { |
661 | error = res; | 446 | copied = res; |
662 | goto err; | ||
663 | } | ||
664 | |||
665 | error = 0; | ||
666 | if (!res) | ||
667 | goto err; | ||
668 | |||
669 | nr_freed = 0; | ||
670 | for (i = 0; i < spd.nr_pages; i++) { | ||
671 | this_len = min_t(size_t, vec[i].iov_len, res); | ||
672 | spd.partial[i].offset = 0; | ||
673 | spd.partial[i].len = this_len; | ||
674 | if (!this_len) { | ||
675 | __free_page(spd.pages[i]); | ||
676 | spd.pages[i] = NULL; | ||
677 | nr_freed++; | ||
678 | } | ||
679 | res -= this_len; | ||
680 | } | ||
681 | spd.nr_pages -= nr_freed; | ||
682 | |||
683 | res = splice_to_pipe(pipe, &spd); | ||
684 | if (res > 0) | ||
685 | *ppos += res; | 447 | *ppos += res; |
448 | } | ||
686 | 449 | ||
687 | shrink_ret: | ||
688 | if (vec != __vec) | 450 | if (vec != __vec) |
689 | kfree(vec); | 451 | kfree(vec); |
690 | splice_shrink_spd(&spd); | 452 | out: |
453 | for (i = 0; i < nr_pages; i++) | ||
454 | put_page(pages[i]); | ||
455 | kvfree(pages); | ||
456 | iov_iter_advance(&to, copied); /* truncates and discards */ | ||
691 | return res; | 457 | return res; |
692 | |||
693 | err: | ||
694 | for (i = 0; i < spd.nr_pages; i++) | ||
695 | __free_page(spd.pages[i]); | ||
696 | |||
697 | res = error; | ||
698 | goto shrink_ret; | ||
699 | } | 458 | } |
700 | EXPORT_SYMBOL(default_file_splice_read); | ||
701 | 459 | ||
702 | /* | 460 | /* |
703 | * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' | 461 | * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' |
@@ -757,13 +515,12 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des | |||
757 | 515 | ||
758 | while (pipe->nrbufs) { | 516 | while (pipe->nrbufs) { |
759 | struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; | 517 | struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; |
760 | const struct pipe_buf_operations *ops = buf->ops; | ||
761 | 518 | ||
762 | sd->len = buf->len; | 519 | sd->len = buf->len; |
763 | if (sd->len > sd->total_len) | 520 | if (sd->len > sd->total_len) |
764 | sd->len = sd->total_len; | 521 | sd->len = sd->total_len; |
765 | 522 | ||
766 | ret = buf->ops->confirm(pipe, buf); | 523 | ret = pipe_buf_confirm(pipe, buf); |
767 | if (unlikely(ret)) { | 524 | if (unlikely(ret)) { |
768 | if (ret == -ENODATA) | 525 | if (ret == -ENODATA) |
769 | ret = 0; | 526 | ret = 0; |
@@ -783,8 +540,7 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des | |||
783 | sd->total_len -= ret; | 540 | sd->total_len -= ret; |
784 | 541 | ||
785 | if (!buf->len) { | 542 | if (!buf->len) { |
786 | buf->ops = NULL; | 543 | pipe_buf_release(pipe, buf); |
787 | ops->release(pipe, buf); | ||
788 | pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); | 544 | pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); |
789 | pipe->nrbufs--; | 545 | pipe->nrbufs--; |
790 | if (pipe->files) | 546 | if (pipe->files) |
@@ -1003,7 +759,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
1003 | if (idx == pipe->buffers - 1) | 759 | if (idx == pipe->buffers - 1) |
1004 | idx = -1; | 760 | idx = -1; |
1005 | 761 | ||
1006 | ret = buf->ops->confirm(pipe, buf); | 762 | ret = pipe_buf_confirm(pipe, buf); |
1007 | if (unlikely(ret)) { | 763 | if (unlikely(ret)) { |
1008 | if (ret == -ENODATA) | 764 | if (ret == -ENODATA) |
1009 | ret = 0; | 765 | ret = 0; |
@@ -1030,11 +786,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
1030 | while (ret) { | 786 | while (ret) { |
1031 | struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; | 787 | struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; |
1032 | if (ret >= buf->len) { | 788 | if (ret >= buf->len) { |
1033 | const struct pipe_buf_operations *ops = buf->ops; | ||
1034 | ret -= buf->len; | 789 | ret -= buf->len; |
1035 | buf->len = 0; | 790 | buf->len = 0; |
1036 | buf->ops = NULL; | 791 | pipe_buf_release(pipe, buf); |
1037 | ops->release(pipe, buf); | ||
1038 | pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); | 792 | pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); |
1039 | pipe->nrbufs--; | 793 | pipe->nrbufs--; |
1040 | if (pipe->files) | 794 | if (pipe->files) |
@@ -1273,10 +1027,8 @@ out_release: | |||
1273 | for (i = 0; i < pipe->buffers; i++) { | 1027 | for (i = 0; i < pipe->buffers; i++) { |
1274 | struct pipe_buffer *buf = pipe->bufs + i; | 1028 | struct pipe_buffer *buf = pipe->bufs + i; |
1275 | 1029 | ||
1276 | if (buf->ops) { | 1030 | if (buf->ops) |
1277 | buf->ops->release(pipe, buf); | 1031 | pipe_buf_release(pipe, buf); |
1278 | buf->ops = NULL; | ||
1279 | } | ||
1280 | } | 1032 | } |
1281 | 1033 | ||
1282 | if (!bytes) | 1034 | if (!bytes) |
@@ -1342,6 +1094,20 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | |||
1342 | } | 1094 | } |
1343 | EXPORT_SYMBOL(do_splice_direct); | 1095 | EXPORT_SYMBOL(do_splice_direct); |
1344 | 1096 | ||
1097 | static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) | ||
1098 | { | ||
1099 | while (pipe->nrbufs == pipe->buffers) { | ||
1100 | if (flags & SPLICE_F_NONBLOCK) | ||
1101 | return -EAGAIN; | ||
1102 | if (signal_pending(current)) | ||
1103 | return -ERESTARTSYS; | ||
1104 | pipe->waiting_writers++; | ||
1105 | pipe_wait(pipe); | ||
1106 | pipe->waiting_writers--; | ||
1107 | } | ||
1108 | return 0; | ||
1109 | } | ||
1110 | |||
1345 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, | 1111 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, |
1346 | struct pipe_inode_info *opipe, | 1112 | struct pipe_inode_info *opipe, |
1347 | size_t len, unsigned int flags); | 1113 | size_t len, unsigned int flags); |
@@ -1424,8 +1190,13 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1424 | offset = in->f_pos; | 1190 | offset = in->f_pos; |
1425 | } | 1191 | } |
1426 | 1192 | ||
1427 | ret = do_splice_to(in, &offset, opipe, len, flags); | 1193 | pipe_lock(opipe); |
1428 | 1194 | ret = wait_for_space(opipe, flags); | |
1195 | if (!ret) | ||
1196 | ret = do_splice_to(in, &offset, opipe, len, flags); | ||
1197 | pipe_unlock(opipe); | ||
1198 | if (ret > 0) | ||
1199 | wakeup_pipe_readers(opipe); | ||
1429 | if (!off_in) | 1200 | if (!off_in) |
1430 | in->f_pos = offset; | 1201 | in->f_pos = offset; |
1431 | else if (copy_to_user(off_in, &offset, sizeof(loff_t))) | 1202 | else if (copy_to_user(off_in, &offset, sizeof(loff_t))) |
@@ -1437,106 +1208,50 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1437 | return -EINVAL; | 1208 | return -EINVAL; |
1438 | } | 1209 | } |
1439 | 1210 | ||
1440 | /* | 1211 | static int iter_to_pipe(struct iov_iter *from, |
1441 | * Map an iov into an array of pages and offset/length tupples. With the | 1212 | struct pipe_inode_info *pipe, |
1442 | * partial_page structure, we can map several non-contiguous ranges into | 1213 | unsigned flags) |
1443 | * our ones pages[] map instead of splitting that operation into pieces. | ||
1444 | * Could easily be exported as a generic helper for other users, in which | ||
1445 | * case one would probably want to add a 'max_nr_pages' parameter as well. | ||
1446 | */ | ||
1447 | static int get_iovec_page_array(const struct iovec __user *iov, | ||
1448 | unsigned int nr_vecs, struct page **pages, | ||
1449 | struct partial_page *partial, bool aligned, | ||
1450 | unsigned int pipe_buffers) | ||
1451 | { | 1214 | { |
1452 | int buffers = 0, error = 0; | 1215 | struct pipe_buffer buf = { |
1453 | 1216 | .ops = &user_page_pipe_buf_ops, | |
1454 | while (nr_vecs) { | 1217 | .flags = flags |
1455 | unsigned long off, npages; | 1218 | }; |
1456 | struct iovec entry; | 1219 | size_t total = 0; |
1457 | void __user *base; | 1220 | int ret = 0; |
1458 | size_t len; | 1221 | bool failed = false; |
1459 | int i; | 1222 | |
1460 | 1223 | while (iov_iter_count(from) && !failed) { | |
1461 | error = -EFAULT; | 1224 | struct page *pages[16]; |
1462 | if (copy_from_user(&entry, iov, sizeof(entry))) | 1225 | ssize_t copied; |
1463 | break; | 1226 | size_t start; |
1464 | 1227 | int n; | |
1465 | base = entry.iov_base; | 1228 | |
1466 | len = entry.iov_len; | 1229 | copied = iov_iter_get_pages(from, pages, ~0UL, 16, &start); |
1467 | 1230 | if (copied <= 0) { | |
1468 | /* | 1231 | ret = copied; |
1469 | * Sanity check this iovec. 0 read succeeds. | ||
1470 | */ | ||
1471 | error = 0; | ||
1472 | if (unlikely(!len)) | ||
1473 | break; | ||
1474 | error = -EFAULT; | ||
1475 | if (!access_ok(VERIFY_READ, base, len)) | ||
1476 | break; | ||
1477 | |||
1478 | /* | ||
1479 | * Get this base offset and number of pages, then map | ||
1480 | * in the user pages. | ||
1481 | */ | ||
1482 | off = (unsigned long) base & ~PAGE_MASK; | ||
1483 | |||
1484 | /* | ||
1485 | * If asked for alignment, the offset must be zero and the | ||
1486 | * length a multiple of the PAGE_SIZE. | ||
1487 | */ | ||
1488 | error = -EINVAL; | ||
1489 | if (aligned && (off || len & ~PAGE_MASK)) | ||
1490 | break; | ||
1491 | |||
1492 | npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
1493 | if (npages > pipe_buffers - buffers) | ||
1494 | npages = pipe_buffers - buffers; | ||
1495 | |||
1496 | error = get_user_pages_fast((unsigned long)base, npages, | ||
1497 | 0, &pages[buffers]); | ||
1498 | |||
1499 | if (unlikely(error <= 0)) | ||
1500 | break; | 1232 | break; |
1501 | |||
1502 | /* | ||
1503 | * Fill this contiguous range into the partial page map. | ||
1504 | */ | ||
1505 | for (i = 0; i < error; i++) { | ||
1506 | const int plen = min_t(size_t, len, PAGE_SIZE - off); | ||
1507 | |||
1508 | partial[buffers].offset = off; | ||
1509 | partial[buffers].len = plen; | ||
1510 | |||
1511 | off = 0; | ||
1512 | len -= plen; | ||
1513 | buffers++; | ||
1514 | } | 1233 | } |
1515 | 1234 | ||
1516 | /* | 1235 | for (n = 0; copied; n++, start = 0) { |
1517 | * We didn't complete this iov, stop here since it probably | 1236 | int size = min_t(int, copied, PAGE_SIZE - start); |
1518 | * means we have to move some of this into a pipe to | 1237 | if (!failed) { |
1519 | * be able to continue. | 1238 | buf.page = pages[n]; |
1520 | */ | 1239 | buf.offset = start; |
1521 | if (len) | 1240 | buf.len = size; |
1522 | break; | 1241 | ret = add_to_pipe(pipe, &buf); |
1523 | 1242 | if (unlikely(ret < 0)) { | |
1524 | /* | 1243 | failed = true; |
1525 | * Don't continue if we mapped fewer pages than we asked for, | 1244 | } else { |
1526 | * or if we mapped the max number of pages that we have | 1245 | iov_iter_advance(from, ret); |
1527 | * room for. | 1246 | total += ret; |
1528 | */ | 1247 | } |
1529 | if (error < npages || buffers == pipe_buffers) | 1248 | } else { |
1530 | break; | 1249 | put_page(pages[n]); |
1531 | 1250 | } | |
1532 | nr_vecs--; | 1251 | copied -= size; |
1533 | iov++; | 1252 | } |
1534 | } | 1253 | } |
1535 | 1254 | return total ? total : ret; | |
1536 | if (buffers) | ||
1537 | return buffers; | ||
1538 | |||
1539 | return error; | ||
1540 | } | 1255 | } |
1541 | 1256 | ||
1542 | static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | 1257 | static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, |
@@ -1590,38 +1305,36 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, | |||
1590 | * as splice-from-memory, where the regular splice is splice-from-file (or | 1305 | * as splice-from-memory, where the regular splice is splice-from-file (or |
1591 | * to file). In both cases the output is a pipe, naturally. | 1306 | * to file). In both cases the output is a pipe, naturally. |
1592 | */ | 1307 | */ |
1593 | static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, | 1308 | static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov, |
1594 | unsigned long nr_segs, unsigned int flags) | 1309 | unsigned long nr_segs, unsigned int flags) |
1595 | { | 1310 | { |
1596 | struct pipe_inode_info *pipe; | 1311 | struct pipe_inode_info *pipe; |
1597 | struct page *pages[PIPE_DEF_BUFFERS]; | 1312 | struct iovec iovstack[UIO_FASTIOV]; |
1598 | struct partial_page partial[PIPE_DEF_BUFFERS]; | 1313 | struct iovec *iov = iovstack; |
1599 | struct splice_pipe_desc spd = { | 1314 | struct iov_iter from; |
1600 | .pages = pages, | ||
1601 | .partial = partial, | ||
1602 | .nr_pages_max = PIPE_DEF_BUFFERS, | ||
1603 | .flags = flags, | ||
1604 | .ops = &user_page_pipe_buf_ops, | ||
1605 | .spd_release = spd_release_page, | ||
1606 | }; | ||
1607 | long ret; | 1315 | long ret; |
1316 | unsigned buf_flag = 0; | ||
1317 | |||
1318 | if (flags & SPLICE_F_GIFT) | ||
1319 | buf_flag = PIPE_BUF_FLAG_GIFT; | ||
1608 | 1320 | ||
1609 | pipe = get_pipe_info(file); | 1321 | pipe = get_pipe_info(file); |
1610 | if (!pipe) | 1322 | if (!pipe) |
1611 | return -EBADF; | 1323 | return -EBADF; |
1612 | 1324 | ||
1613 | if (splice_grow_spd(pipe, &spd)) | 1325 | ret = import_iovec(WRITE, uiov, nr_segs, |
1614 | return -ENOMEM; | 1326 | ARRAY_SIZE(iovstack), &iov, &from); |
1615 | 1327 | if (ret < 0) | |
1616 | spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages, | 1328 | return ret; |
1617 | spd.partial, false, | ||
1618 | spd.nr_pages_max); | ||
1619 | if (spd.nr_pages <= 0) | ||
1620 | ret = spd.nr_pages; | ||
1621 | else | ||
1622 | ret = splice_to_pipe(pipe, &spd); | ||
1623 | 1329 | ||
1624 | splice_shrink_spd(&spd); | 1330 | pipe_lock(pipe); |
1331 | ret = wait_for_space(pipe, flags); | ||
1332 | if (!ret) | ||
1333 | ret = iter_to_pipe(&from, pipe, buf_flag); | ||
1334 | pipe_unlock(pipe); | ||
1335 | if (ret > 0) | ||
1336 | wakeup_pipe_readers(pipe); | ||
1337 | kfree(iov); | ||
1625 | return ret; | 1338 | return ret; |
1626 | } | 1339 | } |
1627 | 1340 | ||
@@ -1876,7 +1589,7 @@ retry: | |||
1876 | * Get a reference to this pipe buffer, | 1589 | * Get a reference to this pipe buffer, |
1877 | * so we can copy the contents over. | 1590 | * so we can copy the contents over. |
1878 | */ | 1591 | */ |
1879 | ibuf->ops->get(ipipe, ibuf); | 1592 | pipe_buf_get(ipipe, ibuf); |
1880 | *obuf = *ibuf; | 1593 | *obuf = *ibuf; |
1881 | 1594 | ||
1882 | /* | 1595 | /* |
@@ -1948,7 +1661,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
1948 | * Get a reference to this pipe buffer, | 1661 | * Get a reference to this pipe buffer, |
1949 | * so we can copy the contents over. | 1662 | * so we can copy the contents over. |
1950 | */ | 1663 | */ |
1951 | ibuf->ops->get(ipipe, ibuf); | 1664 | pipe_buf_get(ipipe, ibuf); |
1952 | 1665 | ||
1953 | obuf = opipe->bufs + nbuf; | 1666 | obuf = opipe->bufs + nbuf; |
1954 | *obuf = *ibuf; | 1667 | *obuf = *ibuf; |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index c68517b0f248..f46b2929c64d 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -393,45 +393,6 @@ xfs_file_read_iter( | |||
393 | return ret; | 393 | return ret; |
394 | } | 394 | } |
395 | 395 | ||
396 | STATIC ssize_t | ||
397 | xfs_file_splice_read( | ||
398 | struct file *infilp, | ||
399 | loff_t *ppos, | ||
400 | struct pipe_inode_info *pipe, | ||
401 | size_t count, | ||
402 | unsigned int flags) | ||
403 | { | ||
404 | struct xfs_inode *ip = XFS_I(infilp->f_mapping->host); | ||
405 | ssize_t ret; | ||
406 | |||
407 | XFS_STATS_INC(ip->i_mount, xs_read_calls); | ||
408 | |||
409 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | ||
410 | return -EIO; | ||
411 | |||
412 | trace_xfs_file_splice_read(ip, count, *ppos); | ||
413 | |||
414 | /* | ||
415 | * DAX inodes cannot ues the page cache for splice, so we have to push | ||
416 | * them through the VFS IO path. This means it goes through | ||
417 | * ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we | ||
418 | * cannot lock the splice operation at this level for DAX inodes. | ||
419 | */ | ||
420 | if (IS_DAX(VFS_I(ip))) { | ||
421 | ret = default_file_splice_read(infilp, ppos, pipe, count, | ||
422 | flags); | ||
423 | goto out; | ||
424 | } | ||
425 | |||
426 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); | ||
427 | ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); | ||
428 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); | ||
429 | out: | ||
430 | if (ret > 0) | ||
431 | XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret); | ||
432 | return ret; | ||
433 | } | ||
434 | |||
435 | /* | 396 | /* |
436 | * Zero any on disk space between the current EOF and the new, larger EOF. | 397 | * Zero any on disk space between the current EOF and the new, larger EOF. |
437 | * | 398 | * |
@@ -1608,7 +1569,7 @@ const struct file_operations xfs_file_operations = { | |||
1608 | .llseek = xfs_file_llseek, | 1569 | .llseek = xfs_file_llseek, |
1609 | .read_iter = xfs_file_read_iter, | 1570 | .read_iter = xfs_file_read_iter, |
1610 | .write_iter = xfs_file_write_iter, | 1571 | .write_iter = xfs_file_write_iter, |
1611 | .splice_read = xfs_file_splice_read, | 1572 | .splice_read = generic_file_splice_read, |
1612 | .splice_write = iter_file_splice_write, | 1573 | .splice_write = iter_file_splice_write, |
1613 | .unlocked_ioctl = xfs_file_ioctl, | 1574 | .unlocked_ioctl = xfs_file_ioctl, |
1614 | #ifdef CONFIG_COMPAT | 1575 | #ifdef CONFIG_COMPAT |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index c6b2b1dcde75..16093c7dacde 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -1170,7 +1170,6 @@ DEFINE_RW_EVENT(xfs_file_dax_read); | |||
1170 | DEFINE_RW_EVENT(xfs_file_buffered_write); | 1170 | DEFINE_RW_EVENT(xfs_file_buffered_write); |
1171 | DEFINE_RW_EVENT(xfs_file_direct_write); | 1171 | DEFINE_RW_EVENT(xfs_file_direct_write); |
1172 | DEFINE_RW_EVENT(xfs_file_dax_write); | 1172 | DEFINE_RW_EVENT(xfs_file_dax_write); |
1173 | DEFINE_RW_EVENT(xfs_file_splice_read); | ||
1174 | 1173 | ||
1175 | DECLARE_EVENT_CLASS(xfs_page_class, | 1174 | DECLARE_EVENT_CLASS(xfs_page_class, |
1176 | TP_PROTO(struct inode *inode, struct page *page, unsigned long off, | 1175 | TP_PROTO(struct inode *inode, struct page *page, unsigned long off, |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 901e25d495cc..b04883e74579 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -2794,8 +2794,6 @@ extern void block_sync_page(struct page *page); | |||
2794 | /* fs/splice.c */ | 2794 | /* fs/splice.c */ |
2795 | extern ssize_t generic_file_splice_read(struct file *, loff_t *, | 2795 | extern ssize_t generic_file_splice_read(struct file *, loff_t *, |
2796 | struct pipe_inode_info *, size_t, unsigned int); | 2796 | struct pipe_inode_info *, size_t, unsigned int); |
2797 | extern ssize_t default_file_splice_read(struct file *, loff_t *, | ||
2798 | struct pipe_inode_info *, size_t, unsigned int); | ||
2799 | extern ssize_t iter_file_splice_write(struct pipe_inode_info *, | 2797 | extern ssize_t iter_file_splice_write(struct pipe_inode_info *, |
2800 | struct file *, loff_t *, size_t, unsigned int); | 2798 | struct file *, loff_t *, size_t, unsigned int); |
2801 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, | 2799 | extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, |
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 24f5470d3944..e7497c9dde7f 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h | |||
@@ -66,15 +66,10 @@ struct pipe_inode_info { | |||
66 | * | 66 | * |
67 | * ->confirm() | 67 | * ->confirm() |
68 | * ->steal() | 68 | * ->steal() |
69 | * ... | ||
70 | * ->map() | ||
71 | * ... | ||
72 | * ->unmap() | ||
73 | * | 69 | * |
74 | * That is, ->map() must be called on a confirmed buffer, | 70 | * That is, ->steal() must be called on a confirmed buffer. |
75 | * same goes for ->steal(). See below for the meaning of each | 71 | * See below for the meaning of each operation. Also see kerneldoc |
76 | * operation. Also see kerneldoc in fs/pipe.c for the pipe | 72 | * in fs/pipe.c for the pipe and generic variants of these hooks. |
77 | * and generic variants of these hooks. | ||
78 | */ | 73 | */ |
79 | struct pipe_buf_operations { | 74 | struct pipe_buf_operations { |
80 | /* | 75 | /* |
@@ -115,6 +110,53 @@ struct pipe_buf_operations { | |||
115 | void (*get)(struct pipe_inode_info *, struct pipe_buffer *); | 110 | void (*get)(struct pipe_inode_info *, struct pipe_buffer *); |
116 | }; | 111 | }; |
117 | 112 | ||
113 | /** | ||
114 | * pipe_buf_get - get a reference to a pipe_buffer | ||
115 | * @pipe: the pipe that the buffer belongs to | ||
116 | * @buf: the buffer to get a reference to | ||
117 | */ | ||
118 | static inline void pipe_buf_get(struct pipe_inode_info *pipe, | ||
119 | struct pipe_buffer *buf) | ||
120 | { | ||
121 | buf->ops->get(pipe, buf); | ||
122 | } | ||
123 | |||
124 | /** | ||
125 | * pipe_buf_release - put a reference to a pipe_buffer | ||
126 | * @pipe: the pipe that the buffer belongs to | ||
127 | * @buf: the buffer to put a reference to | ||
128 | */ | ||
129 | static inline void pipe_buf_release(struct pipe_inode_info *pipe, | ||
130 | struct pipe_buffer *buf) | ||
131 | { | ||
132 | const struct pipe_buf_operations *ops = buf->ops; | ||
133 | |||
134 | buf->ops = NULL; | ||
135 | ops->release(pipe, buf); | ||
136 | } | ||
137 | |||
138 | /** | ||
139 | * pipe_buf_confirm - verify contents of the pipe buffer | ||
140 | * @pipe: the pipe that the buffer belongs to | ||
141 | * @buf: the buffer to confirm | ||
142 | */ | ||
143 | static inline int pipe_buf_confirm(struct pipe_inode_info *pipe, | ||
144 | struct pipe_buffer *buf) | ||
145 | { | ||
146 | return buf->ops->confirm(pipe, buf); | ||
147 | } | ||
148 | |||
149 | /** | ||
150 | * pipe_buf_steal - attempt to take ownership of a pipe_buffer | ||
151 | * @pipe: the pipe that the buffer belongs to | ||
152 | * @buf: the buffer to attempt to steal | ||
153 | */ | ||
154 | static inline int pipe_buf_steal(struct pipe_inode_info *pipe, | ||
155 | struct pipe_buffer *buf) | ||
156 | { | ||
157 | return buf->ops->steal(pipe, buf); | ||
158 | } | ||
159 | |||
118 | /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual | 160 | /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual |
119 | memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ | 161 | memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ |
120 | #define PIPE_SIZE PAGE_SIZE | 162 | #define PIPE_SIZE PAGE_SIZE |
@@ -129,7 +171,6 @@ extern unsigned long pipe_user_pages_hard; | |||
129 | extern unsigned long pipe_user_pages_soft; | 171 | extern unsigned long pipe_user_pages_soft; |
130 | int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *); | 172 | int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *); |
131 | 173 | ||
132 | |||
133 | /* Drop the inode semaphore and wait for a pipe event, atomically */ | 174 | /* Drop the inode semaphore and wait for a pipe event, atomically */ |
134 | void pipe_wait(struct pipe_inode_info *pipe); | 175 | void pipe_wait(struct pipe_inode_info *pipe); |
135 | 176 | ||
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9bf60b556bd2..601258f6e621 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h | |||
@@ -3064,15 +3064,9 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); | |||
3064 | int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); | 3064 | int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); |
3065 | __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, | 3065 | __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, |
3066 | int len, __wsum csum); | 3066 | int len, __wsum csum); |
3067 | ssize_t skb_socket_splice(struct sock *sk, | ||
3068 | struct pipe_inode_info *pipe, | ||
3069 | struct splice_pipe_desc *spd); | ||
3070 | int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, | 3067 | int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, |
3071 | struct pipe_inode_info *pipe, unsigned int len, | 3068 | struct pipe_inode_info *pipe, unsigned int len, |
3072 | unsigned int flags, | 3069 | unsigned int flags); |
3073 | ssize_t (*splice_cb)(struct sock *, | ||
3074 | struct pipe_inode_info *, | ||
3075 | struct splice_pipe_desc *)); | ||
3076 | void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); | 3070 | void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); |
3077 | unsigned int skb_zerocopy_headlen(const struct sk_buff *from); | 3071 | unsigned int skb_zerocopy_headlen(const struct sk_buff *from); |
3078 | int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, | 3072 | int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, |
diff --git a/include/linux/splice.h b/include/linux/splice.h index da2751d3b93d..00a21166e268 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h | |||
@@ -72,6 +72,8 @@ extern ssize_t __splice_from_pipe(struct pipe_inode_info *, | |||
72 | struct splice_desc *, splice_actor *); | 72 | struct splice_desc *, splice_actor *); |
73 | extern ssize_t splice_to_pipe(struct pipe_inode_info *, | 73 | extern ssize_t splice_to_pipe(struct pipe_inode_info *, |
74 | struct splice_pipe_desc *); | 74 | struct splice_pipe_desc *); |
75 | extern ssize_t add_to_pipe(struct pipe_inode_info *, | ||
76 | struct pipe_buffer *); | ||
75 | extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, | 77 | extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, |
76 | splice_direct_actor *); | 78 | splice_direct_actor *); |
77 | 79 | ||
@@ -83,4 +85,5 @@ extern void splice_shrink_spd(struct splice_pipe_desc *); | |||
83 | extern void spd_release_page(struct splice_pipe_desc *, unsigned int); | 85 | extern void spd_release_page(struct splice_pipe_desc *, unsigned int); |
84 | 86 | ||
85 | extern const struct pipe_buf_operations page_cache_pipe_buf_ops; | 87 | extern const struct pipe_buf_operations page_cache_pipe_buf_ops; |
88 | extern const struct pipe_buf_operations default_pipe_buf_ops; | ||
86 | #endif | 89 | #endif |
diff --git a/include/linux/uio.h b/include/linux/uio.h index 75b4aaf31a9d..b5ebe6dca404 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <uapi/linux/uio.h> | 13 | #include <uapi/linux/uio.h> |
14 | 14 | ||
15 | struct page; | 15 | struct page; |
16 | struct pipe_inode_info; | ||
16 | 17 | ||
17 | struct kvec { | 18 | struct kvec { |
18 | void *iov_base; /* and that should *never* hold a userland pointer */ | 19 | void *iov_base; /* and that should *never* hold a userland pointer */ |
@@ -23,6 +24,7 @@ enum { | |||
23 | ITER_IOVEC = 0, | 24 | ITER_IOVEC = 0, |
24 | ITER_KVEC = 2, | 25 | ITER_KVEC = 2, |
25 | ITER_BVEC = 4, | 26 | ITER_BVEC = 4, |
27 | ITER_PIPE = 8, | ||
26 | }; | 28 | }; |
27 | 29 | ||
28 | struct iov_iter { | 30 | struct iov_iter { |
@@ -33,8 +35,12 @@ struct iov_iter { | |||
33 | const struct iovec *iov; | 35 | const struct iovec *iov; |
34 | const struct kvec *kvec; | 36 | const struct kvec *kvec; |
35 | const struct bio_vec *bvec; | 37 | const struct bio_vec *bvec; |
38 | struct pipe_inode_info *pipe; | ||
39 | }; | ||
40 | union { | ||
41 | unsigned long nr_segs; | ||
42 | int idx; | ||
36 | }; | 43 | }; |
37 | unsigned long nr_segs; | ||
38 | }; | 44 | }; |
39 | 45 | ||
40 | /* | 46 | /* |
@@ -64,7 +70,7 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) | |||
64 | } | 70 | } |
65 | 71 | ||
66 | #define iov_for_each(iov, iter, start) \ | 72 | #define iov_for_each(iov, iter, start) \ |
67 | if (!((start).type & ITER_BVEC)) \ | 73 | if (!((start).type & (ITER_BVEC | ITER_PIPE))) \ |
68 | for (iter = (start); \ | 74 | for (iter = (start); \ |
69 | (iter).count && \ | 75 | (iter).count && \ |
70 | ((iov = iov_iter_iovec(&(iter))), 1); \ | 76 | ((iov = iov_iter_iovec(&(iter))), 1); \ |
@@ -94,6 +100,8 @@ void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec, | |||
94 | unsigned long nr_segs, size_t count); | 100 | unsigned long nr_segs, size_t count); |
95 | void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec, | 101 | void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec, |
96 | unsigned long nr_segs, size_t count); | 102 | unsigned long nr_segs, size_t count); |
103 | void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe, | ||
104 | size_t count); | ||
97 | ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, | 105 | ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, |
98 | size_t maxsize, unsigned maxpages, size_t *start); | 106 | size_t maxsize, unsigned maxpages, size_t *start); |
99 | ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, | 107 | ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, |
@@ -109,7 +117,7 @@ static inline size_t iov_iter_count(struct iov_iter *i) | |||
109 | 117 | ||
110 | static inline bool iter_is_iovec(struct iov_iter *i) | 118 | static inline bool iter_is_iovec(struct iov_iter *i) |
111 | { | 119 | { |
112 | return !(i->type & (ITER_BVEC | ITER_KVEC)); | 120 | return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE)); |
113 | } | 121 | } |
114 | 122 | ||
115 | /* | 123 | /* |
diff --git a/kernel/relay.c b/kernel/relay.c index fc9b4a4af463..9988f5cc2d46 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
@@ -1108,51 +1108,23 @@ static size_t relay_file_read_end_pos(struct rchan_buf *buf, | |||
1108 | return end_pos; | 1108 | return end_pos; |
1109 | } | 1109 | } |
1110 | 1110 | ||
1111 | /* | 1111 | static ssize_t relay_file_read(struct file *filp, |
1112 | * subbuf_read_actor - read up to one subbuf's worth of data | 1112 | char __user *buffer, |
1113 | */ | 1113 | size_t count, |
1114 | static int subbuf_read_actor(size_t read_start, | 1114 | loff_t *ppos) |
1115 | struct rchan_buf *buf, | ||
1116 | size_t avail, | ||
1117 | read_descriptor_t *desc) | ||
1118 | { | ||
1119 | void *from; | ||
1120 | int ret = 0; | ||
1121 | |||
1122 | from = buf->start + read_start; | ||
1123 | ret = avail; | ||
1124 | if (copy_to_user(desc->arg.buf, from, avail)) { | ||
1125 | desc->error = -EFAULT; | ||
1126 | ret = 0; | ||
1127 | } | ||
1128 | desc->arg.data += ret; | ||
1129 | desc->written += ret; | ||
1130 | desc->count -= ret; | ||
1131 | |||
1132 | return ret; | ||
1133 | } | ||
1134 | |||
1135 | typedef int (*subbuf_actor_t) (size_t read_start, | ||
1136 | struct rchan_buf *buf, | ||
1137 | size_t avail, | ||
1138 | read_descriptor_t *desc); | ||
1139 | |||
1140 | /* | ||
1141 | * relay_file_read_subbufs - read count bytes, bridging subbuf boundaries | ||
1142 | */ | ||
1143 | static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, | ||
1144 | subbuf_actor_t subbuf_actor, | ||
1145 | read_descriptor_t *desc) | ||
1146 | { | 1115 | { |
1147 | struct rchan_buf *buf = filp->private_data; | 1116 | struct rchan_buf *buf = filp->private_data; |
1148 | size_t read_start, avail; | 1117 | size_t read_start, avail; |
1118 | size_t written = 0; | ||
1149 | int ret; | 1119 | int ret; |
1150 | 1120 | ||
1151 | if (!desc->count) | 1121 | if (!count) |
1152 | return 0; | 1122 | return 0; |
1153 | 1123 | ||
1154 | inode_lock(file_inode(filp)); | 1124 | inode_lock(file_inode(filp)); |
1155 | do { | 1125 | do { |
1126 | void *from; | ||
1127 | |||
1156 | if (!relay_file_read_avail(buf, *ppos)) | 1128 | if (!relay_file_read_avail(buf, *ppos)) |
1157 | break; | 1129 | break; |
1158 | 1130 | ||
@@ -1161,32 +1133,22 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, | |||
1161 | if (!avail) | 1133 | if (!avail) |
1162 | break; | 1134 | break; |
1163 | 1135 | ||
1164 | avail = min(desc->count, avail); | 1136 | avail = min(count, avail); |
1165 | ret = subbuf_actor(read_start, buf, avail, desc); | 1137 | from = buf->start + read_start; |
1166 | if (desc->error < 0) | 1138 | ret = avail; |
1139 | if (copy_to_user(buffer, from, avail)) | ||
1167 | break; | 1140 | break; |
1168 | 1141 | ||
1169 | if (ret) { | 1142 | buffer += ret; |
1170 | relay_file_read_consume(buf, read_start, ret); | 1143 | written += ret; |
1171 | *ppos = relay_file_read_end_pos(buf, read_start, ret); | 1144 | count -= ret; |
1172 | } | ||
1173 | } while (desc->count && ret); | ||
1174 | inode_unlock(file_inode(filp)); | ||
1175 | 1145 | ||
1176 | return desc->written; | 1146 | relay_file_read_consume(buf, read_start, ret); |
1177 | } | 1147 | *ppos = relay_file_read_end_pos(buf, read_start, ret); |
1148 | } while (count); | ||
1149 | inode_unlock(file_inode(filp)); | ||
1178 | 1150 | ||
1179 | static ssize_t relay_file_read(struct file *filp, | 1151 | return written; |
1180 | char __user *buffer, | ||
1181 | size_t count, | ||
1182 | loff_t *ppos) | ||
1183 | { | ||
1184 | read_descriptor_t desc; | ||
1185 | desc.written = 0; | ||
1186 | desc.count = count; | ||
1187 | desc.arg.buf = buffer; | ||
1188 | desc.error = 0; | ||
1189 | return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, &desc); | ||
1190 | } | 1152 | } |
1191 | 1153 | ||
1192 | static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed) | 1154 | static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed) |
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 7e3138cfc8c9..48b8c27acabb 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c | |||
@@ -3,8 +3,11 @@ | |||
3 | #include <linux/pagemap.h> | 3 | #include <linux/pagemap.h> |
4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
5 | #include <linux/vmalloc.h> | 5 | #include <linux/vmalloc.h> |
6 | #include <linux/splice.h> | ||
6 | #include <net/checksum.h> | 7 | #include <net/checksum.h> |
7 | 8 | ||
9 | #define PIPE_PARANOIA /* for now */ | ||
10 | |||
8 | #define iterate_iovec(i, n, __v, __p, skip, STEP) { \ | 11 | #define iterate_iovec(i, n, __v, __p, skip, STEP) { \ |
9 | size_t left; \ | 12 | size_t left; \ |
10 | size_t wanted = n; \ | 13 | size_t wanted = n; \ |
@@ -290,6 +293,93 @@ done: | |||
290 | return wanted - bytes; | 293 | return wanted - bytes; |
291 | } | 294 | } |
292 | 295 | ||
296 | #ifdef PIPE_PARANOIA | ||
297 | static bool sanity(const struct iov_iter *i) | ||
298 | { | ||
299 | struct pipe_inode_info *pipe = i->pipe; | ||
300 | int idx = i->idx; | ||
301 | int next = pipe->curbuf + pipe->nrbufs; | ||
302 | if (i->iov_offset) { | ||
303 | struct pipe_buffer *p; | ||
304 | if (unlikely(!pipe->nrbufs)) | ||
305 | goto Bad; // pipe must be non-empty | ||
306 | if (unlikely(idx != ((next - 1) & (pipe->buffers - 1)))) | ||
307 | goto Bad; // must be at the last buffer... | ||
308 | |||
309 | p = &pipe->bufs[idx]; | ||
310 | if (unlikely(p->offset + p->len != i->iov_offset)) | ||
311 | goto Bad; // ... at the end of segment | ||
312 | } else { | ||
313 | if (idx != (next & (pipe->buffers - 1))) | ||
314 | goto Bad; // must be right after the last buffer | ||
315 | } | ||
316 | return true; | ||
317 | Bad: | ||
318 | printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset); | ||
319 | printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n", | ||
320 | pipe->curbuf, pipe->nrbufs, pipe->buffers); | ||
321 | for (idx = 0; idx < pipe->buffers; idx++) | ||
322 | printk(KERN_ERR "[%p %p %d %d]\n", | ||
323 | pipe->bufs[idx].ops, | ||
324 | pipe->bufs[idx].page, | ||
325 | pipe->bufs[idx].offset, | ||
326 | pipe->bufs[idx].len); | ||
327 | WARN_ON(1); | ||
328 | return false; | ||
329 | } | ||
330 | #else | ||
331 | #define sanity(i) true | ||
332 | #endif | ||
333 | |||
334 | static inline int next_idx(int idx, struct pipe_inode_info *pipe) | ||
335 | { | ||
336 | return (idx + 1) & (pipe->buffers - 1); | ||
337 | } | ||
338 | |||
339 | static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, | ||
340 | struct iov_iter *i) | ||
341 | { | ||
342 | struct pipe_inode_info *pipe = i->pipe; | ||
343 | struct pipe_buffer *buf; | ||
344 | size_t off; | ||
345 | int idx; | ||
346 | |||
347 | if (unlikely(bytes > i->count)) | ||
348 | bytes = i->count; | ||
349 | |||
350 | if (unlikely(!bytes)) | ||
351 | return 0; | ||
352 | |||
353 | if (!sanity(i)) | ||
354 | return 0; | ||
355 | |||
356 | off = i->iov_offset; | ||
357 | idx = i->idx; | ||
358 | buf = &pipe->bufs[idx]; | ||
359 | if (off) { | ||
360 | if (offset == off && buf->page == page) { | ||
361 | /* merge with the last one */ | ||
362 | buf->len += bytes; | ||
363 | i->iov_offset += bytes; | ||
364 | goto out; | ||
365 | } | ||
366 | idx = next_idx(idx, pipe); | ||
367 | buf = &pipe->bufs[idx]; | ||
368 | } | ||
369 | if (idx == pipe->curbuf && pipe->nrbufs) | ||
370 | return 0; | ||
371 | pipe->nrbufs++; | ||
372 | buf->ops = &page_cache_pipe_buf_ops; | ||
373 | get_page(buf->page = page); | ||
374 | buf->offset = offset; | ||
375 | buf->len = bytes; | ||
376 | i->iov_offset = offset + bytes; | ||
377 | i->idx = idx; | ||
378 | out: | ||
379 | i->count -= bytes; | ||
380 | return bytes; | ||
381 | } | ||
382 | |||
293 | /* | 383 | /* |
294 | * Fault in one or more iovecs of the given iov_iter, to a maximum length of | 384 | * Fault in one or more iovecs of the given iov_iter, to a maximum length of |
295 | * bytes. For each iovec, fault in each page that constitutes the iovec. | 385 | * bytes. For each iovec, fault in each page that constitutes the iovec. |
@@ -356,9 +446,98 @@ static void memzero_page(struct page *page, size_t offset, size_t len) | |||
356 | kunmap_atomic(addr); | 446 | kunmap_atomic(addr); |
357 | } | 447 | } |
358 | 448 | ||
449 | static inline bool allocated(struct pipe_buffer *buf) | ||
450 | { | ||
451 | return buf->ops == &default_pipe_buf_ops; | ||
452 | } | ||
453 | |||
454 | static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp) | ||
455 | { | ||
456 | size_t off = i->iov_offset; | ||
457 | int idx = i->idx; | ||
458 | if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) { | ||
459 | idx = next_idx(idx, i->pipe); | ||
460 | off = 0; | ||
461 | } | ||
462 | *idxp = idx; | ||
463 | *offp = off; | ||
464 | } | ||
465 | |||
466 | static size_t push_pipe(struct iov_iter *i, size_t size, | ||
467 | int *idxp, size_t *offp) | ||
468 | { | ||
469 | struct pipe_inode_info *pipe = i->pipe; | ||
470 | size_t off; | ||
471 | int idx; | ||
472 | ssize_t left; | ||
473 | |||
474 | if (unlikely(size > i->count)) | ||
475 | size = i->count; | ||
476 | if (unlikely(!size)) | ||
477 | return 0; | ||
478 | |||
479 | left = size; | ||
480 | data_start(i, &idx, &off); | ||
481 | *idxp = idx; | ||
482 | *offp = off; | ||
483 | if (off) { | ||
484 | left -= PAGE_SIZE - off; | ||
485 | if (left <= 0) { | ||
486 | pipe->bufs[idx].len += size; | ||
487 | return size; | ||
488 | } | ||
489 | pipe->bufs[idx].len = PAGE_SIZE; | ||
490 | idx = next_idx(idx, pipe); | ||
491 | } | ||
492 | while (idx != pipe->curbuf || !pipe->nrbufs) { | ||
493 | struct page *page = alloc_page(GFP_USER); | ||
494 | if (!page) | ||
495 | break; | ||
496 | pipe->nrbufs++; | ||
497 | pipe->bufs[idx].ops = &default_pipe_buf_ops; | ||
498 | pipe->bufs[idx].page = page; | ||
499 | pipe->bufs[idx].offset = 0; | ||
500 | if (left <= PAGE_SIZE) { | ||
501 | pipe->bufs[idx].len = left; | ||
502 | return size; | ||
503 | } | ||
504 | pipe->bufs[idx].len = PAGE_SIZE; | ||
505 | left -= PAGE_SIZE; | ||
506 | idx = next_idx(idx, pipe); | ||
507 | } | ||
508 | return size - left; | ||
509 | } | ||
510 | |||
511 | static size_t copy_pipe_to_iter(const void *addr, size_t bytes, | ||
512 | struct iov_iter *i) | ||
513 | { | ||
514 | struct pipe_inode_info *pipe = i->pipe; | ||
515 | size_t n, off; | ||
516 | int idx; | ||
517 | |||
518 | if (!sanity(i)) | ||
519 | return 0; | ||
520 | |||
521 | bytes = n = push_pipe(i, bytes, &idx, &off); | ||
522 | if (unlikely(!n)) | ||
523 | return 0; | ||
524 | for ( ; n; idx = next_idx(idx, pipe), off = 0) { | ||
525 | size_t chunk = min_t(size_t, n, PAGE_SIZE - off); | ||
526 | memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk); | ||
527 | i->idx = idx; | ||
528 | i->iov_offset = off + chunk; | ||
529 | n -= chunk; | ||
530 | addr += chunk; | ||
531 | } | ||
532 | i->count -= bytes; | ||
533 | return bytes; | ||
534 | } | ||
535 | |||
359 | size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) | 536 | size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) |
360 | { | 537 | { |
361 | const char *from = addr; | 538 | const char *from = addr; |
539 | if (unlikely(i->type & ITER_PIPE)) | ||
540 | return copy_pipe_to_iter(addr, bytes, i); | ||
362 | iterate_and_advance(i, bytes, v, | 541 | iterate_and_advance(i, bytes, v, |
363 | __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, | 542 | __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, |
364 | v.iov_len), | 543 | v.iov_len), |
@@ -374,6 +553,10 @@ EXPORT_SYMBOL(copy_to_iter); | |||
374 | size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) | 553 | size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) |
375 | { | 554 | { |
376 | char *to = addr; | 555 | char *to = addr; |
556 | if (unlikely(i->type & ITER_PIPE)) { | ||
557 | WARN_ON(1); | ||
558 | return 0; | ||
559 | } | ||
377 | iterate_and_advance(i, bytes, v, | 560 | iterate_and_advance(i, bytes, v, |
378 | __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, | 561 | __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, |
379 | v.iov_len), | 562 | v.iov_len), |
@@ -389,6 +572,10 @@ EXPORT_SYMBOL(copy_from_iter); | |||
389 | size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) | 572 | size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) |
390 | { | 573 | { |
391 | char *to = addr; | 574 | char *to = addr; |
575 | if (unlikely(i->type & ITER_PIPE)) { | ||
576 | WARN_ON(1); | ||
577 | return 0; | ||
578 | } | ||
392 | iterate_and_advance(i, bytes, v, | 579 | iterate_and_advance(i, bytes, v, |
393 | __copy_from_user_nocache((to += v.iov_len) - v.iov_len, | 580 | __copy_from_user_nocache((to += v.iov_len) - v.iov_len, |
394 | v.iov_base, v.iov_len), | 581 | v.iov_base, v.iov_len), |
@@ -409,14 +596,20 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, | |||
409 | size_t wanted = copy_to_iter(kaddr + offset, bytes, i); | 596 | size_t wanted = copy_to_iter(kaddr + offset, bytes, i); |
410 | kunmap_atomic(kaddr); | 597 | kunmap_atomic(kaddr); |
411 | return wanted; | 598 | return wanted; |
412 | } else | 599 | } else if (likely(!(i->type & ITER_PIPE))) |
413 | return copy_page_to_iter_iovec(page, offset, bytes, i); | 600 | return copy_page_to_iter_iovec(page, offset, bytes, i); |
601 | else | ||
602 | return copy_page_to_iter_pipe(page, offset, bytes, i); | ||
414 | } | 603 | } |
415 | EXPORT_SYMBOL(copy_page_to_iter); | 604 | EXPORT_SYMBOL(copy_page_to_iter); |
416 | 605 | ||
417 | size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, | 606 | size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, |
418 | struct iov_iter *i) | 607 | struct iov_iter *i) |
419 | { | 608 | { |
609 | if (unlikely(i->type & ITER_PIPE)) { | ||
610 | WARN_ON(1); | ||
611 | return 0; | ||
612 | } | ||
420 | if (i->type & (ITER_BVEC|ITER_KVEC)) { | 613 | if (i->type & (ITER_BVEC|ITER_KVEC)) { |
421 | void *kaddr = kmap_atomic(page); | 614 | void *kaddr = kmap_atomic(page); |
422 | size_t wanted = copy_from_iter(kaddr + offset, bytes, i); | 615 | size_t wanted = copy_from_iter(kaddr + offset, bytes, i); |
@@ -427,8 +620,34 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, | |||
427 | } | 620 | } |
428 | EXPORT_SYMBOL(copy_page_from_iter); | 621 | EXPORT_SYMBOL(copy_page_from_iter); |
429 | 622 | ||
623 | static size_t pipe_zero(size_t bytes, struct iov_iter *i) | ||
624 | { | ||
625 | struct pipe_inode_info *pipe = i->pipe; | ||
626 | size_t n, off; | ||
627 | int idx; | ||
628 | |||
629 | if (!sanity(i)) | ||
630 | return 0; | ||
631 | |||
632 | bytes = n = push_pipe(i, bytes, &idx, &off); | ||
633 | if (unlikely(!n)) | ||
634 | return 0; | ||
635 | |||
636 | for ( ; n; idx = next_idx(idx, pipe), off = 0) { | ||
637 | size_t chunk = min_t(size_t, n, PAGE_SIZE - off); | ||
638 | memzero_page(pipe->bufs[idx].page, off, chunk); | ||
639 | i->idx = idx; | ||
640 | i->iov_offset = off + chunk; | ||
641 | n -= chunk; | ||
642 | } | ||
643 | i->count -= bytes; | ||
644 | return bytes; | ||
645 | } | ||
646 | |||
430 | size_t iov_iter_zero(size_t bytes, struct iov_iter *i) | 647 | size_t iov_iter_zero(size_t bytes, struct iov_iter *i) |
431 | { | 648 | { |
649 | if (unlikely(i->type & ITER_PIPE)) | ||
650 | return pipe_zero(bytes, i); | ||
432 | iterate_and_advance(i, bytes, v, | 651 | iterate_and_advance(i, bytes, v, |
433 | __clear_user(v.iov_base, v.iov_len), | 652 | __clear_user(v.iov_base, v.iov_len), |
434 | memzero_page(v.bv_page, v.bv_offset, v.bv_len), | 653 | memzero_page(v.bv_page, v.bv_offset, v.bv_len), |
@@ -443,6 +662,11 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, | |||
443 | struct iov_iter *i, unsigned long offset, size_t bytes) | 662 | struct iov_iter *i, unsigned long offset, size_t bytes) |
444 | { | 663 | { |
445 | char *kaddr = kmap_atomic(page), *p = kaddr + offset; | 664 | char *kaddr = kmap_atomic(page), *p = kaddr + offset; |
665 | if (unlikely(i->type & ITER_PIPE)) { | ||
666 | kunmap_atomic(kaddr); | ||
667 | WARN_ON(1); | ||
668 | return 0; | ||
669 | } | ||
446 | iterate_all_kinds(i, bytes, v, | 670 | iterate_all_kinds(i, bytes, v, |
447 | __copy_from_user_inatomic((p += v.iov_len) - v.iov_len, | 671 | __copy_from_user_inatomic((p += v.iov_len) - v.iov_len, |
448 | v.iov_base, v.iov_len), | 672 | v.iov_base, v.iov_len), |
@@ -455,8 +679,49 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, | |||
455 | } | 679 | } |
456 | EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); | 680 | EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); |
457 | 681 | ||
682 | static void pipe_advance(struct iov_iter *i, size_t size) | ||
683 | { | ||
684 | struct pipe_inode_info *pipe = i->pipe; | ||
685 | struct pipe_buffer *buf; | ||
686 | int idx = i->idx; | ||
687 | size_t off = i->iov_offset; | ||
688 | |||
689 | if (unlikely(i->count < size)) | ||
690 | size = i->count; | ||
691 | |||
692 | if (size) { | ||
693 | if (off) /* make it relative to the beginning of buffer */ | ||
694 | size += off - pipe->bufs[idx].offset; | ||
695 | while (1) { | ||
696 | buf = &pipe->bufs[idx]; | ||
697 | if (size <= buf->len) | ||
698 | break; | ||
699 | size -= buf->len; | ||
700 | idx = next_idx(idx, pipe); | ||
701 | } | ||
702 | buf->len = size; | ||
703 | i->idx = idx; | ||
704 | off = i->iov_offset = buf->offset + size; | ||
705 | } | ||
706 | if (off) | ||
707 | idx = next_idx(idx, pipe); | ||
708 | if (pipe->nrbufs) { | ||
709 | int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); | ||
710 | /* [curbuf,unused) is in use. Free [idx,unused) */ | ||
711 | while (idx != unused) { | ||
712 | pipe_buf_release(pipe, &pipe->bufs[idx]); | ||
713 | idx = next_idx(idx, pipe); | ||
714 | pipe->nrbufs--; | ||
715 | } | ||
716 | } | ||
717 | } | ||
718 | |||
458 | void iov_iter_advance(struct iov_iter *i, size_t size) | 719 | void iov_iter_advance(struct iov_iter *i, size_t size) |
459 | { | 720 | { |
721 | if (unlikely(i->type & ITER_PIPE)) { | ||
722 | pipe_advance(i, size); | ||
723 | return; | ||
724 | } | ||
460 | iterate_and_advance(i, size, v, 0, 0, 0) | 725 | iterate_and_advance(i, size, v, 0, 0, 0) |
461 | } | 726 | } |
462 | EXPORT_SYMBOL(iov_iter_advance); | 727 | EXPORT_SYMBOL(iov_iter_advance); |
@@ -466,6 +731,8 @@ EXPORT_SYMBOL(iov_iter_advance); | |||
466 | */ | 731 | */ |
467 | size_t iov_iter_single_seg_count(const struct iov_iter *i) | 732 | size_t iov_iter_single_seg_count(const struct iov_iter *i) |
468 | { | 733 | { |
734 | if (unlikely(i->type & ITER_PIPE)) | ||
735 | return i->count; // it is a silly place, anyway | ||
469 | if (i->nr_segs == 1) | 736 | if (i->nr_segs == 1) |
470 | return i->count; | 737 | return i->count; |
471 | else if (i->type & ITER_BVEC) | 738 | else if (i->type & ITER_BVEC) |
@@ -501,6 +768,19 @@ void iov_iter_bvec(struct iov_iter *i, int direction, | |||
501 | } | 768 | } |
502 | EXPORT_SYMBOL(iov_iter_bvec); | 769 | EXPORT_SYMBOL(iov_iter_bvec); |
503 | 770 | ||
771 | void iov_iter_pipe(struct iov_iter *i, int direction, | ||
772 | struct pipe_inode_info *pipe, | ||
773 | size_t count) | ||
774 | { | ||
775 | BUG_ON(direction != ITER_PIPE); | ||
776 | i->type = direction; | ||
777 | i->pipe = pipe; | ||
778 | i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); | ||
779 | i->iov_offset = 0; | ||
780 | i->count = count; | ||
781 | } | ||
782 | EXPORT_SYMBOL(iov_iter_pipe); | ||
783 | |||
504 | unsigned long iov_iter_alignment(const struct iov_iter *i) | 784 | unsigned long iov_iter_alignment(const struct iov_iter *i) |
505 | { | 785 | { |
506 | unsigned long res = 0; | 786 | unsigned long res = 0; |
@@ -509,6 +789,11 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) | |||
509 | if (!size) | 789 | if (!size) |
510 | return 0; | 790 | return 0; |
511 | 791 | ||
792 | if (unlikely(i->type & ITER_PIPE)) { | ||
793 | if (i->iov_offset && allocated(&i->pipe->bufs[i->idx])) | ||
794 | return size | i->iov_offset; | ||
795 | return size; | ||
796 | } | ||
512 | iterate_all_kinds(i, size, v, | 797 | iterate_all_kinds(i, size, v, |
513 | (res |= (unsigned long)v.iov_base | v.iov_len, 0), | 798 | (res |= (unsigned long)v.iov_base | v.iov_len, 0), |
514 | res |= v.bv_offset | v.bv_len, | 799 | res |= v.bv_offset | v.bv_len, |
@@ -525,6 +810,11 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) | |||
525 | if (!size) | 810 | if (!size) |
526 | return 0; | 811 | return 0; |
527 | 812 | ||
813 | if (unlikely(i->type & ITER_PIPE)) { | ||
814 | WARN_ON(1); | ||
815 | return ~0U; | ||
816 | } | ||
817 | |||
528 | iterate_all_kinds(i, size, v, | 818 | iterate_all_kinds(i, size, v, |
529 | (res |= (!res ? 0 : (unsigned long)v.iov_base) | | 819 | (res |= (!res ? 0 : (unsigned long)v.iov_base) | |
530 | (size != v.iov_len ? size : 0), 0), | 820 | (size != v.iov_len ? size : 0), 0), |
@@ -537,6 +827,47 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) | |||
537 | } | 827 | } |
538 | EXPORT_SYMBOL(iov_iter_gap_alignment); | 828 | EXPORT_SYMBOL(iov_iter_gap_alignment); |
539 | 829 | ||
830 | static inline size_t __pipe_get_pages(struct iov_iter *i, | ||
831 | size_t maxsize, | ||
832 | struct page **pages, | ||
833 | int idx, | ||
834 | size_t *start) | ||
835 | { | ||
836 | struct pipe_inode_info *pipe = i->pipe; | ||
837 | size_t n = push_pipe(i, maxsize, &idx, start); | ||
838 | if (!n) | ||
839 | return -EFAULT; | ||
840 | |||
841 | maxsize = n; | ||
842 | n += *start; | ||
843 | while (n >= PAGE_SIZE) { | ||
844 | get_page(*pages++ = pipe->bufs[idx].page); | ||
845 | idx = next_idx(idx, pipe); | ||
846 | n -= PAGE_SIZE; | ||
847 | } | ||
848 | |||
849 | return maxsize; | ||
850 | } | ||
851 | |||
852 | static ssize_t pipe_get_pages(struct iov_iter *i, | ||
853 | struct page **pages, size_t maxsize, unsigned maxpages, | ||
854 | size_t *start) | ||
855 | { | ||
856 | unsigned npages; | ||
857 | size_t capacity; | ||
858 | int idx; | ||
859 | |||
860 | if (!sanity(i)) | ||
861 | return -EFAULT; | ||
862 | |||
863 | data_start(i, &idx, start); | ||
864 | /* some of this one + all after this one */ | ||
865 | npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; | ||
866 | capacity = min(npages,maxpages) * PAGE_SIZE - *start; | ||
867 | |||
868 | return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start); | ||
869 | } | ||
870 | |||
540 | ssize_t iov_iter_get_pages(struct iov_iter *i, | 871 | ssize_t iov_iter_get_pages(struct iov_iter *i, |
541 | struct page **pages, size_t maxsize, unsigned maxpages, | 872 | struct page **pages, size_t maxsize, unsigned maxpages, |
542 | size_t *start) | 873 | size_t *start) |
@@ -547,6 +878,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, | |||
547 | if (!maxsize) | 878 | if (!maxsize) |
548 | return 0; | 879 | return 0; |
549 | 880 | ||
881 | if (unlikely(i->type & ITER_PIPE)) | ||
882 | return pipe_get_pages(i, pages, maxsize, maxpages, start); | ||
550 | iterate_all_kinds(i, maxsize, v, ({ | 883 | iterate_all_kinds(i, maxsize, v, ({ |
551 | unsigned long addr = (unsigned long)v.iov_base; | 884 | unsigned long addr = (unsigned long)v.iov_base; |
552 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); | 885 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); |
@@ -582,6 +915,37 @@ static struct page **get_pages_array(size_t n) | |||
582 | return p; | 915 | return p; |
583 | } | 916 | } |
584 | 917 | ||
918 | static ssize_t pipe_get_pages_alloc(struct iov_iter *i, | ||
919 | struct page ***pages, size_t maxsize, | ||
920 | size_t *start) | ||
921 | { | ||
922 | struct page **p; | ||
923 | size_t n; | ||
924 | int idx; | ||
925 | int npages; | ||
926 | |||
927 | if (!sanity(i)) | ||
928 | return -EFAULT; | ||
929 | |||
930 | data_start(i, &idx, start); | ||
931 | /* some of this one + all after this one */ | ||
932 | npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; | ||
933 | n = npages * PAGE_SIZE - *start; | ||
934 | if (maxsize > n) | ||
935 | maxsize = n; | ||
936 | else | ||
937 | npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); | ||
938 | p = get_pages_array(npages); | ||
939 | if (!p) | ||
940 | return -ENOMEM; | ||
941 | n = __pipe_get_pages(i, maxsize, p, idx, start); | ||
942 | if (n > 0) | ||
943 | *pages = p; | ||
944 | else | ||
945 | kvfree(p); | ||
946 | return n; | ||
947 | } | ||
948 | |||
585 | ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, | 949 | ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, |
586 | struct page ***pages, size_t maxsize, | 950 | struct page ***pages, size_t maxsize, |
587 | size_t *start) | 951 | size_t *start) |
@@ -594,6 +958,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, | |||
594 | if (!maxsize) | 958 | if (!maxsize) |
595 | return 0; | 959 | return 0; |
596 | 960 | ||
961 | if (unlikely(i->type & ITER_PIPE)) | ||
962 | return pipe_get_pages_alloc(i, pages, maxsize, start); | ||
597 | iterate_all_kinds(i, maxsize, v, ({ | 963 | iterate_all_kinds(i, maxsize, v, ({ |
598 | unsigned long addr = (unsigned long)v.iov_base; | 964 | unsigned long addr = (unsigned long)v.iov_base; |
599 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); | 965 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); |
@@ -635,6 +1001,10 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, | |||
635 | __wsum sum, next; | 1001 | __wsum sum, next; |
636 | size_t off = 0; | 1002 | size_t off = 0; |
637 | sum = *csum; | 1003 | sum = *csum; |
1004 | if (unlikely(i->type & ITER_PIPE)) { | ||
1005 | WARN_ON(1); | ||
1006 | return 0; | ||
1007 | } | ||
638 | iterate_and_advance(i, bytes, v, ({ | 1008 | iterate_and_advance(i, bytes, v, ({ |
639 | int err = 0; | 1009 | int err = 0; |
640 | next = csum_and_copy_from_user(v.iov_base, | 1010 | next = csum_and_copy_from_user(v.iov_base, |
@@ -673,6 +1043,10 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, | |||
673 | __wsum sum, next; | 1043 | __wsum sum, next; |
674 | size_t off = 0; | 1044 | size_t off = 0; |
675 | sum = *csum; | 1045 | sum = *csum; |
1046 | if (unlikely(i->type & ITER_PIPE)) { | ||
1047 | WARN_ON(1); /* for now */ | ||
1048 | return 0; | ||
1049 | } | ||
676 | iterate_and_advance(i, bytes, v, ({ | 1050 | iterate_and_advance(i, bytes, v, ({ |
677 | int err = 0; | 1051 | int err = 0; |
678 | next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, | 1052 | next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, |
@@ -712,7 +1086,20 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) | |||
712 | if (!size) | 1086 | if (!size) |
713 | return 0; | 1087 | return 0; |
714 | 1088 | ||
715 | iterate_all_kinds(i, size, v, ({ | 1089 | if (unlikely(i->type & ITER_PIPE)) { |
1090 | struct pipe_inode_info *pipe = i->pipe; | ||
1091 | size_t off; | ||
1092 | int idx; | ||
1093 | |||
1094 | if (!sanity(i)) | ||
1095 | return 0; | ||
1096 | |||
1097 | data_start(i, &idx, &off); | ||
1098 | /* some of this one + all after this one */ | ||
1099 | npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1; | ||
1100 | if (npages >= maxpages) | ||
1101 | return maxpages; | ||
1102 | } else iterate_all_kinds(i, size, v, ({ | ||
716 | unsigned long p = (unsigned long)v.iov_base; | 1103 | unsigned long p = (unsigned long)v.iov_base; |
717 | npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) | 1104 | npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) |
718 | - p / PAGE_SIZE; | 1105 | - p / PAGE_SIZE; |
@@ -737,6 +1124,10 @@ EXPORT_SYMBOL(iov_iter_npages); | |||
737 | const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) | 1124 | const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) |
738 | { | 1125 | { |
739 | *new = *old; | 1126 | *new = *old; |
1127 | if (unlikely(new->type & ITER_PIPE)) { | ||
1128 | WARN_ON(1); | ||
1129 | return NULL; | ||
1130 | } | ||
740 | if (new->type & ITER_BVEC) | 1131 | if (new->type & ITER_BVEC) |
741 | return new->bvec = kmemdup(new->bvec, | 1132 | return new->bvec = kmemdup(new->bvec, |
742 | new->nr_segs * sizeof(struct bio_vec), | 1133 | new->nr_segs * sizeof(struct bio_vec), |
diff --git a/mm/shmem.c b/mm/shmem.c index 971fc83e6402..d86b5e455fef 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -2311,119 +2311,6 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) | |||
2311 | return retval ? retval : error; | 2311 | return retval ? retval : error; |
2312 | } | 2312 | } |
2313 | 2313 | ||
2314 | static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, | ||
2315 | struct pipe_inode_info *pipe, size_t len, | ||
2316 | unsigned int flags) | ||
2317 | { | ||
2318 | struct address_space *mapping = in->f_mapping; | ||
2319 | struct inode *inode = mapping->host; | ||
2320 | unsigned int loff, nr_pages, req_pages; | ||
2321 | struct page *pages[PIPE_DEF_BUFFERS]; | ||
2322 | struct partial_page partial[PIPE_DEF_BUFFERS]; | ||
2323 | struct page *page; | ||
2324 | pgoff_t index, end_index; | ||
2325 | loff_t isize, left; | ||
2326 | int error, page_nr; | ||
2327 | struct splice_pipe_desc spd = { | ||
2328 | .pages = pages, | ||
2329 | .partial = partial, | ||
2330 | .nr_pages_max = PIPE_DEF_BUFFERS, | ||
2331 | .flags = flags, | ||
2332 | .ops = &page_cache_pipe_buf_ops, | ||
2333 | .spd_release = spd_release_page, | ||
2334 | }; | ||
2335 | |||
2336 | isize = i_size_read(inode); | ||
2337 | if (unlikely(*ppos >= isize)) | ||
2338 | return 0; | ||
2339 | |||
2340 | left = isize - *ppos; | ||
2341 | if (unlikely(left < len)) | ||
2342 | len = left; | ||
2343 | |||
2344 | if (splice_grow_spd(pipe, &spd)) | ||
2345 | return -ENOMEM; | ||
2346 | |||
2347 | index = *ppos >> PAGE_SHIFT; | ||
2348 | loff = *ppos & ~PAGE_MASK; | ||
2349 | req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
2350 | nr_pages = min(req_pages, spd.nr_pages_max); | ||
2351 | |||
2352 | spd.nr_pages = find_get_pages_contig(mapping, index, | ||
2353 | nr_pages, spd.pages); | ||
2354 | index += spd.nr_pages; | ||
2355 | error = 0; | ||
2356 | |||
2357 | while (spd.nr_pages < nr_pages) { | ||
2358 | error = shmem_getpage(inode, index, &page, SGP_CACHE); | ||
2359 | if (error) | ||
2360 | break; | ||
2361 | unlock_page(page); | ||
2362 | spd.pages[spd.nr_pages++] = page; | ||
2363 | index++; | ||
2364 | } | ||
2365 | |||
2366 | index = *ppos >> PAGE_SHIFT; | ||
2367 | nr_pages = spd.nr_pages; | ||
2368 | spd.nr_pages = 0; | ||
2369 | |||
2370 | for (page_nr = 0; page_nr < nr_pages; page_nr++) { | ||
2371 | unsigned int this_len; | ||
2372 | |||
2373 | if (!len) | ||
2374 | break; | ||
2375 | |||
2376 | this_len = min_t(unsigned long, len, PAGE_SIZE - loff); | ||
2377 | page = spd.pages[page_nr]; | ||
2378 | |||
2379 | if (!PageUptodate(page) || page->mapping != mapping) { | ||
2380 | error = shmem_getpage(inode, index, &page, SGP_CACHE); | ||
2381 | if (error) | ||
2382 | break; | ||
2383 | unlock_page(page); | ||
2384 | put_page(spd.pages[page_nr]); | ||
2385 | spd.pages[page_nr] = page; | ||
2386 | } | ||
2387 | |||
2388 | isize = i_size_read(inode); | ||
2389 | end_index = (isize - 1) >> PAGE_SHIFT; | ||
2390 | if (unlikely(!isize || index > end_index)) | ||
2391 | break; | ||
2392 | |||
2393 | if (end_index == index) { | ||
2394 | unsigned int plen; | ||
2395 | |||
2396 | plen = ((isize - 1) & ~PAGE_MASK) + 1; | ||
2397 | if (plen <= loff) | ||
2398 | break; | ||
2399 | |||
2400 | this_len = min(this_len, plen - loff); | ||
2401 | len = this_len; | ||
2402 | } | ||
2403 | |||
2404 | spd.partial[page_nr].offset = loff; | ||
2405 | spd.partial[page_nr].len = this_len; | ||
2406 | len -= this_len; | ||
2407 | loff = 0; | ||
2408 | spd.nr_pages++; | ||
2409 | index++; | ||
2410 | } | ||
2411 | |||
2412 | while (page_nr < nr_pages) | ||
2413 | put_page(spd.pages[page_nr++]); | ||
2414 | |||
2415 | if (spd.nr_pages) | ||
2416 | error = splice_to_pipe(pipe, &spd); | ||
2417 | |||
2418 | splice_shrink_spd(&spd); | ||
2419 | |||
2420 | if (error > 0) { | ||
2421 | *ppos += error; | ||
2422 | file_accessed(in); | ||
2423 | } | ||
2424 | return error; | ||
2425 | } | ||
2426 | |||
2427 | /* | 2314 | /* |
2428 | * llseek SEEK_DATA or SEEK_HOLE through the radix_tree. | 2315 | * llseek SEEK_DATA or SEEK_HOLE through the radix_tree. |
2429 | */ | 2316 | */ |
@@ -3786,7 +3673,7 @@ static const struct file_operations shmem_file_operations = { | |||
3786 | .read_iter = shmem_file_read_iter, | 3673 | .read_iter = shmem_file_read_iter, |
3787 | .write_iter = generic_file_write_iter, | 3674 | .write_iter = generic_file_write_iter, |
3788 | .fsync = noop_fsync, | 3675 | .fsync = noop_fsync, |
3789 | .splice_read = shmem_file_splice_read, | 3676 | .splice_read = generic_file_splice_read, |
3790 | .splice_write = iter_file_splice_write, | 3677 | .splice_write = iter_file_splice_write, |
3791 | .fallocate = shmem_fallocate, | 3678 | .fallocate = shmem_fallocate, |
3792 | #endif | 3679 | #endif |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index cbd19d250947..1e3e0087245b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -1962,37 +1962,13 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, | |||
1962 | return false; | 1962 | return false; |
1963 | } | 1963 | } |
1964 | 1964 | ||
1965 | ssize_t skb_socket_splice(struct sock *sk, | ||
1966 | struct pipe_inode_info *pipe, | ||
1967 | struct splice_pipe_desc *spd) | ||
1968 | { | ||
1969 | int ret; | ||
1970 | |||
1971 | /* Drop the socket lock, otherwise we have reverse | ||
1972 | * locking dependencies between sk_lock and i_mutex | ||
1973 | * here as compared to sendfile(). We enter here | ||
1974 | * with the socket lock held, and splice_to_pipe() will | ||
1975 | * grab the pipe inode lock. For sendfile() emulation, | ||
1976 | * we call into ->sendpage() with the i_mutex lock held | ||
1977 | * and networking will grab the socket lock. | ||
1978 | */ | ||
1979 | release_sock(sk); | ||
1980 | ret = splice_to_pipe(pipe, spd); | ||
1981 | lock_sock(sk); | ||
1982 | |||
1983 | return ret; | ||
1984 | } | ||
1985 | |||
1986 | /* | 1965 | /* |
1987 | * Map data from the skb to a pipe. Should handle both the linear part, | 1966 | * Map data from the skb to a pipe. Should handle both the linear part, |
1988 | * the fragments, and the frag list. | 1967 | * the fragments, and the frag list. |
1989 | */ | 1968 | */ |
1990 | int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, | 1969 | int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, |
1991 | struct pipe_inode_info *pipe, unsigned int tlen, | 1970 | struct pipe_inode_info *pipe, unsigned int tlen, |
1992 | unsigned int flags, | 1971 | unsigned int flags) |
1993 | ssize_t (*splice_cb)(struct sock *, | ||
1994 | struct pipe_inode_info *, | ||
1995 | struct splice_pipe_desc *)) | ||
1996 | { | 1972 | { |
1997 | struct partial_page partial[MAX_SKB_FRAGS]; | 1973 | struct partial_page partial[MAX_SKB_FRAGS]; |
1998 | struct page *pages[MAX_SKB_FRAGS]; | 1974 | struct page *pages[MAX_SKB_FRAGS]; |
@@ -2009,7 +1985,7 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, | |||
2009 | __skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk); | 1985 | __skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk); |
2010 | 1986 | ||
2011 | if (spd.nr_pages) | 1987 | if (spd.nr_pages) |
2012 | ret = splice_cb(sk, pipe, &spd); | 1988 | ret = splice_to_pipe(pipe, &spd); |
2013 | 1989 | ||
2014 | return ret; | 1990 | return ret; |
2015 | } | 1991 | } |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f253e5019d22..2414b7c80b87 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -691,8 +691,7 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, | |||
691 | int ret; | 691 | int ret; |
692 | 692 | ||
693 | ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe, | 693 | ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe, |
694 | min(rd_desc->count, len), tss->flags, | 694 | min(rd_desc->count, len), tss->flags); |
695 | skb_socket_splice); | ||
696 | if (ret > 0) | 695 | if (ret > 0) |
697 | rd_desc->count -= ret; | 696 | rd_desc->count -= ret; |
698 | return ret; | 697 | return ret; |
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index b7f869a85ab7..7e08a4d3d77d 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c | |||
@@ -1160,19 +1160,6 @@ out: | |||
1160 | return copied ? : err; | 1160 | return copied ? : err; |
1161 | } | 1161 | } |
1162 | 1162 | ||
1163 | static ssize_t kcm_sock_splice(struct sock *sk, | ||
1164 | struct pipe_inode_info *pipe, | ||
1165 | struct splice_pipe_desc *spd) | ||
1166 | { | ||
1167 | int ret; | ||
1168 | |||
1169 | release_sock(sk); | ||
1170 | ret = splice_to_pipe(pipe, spd); | ||
1171 | lock_sock(sk); | ||
1172 | |||
1173 | return ret; | ||
1174 | } | ||
1175 | |||
1176 | static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, | 1163 | static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, |
1177 | struct pipe_inode_info *pipe, size_t len, | 1164 | struct pipe_inode_info *pipe, size_t len, |
1178 | unsigned int flags) | 1165 | unsigned int flags) |
@@ -1202,8 +1189,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, | |||
1202 | if (len > rxm->full_len) | 1189 | if (len > rxm->full_len) |
1203 | len = rxm->full_len; | 1190 | len = rxm->full_len; |
1204 | 1191 | ||
1205 | copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags, | 1192 | copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags); |
1206 | kcm_sock_splice); | ||
1207 | if (copied < 0) { | 1193 | if (copied < 0) { |
1208 | err = copied; | 1194 | err = copied; |
1209 | goto err_out; | 1195 | goto err_out; |
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8309687a56b0..145082e2ba36 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
@@ -2475,28 +2475,13 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, | |||
2475 | return unix_stream_read_generic(&state); | 2475 | return unix_stream_read_generic(&state); |
2476 | } | 2476 | } |
2477 | 2477 | ||
2478 | static ssize_t skb_unix_socket_splice(struct sock *sk, | ||
2479 | struct pipe_inode_info *pipe, | ||
2480 | struct splice_pipe_desc *spd) | ||
2481 | { | ||
2482 | int ret; | ||
2483 | struct unix_sock *u = unix_sk(sk); | ||
2484 | |||
2485 | mutex_unlock(&u->iolock); | ||
2486 | ret = splice_to_pipe(pipe, spd); | ||
2487 | mutex_lock(&u->iolock); | ||
2488 | |||
2489 | return ret; | ||
2490 | } | ||
2491 | |||
2492 | static int unix_stream_splice_actor(struct sk_buff *skb, | 2478 | static int unix_stream_splice_actor(struct sk_buff *skb, |
2493 | int skip, int chunk, | 2479 | int skip, int chunk, |
2494 | struct unix_stream_read_state *state) | 2480 | struct unix_stream_read_state *state) |
2495 | { | 2481 | { |
2496 | return skb_splice_bits(skb, state->socket->sk, | 2482 | return skb_splice_bits(skb, state->socket->sk, |
2497 | UNIXCB(skb).consumed + skip, | 2483 | UNIXCB(skb).consumed + skip, |
2498 | state->pipe, chunk, state->splice_flags, | 2484 | state->pipe, chunk, state->splice_flags); |
2499 | skb_unix_socket_splice); | ||
2500 | } | 2485 | } |
2501 | 2486 | ||
2502 | static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, | 2487 | static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, |