aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jens Axboe <jens.axboe@oracle.com> 2007-06-14 07:08:55 -0400
committer Jens Axboe <jens.axboe@oracle.com> 2007-07-10 02:04:12 -0400
commit 6a14b90bb6bc7cd83e2a444bf457a2ea645cbfe7 (patch)
tree c6f2788cbafd29bdf520c0b2a232818f4d62ec9d
parent c66ab6fa705e1b2887a6d9246b798bdc526839e2 (diff)
vmsplice: add vmsplice-to-user support
A bit of a cheat, it actually just copies the data to userspace. But this makes the interface nice and symmetric and enables people to build on splice, with room for future improvement in performance. Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- fs/ocfs2/file.c 2
-rw-r--r-- fs/splice.c 178
-rw-r--r-- include/linux/pipe_fs_i.h 8
3 files changed, 158 insertions, 30 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 93565c03d315..222f108ee454 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1640,7 +1640,7 @@ static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
1640 .total_len = len, 1640 .total_len = len,
1641 .flags = flags, 1641 .flags = flags,
1642 .pos = *ppos, 1642 .pos = *ppos,
1643 .file = out, 1643 .u.file = out,
1644 }; 1644 };
1645 1645
1646 ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor); 1646 ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor);
diff --git a/fs/splice.c b/fs/splice.c
index 68f6328236a6..13846f723d72 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -528,7 +528,7 @@ EXPORT_SYMBOL(generic_file_splice_read);
528static int pipe_to_sendpage(struct pipe_inode_info *pipe, 528static int pipe_to_sendpage(struct pipe_inode_info *pipe,
529 struct pipe_buffer *buf, struct splice_desc *sd) 529 struct pipe_buffer *buf, struct splice_desc *sd)
530{ 530{
531 struct file *file = sd->file; 531 struct file *file = sd->u.file;
532 loff_t pos = sd->pos; 532 loff_t pos = sd->pos;
533 int ret, more; 533 int ret, more;
534 534
@@ -566,7 +566,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
566static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 566static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
567 struct splice_desc *sd) 567 struct splice_desc *sd)
568{ 568{
569 struct file *file = sd->file; 569 struct file *file = sd->u.file;
570 struct address_space *mapping = file->f_mapping; 570 struct address_space *mapping = file->f_mapping;
571 unsigned int offset, this_len; 571 unsigned int offset, this_len;
572 struct page *page; 572 struct page *page;
@@ -769,7 +769,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
769 .total_len = len, 769 .total_len = len,
770 .flags = flags, 770 .flags = flags,
771 .pos = *ppos, 771 .pos = *ppos,
772 .file = out, 772 .u.file = out,
773 }; 773 };
774 774
775 /* 775 /*
@@ -807,7 +807,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
807 .total_len = len, 807 .total_len = len,
808 .flags = flags, 808 .flags = flags,
809 .pos = *ppos, 809 .pos = *ppos,
810 .file = out, 810 .u.file = out,
811 }; 811 };
812 ssize_t ret; 812 ssize_t ret;
813 int err; 813 int err;
@@ -1087,7 +1087,7 @@ EXPORT_SYMBOL(splice_direct_to_actor);
1087static int direct_splice_actor(struct pipe_inode_info *pipe, 1087static int direct_splice_actor(struct pipe_inode_info *pipe,
1088 struct splice_desc *sd) 1088 struct splice_desc *sd)
1089{ 1089{
1090 struct file *file = sd->file; 1090 struct file *file = sd->u.file;
1091 1091
1092 return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); 1092 return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
1093} 1093}
@@ -1100,7 +1100,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1100 .total_len = len, 1100 .total_len = len,
1101 .flags = flags, 1101 .flags = flags,
1102 .pos = *ppos, 1102 .pos = *ppos,
1103 .file = out, 1103 .u.file = out,
1104 }; 1104 };
1105 size_t ret; 1105 size_t ret;
1106 1106
@@ -1289,28 +1289,131 @@ static int get_iovec_page_array(const struct iovec __user *iov,
1289 return error; 1289 return error;
1290} 1290}
1291 1291
1292static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
1293 struct splice_desc *sd)
1294{
1295 char *src;
1296 int ret;
1297
1298 ret = buf->ops->pin(pipe, buf);
1299 if (unlikely(ret))
1300 return ret;
1301
1302 /*
1303 * See if we can use the atomic maps, by prefaulting in the
1304 * pages and doing an atomic copy
1305 */
1306 if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
1307 src = buf->ops->map(pipe, buf, 1);
1308 ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
1309 sd->len);
1310 buf->ops->unmap(pipe, buf, src);
1311 if (!ret) {
1312 ret = sd->len;
1313 goto out;
1314 }
1315 }
1316
1317 /*
1318 * No dice, use slow non-atomic map and copy
1319 */
1320 src = buf->ops->map(pipe, buf, 0);
1321
1322 ret = sd->len;
1323 if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
1324 ret = -EFAULT;
1325
1326out:
1327 if (ret > 0)
1328 sd->u.userptr += ret;
1329 buf->ops->unmap(pipe, buf, src);
1330 return ret;
1331}
1332
1333/*
1334 * For lack of a better implementation, implement vmsplice() to userspace
1335 * as a simple copy of the pipes pages to the user iov.
1336 */
1337static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
1338 unsigned long nr_segs, unsigned int flags)
1339{
1340 struct pipe_inode_info *pipe;
1341 struct splice_desc sd;
1342 ssize_t size;
1343 int error;
1344 long ret;
1345
1346 pipe = pipe_info(file->f_path.dentry->d_inode);
1347 if (!pipe)
1348 return -EBADF;
1349
1350 if (pipe->inode)
1351 mutex_lock(&pipe->inode->i_mutex);
1352
1353 error = ret = 0;
1354 while (nr_segs) {
1355 void __user *base;
1356 size_t len;
1357
1358 /*
1359 * Get user address base and length for this iovec.
1360 */
1361 error = get_user(base, &iov->iov_base);
1362 if (unlikely(error))
1363 break;
1364 error = get_user(len, &iov->iov_len);
1365 if (unlikely(error))
1366 break;
1367
1368 /*
1369 * Sanity check this iovec. 0 read succeeds.
1370 */
1371 if (unlikely(!len))
1372 break;
1373 if (unlikely(!base)) {
1374 error = -EFAULT;
1375 break;
1376 }
1377
1378 sd.len = 0;
1379 sd.total_len = len;
1380 sd.flags = flags;
1381 sd.u.userptr = base;
1382 sd.pos = 0;
1383
1384 size = __splice_from_pipe(pipe, &sd, pipe_to_user);
1385 if (size < 0) {
1386 if (!ret)
1387 ret = size;
1388
1389 break;
1390 }
1391
1392 ret += size;
1393
1394 if (size < len)
1395 break;
1396
1397 nr_segs--;
1398 iov++;
1399 }
1400
1401 if (pipe->inode)
1402 mutex_unlock(&pipe->inode->i_mutex);
1403
1404 if (!ret)
1405 ret = error;
1406
1407 return ret;
1408}
1409
1292/* 1410/*
1293 * vmsplice splices a user address range into a pipe. It can be thought of 1411 * vmsplice splices a user address range into a pipe. It can be thought of
1294 * as splice-from-memory, where the regular splice is splice-from-file (or 1412 * as splice-from-memory, where the regular splice is splice-from-file (or
1295 * to file). In both cases the output is a pipe, naturally. 1413 * to file). In both cases the output is a pipe, naturally.
1296 *
1297 * Note that vmsplice only supports splicing _from_ user memory to a pipe,
1298 * not the other way around. Splicing from user memory is a simple operation
1299 * that can be supported without any funky alignment restrictions or nasty
1300 * vm tricks. We simply map in the user memory and fill them into a pipe.
1301 * The reverse isn't quite as easy, though. There are two possible solutions
1302 * for that:
1303 *
1304 * - memcpy() the data internally, at which point we might as well just
1305 * do a regular read() on the buffer anyway.
1306 * - Lots of nasty vm tricks, that are neither fast nor flexible (it
1307 * has restriction limitations on both ends of the pipe).
1308 *
1309 * Alas, it isn't here.
1310 *
1311 */ 1414 */
1312static long do_vmsplice(struct file *file, const struct iovec __user *iov, 1415static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
1313 unsigned long nr_segs, unsigned int flags) 1416 unsigned long nr_segs, unsigned int flags)
1314{ 1417{
1315 struct pipe_inode_info *pipe; 1418 struct pipe_inode_info *pipe;
1316 struct page *pages[PIPE_BUFFERS]; 1419 struct page *pages[PIPE_BUFFERS];
@@ -1325,10 +1428,6 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
1325 pipe = pipe_info(file->f_path.dentry->d_inode); 1428 pipe = pipe_info(file->f_path.dentry->d_inode);
1326 if (!pipe) 1429 if (!pipe)
1327 return -EBADF; 1430 return -EBADF;
1328 if (unlikely(nr_segs > UIO_MAXIOV))
1329 return -EINVAL;
1330 else if (unlikely(!nr_segs))
1331 return 0;
1332 1431
1333 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, 1432 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
1334 flags & SPLICE_F_GIFT); 1433 flags & SPLICE_F_GIFT);
@@ -1338,6 +1437,22 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
1338 return splice_to_pipe(pipe, &spd); 1437 return splice_to_pipe(pipe, &spd);
1339} 1438}
1340 1439
1440/*
1441 * Note that vmsplice only really supports true splicing _from_ user memory
1442 * to a pipe, not the other way around. Splicing from user memory is a simple
1443 * operation that can be supported without any funky alignment restrictions
1444 * or nasty vm tricks. We simply map in the user memory and fill them into
1445 * a pipe. The reverse isn't quite as easy, though. There are two possible
1446 * solutions for that:
1447 *
1448 * - memcpy() the data internally, at which point we might as well just
1449 * do a regular read() on the buffer anyway.
1450 * - Lots of nasty vm tricks that are neither fast nor flexible (they
1451 * impose restrictions on both ends of the pipe).
1452 *
1453 * Currently we punt and implement it as a normal copy, see pipe_to_user().
1454 *
1455 */
1341asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, 1456asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
1342 unsigned long nr_segs, unsigned int flags) 1457 unsigned long nr_segs, unsigned int flags)
1343{ 1458{
@@ -1345,11 +1460,18 @@ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
1345 long error; 1460 long error;
1346 int fput; 1461 int fput;
1347 1462
1463 if (unlikely(nr_segs > UIO_MAXIOV))
1464 return -EINVAL;
1465 else if (unlikely(!nr_segs))
1466 return 0;
1467
1348 error = -EBADF; 1468 error = -EBADF;
1349 file = fget_light(fd, &fput); 1469 file = fget_light(fd, &fput);
1350 if (file) { 1470 if (file) {
1351 if (file->f_mode & FMODE_WRITE) 1471 if (file->f_mode & FMODE_WRITE)
1352 error = do_vmsplice(file, iov, nr_segs, flags); 1472 error = vmsplice_to_pipe(file, iov, nr_segs, flags);
1473 else if (file->f_mode & FMODE_READ)
1474 error = vmsplice_to_user(file, iov, nr_segs, flags);
1353 1475
1354 fput_light(file, fput); 1476 fput_light(file, fput);
1355 } 1477 }
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 883ba9b78d3f..6e7bfc125425 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -88,7 +88,13 @@ int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
88struct splice_desc { 88struct splice_desc {
89 unsigned int len, total_len; /* current and remaining length */ 89 unsigned int len, total_len; /* current and remaining length */
90 unsigned int flags; /* splice flags */ 90 unsigned int flags; /* splice flags */
91 struct file *file; /* file to read/write */ 91 /*
92 * actor() private data
93 */
94 union {
95 void __user *userptr; /* memory to write to */
96 struct file *file; /* file to read/write */
97 } u;
92 loff_t pos; /* file position */ 98 loff_t pos; /* file position */
93}; 99};
94 100