diff options
author | Jens Axboe <jens.axboe@oracle.com> | 2007-06-14 07:08:55 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2007-07-10 02:04:12 -0400 |
commit | 6a14b90bb6bc7cd83e2a444bf457a2ea645cbfe7 (patch) | |
tree | c6f2788cbafd29bdf520c0b2a232818f4d62ec9d | |
parent | c66ab6fa705e1b2887a6d9246b798bdc526839e2 (diff) |
vmsplice: add vmsplice-to-user support
A bit of a cheat, it actually just copies the data to userspace. But
this makes the interface nice and symmetric and enables people to build
on splice, with room for future improvement in performance.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- | fs/ocfs2/file.c | 2 | ||||
-rw-r--r-- | fs/splice.c | 178 | ||||
-rw-r--r-- | include/linux/pipe_fs_i.h | 8 |
3 files changed, 158 insertions, 30 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 93565c03d315..222f108ee454 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -1640,7 +1640,7 @@ static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe, | |||
1640 | .total_len = len, | 1640 | .total_len = len, |
1641 | .flags = flags, | 1641 | .flags = flags, |
1642 | .pos = *ppos, | 1642 | .pos = *ppos, |
1643 | .file = out, | 1643 | .u.file = out, |
1644 | }; | 1644 | }; |
1645 | 1645 | ||
1646 | ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor); | 1646 | ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor); |
diff --git a/fs/splice.c b/fs/splice.c index 68f6328236a6..13846f723d72 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -528,7 +528,7 @@ EXPORT_SYMBOL(generic_file_splice_read); | |||
528 | static int pipe_to_sendpage(struct pipe_inode_info *pipe, | 528 | static int pipe_to_sendpage(struct pipe_inode_info *pipe, |
529 | struct pipe_buffer *buf, struct splice_desc *sd) | 529 | struct pipe_buffer *buf, struct splice_desc *sd) |
530 | { | 530 | { |
531 | struct file *file = sd->file; | 531 | struct file *file = sd->u.file; |
532 | loff_t pos = sd->pos; | 532 | loff_t pos = sd->pos; |
533 | int ret, more; | 533 | int ret, more; |
534 | 534 | ||
@@ -566,7 +566,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, | |||
566 | static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | 566 | static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, |
567 | struct splice_desc *sd) | 567 | struct splice_desc *sd) |
568 | { | 568 | { |
569 | struct file *file = sd->file; | 569 | struct file *file = sd->u.file; |
570 | struct address_space *mapping = file->f_mapping; | 570 | struct address_space *mapping = file->f_mapping; |
571 | unsigned int offset, this_len; | 571 | unsigned int offset, this_len; |
572 | struct page *page; | 572 | struct page *page; |
@@ -769,7 +769,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
769 | .total_len = len, | 769 | .total_len = len, |
770 | .flags = flags, | 770 | .flags = flags, |
771 | .pos = *ppos, | 771 | .pos = *ppos, |
772 | .file = out, | 772 | .u.file = out, |
773 | }; | 773 | }; |
774 | 774 | ||
775 | /* | 775 | /* |
@@ -807,7 +807,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, | |||
807 | .total_len = len, | 807 | .total_len = len, |
808 | .flags = flags, | 808 | .flags = flags, |
809 | .pos = *ppos, | 809 | .pos = *ppos, |
810 | .file = out, | 810 | .u.file = out, |
811 | }; | 811 | }; |
812 | ssize_t ret; | 812 | ssize_t ret; |
813 | int err; | 813 | int err; |
@@ -1087,7 +1087,7 @@ EXPORT_SYMBOL(splice_direct_to_actor); | |||
1087 | static int direct_splice_actor(struct pipe_inode_info *pipe, | 1087 | static int direct_splice_actor(struct pipe_inode_info *pipe, |
1088 | struct splice_desc *sd) | 1088 | struct splice_desc *sd) |
1089 | { | 1089 | { |
1090 | struct file *file = sd->file; | 1090 | struct file *file = sd->u.file; |
1091 | 1091 | ||
1092 | return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); | 1092 | return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); |
1093 | } | 1093 | } |
@@ -1100,7 +1100,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | |||
1100 | .total_len = len, | 1100 | .total_len = len, |
1101 | .flags = flags, | 1101 | .flags = flags, |
1102 | .pos = *ppos, | 1102 | .pos = *ppos, |
1103 | .file = out, | 1103 | .u.file = out, |
1104 | }; | 1104 | }; |
1105 | size_t ret; | 1105 | size_t ret; |
1106 | 1106 | ||
@@ -1289,28 +1289,131 @@ static int get_iovec_page_array(const struct iovec __user *iov, | |||
1289 | return error; | 1289 | return error; |
1290 | } | 1290 | } |
1291 | 1291 | ||
1292 | static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | ||
1293 | struct splice_desc *sd) | ||
1294 | { | ||
1295 | char *src; | ||
1296 | int ret; | ||
1297 | |||
1298 | ret = buf->ops->pin(pipe, buf); | ||
1299 | if (unlikely(ret)) | ||
1300 | return ret; | ||
1301 | |||
1302 | /* | ||
1303 | * See if we can use the atomic maps, by prefaulting in the | ||
1304 | * pages and doing an atomic copy | ||
1305 | */ | ||
1306 | if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) { | ||
1307 | src = buf->ops->map(pipe, buf, 1); | ||
1308 | ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset, | ||
1309 | sd->len); | ||
1310 | buf->ops->unmap(pipe, buf, src); | ||
1311 | if (!ret) { | ||
1312 | ret = sd->len; | ||
1313 | goto out; | ||
1314 | } | ||
1315 | } | ||
1316 | |||
1317 | /* | ||
1318 | * No dice, use slow non-atomic map and copy | ||
1319 | */ | ||
1320 | src = buf->ops->map(pipe, buf, 0); | ||
1321 | |||
1322 | ret = sd->len; | ||
1323 | if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len)) | ||
1324 | ret = -EFAULT; | ||
1325 | |||
1326 | out: | ||
1327 | if (ret > 0) | ||
1328 | sd->u.userptr += ret; | ||
1329 | buf->ops->unmap(pipe, buf, src); | ||
1330 | return ret; | ||
1331 | } | ||
1332 | |||
1333 | /* | ||
1334 | * For lack of a better implementation, implement vmsplice() to userspace | ||
1335 | * as a simple copy of the pipes pages to the user iov. | ||
1336 | */ | ||
1337 | static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, | ||
1338 | unsigned long nr_segs, unsigned int flags) | ||
1339 | { | ||
1340 | struct pipe_inode_info *pipe; | ||
1341 | struct splice_desc sd; | ||
1342 | ssize_t size; | ||
1343 | int error; | ||
1344 | long ret; | ||
1345 | |||
1346 | pipe = pipe_info(file->f_path.dentry->d_inode); | ||
1347 | if (!pipe) | ||
1348 | return -EBADF; | ||
1349 | |||
1350 | if (pipe->inode) | ||
1351 | mutex_lock(&pipe->inode->i_mutex); | ||
1352 | |||
1353 | error = ret = 0; | ||
1354 | while (nr_segs) { | ||
1355 | void __user *base; | ||
1356 | size_t len; | ||
1357 | |||
1358 | /* | ||
1359 | * Get user address base and length for this iovec. | ||
1360 | */ | ||
1361 | error = get_user(base, &iov->iov_base); | ||
1362 | if (unlikely(error)) | ||
1363 | break; | ||
1364 | error = get_user(len, &iov->iov_len); | ||
1365 | if (unlikely(error)) | ||
1366 | break; | ||
1367 | |||
1368 | /* | ||
1369 | * Sanity check this iovec. 0 read succeeds. | ||
1370 | */ | ||
1371 | if (unlikely(!len)) | ||
1372 | break; | ||
1373 | if (unlikely(!base)) { | ||
1374 | error = -EFAULT; | ||
1375 | break; | ||
1376 | } | ||
1377 | |||
1378 | sd.len = 0; | ||
1379 | sd.total_len = len; | ||
1380 | sd.flags = flags; | ||
1381 | sd.u.userptr = base; | ||
1382 | sd.pos = 0; | ||
1383 | |||
1384 | size = __splice_from_pipe(pipe, &sd, pipe_to_user); | ||
1385 | if (size < 0) { | ||
1386 | if (!ret) | ||
1387 | ret = size; | ||
1388 | |||
1389 | break; | ||
1390 | } | ||
1391 | |||
1392 | ret += size; | ||
1393 | |||
1394 | if (size < len) | ||
1395 | break; | ||
1396 | |||
1397 | nr_segs--; | ||
1398 | iov++; | ||
1399 | } | ||
1400 | |||
1401 | if (pipe->inode) | ||
1402 | mutex_unlock(&pipe->inode->i_mutex); | ||
1403 | |||
1404 | if (!ret) | ||
1405 | ret = error; | ||
1406 | |||
1407 | return ret; | ||
1408 | } | ||
1409 | |||
1292 | /* | 1410 | /* |
1293 | * vmsplice splices a user address range into a pipe. It can be thought of | 1411 | * vmsplice splices a user address range into a pipe. It can be thought of |
1294 | * as splice-from-memory, where the regular splice is splice-from-file (or | 1412 | * as splice-from-memory, where the regular splice is splice-from-file (or |
1295 | * to file). In both cases the output is a pipe, naturally. | 1413 | * to file). In both cases the output is a pipe, naturally. |
1296 | * | ||
1297 | * Note that vmsplice only supports splicing _from_ user memory to a pipe, | ||
1298 | * not the other way around. Splicing from user memory is a simple operation | ||
1299 | * that can be supported without any funky alignment restrictions or nasty | ||
1300 | * vm tricks. We simply map in the user memory and fill them into a pipe. | ||
1301 | * The reverse isn't quite as easy, though. There are two possible solutions | ||
1302 | * for that: | ||
1303 | * | ||
1304 | * - memcpy() the data internally, at which point we might as well just | ||
1305 | * do a regular read() on the buffer anyway. | ||
1306 | * - Lots of nasty vm tricks, that are neither fast nor flexible (it | ||
1307 | * has restriction limitations on both ends of the pipe). | ||
1308 | * | ||
1309 | * Alas, it isn't here. | ||
1310 | * | ||
1311 | */ | 1414 | */ |
1312 | static long do_vmsplice(struct file *file, const struct iovec __user *iov, | 1415 | static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, |
1313 | unsigned long nr_segs, unsigned int flags) | 1416 | unsigned long nr_segs, unsigned int flags) |
1314 | { | 1417 | { |
1315 | struct pipe_inode_info *pipe; | 1418 | struct pipe_inode_info *pipe; |
1316 | struct page *pages[PIPE_BUFFERS]; | 1419 | struct page *pages[PIPE_BUFFERS]; |
@@ -1325,10 +1428,6 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, | |||
1325 | pipe = pipe_info(file->f_path.dentry->d_inode); | 1428 | pipe = pipe_info(file->f_path.dentry->d_inode); |
1326 | if (!pipe) | 1429 | if (!pipe) |
1327 | return -EBADF; | 1430 | return -EBADF; |
1328 | if (unlikely(nr_segs > UIO_MAXIOV)) | ||
1329 | return -EINVAL; | ||
1330 | else if (unlikely(!nr_segs)) | ||
1331 | return 0; | ||
1332 | 1431 | ||
1333 | spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, | 1432 | spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, |
1334 | flags & SPLICE_F_GIFT); | 1433 | flags & SPLICE_F_GIFT); |
@@ -1338,6 +1437,22 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, | |||
1338 | return splice_to_pipe(pipe, &spd); | 1437 | return splice_to_pipe(pipe, &spd); |
1339 | } | 1438 | } |
1340 | 1439 | ||
1440 | /* | ||
1441 | * Note that vmsplice only really supports true splicing _from_ user memory | ||
1442 | * to a pipe, not the other way around. Splicing from user memory is a simple | ||
1443 | * operation that can be supported without any funky alignment restrictions | ||
1444 | * or nasty vm tricks. We simply map in the user memory and fill them into | ||
1445 | * a pipe. The reverse isn't quite as easy, though. There are two possible | ||
1446 | * solutions for that: | ||
1447 | * | ||
1448 | * - memcpy() the data internally, at which point we might as well just | ||
1449 | * do a regular read() on the buffer anyway. | ||
1450 | * - Lots of nasty vm tricks that are neither fast nor flexible (they | ||
1451 | * impose restrictions on both ends of the pipe). | ||
1452 | * | ||
1453 | * Currently we punt and implement it as a normal copy, see pipe_to_user(). | ||
1454 | * | ||
1455 | */ | ||
1341 | asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, | 1456 | asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, |
1342 | unsigned long nr_segs, unsigned int flags) | 1457 | unsigned long nr_segs, unsigned int flags) |
1343 | { | 1458 | { |
@@ -1345,11 +1460,18 @@ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, | |||
1345 | long error; | 1460 | long error; |
1346 | int fput; | 1461 | int fput; |
1347 | 1462 | ||
1463 | if (unlikely(nr_segs > UIO_MAXIOV)) | ||
1464 | return -EINVAL; | ||
1465 | else if (unlikely(!nr_segs)) | ||
1466 | return 0; | ||
1467 | |||
1348 | error = -EBADF; | 1468 | error = -EBADF; |
1349 | file = fget_light(fd, &fput); | 1469 | file = fget_light(fd, &fput); |
1350 | if (file) { | 1470 | if (file) { |
1351 | if (file->f_mode & FMODE_WRITE) | 1471 | if (file->f_mode & FMODE_WRITE) |
1352 | error = do_vmsplice(file, iov, nr_segs, flags); | 1472 | error = vmsplice_to_pipe(file, iov, nr_segs, flags); |
1473 | else if (file->f_mode & FMODE_READ) | ||
1474 | error = vmsplice_to_user(file, iov, nr_segs, flags); | ||
1353 | 1475 | ||
1354 | fput_light(file, fput); | 1476 | fput_light(file, fput); |
1355 | } | 1477 | } |
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 883ba9b78d3f..6e7bfc125425 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h | |||
@@ -88,7 +88,13 @@ int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); | |||
88 | struct splice_desc { | 88 | struct splice_desc { |
89 | unsigned int len, total_len; /* current and remaining length */ | 89 | unsigned int len, total_len; /* current and remaining length */ |
90 | unsigned int flags; /* splice flags */ | 90 | unsigned int flags; /* splice flags */ |
91 | struct file *file; /* file to read/write */ | 91 | /* |
92 | * actor() private data | ||
93 | */ | ||
94 | union { | ||
95 | void __user *userptr; /* memory to write to */ | ||
96 | struct file *file; /* file to read/write */ | ||
97 | } u; | ||
92 | loff_t pos; /* file position */ | 98 | loff_t pos; /* file position */ |
93 | }; | 99 | }; |
94 | 100 | ||