diff options
Diffstat (limited to 'fs/splice.c')
-rw-r--r-- | fs/splice.c | 338 |
1 files changed, 318 insertions, 20 deletions
diff --git a/fs/splice.c b/fs/splice.c index 666953d59a3..73766d24f97 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -507,9 +507,131 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, | |||
507 | 507 | ||
508 | return ret; | 508 | return ret; |
509 | } | 509 | } |
510 | |||
511 | EXPORT_SYMBOL(generic_file_splice_read); | 510 | EXPORT_SYMBOL(generic_file_splice_read); |
512 | 511 | ||
512 | static const struct pipe_buf_operations default_pipe_buf_ops = { | ||
513 | .can_merge = 0, | ||
514 | .map = generic_pipe_buf_map, | ||
515 | .unmap = generic_pipe_buf_unmap, | ||
516 | .confirm = generic_pipe_buf_confirm, | ||
517 | .release = generic_pipe_buf_release, | ||
518 | .steal = generic_pipe_buf_steal, | ||
519 | .get = generic_pipe_buf_get, | ||
520 | }; | ||
521 | |||
522 | static ssize_t kernel_readv(struct file *file, const struct iovec *vec, | ||
523 | unsigned long vlen, loff_t offset) | ||
524 | { | ||
525 | mm_segment_t old_fs; | ||
526 | loff_t pos = offset; | ||
527 | ssize_t res; | ||
528 | |||
529 | old_fs = get_fs(); | ||
530 | set_fs(get_ds()); | ||
531 | /* The cast to a user pointer is valid due to the set_fs() */ | ||
532 | res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos); | ||
533 | set_fs(old_fs); | ||
534 | |||
535 | return res; | ||
536 | } | ||
537 | |||
538 | static ssize_t kernel_write(struct file *file, const char *buf, size_t count, | ||
539 | loff_t pos) | ||
540 | { | ||
541 | mm_segment_t old_fs; | ||
542 | ssize_t res; | ||
543 | |||
544 | old_fs = get_fs(); | ||
545 | set_fs(get_ds()); | ||
546 | /* The cast to a user pointer is valid due to the set_fs() */ | ||
547 | res = vfs_write(file, (const char __user *)buf, count, &pos); | ||
548 | set_fs(old_fs); | ||
549 | |||
550 | return res; | ||
551 | } | ||
552 | |||
553 | ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | ||
554 | struct pipe_inode_info *pipe, size_t len, | ||
555 | unsigned int flags) | ||
556 | { | ||
557 | unsigned int nr_pages; | ||
558 | unsigned int nr_freed; | ||
559 | size_t offset; | ||
560 | struct page *pages[PIPE_BUFFERS]; | ||
561 | struct partial_page partial[PIPE_BUFFERS]; | ||
562 | struct iovec vec[PIPE_BUFFERS]; | ||
563 | pgoff_t index; | ||
564 | ssize_t res; | ||
565 | size_t this_len; | ||
566 | int error; | ||
567 | int i; | ||
568 | struct splice_pipe_desc spd = { | ||
569 | .pages = pages, | ||
570 | .partial = partial, | ||
571 | .flags = flags, | ||
572 | .ops = &default_pipe_buf_ops, | ||
573 | .spd_release = spd_release_page, | ||
574 | }; | ||
575 | |||
576 | index = *ppos >> PAGE_CACHE_SHIFT; | ||
577 | offset = *ppos & ~PAGE_CACHE_MASK; | ||
578 | nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
579 | |||
580 | for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) { | ||
581 | struct page *page; | ||
582 | |||
583 | page = alloc_page(GFP_USER); | ||
584 | error = -ENOMEM; | ||
585 | if (!page) | ||
586 | goto err; | ||
587 | |||
588 | this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset); | ||
589 | vec[i].iov_base = (void __user *) page_address(page); | ||
590 | vec[i].iov_len = this_len; | ||
591 | pages[i] = page; | ||
592 | spd.nr_pages++; | ||
593 | len -= this_len; | ||
594 | offset = 0; | ||
595 | } | ||
596 | |||
597 | res = kernel_readv(in, vec, spd.nr_pages, *ppos); | ||
598 | if (res < 0) { | ||
599 | error = res; | ||
600 | goto err; | ||
601 | } | ||
602 | |||
603 | error = 0; | ||
604 | if (!res) | ||
605 | goto err; | ||
606 | |||
607 | nr_freed = 0; | ||
608 | for (i = 0; i < spd.nr_pages; i++) { | ||
609 | this_len = min_t(size_t, vec[i].iov_len, res); | ||
610 | partial[i].offset = 0; | ||
611 | partial[i].len = this_len; | ||
612 | if (!this_len) { | ||
613 | __free_page(pages[i]); | ||
614 | pages[i] = NULL; | ||
615 | nr_freed++; | ||
616 | } | ||
617 | res -= this_len; | ||
618 | } | ||
619 | spd.nr_pages -= nr_freed; | ||
620 | |||
621 | res = splice_to_pipe(pipe, &spd); | ||
622 | if (res > 0) | ||
623 | *ppos += res; | ||
624 | |||
625 | return res; | ||
626 | |||
627 | err: | ||
628 | for (i = 0; i < spd.nr_pages; i++) | ||
629 | __free_page(pages[i]); | ||
630 | |||
631 | return error; | ||
632 | } | ||
633 | EXPORT_SYMBOL(default_file_splice_read); | ||
634 | |||
513 | /* | 635 | /* |
514 | * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' | 636 | * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' |
515 | * using sendpage(). Return the number of bytes sent. | 637 | * using sendpage(). Return the number of bytes sent. |
@@ -881,6 +1003,36 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
881 | 1003 | ||
882 | EXPORT_SYMBOL(generic_file_splice_write); | 1004 | EXPORT_SYMBOL(generic_file_splice_write); |
883 | 1005 | ||
1006 | static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | ||
1007 | struct splice_desc *sd) | ||
1008 | { | ||
1009 | int ret; | ||
1010 | void *data; | ||
1011 | |||
1012 | ret = buf->ops->confirm(pipe, buf); | ||
1013 | if (ret) | ||
1014 | return ret; | ||
1015 | |||
1016 | data = buf->ops->map(pipe, buf, 0); | ||
1017 | ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos); | ||
1018 | buf->ops->unmap(pipe, buf, data); | ||
1019 | |||
1020 | return ret; | ||
1021 | } | ||
1022 | |||
1023 | static ssize_t default_file_splice_write(struct pipe_inode_info *pipe, | ||
1024 | struct file *out, loff_t *ppos, | ||
1025 | size_t len, unsigned int flags) | ||
1026 | { | ||
1027 | ssize_t ret; | ||
1028 | |||
1029 | ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf); | ||
1030 | if (ret > 0) | ||
1031 | *ppos += ret; | ||
1032 | |||
1033 | return ret; | ||
1034 | } | ||
1035 | |||
884 | /** | 1036 | /** |
885 | * generic_splice_sendpage - splice data from a pipe to a socket | 1037 | * generic_splice_sendpage - splice data from a pipe to a socket |
886 | * @pipe: pipe to splice from | 1038 | * @pipe: pipe to splice from |
@@ -908,11 +1060,10 @@ EXPORT_SYMBOL(generic_splice_sendpage); | |||
908 | static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | 1060 | static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, |
909 | loff_t *ppos, size_t len, unsigned int flags) | 1061 | loff_t *ppos, size_t len, unsigned int flags) |
910 | { | 1062 | { |
1063 | ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, | ||
1064 | loff_t *, size_t, unsigned int); | ||
911 | int ret; | 1065 | int ret; |
912 | 1066 | ||
913 | if (unlikely(!out->f_op || !out->f_op->splice_write)) | ||
914 | return -EINVAL; | ||
915 | |||
916 | if (unlikely(!(out->f_mode & FMODE_WRITE))) | 1067 | if (unlikely(!(out->f_mode & FMODE_WRITE))) |
917 | return -EBADF; | 1068 | return -EBADF; |
918 | 1069 | ||
@@ -923,7 +1074,11 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | |||
923 | if (unlikely(ret < 0)) | 1074 | if (unlikely(ret < 0)) |
924 | return ret; | 1075 | return ret; |
925 | 1076 | ||
926 | return out->f_op->splice_write(pipe, out, ppos, len, flags); | 1077 | splice_write = out->f_op->splice_write; |
1078 | if (!splice_write) | ||
1079 | splice_write = default_file_splice_write; | ||
1080 | |||
1081 | return splice_write(pipe, out, ppos, len, flags); | ||
927 | } | 1082 | } |
928 | 1083 | ||
929 | /* | 1084 | /* |
@@ -933,11 +1088,10 @@ static long do_splice_to(struct file *in, loff_t *ppos, | |||
933 | struct pipe_inode_info *pipe, size_t len, | 1088 | struct pipe_inode_info *pipe, size_t len, |
934 | unsigned int flags) | 1089 | unsigned int flags) |
935 | { | 1090 | { |
1091 | ssize_t (*splice_read)(struct file *, loff_t *, | ||
1092 | struct pipe_inode_info *, size_t, unsigned int); | ||
936 | int ret; | 1093 | int ret; |
937 | 1094 | ||
938 | if (unlikely(!in->f_op || !in->f_op->splice_read)) | ||
939 | return -EINVAL; | ||
940 | |||
941 | if (unlikely(!(in->f_mode & FMODE_READ))) | 1095 | if (unlikely(!(in->f_mode & FMODE_READ))) |
942 | return -EBADF; | 1096 | return -EBADF; |
943 | 1097 | ||
@@ -945,7 +1099,11 @@ static long do_splice_to(struct file *in, loff_t *ppos, | |||
945 | if (unlikely(ret < 0)) | 1099 | if (unlikely(ret < 0)) |
946 | return ret; | 1100 | return ret; |
947 | 1101 | ||
948 | return in->f_op->splice_read(in, ppos, pipe, len, flags); | 1102 | splice_read = in->f_op->splice_read; |
1103 | if (!splice_read) | ||
1104 | splice_read = default_file_splice_read; | ||
1105 | |||
1106 | return splice_read(in, ppos, pipe, len, flags); | ||
949 | } | 1107 | } |
950 | 1108 | ||
951 | /** | 1109 | /** |
@@ -1112,6 +1270,9 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | |||
1112 | return ret; | 1270 | return ret; |
1113 | } | 1271 | } |
1114 | 1272 | ||
1273 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, | ||
1274 | struct pipe_inode_info *opipe, | ||
1275 | size_t len, unsigned int flags); | ||
1115 | /* | 1276 | /* |
1116 | * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same | 1277 | * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same |
1117 | * location, so checking ->i_pipe is not enough to verify that this is a | 1278 | * location, so checking ->i_pipe is not enough to verify that this is a |
@@ -1132,12 +1293,32 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1132 | struct file *out, loff_t __user *off_out, | 1293 | struct file *out, loff_t __user *off_out, |
1133 | size_t len, unsigned int flags) | 1294 | size_t len, unsigned int flags) |
1134 | { | 1295 | { |
1135 | struct pipe_inode_info *pipe; | 1296 | struct pipe_inode_info *ipipe; |
1297 | struct pipe_inode_info *opipe; | ||
1136 | loff_t offset, *off; | 1298 | loff_t offset, *off; |
1137 | long ret; | 1299 | long ret; |
1138 | 1300 | ||
1139 | pipe = pipe_info(in->f_path.dentry->d_inode); | 1301 | ipipe = pipe_info(in->f_path.dentry->d_inode); |
1140 | if (pipe) { | 1302 | opipe = pipe_info(out->f_path.dentry->d_inode); |
1303 | |||
1304 | if (ipipe && opipe) { | ||
1305 | if (off_in || off_out) | ||
1306 | return -ESPIPE; | ||
1307 | |||
1308 | if (!(in->f_mode & FMODE_READ)) | ||
1309 | return -EBADF; | ||
1310 | |||
1311 | if (!(out->f_mode & FMODE_WRITE)) | ||
1312 | return -EBADF; | ||
1313 | |||
1314 | /* Splicing to self would be fun, but... */ | ||
1315 | if (ipipe == opipe) | ||
1316 | return -EINVAL; | ||
1317 | |||
1318 | return splice_pipe_to_pipe(ipipe, opipe, len, flags); | ||
1319 | } | ||
1320 | |||
1321 | if (ipipe) { | ||
1141 | if (off_in) | 1322 | if (off_in) |
1142 | return -ESPIPE; | 1323 | return -ESPIPE; |
1143 | if (off_out) { | 1324 | if (off_out) { |
@@ -1149,7 +1330,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1149 | } else | 1330 | } else |
1150 | off = &out->f_pos; | 1331 | off = &out->f_pos; |
1151 | 1332 | ||
1152 | ret = do_splice_from(pipe, out, off, len, flags); | 1333 | ret = do_splice_from(ipipe, out, off, len, flags); |
1153 | 1334 | ||
1154 | if (off_out && copy_to_user(off_out, off, sizeof(loff_t))) | 1335 | if (off_out && copy_to_user(off_out, off, sizeof(loff_t))) |
1155 | ret = -EFAULT; | 1336 | ret = -EFAULT; |
@@ -1157,8 +1338,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1157 | return ret; | 1338 | return ret; |
1158 | } | 1339 | } |
1159 | 1340 | ||
1160 | pipe = pipe_info(out->f_path.dentry->d_inode); | 1341 | if (opipe) { |
1161 | if (pipe) { | ||
1162 | if (off_out) | 1342 | if (off_out) |
1163 | return -ESPIPE; | 1343 | return -ESPIPE; |
1164 | if (off_in) { | 1344 | if (off_in) { |
@@ -1170,7 +1350,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1170 | } else | 1350 | } else |
1171 | off = &in->f_pos; | 1351 | off = &in->f_pos; |
1172 | 1352 | ||
1173 | ret = do_splice_to(in, off, pipe, len, flags); | 1353 | ret = do_splice_to(in, off, opipe, len, flags); |
1174 | 1354 | ||
1175 | if (off_in && copy_to_user(off_in, off, sizeof(loff_t))) | 1355 | if (off_in && copy_to_user(off_in, off, sizeof(loff_t))) |
1176 | ret = -EFAULT; | 1356 | ret = -EFAULT; |
@@ -1511,7 +1691,7 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, | |||
1511 | * Make sure there's data to read. Wait for input if we can, otherwise | 1691 | * Make sure there's data to read. Wait for input if we can, otherwise |
1512 | * return an appropriate error. | 1692 | * return an appropriate error. |
1513 | */ | 1693 | */ |
1514 | static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | 1694 | static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) |
1515 | { | 1695 | { |
1516 | int ret; | 1696 | int ret; |
1517 | 1697 | ||
@@ -1549,7 +1729,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | |||
1549 | * Make sure there's writeable room. Wait for room if we can, otherwise | 1729 | * Make sure there's writeable room. Wait for room if we can, otherwise |
1550 | * return an appropriate error. | 1730 | * return an appropriate error. |
1551 | */ | 1731 | */ |
1552 | static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | 1732 | static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) |
1553 | { | 1733 | { |
1554 | int ret; | 1734 | int ret; |
1555 | 1735 | ||
@@ -1587,6 +1767,124 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | |||
1587 | } | 1767 | } |
1588 | 1768 | ||
1589 | /* | 1769 | /* |
1770 | * Splice contents of ipipe to opipe. | ||
1771 | */ | ||
1772 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, | ||
1773 | struct pipe_inode_info *opipe, | ||
1774 | size_t len, unsigned int flags) | ||
1775 | { | ||
1776 | struct pipe_buffer *ibuf, *obuf; | ||
1777 | int ret = 0, nbuf; | ||
1778 | bool input_wakeup = false; | ||
1779 | |||
1780 | |||
1781 | retry: | ||
1782 | ret = ipipe_prep(ipipe, flags); | ||
1783 | if (ret) | ||
1784 | return ret; | ||
1785 | |||
1786 | ret = opipe_prep(opipe, flags); | ||
1787 | if (ret) | ||
1788 | return ret; | ||
1789 | |||
1790 | /* | ||
1791 | * Potential ABBA deadlock, work around it by ordering lock | ||
1792 | * grabbing by pipe info address. Otherwise two different processes | ||
1793 | * could deadlock (one doing tee from A -> B, the other from B -> A). | ||
1794 | */ | ||
1795 | pipe_double_lock(ipipe, opipe); | ||
1796 | |||
1797 | do { | ||
1798 | if (!opipe->readers) { | ||
1799 | send_sig(SIGPIPE, current, 0); | ||
1800 | if (!ret) | ||
1801 | ret = -EPIPE; | ||
1802 | break; | ||
1803 | } | ||
1804 | |||
1805 | if (!ipipe->nrbufs && !ipipe->writers) | ||
1806 | break; | ||
1807 | |||
1808 | /* | ||
1809 | * Cannot make any progress, because either the input | ||
1810 | * pipe is empty or the output pipe is full. | ||
1811 | */ | ||
1812 | if (!ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) { | ||
1813 | /* Already processed some buffers, break */ | ||
1814 | if (ret) | ||
1815 | break; | ||
1816 | |||
1817 | if (flags & SPLICE_F_NONBLOCK) { | ||
1818 | ret = -EAGAIN; | ||
1819 | break; | ||
1820 | } | ||
1821 | |||
1822 | /* | ||
1823 | * We raced with another reader/writer and haven't | ||
1824 | * managed to process any buffers. A zero return | ||
1825 | * value means EOF, so retry instead. | ||
1826 | */ | ||
1827 | pipe_unlock(ipipe); | ||
1828 | pipe_unlock(opipe); | ||
1829 | goto retry; | ||
1830 | } | ||
1831 | |||
1832 | ibuf = ipipe->bufs + ipipe->curbuf; | ||
1833 | nbuf = (opipe->curbuf + opipe->nrbufs) % PIPE_BUFFERS; | ||
1834 | obuf = opipe->bufs + nbuf; | ||
1835 | |||
1836 | if (len >= ibuf->len) { | ||
1837 | /* | ||
1838 | * Simply move the whole buffer from ipipe to opipe | ||
1839 | */ | ||
1840 | *obuf = *ibuf; | ||
1841 | ibuf->ops = NULL; | ||
1842 | opipe->nrbufs++; | ||
1843 | ipipe->curbuf = (ipipe->curbuf + 1) % PIPE_BUFFERS; | ||
1844 | ipipe->nrbufs--; | ||
1845 | input_wakeup = true; | ||
1846 | } else { | ||
1847 | /* | ||
1848 | * Get a reference to this pipe buffer, | ||
1849 | * so we can copy the contents over. | ||
1850 | */ | ||
1851 | ibuf->ops->get(ipipe, ibuf); | ||
1852 | *obuf = *ibuf; | ||
1853 | |||
1854 | /* | ||
1855 | * Don't inherit the gift flag, we need to | ||
1856 | * prevent multiple steals of this page. | ||
1857 | */ | ||
1858 | obuf->flags &= ~PIPE_BUF_FLAG_GIFT; | ||
1859 | |||
1860 | obuf->len = len; | ||
1861 | opipe->nrbufs++; | ||
1862 | ibuf->offset += obuf->len; | ||
1863 | ibuf->len -= obuf->len; | ||
1864 | } | ||
1865 | ret += obuf->len; | ||
1866 | len -= obuf->len; | ||
1867 | } while (len); | ||
1868 | |||
1869 | pipe_unlock(ipipe); | ||
1870 | pipe_unlock(opipe); | ||
1871 | |||
1872 | /* | ||
1873 | * If we put data in the output pipe, wakeup any potential readers. | ||
1874 | */ | ||
1875 | if (ret > 0) { | ||
1876 | smp_mb(); | ||
1877 | if (waitqueue_active(&opipe->wait)) | ||
1878 | wake_up_interruptible(&opipe->wait); | ||
1879 | kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); | ||
1880 | } | ||
1881 | if (input_wakeup) | ||
1882 | wakeup_pipe_writers(ipipe); | ||
1883 | |||
1884 | return ret; | ||
1885 | } | ||
1886 | |||
1887 | /* | ||
1590 | * Link contents of ipipe to opipe. | 1888 | * Link contents of ipipe to opipe. |
1591 | */ | 1889 | */ |
1592 | static int link_pipe(struct pipe_inode_info *ipipe, | 1890 | static int link_pipe(struct pipe_inode_info *ipipe, |
@@ -1690,9 +1988,9 @@ static long do_tee(struct file *in, struct file *out, size_t len, | |||
1690 | * Keep going, unless we encounter an error. The ipipe/opipe | 1988 | * Keep going, unless we encounter an error. The ipipe/opipe |
1691 | * ordering doesn't really matter. | 1989 | * ordering doesn't really matter. |
1692 | */ | 1990 | */ |
1693 | ret = link_ipipe_prep(ipipe, flags); | 1991 | ret = ipipe_prep(ipipe, flags); |
1694 | if (!ret) { | 1992 | if (!ret) { |
1695 | ret = link_opipe_prep(opipe, flags); | 1993 | ret = opipe_prep(opipe, flags); |
1696 | if (!ret) | 1994 | if (!ret) |
1697 | ret = link_pipe(ipipe, opipe, len, flags); | 1995 | ret = link_pipe(ipipe, opipe, len, flags); |
1698 | } | 1996 | } |