aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jens Axboe <axboe@suse.de> 2006-04-11 07:52:07 -0400
committer Jens Axboe <axboe@suse.de> 2006-04-11 07:52:07 -0400
commit b92ce55893745e011edae70830b8bc863be881f9 (patch)
tree e2afd62d2e63d74157905140f5907d07bdfe31b9
parent 529565dcb1581c9a1e3f6df1c1763ca3e0f0d512 (diff)
[PATCH] splice: add direct fd <-> fd splicing support
Direct fd <-> fd splicing is more efficient for sendfile() emulation. Basically we cache an internal private pipe and just use that as the intermediate area for pages. Direct splicing is not available from sys_splice(); it is only meant to be used for sendfile() emulation.

Additional patch from Ingo Molnar to avoid the PIPE_BUFFERS loop at exit for the normal fast path.

Signed-off-by: Jens Axboe <axboe@suse.de>
-rw-r--r-- fs/pipe.c 10
-rw-r--r-- fs/splice.c 148
-rw-r--r-- include/linux/fs.h 2
-rw-r--r-- include/linux/pipe_fs_i.h 1
-rw-r--r-- include/linux/sched.h 6
-rw-r--r-- kernel/exit.c 4
6 files changed, 150 insertions, 21 deletions
diff --git a/fs/pipe.c b/fs/pipe.c
index 705b48692627..036536f072c9 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -691,12 +691,10 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
691 return info; 691 return info;
692} 692}
693 693
694void free_pipe_info(struct inode *inode) 694void __free_pipe_info(struct pipe_inode_info *info)
695{ 695{
696 int i; 696 int i;
697 struct pipe_inode_info *info = inode->i_pipe;
698 697
699 inode->i_pipe = NULL;
700 for (i = 0; i < PIPE_BUFFERS; i++) { 698 for (i = 0; i < PIPE_BUFFERS; i++) {
701 struct pipe_buffer *buf = info->bufs + i; 699 struct pipe_buffer *buf = info->bufs + i;
702 if (buf->ops) 700 if (buf->ops)
@@ -707,6 +705,12 @@ void free_pipe_info(struct inode *inode)
707 kfree(info); 705 kfree(info);
708} 706}
709 707
708void free_pipe_info(struct inode *inode)
709{
710 __free_pipe_info(inode->i_pipe);
711 inode->i_pipe = NULL;
712}
713
710static struct vfsmount *pipe_mnt __read_mostly; 714static struct vfsmount *pipe_mnt __read_mostly;
711static int pipefs_delete_dentry(struct dentry *dentry) 715static int pipefs_delete_dentry(struct dentry *dentry)
712{ 716{
diff --git a/fs/splice.c b/fs/splice.c
index a5326127aad5..c47b561edac0 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -680,8 +680,7 @@ EXPORT_SYMBOL(generic_splice_sendpage);
680 * Attempt to initiate a splice from pipe to file. 680 * Attempt to initiate a splice from pipe to file.
681 */ 681 */
682static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, 682static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
683 loff_t __user *off_out, size_t len, 683 size_t len, unsigned int flags)
684 unsigned int flags)
685{ 684{
686 loff_t pos; 685 loff_t pos;
687 int ret; 686 int ret;
@@ -692,9 +691,6 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
692 if (!(out->f_mode & FMODE_WRITE)) 691 if (!(out->f_mode & FMODE_WRITE))
693 return -EBADF; 692 return -EBADF;
694 693
695 if (off_out && copy_from_user(&out->f_pos, off_out, sizeof(loff_t)))
696 return -EFAULT;
697
698 pos = out->f_pos; 694 pos = out->f_pos;
699 695
700 ret = rw_verify_area(WRITE, out, &pos, len); 696 ret = rw_verify_area(WRITE, out, &pos, len);
@@ -707,9 +703,8 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
707/* 703/*
708 * Attempt to initiate a splice from a file to a pipe. 704 * Attempt to initiate a splice from a file to a pipe.
709 */ 705 */
710static long do_splice_to(struct file *in, loff_t __user *off_in, 706static long do_splice_to(struct file *in, struct pipe_inode_info *pipe,
711 struct pipe_inode_info *pipe, size_t len, 707 size_t len, unsigned int flags)
712 unsigned int flags)
713{ 708{
714 loff_t pos, isize, left; 709 loff_t pos, isize, left;
715 int ret; 710 int ret;
@@ -720,9 +715,6 @@ static long do_splice_to(struct file *in, loff_t __user *off_in,
720 if (!(in->f_mode & FMODE_READ)) 715 if (!(in->f_mode & FMODE_READ))
721 return -EBADF; 716 return -EBADF;
722 717
723 if (off_in && copy_from_user(&in->f_pos, off_in, sizeof(loff_t)))
724 return -EFAULT;
725
726 pos = in->f_pos; 718 pos = in->f_pos;
727 719
728 ret = rw_verify_area(READ, in, &pos, len); 720 ret = rw_verify_area(READ, in, &pos, len);
@@ -740,6 +732,118 @@ static long do_splice_to(struct file *in, loff_t __user *off_in,
740 return in->f_op->splice_read(in, pipe, len, flags); 732 return in->f_op->splice_read(in, pipe, len, flags);
741} 733}
742 734
735long do_splice_direct(struct file *in, struct file *out, size_t len,
736 unsigned int flags)
737{
738 struct pipe_inode_info *pipe;
739 long ret, bytes;
740 umode_t i_mode;
741 int i;
742
743 /*
744 * We require the input being a regular file, as we don't want to
745 * randomly drop data for eg socket -> socket splicing. Use the
746 * piped splicing for that!
747 */
748 i_mode = in->f_dentry->d_inode->i_mode;
749 if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
750 return -EINVAL;
751
752 /*
753 * neither in nor out is a pipe, setup an internal pipe attached to
754 * 'out' and transfer the wanted data from 'in' to 'out' through that
755 */
756 pipe = current->splice_pipe;
757 if (!pipe) {
758 pipe = alloc_pipe_info(NULL);
759 if (!pipe)
760 return -ENOMEM;
761
762 /*
763 * We don't have an immediate reader, but we'll read the stuff
764 * out of the pipe right after the move_to_pipe(). So set
765 * PIPE_READERS appropriately.
766 */
767 pipe->readers = 1;
768
769 current->splice_pipe = pipe;
770 }
771
772 /*
773 * do the splice
774 */
775 ret = 0;
776 bytes = 0;
777
778 while (len) {
779 size_t read_len, max_read_len;
780
781 /*
782 * Do at most PIPE_BUFFERS pages worth of transfer:
783 */
784 max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
785
786 ret = do_splice_to(in, pipe, max_read_len, flags);
787 if (unlikely(ret < 0))
788 goto out_release;
789
790 read_len = ret;
791
792 /*
793 * NOTE: nonblocking mode only applies to the input. We
794 * must not do the output in nonblocking mode as then we
795 * could get stuck data in the internal pipe:
796 */
797 ret = do_splice_from(pipe, out, read_len,
798 flags & ~SPLICE_F_NONBLOCK);
799 if (unlikely(ret < 0))
800 goto out_release;
801
802 bytes += ret;
803 len -= ret;
804
805 /*
806 * In nonblocking mode, if we got back a short read then
807 * that was due to either an IO error or due to the
808 * pagecache entry not being there. In the IO error case
809 * the _next_ splice attempt will produce a clean IO error
810 * return value (not a short read), so in both cases it's
811 * correct to break out of the loop here:
812 */
813 if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len))
814 break;
815 }
816
817 pipe->nrbufs = pipe->curbuf = 0;
818
819 return bytes;
820
821out_release:
822 /*
823 * If we did an incomplete transfer we must release
824 * the pipe buffers in question:
825 */
826 for (i = 0; i < PIPE_BUFFERS; i++) {
827 struct pipe_buffer *buf = pipe->bufs + i;
828
829 if (buf->ops) {
830 buf->ops->release(pipe, buf);
831 buf->ops = NULL;
832 }
833 }
834 pipe->nrbufs = pipe->curbuf = 0;
835
836 /*
837 * If we transferred some data, return the number of bytes:
838 */
839 if (bytes > 0)
840 return bytes;
841
842 return ret;
843}
844
845EXPORT_SYMBOL(do_splice_direct);
846
743/* 847/*
744 * Determine where to splice to/from. 848 * Determine where to splice to/from.
745 */ 849 */
@@ -749,25 +853,33 @@ static long do_splice(struct file *in, loff_t __user *off_in,
749{ 853{
750 struct pipe_inode_info *pipe; 854 struct pipe_inode_info *pipe;
751 855
752 if (off_out && out->f_op->llseek == no_llseek)
753 return -EINVAL;
754 if (off_in && in->f_op->llseek == no_llseek)
755 return -EINVAL;
756
757 pipe = in->f_dentry->d_inode->i_pipe; 856 pipe = in->f_dentry->d_inode->i_pipe;
758 if (pipe) { 857 if (pipe) {
759 if (off_in) 858 if (off_in)
760 return -ESPIPE; 859 return -ESPIPE;
860 if (off_out) {
861 if (out->f_op->llseek == no_llseek)
862 return -EINVAL;
863 if (copy_from_user(&out->f_pos, off_out,
864 sizeof(loff_t)))
865 return -EFAULT;
866 }
761 867
762 return do_splice_from(pipe, out, off_out, len, flags); 868 return do_splice_from(pipe, out, len, flags);
763 } 869 }
764 870
765 pipe = out->f_dentry->d_inode->i_pipe; 871 pipe = out->f_dentry->d_inode->i_pipe;
766 if (pipe) { 872 if (pipe) {
767 if (off_out) 873 if (off_out)
768 return -ESPIPE; 874 return -ESPIPE;
875 if (off_in) {
876 if (in->f_op->llseek == no_llseek)
877 return -EINVAL;
878 if (copy_from_user(&in->f_pos, off_in, sizeof(loff_t)))
879 return -EFAULT;
880 }
769 881
770 return do_splice_to(in, off_in, pipe, len, flags); 882 return do_splice_to(in, pipe, len, flags);
771 } 883 }
772 884
773 return -EINVAL; 885 return -EINVAL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7e6454454fbd..9e8e2ee353b4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1613,6 +1613,8 @@ extern void do_generic_mapping_read(struct address_space *mapping,
1613 loff_t *, read_descriptor_t *, read_actor_t); 1613 loff_t *, read_descriptor_t *, read_actor_t);
1614extern ssize_t generic_file_splice_read(struct file *, struct pipe_inode_info *, size_t, unsigned int); 1614extern ssize_t generic_file_splice_read(struct file *, struct pipe_inode_info *, size_t, unsigned int);
1615extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, size_t, unsigned int); 1615extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, size_t, unsigned int);
1616extern long do_splice_direct(struct file *in, struct file *out,
1617 size_t len, unsigned int flags);
1616extern void 1618extern void
1617file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); 1619file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
1618extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, 1620extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 9cf99cb34c15..660e9d866e5d 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -58,6 +58,7 @@ void pipe_wait(struct pipe_inode_info *pipe);
58 58
59struct pipe_inode_info * alloc_pipe_info(struct inode * inode); 59struct pipe_inode_info * alloc_pipe_info(struct inode * inode);
60void free_pipe_info(struct inode * inode); 60void free_pipe_info(struct inode * inode);
61void __free_pipe_info(struct pipe_inode_info *);
61 62
62/* 63/*
63 * splice is tied to pipes as a transport (at least for now), so we'll just 64 * splice is tied to pipes as a transport (at least for now), so we'll just
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 541f4828f5e7..e194ec75833d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -684,6 +684,7 @@ static inline void prefetch_stack(struct task_struct *t) { }
684 684
685struct audit_context; /* See audit.c */ 685struct audit_context; /* See audit.c */
686struct mempolicy; 686struct mempolicy;
687struct pipe_inode_info;
687 688
688enum sleep_type { 689enum sleep_type {
689 SLEEP_NORMAL, 690 SLEEP_NORMAL,
@@ -882,6 +883,11 @@ struct task_struct {
882 883
883 atomic_t fs_excl; /* holding fs exclusive resources */ 884 atomic_t fs_excl; /* holding fs exclusive resources */
884 struct rcu_head rcu; 885 struct rcu_head rcu;
886
887 /*
888 * cache last used pipe for splice
889 */
890 struct pipe_inode_info *splice_pipe;
885}; 891};
886 892
887static inline pid_t process_group(struct task_struct *tsk) 893static inline pid_t process_group(struct task_struct *tsk)
diff --git a/kernel/exit.c b/kernel/exit.c
index 6c2eeb8f6390..1a9787ac6173 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -34,6 +34,7 @@
34#include <linux/mutex.h> 34#include <linux/mutex.h>
35#include <linux/futex.h> 35#include <linux/futex.h>
36#include <linux/compat.h> 36#include <linux/compat.h>
37#include <linux/pipe_fs_i.h>
37 38
38#include <asm/uaccess.h> 39#include <asm/uaccess.h>
39#include <asm/unistd.h> 40#include <asm/unistd.h>
@@ -941,6 +942,9 @@ fastcall NORET_TYPE void do_exit(long code)
941 if (tsk->io_context) 942 if (tsk->io_context)
942 exit_io_context(); 943 exit_io_context();
943 944
945 if (tsk->splice_pipe)
946 __free_pipe_info(tsk->splice_pipe);
947
944 /* PF_DEAD causes final put_task_struct after we schedule. */ 948 /* PF_DEAD causes final put_task_struct after we schedule. */
945 preempt_disable(); 949 preempt_disable();
946 BUG_ON(tsk->flags & PF_DEAD); 950 BUG_ON(tsk->flags & PF_DEAD);