diff options
author | Jens Axboe <axboe@suse.de> | 2006-04-11 07:52:07 -0400 |
---|---|---|
committer | Jens Axboe <axboe@suse.de> | 2006-04-11 07:52:07 -0400 |
commit | b92ce55893745e011edae70830b8bc863be881f9 (patch) | |
tree | e2afd62d2e63d74157905140f5907d07bdfe31b9 /fs | |
parent | 529565dcb1581c9a1e3f6df1c1763ca3e0f0d512 (diff) |
[PATCH] splice: add direct fd <-> fd splicing support
Direct fd <-> fd splicing is more efficient for sendfile() emulation.
Basically, we cache an internal private pipe and just use that as the
intermediate area for pages. Direct splicing is not available from
sys_splice(); it is only meant to be used for sendfile() emulation.
Additional patch from Ingo Molnar to avoid the PIPE_BUFFERS loop at
exit for the normal fast path.
Signed-off-by: Jens Axboe <axboe@suse.de>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/pipe.c | 10 | ||||
-rw-r--r-- | fs/splice.c | 148 |
2 files changed, 137 insertions, 21 deletions
@@ -691,12 +691,10 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode) | |||
691 | return info; | 691 | return info; |
692 | } | 692 | } |
693 | 693 | ||
694 | void free_pipe_info(struct inode *inode) | 694 | void __free_pipe_info(struct pipe_inode_info *info) |
695 | { | 695 | { |
696 | int i; | 696 | int i; |
697 | struct pipe_inode_info *info = inode->i_pipe; | ||
698 | 697 | ||
699 | inode->i_pipe = NULL; | ||
700 | for (i = 0; i < PIPE_BUFFERS; i++) { | 698 | for (i = 0; i < PIPE_BUFFERS; i++) { |
701 | struct pipe_buffer *buf = info->bufs + i; | 699 | struct pipe_buffer *buf = info->bufs + i; |
702 | if (buf->ops) | 700 | if (buf->ops) |
@@ -707,6 +705,12 @@ void free_pipe_info(struct inode *inode) | |||
707 | kfree(info); | 705 | kfree(info); |
708 | } | 706 | } |
709 | 707 | ||
708 | void free_pipe_info(struct inode *inode) | ||
709 | { | ||
710 | __free_pipe_info(inode->i_pipe); | ||
711 | inode->i_pipe = NULL; | ||
712 | } | ||
713 | |||
710 | static struct vfsmount *pipe_mnt __read_mostly; | 714 | static struct vfsmount *pipe_mnt __read_mostly; |
711 | static int pipefs_delete_dentry(struct dentry *dentry) | 715 | static int pipefs_delete_dentry(struct dentry *dentry) |
712 | { | 716 | { |
diff --git a/fs/splice.c b/fs/splice.c index a5326127aad5..c47b561edac0 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -680,8 +680,7 @@ EXPORT_SYMBOL(generic_splice_sendpage); | |||
680 | * Attempt to initiate a splice from pipe to file. | 680 | * Attempt to initiate a splice from pipe to file. |
681 | */ | 681 | */ |
682 | static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | 682 | static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, |
683 | loff_t __user *off_out, size_t len, | 683 | size_t len, unsigned int flags) |
684 | unsigned int flags) | ||
685 | { | 684 | { |
686 | loff_t pos; | 685 | loff_t pos; |
687 | int ret; | 686 | int ret; |
@@ -692,9 +691,6 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | |||
692 | if (!(out->f_mode & FMODE_WRITE)) | 691 | if (!(out->f_mode & FMODE_WRITE)) |
693 | return -EBADF; | 692 | return -EBADF; |
694 | 693 | ||
695 | if (off_out && copy_from_user(&out->f_pos, off_out, sizeof(loff_t))) | ||
696 | return -EFAULT; | ||
697 | |||
698 | pos = out->f_pos; | 694 | pos = out->f_pos; |
699 | 695 | ||
700 | ret = rw_verify_area(WRITE, out, &pos, len); | 696 | ret = rw_verify_area(WRITE, out, &pos, len); |
@@ -707,9 +703,8 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | |||
707 | /* | 703 | /* |
708 | * Attempt to initiate a splice from a file to a pipe. | 704 | * Attempt to initiate a splice from a file to a pipe. |
709 | */ | 705 | */ |
710 | static long do_splice_to(struct file *in, loff_t __user *off_in, | 706 | static long do_splice_to(struct file *in, struct pipe_inode_info *pipe, |
711 | struct pipe_inode_info *pipe, size_t len, | 707 | size_t len, unsigned int flags) |
712 | unsigned int flags) | ||
713 | { | 708 | { |
714 | loff_t pos, isize, left; | 709 | loff_t pos, isize, left; |
715 | int ret; | 710 | int ret; |
@@ -720,9 +715,6 @@ static long do_splice_to(struct file *in, loff_t __user *off_in, | |||
720 | if (!(in->f_mode & FMODE_READ)) | 715 | if (!(in->f_mode & FMODE_READ)) |
721 | return -EBADF; | 716 | return -EBADF; |
722 | 717 | ||
723 | if (off_in && copy_from_user(&in->f_pos, off_in, sizeof(loff_t))) | ||
724 | return -EFAULT; | ||
725 | |||
726 | pos = in->f_pos; | 718 | pos = in->f_pos; |
727 | 719 | ||
728 | ret = rw_verify_area(READ, in, &pos, len); | 720 | ret = rw_verify_area(READ, in, &pos, len); |
@@ -740,6 +732,118 @@ static long do_splice_to(struct file *in, loff_t __user *off_in, | |||
740 | return in->f_op->splice_read(in, pipe, len, flags); | 732 | return in->f_op->splice_read(in, pipe, len, flags); |
741 | } | 733 | } |
742 | 734 | ||
735 | long do_splice_direct(struct file *in, struct file *out, size_t len, | ||
736 | unsigned int flags) | ||
737 | { | ||
738 | struct pipe_inode_info *pipe; | ||
739 | long ret, bytes; | ||
740 | umode_t i_mode; | ||
741 | int i; | ||
742 | |||
743 | /* | ||
744 | * We require the input being a regular file, as we don't want to | ||
745 | * randomly drop data for eg socket -> socket splicing. Use the | ||
746 | * piped splicing for that! | ||
747 | */ | ||
748 | i_mode = in->f_dentry->d_inode->i_mode; | ||
749 | if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode))) | ||
750 | return -EINVAL; | ||
751 | |||
752 | /* | ||
753 | * neither in nor out is a pipe, setup an internal pipe attached to | ||
754 | * 'out' and transfer the wanted data from 'in' to 'out' through that | ||
755 | */ | ||
756 | pipe = current->splice_pipe; | ||
757 | if (!pipe) { | ||
758 | pipe = alloc_pipe_info(NULL); | ||
759 | if (!pipe) | ||
760 | return -ENOMEM; | ||
761 | |||
762 | /* | ||
763 | * We don't have an immediate reader, but we'll read the stuff | ||
764 | * out of the pipe right after the move_to_pipe(). So set | ||
765 | * PIPE_READERS appropriately. | ||
766 | */ | ||
767 | pipe->readers = 1; | ||
768 | |||
769 | current->splice_pipe = pipe; | ||
770 | } | ||
771 | |||
772 | /* | ||
773 | * do the splice | ||
774 | */ | ||
775 | ret = 0; | ||
776 | bytes = 0; | ||
777 | |||
778 | while (len) { | ||
779 | size_t read_len, max_read_len; | ||
780 | |||
781 | /* | ||
782 | * Do at most PIPE_BUFFERS pages worth of transfer: | ||
783 | */ | ||
784 | max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE)); | ||
785 | |||
786 | ret = do_splice_to(in, pipe, max_read_len, flags); | ||
787 | if (unlikely(ret < 0)) | ||
788 | goto out_release; | ||
789 | |||
790 | read_len = ret; | ||
791 | |||
792 | /* | ||
793 | * NOTE: nonblocking mode only applies to the input. We | ||
794 | * must not do the output in nonblocking mode as then we | ||
795 | * could get stuck data in the internal pipe: | ||
796 | */ | ||
797 | ret = do_splice_from(pipe, out, read_len, | ||
798 | flags & ~SPLICE_F_NONBLOCK); | ||
799 | if (unlikely(ret < 0)) | ||
800 | goto out_release; | ||
801 | |||
802 | bytes += ret; | ||
803 | len -= ret; | ||
804 | |||
805 | /* | ||
806 | * In nonblocking mode, if we got back a short read then | ||
807 | * that was due to either an IO error or due to the | ||
808 | * pagecache entry not being there. In the IO error case | ||
809 | * the _next_ splice attempt will produce a clean IO error | ||
810 | * return value (not a short read), so in both cases it's | ||
811 | * correct to break out of the loop here: | ||
812 | */ | ||
813 | if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len)) | ||
814 | break; | ||
815 | } | ||
816 | |||
817 | pipe->nrbufs = pipe->curbuf = 0; | ||
818 | |||
819 | return bytes; | ||
820 | |||
821 | out_release: | ||
822 | /* | ||
823 | * If we did an incomplete transfer we must release | ||
824 | * the pipe buffers in question: | ||
825 | */ | ||
826 | for (i = 0; i < PIPE_BUFFERS; i++) { | ||
827 | struct pipe_buffer *buf = pipe->bufs + i; | ||
828 | |||
829 | if (buf->ops) { | ||
830 | buf->ops->release(pipe, buf); | ||
831 | buf->ops = NULL; | ||
832 | } | ||
833 | } | ||
834 | pipe->nrbufs = pipe->curbuf = 0; | ||
835 | |||
836 | /* | ||
837 | * If we transferred some data, return the number of bytes: | ||
838 | */ | ||
839 | if (bytes > 0) | ||
840 | return bytes; | ||
841 | |||
842 | return ret; | ||
843 | } | ||
844 | |||
845 | EXPORT_SYMBOL(do_splice_direct); | ||
846 | |||
743 | /* | 847 | /* |
744 | * Determine where to splice to/from. | 848 | * Determine where to splice to/from. |
745 | */ | 849 | */ |
@@ -749,25 +853,33 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
749 | { | 853 | { |
750 | struct pipe_inode_info *pipe; | 854 | struct pipe_inode_info *pipe; |
751 | 855 | ||
752 | if (off_out && out->f_op->llseek == no_llseek) | ||
753 | return -EINVAL; | ||
754 | if (off_in && in->f_op->llseek == no_llseek) | ||
755 | return -EINVAL; | ||
756 | |||
757 | pipe = in->f_dentry->d_inode->i_pipe; | 856 | pipe = in->f_dentry->d_inode->i_pipe; |
758 | if (pipe) { | 857 | if (pipe) { |
759 | if (off_in) | 858 | if (off_in) |
760 | return -ESPIPE; | 859 | return -ESPIPE; |
860 | if (off_out) { | ||
861 | if (out->f_op->llseek == no_llseek) | ||
862 | return -EINVAL; | ||
863 | if (copy_from_user(&out->f_pos, off_out, | ||
864 | sizeof(loff_t))) | ||
865 | return -EFAULT; | ||
866 | } | ||
761 | 867 | ||
762 | return do_splice_from(pipe, out, off_out, len, flags); | 868 | return do_splice_from(pipe, out, len, flags); |
763 | } | 869 | } |
764 | 870 | ||
765 | pipe = out->f_dentry->d_inode->i_pipe; | 871 | pipe = out->f_dentry->d_inode->i_pipe; |
766 | if (pipe) { | 872 | if (pipe) { |
767 | if (off_out) | 873 | if (off_out) |
768 | return -ESPIPE; | 874 | return -ESPIPE; |
875 | if (off_in) { | ||
876 | if (in->f_op->llseek == no_llseek) | ||
877 | return -EINVAL; | ||
878 | if (copy_from_user(&in->f_pos, off_in, sizeof(loff_t))) | ||
879 | return -EFAULT; | ||
880 | } | ||
769 | 881 | ||
770 | return do_splice_to(in, off_in, pipe, len, flags); | 882 | return do_splice_to(in, pipe, len, flags); |
771 | } | 883 | } |
772 | 884 | ||
773 | return -EINVAL; | 885 | return -EINVAL; |