aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Jens Axboe <axboe@suse.de> 2006-04-11 09:51:17 -0400
committer Jens Axboe <axboe@suse.de> 2006-04-11 09:51:17 -0400
commit 70524490ee2ea1bbf6cee6c106597b3ac25a3fc2 (patch)
tree c61dd500035bc3e0dea364777de1b7a58b41a75c /fs
parent cbb7e577e732f576b9f399bc2600bdc0626c68dc (diff)
[PATCH] splice: add support for sys_tee()
Basically an in-kernel implementation of tee, which uses splice and the pipe buffers as an intelligent way to pass data around by reference. Where the user space tee consumes the input and produces a stdout and file output, this syscall merely duplicates the data inside a pipe to another pipe. No data is copied, the output just grabs a reference to the input pipe data. Signed-off-by: Jens Axboe <axboe@suse.de>
Diffstat (limited to 'fs')
-rw-r--r-- fs/pipe.c 7
-rw-r--r-- fs/splice.c 186
2 files changed, 193 insertions, 0 deletions
diff --git a/fs/pipe.c b/fs/pipe.c
index e984beb93a0e..7fefb10db8d9 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -131,12 +131,19 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
131 return 0; 131 return 0;
132} 132}
133 133
134static void anon_pipe_buf_get(struct pipe_inode_info *info,
135 struct pipe_buffer *buf)
136{
137 page_cache_get(buf->page);
138}
139
/*
 * Operations table for anonymous pipe buffers. The .get entry, which
 * points at anon_pipe_buf_get(), is the only line this patch adds here.
 */
134static struct pipe_buf_operations anon_pipe_buf_ops = { 140static struct pipe_buf_operations anon_pipe_buf_ops = {
135 .can_merge = 1, 141 .can_merge = 1,
136 .map = anon_pipe_buf_map, 142 .map = anon_pipe_buf_map,
137 .unmap = anon_pipe_buf_unmap, 143 .unmap = anon_pipe_buf_unmap,
138 .release = anon_pipe_buf_release, 144 .release = anon_pipe_buf_release,
139 .steal = anon_pipe_buf_steal, 145 .steal = anon_pipe_buf_steal,
146 .get = anon_pipe_buf_get,
140}; 147};
141 148
142static ssize_t 149static ssize_t
diff --git a/fs/splice.c b/fs/splice.c
index 5d3eda64703b..8d57e89924a6 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -125,12 +125,19 @@ static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
125 kunmap(buf->page); 125 kunmap(buf->page);
126} 126}
127 127
128static void page_cache_pipe_buf_get(struct pipe_inode_info *info,
129 struct pipe_buffer *buf)
130{
131 page_cache_get(buf->page);
132}
133
/*
 * Operations table for page cache backed pipe buffers. The .get entry,
 * which points at page_cache_pipe_buf_get(), is the only line this patch
 * adds here.
 */
128static struct pipe_buf_operations page_cache_pipe_buf_ops = { 134static struct pipe_buf_operations page_cache_pipe_buf_ops = {
129 .can_merge = 0, 135 .can_merge = 0,
130 .map = page_cache_pipe_buf_map, 136 .map = page_cache_pipe_buf_map,
131 .unmap = page_cache_pipe_buf_unmap, 137 .unmap = page_cache_pipe_buf_unmap,
132 .release = page_cache_pipe_buf_release, 138 .release = page_cache_pipe_buf_release,
133 .steal = page_cache_pipe_buf_steal, 139 .steal = page_cache_pipe_buf_steal,
140 .get = page_cache_pipe_buf_get,
134}; 141};
135 142
136/* 143/*
@@ -963,3 +970,182 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
963 970
964 return error; 971 return error;
965} 972}
973
974/*
975 * Link contents of ipipe to opipe.
976 */
977static int link_pipe(struct pipe_inode_info *ipipe,
978 struct pipe_inode_info *opipe,
979 size_t len, unsigned int flags)
980{
981 struct pipe_buffer *ibuf, *obuf;
982 int ret = 0, do_wakeup = 0, i;
983
984 /*
985 * Potential ABBA deadlock, work around it by ordering lock
986 * grabbing by inode address. Otherwise two different processes
987 * could deadlock (one doing tee from A -> B, the other from B -> A).
988 */
989 if (ipipe->inode < opipe->inode) {
990 mutex_lock(&ipipe->inode->i_mutex);
991 mutex_lock(&opipe->inode->i_mutex);
992 } else {
993 mutex_lock(&opipe->inode->i_mutex);
994 mutex_lock(&ipipe->inode->i_mutex);
995 }
996
997 for (i = 0;; i++) {
998 if (!opipe->readers) {
999 send_sig(SIGPIPE, current, 0);
1000 if (!ret)
1001 ret = -EPIPE;
1002 break;
1003 }
1004 if (ipipe->nrbufs - i) {
1005 ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
1006
1007 /*
1008 * If we have room, fill this buffer
1009 */
1010 if (opipe->nrbufs < PIPE_BUFFERS) {
1011 int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
1012
1013 /*
1014 * Get a reference to this pipe buffer,
1015 * so we can copy the contents over.
1016 */
1017 ibuf->ops->get(ipipe, ibuf);
1018
1019 obuf = opipe->bufs + nbuf;
1020 *obuf = *ibuf;
1021
1022 if (obuf->len > len)
1023 obuf->len = len;
1024
1025 opipe->nrbufs++;
1026 do_wakeup = 1;
1027 ret += obuf->len;
1028 len -= obuf->len;
1029
1030 if (!len)
1031 break;
1032 if (opipe->nrbufs < PIPE_BUFFERS)
1033 continue;
1034 }
1035
1036 /*
1037 * We have input available, but no output room.
1038 * If we already copied data, return that.
1039 */
1040 if (flags & SPLICE_F_NONBLOCK) {
1041 if (!ret)
1042 ret = -EAGAIN;
1043 break;
1044 }
1045 if (signal_pending(current)) {
1046 if (!ret)
1047 ret = -ERESTARTSYS;
1048 break;
1049 }
1050 if (do_wakeup) {
1051 smp_mb();
1052 if (waitqueue_active(&opipe->wait))
1053 wake_up_interruptible(&opipe->wait);
1054 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1055 do_wakeup = 0;
1056 }
1057
1058 opipe->waiting_writers++;
1059 pipe_wait(opipe);
1060 opipe->waiting_writers--;
1061 continue;
1062 }
1063
1064 /*
1065 * No input buffers, do the usual checks for available
1066 * writers and blocking and wait if necessary
1067 */
1068 if (!ipipe->writers)
1069 break;
1070 if (!ipipe->waiting_writers) {
1071 if (ret)
1072 break;
1073 }
1074 if (flags & SPLICE_F_NONBLOCK) {
1075 if (!ret)
1076 ret = -EAGAIN;
1077 break;
1078 }
1079 if (signal_pending(current)) {
1080 if (!ret)
1081 ret = -ERESTARTSYS;
1082 break;
1083 }
1084
1085 if (waitqueue_active(&ipipe->wait))
1086 wake_up_interruptible_sync(&ipipe->wait);
1087 kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT);
1088
1089 pipe_wait(ipipe);
1090 }
1091
1092 mutex_unlock(&ipipe->inode->i_mutex);
1093 mutex_unlock(&opipe->inode->i_mutex);
1094
1095 if (do_wakeup) {
1096 smp_mb();
1097 if (waitqueue_active(&opipe->wait))
1098 wake_up_interruptible(&opipe->wait);
1099 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1100 }
1101
1102 return ret;
1103}
1104
1105/*
1106 * This is a tee(1) implementation that works on pipes. It doesn't copy
1107 * any data, it simply references the 'in' pages on the 'out' pipe.
1108 * The 'flags' used are the SPLICE_F_* variants, currently the only
1109 * applicable one is SPLICE_F_NONBLOCK.
1110 */
1111static long do_tee(struct file *in, struct file *out, size_t len,
1112 unsigned int flags)
1113{
1114 struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
1115 struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
1116
1117 /*
1118 * Link ipipe to the two output pipes, consuming as we go along.
1119 */
1120 if (ipipe && opipe)
1121 return link_pipe(ipipe, opipe, len, flags);
1122
1123 return -EINVAL;
1124}
1125
1126asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)
1127{
1128 struct file *in;
1129 int error, fput_in;
1130
1131 if (unlikely(!len))
1132 return 0;
1133
1134 error = -EBADF;
1135 in = fget_light(fdin, &fput_in);
1136 if (in) {
1137 if (in->f_mode & FMODE_READ) {
1138 int fput_out;
1139 struct file *out = fget_light(fdout, &fput_out);
1140
1141 if (out) {
1142 if (out->f_mode & FMODE_WRITE)
1143 error = do_tee(in, out, len, flags);
1144 fput_light(out, fput_out);
1145 }
1146 }
1147 fput_light(in, fput_in);
1148 }
1149
1150 return error;
1151}