diff options
author | Jens Axboe <axboe@suse.de> | 2006-04-11 09:51:17 -0400 |
---|---|---|
committer | Jens Axboe <axboe@suse.de> | 2006-04-11 09:51:17 -0400 |
commit | 70524490ee2ea1bbf6cee6c106597b3ac25a3fc2 (patch) | |
tree | c61dd500035bc3e0dea364777de1b7a58b41a75c /fs | |
parent | cbb7e577e732f576b9f399bc2600bdc0626c68dc (diff) |
[PATCH] splice: add support for sys_tee()
Basically an in-kernel implementation of tee, which uses splice and the
pipe buffers as an intelligent way to pass data around by reference.
Whereas the user space tee consumes its input and writes it to both stdout
and a file, this syscall merely duplicates the data inside one pipe to
another pipe. No data is copied; the output pipe just grabs a reference to
the input pipe's data.
Signed-off-by: Jens Axboe <axboe@suse.de>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/pipe.c | 7 | ||||
-rw-r--r-- | fs/splice.c | 186 |
2 files changed, 193 insertions, 0 deletions
@@ -131,12 +131,19 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, | |||
131 | return 0; | 131 | return 0; |
132 | } | 132 | } |
133 | 133 | ||
134 | static void anon_pipe_buf_get(struct pipe_inode_info *info, | ||
135 | struct pipe_buffer *buf) | ||
136 | { | ||
137 | page_cache_get(buf->page); | ||
138 | } | ||
139 | |||
134 | static struct pipe_buf_operations anon_pipe_buf_ops = { | 140 | static struct pipe_buf_operations anon_pipe_buf_ops = { |
135 | .can_merge = 1, | 141 | .can_merge = 1, |
136 | .map = anon_pipe_buf_map, | 142 | .map = anon_pipe_buf_map, |
137 | .unmap = anon_pipe_buf_unmap, | 143 | .unmap = anon_pipe_buf_unmap, |
138 | .release = anon_pipe_buf_release, | 144 | .release = anon_pipe_buf_release, |
139 | .steal = anon_pipe_buf_steal, | 145 | .steal = anon_pipe_buf_steal, |
146 | .get = anon_pipe_buf_get, | ||
140 | }; | 147 | }; |
141 | 148 | ||
142 | static ssize_t | 149 | static ssize_t |
diff --git a/fs/splice.c b/fs/splice.c index 5d3eda64703b..8d57e89924a6 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -125,12 +125,19 @@ static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, | |||
125 | kunmap(buf->page); | 125 | kunmap(buf->page); |
126 | } | 126 | } |
127 | 127 | ||
128 | static void page_cache_pipe_buf_get(struct pipe_inode_info *info, | ||
129 | struct pipe_buffer *buf) | ||
130 | { | ||
131 | page_cache_get(buf->page); | ||
132 | } | ||
133 | |||
128 | static struct pipe_buf_operations page_cache_pipe_buf_ops = { | 134 | static struct pipe_buf_operations page_cache_pipe_buf_ops = { |
129 | .can_merge = 0, | 135 | .can_merge = 0, |
130 | .map = page_cache_pipe_buf_map, | 136 | .map = page_cache_pipe_buf_map, |
131 | .unmap = page_cache_pipe_buf_unmap, | 137 | .unmap = page_cache_pipe_buf_unmap, |
132 | .release = page_cache_pipe_buf_release, | 138 | .release = page_cache_pipe_buf_release, |
133 | .steal = page_cache_pipe_buf_steal, | 139 | .steal = page_cache_pipe_buf_steal, |
140 | .get = page_cache_pipe_buf_get, | ||
134 | }; | 141 | }; |
135 | 142 | ||
136 | /* | 143 | /* |
@@ -963,3 +970,182 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, | |||
963 | 970 | ||
964 | return error; | 971 | return error; |
965 | } | 972 | } |
973 | |||
974 | /* | ||
975 | * Link contents of ipipe to opipe. | ||
976 | */ | ||
977 | static int link_pipe(struct pipe_inode_info *ipipe, | ||
978 | struct pipe_inode_info *opipe, | ||
979 | size_t len, unsigned int flags) | ||
980 | { | ||
981 | struct pipe_buffer *ibuf, *obuf; | ||
982 | int ret = 0, do_wakeup = 0, i; | ||
983 | |||
984 | /* | ||
985 | * Potential ABBA deadlock, work around it by ordering lock | ||
986 | * grabbing by inode address. Otherwise two different processes | ||
987 | * could deadlock (one doing tee from A -> B, the other from B -> A). | ||
988 | */ | ||
989 | if (ipipe->inode < opipe->inode) { | ||
990 | mutex_lock(&ipipe->inode->i_mutex); | ||
991 | mutex_lock(&opipe->inode->i_mutex); | ||
992 | } else { | ||
993 | mutex_lock(&opipe->inode->i_mutex); | ||
994 | mutex_lock(&ipipe->inode->i_mutex); | ||
995 | } | ||
996 | |||
997 | for (i = 0;; i++) { | ||
998 | if (!opipe->readers) { | ||
999 | send_sig(SIGPIPE, current, 0); | ||
1000 | if (!ret) | ||
1001 | ret = -EPIPE; | ||
1002 | break; | ||
1003 | } | ||
1004 | if (ipipe->nrbufs - i) { | ||
1005 | ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); | ||
1006 | |||
1007 | /* | ||
1008 | * If we have room, fill this buffer | ||
1009 | */ | ||
1010 | if (opipe->nrbufs < PIPE_BUFFERS) { | ||
1011 | int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); | ||
1012 | |||
1013 | /* | ||
1014 | * Get a reference to this pipe buffer, | ||
1015 | * so we can copy the contents over. | ||
1016 | */ | ||
1017 | ibuf->ops->get(ipipe, ibuf); | ||
1018 | |||
1019 | obuf = opipe->bufs + nbuf; | ||
1020 | *obuf = *ibuf; | ||
1021 | |||
1022 | if (obuf->len > len) | ||
1023 | obuf->len = len; | ||
1024 | |||
1025 | opipe->nrbufs++; | ||
1026 | do_wakeup = 1; | ||
1027 | ret += obuf->len; | ||
1028 | len -= obuf->len; | ||
1029 | |||
1030 | if (!len) | ||
1031 | break; | ||
1032 | if (opipe->nrbufs < PIPE_BUFFERS) | ||
1033 | continue; | ||
1034 | } | ||
1035 | |||
1036 | /* | ||
1037 | * We have input available, but no output room. | ||
1038 | * If we already copied data, return that. | ||
1039 | */ | ||
1040 | if (flags & SPLICE_F_NONBLOCK) { | ||
1041 | if (!ret) | ||
1042 | ret = -EAGAIN; | ||
1043 | break; | ||
1044 | } | ||
1045 | if (signal_pending(current)) { | ||
1046 | if (!ret) | ||
1047 | ret = -ERESTARTSYS; | ||
1048 | break; | ||
1049 | } | ||
1050 | if (do_wakeup) { | ||
1051 | smp_mb(); | ||
1052 | if (waitqueue_active(&opipe->wait)) | ||
1053 | wake_up_interruptible(&opipe->wait); | ||
1054 | kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); | ||
1055 | do_wakeup = 0; | ||
1056 | } | ||
1057 | |||
1058 | opipe->waiting_writers++; | ||
1059 | pipe_wait(opipe); | ||
1060 | opipe->waiting_writers--; | ||
1061 | continue; | ||
1062 | } | ||
1063 | |||
1064 | /* | ||
1065 | * No input buffers, do the usual checks for available | ||
1066 | * writers and blocking and wait if necessary | ||
1067 | */ | ||
1068 | if (!ipipe->writers) | ||
1069 | break; | ||
1070 | if (!ipipe->waiting_writers) { | ||
1071 | if (ret) | ||
1072 | break; | ||
1073 | } | ||
1074 | if (flags & SPLICE_F_NONBLOCK) { | ||
1075 | if (!ret) | ||
1076 | ret = -EAGAIN; | ||
1077 | break; | ||
1078 | } | ||
1079 | if (signal_pending(current)) { | ||
1080 | if (!ret) | ||
1081 | ret = -ERESTARTSYS; | ||
1082 | break; | ||
1083 | } | ||
1084 | |||
1085 | if (waitqueue_active(&ipipe->wait)) | ||
1086 | wake_up_interruptible_sync(&ipipe->wait); | ||
1087 | kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT); | ||
1088 | |||
1089 | pipe_wait(ipipe); | ||
1090 | } | ||
1091 | |||
1092 | mutex_unlock(&ipipe->inode->i_mutex); | ||
1093 | mutex_unlock(&opipe->inode->i_mutex); | ||
1094 | |||
1095 | if (do_wakeup) { | ||
1096 | smp_mb(); | ||
1097 | if (waitqueue_active(&opipe->wait)) | ||
1098 | wake_up_interruptible(&opipe->wait); | ||
1099 | kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); | ||
1100 | } | ||
1101 | |||
1102 | return ret; | ||
1103 | } | ||
1104 | |||
1105 | /* | ||
1106 | * This is a tee(1) implementation that works on pipes. It doesn't copy | ||
1107 | * any data, it simply references the 'in' pages on the 'out' pipe. | ||
1108 | * The 'flags' used are the SPLICE_F_* variants, currently the only | ||
1109 | * applicable one is SPLICE_F_NONBLOCK. | ||
1110 | */ | ||
1111 | static long do_tee(struct file *in, struct file *out, size_t len, | ||
1112 | unsigned int flags) | ||
1113 | { | ||
1114 | struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe; | ||
1115 | struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe; | ||
1116 | |||
1117 | /* | ||
1118 | * Link ipipe to opipe; ipipe's buffers are referenced, not consumed. | ||
1119 | */ | ||
1120 | if (ipipe && opipe) | ||
1121 | return link_pipe(ipipe, opipe, len, flags); | ||
1122 | |||
1123 | return -EINVAL; | ||
1124 | } | ||
1125 | |||
1126 | asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags) | ||
1127 | { | ||
1128 | struct file *in; | ||
1129 | int error, fput_in; | ||
1130 | |||
1131 | if (unlikely(!len)) | ||
1132 | return 0; | ||
1133 | |||
1134 | error = -EBADF; | ||
1135 | in = fget_light(fdin, &fput_in); | ||
1136 | if (in) { | ||
1137 | if (in->f_mode & FMODE_READ) { | ||
1138 | int fput_out; | ||
1139 | struct file *out = fget_light(fdout, &fput_out); | ||
1140 | |||
1141 | if (out) { | ||
1142 | if (out->f_mode & FMODE_WRITE) | ||
1143 | error = do_tee(in, out, len, flags); | ||
1144 | fput_light(out, fput_out); | ||
1145 | } | ||
1146 | } | ||
1147 | fput_light(in, fput_in); | ||
1148 | } | ||
1149 | |||
1150 | return error; | ||
1151 | } | ||