aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJens Axboe <axboe@suse.de>2006-04-11 09:51:17 -0400
committerJens Axboe <axboe@suse.de>2006-04-11 09:51:17 -0400
commit70524490ee2ea1bbf6cee6c106597b3ac25a3fc2 (patch)
treec61dd500035bc3e0dea364777de1b7a58b41a75c
parentcbb7e577e732f576b9f399bc2600bdc0626c68dc (diff)
[PATCH] splice: add support for sys_tee()
Basically an in-kernel implementation of tee, which uses splice and the pipe buffers as an intelligent way to pass data around by reference. Where the user space tee consumes the input and produces a stdout and file output, this syscall merely duplicates the data inside a pipe to another pipe. No data is copied, the output just grabs a reference to the input pipe data. Signed-off-by: Jens Axboe <axboe@suse.de>
-rw-r--r--arch/i386/kernel/syscall_table.S1
-rw-r--r--arch/ia64/kernel/entry.S1
-rw-r--r--arch/powerpc/kernel/systbl.S1
-rw-r--r--fs/pipe.c7
-rw-r--r--fs/splice.c186
-rw-r--r--include/asm-i386/unistd.h3
-rw-r--r--include/asm-ia64/unistd.h3
-rw-r--r--include/asm-powerpc/unistd.h3
-rw-r--r--include/asm-x86_64/unistd.h4
-rw-r--r--include/linux/pipe_fs_i.h1
-rw-r--r--include/linux/syscalls.h2
11 files changed, 208 insertions, 4 deletions
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index 4f58b9c0efe3..f48bef15b4f0 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -314,3 +314,4 @@ ENTRY(sys_call_table)
314 .long sys_get_robust_list 314 .long sys_get_robust_list
315 .long sys_splice 315 .long sys_splice
316 .long sys_sync_file_range 316 .long sys_sync_file_range
317 .long sys_tee /* 315 */
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 6e16f6b35bd3..e30798811216 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1609,5 +1609,6 @@ sys_call_table:
1609 data8 sys_set_robust_list 1609 data8 sys_set_robust_list
1610 data8 sys_get_robust_list 1610 data8 sys_get_robust_list
1611 data8 sys_sync_file_range // 1300 1611 data8 sys_sync_file_range // 1300
1612 data8 sys_tee
1612 1613
1613 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls 1614 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 1424eab450ee..a14c96403840 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -323,3 +323,4 @@ COMPAT_SYS(pselect6)
323COMPAT_SYS(ppoll) 323COMPAT_SYS(ppoll)
324SYSCALL(unshare) 324SYSCALL(unshare)
325SYSCALL(splice) 325SYSCALL(splice)
326SYSCALL(tee)
diff --git a/fs/pipe.c b/fs/pipe.c
index e984beb93a0e..7fefb10db8d9 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -131,12 +131,19 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
131 return 0; 131 return 0;
132} 132}
133 133
134static void anon_pipe_buf_get(struct pipe_inode_info *info,
135 struct pipe_buffer *buf)
136{
137 page_cache_get(buf->page);
138}
139
/* anon_pipe_buf_ops: buffer callback table defined in fs/pipe.c. This hunk adds the .get callback (anon_pipe_buf_get), which takes an extra reference on the buffer's page. */
134static struct pipe_buf_operations anon_pipe_buf_ops = { 140static struct pipe_buf_operations anon_pipe_buf_ops = {
135 .can_merge = 1, 141 .can_merge = 1,
136 .map = anon_pipe_buf_map, 142 .map = anon_pipe_buf_map,
137 .unmap = anon_pipe_buf_unmap, 143 .unmap = anon_pipe_buf_unmap,
138 .release = anon_pipe_buf_release, 144 .release = anon_pipe_buf_release,
139 .steal = anon_pipe_buf_steal, 145 .steal = anon_pipe_buf_steal,
146 .get = anon_pipe_buf_get,
140}; 147};
141 148
142static ssize_t 149static ssize_t
diff --git a/fs/splice.c b/fs/splice.c
index 5d3eda64703b..8d57e89924a6 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -125,12 +125,19 @@ static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
125 kunmap(buf->page); 125 kunmap(buf->page);
126} 126}
127 127
128static void page_cache_pipe_buf_get(struct pipe_inode_info *info,
129 struct pipe_buffer *buf)
130{
131 page_cache_get(buf->page);
132}
133
/* page_cache_pipe_buf_ops: buffer callback table defined in fs/splice.c. This hunk adds the .get callback (page_cache_pipe_buf_get), which takes an extra reference on the buffer's page. */
128static struct pipe_buf_operations page_cache_pipe_buf_ops = { 134static struct pipe_buf_operations page_cache_pipe_buf_ops = {
129 .can_merge = 0, 135 .can_merge = 0,
130 .map = page_cache_pipe_buf_map, 136 .map = page_cache_pipe_buf_map,
131 .unmap = page_cache_pipe_buf_unmap, 137 .unmap = page_cache_pipe_buf_unmap,
132 .release = page_cache_pipe_buf_release, 138 .release = page_cache_pipe_buf_release,
133 .steal = page_cache_pipe_buf_steal, 139 .steal = page_cache_pipe_buf_steal,
140 .get = page_cache_pipe_buf_get,
134}; 141};
135 142
136/* 143/*
@@ -963,3 +970,182 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
963 970
964 return error; 971 return error;
965} 972}
973
974/*
975 * Link contents of ipipe to opipe.
976 */
977static int link_pipe(struct pipe_inode_info *ipipe,
978 struct pipe_inode_info *opipe,
979 size_t len, unsigned int flags)
980{
981 struct pipe_buffer *ibuf, *obuf;
982 int ret = 0, do_wakeup = 0, i;
983
984 /*
985 * Potential ABBA deadlock, work around it by ordering lock
986 * grabbing by inode address. Otherwise two different processes
987 * could deadlock (one doing tee from A -> B, the other from B -> A).
988 */
989 if (ipipe->inode < opipe->inode) {
990 mutex_lock(&ipipe->inode->i_mutex);
991 mutex_lock(&opipe->inode->i_mutex);
992 } else {
993 mutex_lock(&opipe->inode->i_mutex);
994 mutex_lock(&ipipe->inode->i_mutex);
995 }
996
997 for (i = 0;; i++) {
998 if (!opipe->readers) {
999 send_sig(SIGPIPE, current, 0);
1000 if (!ret)
1001 ret = -EPIPE;
1002 break;
1003 }
1004 if (ipipe->nrbufs - i) {
1005 ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
1006
1007 /*
1008 * If we have room, fill this buffer
1009 */
1010 if (opipe->nrbufs < PIPE_BUFFERS) {
1011 int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
1012
1013 /*
1014 * Get a reference to this pipe buffer,
1015 * so we can copy the contents over.
1016 */
1017 ibuf->ops->get(ipipe, ibuf);
1018
1019 obuf = opipe->bufs + nbuf;
1020 *obuf = *ibuf;
1021
1022 if (obuf->len > len)
1023 obuf->len = len;
1024
1025 opipe->nrbufs++;
1026 do_wakeup = 1;
1027 ret += obuf->len;
1028 len -= obuf->len;
1029
1030 if (!len)
1031 break;
1032 if (opipe->nrbufs < PIPE_BUFFERS)
1033 continue;
1034 }
1035
1036 /*
1037 * We have input available, but no output room.
1038 * If we already copied data, return that.
1039 */
1040 if (flags & SPLICE_F_NONBLOCK) {
1041 if (!ret)
1042 ret = -EAGAIN;
1043 break;
1044 }
1045 if (signal_pending(current)) {
1046 if (!ret)
1047 ret = -ERESTARTSYS;
1048 break;
1049 }
1050 if (do_wakeup) {
1051 smp_mb();
1052 if (waitqueue_active(&opipe->wait))
1053 wake_up_interruptible(&opipe->wait);
1054 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1055 do_wakeup = 0;
1056 }
1057
1058 opipe->waiting_writers++;
1059 pipe_wait(opipe);
1060 opipe->waiting_writers--;
1061 continue;
1062 }
1063
1064 /*
1065 * No input buffers, do the usual checks for available
1066 * writers and blocking and wait if necessary
1067 */
1068 if (!ipipe->writers)
1069 break;
1070 if (!ipipe->waiting_writers) {
1071 if (ret)
1072 break;
1073 }
1074 if (flags & SPLICE_F_NONBLOCK) {
1075 if (!ret)
1076 ret = -EAGAIN;
1077 break;
1078 }
1079 if (signal_pending(current)) {
1080 if (!ret)
1081 ret = -ERESTARTSYS;
1082 break;
1083 }
1084
1085 if (waitqueue_active(&ipipe->wait))
1086 wake_up_interruptible_sync(&ipipe->wait);
1087 kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT);
1088
1089 pipe_wait(ipipe);
1090 }
1091
1092 mutex_unlock(&ipipe->inode->i_mutex);
1093 mutex_unlock(&opipe->inode->i_mutex);
1094
1095 if (do_wakeup) {
1096 smp_mb();
1097 if (waitqueue_active(&opipe->wait))
1098 wake_up_interruptible(&opipe->wait);
1099 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1100 }
1101
1102 return ret;
1103}
1104
1105/*
1106 * This is a tee(1) implementation that works on pipes. It doesn't copy
1107 * any data, it simply references the 'in' pages on the 'out' pipe.
1108 * The 'flags' used are the SPLICE_F_* variants, currently the only
1109 * applicable one is SPLICE_F_NONBLOCK.
1110 */
1111static long do_tee(struct file *in, struct file *out, size_t len,
1112 unsigned int flags)
1113{
1114 struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
1115 struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
1116
1117 /*
1118 * Link ipipe to the two output pipes, consuming as we go along.
1119 */
1120 if (ipipe && opipe)
1121 return link_pipe(ipipe, opipe, len, flags);
1122
1123 return -EINVAL;
1124}
1125
1126asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)
1127{
1128 struct file *in;
1129 int error, fput_in;
1130
1131 if (unlikely(!len))
1132 return 0;
1133
1134 error = -EBADF;
1135 in = fget_light(fdin, &fput_in);
1136 if (in) {
1137 if (in->f_mode & FMODE_READ) {
1138 int fput_out;
1139 struct file *out = fget_light(fdout, &fput_out);
1140
1141 if (out) {
1142 if (out->f_mode & FMODE_WRITE)
1143 error = do_tee(in, out, len, flags);
1144 fput_light(out, fput_out);
1145 }
1146 }
1147 fput_light(in, fput_in);
1148 }
1149
1150 return error;
1151}
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index 6a8dd83c350f..d81d6cfc1bb4 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -320,8 +320,9 @@
320#define __NR_get_robust_list 312 320#define __NR_get_robust_list 312
321#define __NR_splice 313 321#define __NR_splice 313
322#define __NR_sync_file_range 314 322#define __NR_sync_file_range 314
323#define __NR_tee 315
323 324
324#define NR_syscalls 315 325#define NR_syscalls 316
325 326
326/* 327/*
327 * user-visible error numbers are in the range -1 - -128: see 328 * user-visible error numbers are in the range -1 - -128: see
diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h
index 1c749acca021..a40ebec6aeeb 100644
--- a/include/asm-ia64/unistd.h
+++ b/include/asm-ia64/unistd.h
@@ -289,12 +289,13 @@
289#define __NR_set_robust_list 1298 289#define __NR_set_robust_list 1298
290#define __NR_get_robust_list 1299 290#define __NR_get_robust_list 1299
291#define __NR_sync_file_range 1300 291#define __NR_sync_file_range 1300
292#define __NR_tee 1301
292 293
293#ifdef __KERNEL__ 294#ifdef __KERNEL__
294 295
295#include <linux/config.h> 296#include <linux/config.h>
296 297
297#define NR_syscalls 277 /* length of syscall table */ 298#define NR_syscalls 278 /* length of syscall table */
298 299
299#define __ARCH_WANT_SYS_RT_SIGACTION 300#define __ARCH_WANT_SYS_RT_SIGACTION
300 301
diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h
index 536ba0873052..c612f1a62772 100644
--- a/include/asm-powerpc/unistd.h
+++ b/include/asm-powerpc/unistd.h
@@ -302,8 +302,9 @@
302#define __NR_ppoll 281 302#define __NR_ppoll 281
303#define __NR_unshare 282 303#define __NR_unshare 282
304#define __NR_splice 283 304#define __NR_splice 283
305#define __NR_tee 284
305 306
306#define __NR_syscalls 284 307#define __NR_syscalls 285
307 308
308#ifdef __KERNEL__ 309#ifdef __KERNEL__
309#define __NR__exit __NR_exit 310#define __NR__exit __NR_exit
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index f21ff2c1e960..d86494e23b63 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -611,8 +611,10 @@ __SYSCALL(__NR_set_robust_list, sys_set_robust_list)
611__SYSCALL(__NR_get_robust_list, sys_get_robust_list) 611__SYSCALL(__NR_get_robust_list, sys_get_robust_list)
612#define __NR_splice 275 612#define __NR_splice 275
613__SYSCALL(__NR_splice, sys_splice) 613__SYSCALL(__NR_splice, sys_splice)
614#define __NR_tee 276
615__SYSCALL(__NR_tee, sys_tee)
614 616
615#define __NR_syscall_max __NR_splice 617#define __NR_syscall_max __NR_tee
616 618
617#ifndef __NO_STUBS 619#ifndef __NO_STUBS
618 620
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 123a7c24bc72..ef7f33c0be19 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -21,6 +21,7 @@ struct pipe_buf_operations {
21 void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *); 21 void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *);
22 void (*release)(struct pipe_inode_info *, struct pipe_buffer *); 22 void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
23 int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); 23 int (*steal)(struct pipe_inode_info *, struct pipe_buffer *);
24 void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
24}; 25};
25 26
26struct pipe_inode_info { 27struct pipe_inode_info {
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index f001bad28d9a..d3ebc0e68b2b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -574,6 +574,8 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
574 int fd_out, loff_t __user *off_out, 574 int fd_out, loff_t __user *off_out,
575 size_t len, unsigned int flags); 575 size_t len, unsigned int flags);
576 576
577asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags);
578
577asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, 579asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
578 unsigned int flags); 580 unsigned int flags);
579 581