aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@g5.osdl.org> 2006-04-11 09:34:02 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-04-11 09:34:02 -0400
commit 88dd9c16cecbd105bbe7711b6120333f6f7b5474 (patch)
tree 9632e5988abeaa7e4d20350305edc4e4652b56d1
parent 6dde432553551ae036aae12c2b940677d36c9a5b (diff)
parent d1195c516a9acd767cb541f914be2c6ddcafcfc1 (diff)
Merge branch 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block
* 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block:
  [PATCH] vfs: add splice_write and splice_read to documentation
  [PATCH] Remove sys_ prefix of new syscalls from __NR_sys_*
  [PATCH] splice: warning fix
  [PATCH] another round of fs/pipe.c cleanups
  [PATCH] splice: comment styles
  [PATCH] splice: add Ingo as addition copyright holder
  [PATCH] splice: unlikely() optimizations
  [PATCH] splice: speedups and optimizations
  [PATCH] pipe.c/fifo.c code cleanups
  [PATCH] get rid of the PIPE_*() macros
  [PATCH] splice: speedup __generic_file_splice_read
  [PATCH] splice: add direct fd <-> fd splicing support
  [PATCH] splice: add optional input and output offsets
  [PATCH] introduce a "kernel-internal pipe object" abstraction
  [PATCH] splice: be smarter about calling do_page_cache_readahead()
  [PATCH] splice: optimize the splice buffer mapping
  [PATCH] splice: cleanup __generic_file_splice_read()
  [PATCH] splice: only call wake_up_interruptible() when we really have to
  [PATCH] splice: potential !page dereference
  [PATCH] splice: mark the io page as accessed
-rw-r--r-- Documentation/filesystems/vfs.txt 12
-rw-r--r-- fs/fifo.c 65
-rw-r--r-- fs/pipe.c 310
-rw-r--r-- fs/read_write.c 2
-rw-r--r-- fs/splice.c 492
-rw-r--r-- fs/xfs/linux-2.6/xfs_file.c 8
-rw-r--r-- fs/xfs/linux-2.6/xfs_lrw.c 4
-rw-r--r-- fs/xfs/linux-2.6/xfs_lrw.h 4
-rw-r--r-- fs/xfs/linux-2.6/xfs_vnode.h 4
-rw-r--r-- include/asm-i386/unistd.h 4
-rw-r--r-- include/linux/fs.h 17
-rw-r--r-- include/linux/pipe_fs_i.h 18
-rw-r--r-- include/linux/sched.h 6
-rw-r--r-- include/linux/syscalls.h 7
-rw-r--r-- kernel/exit.c 4
-rw-r--r-- net/socket.c 4
16 files changed, 601 insertions, 360 deletions
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index adaa899e5c90..3a2e5520c1e3 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -694,7 +694,7 @@ struct file_operations
694---------------------- 694----------------------
695 695
696This describes how the VFS can manipulate an open file. As of kernel 696This describes how the VFS can manipulate an open file. As of kernel
6972.6.13, the following members are defined: 6972.6.17, the following members are defined:
698 698
699struct file_operations { 699struct file_operations {
700 loff_t (*llseek) (struct file *, loff_t, int); 700 loff_t (*llseek) (struct file *, loff_t, int);
@@ -723,6 +723,10 @@ struct file_operations {
723 int (*check_flags)(int); 723 int (*check_flags)(int);
724 int (*dir_notify)(struct file *filp, unsigned long arg); 724 int (*dir_notify)(struct file *filp, unsigned long arg);
725 int (*flock) (struct file *, int, struct file_lock *); 725 int (*flock) (struct file *, int, struct file_lock *);
726 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned
727int);
728 ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned
729int);
726}; 730};
727 731
728Again, all methods are called without any locks being held, unless 732Again, all methods are called without any locks being held, unless
@@ -790,6 +794,12 @@ otherwise noted.
790 794
791 flock: called by the flock(2) system call 795 flock: called by the flock(2) system call
792 796
797 splice_write: called by the VFS to splice data from a pipe to a file. This
798 method is used by the splice(2) system call
799
800 splice_read: called by the VFS to splice data from file to a pipe. This
801 method is used by the splice(2) system call
802
793Note that the file operations are implemented by the specific 803Note that the file operations are implemented by the specific
794filesystem in which the inode resides. When opening a device node 804filesystem in which the inode resides. When opening a device node
795(character or block special) most filesystems will call special 805(character or block special) most filesystems will call special
diff --git a/fs/fifo.c b/fs/fifo.c
index 889f722ee36d..49035b174b48 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -15,30 +15,35 @@
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/pipe_fs_i.h> 16#include <linux/pipe_fs_i.h>
17 17
18static void wait_for_partner(struct inode* inode, unsigned int* cnt) 18static void wait_for_partner(struct inode* inode, unsigned int *cnt)
19{ 19{
20 int cur = *cnt; 20 int cur = *cnt;
21 while(cur == *cnt) { 21
22 pipe_wait(inode); 22 while (cur == *cnt) {
23 if(signal_pending(current)) 23 pipe_wait(inode->i_pipe);
24 if (signal_pending(current))
24 break; 25 break;
25 } 26 }
26} 27}
27 28
28static void wake_up_partner(struct inode* inode) 29static void wake_up_partner(struct inode* inode)
29{ 30{
30 wake_up_interruptible(PIPE_WAIT(*inode)); 31 wake_up_interruptible(&inode->i_pipe->wait);
31} 32}
32 33
33static int fifo_open(struct inode *inode, struct file *filp) 34static int fifo_open(struct inode *inode, struct file *filp)
34{ 35{
36 struct pipe_inode_info *pipe;
35 int ret; 37 int ret;
36 38
37 mutex_lock(PIPE_MUTEX(*inode)); 39 mutex_lock(&inode->i_mutex);
38 if (!inode->i_pipe) { 40 pipe = inode->i_pipe;
41 if (!pipe) {
39 ret = -ENOMEM; 42 ret = -ENOMEM;
40 if(!pipe_new(inode)) 43 pipe = alloc_pipe_info(inode);
44 if (!pipe)
41 goto err_nocleanup; 45 goto err_nocleanup;
46 inode->i_pipe = pipe;
42 } 47 }
43 filp->f_version = 0; 48 filp->f_version = 0;
44 49
@@ -53,18 +58,18 @@ static int fifo_open(struct inode *inode, struct file *filp)
53 * opened, even when there is no process writing the FIFO. 58 * opened, even when there is no process writing the FIFO.
54 */ 59 */
55 filp->f_op = &read_fifo_fops; 60 filp->f_op = &read_fifo_fops;
56 PIPE_RCOUNTER(*inode)++; 61 pipe->r_counter++;
57 if (PIPE_READERS(*inode)++ == 0) 62 if (pipe->readers++ == 0)
58 wake_up_partner(inode); 63 wake_up_partner(inode);
59 64
60 if (!PIPE_WRITERS(*inode)) { 65 if (!pipe->writers) {
61 if ((filp->f_flags & O_NONBLOCK)) { 66 if ((filp->f_flags & O_NONBLOCK)) {
62 /* suppress POLLHUP until we have 67 /* suppress POLLHUP until we have
63 * seen a writer */ 68 * seen a writer */
64 filp->f_version = PIPE_WCOUNTER(*inode); 69 filp->f_version = pipe->w_counter;
65 } else 70 } else
66 { 71 {
67 wait_for_partner(inode, &PIPE_WCOUNTER(*inode)); 72 wait_for_partner(inode, &pipe->w_counter);
68 if(signal_pending(current)) 73 if(signal_pending(current))
69 goto err_rd; 74 goto err_rd;
70 } 75 }
@@ -78,16 +83,16 @@ static int fifo_open(struct inode *inode, struct file *filp)
78 * errno=ENXIO when there is no process reading the FIFO. 83 * errno=ENXIO when there is no process reading the FIFO.
79 */ 84 */
80 ret = -ENXIO; 85 ret = -ENXIO;
81 if ((filp->f_flags & O_NONBLOCK) && !PIPE_READERS(*inode)) 86 if ((filp->f_flags & O_NONBLOCK) && !pipe->readers)
82 goto err; 87 goto err;
83 88
84 filp->f_op = &write_fifo_fops; 89 filp->f_op = &write_fifo_fops;
85 PIPE_WCOUNTER(*inode)++; 90 pipe->w_counter++;
86 if (!PIPE_WRITERS(*inode)++) 91 if (!pipe->writers++)
87 wake_up_partner(inode); 92 wake_up_partner(inode);
88 93
89 if (!PIPE_READERS(*inode)) { 94 if (!pipe->readers) {
90 wait_for_partner(inode, &PIPE_RCOUNTER(*inode)); 95 wait_for_partner(inode, &pipe->r_counter);
91 if (signal_pending(current)) 96 if (signal_pending(current))
92 goto err_wr; 97 goto err_wr;
93 } 98 }
@@ -102,11 +107,11 @@ static int fifo_open(struct inode *inode, struct file *filp)
102 */ 107 */
103 filp->f_op = &rdwr_fifo_fops; 108 filp->f_op = &rdwr_fifo_fops;
104 109
105 PIPE_READERS(*inode)++; 110 pipe->readers++;
106 PIPE_WRITERS(*inode)++; 111 pipe->writers++;
107 PIPE_RCOUNTER(*inode)++; 112 pipe->r_counter++;
108 PIPE_WCOUNTER(*inode)++; 113 pipe->w_counter++;
109 if (PIPE_READERS(*inode) == 1 || PIPE_WRITERS(*inode) == 1) 114 if (pipe->readers == 1 || pipe->writers == 1)
110 wake_up_partner(inode); 115 wake_up_partner(inode);
111 break; 116 break;
112 117
@@ -116,27 +121,27 @@ static int fifo_open(struct inode *inode, struct file *filp)
116 } 121 }
117 122
118 /* Ok! */ 123 /* Ok! */
119 mutex_unlock(PIPE_MUTEX(*inode)); 124 mutex_unlock(&inode->i_mutex);
120 return 0; 125 return 0;
121 126
122err_rd: 127err_rd:
123 if (!--PIPE_READERS(*inode)) 128 if (!--pipe->readers)
124 wake_up_interruptible(PIPE_WAIT(*inode)); 129 wake_up_interruptible(&pipe->wait);
125 ret = -ERESTARTSYS; 130 ret = -ERESTARTSYS;
126 goto err; 131 goto err;
127 132
128err_wr: 133err_wr:
129 if (!--PIPE_WRITERS(*inode)) 134 if (!--pipe->writers)
130 wake_up_interruptible(PIPE_WAIT(*inode)); 135 wake_up_interruptible(&pipe->wait);
131 ret = -ERESTARTSYS; 136 ret = -ERESTARTSYS;
132 goto err; 137 goto err;
133 138
134err: 139err:
135 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) 140 if (!pipe->readers && !pipe->writers)
136 free_pipe_info(inode); 141 free_pipe_info(inode);
137 142
138err_nocleanup: 143err_nocleanup:
139 mutex_unlock(PIPE_MUTEX(*inode)); 144 mutex_unlock(&inode->i_mutex);
140 return ret; 145 return ret;
141} 146}
142 147
diff --git a/fs/pipe.c b/fs/pipe.c
index 795df987cd38..e984beb93a0e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -36,7 +36,7 @@
36 */ 36 */
37 37
38/* Drop the inode semaphore and wait for a pipe event, atomically */ 38/* Drop the inode semaphore and wait for a pipe event, atomically */
39void pipe_wait(struct inode * inode) 39void pipe_wait(struct pipe_inode_info *pipe)
40{ 40{
41 DEFINE_WAIT(wait); 41 DEFINE_WAIT(wait);
42 42
@@ -44,11 +44,14 @@ void pipe_wait(struct inode * inode)
44 * Pipes are system-local resources, so sleeping on them 44 * Pipes are system-local resources, so sleeping on them
45 * is considered a noninteractive wait: 45 * is considered a noninteractive wait:
46 */ 46 */
47 prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE); 47 prepare_to_wait(&pipe->wait, &wait,
48 mutex_unlock(PIPE_MUTEX(*inode)); 48 TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
49 if (pipe->inode)
50 mutex_unlock(&pipe->inode->i_mutex);
49 schedule(); 51 schedule();
50 finish_wait(PIPE_WAIT(*inode), &wait); 52 finish_wait(&pipe->wait, &wait);
51 mutex_lock(PIPE_MUTEX(*inode)); 53 if (pipe->inode)
54 mutex_lock(&pipe->inode->i_mutex);
52} 55}
53 56
54static int 57static int
@@ -91,7 +94,8 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
91 return 0; 94 return 0;
92} 95}
93 96
94static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf) 97static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
98 struct pipe_buffer *buf)
95{ 99{
96 struct page *page = buf->page; 100 struct page *page = buf->page;
97 101
@@ -100,30 +104,27 @@ static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buff
100 /* 104 /*
101 * If nobody else uses this page, and we don't already have a 105 * If nobody else uses this page, and we don't already have a
102 * temporary page, let's keep track of it as a one-deep 106 * temporary page, let's keep track of it as a one-deep
103 * allocation cache 107 * allocation cache. (Otherwise just release our reference to it)
104 */ 108 */
105 if (page_count(page) == 1 && !info->tmp_page) { 109 if (page_count(page) == 1 && !pipe->tmp_page)
106 info->tmp_page = page; 110 pipe->tmp_page = page;
107 return; 111 else
108 } 112 page_cache_release(page);
109
110 /*
111 * Otherwise just release our reference to it
112 */
113 page_cache_release(page);
114} 113}
115 114
116static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf) 115static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe,
116 struct pipe_buffer *buf)
117{ 117{
118 return kmap(buf->page); 118 return kmap(buf->page);
119} 119}
120 120
121static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf) 121static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe,
122 struct pipe_buffer *buf)
122{ 123{
123 kunmap(buf->page); 124 kunmap(buf->page);
124} 125}
125 126
126static int anon_pipe_buf_steal(struct pipe_inode_info *info, 127static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
127 struct pipe_buffer *buf) 128 struct pipe_buffer *buf)
128{ 129{
129 buf->flags |= PIPE_BUF_FLAG_STOLEN; 130 buf->flags |= PIPE_BUF_FLAG_STOLEN;
@@ -143,7 +144,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
143 unsigned long nr_segs, loff_t *ppos) 144 unsigned long nr_segs, loff_t *ppos)
144{ 145{
145 struct inode *inode = filp->f_dentry->d_inode; 146 struct inode *inode = filp->f_dentry->d_inode;
146 struct pipe_inode_info *info; 147 struct pipe_inode_info *pipe;
147 int do_wakeup; 148 int do_wakeup;
148 ssize_t ret; 149 ssize_t ret;
149 struct iovec *iov = (struct iovec *)_iov; 150 struct iovec *iov = (struct iovec *)_iov;
@@ -156,13 +157,13 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
156 157
157 do_wakeup = 0; 158 do_wakeup = 0;
158 ret = 0; 159 ret = 0;
159 mutex_lock(PIPE_MUTEX(*inode)); 160 mutex_lock(&inode->i_mutex);
160 info = inode->i_pipe; 161 pipe = inode->i_pipe;
161 for (;;) { 162 for (;;) {
162 int bufs = info->nrbufs; 163 int bufs = pipe->nrbufs;
163 if (bufs) { 164 if (bufs) {
164 int curbuf = info->curbuf; 165 int curbuf = pipe->curbuf;
165 struct pipe_buffer *buf = info->bufs + curbuf; 166 struct pipe_buffer *buf = pipe->bufs + curbuf;
166 struct pipe_buf_operations *ops = buf->ops; 167 struct pipe_buf_operations *ops = buf->ops;
167 void *addr; 168 void *addr;
168 size_t chars = buf->len; 169 size_t chars = buf->len;
@@ -171,16 +172,17 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
171 if (chars > total_len) 172 if (chars > total_len)
172 chars = total_len; 173 chars = total_len;
173 174
174 addr = ops->map(filp, info, buf); 175 addr = ops->map(filp, pipe, buf);
175 if (IS_ERR(addr)) { 176 if (IS_ERR(addr)) {
176 if (!ret) 177 if (!ret)
177 ret = PTR_ERR(addr); 178 ret = PTR_ERR(addr);
178 break; 179 break;
179 } 180 }
180 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); 181 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
181 ops->unmap(info, buf); 182 ops->unmap(pipe, buf);
182 if (unlikely(error)) { 183 if (unlikely(error)) {
183 if (!ret) ret = -EFAULT; 184 if (!ret)
185 ret = -EFAULT;
184 break; 186 break;
185 } 187 }
186 ret += chars; 188 ret += chars;
@@ -188,10 +190,10 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
188 buf->len -= chars; 190 buf->len -= chars;
189 if (!buf->len) { 191 if (!buf->len) {
190 buf->ops = NULL; 192 buf->ops = NULL;
191 ops->release(info, buf); 193 ops->release(pipe, buf);
192 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1); 194 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
193 info->curbuf = curbuf; 195 pipe->curbuf = curbuf;
194 info->nrbufs = --bufs; 196 pipe->nrbufs = --bufs;
195 do_wakeup = 1; 197 do_wakeup = 1;
196 } 198 }
197 total_len -= chars; 199 total_len -= chars;
@@ -200,9 +202,9 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
200 } 202 }
201 if (bufs) /* More to do? */ 203 if (bufs) /* More to do? */
202 continue; 204 continue;
203 if (!PIPE_WRITERS(*inode)) 205 if (!pipe->writers)
204 break; 206 break;
205 if (!PIPE_WAITING_WRITERS(*inode)) { 207 if (!pipe->waiting_writers) {
206 /* syscall merging: Usually we must not sleep 208 /* syscall merging: Usually we must not sleep
207 * if O_NONBLOCK is set, or if we got some data. 209 * if O_NONBLOCK is set, or if we got some data.
208 * But if a writer sleeps in kernel space, then 210 * But if a writer sleeps in kernel space, then
@@ -216,20 +218,22 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
216 } 218 }
217 } 219 }
218 if (signal_pending(current)) { 220 if (signal_pending(current)) {
219 if (!ret) ret = -ERESTARTSYS; 221 if (!ret)
222 ret = -ERESTARTSYS;
220 break; 223 break;
221 } 224 }
222 if (do_wakeup) { 225 if (do_wakeup) {
223 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 226 wake_up_interruptible_sync(&pipe->wait);
224 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 227 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
225 } 228 }
226 pipe_wait(inode); 229 pipe_wait(pipe);
227 } 230 }
228 mutex_unlock(PIPE_MUTEX(*inode)); 231 mutex_unlock(&inode->i_mutex);
229 /* Signal writers asynchronously that there is more room. */ 232
233 /* Signal writers asynchronously that there is more room. */
230 if (do_wakeup) { 234 if (do_wakeup) {
231 wake_up_interruptible(PIPE_WAIT(*inode)); 235 wake_up_interruptible(&pipe->wait);
232 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 236 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
233 } 237 }
234 if (ret > 0) 238 if (ret > 0)
235 file_accessed(filp); 239 file_accessed(filp);
@@ -240,6 +244,7 @@ static ssize_t
240pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) 244pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
241{ 245{
242 struct iovec iov = { .iov_base = buf, .iov_len = count }; 246 struct iovec iov = { .iov_base = buf, .iov_len = count };
247
243 return pipe_readv(filp, &iov, 1, ppos); 248 return pipe_readv(filp, &iov, 1, ppos);
244} 249}
245 250
@@ -248,7 +253,7 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
248 unsigned long nr_segs, loff_t *ppos) 253 unsigned long nr_segs, loff_t *ppos)
249{ 254{
250 struct inode *inode = filp->f_dentry->d_inode; 255 struct inode *inode = filp->f_dentry->d_inode;
251 struct pipe_inode_info *info; 256 struct pipe_inode_info *pipe;
252 ssize_t ret; 257 ssize_t ret;
253 int do_wakeup; 258 int do_wakeup;
254 struct iovec *iov = (struct iovec *)_iov; 259 struct iovec *iov = (struct iovec *)_iov;
@@ -262,10 +267,10 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
262 267
263 do_wakeup = 0; 268 do_wakeup = 0;
264 ret = 0; 269 ret = 0;
265 mutex_lock(PIPE_MUTEX(*inode)); 270 mutex_lock(&inode->i_mutex);
266 info = inode->i_pipe; 271 pipe = inode->i_pipe;
267 272
268 if (!PIPE_READERS(*inode)) { 273 if (!pipe->readers) {
269 send_sig(SIGPIPE, current, 0); 274 send_sig(SIGPIPE, current, 0);
270 ret = -EPIPE; 275 ret = -EPIPE;
271 goto out; 276 goto out;
@@ -273,23 +278,25 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
273 278
274 /* We try to merge small writes */ 279 /* We try to merge small writes */
275 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ 280 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
276 if (info->nrbufs && chars != 0) { 281 if (pipe->nrbufs && chars != 0) {
277 int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1); 282 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
278 struct pipe_buffer *buf = info->bufs + lastbuf; 283 (PIPE_BUFFERS-1);
284 struct pipe_buffer *buf = pipe->bufs + lastbuf;
279 struct pipe_buf_operations *ops = buf->ops; 285 struct pipe_buf_operations *ops = buf->ops;
280 int offset = buf->offset + buf->len; 286 int offset = buf->offset + buf->len;
287
281 if (ops->can_merge && offset + chars <= PAGE_SIZE) { 288 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
282 void *addr; 289 void *addr;
283 int error; 290 int error;
284 291
285 addr = ops->map(filp, info, buf); 292 addr = ops->map(filp, pipe, buf);
286 if (IS_ERR(addr)) { 293 if (IS_ERR(addr)) {
287 error = PTR_ERR(addr); 294 error = PTR_ERR(addr);
288 goto out; 295 goto out;
289 } 296 }
290 error = pipe_iov_copy_from_user(offset + addr, iov, 297 error = pipe_iov_copy_from_user(offset + addr, iov,
291 chars); 298 chars);
292 ops->unmap(info, buf); 299 ops->unmap(pipe, buf);
293 ret = error; 300 ret = error;
294 do_wakeup = 1; 301 do_wakeup = 1;
295 if (error) 302 if (error)
@@ -304,16 +311,18 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
304 311
305 for (;;) { 312 for (;;) {
306 int bufs; 313 int bufs;
307 if (!PIPE_READERS(*inode)) { 314
315 if (!pipe->readers) {
308 send_sig(SIGPIPE, current, 0); 316 send_sig(SIGPIPE, current, 0);
309 if (!ret) ret = -EPIPE; 317 if (!ret)
318 ret = -EPIPE;
310 break; 319 break;
311 } 320 }
312 bufs = info->nrbufs; 321 bufs = pipe->nrbufs;
313 if (bufs < PIPE_BUFFERS) { 322 if (bufs < PIPE_BUFFERS) {
314 int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1); 323 int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
315 struct pipe_buffer *buf = info->bufs + newbuf; 324 struct pipe_buffer *buf = pipe->bufs + newbuf;
316 struct page *page = info->tmp_page; 325 struct page *page = pipe->tmp_page;
317 int error; 326 int error;
318 327
319 if (!page) { 328 if (!page) {
@@ -322,9 +331,9 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
322 ret = ret ? : -ENOMEM; 331 ret = ret ? : -ENOMEM;
323 break; 332 break;
324 } 333 }
325 info->tmp_page = page; 334 pipe->tmp_page = page;
326 } 335 }
327 /* Always wakeup, even if the copy fails. Otherwise 336 /* Always wake up, even if the copy fails. Otherwise
328 * we lock up (O_NONBLOCK-)readers that sleep due to 337 * we lock up (O_NONBLOCK-)readers that sleep due to
329 * syscall merging. 338 * syscall merging.
330 * FIXME! Is this really true? 339 * FIXME! Is this really true?
@@ -337,7 +346,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
337 error = pipe_iov_copy_from_user(kmap(page), iov, chars); 346 error = pipe_iov_copy_from_user(kmap(page), iov, chars);
338 kunmap(page); 347 kunmap(page);
339 if (unlikely(error)) { 348 if (unlikely(error)) {
340 if (!ret) ret = -EFAULT; 349 if (!ret)
350 ret = -EFAULT;
341 break; 351 break;
342 } 352 }
343 ret += chars; 353 ret += chars;
@@ -347,8 +357,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
347 buf->ops = &anon_pipe_buf_ops; 357 buf->ops = &anon_pipe_buf_ops;
348 buf->offset = 0; 358 buf->offset = 0;
349 buf->len = chars; 359 buf->len = chars;
350 info->nrbufs = ++bufs; 360 pipe->nrbufs = ++bufs;
351 info->tmp_page = NULL; 361 pipe->tmp_page = NULL;
352 362
353 total_len -= chars; 363 total_len -= chars;
354 if (!total_len) 364 if (!total_len)
@@ -357,27 +367,29 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
357 if (bufs < PIPE_BUFFERS) 367 if (bufs < PIPE_BUFFERS)
358 continue; 368 continue;
359 if (filp->f_flags & O_NONBLOCK) { 369 if (filp->f_flags & O_NONBLOCK) {
360 if (!ret) ret = -EAGAIN; 370 if (!ret)
371 ret = -EAGAIN;
361 break; 372 break;
362 } 373 }
363 if (signal_pending(current)) { 374 if (signal_pending(current)) {
364 if (!ret) ret = -ERESTARTSYS; 375 if (!ret)
376 ret = -ERESTARTSYS;
365 break; 377 break;
366 } 378 }
367 if (do_wakeup) { 379 if (do_wakeup) {
368 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 380 wake_up_interruptible_sync(&pipe->wait);
369 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 381 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
370 do_wakeup = 0; 382 do_wakeup = 0;
371 } 383 }
372 PIPE_WAITING_WRITERS(*inode)++; 384 pipe->waiting_writers++;
373 pipe_wait(inode); 385 pipe_wait(pipe);
374 PIPE_WAITING_WRITERS(*inode)--; 386 pipe->waiting_writers--;
375 } 387 }
376out: 388out:
377 mutex_unlock(PIPE_MUTEX(*inode)); 389 mutex_unlock(&inode->i_mutex);
378 if (do_wakeup) { 390 if (do_wakeup) {
379 wake_up_interruptible(PIPE_WAIT(*inode)); 391 wake_up_interruptible(&pipe->wait);
380 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 392 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
381 } 393 }
382 if (ret > 0) 394 if (ret > 0)
383 file_update_time(filp); 395 file_update_time(filp);
@@ -389,6 +401,7 @@ pipe_write(struct file *filp, const char __user *buf,
389 size_t count, loff_t *ppos) 401 size_t count, loff_t *ppos)
390{ 402{
391 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; 403 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
404
392 return pipe_writev(filp, &iov, 1, ppos); 405 return pipe_writev(filp, &iov, 1, ppos);
393} 406}
394 407
@@ -399,7 +412,8 @@ bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
399} 412}
400 413
401static ssize_t 414static ssize_t
402bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) 415bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
416 loff_t *ppos)
403{ 417{
404 return -EBADF; 418 return -EBADF;
405} 419}
@@ -409,21 +423,22 @@ pipe_ioctl(struct inode *pino, struct file *filp,
409 unsigned int cmd, unsigned long arg) 423 unsigned int cmd, unsigned long arg)
410{ 424{
411 struct inode *inode = filp->f_dentry->d_inode; 425 struct inode *inode = filp->f_dentry->d_inode;
412 struct pipe_inode_info *info; 426 struct pipe_inode_info *pipe;
413 int count, buf, nrbufs; 427 int count, buf, nrbufs;
414 428
415 switch (cmd) { 429 switch (cmd) {
416 case FIONREAD: 430 case FIONREAD:
417 mutex_lock(PIPE_MUTEX(*inode)); 431 mutex_lock(&inode->i_mutex);
418 info = inode->i_pipe; 432 pipe = inode->i_pipe;
419 count = 0; 433 count = 0;
420 buf = info->curbuf; 434 buf = pipe->curbuf;
421 nrbufs = info->nrbufs; 435 nrbufs = pipe->nrbufs;
422 while (--nrbufs >= 0) { 436 while (--nrbufs >= 0) {
423 count += info->bufs[buf].len; 437 count += pipe->bufs[buf].len;
424 buf = (buf+1) & (PIPE_BUFFERS-1); 438 buf = (buf+1) & (PIPE_BUFFERS-1);
425 } 439 }
426 mutex_unlock(PIPE_MUTEX(*inode)); 440 mutex_unlock(&inode->i_mutex);
441
427 return put_user(count, (int __user *)arg); 442 return put_user(count, (int __user *)arg);
428 default: 443 default:
429 return -EINVAL; 444 return -EINVAL;
@@ -436,17 +451,17 @@ pipe_poll(struct file *filp, poll_table *wait)
436{ 451{
437 unsigned int mask; 452 unsigned int mask;
438 struct inode *inode = filp->f_dentry->d_inode; 453 struct inode *inode = filp->f_dentry->d_inode;
439 struct pipe_inode_info *info = inode->i_pipe; 454 struct pipe_inode_info *pipe = inode->i_pipe;
440 int nrbufs; 455 int nrbufs;
441 456
442 poll_wait(filp, PIPE_WAIT(*inode), wait); 457 poll_wait(filp, &pipe->wait, wait);
443 458
444 /* Reading only -- no need for acquiring the semaphore. */ 459 /* Reading only -- no need for acquiring the semaphore. */
445 nrbufs = info->nrbufs; 460 nrbufs = pipe->nrbufs;
446 mask = 0; 461 mask = 0;
447 if (filp->f_mode & FMODE_READ) { 462 if (filp->f_mode & FMODE_READ) {
448 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0; 463 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
449 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode)) 464 if (!pipe->writers && filp->f_version != pipe->w_counter)
450 mask |= POLLHUP; 465 mask |= POLLHUP;
451 } 466 }
452 467
@@ -456,7 +471,7 @@ pipe_poll(struct file *filp, poll_table *wait)
456 * Most Unices do not set POLLERR for FIFOs but on Linux they 471 * Most Unices do not set POLLERR for FIFOs but on Linux they
457 * behave exactly like pipes for poll(). 472 * behave exactly like pipes for poll().
458 */ 473 */
459 if (!PIPE_READERS(*inode)) 474 if (!pipe->readers)
460 mask |= POLLERR; 475 mask |= POLLERR;
461 } 476 }
462 477
@@ -466,17 +481,21 @@ pipe_poll(struct file *filp, poll_table *wait)
466static int 481static int
467pipe_release(struct inode *inode, int decr, int decw) 482pipe_release(struct inode *inode, int decr, int decw)
468{ 483{
469 mutex_lock(PIPE_MUTEX(*inode)); 484 struct pipe_inode_info *pipe;
470 PIPE_READERS(*inode) -= decr; 485
471 PIPE_WRITERS(*inode) -= decw; 486 mutex_lock(&inode->i_mutex);
472 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) { 487 pipe = inode->i_pipe;
488 pipe->readers -= decr;
489 pipe->writers -= decw;
490
491 if (!pipe->readers && !pipe->writers) {
473 free_pipe_info(inode); 492 free_pipe_info(inode);
474 } else { 493 } else {
475 wake_up_interruptible(PIPE_WAIT(*inode)); 494 wake_up_interruptible(&pipe->wait);
476 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 495 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
477 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 496 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
478 } 497 }
479 mutex_unlock(PIPE_MUTEX(*inode)); 498 mutex_unlock(&inode->i_mutex);
480 499
481 return 0; 500 return 0;
482} 501}
@@ -487,9 +506,9 @@ pipe_read_fasync(int fd, struct file *filp, int on)
487 struct inode *inode = filp->f_dentry->d_inode; 506 struct inode *inode = filp->f_dentry->d_inode;
488 int retval; 507 int retval;
489 508
490 mutex_lock(PIPE_MUTEX(*inode)); 509 mutex_lock(&inode->i_mutex);
491 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); 510 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
492 mutex_unlock(PIPE_MUTEX(*inode)); 511 mutex_unlock(&inode->i_mutex);
493 512
494 if (retval < 0) 513 if (retval < 0)
495 return retval; 514 return retval;
@@ -504,9 +523,9 @@ pipe_write_fasync(int fd, struct file *filp, int on)
504 struct inode *inode = filp->f_dentry->d_inode; 523 struct inode *inode = filp->f_dentry->d_inode;
505 int retval; 524 int retval;
506 525
507 mutex_lock(PIPE_MUTEX(*inode)); 526 mutex_lock(&inode->i_mutex);
508 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); 527 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
509 mutex_unlock(PIPE_MUTEX(*inode)); 528 mutex_unlock(&inode->i_mutex);
510 529
511 if (retval < 0) 530 if (retval < 0)
512 return retval; 531 return retval;
@@ -519,16 +538,17 @@ static int
519pipe_rdwr_fasync(int fd, struct file *filp, int on) 538pipe_rdwr_fasync(int fd, struct file *filp, int on)
520{ 539{
521 struct inode *inode = filp->f_dentry->d_inode; 540 struct inode *inode = filp->f_dentry->d_inode;
541 struct pipe_inode_info *pipe = inode->i_pipe;
522 int retval; 542 int retval;
523 543
524 mutex_lock(PIPE_MUTEX(*inode)); 544 mutex_lock(&inode->i_mutex);
525 545
526 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); 546 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
527 547
528 if (retval >= 0) 548 if (retval >= 0)
529 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); 549 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
530 550
531 mutex_unlock(PIPE_MUTEX(*inode)); 551 mutex_unlock(&inode->i_mutex);
532 552
533 if (retval < 0) 553 if (retval < 0)
534 return retval; 554 return retval;
@@ -567,9 +587,9 @@ pipe_read_open(struct inode *inode, struct file *filp)
567{ 587{
568 /* We could have perhaps used atomic_t, but this and friends 588 /* We could have perhaps used atomic_t, but this and friends
569 below are the only places. So it doesn't seem worthwhile. */ 589 below are the only places. So it doesn't seem worthwhile. */
570 mutex_lock(PIPE_MUTEX(*inode)); 590 mutex_lock(&inode->i_mutex);
571 PIPE_READERS(*inode)++; 591 inode->i_pipe->readers++;
572 mutex_unlock(PIPE_MUTEX(*inode)); 592 mutex_unlock(&inode->i_mutex);
573 593
574 return 0; 594 return 0;
575} 595}
@@ -577,9 +597,9 @@ pipe_read_open(struct inode *inode, struct file *filp)
577static int 597static int
578pipe_write_open(struct inode *inode, struct file *filp) 598pipe_write_open(struct inode *inode, struct file *filp)
579{ 599{
580 mutex_lock(PIPE_MUTEX(*inode)); 600 mutex_lock(&inode->i_mutex);
581 PIPE_WRITERS(*inode)++; 601 inode->i_pipe->writers++;
582 mutex_unlock(PIPE_MUTEX(*inode)); 602 mutex_unlock(&inode->i_mutex);
583 603
584 return 0; 604 return 0;
585} 605}
@@ -587,12 +607,12 @@ pipe_write_open(struct inode *inode, struct file *filp)
587static int 607static int
588pipe_rdwr_open(struct inode *inode, struct file *filp) 608pipe_rdwr_open(struct inode *inode, struct file *filp)
589{ 609{
590 mutex_lock(PIPE_MUTEX(*inode)); 610 mutex_lock(&inode->i_mutex);
591 if (filp->f_mode & FMODE_READ) 611 if (filp->f_mode & FMODE_READ)
592 PIPE_READERS(*inode)++; 612 inode->i_pipe->readers++;
593 if (filp->f_mode & FMODE_WRITE) 613 if (filp->f_mode & FMODE_WRITE)
594 PIPE_WRITERS(*inode)++; 614 inode->i_pipe->writers++;
595 mutex_unlock(PIPE_MUTEX(*inode)); 615 mutex_unlock(&inode->i_mutex);
596 616
597 return 0; 617 return 0;
598} 618}
@@ -675,37 +695,38 @@ static struct file_operations rdwr_pipe_fops = {
675 .fasync = pipe_rdwr_fasync, 695 .fasync = pipe_rdwr_fasync,
676}; 696};
677 697
678void free_pipe_info(struct inode *inode) 698struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
699{
700 struct pipe_inode_info *pipe;
701
702 pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
703 if (pipe) {
704 init_waitqueue_head(&pipe->wait);
705 pipe->r_counter = pipe->w_counter = 1;
706 pipe->inode = inode;
707 }
708
709 return pipe;
710}
711
712void __free_pipe_info(struct pipe_inode_info *pipe)
679{ 713{
680 int i; 714 int i;
681 struct pipe_inode_info *info = inode->i_pipe;
682 715
683 inode->i_pipe = NULL;
684 for (i = 0; i < PIPE_BUFFERS; i++) { 716 for (i = 0; i < PIPE_BUFFERS; i++) {
685 struct pipe_buffer *buf = info->bufs + i; 717 struct pipe_buffer *buf = pipe->bufs + i;
686 if (buf->ops) 718 if (buf->ops)
687 buf->ops->release(info, buf); 719 buf->ops->release(pipe, buf);
688 } 720 }
689 if (info->tmp_page) 721 if (pipe->tmp_page)
690 __free_page(info->tmp_page); 722 __free_page(pipe->tmp_page);
691 kfree(info); 723 kfree(pipe);
692} 724}
693 725
694struct inode* pipe_new(struct inode* inode) 726void free_pipe_info(struct inode *inode)
695{ 727{
696 struct pipe_inode_info *info; 728 __free_pipe_info(inode->i_pipe);
697 729 inode->i_pipe = NULL;
698 info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
699 if (!info)
700 goto fail_page;
701 inode->i_pipe = info;
702
703 init_waitqueue_head(PIPE_WAIT(*inode));
704 PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
705
706 return inode;
707fail_page:
708 return NULL;
709} 730}
710 731
711static struct vfsmount *pipe_mnt __read_mostly; 732static struct vfsmount *pipe_mnt __read_mostly;
@@ -713,6 +734,7 @@ static int pipefs_delete_dentry(struct dentry *dentry)
713{ 734{
714 return 1; 735 return 1;
715} 736}
737
716static struct dentry_operations pipefs_dentry_operations = { 738static struct dentry_operations pipefs_dentry_operations = {
717 .d_delete = pipefs_delete_dentry, 739 .d_delete = pipefs_delete_dentry,
718}; 740};
@@ -720,13 +742,17 @@ static struct dentry_operations pipefs_dentry_operations = {
720static struct inode * get_pipe_inode(void) 742static struct inode * get_pipe_inode(void)
721{ 743{
722 struct inode *inode = new_inode(pipe_mnt->mnt_sb); 744 struct inode *inode = new_inode(pipe_mnt->mnt_sb);
745 struct pipe_inode_info *pipe;
723 746
724 if (!inode) 747 if (!inode)
725 goto fail_inode; 748 goto fail_inode;
726 749
727 if(!pipe_new(inode)) 750 pipe = alloc_pipe_info(inode);
751 if (!pipe)
728 goto fail_iput; 752 goto fail_iput;
729 PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; 753 inode->i_pipe = pipe;
754
755 pipe->readers = pipe->writers = 1;
730 inode->i_fop = &rdwr_pipe_fops; 756 inode->i_fop = &rdwr_pipe_fops;
731 757
732 /* 758 /*
@@ -741,10 +767,12 @@ static struct inode * get_pipe_inode(void)
741 inode->i_gid = current->fsgid; 767 inode->i_gid = current->fsgid;
742 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 768 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
743 inode->i_blksize = PAGE_SIZE; 769 inode->i_blksize = PAGE_SIZE;
770
744 return inode; 771 return inode;
745 772
746fail_iput: 773fail_iput:
747 iput(inode); 774 iput(inode);
775
748fail_inode: 776fail_inode:
749 return NULL; 777 return NULL;
750} 778}
@@ -757,7 +785,7 @@ int do_pipe(int *fd)
757 struct inode * inode; 785 struct inode * inode;
758 struct file *f1, *f2; 786 struct file *f1, *f2;
759 int error; 787 int error;
760 int i,j; 788 int i, j;
761 789
762 error = -ENFILE; 790 error = -ENFILE;
763 f1 = get_empty_filp(); 791 f1 = get_empty_filp();
@@ -790,6 +818,7 @@ int do_pipe(int *fd)
790 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this); 818 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
791 if (!dentry) 819 if (!dentry)
792 goto close_f12_inode_i_j; 820 goto close_f12_inode_i_j;
821
793 dentry->d_op = &pipefs_dentry_operations; 822 dentry->d_op = &pipefs_dentry_operations;
794 d_add(dentry, inode); 823 d_add(dentry, inode);
795 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt)); 824 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
@@ -813,6 +842,7 @@ int do_pipe(int *fd)
813 fd_install(j, f2); 842 fd_install(j, f2);
814 fd[0] = i; 843 fd[0] = i;
815 fd[1] = j; 844 fd[1] = j;
845
816 return 0; 846 return 0;
817 847
818close_f12_inode_i_j: 848close_f12_inode_i_j:
@@ -837,8 +867,9 @@ no_files:
837 * d_name - pipe: will go nicely and kill the special-casing in procfs. 867 * d_name - pipe: will go nicely and kill the special-casing in procfs.
838 */ 868 */
839 869
840static struct super_block *pipefs_get_sb(struct file_system_type *fs_type, 870static struct super_block *
841 int flags, const char *dev_name, void *data) 871pipefs_get_sb(struct file_system_type *fs_type, int flags,
872 const char *dev_name, void *data)
842{ 873{
843 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); 874 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
844} 875}
@@ -852,6 +883,7 @@ static struct file_system_type pipe_fs_type = {
852static int __init init_pipe_fs(void) 883static int __init init_pipe_fs(void)
853{ 884{
854 int err = register_filesystem(&pipe_fs_type); 885 int err = register_filesystem(&pipe_fs_type);
886
855 if (!err) { 887 if (!err) {
856 pipe_mnt = kern_mount(&pipe_fs_type); 888 pipe_mnt = kern_mount(&pipe_fs_type);
857 if (IS_ERR(pipe_mnt)) { 889 if (IS_ERR(pipe_mnt)) {
diff --git a/fs/read_write.c b/fs/read_write.c
index 6256ca81a718..5bc0e9234f9d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -202,7 +202,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
202 goto Einval; 202 goto Einval;
203 203
204 inode = file->f_dentry->d_inode; 204 inode = file->f_dentry->d_inode;
205 if (inode->i_flock && MANDATORY_LOCK(inode)) { 205 if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) {
206 int retval = locks_mandatory_area( 206 int retval = locks_mandatory_area(
207 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 207 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
208 inode, file, pos, count); 208 inode, file, pos, count);
diff --git a/fs/splice.c b/fs/splice.c
index bfa42a277bb8..e50a460239dd 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -9,11 +9,12 @@
9 * that transfers data buffers to or from a pipe buffer. 9 * that transfers data buffers to or from a pipe buffer.
10 * 10 *
11 * Named by Larry McVoy, original implementation from Linus, extended by 11 * Named by Larry McVoy, original implementation from Linus, extended by
12 * Jens to support splicing to files and fixing the initial implementation 12 * Jens to support splicing to files, network, direct splicing, etc and
13 * bugs. 13 * fixing lots of bugs.
14 * 14 *
15 * Copyright (C) 2005 Jens Axboe <axboe@suse.de> 15 * Copyright (C) 2005-2006 Jens Axboe <axboe@suse.de>
16 * Copyright (C) 2005 Linus Torvalds <torvalds@osdl.org> 16 * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
17 * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
17 * 18 *
18 */ 19 */
19#include <linux/fs.h> 20#include <linux/fs.h>
@@ -84,26 +85,43 @@ static void *page_cache_pipe_buf_map(struct file *file,
84 struct pipe_buffer *buf) 85 struct pipe_buffer *buf)
85{ 86{
86 struct page *page = buf->page; 87 struct page *page = buf->page;
87 88 int err;
88 lock_page(page);
89 89
90 if (!PageUptodate(page)) { 90 if (!PageUptodate(page)) {
91 unlock_page(page); 91 lock_page(page);
92 return ERR_PTR(-EIO); 92
93 } 93 /*
94 * Page got truncated/unhashed. This will cause a 0-byte
95 * splice, if this is the first page.
96 */
97 if (!page->mapping) {
98 err = -ENODATA;
99 goto error;
100 }
94 101
95 if (!page->mapping) { 102 /*
103 * Uh oh, read-error from disk.
104 */
105 if (!PageUptodate(page)) {
106 err = -EIO;
107 goto error;
108 }
109
110 /*
110 * Page is ok after all, fall through to mapping.
112 */
96 unlock_page(page); 113 unlock_page(page);
97 return ERR_PTR(-ENODATA);
98 } 114 }
99 115
100 return kmap(buf->page); 116 return kmap(page);
117error:
118 unlock_page(page);
119 return ERR_PTR(err);
101} 120}
102 121
103static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 122static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
104 struct pipe_buffer *buf) 123 struct pipe_buffer *buf)
105{ 124{
106 unlock_page(buf->page);
107 kunmap(buf->page); 125 kunmap(buf->page);
108} 126}
109 127
@@ -119,34 +137,30 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = {
119 * Pipe output worker. This sets up our pipe format with the page cache 137 * Pipe output worker. This sets up our pipe format with the page cache
120 * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 138 * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
121 */ 139 */
122static ssize_t move_to_pipe(struct inode *inode, struct page **pages, 140static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
123 int nr_pages, unsigned long offset, 141 int nr_pages, unsigned long offset,
124 unsigned long len, unsigned int flags) 142 unsigned long len, unsigned int flags)
125{ 143{
126 struct pipe_inode_info *info;
127 int ret, do_wakeup, i; 144 int ret, do_wakeup, i;
128 145
129 ret = 0; 146 ret = 0;
130 do_wakeup = 0; 147 do_wakeup = 0;
131 i = 0; 148 i = 0;
132 149
133 mutex_lock(PIPE_MUTEX(*inode)); 150 if (pipe->inode)
151 mutex_lock(&pipe->inode->i_mutex);
134 152
135 info = inode->i_pipe;
136 for (;;) { 153 for (;;) {
137 int bufs; 154 if (!pipe->readers) {
138
139 if (!PIPE_READERS(*inode)) {
140 send_sig(SIGPIPE, current, 0); 155 send_sig(SIGPIPE, current, 0);
141 if (!ret) 156 if (!ret)
142 ret = -EPIPE; 157 ret = -EPIPE;
143 break; 158 break;
144 } 159 }
145 160
146 bufs = info->nrbufs; 161 if (pipe->nrbufs < PIPE_BUFFERS) {
147 if (bufs < PIPE_BUFFERS) { 162 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
148 int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS - 1); 163 struct pipe_buffer *buf = pipe->bufs + newbuf;
149 struct pipe_buffer *buf = info->bufs + newbuf;
150 struct page *page = pages[i++]; 164 struct page *page = pages[i++];
151 unsigned long this_len; 165 unsigned long this_len;
152 166
@@ -158,8 +172,9 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
158 buf->offset = offset; 172 buf->offset = offset;
159 buf->len = this_len; 173 buf->len = this_len;
160 buf->ops = &page_cache_pipe_buf_ops; 174 buf->ops = &page_cache_pipe_buf_ops;
161 info->nrbufs = ++bufs; 175 pipe->nrbufs++;
162 do_wakeup = 1; 176 if (pipe->inode)
177 do_wakeup = 1;
163 178
164 ret += this_len; 179 ret += this_len;
165 len -= this_len; 180 len -= this_len;
@@ -168,7 +183,7 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
168 break; 183 break;
169 if (!len) 184 if (!len)
170 break; 185 break;
171 if (bufs < PIPE_BUFFERS) 186 if (pipe->nrbufs < PIPE_BUFFERS)
172 continue; 187 continue;
173 188
174 break; 189 break;
@@ -187,22 +202,26 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
187 } 202 }
188 203
189 if (do_wakeup) { 204 if (do_wakeup) {
190 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 205 smp_mb();
191 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, 206 if (waitqueue_active(&pipe->wait))
192 POLL_IN); 207 wake_up_interruptible_sync(&pipe->wait);
208 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
193 do_wakeup = 0; 209 do_wakeup = 0;
194 } 210 }
195 211
196 PIPE_WAITING_WRITERS(*inode)++; 212 pipe->waiting_writers++;
197 pipe_wait(inode); 213 pipe_wait(pipe);
198 PIPE_WAITING_WRITERS(*inode)--; 214 pipe->waiting_writers--;
199 } 215 }
200 216
201 mutex_unlock(PIPE_MUTEX(*inode)); 217 if (pipe->inode)
218 mutex_unlock(&pipe->inode->i_mutex);
202 219
203 if (do_wakeup) { 220 if (do_wakeup) {
204 wake_up_interruptible(PIPE_WAIT(*inode)); 221 smp_mb();
205 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 222 if (waitqueue_active(&pipe->wait))
223 wake_up_interruptible(&pipe->wait);
224 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
206 } 225 }
207 226
208 while (i < nr_pages) 227 while (i < nr_pages)
@@ -211,15 +230,16 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
211 return ret; 230 return ret;
212} 231}
213 232
214static int __generic_file_splice_read(struct file *in, struct inode *pipe, 233static int
215 size_t len, unsigned int flags) 234__generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe,
235 size_t len, unsigned int flags)
216{ 236{
217 struct address_space *mapping = in->f_mapping; 237 struct address_space *mapping = in->f_mapping;
218 unsigned int offset, nr_pages; 238 unsigned int offset, nr_pages;
219 struct page *pages[PIPE_BUFFERS], *shadow[PIPE_BUFFERS]; 239 struct page *pages[PIPE_BUFFERS];
220 struct page *page; 240 struct page *page;
221 pgoff_t index, pidx; 241 pgoff_t index;
222 int i, j; 242 int i, error;
223 243
224 index = in->f_pos >> PAGE_CACHE_SHIFT; 244 index = in->f_pos >> PAGE_CACHE_SHIFT;
225 offset = in->f_pos & ~PAGE_CACHE_MASK; 245 offset = in->f_pos & ~PAGE_CACHE_MASK;
@@ -229,78 +249,94 @@ static int __generic_file_splice_read(struct file *in, struct inode *pipe,
229 nr_pages = PIPE_BUFFERS; 249 nr_pages = PIPE_BUFFERS;
230 250
231 /* 251 /*
232 * initiate read-ahead on this page range 252 * Initiate read-ahead on this page range. However, don't call into
233 */ 253 * read-ahead if this is a non-zero offset (we are likely doing small
234 do_page_cache_readahead(mapping, in, index, nr_pages); 254 * chunk splice and the page is already there) for a single page.
235
236 /*
237 * Get as many pages from the page cache as possible..
238 * Start IO on the page cache entries we create (we
239 * can assume that any pre-existing ones we find have
240 * already had IO started on them).
241 */ 255 */
242 i = find_get_pages(mapping, index, nr_pages, pages); 256 if (!offset || nr_pages > 1)
257 do_page_cache_readahead(mapping, in, index, nr_pages);
243 258
244 /* 259 /*
245 * common case - we found all pages and they are contiguous, 260 * Now fill in the holes:
246 * kick them off
247 */ 261 */
248 if (i && (pages[i - 1]->index == index + i - 1)) 262 error = 0;
249 goto splice_them; 263 for (i = 0; i < nr_pages; i++, index++) {
264find_page:
265 /*
266 * lookup the page for this index
267 */
268 page = find_get_page(mapping, index);
269 if (!page) {
270 /*
271 * If in nonblock mode then don't block on
272 * readpage (we've kicked readahead so there
273 * will be asynchronous progress):
274 */
275 if (flags & SPLICE_F_NONBLOCK)
276 break;
250 277
251 /* 278 /*
252 * fill shadow[] with pages at the right locations, so we only 279 * page didn't exist, allocate one
253 * have to fill holes 280 */
254 */ 281 page = page_cache_alloc_cold(mapping);
255 memset(shadow, 0, nr_pages * sizeof(struct page *)); 282 if (!page)
256 for (j = 0; j < i; j++) 283 break;
257 shadow[pages[j]->index - index] = pages[j];
258 284
259 /* 285 error = add_to_page_cache_lru(page, mapping, index,
260 * now fill in the holes 286 mapping_gfp_mask(mapping));
261 */ 287 if (unlikely(error)) {
262 for (i = 0, pidx = index; i < nr_pages; pidx++, i++) { 288 page_cache_release(page);
263 int error; 289 break;
290 }
264 291
265 if (shadow[i]) 292 goto readpage;
266 continue; 293 }
267 294
268 /* 295 /*
269 * no page there, look one up / create it 296 * If the page isn't uptodate, we may need to start io on it
270 */ 297 */
271 page = find_or_create_page(mapping, pidx, 298 if (!PageUptodate(page)) {
272 mapping_gfp_mask(mapping)); 299 lock_page(page);
273 if (!page) 300
274 break; 301 /*
302 * page was truncated, stop here. if this isn't the
303 * first page, we'll just complete what we already
304 * added
305 */
306 if (!page->mapping) {
307 unlock_page(page);
308 page_cache_release(page);
309 break;
310 }
311 /*
312 * page was already under io and is now done, great
313 */
314 if (PageUptodate(page)) {
315 unlock_page(page);
316 goto fill_it;
317 }
275 318
276 if (PageUptodate(page)) 319readpage:
277 unlock_page(page); 320 /*
278 else { 321 * need to read in the page
322 */
279 error = mapping->a_ops->readpage(in, page); 323 error = mapping->a_ops->readpage(in, page);
280 324
281 if (unlikely(error)) { 325 if (unlikely(error)) {
282 page_cache_release(page); 326 page_cache_release(page);
327 if (error == AOP_TRUNCATED_PAGE)
328 goto find_page;
283 break; 329 break;
284 } 330 }
285 } 331 }
286 shadow[i] = page; 332fill_it:
333 pages[i] = page;
287 } 334 }
288 335
289 if (!i) { 336 if (i)
290 for (i = 0; i < nr_pages; i++) { 337 return move_to_pipe(pipe, pages, i, offset, len, flags);
291 if (shadow[i])
292 page_cache_release(shadow[i]);
293 }
294 return 0;
295 }
296 338
297 memcpy(pages, shadow, i * sizeof(struct page *)); 339 return error;
298
299 /*
300 * Now we splice them into the pipe..
301 */
302splice_them:
303 return move_to_pipe(pipe, pages, i, offset, len, flags);
304} 340}
305 341
306/** 342/**
@@ -311,9 +347,8 @@ splice_them:
311 * @flags: splice modifier flags 347 * @flags: splice modifier flags
312 * 348 *
313 * Will read pages from given file and fill them into a pipe. 349 * Will read pages from given file and fill them into a pipe.
314 *
315 */ 350 */
316ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, 351ssize_t generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe,
317 size_t len, unsigned int flags) 352 size_t len, unsigned int flags)
318{ 353{
319 ssize_t spliced; 354 ssize_t spliced;
@@ -321,6 +356,7 @@ ssize_t generic_file_splice_read(struct file *in, struct inode *pipe,
321 356
322 ret = 0; 357 ret = 0;
323 spliced = 0; 358 spliced = 0;
359
324 while (len) { 360 while (len) {
325 ret = __generic_file_splice_read(in, pipe, len, flags); 361 ret = __generic_file_splice_read(in, pipe, len, flags);
326 362
@@ -360,10 +396,10 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
360 int more; 396 int more;
361 397
362 /* 398 /*
363 * sub-optimal, but we are limited by the pipe ->map. we don't 399 * Sub-optimal, but we are limited by the pipe ->map. We don't
364 * need a kmap'ed buffer here, we just want to make sure we 400 * need a kmap'ed buffer here, we just want to make sure we
365 * have the page pinned if the pipe page originates from the 401 * have the page pinned if the pipe page originates from the
366 * page cache 402 * page cache.
367 */ 403 */
368 ptr = buf->ops->map(file, info, buf); 404 ptr = buf->ops->map(file, info, buf);
369 if (IS_ERR(ptr)) 405 if (IS_ERR(ptr))
@@ -414,7 +450,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
414 int ret; 450 int ret;
415 451
416 /* 452 /*
417 * after this, page will be locked and unmapped 453 * make sure the data in this buffer is uptodate
418 */ 454 */
419 src = buf->ops->map(file, info, buf); 455 src = buf->ops->map(file, info, buf);
420 if (IS_ERR(src)) 456 if (IS_ERR(src))
@@ -424,7 +460,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
424 offset = sd->pos & ~PAGE_CACHE_MASK; 460 offset = sd->pos & ~PAGE_CACHE_MASK;
425 461
426 /* 462 /*
427 * reuse buf page, if SPLICE_F_MOVE is set 463 * Reuse buf page, if SPLICE_F_MOVE is set.
428 */ 464 */
429 if (sd->flags & SPLICE_F_MOVE) { 465 if (sd->flags & SPLICE_F_MOVE) {
430 /* 466 /*
@@ -434,6 +470,9 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
434 if (buf->ops->steal(info, buf)) 470 if (buf->ops->steal(info, buf))
435 goto find_page; 471 goto find_page;
436 472
473 /*
474 * this will also set the page locked
475 */
437 page = buf->page; 476 page = buf->page;
438 if (add_to_page_cache(page, mapping, index, gfp_mask)) 477 if (add_to_page_cache(page, mapping, index, gfp_mask))
439 goto find_page; 478 goto find_page;
@@ -445,7 +484,7 @@ find_page:
445 ret = -ENOMEM; 484 ret = -ENOMEM;
446 page = find_or_create_page(mapping, index, gfp_mask); 485 page = find_or_create_page(mapping, index, gfp_mask);
447 if (!page) 486 if (!page)
448 goto out; 487 goto out_nomem;
449 488
450 /* 489 /*
451 * If the page is uptodate, it is also locked. If it isn't 490 * If the page is uptodate, it is also locked. If it isn't
@@ -462,7 +501,7 @@ find_page:
462 501
463 if (!PageUptodate(page)) { 502 if (!PageUptodate(page)) {
464 /* 503 /*
465 * page got invalidated, repeat 504 * Page got invalidated, repeat.
466 */ 505 */
467 if (!page->mapping) { 506 if (!page->mapping) {
468 unlock_page(page); 507 unlock_page(page);
@@ -501,12 +540,14 @@ find_page:
501 } else if (ret) 540 } else if (ret)
502 goto out; 541 goto out;
503 542
543 mark_page_accessed(page);
504 balance_dirty_pages_ratelimited(mapping); 544 balance_dirty_pages_ratelimited(mapping);
505out: 545out:
506 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 546 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) {
507 page_cache_release(page); 547 page_cache_release(page);
508 unlock_page(page); 548 unlock_page(page);
509 } 549 }
550out_nomem:
510 buf->ops->unmap(info, buf); 551 buf->ops->unmap(info, buf);
511 return ret; 552 return ret;
512} 553}
@@ -519,11 +560,10 @@ typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
519 * key here is the 'actor' worker passed in that actually moves the data 560 * key here is the 'actor' worker passed in that actually moves the data
520 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 561 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
521 */ 562 */
522static ssize_t move_from_pipe(struct inode *inode, struct file *out, 563static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
523 size_t len, unsigned int flags, 564 size_t len, unsigned int flags,
524 splice_actor *actor) 565 splice_actor *actor)
525{ 566{
526 struct pipe_inode_info *info;
527 int ret, do_wakeup, err; 567 int ret, do_wakeup, err;
528 struct splice_desc sd; 568 struct splice_desc sd;
529 569
@@ -535,22 +575,19 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
535 sd.file = out; 575 sd.file = out;
536 sd.pos = out->f_pos; 576 sd.pos = out->f_pos;
537 577
538 mutex_lock(PIPE_MUTEX(*inode)); 578 if (pipe->inode)
579 mutex_lock(&pipe->inode->i_mutex);
539 580
540 info = inode->i_pipe;
541 for (;;) { 581 for (;;) {
542 int bufs = info->nrbufs; 582 if (pipe->nrbufs) {
543 583 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
544 if (bufs) {
545 int curbuf = info->curbuf;
546 struct pipe_buffer *buf = info->bufs + curbuf;
547 struct pipe_buf_operations *ops = buf->ops; 584 struct pipe_buf_operations *ops = buf->ops;
548 585
549 sd.len = buf->len; 586 sd.len = buf->len;
550 if (sd.len > sd.total_len) 587 if (sd.len > sd.total_len)
551 sd.len = sd.total_len; 588 sd.len = sd.total_len;
552 589
553 err = actor(info, buf, &sd); 590 err = actor(pipe, buf, &sd);
554 if (err) { 591 if (err) {
555 if (!ret && err != -ENODATA) 592 if (!ret && err != -ENODATA)
556 ret = err; 593 ret = err;
@@ -561,13 +598,14 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
561 ret += sd.len; 598 ret += sd.len;
562 buf->offset += sd.len; 599 buf->offset += sd.len;
563 buf->len -= sd.len; 600 buf->len -= sd.len;
601
564 if (!buf->len) { 602 if (!buf->len) {
565 buf->ops = NULL; 603 buf->ops = NULL;
566 ops->release(info, buf); 604 ops->release(pipe, buf);
567 curbuf = (curbuf + 1) & (PIPE_BUFFERS - 1); 605 pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
568 info->curbuf = curbuf; 606 pipe->nrbufs--;
569 info->nrbufs = --bufs; 607 if (pipe->inode)
570 do_wakeup = 1; 608 do_wakeup = 1;
571 } 609 }
572 610
573 sd.pos += sd.len; 611 sd.pos += sd.len;
@@ -576,11 +614,11 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
576 break; 614 break;
577 } 615 }
578 616
579 if (bufs) 617 if (pipe->nrbufs)
580 continue; 618 continue;
581 if (!PIPE_WRITERS(*inode)) 619 if (!pipe->writers)
582 break; 620 break;
583 if (!PIPE_WAITING_WRITERS(*inode)) { 621 if (!pipe->waiting_writers) {
584 if (ret) 622 if (ret)
585 break; 623 break;
586 } 624 }
@@ -598,31 +636,34 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
598 } 636 }
599 637
600 if (do_wakeup) { 638 if (do_wakeup) {
601 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 639 smp_mb();
602 kill_fasync(PIPE_FASYNC_WRITERS(*inode),SIGIO,POLL_OUT); 640 if (waitqueue_active(&pipe->wait))
641 wake_up_interruptible_sync(&pipe->wait);
642 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
603 do_wakeup = 0; 643 do_wakeup = 0;
604 } 644 }
605 645
606 pipe_wait(inode); 646 pipe_wait(pipe);
607 } 647 }
608 648
609 mutex_unlock(PIPE_MUTEX(*inode)); 649 if (pipe->inode)
650 mutex_unlock(&pipe->inode->i_mutex);
610 651
611 if (do_wakeup) { 652 if (do_wakeup) {
612 wake_up_interruptible(PIPE_WAIT(*inode)); 653 smp_mb();
613 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 654 if (waitqueue_active(&pipe->wait))
655 wake_up_interruptible(&pipe->wait);
656 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
614 } 657 }
615 658
616 mutex_lock(&out->f_mapping->host->i_mutex);
617 out->f_pos = sd.pos; 659 out->f_pos = sd.pos;
618 mutex_unlock(&out->f_mapping->host->i_mutex);
619 return ret; 660 return ret;
620 661
621} 662}
622 663
623/** 664/**
624 * generic_file_splice_write - splice data from a pipe to a file 665 * generic_file_splice_write - splice data from a pipe to a file
625 * @inode: pipe inode 666 * @pipe: pipe info
626 * @out: file to write to 667 * @out: file to write to
627 * @len: number of bytes to splice 668 * @len: number of bytes to splice
628 * @flags: splice modifier flags 669 * @flags: splice modifier flags
@@ -631,14 +672,17 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
631 * the given pipe inode to the given file. 672 * the given pipe inode to the given file.
632 * 673 *
633 */ 674 */
634ssize_t generic_file_splice_write(struct inode *inode, struct file *out, 675ssize_t
635 size_t len, unsigned int flags) 676generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
677 size_t len, unsigned int flags)
636{ 678{
637 struct address_space *mapping = out->f_mapping; 679 struct address_space *mapping = out->f_mapping;
638 ssize_t ret = move_from_pipe(inode, out, len, flags, pipe_to_file); 680 ssize_t ret;
681
682 ret = move_from_pipe(pipe, out, len, flags, pipe_to_file);
639 683
640 /* 684 /*
641 * if file or inode is SYNC and we actually wrote some data, sync it 685 * If file or inode is SYNC and we actually wrote some data, sync it.
642 */ 686 */
643 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host)) 687 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host))
644 && ret > 0) { 688 && ret > 0) {
@@ -647,7 +691,7 @@ ssize_t generic_file_splice_write(struct inode *inode, struct file *out,
647 691
648 mutex_lock(&inode->i_mutex); 692 mutex_lock(&inode->i_mutex);
649 err = generic_osync_inode(mapping->host, mapping, 693 err = generic_osync_inode(mapping->host, mapping,
650 OSYNC_METADATA|OSYNC_DATA); 694 OSYNC_METADATA|OSYNC_DATA);
651 mutex_unlock(&inode->i_mutex); 695 mutex_unlock(&inode->i_mutex);
652 696
653 if (err) 697 if (err)
@@ -670,10 +714,10 @@ EXPORT_SYMBOL(generic_file_splice_write);
670 * is involved. 714 * is involved.
671 * 715 *
672 */ 716 */
673ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, 717ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
674 size_t len, unsigned int flags) 718 size_t len, unsigned int flags)
675{ 719{
676 return move_from_pipe(inode, out, len, flags, pipe_to_sendpage); 720 return move_from_pipe(pipe, out, len, flags, pipe_to_sendpage);
677} 721}
678 722
679EXPORT_SYMBOL(generic_splice_sendpage); 723EXPORT_SYMBOL(generic_splice_sendpage);
@@ -681,19 +725,20 @@ EXPORT_SYMBOL(generic_splice_sendpage);
681/* 725/*
682 * Attempt to initiate a splice from pipe to file. 726 * Attempt to initiate a splice from pipe to file.
683 */ 727 */
684static long do_splice_from(struct inode *pipe, struct file *out, size_t len, 728static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
685 unsigned int flags) 729 size_t len, unsigned int flags)
686{ 730{
687 loff_t pos; 731 loff_t pos;
688 int ret; 732 int ret;
689 733
690 if (!out->f_op || !out->f_op->splice_write) 734 if (unlikely(!out->f_op || !out->f_op->splice_write))
691 return -EINVAL; 735 return -EINVAL;
692 736
693 if (!(out->f_mode & FMODE_WRITE)) 737 if (unlikely(!(out->f_mode & FMODE_WRITE)))
694 return -EBADF; 738 return -EBADF;
695 739
696 pos = out->f_pos; 740 pos = out->f_pos;
741
697 ret = rw_verify_area(WRITE, out, &pos, len); 742 ret = rw_verify_area(WRITE, out, &pos, len);
698 if (unlikely(ret < 0)) 743 if (unlikely(ret < 0))
699 return ret; 744 return ret;
@@ -704,19 +749,20 @@ static long do_splice_from(struct inode *pipe, struct file *out, size_t len,
704/* 749/*
705 * Attempt to initiate a splice from a file to a pipe. 750 * Attempt to initiate a splice from a file to a pipe.
706 */ 751 */
707static long do_splice_to(struct file *in, struct inode *pipe, size_t len, 752static long do_splice_to(struct file *in, struct pipe_inode_info *pipe,
708 unsigned int flags) 753 size_t len, unsigned int flags)
709{ 754{
710 loff_t pos, isize, left; 755 loff_t pos, isize, left;
711 int ret; 756 int ret;
712 757
713 if (!in->f_op || !in->f_op->splice_read) 758 if (unlikely(!in->f_op || !in->f_op->splice_read))
714 return -EINVAL; 759 return -EINVAL;
715 760
716 if (!(in->f_mode & FMODE_READ)) 761 if (unlikely(!(in->f_mode & FMODE_READ)))
717 return -EBADF; 762 return -EBADF;
718 763
719 pos = in->f_pos; 764 pos = in->f_pos;
765
720 ret = rw_verify_area(READ, in, &pos, len); 766 ret = rw_verify_area(READ, in, &pos, len);
721 if (unlikely(ret < 0)) 767 if (unlikely(ret < 0))
722 return ret; 768 return ret;
@@ -726,32 +772,168 @@ static long do_splice_to(struct file *in, struct inode *pipe, size_t len,
726 return 0; 772 return 0;
727 773
728 left = isize - in->f_pos; 774 left = isize - in->f_pos;
729 if (left < len) 775 if (unlikely(left < len))
730 len = left; 776 len = left;
731 777
732 return in->f_op->splice_read(in, pipe, len, flags); 778 return in->f_op->splice_read(in, pipe, len, flags);
733} 779}
734 780
781long do_splice_direct(struct file *in, struct file *out, size_t len,
782 unsigned int flags)
783{
784 struct pipe_inode_info *pipe;
785 long ret, bytes;
786 umode_t i_mode;
787 int i;
788
789 /*
790 * We require the input to be a regular file or a block device, as
791 * we don't want to randomly drop data for e.g. socket -> socket
792 * splicing. Use the piped splicing for that!
793 */
794 i_mode = in->f_dentry->d_inode->i_mode;
795 if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
796 return -EINVAL;
797
798 /*
799 * neither in nor out is a pipe, set up an internal pipe cached in
800 * current->splice_pipe and move the wanted data from 'in' to 'out' via it
801 */
802 pipe = current->splice_pipe;
803 if (unlikely(!pipe)) {
804 pipe = alloc_pipe_info(NULL);
805 if (!pipe)
806 return -ENOMEM;
807
808 /*
809 * We don't have an immediate reader, but we'll read the stuff
810 * out of the pipe right after the move_to_pipe(). So set
811 * pipe->readers appropriately.
812 */
813 pipe->readers = 1;
814
815 current->splice_pipe = pipe;
816 }
817
818 /*
819 * Do the splice.
820 */
821 ret = 0;
822 bytes = 0;
823
824 while (len) {
825 size_t read_len, max_read_len;
826
827 /*
828 * Do at most PIPE_BUFFERS pages worth of transfer:
829 */
830 max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
831
832 ret = do_splice_to(in, pipe, max_read_len, flags);
833 if (unlikely(ret < 0))
834 goto out_release;
835
836 read_len = ret;
837
838 /*
839 * NOTE: nonblocking mode only applies to the input. We
840 * must not do the output in nonblocking mode as then we
841 * could get stuck data in the internal pipe:
842 */
843 ret = do_splice_from(pipe, out, read_len,
844 flags & ~SPLICE_F_NONBLOCK);
845 if (unlikely(ret < 0))
846 goto out_release;
847
848 bytes += ret;
849 len -= ret;
850
851 /*
852 * In nonblocking mode, if we got back a short read then
853 * that was due to either an IO error or due to the
854 * pagecache entry not being there. In the IO error case
855 * the _next_ splice attempt will produce a clean IO error
856 * return value (not a short read), so in both cases it's
857 * correct to break out of the loop here:
858 */
859 if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len))
860 break;
861 }
862
863 pipe->nrbufs = pipe->curbuf = 0;
864
865 return bytes;
866
867out_release:
868 /*
869 * If we did an incomplete transfer we must release
870 * the pipe buffers in question:
871 */
872 for (i = 0; i < PIPE_BUFFERS; i++) {
873 struct pipe_buffer *buf = pipe->bufs + i;
874
875 if (buf->ops) {
876 buf->ops->release(pipe, buf);
877 buf->ops = NULL;
878 }
879 }
880 pipe->nrbufs = pipe->curbuf = 0;
881
882 /*
883 * If we transferred some data, return the number of bytes:
884 */
885 if (bytes > 0)
886 return bytes;
887
888 return ret;
889}
890
891EXPORT_SYMBOL(do_splice_direct);
892
735/* 893/*
736 * Determine where to splice to/from. 894 * Determine where to splice to/from.
737 */ 895 */
738static long do_splice(struct file *in, struct file *out, size_t len, 896static long do_splice(struct file *in, loff_t __user *off_in,
739 unsigned int flags) 897 struct file *out, loff_t __user *off_out,
898 size_t len, unsigned int flags)
740{ 899{
741 struct inode *pipe; 900 struct pipe_inode_info *pipe;
901
902 pipe = in->f_dentry->d_inode->i_pipe;
903 if (pipe) {
904 if (off_in)
905 return -ESPIPE;
906 if (off_out) {
907 if (out->f_op->llseek == no_llseek)
908 return -EINVAL;
909 if (copy_from_user(&out->f_pos, off_out,
910 sizeof(loff_t)))
911 return -EFAULT;
912 }
742 913
743 pipe = in->f_dentry->d_inode;
744 if (pipe->i_pipe)
745 return do_splice_from(pipe, out, len, flags); 914 return do_splice_from(pipe, out, len, flags);
915 }
916
917 pipe = out->f_dentry->d_inode->i_pipe;
918 if (pipe) {
919 if (off_out)
920 return -ESPIPE;
921 if (off_in) {
922 if (in->f_op->llseek == no_llseek)
923 return -EINVAL;
924 if (copy_from_user(&in->f_pos, off_in, sizeof(loff_t)))
925 return -EFAULT;
926 }
746 927
747 pipe = out->f_dentry->d_inode;
748 if (pipe->i_pipe)
749 return do_splice_to(in, pipe, len, flags); 928 return do_splice_to(in, pipe, len, flags);
929 }
750 930
751 return -EINVAL; 931 return -EINVAL;
752} 932}
753 933
754asmlinkage long sys_splice(int fdin, int fdout, size_t len, unsigned int flags) 934asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
935 int fd_out, loff_t __user *off_out,
936 size_t len, unsigned int flags)
755{ 937{
756 long error; 938 long error;
757 struct file *in, *out; 939 struct file *in, *out;
@@ -761,13 +943,15 @@ asmlinkage long sys_splice(int fdin, int fdout, size_t len, unsigned int flags)
761 return 0; 943 return 0;
762 944
763 error = -EBADF; 945 error = -EBADF;
764 in = fget_light(fdin, &fput_in); 946 in = fget_light(fd_in, &fput_in);
765 if (in) { 947 if (in) {
766 if (in->f_mode & FMODE_READ) { 948 if (in->f_mode & FMODE_READ) {
767 out = fget_light(fdout, &fput_out); 949 out = fget_light(fd_out, &fput_out);
768 if (out) { 950 if (out) {
769 if (out->f_mode & FMODE_WRITE) 951 if (out->f_mode & FMODE_WRITE)
770 error = do_splice(in, out, len, flags); 952 error = do_splice(in, off_in,
953 out, off_out,
954 len, flags);
771 fput_light(out, fput_out); 955 fput_light(out, fput_out);
772 } 956 }
773 } 957 }
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index ae4c4754ed31..269721af02f3 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -252,7 +252,7 @@ xfs_file_sendfile_invis(
252STATIC ssize_t 252STATIC ssize_t
253xfs_file_splice_read( 253xfs_file_splice_read(
254 struct file *infilp, 254 struct file *infilp,
255 struct inode *pipe, 255 struct pipe_inode_info *pipe,
256 size_t len, 256 size_t len,
257 unsigned int flags) 257 unsigned int flags)
258{ 258{
@@ -266,7 +266,7 @@ xfs_file_splice_read(
266STATIC ssize_t 266STATIC ssize_t
267xfs_file_splice_read_invis( 267xfs_file_splice_read_invis(
268 struct file *infilp, 268 struct file *infilp,
269 struct inode *pipe, 269 struct pipe_inode_info *pipe,
270 size_t len, 270 size_t len,
271 unsigned int flags) 271 unsigned int flags)
272{ 272{
@@ -279,7 +279,7 @@ xfs_file_splice_read_invis(
279 279
280STATIC ssize_t 280STATIC ssize_t
281xfs_file_splice_write( 281xfs_file_splice_write(
282 struct inode *pipe, 282 struct pipe_inode_info *pipe,
283 struct file *outfilp, 283 struct file *outfilp,
284 size_t len, 284 size_t len,
285 unsigned int flags) 285 unsigned int flags)
@@ -293,7 +293,7 @@ xfs_file_splice_write(
293 293
294STATIC ssize_t 294STATIC ssize_t
295xfs_file_splice_write_invis( 295xfs_file_splice_write_invis(
296 struct inode *pipe, 296 struct pipe_inode_info *pipe,
297 struct file *outfilp, 297 struct file *outfilp,
298 size_t len, 298 size_t len,
299 unsigned int flags) 299 unsigned int flags)
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 90cd314acbaa..74a52937f208 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -338,7 +338,7 @@ ssize_t
338xfs_splice_read( 338xfs_splice_read(
339 bhv_desc_t *bdp, 339 bhv_desc_t *bdp,
340 struct file *infilp, 340 struct file *infilp,
341 struct inode *pipe, 341 struct pipe_inode_info *pipe,
342 size_t count, 342 size_t count,
343 int flags, 343 int flags,
344 int ioflags, 344 int ioflags,
@@ -380,7 +380,7 @@ xfs_splice_read(
380ssize_t 380ssize_t
381xfs_splice_write( 381xfs_splice_write(
382 bhv_desc_t *bdp, 382 bhv_desc_t *bdp,
383 struct inode *pipe, 383 struct pipe_inode_info *pipe,
384 struct file *outfilp, 384 struct file *outfilp,
385 size_t count, 385 size_t count,
386 int flags, 386 int flags,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index eaa5659713fb..55c689a86ad2 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -94,9 +94,9 @@ extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
94 loff_t *, int, size_t, read_actor_t, 94 loff_t *, int, size_t, read_actor_t,
95 void *, struct cred *); 95 void *, struct cred *);
96extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, 96extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *,
97 struct inode *, size_t, int, int, 97 struct pipe_inode_info *, size_t, int, int,
98 struct cred *); 98 struct cred *);
99extern ssize_t xfs_splice_write(struct bhv_desc *, struct inode *, 99extern ssize_t xfs_splice_write(struct bhv_desc *, struct pipe_inode_info *,
100 struct file *, size_t, int, int, 100 struct file *, size_t, int, int,
101 struct cred *); 101 struct cred *);
102 102
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 6f1c79a28f8b..88b09f186289 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -174,9 +174,9 @@ typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
174 loff_t *, int, size_t, read_actor_t, 174 loff_t *, int, size_t, read_actor_t,
175 void *, struct cred *); 175 void *, struct cred *);
176typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, 176typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *,
177 struct inode *, size_t, int, int, 177 struct pipe_inode_info *, size_t, int, int,
178 struct cred *); 178 struct cred *);
179typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct inode *, 179typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct pipe_inode_info *,
180 struct file *, size_t, int, int, 180 struct file *, size_t, int, int,
181 struct cred *); 181 struct cred *);
182typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *, 182typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *,
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index d7e13e6afa9d..6a8dd83c350f 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -318,8 +318,8 @@
318#define __NR_unshare 310 318#define __NR_unshare 310
319#define __NR_set_robust_list 311 319#define __NR_set_robust_list 311
320#define __NR_get_robust_list 312 320#define __NR_get_robust_list 312
321#define __NR_sys_splice 313 321#define __NR_splice 313
322#define __NR_sys_sync_file_range 314 322#define __NR_sync_file_range 314
323 323
324#define NR_syscalls 315 324#define NR_syscalls 315
325 325
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 504dcf5b297b..162c6e57307a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1039,8 +1039,8 @@ struct file_operations {
1039 int (*check_flags)(int); 1039 int (*check_flags)(int);
1040 int (*dir_notify)(struct file *filp, unsigned long arg); 1040 int (*dir_notify)(struct file *filp, unsigned long arg);
1041 int (*flock) (struct file *, int, struct file_lock *); 1041 int (*flock) (struct file *, int, struct file_lock *);
1042 ssize_t (*splice_write)(struct inode *, struct file *, size_t, unsigned int); 1042 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
1043 ssize_t (*splice_read)(struct file *, struct inode *, size_t, unsigned int); 1043 ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
1044}; 1044};
1045 1045
1046struct inode_operations { 1046struct inode_operations {
@@ -1611,8 +1611,17 @@ extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor
1611extern void do_generic_mapping_read(struct address_space *mapping, 1611extern void do_generic_mapping_read(struct address_space *mapping,
1612 struct file_ra_state *, struct file *, 1612 struct file_ra_state *, struct file *,
1613 loff_t *, read_descriptor_t *, read_actor_t); 1613 loff_t *, read_descriptor_t *, read_actor_t);
1614extern ssize_t generic_file_splice_read(struct file *, struct inode *, size_t, unsigned int); 1614
1615extern ssize_t generic_file_splice_write(struct inode *, struct file *, size_t, unsigned int); 1615/* fs/splice.c */
1616extern ssize_t generic_file_splice_read(struct file *,
1617 struct pipe_inode_info *, size_t, unsigned int);
1618extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
1619 struct file *, size_t, unsigned int);
1620extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
1621 struct file *out, size_t len, unsigned int flags);
1622extern long do_splice_direct(struct file *in, struct file *out,
1623 size_t len, unsigned int flags);
1624
1616extern void 1625extern void
1617file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); 1626file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
1618extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, 1627extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index ec384958d509..123a7c24bc72 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -36,27 +36,19 @@ struct pipe_inode_info {
36 unsigned int w_counter; 36 unsigned int w_counter;
37 struct fasync_struct *fasync_readers; 37 struct fasync_struct *fasync_readers;
38 struct fasync_struct *fasync_writers; 38 struct fasync_struct *fasync_writers;
39 struct inode *inode;
39}; 40};
40 41
41/* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual 42/* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
42 memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ 43 memory allocation, whereas PIPE_BUF makes atomicity guarantees. */
43#define PIPE_SIZE PAGE_SIZE 44#define PIPE_SIZE PAGE_SIZE
44 45
45#define PIPE_MUTEX(inode) (&(inode).i_mutex)
46#define PIPE_WAIT(inode) (&(inode).i_pipe->wait)
47#define PIPE_READERS(inode) ((inode).i_pipe->readers)
48#define PIPE_WRITERS(inode) ((inode).i_pipe->writers)
49#define PIPE_WAITING_WRITERS(inode) ((inode).i_pipe->waiting_writers)
50#define PIPE_RCOUNTER(inode) ((inode).i_pipe->r_counter)
51#define PIPE_WCOUNTER(inode) ((inode).i_pipe->w_counter)
52#define PIPE_FASYNC_READERS(inode) (&((inode).i_pipe->fasync_readers))
53#define PIPE_FASYNC_WRITERS(inode) (&((inode).i_pipe->fasync_writers))
54
55/* Drop the inode semaphore and wait for a pipe event, atomically */ 46/* Drop the inode semaphore and wait for a pipe event, atomically */
56void pipe_wait(struct inode * inode); 47void pipe_wait(struct pipe_inode_info *pipe);
57 48
58struct inode* pipe_new(struct inode* inode); 49struct pipe_inode_info * alloc_pipe_info(struct inode * inode);
59void free_pipe_info(struct inode* inode); 50void free_pipe_info(struct inode * inode);
51void __free_pipe_info(struct pipe_inode_info *);
60 52
61/* 53/*
62 * splice is tied to pipes as a transport (at least for now), so we'll just 54 * splice is tied to pipes as a transport (at least for now), so we'll just
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 83d657811d01..e3539c14e47e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -684,6 +684,7 @@ static inline void prefetch_stack(struct task_struct *t) { }
684 684
685struct audit_context; /* See audit.c */ 685struct audit_context; /* See audit.c */
686struct mempolicy; 686struct mempolicy;
687struct pipe_inode_info;
687 688
688enum sleep_type { 689enum sleep_type {
689 SLEEP_NORMAL, 690 SLEEP_NORMAL,
@@ -882,6 +883,11 @@ struct task_struct {
882 883
883 atomic_t fs_excl; /* holding fs exclusive resources */ 884 atomic_t fs_excl; /* holding fs exclusive resources */
884 struct rcu_head rcu; 885 struct rcu_head rcu;
886
887 /*
888 * cache last used pipe for splice
889 */
890 struct pipe_inode_info *splice_pipe;
885}; 891};
886 892
887static inline pid_t process_group(struct task_struct *tsk) 893static inline pid_t process_group(struct task_struct *tsk)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 89c4180d42f5..f001bad28d9a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -569,8 +569,11 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename,
569asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename, 569asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename,
570 int flags, int mode); 570 int flags, int mode);
571asmlinkage long sys_unshare(unsigned long unshare_flags); 571asmlinkage long sys_unshare(unsigned long unshare_flags);
572asmlinkage long sys_splice(int fdin, int fdout, size_t len, 572
573 unsigned int flags); 573asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
574 int fd_out, loff_t __user *off_out,
575 size_t len, unsigned int flags);
576
574asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, 577asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
575 unsigned int flags); 578 unsigned int flags);
576 579
diff --git a/kernel/exit.c b/kernel/exit.c
index 6c2eeb8f6390..1a9787ac6173 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -34,6 +34,7 @@
34#include <linux/mutex.h> 34#include <linux/mutex.h>
35#include <linux/futex.h> 35#include <linux/futex.h>
36#include <linux/compat.h> 36#include <linux/compat.h>
37#include <linux/pipe_fs_i.h>
37 38
38#include <asm/uaccess.h> 39#include <asm/uaccess.h>
39#include <asm/unistd.h> 40#include <asm/unistd.h>
@@ -941,6 +942,9 @@ fastcall NORET_TYPE void do_exit(long code)
941 if (tsk->io_context) 942 if (tsk->io_context)
942 exit_io_context(); 943 exit_io_context();
943 944
945 if (tsk->splice_pipe)
946 __free_pipe_info(tsk->splice_pipe);
947
944 /* PF_DEAD causes final put_task_struct after we schedule. */ 948 /* PF_DEAD causes final put_task_struct after we schedule. */
945 preempt_disable(); 949 preempt_disable();
946 BUG_ON(tsk->flags & PF_DEAD); 950 BUG_ON(tsk->flags & PF_DEAD);
diff --git a/net/socket.c b/net/socket.c
index 00cdfd2088db..23898f45f713 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -119,10 +119,6 @@ static ssize_t sock_writev(struct file *file, const struct iovec *vector,
119static ssize_t sock_sendpage(struct file *file, struct page *page, 119static ssize_t sock_sendpage(struct file *file, struct page *page,
120 int offset, size_t size, loff_t *ppos, int more); 120 int offset, size_t size, loff_t *ppos, int more);
121 121
122extern ssize_t generic_splice_sendpage(struct inode *inode, struct file *out,
123 size_t len, unsigned int flags);
124
125
126/* 122/*
127 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear 123 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
128 * in the operation structures but are done directly via the socketcall() multiplexor. 124 * in the operation structures but are done directly via the socketcall() multiplexor.