aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@g5.osdl.org> 2006-04-11 09:34:02 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-04-11 09:34:02 -0400
commit 88dd9c16cecbd105bbe7711b6120333f6f7b5474 (patch)
tree 9632e5988abeaa7e4d20350305edc4e4652b56d1 /fs
parent 6dde432553551ae036aae12c2b940677d36c9a5b (diff)
parent d1195c516a9acd767cb541f914be2c6ddcafcfc1 (diff)
Merge branch 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block
* 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block:
  [PATCH] vfs: add splice_write and splice_read to documentation
  [PATCH] Remove sys_ prefix of new syscalls from __NR_sys_*
  [PATCH] splice: warning fix
  [PATCH] another round of fs/pipe.c cleanups
  [PATCH] splice: comment styles
  [PATCH] splice: add Ingo as addition copyright holder
  [PATCH] splice: unlikely() optimizations
  [PATCH] splice: speedups and optimizations
  [PATCH] pipe.c/fifo.c code cleanups
  [PATCH] get rid of the PIPE_*() macros
  [PATCH] splice: speedup __generic_file_splice_read
  [PATCH] splice: add direct fd <-> fd splicing support
  [PATCH] splice: add optional input and output offsets
  [PATCH] introduce a "kernel-internal pipe object" abstraction
  [PATCH] splice: be smarter about calling do_page_cache_readahead()
  [PATCH] splice: optimize the splice buffer mapping
  [PATCH] splice: cleanup __generic_file_splice_read()
  [PATCH] splice: only call wake_up_interruptible() when we really have to
  [PATCH] splice: potential !page dereference
  [PATCH] splice: mark the io page as accessed
Diffstat (limited to 'fs')
-rw-r--r-- fs/fifo.c 65
-rw-r--r-- fs/pipe.c 310
-rw-r--r-- fs/read_write.c 2
-rw-r--r-- fs/splice.c 492
-rw-r--r-- fs/xfs/linux-2.6/xfs_file.c 8
-rw-r--r-- fs/xfs/linux-2.6/xfs_lrw.c 4
-rw-r--r-- fs/xfs/linux-2.6/xfs_lrw.h 4
-rw-r--r-- fs/xfs/linux-2.6/xfs_vnode.h 4
8 files changed, 555 insertions, 334 deletions
diff --git a/fs/fifo.c b/fs/fifo.c
index 889f722ee36d..49035b174b48 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -15,30 +15,35 @@
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/pipe_fs_i.h> 16#include <linux/pipe_fs_i.h>
17 17
18static void wait_for_partner(struct inode* inode, unsigned int* cnt) 18static void wait_for_partner(struct inode* inode, unsigned int *cnt)
19{ 19{
20 int cur = *cnt; 20 int cur = *cnt;
21 while(cur == *cnt) { 21
22 pipe_wait(inode); 22 while (cur == *cnt) {
23 if(signal_pending(current)) 23 pipe_wait(inode->i_pipe);
24 if (signal_pending(current))
24 break; 25 break;
25 } 26 }
26} 27}
27 28
28static void wake_up_partner(struct inode* inode) 29static void wake_up_partner(struct inode* inode)
29{ 30{
30 wake_up_interruptible(PIPE_WAIT(*inode)); 31 wake_up_interruptible(&inode->i_pipe->wait);
31} 32}
32 33
33static int fifo_open(struct inode *inode, struct file *filp) 34static int fifo_open(struct inode *inode, struct file *filp)
34{ 35{
36 struct pipe_inode_info *pipe;
35 int ret; 37 int ret;
36 38
37 mutex_lock(PIPE_MUTEX(*inode)); 39 mutex_lock(&inode->i_mutex);
38 if (!inode->i_pipe) { 40 pipe = inode->i_pipe;
41 if (!pipe) {
39 ret = -ENOMEM; 42 ret = -ENOMEM;
40 if(!pipe_new(inode)) 43 pipe = alloc_pipe_info(inode);
44 if (!pipe)
41 goto err_nocleanup; 45 goto err_nocleanup;
46 inode->i_pipe = pipe;
42 } 47 }
43 filp->f_version = 0; 48 filp->f_version = 0;
44 49
@@ -53,18 +58,18 @@ static int fifo_open(struct inode *inode, struct file *filp)
53 * opened, even when there is no process writing the FIFO. 58 * opened, even when there is no process writing the FIFO.
54 */ 59 */
55 filp->f_op = &read_fifo_fops; 60 filp->f_op = &read_fifo_fops;
56 PIPE_RCOUNTER(*inode)++; 61 pipe->r_counter++;
57 if (PIPE_READERS(*inode)++ == 0) 62 if (pipe->readers++ == 0)
58 wake_up_partner(inode); 63 wake_up_partner(inode);
59 64
60 if (!PIPE_WRITERS(*inode)) { 65 if (!pipe->writers) {
61 if ((filp->f_flags & O_NONBLOCK)) { 66 if ((filp->f_flags & O_NONBLOCK)) {
62 /* suppress POLLHUP until we have 67 /* suppress POLLHUP until we have
63 * seen a writer */ 68 * seen a writer */
64 filp->f_version = PIPE_WCOUNTER(*inode); 69 filp->f_version = pipe->w_counter;
65 } else 70 } else
66 { 71 {
67 wait_for_partner(inode, &PIPE_WCOUNTER(*inode)); 72 wait_for_partner(inode, &pipe->w_counter);
68 if(signal_pending(current)) 73 if(signal_pending(current))
69 goto err_rd; 74 goto err_rd;
70 } 75 }
@@ -78,16 +83,16 @@ static int fifo_open(struct inode *inode, struct file *filp)
78 * errno=ENXIO when there is no process reading the FIFO. 83 * errno=ENXIO when there is no process reading the FIFO.
79 */ 84 */
80 ret = -ENXIO; 85 ret = -ENXIO;
81 if ((filp->f_flags & O_NONBLOCK) && !PIPE_READERS(*inode)) 86 if ((filp->f_flags & O_NONBLOCK) && !pipe->readers)
82 goto err; 87 goto err;
83 88
84 filp->f_op = &write_fifo_fops; 89 filp->f_op = &write_fifo_fops;
85 PIPE_WCOUNTER(*inode)++; 90 pipe->w_counter++;
86 if (!PIPE_WRITERS(*inode)++) 91 if (!pipe->writers++)
87 wake_up_partner(inode); 92 wake_up_partner(inode);
88 93
89 if (!PIPE_READERS(*inode)) { 94 if (!pipe->readers) {
90 wait_for_partner(inode, &PIPE_RCOUNTER(*inode)); 95 wait_for_partner(inode, &pipe->r_counter);
91 if (signal_pending(current)) 96 if (signal_pending(current))
92 goto err_wr; 97 goto err_wr;
93 } 98 }
@@ -102,11 +107,11 @@ static int fifo_open(struct inode *inode, struct file *filp)
102 */ 107 */
103 filp->f_op = &rdwr_fifo_fops; 108 filp->f_op = &rdwr_fifo_fops;
104 109
105 PIPE_READERS(*inode)++; 110 pipe->readers++;
106 PIPE_WRITERS(*inode)++; 111 pipe->writers++;
107 PIPE_RCOUNTER(*inode)++; 112 pipe->r_counter++;
108 PIPE_WCOUNTER(*inode)++; 113 pipe->w_counter++;
109 if (PIPE_READERS(*inode) == 1 || PIPE_WRITERS(*inode) == 1) 114 if (pipe->readers == 1 || pipe->writers == 1)
110 wake_up_partner(inode); 115 wake_up_partner(inode);
111 break; 116 break;
112 117
@@ -116,27 +121,27 @@ static int fifo_open(struct inode *inode, struct file *filp)
116 } 121 }
117 122
118 /* Ok! */ 123 /* Ok! */
119 mutex_unlock(PIPE_MUTEX(*inode)); 124 mutex_unlock(&inode->i_mutex);
120 return 0; 125 return 0;
121 126
122err_rd: 127err_rd:
123 if (!--PIPE_READERS(*inode)) 128 if (!--pipe->readers)
124 wake_up_interruptible(PIPE_WAIT(*inode)); 129 wake_up_interruptible(&pipe->wait);
125 ret = -ERESTARTSYS; 130 ret = -ERESTARTSYS;
126 goto err; 131 goto err;
127 132
128err_wr: 133err_wr:
129 if (!--PIPE_WRITERS(*inode)) 134 if (!--pipe->writers)
130 wake_up_interruptible(PIPE_WAIT(*inode)); 135 wake_up_interruptible(&pipe->wait);
131 ret = -ERESTARTSYS; 136 ret = -ERESTARTSYS;
132 goto err; 137 goto err;
133 138
134err: 139err:
135 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) 140 if (!pipe->readers && !pipe->writers)
136 free_pipe_info(inode); 141 free_pipe_info(inode);
137 142
138err_nocleanup: 143err_nocleanup:
139 mutex_unlock(PIPE_MUTEX(*inode)); 144 mutex_unlock(&inode->i_mutex);
140 return ret; 145 return ret;
141} 146}
142 147
diff --git a/fs/pipe.c b/fs/pipe.c
index 795df987cd38..e984beb93a0e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -36,7 +36,7 @@
36 */ 36 */
37 37
38/* Drop the inode semaphore and wait for a pipe event, atomically */ 38/* Drop the inode semaphore and wait for a pipe event, atomically */
39void pipe_wait(struct inode * inode) 39void pipe_wait(struct pipe_inode_info *pipe)
40{ 40{
41 DEFINE_WAIT(wait); 41 DEFINE_WAIT(wait);
42 42
@@ -44,11 +44,14 @@ void pipe_wait(struct inode * inode)
44 * Pipes are system-local resources, so sleeping on them 44 * Pipes are system-local resources, so sleeping on them
45 * is considered a noninteractive wait: 45 * is considered a noninteractive wait:
46 */ 46 */
47 prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE); 47 prepare_to_wait(&pipe->wait, &wait,
48 mutex_unlock(PIPE_MUTEX(*inode)); 48 TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
49 if (pipe->inode)
50 mutex_unlock(&pipe->inode->i_mutex);
49 schedule(); 51 schedule();
50 finish_wait(PIPE_WAIT(*inode), &wait); 52 finish_wait(&pipe->wait, &wait);
51 mutex_lock(PIPE_MUTEX(*inode)); 53 if (pipe->inode)
54 mutex_lock(&pipe->inode->i_mutex);
52} 55}
53 56
54static int 57static int
@@ -91,7 +94,8 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
91 return 0; 94 return 0;
92} 95}
93 96
94static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf) 97static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
98 struct pipe_buffer *buf)
95{ 99{
96 struct page *page = buf->page; 100 struct page *page = buf->page;
97 101
@@ -100,30 +104,27 @@ static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buff
100 /* 104 /*
101 * If nobody else uses this page, and we don't already have a 105 * If nobody else uses this page, and we don't already have a
102 * temporary page, let's keep track of it as a one-deep 106 * temporary page, let's keep track of it as a one-deep
103 * allocation cache 107 * allocation cache. (Otherwise just release our reference to it)
104 */ 108 */
105 if (page_count(page) == 1 && !info->tmp_page) { 109 if (page_count(page) == 1 && !pipe->tmp_page)
106 info->tmp_page = page; 110 pipe->tmp_page = page;
107 return; 111 else
108 } 112 page_cache_release(page);
109
110 /*
111 * Otherwise just release our reference to it
112 */
113 page_cache_release(page);
114} 113}
115 114
116static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf) 115static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe,
116 struct pipe_buffer *buf)
117{ 117{
118 return kmap(buf->page); 118 return kmap(buf->page);
119} 119}
120 120
121static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf) 121static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe,
122 struct pipe_buffer *buf)
122{ 123{
123 kunmap(buf->page); 124 kunmap(buf->page);
124} 125}
125 126
126static int anon_pipe_buf_steal(struct pipe_inode_info *info, 127static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
127 struct pipe_buffer *buf) 128 struct pipe_buffer *buf)
128{ 129{
129 buf->flags |= PIPE_BUF_FLAG_STOLEN; 130 buf->flags |= PIPE_BUF_FLAG_STOLEN;
@@ -143,7 +144,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
143 unsigned long nr_segs, loff_t *ppos) 144 unsigned long nr_segs, loff_t *ppos)
144{ 145{
145 struct inode *inode = filp->f_dentry->d_inode; 146 struct inode *inode = filp->f_dentry->d_inode;
146 struct pipe_inode_info *info; 147 struct pipe_inode_info *pipe;
147 int do_wakeup; 148 int do_wakeup;
148 ssize_t ret; 149 ssize_t ret;
149 struct iovec *iov = (struct iovec *)_iov; 150 struct iovec *iov = (struct iovec *)_iov;
@@ -156,13 +157,13 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
156 157
157 do_wakeup = 0; 158 do_wakeup = 0;
158 ret = 0; 159 ret = 0;
159 mutex_lock(PIPE_MUTEX(*inode)); 160 mutex_lock(&inode->i_mutex);
160 info = inode->i_pipe; 161 pipe = inode->i_pipe;
161 for (;;) { 162 for (;;) {
162 int bufs = info->nrbufs; 163 int bufs = pipe->nrbufs;
163 if (bufs) { 164 if (bufs) {
164 int curbuf = info->curbuf; 165 int curbuf = pipe->curbuf;
165 struct pipe_buffer *buf = info->bufs + curbuf; 166 struct pipe_buffer *buf = pipe->bufs + curbuf;
166 struct pipe_buf_operations *ops = buf->ops; 167 struct pipe_buf_operations *ops = buf->ops;
167 void *addr; 168 void *addr;
168 size_t chars = buf->len; 169 size_t chars = buf->len;
@@ -171,16 +172,17 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
171 if (chars > total_len) 172 if (chars > total_len)
172 chars = total_len; 173 chars = total_len;
173 174
174 addr = ops->map(filp, info, buf); 175 addr = ops->map(filp, pipe, buf);
175 if (IS_ERR(addr)) { 176 if (IS_ERR(addr)) {
176 if (!ret) 177 if (!ret)
177 ret = PTR_ERR(addr); 178 ret = PTR_ERR(addr);
178 break; 179 break;
179 } 180 }
180 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); 181 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
181 ops->unmap(info, buf); 182 ops->unmap(pipe, buf);
182 if (unlikely(error)) { 183 if (unlikely(error)) {
183 if (!ret) ret = -EFAULT; 184 if (!ret)
185 ret = -EFAULT;
184 break; 186 break;
185 } 187 }
186 ret += chars; 188 ret += chars;
@@ -188,10 +190,10 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
188 buf->len -= chars; 190 buf->len -= chars;
189 if (!buf->len) { 191 if (!buf->len) {
190 buf->ops = NULL; 192 buf->ops = NULL;
191 ops->release(info, buf); 193 ops->release(pipe, buf);
192 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1); 194 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
193 info->curbuf = curbuf; 195 pipe->curbuf = curbuf;
194 info->nrbufs = --bufs; 196 pipe->nrbufs = --bufs;
195 do_wakeup = 1; 197 do_wakeup = 1;
196 } 198 }
197 total_len -= chars; 199 total_len -= chars;
@@ -200,9 +202,9 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
200 } 202 }
201 if (bufs) /* More to do? */ 203 if (bufs) /* More to do? */
202 continue; 204 continue;
203 if (!PIPE_WRITERS(*inode)) 205 if (!pipe->writers)
204 break; 206 break;
205 if (!PIPE_WAITING_WRITERS(*inode)) { 207 if (!pipe->waiting_writers) {
206 /* syscall merging: Usually we must not sleep 208 /* syscall merging: Usually we must not sleep
207 * if O_NONBLOCK is set, or if we got some data. 209 * if O_NONBLOCK is set, or if we got some data.
208 * But if a writer sleeps in kernel space, then 210 * But if a writer sleeps in kernel space, then
@@ -216,20 +218,22 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
216 } 218 }
217 } 219 }
218 if (signal_pending(current)) { 220 if (signal_pending(current)) {
219 if (!ret) ret = -ERESTARTSYS; 221 if (!ret)
222 ret = -ERESTARTSYS;
220 break; 223 break;
221 } 224 }
222 if (do_wakeup) { 225 if (do_wakeup) {
223 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 226 wake_up_interruptible_sync(&pipe->wait);
224 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 227 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
225 } 228 }
226 pipe_wait(inode); 229 pipe_wait(pipe);
227 } 230 }
228 mutex_unlock(PIPE_MUTEX(*inode)); 231 mutex_unlock(&inode->i_mutex);
229 /* Signal writers asynchronously that there is more room. */ 232
233 /* Signal writers asynchronously that there is more room. */
230 if (do_wakeup) { 234 if (do_wakeup) {
231 wake_up_interruptible(PIPE_WAIT(*inode)); 235 wake_up_interruptible(&pipe->wait);
232 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 236 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
233 } 237 }
234 if (ret > 0) 238 if (ret > 0)
235 file_accessed(filp); 239 file_accessed(filp);
@@ -240,6 +244,7 @@ static ssize_t
240pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) 244pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
241{ 245{
242 struct iovec iov = { .iov_base = buf, .iov_len = count }; 246 struct iovec iov = { .iov_base = buf, .iov_len = count };
247
243 return pipe_readv(filp, &iov, 1, ppos); 248 return pipe_readv(filp, &iov, 1, ppos);
244} 249}
245 250
@@ -248,7 +253,7 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
248 unsigned long nr_segs, loff_t *ppos) 253 unsigned long nr_segs, loff_t *ppos)
249{ 254{
250 struct inode *inode = filp->f_dentry->d_inode; 255 struct inode *inode = filp->f_dentry->d_inode;
251 struct pipe_inode_info *info; 256 struct pipe_inode_info *pipe;
252 ssize_t ret; 257 ssize_t ret;
253 int do_wakeup; 258 int do_wakeup;
254 struct iovec *iov = (struct iovec *)_iov; 259 struct iovec *iov = (struct iovec *)_iov;
@@ -262,10 +267,10 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
262 267
263 do_wakeup = 0; 268 do_wakeup = 0;
264 ret = 0; 269 ret = 0;
265 mutex_lock(PIPE_MUTEX(*inode)); 270 mutex_lock(&inode->i_mutex);
266 info = inode->i_pipe; 271 pipe = inode->i_pipe;
267 272
268 if (!PIPE_READERS(*inode)) { 273 if (!pipe->readers) {
269 send_sig(SIGPIPE, current, 0); 274 send_sig(SIGPIPE, current, 0);
270 ret = -EPIPE; 275 ret = -EPIPE;
271 goto out; 276 goto out;
@@ -273,23 +278,25 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
273 278
274 /* We try to merge small writes */ 279 /* We try to merge small writes */
275 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ 280 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
276 if (info->nrbufs && chars != 0) { 281 if (pipe->nrbufs && chars != 0) {
277 int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1); 282 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
278 struct pipe_buffer *buf = info->bufs + lastbuf; 283 (PIPE_BUFFERS-1);
284 struct pipe_buffer *buf = pipe->bufs + lastbuf;
279 struct pipe_buf_operations *ops = buf->ops; 285 struct pipe_buf_operations *ops = buf->ops;
280 int offset = buf->offset + buf->len; 286 int offset = buf->offset + buf->len;
287
281 if (ops->can_merge && offset + chars <= PAGE_SIZE) { 288 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
282 void *addr; 289 void *addr;
283 int error; 290 int error;
284 291
285 addr = ops->map(filp, info, buf); 292 addr = ops->map(filp, pipe, buf);
286 if (IS_ERR(addr)) { 293 if (IS_ERR(addr)) {
287 error = PTR_ERR(addr); 294 error = PTR_ERR(addr);
288 goto out; 295 goto out;
289 } 296 }
290 error = pipe_iov_copy_from_user(offset + addr, iov, 297 error = pipe_iov_copy_from_user(offset + addr, iov,
291 chars); 298 chars);
292 ops->unmap(info, buf); 299 ops->unmap(pipe, buf);
293 ret = error; 300 ret = error;
294 do_wakeup = 1; 301 do_wakeup = 1;
295 if (error) 302 if (error)
@@ -304,16 +311,18 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
304 311
305 for (;;) { 312 for (;;) {
306 int bufs; 313 int bufs;
307 if (!PIPE_READERS(*inode)) { 314
315 if (!pipe->readers) {
308 send_sig(SIGPIPE, current, 0); 316 send_sig(SIGPIPE, current, 0);
309 if (!ret) ret = -EPIPE; 317 if (!ret)
318 ret = -EPIPE;
310 break; 319 break;
311 } 320 }
312 bufs = info->nrbufs; 321 bufs = pipe->nrbufs;
313 if (bufs < PIPE_BUFFERS) { 322 if (bufs < PIPE_BUFFERS) {
314 int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1); 323 int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
315 struct pipe_buffer *buf = info->bufs + newbuf; 324 struct pipe_buffer *buf = pipe->bufs + newbuf;
316 struct page *page = info->tmp_page; 325 struct page *page = pipe->tmp_page;
317 int error; 326 int error;
318 327
319 if (!page) { 328 if (!page) {
@@ -322,9 +331,9 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
322 ret = ret ? : -ENOMEM; 331 ret = ret ? : -ENOMEM;
323 break; 332 break;
324 } 333 }
325 info->tmp_page = page; 334 pipe->tmp_page = page;
326 } 335 }
327 /* Always wakeup, even if the copy fails. Otherwise 336 /* Always wake up, even if the copy fails. Otherwise
328 * we lock up (O_NONBLOCK-)readers that sleep due to 337 * we lock up (O_NONBLOCK-)readers that sleep due to
329 * syscall merging. 338 * syscall merging.
330 * FIXME! Is this really true? 339 * FIXME! Is this really true?
@@ -337,7 +346,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
337 error = pipe_iov_copy_from_user(kmap(page), iov, chars); 346 error = pipe_iov_copy_from_user(kmap(page), iov, chars);
338 kunmap(page); 347 kunmap(page);
339 if (unlikely(error)) { 348 if (unlikely(error)) {
340 if (!ret) ret = -EFAULT; 349 if (!ret)
350 ret = -EFAULT;
341 break; 351 break;
342 } 352 }
343 ret += chars; 353 ret += chars;
@@ -347,8 +357,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
347 buf->ops = &anon_pipe_buf_ops; 357 buf->ops = &anon_pipe_buf_ops;
348 buf->offset = 0; 358 buf->offset = 0;
349 buf->len = chars; 359 buf->len = chars;
350 info->nrbufs = ++bufs; 360 pipe->nrbufs = ++bufs;
351 info->tmp_page = NULL; 361 pipe->tmp_page = NULL;
352 362
353 total_len -= chars; 363 total_len -= chars;
354 if (!total_len) 364 if (!total_len)
@@ -357,27 +367,29 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
357 if (bufs < PIPE_BUFFERS) 367 if (bufs < PIPE_BUFFERS)
358 continue; 368 continue;
359 if (filp->f_flags & O_NONBLOCK) { 369 if (filp->f_flags & O_NONBLOCK) {
360 if (!ret) ret = -EAGAIN; 370 if (!ret)
371 ret = -EAGAIN;
361 break; 372 break;
362 } 373 }
363 if (signal_pending(current)) { 374 if (signal_pending(current)) {
364 if (!ret) ret = -ERESTARTSYS; 375 if (!ret)
376 ret = -ERESTARTSYS;
365 break; 377 break;
366 } 378 }
367 if (do_wakeup) { 379 if (do_wakeup) {
368 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 380 wake_up_interruptible_sync(&pipe->wait);
369 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 381 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
370 do_wakeup = 0; 382 do_wakeup = 0;
371 } 383 }
372 PIPE_WAITING_WRITERS(*inode)++; 384 pipe->waiting_writers++;
373 pipe_wait(inode); 385 pipe_wait(pipe);
374 PIPE_WAITING_WRITERS(*inode)--; 386 pipe->waiting_writers--;
375 } 387 }
376out: 388out:
377 mutex_unlock(PIPE_MUTEX(*inode)); 389 mutex_unlock(&inode->i_mutex);
378 if (do_wakeup) { 390 if (do_wakeup) {
379 wake_up_interruptible(PIPE_WAIT(*inode)); 391 wake_up_interruptible(&pipe->wait);
380 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 392 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
381 } 393 }
382 if (ret > 0) 394 if (ret > 0)
383 file_update_time(filp); 395 file_update_time(filp);
@@ -389,6 +401,7 @@ pipe_write(struct file *filp, const char __user *buf,
389 size_t count, loff_t *ppos) 401 size_t count, loff_t *ppos)
390{ 402{
391 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; 403 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
404
392 return pipe_writev(filp, &iov, 1, ppos); 405 return pipe_writev(filp, &iov, 1, ppos);
393} 406}
394 407
@@ -399,7 +412,8 @@ bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
399} 412}
400 413
401static ssize_t 414static ssize_t
402bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) 415bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
416 loff_t *ppos)
403{ 417{
404 return -EBADF; 418 return -EBADF;
405} 419}
@@ -409,21 +423,22 @@ pipe_ioctl(struct inode *pino, struct file *filp,
409 unsigned int cmd, unsigned long arg) 423 unsigned int cmd, unsigned long arg)
410{ 424{
411 struct inode *inode = filp->f_dentry->d_inode; 425 struct inode *inode = filp->f_dentry->d_inode;
412 struct pipe_inode_info *info; 426 struct pipe_inode_info *pipe;
413 int count, buf, nrbufs; 427 int count, buf, nrbufs;
414 428
415 switch (cmd) { 429 switch (cmd) {
416 case FIONREAD: 430 case FIONREAD:
417 mutex_lock(PIPE_MUTEX(*inode)); 431 mutex_lock(&inode->i_mutex);
418 info = inode->i_pipe; 432 pipe = inode->i_pipe;
419 count = 0; 433 count = 0;
420 buf = info->curbuf; 434 buf = pipe->curbuf;
421 nrbufs = info->nrbufs; 435 nrbufs = pipe->nrbufs;
422 while (--nrbufs >= 0) { 436 while (--nrbufs >= 0) {
423 count += info->bufs[buf].len; 437 count += pipe->bufs[buf].len;
424 buf = (buf+1) & (PIPE_BUFFERS-1); 438 buf = (buf+1) & (PIPE_BUFFERS-1);
425 } 439 }
426 mutex_unlock(PIPE_MUTEX(*inode)); 440 mutex_unlock(&inode->i_mutex);
441
427 return put_user(count, (int __user *)arg); 442 return put_user(count, (int __user *)arg);
428 default: 443 default:
429 return -EINVAL; 444 return -EINVAL;
@@ -436,17 +451,17 @@ pipe_poll(struct file *filp, poll_table *wait)
436{ 451{
437 unsigned int mask; 452 unsigned int mask;
438 struct inode *inode = filp->f_dentry->d_inode; 453 struct inode *inode = filp->f_dentry->d_inode;
439 struct pipe_inode_info *info = inode->i_pipe; 454 struct pipe_inode_info *pipe = inode->i_pipe;
440 int nrbufs; 455 int nrbufs;
441 456
442 poll_wait(filp, PIPE_WAIT(*inode), wait); 457 poll_wait(filp, &pipe->wait, wait);
443 458
444 /* Reading only -- no need for acquiring the semaphore. */ 459 /* Reading only -- no need for acquiring the semaphore. */
445 nrbufs = info->nrbufs; 460 nrbufs = pipe->nrbufs;
446 mask = 0; 461 mask = 0;
447 if (filp->f_mode & FMODE_READ) { 462 if (filp->f_mode & FMODE_READ) {
448 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0; 463 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
449 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode)) 464 if (!pipe->writers && filp->f_version != pipe->w_counter)
450 mask |= POLLHUP; 465 mask |= POLLHUP;
451 } 466 }
452 467
@@ -456,7 +471,7 @@ pipe_poll(struct file *filp, poll_table *wait)
456 * Most Unices do not set POLLERR for FIFOs but on Linux they 471 * Most Unices do not set POLLERR for FIFOs but on Linux they
457 * behave exactly like pipes for poll(). 472 * behave exactly like pipes for poll().
458 */ 473 */
459 if (!PIPE_READERS(*inode)) 474 if (!pipe->readers)
460 mask |= POLLERR; 475 mask |= POLLERR;
461 } 476 }
462 477
@@ -466,17 +481,21 @@ pipe_poll(struct file *filp, poll_table *wait)
466static int 481static int
467pipe_release(struct inode *inode, int decr, int decw) 482pipe_release(struct inode *inode, int decr, int decw)
468{ 483{
469 mutex_lock(PIPE_MUTEX(*inode)); 484 struct pipe_inode_info *pipe;
470 PIPE_READERS(*inode) -= decr; 485
471 PIPE_WRITERS(*inode) -= decw; 486 mutex_lock(&inode->i_mutex);
472 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) { 487 pipe = inode->i_pipe;
488 pipe->readers -= decr;
489 pipe->writers -= decw;
490
491 if (!pipe->readers && !pipe->writers) {
473 free_pipe_info(inode); 492 free_pipe_info(inode);
474 } else { 493 } else {
475 wake_up_interruptible(PIPE_WAIT(*inode)); 494 wake_up_interruptible(&pipe->wait);
476 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 495 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
477 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 496 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
478 } 497 }
479 mutex_unlock(PIPE_MUTEX(*inode)); 498 mutex_unlock(&inode->i_mutex);
480 499
481 return 0; 500 return 0;
482} 501}
@@ -487,9 +506,9 @@ pipe_read_fasync(int fd, struct file *filp, int on)
487 struct inode *inode = filp->f_dentry->d_inode; 506 struct inode *inode = filp->f_dentry->d_inode;
488 int retval; 507 int retval;
489 508
490 mutex_lock(PIPE_MUTEX(*inode)); 509 mutex_lock(&inode->i_mutex);
491 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); 510 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
492 mutex_unlock(PIPE_MUTEX(*inode)); 511 mutex_unlock(&inode->i_mutex);
493 512
494 if (retval < 0) 513 if (retval < 0)
495 return retval; 514 return retval;
@@ -504,9 +523,9 @@ pipe_write_fasync(int fd, struct file *filp, int on)
504 struct inode *inode = filp->f_dentry->d_inode; 523 struct inode *inode = filp->f_dentry->d_inode;
505 int retval; 524 int retval;
506 525
507 mutex_lock(PIPE_MUTEX(*inode)); 526 mutex_lock(&inode->i_mutex);
508 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); 527 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
509 mutex_unlock(PIPE_MUTEX(*inode)); 528 mutex_unlock(&inode->i_mutex);
510 529
511 if (retval < 0) 530 if (retval < 0)
512 return retval; 531 return retval;
@@ -519,16 +538,17 @@ static int
519pipe_rdwr_fasync(int fd, struct file *filp, int on) 538pipe_rdwr_fasync(int fd, struct file *filp, int on)
520{ 539{
521 struct inode *inode = filp->f_dentry->d_inode; 540 struct inode *inode = filp->f_dentry->d_inode;
541 struct pipe_inode_info *pipe = inode->i_pipe;
522 int retval; 542 int retval;
523 543
524 mutex_lock(PIPE_MUTEX(*inode)); 544 mutex_lock(&inode->i_mutex);
525 545
526 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); 546 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
527 547
528 if (retval >= 0) 548 if (retval >= 0)
529 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); 549 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
530 550
531 mutex_unlock(PIPE_MUTEX(*inode)); 551 mutex_unlock(&inode->i_mutex);
532 552
533 if (retval < 0) 553 if (retval < 0)
534 return retval; 554 return retval;
@@ -567,9 +587,9 @@ pipe_read_open(struct inode *inode, struct file *filp)
567{ 587{
568 /* We could have perhaps used atomic_t, but this and friends 588 /* We could have perhaps used atomic_t, but this and friends
569 below are the only places. So it doesn't seem worthwhile. */ 589 below are the only places. So it doesn't seem worthwhile. */
570 mutex_lock(PIPE_MUTEX(*inode)); 590 mutex_lock(&inode->i_mutex);
571 PIPE_READERS(*inode)++; 591 inode->i_pipe->readers++;
572 mutex_unlock(PIPE_MUTEX(*inode)); 592 mutex_unlock(&inode->i_mutex);
573 593
574 return 0; 594 return 0;
575} 595}
@@ -577,9 +597,9 @@ pipe_read_open(struct inode *inode, struct file *filp)
577static int 597static int
578pipe_write_open(struct inode *inode, struct file *filp) 598pipe_write_open(struct inode *inode, struct file *filp)
579{ 599{
580 mutex_lock(PIPE_MUTEX(*inode)); 600 mutex_lock(&inode->i_mutex);
581 PIPE_WRITERS(*inode)++; 601 inode->i_pipe->writers++;
582 mutex_unlock(PIPE_MUTEX(*inode)); 602 mutex_unlock(&inode->i_mutex);
583 603
584 return 0; 604 return 0;
585} 605}
@@ -587,12 +607,12 @@ pipe_write_open(struct inode *inode, struct file *filp)
587static int 607static int
588pipe_rdwr_open(struct inode *inode, struct file *filp) 608pipe_rdwr_open(struct inode *inode, struct file *filp)
589{ 609{
590 mutex_lock(PIPE_MUTEX(*inode)); 610 mutex_lock(&inode->i_mutex);
591 if (filp->f_mode & FMODE_READ) 611 if (filp->f_mode & FMODE_READ)
592 PIPE_READERS(*inode)++; 612 inode->i_pipe->readers++;
593 if (filp->f_mode & FMODE_WRITE) 613 if (filp->f_mode & FMODE_WRITE)
594 PIPE_WRITERS(*inode)++; 614 inode->i_pipe->writers++;
595 mutex_unlock(PIPE_MUTEX(*inode)); 615 mutex_unlock(&inode->i_mutex);
596 616
597 return 0; 617 return 0;
598} 618}
@@ -675,37 +695,38 @@ static struct file_operations rdwr_pipe_fops = {
675 .fasync = pipe_rdwr_fasync, 695 .fasync = pipe_rdwr_fasync,
676}; 696};
677 697
678void free_pipe_info(struct inode *inode) 698struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
699{
700 struct pipe_inode_info *pipe;
701
702 pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
703 if (pipe) {
704 init_waitqueue_head(&pipe->wait);
705 pipe->r_counter = pipe->w_counter = 1;
706 pipe->inode = inode;
707 }
708
709 return pipe;
710}
711
712void __free_pipe_info(struct pipe_inode_info *pipe)
679{ 713{
680 int i; 714 int i;
681 struct pipe_inode_info *info = inode->i_pipe;
682 715
683 inode->i_pipe = NULL;
684 for (i = 0; i < PIPE_BUFFERS; i++) { 716 for (i = 0; i < PIPE_BUFFERS; i++) {
685 struct pipe_buffer *buf = info->bufs + i; 717 struct pipe_buffer *buf = pipe->bufs + i;
686 if (buf->ops) 718 if (buf->ops)
687 buf->ops->release(info, buf); 719 buf->ops->release(pipe, buf);
688 } 720 }
689 if (info->tmp_page) 721 if (pipe->tmp_page)
690 __free_page(info->tmp_page); 722 __free_page(pipe->tmp_page);
691 kfree(info); 723 kfree(pipe);
692} 724}
693 725
694struct inode* pipe_new(struct inode* inode) 726void free_pipe_info(struct inode *inode)
695{ 727{
696 struct pipe_inode_info *info; 728 __free_pipe_info(inode->i_pipe);
697 729 inode->i_pipe = NULL;
698 info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
699 if (!info)
700 goto fail_page;
701 inode->i_pipe = info;
702
703 init_waitqueue_head(PIPE_WAIT(*inode));
704 PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
705
706 return inode;
707fail_page:
708 return NULL;
709} 730}
710 731
711static struct vfsmount *pipe_mnt __read_mostly; 732static struct vfsmount *pipe_mnt __read_mostly;
@@ -713,6 +734,7 @@ static int pipefs_delete_dentry(struct dentry *dentry)
713{ 734{
714 return 1; 735 return 1;
715} 736}
737
716static struct dentry_operations pipefs_dentry_operations = { 738static struct dentry_operations pipefs_dentry_operations = {
717 .d_delete = pipefs_delete_dentry, 739 .d_delete = pipefs_delete_dentry,
718}; 740};
@@ -720,13 +742,17 @@ static struct dentry_operations pipefs_dentry_operations = {
720static struct inode * get_pipe_inode(void) 742static struct inode * get_pipe_inode(void)
721{ 743{
722 struct inode *inode = new_inode(pipe_mnt->mnt_sb); 744 struct inode *inode = new_inode(pipe_mnt->mnt_sb);
745 struct pipe_inode_info *pipe;
723 746
724 if (!inode) 747 if (!inode)
725 goto fail_inode; 748 goto fail_inode;
726 749
727 if(!pipe_new(inode)) 750 pipe = alloc_pipe_info(inode);
751 if (!pipe)
728 goto fail_iput; 752 goto fail_iput;
729 PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; 753 inode->i_pipe = pipe;
754
755 pipe->readers = pipe->writers = 1;
730 inode->i_fop = &rdwr_pipe_fops; 756 inode->i_fop = &rdwr_pipe_fops;
731 757
732 /* 758 /*
@@ -741,10 +767,12 @@ static struct inode * get_pipe_inode(void)
741 inode->i_gid = current->fsgid; 767 inode->i_gid = current->fsgid;
742 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 768 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
743 inode->i_blksize = PAGE_SIZE; 769 inode->i_blksize = PAGE_SIZE;
770
744 return inode; 771 return inode;
745 772
746fail_iput: 773fail_iput:
747 iput(inode); 774 iput(inode);
775
748fail_inode: 776fail_inode:
749 return NULL; 777 return NULL;
750} 778}
@@ -757,7 +785,7 @@ int do_pipe(int *fd)
757 struct inode * inode; 785 struct inode * inode;
758 struct file *f1, *f2; 786 struct file *f1, *f2;
759 int error; 787 int error;
760 int i,j; 788 int i, j;
761 789
762 error = -ENFILE; 790 error = -ENFILE;
763 f1 = get_empty_filp(); 791 f1 = get_empty_filp();
@@ -790,6 +818,7 @@ int do_pipe(int *fd)
790 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this); 818 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
791 if (!dentry) 819 if (!dentry)
792 goto close_f12_inode_i_j; 820 goto close_f12_inode_i_j;
821
793 dentry->d_op = &pipefs_dentry_operations; 822 dentry->d_op = &pipefs_dentry_operations;
794 d_add(dentry, inode); 823 d_add(dentry, inode);
795 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt)); 824 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
@@ -813,6 +842,7 @@ int do_pipe(int *fd)
813 fd_install(j, f2); 842 fd_install(j, f2);
814 fd[0] = i; 843 fd[0] = i;
815 fd[1] = j; 844 fd[1] = j;
845
816 return 0; 846 return 0;
817 847
818close_f12_inode_i_j: 848close_f12_inode_i_j:
@@ -837,8 +867,9 @@ no_files:
837 * d_name - pipe: will go nicely and kill the special-casing in procfs. 867 * d_name - pipe: will go nicely and kill the special-casing in procfs.
838 */ 868 */
839 869
840static struct super_block *pipefs_get_sb(struct file_system_type *fs_type, 870static struct super_block *
841 int flags, const char *dev_name, void *data) 871pipefs_get_sb(struct file_system_type *fs_type, int flags,
872 const char *dev_name, void *data)
842{ 873{
843 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); 874 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
844} 875}
@@ -852,6 +883,7 @@ static struct file_system_type pipe_fs_type = {
852static int __init init_pipe_fs(void) 883static int __init init_pipe_fs(void)
853{ 884{
854 int err = register_filesystem(&pipe_fs_type); 885 int err = register_filesystem(&pipe_fs_type);
886
855 if (!err) { 887 if (!err) {
856 pipe_mnt = kern_mount(&pipe_fs_type); 888 pipe_mnt = kern_mount(&pipe_fs_type);
857 if (IS_ERR(pipe_mnt)) { 889 if (IS_ERR(pipe_mnt)) {
diff --git a/fs/read_write.c b/fs/read_write.c
index 6256ca81a718..5bc0e9234f9d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -202,7 +202,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
202 goto Einval; 202 goto Einval;
203 203
204 inode = file->f_dentry->d_inode; 204 inode = file->f_dentry->d_inode;
205 if (inode->i_flock && MANDATORY_LOCK(inode)) { 205 if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) {
206 int retval = locks_mandatory_area( 206 int retval = locks_mandatory_area(
207 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 207 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
208 inode, file, pos, count); 208 inode, file, pos, count);
diff --git a/fs/splice.c b/fs/splice.c
index bfa42a277bb8..e50a460239dd 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -9,11 +9,12 @@
9 * that transfers data buffers to or from a pipe buffer. 9 * that transfers data buffers to or from a pipe buffer.
10 * 10 *
11 * Named by Larry McVoy, original implementation from Linus, extended by 11 * Named by Larry McVoy, original implementation from Linus, extended by
12 * Jens to support splicing to files and fixing the initial implementation 12 * Jens to support splicing to files, network, direct splicing, etc and
13 * bugs. 13 * fixing lots of bugs.
14 * 14 *
15 * Copyright (C) 2005 Jens Axboe <axboe@suse.de> 15 * Copyright (C) 2005-2006 Jens Axboe <axboe@suse.de>
16 * Copyright (C) 2005 Linus Torvalds <torvalds@osdl.org> 16 * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
17 * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
17 * 18 *
18 */ 19 */
19#include <linux/fs.h> 20#include <linux/fs.h>
@@ -84,26 +85,43 @@ static void *page_cache_pipe_buf_map(struct file *file,
84 struct pipe_buffer *buf) 85 struct pipe_buffer *buf)
85{ 86{
86 struct page *page = buf->page; 87 struct page *page = buf->page;
87 88 int err;
88 lock_page(page);
89 89
90 if (!PageUptodate(page)) { 90 if (!PageUptodate(page)) {
91 unlock_page(page); 91 lock_page(page);
92 return ERR_PTR(-EIO); 92
93 } 93 /*
94 * Page got truncated/unhashed. This will cause a 0-byte
95 * splice, if this is the first page.
96 */
97 if (!page->mapping) {
98 err = -ENODATA;
99 goto error;
100 }
94 101
95 if (!page->mapping) { 102 /*
103 * Uh oh, read-error from disk.
104 */
105 if (!PageUptodate(page)) {
106 err = -EIO;
107 goto error;
108 }
109
110 /*
111 * Page is ok after all, fall through to mapping.
112 */
96 unlock_page(page); 113 unlock_page(page);
97 return ERR_PTR(-ENODATA);
98 } 114 }
99 115
100 return kmap(buf->page); 116 return kmap(page);
117error:
118 unlock_page(page);
119 return ERR_PTR(err);
101} 120}
102 121
103static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 122static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
104 struct pipe_buffer *buf) 123 struct pipe_buffer *buf)
105{ 124{
106 unlock_page(buf->page);
107 kunmap(buf->page); 125 kunmap(buf->page);
108} 126}
109 127
@@ -119,34 +137,30 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = {
119 * Pipe output worker. This sets up our pipe format with the page cache 137 * Pipe output worker. This sets up our pipe format with the page cache
120 * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 138 * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
121 */ 139 */
122static ssize_t move_to_pipe(struct inode *inode, struct page **pages, 140static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
123 int nr_pages, unsigned long offset, 141 int nr_pages, unsigned long offset,
124 unsigned long len, unsigned int flags) 142 unsigned long len, unsigned int flags)
125{ 143{
126 struct pipe_inode_info *info;
127 int ret, do_wakeup, i; 144 int ret, do_wakeup, i;
128 145
129 ret = 0; 146 ret = 0;
130 do_wakeup = 0; 147 do_wakeup = 0;
131 i = 0; 148 i = 0;
132 149
133 mutex_lock(PIPE_MUTEX(*inode)); 150 if (pipe->inode)
151 mutex_lock(&pipe->inode->i_mutex);
134 152
135 info = inode->i_pipe;
136 for (;;) { 153 for (;;) {
137 int bufs; 154 if (!pipe->readers) {
138
139 if (!PIPE_READERS(*inode)) {
140 send_sig(SIGPIPE, current, 0); 155 send_sig(SIGPIPE, current, 0);
141 if (!ret) 156 if (!ret)
142 ret = -EPIPE; 157 ret = -EPIPE;
143 break; 158 break;
144 } 159 }
145 160
146 bufs = info->nrbufs; 161 if (pipe->nrbufs < PIPE_BUFFERS) {
147 if (bufs < PIPE_BUFFERS) { 162 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
148 int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS - 1); 163 struct pipe_buffer *buf = pipe->bufs + newbuf;
149 struct pipe_buffer *buf = info->bufs + newbuf;
150 struct page *page = pages[i++]; 164 struct page *page = pages[i++];
151 unsigned long this_len; 165 unsigned long this_len;
152 166
@@ -158,8 +172,9 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
158 buf->offset = offset; 172 buf->offset = offset;
159 buf->len = this_len; 173 buf->len = this_len;
160 buf->ops = &page_cache_pipe_buf_ops; 174 buf->ops = &page_cache_pipe_buf_ops;
161 info->nrbufs = ++bufs; 175 pipe->nrbufs++;
162 do_wakeup = 1; 176 if (pipe->inode)
177 do_wakeup = 1;
163 178
164 ret += this_len; 179 ret += this_len;
165 len -= this_len; 180 len -= this_len;
@@ -168,7 +183,7 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
168 break; 183 break;
169 if (!len) 184 if (!len)
170 break; 185 break;
171 if (bufs < PIPE_BUFFERS) 186 if (pipe->nrbufs < PIPE_BUFFERS)
172 continue; 187 continue;
173 188
174 break; 189 break;
@@ -187,22 +202,26 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
187 } 202 }
188 203
189 if (do_wakeup) { 204 if (do_wakeup) {
190 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 205 smp_mb();
191 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, 206 if (waitqueue_active(&pipe->wait))
192 POLL_IN); 207 wake_up_interruptible_sync(&pipe->wait);
208 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
193 do_wakeup = 0; 209 do_wakeup = 0;
194 } 210 }
195 211
196 PIPE_WAITING_WRITERS(*inode)++; 212 pipe->waiting_writers++;
197 pipe_wait(inode); 213 pipe_wait(pipe);
198 PIPE_WAITING_WRITERS(*inode)--; 214 pipe->waiting_writers--;
199 } 215 }
200 216
201 mutex_unlock(PIPE_MUTEX(*inode)); 217 if (pipe->inode)
218 mutex_unlock(&pipe->inode->i_mutex);
202 219
203 if (do_wakeup) { 220 if (do_wakeup) {
204 wake_up_interruptible(PIPE_WAIT(*inode)); 221 smp_mb();
205 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 222 if (waitqueue_active(&pipe->wait))
223 wake_up_interruptible(&pipe->wait);
224 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
206 } 225 }
207 226
208 while (i < nr_pages) 227 while (i < nr_pages)
@@ -211,15 +230,16 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
211 return ret; 230 return ret;
212} 231}
213 232
214static int __generic_file_splice_read(struct file *in, struct inode *pipe, 233static int
215 size_t len, unsigned int flags) 234__generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe,
235 size_t len, unsigned int flags)
216{ 236{
217 struct address_space *mapping = in->f_mapping; 237 struct address_space *mapping = in->f_mapping;
218 unsigned int offset, nr_pages; 238 unsigned int offset, nr_pages;
219 struct page *pages[PIPE_BUFFERS], *shadow[PIPE_BUFFERS]; 239 struct page *pages[PIPE_BUFFERS];
220 struct page *page; 240 struct page *page;
221 pgoff_t index, pidx; 241 pgoff_t index;
222 int i, j; 242 int i, error;
223 243
224 index = in->f_pos >> PAGE_CACHE_SHIFT; 244 index = in->f_pos >> PAGE_CACHE_SHIFT;
225 offset = in->f_pos & ~PAGE_CACHE_MASK; 245 offset = in->f_pos & ~PAGE_CACHE_MASK;
@@ -229,78 +249,94 @@ static int __generic_file_splice_read(struct file *in, struct inode *pipe,
229 nr_pages = PIPE_BUFFERS; 249 nr_pages = PIPE_BUFFERS;
230 250
231 /* 251 /*
232 * initiate read-ahead on this page range 252 * Initiate read-ahead on this page range. However, don't call into
233 */ 253 * read-ahead if this is a non-zero offset (we are likely doing small
234 do_page_cache_readahead(mapping, in, index, nr_pages); 254 * chunk splice and the page is already there) for a single page.
235
236 /*
237 * Get as many pages from the page cache as possible..
238 * Start IO on the page cache entries we create (we
239 * can assume that any pre-existing ones we find have
240 * already had IO started on them).
241 */ 255 */
242 i = find_get_pages(mapping, index, nr_pages, pages); 256 if (!offset || nr_pages > 1)
257 do_page_cache_readahead(mapping, in, index, nr_pages);
243 258
244 /* 259 /*
245 * common case - we found all pages and they are contiguous, 260 * Now fill in the holes:
246 * kick them off
247 */ 261 */
248 if (i && (pages[i - 1]->index == index + i - 1)) 262 error = 0;
249 goto splice_them; 263 for (i = 0; i < nr_pages; i++, index++) {
264find_page:
265 /*
266 * lookup the page for this index
267 */
268 page = find_get_page(mapping, index);
269 if (!page) {
270 /*
271 * If in nonblock mode then don't block on
272 * readpage (we've kicked readahead so there
273 * will be asynchronous progress):
274 */
275 if (flags & SPLICE_F_NONBLOCK)
276 break;
250 277
251 /* 278 /*
252 * fill shadow[] with pages at the right locations, so we only 279 * page didn't exist, allocate one
253 * have to fill holes 280 */
254 */ 281 page = page_cache_alloc_cold(mapping);
255 memset(shadow, 0, nr_pages * sizeof(struct page *)); 282 if (!page)
256 for (j = 0; j < i; j++) 283 break;
257 shadow[pages[j]->index - index] = pages[j];
258 284
259 /* 285 error = add_to_page_cache_lru(page, mapping, index,
260 * now fill in the holes 286 mapping_gfp_mask(mapping));
261 */ 287 if (unlikely(error)) {
262 for (i = 0, pidx = index; i < nr_pages; pidx++, i++) { 288 page_cache_release(page);
263 int error; 289 break;
290 }
264 291
265 if (shadow[i]) 292 goto readpage;
266 continue; 293 }
267 294
268 /* 295 /*
269 * no page there, look one up / create it 296 * If the page isn't uptodate, we may need to start io on it
270 */ 297 */
271 page = find_or_create_page(mapping, pidx, 298 if (!PageUptodate(page)) {
272 mapping_gfp_mask(mapping)); 299 lock_page(page);
273 if (!page) 300
274 break; 301 /*
302 * page was truncated, stop here. If this isn't the
303 * first page, we'll just complete what we already
304 * added
305 */
306 if (!page->mapping) {
307 unlock_page(page);
308 page_cache_release(page);
309 break;
310 }
311 /*
312 * page was already under io and is now done, great
313 */
314 if (PageUptodate(page)) {
315 unlock_page(page);
316 goto fill_it;
317 }
275 318
276 if (PageUptodate(page)) 319readpage:
277 unlock_page(page); 320 /*
278 else { 321 * need to read in the page
322 */
279 error = mapping->a_ops->readpage(in, page); 323 error = mapping->a_ops->readpage(in, page);
280 324
281 if (unlikely(error)) { 325 if (unlikely(error)) {
282 page_cache_release(page); 326 page_cache_release(page);
327 if (error == AOP_TRUNCATED_PAGE)
328 goto find_page;
283 break; 329 break;
284 } 330 }
285 } 331 }
286 shadow[i] = page; 332fill_it:
333 pages[i] = page;
287 } 334 }
288 335
289 if (!i) { 336 if (i)
290 for (i = 0; i < nr_pages; i++) { 337 return move_to_pipe(pipe, pages, i, offset, len, flags);
291 if (shadow[i])
292 page_cache_release(shadow[i]);
293 }
294 return 0;
295 }
296 338
297 memcpy(pages, shadow, i * sizeof(struct page *)); 339 return error;
298
299 /*
300 * Now we splice them into the pipe..
301 */
302splice_them:
303 return move_to_pipe(pipe, pages, i, offset, len, flags);
304} 340}
305 341
306/** 342/**
@@ -311,9 +347,8 @@ splice_them:
311 * @flags: splice modifier flags 347 * @flags: splice modifier flags
312 * 348 *
313 * Will read pages from given file and fill them into a pipe. 349 * Will read pages from given file and fill them into a pipe.
314 *
315 */ 350 */
316ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, 351ssize_t generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe,
317 size_t len, unsigned int flags) 352 size_t len, unsigned int flags)
318{ 353{
319 ssize_t spliced; 354 ssize_t spliced;
@@ -321,6 +356,7 @@ ssize_t generic_file_splice_read(struct file *in, struct inode *pipe,
321 356
322 ret = 0; 357 ret = 0;
323 spliced = 0; 358 spliced = 0;
359
324 while (len) { 360 while (len) {
325 ret = __generic_file_splice_read(in, pipe, len, flags); 361 ret = __generic_file_splice_read(in, pipe, len, flags);
326 362
@@ -360,10 +396,10 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
360 int more; 396 int more;
361 397
362 /* 398 /*
363 * sub-optimal, but we are limited by the pipe ->map. we don't 399 * Sub-optimal, but we are limited by the pipe ->map. We don't
364 * need a kmap'ed buffer here, we just want to make sure we 400 * need a kmap'ed buffer here, we just want to make sure we
365 * have the page pinned if the pipe page originates from the 401 * have the page pinned if the pipe page originates from the
366 * page cache 402 * page cache.
367 */ 403 */
368 ptr = buf->ops->map(file, info, buf); 404 ptr = buf->ops->map(file, info, buf);
369 if (IS_ERR(ptr)) 405 if (IS_ERR(ptr))
@@ -414,7 +450,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
414 int ret; 450 int ret;
415 451
416 /* 452 /*
417 * after this, page will be locked and unmapped 453 * make sure the data in this buffer is uptodate
418 */ 454 */
419 src = buf->ops->map(file, info, buf); 455 src = buf->ops->map(file, info, buf);
420 if (IS_ERR(src)) 456 if (IS_ERR(src))
@@ -424,7 +460,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
424 offset = sd->pos & ~PAGE_CACHE_MASK; 460 offset = sd->pos & ~PAGE_CACHE_MASK;
425 461
426 /* 462 /*
427 * reuse buf page, if SPLICE_F_MOVE is set 463 * Reuse buf page, if SPLICE_F_MOVE is set.
428 */ 464 */
429 if (sd->flags & SPLICE_F_MOVE) { 465 if (sd->flags & SPLICE_F_MOVE) {
430 /* 466 /*
@@ -434,6 +470,9 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
434 if (buf->ops->steal(info, buf)) 470 if (buf->ops->steal(info, buf))
435 goto find_page; 471 goto find_page;
436 472
473 /*
474 * this will also set the page locked
475 */
437 page = buf->page; 476 page = buf->page;
438 if (add_to_page_cache(page, mapping, index, gfp_mask)) 477 if (add_to_page_cache(page, mapping, index, gfp_mask))
439 goto find_page; 478 goto find_page;
@@ -445,7 +484,7 @@ find_page:
445 ret = -ENOMEM; 484 ret = -ENOMEM;
446 page = find_or_create_page(mapping, index, gfp_mask); 485 page = find_or_create_page(mapping, index, gfp_mask);
447 if (!page) 486 if (!page)
448 goto out; 487 goto out_nomem;
449 488
450 /* 489 /*
451 * If the page is uptodate, it is also locked. If it isn't 490 * If the page is uptodate, it is also locked. If it isn't
@@ -462,7 +501,7 @@ find_page:
462 501
463 if (!PageUptodate(page)) { 502 if (!PageUptodate(page)) {
464 /* 503 /*
465 * page got invalidated, repeat 504 * Page got invalidated, repeat.
466 */ 505 */
467 if (!page->mapping) { 506 if (!page->mapping) {
468 unlock_page(page); 507 unlock_page(page);
@@ -501,12 +540,14 @@ find_page:
501 } else if (ret) 540 } else if (ret)
502 goto out; 541 goto out;
503 542
543 mark_page_accessed(page);
504 balance_dirty_pages_ratelimited(mapping); 544 balance_dirty_pages_ratelimited(mapping);
505out: 545out:
506 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 546 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) {
507 page_cache_release(page); 547 page_cache_release(page);
508 unlock_page(page); 548 unlock_page(page);
509 } 549 }
550out_nomem:
510 buf->ops->unmap(info, buf); 551 buf->ops->unmap(info, buf);
511 return ret; 552 return ret;
512} 553}
@@ -519,11 +560,10 @@ typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
519 * key here is the 'actor' worker passed in that actually moves the data 560 * key here is the 'actor' worker passed in that actually moves the data
520 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 561 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
521 */ 562 */
522static ssize_t move_from_pipe(struct inode *inode, struct file *out, 563static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
523 size_t len, unsigned int flags, 564 size_t len, unsigned int flags,
524 splice_actor *actor) 565 splice_actor *actor)
525{ 566{
526 struct pipe_inode_info *info;
527 int ret, do_wakeup, err; 567 int ret, do_wakeup, err;
528 struct splice_desc sd; 568 struct splice_desc sd;
529 569
@@ -535,22 +575,19 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
535 sd.file = out; 575 sd.file = out;
536 sd.pos = out->f_pos; 576 sd.pos = out->f_pos;
537 577
538 mutex_lock(PIPE_MUTEX(*inode)); 578 if (pipe->inode)
579 mutex_lock(&pipe->inode->i_mutex);
539 580
540 info = inode->i_pipe;
541 for (;;) { 581 for (;;) {
542 int bufs = info->nrbufs; 582 if (pipe->nrbufs) {
543 583 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
544 if (bufs) {
545 int curbuf = info->curbuf;
546 struct pipe_buffer *buf = info->bufs + curbuf;
547 struct pipe_buf_operations *ops = buf->ops; 584 struct pipe_buf_operations *ops = buf->ops;
548 585
549 sd.len = buf->len; 586 sd.len = buf->len;
550 if (sd.len > sd.total_len) 587 if (sd.len > sd.total_len)
551 sd.len = sd.total_len; 588 sd.len = sd.total_len;
552 589
553 err = actor(info, buf, &sd); 590 err = actor(pipe, buf, &sd);
554 if (err) { 591 if (err) {
555 if (!ret && err != -ENODATA) 592 if (!ret && err != -ENODATA)
556 ret = err; 593 ret = err;
@@ -561,13 +598,14 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
561 ret += sd.len; 598 ret += sd.len;
562 buf->offset += sd.len; 599 buf->offset += sd.len;
563 buf->len -= sd.len; 600 buf->len -= sd.len;
601
564 if (!buf->len) { 602 if (!buf->len) {
565 buf->ops = NULL; 603 buf->ops = NULL;
566 ops->release(info, buf); 604 ops->release(pipe, buf);
567 curbuf = (curbuf + 1) & (PIPE_BUFFERS - 1); 605 pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
568 info->curbuf = curbuf; 606 pipe->nrbufs--;
569 info->nrbufs = --bufs; 607 if (pipe->inode)
570 do_wakeup = 1; 608 do_wakeup = 1;
571 } 609 }
572 610
573 sd.pos += sd.len; 611 sd.pos += sd.len;
@@ -576,11 +614,11 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
576 break; 614 break;
577 } 615 }
578 616
579 if (bufs) 617 if (pipe->nrbufs)
580 continue; 618 continue;
581 if (!PIPE_WRITERS(*inode)) 619 if (!pipe->writers)
582 break; 620 break;
583 if (!PIPE_WAITING_WRITERS(*inode)) { 621 if (!pipe->waiting_writers) {
584 if (ret) 622 if (ret)
585 break; 623 break;
586 } 624 }
@@ -598,31 +636,34 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
598 } 636 }
599 637
600 if (do_wakeup) { 638 if (do_wakeup) {
601 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 639 smp_mb();
602 kill_fasync(PIPE_FASYNC_WRITERS(*inode),SIGIO,POLL_OUT); 640 if (waitqueue_active(&pipe->wait))
641 wake_up_interruptible_sync(&pipe->wait);
642 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
603 do_wakeup = 0; 643 do_wakeup = 0;
604 } 644 }
605 645
606 pipe_wait(inode); 646 pipe_wait(pipe);
607 } 647 }
608 648
609 mutex_unlock(PIPE_MUTEX(*inode)); 649 if (pipe->inode)
650 mutex_unlock(&pipe->inode->i_mutex);
610 651
611 if (do_wakeup) { 652 if (do_wakeup) {
612 wake_up_interruptible(PIPE_WAIT(*inode)); 653 smp_mb();
613 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 654 if (waitqueue_active(&pipe->wait))
655 wake_up_interruptible(&pipe->wait);
656 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
614 } 657 }
615 658
616 mutex_lock(&out->f_mapping->host->i_mutex);
617 out->f_pos = sd.pos; 659 out->f_pos = sd.pos;
618 mutex_unlock(&out->f_mapping->host->i_mutex);
619 return ret; 660 return ret;
620 661
621} 662}
622 663
623/** 664/**
624 * generic_file_splice_write - splice data from a pipe to a file 665 * generic_file_splice_write - splice data from a pipe to a file
625 * @inode: pipe inode 666 * @pipe: pipe info
626 * @out: file to write to 667 * @out: file to write to
627 * @len: number of bytes to splice 668 * @len: number of bytes to splice
628 * @flags: splice modifier flags 669 * @flags: splice modifier flags
@@ -631,14 +672,17 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
631 * the given pipe inode to the given file. 672 * the given pipe inode to the given file.
632 * 673 *
633 */ 674 */
634ssize_t generic_file_splice_write(struct inode *inode, struct file *out, 675ssize_t
635 size_t len, unsigned int flags) 676generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
677 size_t len, unsigned int flags)
636{ 678{
637 struct address_space *mapping = out->f_mapping; 679 struct address_space *mapping = out->f_mapping;
638 ssize_t ret = move_from_pipe(inode, out, len, flags, pipe_to_file); 680 ssize_t ret;
681
682 ret = move_from_pipe(pipe, out, len, flags, pipe_to_file);
639 683
640 /* 684 /*
641 * if file or inode is SYNC and we actually wrote some data, sync it 685 * If file or inode is SYNC and we actually wrote some data, sync it.
642 */ 686 */
643 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host)) 687 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host))
644 && ret > 0) { 688 && ret > 0) {
@@ -647,7 +691,7 @@ ssize_t generic_file_splice_write(struct inode *inode, struct file *out,
647 691
648 mutex_lock(&inode->i_mutex); 692 mutex_lock(&inode->i_mutex);
649 err = generic_osync_inode(mapping->host, mapping, 693 err = generic_osync_inode(mapping->host, mapping,
650 OSYNC_METADATA|OSYNC_DATA); 694 OSYNC_METADATA|OSYNC_DATA);
651 mutex_unlock(&inode->i_mutex); 695 mutex_unlock(&inode->i_mutex);
652 696
653 if (err) 697 if (err)
@@ -670,10 +714,10 @@ EXPORT_SYMBOL(generic_file_splice_write);
670 * is involved. 714 * is involved.
671 * 715 *
672 */ 716 */
673ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, 717ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
674 size_t len, unsigned int flags) 718 size_t len, unsigned int flags)
675{ 719{
676 return move_from_pipe(inode, out, len, flags, pipe_to_sendpage); 720 return move_from_pipe(pipe, out, len, flags, pipe_to_sendpage);
677} 721}
678 722
679EXPORT_SYMBOL(generic_splice_sendpage); 723EXPORT_SYMBOL(generic_splice_sendpage);
@@ -681,19 +725,20 @@ EXPORT_SYMBOL(generic_splice_sendpage);
681/* 725/*
682 * Attempt to initiate a splice from pipe to file. 726 * Attempt to initiate a splice from pipe to file.
683 */ 727 */
684static long do_splice_from(struct inode *pipe, struct file *out, size_t len, 728static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
685 unsigned int flags) 729 size_t len, unsigned int flags)
686{ 730{
687 loff_t pos; 731 loff_t pos;
688 int ret; 732 int ret;
689 733
690 if (!out->f_op || !out->f_op->splice_write) 734 if (unlikely(!out->f_op || !out->f_op->splice_write))
691 return -EINVAL; 735 return -EINVAL;
692 736
693 if (!(out->f_mode & FMODE_WRITE)) 737 if (unlikely(!(out->f_mode & FMODE_WRITE)))
694 return -EBADF; 738 return -EBADF;
695 739
696 pos = out->f_pos; 740 pos = out->f_pos;
741
697 ret = rw_verify_area(WRITE, out, &pos, len); 742 ret = rw_verify_area(WRITE, out, &pos, len);
698 if (unlikely(ret < 0)) 743 if (unlikely(ret < 0))
699 return ret; 744 return ret;
@@ -704,19 +749,20 @@ static long do_splice_from(struct inode *pipe, struct file *out, size_t len,
704/* 749/*
705 * Attempt to initiate a splice from a file to a pipe. 750 * Attempt to initiate a splice from a file to a pipe.
706 */ 751 */
707static long do_splice_to(struct file *in, struct inode *pipe, size_t len, 752static long do_splice_to(struct file *in, struct pipe_inode_info *pipe,
708 unsigned int flags) 753 size_t len, unsigned int flags)
709{ 754{
710 loff_t pos, isize, left; 755 loff_t pos, isize, left;
711 int ret; 756 int ret;
712 757
713 if (!in->f_op || !in->f_op->splice_read) 758 if (unlikely(!in->f_op || !in->f_op->splice_read))
714 return -EINVAL; 759 return -EINVAL;
715 760
716 if (!(in->f_mode & FMODE_READ)) 761 if (unlikely(!(in->f_mode & FMODE_READ)))
717 return -EBADF; 762 return -EBADF;
718 763
719 pos = in->f_pos; 764 pos = in->f_pos;
765
720 ret = rw_verify_area(READ, in, &pos, len); 766 ret = rw_verify_area(READ, in, &pos, len);
721 if (unlikely(ret < 0)) 767 if (unlikely(ret < 0))
722 return ret; 768 return ret;
@@ -726,32 +772,168 @@ static long do_splice_to(struct file *in, struct inode *pipe, size_t len,
726 return 0; 772 return 0;
727 773
728 left = isize - in->f_pos; 774 left = isize - in->f_pos;
729 if (left < len) 775 if (unlikely(left < len))
730 len = left; 776 len = left;
731 777
732 return in->f_op->splice_read(in, pipe, len, flags); 778 return in->f_op->splice_read(in, pipe, len, flags);
733} 779}
734 780
781long do_splice_direct(struct file *in, struct file *out, size_t len,
782 unsigned int flags)
783{
784 struct pipe_inode_info *pipe;
785 long ret, bytes;
786 umode_t i_mode;
787 int i;
788
789 /*
790 * We require the input to be a regular file or block device, as we don't
791 * want to randomly drop data for e.g. socket -> socket splicing. Use the
792 * piped splicing for that!
793 */
794 i_mode = in->f_dentry->d_inode->i_mode;
795 if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
796 return -EINVAL;
797
798 /*
799 * neither in nor out is a pipe, set up an internal pipe attached to
800 * 'out' and transfer the wanted data from 'in' to 'out' through that
801 */
802 pipe = current->splice_pipe;
803 if (unlikely(!pipe)) {
804 pipe = alloc_pipe_info(NULL);
805 if (!pipe)
806 return -ENOMEM;
807
808 /*
809 * We don't have an immediate reader, but we'll read the stuff
810 * out of the pipe right after the move_to_pipe(). So set
811 * pipe->readers appropriately.
812 */
813 pipe->readers = 1;
814
815 current->splice_pipe = pipe;
816 }
817
818 /*
819 * Do the splice.
820 */
821 ret = 0;
822 bytes = 0;
823
824 while (len) {
825 size_t read_len, max_read_len;
826
827 /*
828 * Do at most PIPE_BUFFERS pages worth of transfer:
829 */
830 max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
831
832 ret = do_splice_to(in, pipe, max_read_len, flags);
833 if (unlikely(ret < 0))
834 goto out_release;
835
836 read_len = ret;
837
838 /*
839 * NOTE: nonblocking mode only applies to the input. We
840 * must not do the output in nonblocking mode as then we
841 * could get stuck data in the internal pipe:
842 */
843 ret = do_splice_from(pipe, out, read_len,
844 flags & ~SPLICE_F_NONBLOCK);
845 if (unlikely(ret < 0))
846 goto out_release;
847
848 bytes += ret;
849 len -= ret;
850
851 /*
852 * In nonblocking mode, if we got back a short read then
853 * that was due to either an IO error or due to the
854 * pagecache entry not being there. In the IO error case
855 * the _next_ splice attempt will produce a clean IO error
856 * return value (not a short read), so in both cases it's
857 * correct to break out of the loop here:
858 */
859 if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len))
860 break;
861 }
862
863 pipe->nrbufs = pipe->curbuf = 0;
864
865 return bytes;
866
867out_release:
868 /*
869 * If we did an incomplete transfer we must release
870 * the pipe buffers in question:
871 */
872 for (i = 0; i < PIPE_BUFFERS; i++) {
873 struct pipe_buffer *buf = pipe->bufs + i;
874
875 if (buf->ops) {
876 buf->ops->release(pipe, buf);
877 buf->ops = NULL;
878 }
879 }
880 pipe->nrbufs = pipe->curbuf = 0;
881
882 /*
883 * If we transferred some data, return the number of bytes:
884 */
885 if (bytes > 0)
886 return bytes;
887
888 return ret;
889}
890
891EXPORT_SYMBOL(do_splice_direct);
892
735/* 893/*
736 * Determine where to splice to/from. 894 * Determine where to splice to/from.
737 */ 895 */
738static long do_splice(struct file *in, struct file *out, size_t len, 896static long do_splice(struct file *in, loff_t __user *off_in,
739 unsigned int flags) 897 struct file *out, loff_t __user *off_out,
898 size_t len, unsigned int flags)
740{ 899{
741 struct inode *pipe; 900 struct pipe_inode_info *pipe;
901
902 pipe = in->f_dentry->d_inode->i_pipe;
903 if (pipe) {
904 if (off_in)
905 return -ESPIPE;
906 if (off_out) {
907 if (out->f_op->llseek == no_llseek)
908 return -EINVAL;
909 if (copy_from_user(&out->f_pos, off_out,
910 sizeof(loff_t)))
911 return -EFAULT;
912 }
742 913
743 pipe = in->f_dentry->d_inode;
744 if (pipe->i_pipe)
745 return do_splice_from(pipe, out, len, flags); 914 return do_splice_from(pipe, out, len, flags);
915 }
916
917 pipe = out->f_dentry->d_inode->i_pipe;
918 if (pipe) {
919 if (off_out)
920 return -ESPIPE;
921 if (off_in) {
922 if (in->f_op->llseek == no_llseek)
923 return -EINVAL;
924 if (copy_from_user(&in->f_pos, off_in, sizeof(loff_t)))
925 return -EFAULT;
926 }
746 927
747 pipe = out->f_dentry->d_inode;
748 if (pipe->i_pipe)
749 return do_splice_to(in, pipe, len, flags); 928 return do_splice_to(in, pipe, len, flags);
929 }
750 930
751 return -EINVAL; 931 return -EINVAL;
752} 932}
753 933
754asmlinkage long sys_splice(int fdin, int fdout, size_t len, unsigned int flags) 934asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
935 int fd_out, loff_t __user *off_out,
936 size_t len, unsigned int flags)
755{ 937{
756 long error; 938 long error;
757 struct file *in, *out; 939 struct file *in, *out;
@@ -761,13 +943,15 @@ asmlinkage long sys_splice(int fdin, int fdout, size_t len, unsigned int flags)
761 return 0; 943 return 0;
762 944
763 error = -EBADF; 945 error = -EBADF;
764 in = fget_light(fdin, &fput_in); 946 in = fget_light(fd_in, &fput_in);
765 if (in) { 947 if (in) {
766 if (in->f_mode & FMODE_READ) { 948 if (in->f_mode & FMODE_READ) {
767 out = fget_light(fdout, &fput_out); 949 out = fget_light(fd_out, &fput_out);
768 if (out) { 950 if (out) {
769 if (out->f_mode & FMODE_WRITE) 951 if (out->f_mode & FMODE_WRITE)
770 error = do_splice(in, out, len, flags); 952 error = do_splice(in, off_in,
953 out, off_out,
954 len, flags);
771 fput_light(out, fput_out); 955 fput_light(out, fput_out);
772 } 956 }
773 } 957 }
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index ae4c4754ed31..269721af02f3 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -252,7 +252,7 @@ xfs_file_sendfile_invis(
252STATIC ssize_t 252STATIC ssize_t
253xfs_file_splice_read( 253xfs_file_splice_read(
254 struct file *infilp, 254 struct file *infilp,
255 struct inode *pipe, 255 struct pipe_inode_info *pipe,
256 size_t len, 256 size_t len,
257 unsigned int flags) 257 unsigned int flags)
258{ 258{
@@ -266,7 +266,7 @@ xfs_file_splice_read(
266STATIC ssize_t 266STATIC ssize_t
267xfs_file_splice_read_invis( 267xfs_file_splice_read_invis(
268 struct file *infilp, 268 struct file *infilp,
269 struct inode *pipe, 269 struct pipe_inode_info *pipe,
270 size_t len, 270 size_t len,
271 unsigned int flags) 271 unsigned int flags)
272{ 272{
@@ -279,7 +279,7 @@ xfs_file_splice_read_invis(
279 279
280STATIC ssize_t 280STATIC ssize_t
281xfs_file_splice_write( 281xfs_file_splice_write(
282 struct inode *pipe, 282 struct pipe_inode_info *pipe,
283 struct file *outfilp, 283 struct file *outfilp,
284 size_t len, 284 size_t len,
285 unsigned int flags) 285 unsigned int flags)
@@ -293,7 +293,7 @@ xfs_file_splice_write(
293 293
294STATIC ssize_t 294STATIC ssize_t
295xfs_file_splice_write_invis( 295xfs_file_splice_write_invis(
296 struct inode *pipe, 296 struct pipe_inode_info *pipe,
297 struct file *outfilp, 297 struct file *outfilp,
298 size_t len, 298 size_t len,
299 unsigned int flags) 299 unsigned int flags)
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 90cd314acbaa..74a52937f208 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -338,7 +338,7 @@ ssize_t
338xfs_splice_read( 338xfs_splice_read(
339 bhv_desc_t *bdp, 339 bhv_desc_t *bdp,
340 struct file *infilp, 340 struct file *infilp,
341 struct inode *pipe, 341 struct pipe_inode_info *pipe,
342 size_t count, 342 size_t count,
343 int flags, 343 int flags,
344 int ioflags, 344 int ioflags,
@@ -380,7 +380,7 @@ xfs_splice_read(
380ssize_t 380ssize_t
381xfs_splice_write( 381xfs_splice_write(
382 bhv_desc_t *bdp, 382 bhv_desc_t *bdp,
383 struct inode *pipe, 383 struct pipe_inode_info *pipe,
384 struct file *outfilp, 384 struct file *outfilp,
385 size_t count, 385 size_t count,
386 int flags, 386 int flags,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index eaa5659713fb..55c689a86ad2 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -94,9 +94,9 @@ extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
94 loff_t *, int, size_t, read_actor_t, 94 loff_t *, int, size_t, read_actor_t,
95 void *, struct cred *); 95 void *, struct cred *);
96extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, 96extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *,
97 struct inode *, size_t, int, int, 97 struct pipe_inode_info *, size_t, int, int,
98 struct cred *); 98 struct cred *);
99extern ssize_t xfs_splice_write(struct bhv_desc *, struct inode *, 99extern ssize_t xfs_splice_write(struct bhv_desc *, struct pipe_inode_info *,
100 struct file *, size_t, int, int, 100 struct file *, size_t, int, int,
101 struct cred *); 101 struct cred *);
102 102
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 6f1c79a28f8b..88b09f186289 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -174,9 +174,9 @@ typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
174 loff_t *, int, size_t, read_actor_t, 174 loff_t *, int, size_t, read_actor_t,
175 void *, struct cred *); 175 void *, struct cred *);
176typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, 176typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *,
177 struct inode *, size_t, int, int, 177 struct pipe_inode_info *, size_t, int, int,
178 struct cred *); 178 struct cred *);
179typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct inode *, 179typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct pipe_inode_info *,
180 struct file *, size_t, int, int, 180 struct file *, size_t, int, int,
181 struct cred *); 181 struct cred *);
182typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *, 182typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *,