aboutsummaryrefslogtreecommitdiffstats
path: root/fs/splice.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/splice.c')
-rw-r--r--fs/splice.c492
1 files changed, 338 insertions, 154 deletions
diff --git a/fs/splice.c b/fs/splice.c
index bfa42a277bb8..e50a460239dd 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -9,11 +9,12 @@
9 * that transfers data buffers to or from a pipe buffer. 9 * that transfers data buffers to or from a pipe buffer.
10 * 10 *
11 * Named by Larry McVoy, original implementation from Linus, extended by 11 * Named by Larry McVoy, original implementation from Linus, extended by
12 * Jens to support splicing to files and fixing the initial implementation 12 * Jens to support splicing to files, network, direct splicing, etc and
13 * bugs. 13 * fixing lots of bugs.
14 * 14 *
15 * Copyright (C) 2005 Jens Axboe <axboe@suse.de> 15 * Copyright (C) 2005-2006 Jens Axboe <axboe@suse.de>
16 * Copyright (C) 2005 Linus Torvalds <torvalds@osdl.org> 16 * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
17 * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
17 * 18 *
18 */ 19 */
19#include <linux/fs.h> 20#include <linux/fs.h>
@@ -84,26 +85,43 @@ static void *page_cache_pipe_buf_map(struct file *file,
84 struct pipe_buffer *buf) 85 struct pipe_buffer *buf)
85{ 86{
86 struct page *page = buf->page; 87 struct page *page = buf->page;
87 88 int err;
88 lock_page(page);
89 89
90 if (!PageUptodate(page)) { 90 if (!PageUptodate(page)) {
91 unlock_page(page); 91 lock_page(page);
92 return ERR_PTR(-EIO); 92
93 } 93 /*
94 * Page got truncated/unhashed. This will cause a 0-byte
95 * splice, if this is the first page.
96 */
97 if (!page->mapping) {
98 err = -ENODATA;
99 goto error;
100 }
94 101
95 if (!page->mapping) { 102 /*
103 * Uh oh, read-error from disk.
104 */
105 if (!PageUptodate(page)) {
106 err = -EIO;
107 goto error;
108 }
109
110 /*
111 * Page is ok afterall, fall through to mapping.
112 */
96 unlock_page(page); 113 unlock_page(page);
97 return ERR_PTR(-ENODATA);
98 } 114 }
99 115
100 return kmap(buf->page); 116 return kmap(page);
117error:
118 unlock_page(page);
119 return ERR_PTR(err);
101} 120}
102 121
103static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 122static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
104 struct pipe_buffer *buf) 123 struct pipe_buffer *buf)
105{ 124{
106 unlock_page(buf->page);
107 kunmap(buf->page); 125 kunmap(buf->page);
108} 126}
109 127
@@ -119,34 +137,30 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = {
119 * Pipe output worker. This sets up our pipe format with the page cache 137 * Pipe output worker. This sets up our pipe format with the page cache
120 * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 138 * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
121 */ 139 */
122static ssize_t move_to_pipe(struct inode *inode, struct page **pages, 140static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
123 int nr_pages, unsigned long offset, 141 int nr_pages, unsigned long offset,
124 unsigned long len, unsigned int flags) 142 unsigned long len, unsigned int flags)
125{ 143{
126 struct pipe_inode_info *info;
127 int ret, do_wakeup, i; 144 int ret, do_wakeup, i;
128 145
129 ret = 0; 146 ret = 0;
130 do_wakeup = 0; 147 do_wakeup = 0;
131 i = 0; 148 i = 0;
132 149
133 mutex_lock(PIPE_MUTEX(*inode)); 150 if (pipe->inode)
151 mutex_lock(&pipe->inode->i_mutex);
134 152
135 info = inode->i_pipe;
136 for (;;) { 153 for (;;) {
137 int bufs; 154 if (!pipe->readers) {
138
139 if (!PIPE_READERS(*inode)) {
140 send_sig(SIGPIPE, current, 0); 155 send_sig(SIGPIPE, current, 0);
141 if (!ret) 156 if (!ret)
142 ret = -EPIPE; 157 ret = -EPIPE;
143 break; 158 break;
144 } 159 }
145 160
146 bufs = info->nrbufs; 161 if (pipe->nrbufs < PIPE_BUFFERS) {
147 if (bufs < PIPE_BUFFERS) { 162 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
148 int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS - 1); 163 struct pipe_buffer *buf = pipe->bufs + newbuf;
149 struct pipe_buffer *buf = info->bufs + newbuf;
150 struct page *page = pages[i++]; 164 struct page *page = pages[i++];
151 unsigned long this_len; 165 unsigned long this_len;
152 166
@@ -158,8 +172,9 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
158 buf->offset = offset; 172 buf->offset = offset;
159 buf->len = this_len; 173 buf->len = this_len;
160 buf->ops = &page_cache_pipe_buf_ops; 174 buf->ops = &page_cache_pipe_buf_ops;
161 info->nrbufs = ++bufs; 175 pipe->nrbufs++;
162 do_wakeup = 1; 176 if (pipe->inode)
177 do_wakeup = 1;
163 178
164 ret += this_len; 179 ret += this_len;
165 len -= this_len; 180 len -= this_len;
@@ -168,7 +183,7 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
168 break; 183 break;
169 if (!len) 184 if (!len)
170 break; 185 break;
171 if (bufs < PIPE_BUFFERS) 186 if (pipe->nrbufs < PIPE_BUFFERS)
172 continue; 187 continue;
173 188
174 break; 189 break;
@@ -187,22 +202,26 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
187 } 202 }
188 203
189 if (do_wakeup) { 204 if (do_wakeup) {
190 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 205 smp_mb();
191 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, 206 if (waitqueue_active(&pipe->wait))
192 POLL_IN); 207 wake_up_interruptible_sync(&pipe->wait);
208 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
193 do_wakeup = 0; 209 do_wakeup = 0;
194 } 210 }
195 211
196 PIPE_WAITING_WRITERS(*inode)++; 212 pipe->waiting_writers++;
197 pipe_wait(inode); 213 pipe_wait(pipe);
198 PIPE_WAITING_WRITERS(*inode)--; 214 pipe->waiting_writers--;
199 } 215 }
200 216
201 mutex_unlock(PIPE_MUTEX(*inode)); 217 if (pipe->inode)
218 mutex_unlock(&pipe->inode->i_mutex);
202 219
203 if (do_wakeup) { 220 if (do_wakeup) {
204 wake_up_interruptible(PIPE_WAIT(*inode)); 221 smp_mb();
205 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 222 if (waitqueue_active(&pipe->wait))
223 wake_up_interruptible(&pipe->wait);
224 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
206 } 225 }
207 226
208 while (i < nr_pages) 227 while (i < nr_pages)
@@ -211,15 +230,16 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
211 return ret; 230 return ret;
212} 231}
213 232
214static int __generic_file_splice_read(struct file *in, struct inode *pipe, 233static int
215 size_t len, unsigned int flags) 234__generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe,
235 size_t len, unsigned int flags)
216{ 236{
217 struct address_space *mapping = in->f_mapping; 237 struct address_space *mapping = in->f_mapping;
218 unsigned int offset, nr_pages; 238 unsigned int offset, nr_pages;
219 struct page *pages[PIPE_BUFFERS], *shadow[PIPE_BUFFERS]; 239 struct page *pages[PIPE_BUFFERS];
220 struct page *page; 240 struct page *page;
221 pgoff_t index, pidx; 241 pgoff_t index;
222 int i, j; 242 int i, error;
223 243
224 index = in->f_pos >> PAGE_CACHE_SHIFT; 244 index = in->f_pos >> PAGE_CACHE_SHIFT;
225 offset = in->f_pos & ~PAGE_CACHE_MASK; 245 offset = in->f_pos & ~PAGE_CACHE_MASK;
@@ -229,78 +249,94 @@ static int __generic_file_splice_read(struct file *in, struct inode *pipe,
229 nr_pages = PIPE_BUFFERS; 249 nr_pages = PIPE_BUFFERS;
230 250
231 /* 251 /*
232 * initiate read-ahead on this page range 252 * Initiate read-ahead on this page range. however, don't call into
233 */ 253 * read-ahead if this is a non-zero offset (we are likely doing small
234 do_page_cache_readahead(mapping, in, index, nr_pages); 254 * chunk splice and the page is already there) for a single page.
235
236 /*
237 * Get as many pages from the page cache as possible..
238 * Start IO on the page cache entries we create (we
239 * can assume that any pre-existing ones we find have
240 * already had IO started on them).
241 */ 255 */
242 i = find_get_pages(mapping, index, nr_pages, pages); 256 if (!offset || nr_pages > 1)
257 do_page_cache_readahead(mapping, in, index, nr_pages);
243 258
244 /* 259 /*
245 * common case - we found all pages and they are contiguous, 260 * Now fill in the holes:
246 * kick them off
247 */ 261 */
248 if (i && (pages[i - 1]->index == index + i - 1)) 262 error = 0;
249 goto splice_them; 263 for (i = 0; i < nr_pages; i++, index++) {
264find_page:
265 /*
266 * lookup the page for this index
267 */
268 page = find_get_page(mapping, index);
269 if (!page) {
270 /*
271 * If in nonblock mode then dont block on
272 * readpage (we've kicked readahead so there
273 * will be asynchronous progress):
274 */
275 if (flags & SPLICE_F_NONBLOCK)
276 break;
250 277
251 /* 278 /*
252 * fill shadow[] with pages at the right locations, so we only 279 * page didn't exist, allocate one
253 * have to fill holes 280 */
254 */ 281 page = page_cache_alloc_cold(mapping);
255 memset(shadow, 0, nr_pages * sizeof(struct page *)); 282 if (!page)
256 for (j = 0; j < i; j++) 283 break;
257 shadow[pages[j]->index - index] = pages[j];
258 284
259 /* 285 error = add_to_page_cache_lru(page, mapping, index,
260 * now fill in the holes 286 mapping_gfp_mask(mapping));
261 */ 287 if (unlikely(error)) {
262 for (i = 0, pidx = index; i < nr_pages; pidx++, i++) { 288 page_cache_release(page);
263 int error; 289 break;
290 }
264 291
265 if (shadow[i]) 292 goto readpage;
266 continue; 293 }
267 294
268 /* 295 /*
269 * no page there, look one up / create it 296 * If the page isn't uptodate, we may need to start io on it
270 */ 297 */
271 page = find_or_create_page(mapping, pidx, 298 if (!PageUptodate(page)) {
272 mapping_gfp_mask(mapping)); 299 lock_page(page);
273 if (!page) 300
274 break; 301 /*
302 * page was truncated, stop here. if this isn't the
303 * first page, we'll just complete what we already
304 * added
305 */
306 if (!page->mapping) {
307 unlock_page(page);
308 page_cache_release(page);
309 break;
310 }
311 /*
312 * page was already under io and is now done, great
313 */
314 if (PageUptodate(page)) {
315 unlock_page(page);
316 goto fill_it;
317 }
275 318
276 if (PageUptodate(page)) 319readpage:
277 unlock_page(page); 320 /*
278 else { 321 * need to read in the page
322 */
279 error = mapping->a_ops->readpage(in, page); 323 error = mapping->a_ops->readpage(in, page);
280 324
281 if (unlikely(error)) { 325 if (unlikely(error)) {
282 page_cache_release(page); 326 page_cache_release(page);
327 if (error == AOP_TRUNCATED_PAGE)
328 goto find_page;
283 break; 329 break;
284 } 330 }
285 } 331 }
286 shadow[i] = page; 332fill_it:
333 pages[i] = page;
287 } 334 }
288 335
289 if (!i) { 336 if (i)
290 for (i = 0; i < nr_pages; i++) { 337 return move_to_pipe(pipe, pages, i, offset, len, flags);
291 if (shadow[i])
292 page_cache_release(shadow[i]);
293 }
294 return 0;
295 }
296 338
297 memcpy(pages, shadow, i * sizeof(struct page *)); 339 return error;
298
299 /*
300 * Now we splice them into the pipe..
301 */
302splice_them:
303 return move_to_pipe(pipe, pages, i, offset, len, flags);
304} 340}
305 341
306/** 342/**
@@ -311,9 +347,8 @@ splice_them:
311 * @flags: splice modifier flags 347 * @flags: splice modifier flags
312 * 348 *
313 * Will read pages from given file and fill them into a pipe. 349 * Will read pages from given file and fill them into a pipe.
314 *
315 */ 350 */
316ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, 351ssize_t generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe,
317 size_t len, unsigned int flags) 352 size_t len, unsigned int flags)
318{ 353{
319 ssize_t spliced; 354 ssize_t spliced;
@@ -321,6 +356,7 @@ ssize_t generic_file_splice_read(struct file *in, struct inode *pipe,
321 356
322 ret = 0; 357 ret = 0;
323 spliced = 0; 358 spliced = 0;
359
324 while (len) { 360 while (len) {
325 ret = __generic_file_splice_read(in, pipe, len, flags); 361 ret = __generic_file_splice_read(in, pipe, len, flags);
326 362
@@ -360,10 +396,10 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
360 int more; 396 int more;
361 397
362 /* 398 /*
363 * sub-optimal, but we are limited by the pipe ->map. we don't 399 * Sub-optimal, but we are limited by the pipe ->map. We don't
364 * need a kmap'ed buffer here, we just want to make sure we 400 * need a kmap'ed buffer here, we just want to make sure we
365 * have the page pinned if the pipe page originates from the 401 * have the page pinned if the pipe page originates from the
366 * page cache 402 * page cache.
367 */ 403 */
368 ptr = buf->ops->map(file, info, buf); 404 ptr = buf->ops->map(file, info, buf);
369 if (IS_ERR(ptr)) 405 if (IS_ERR(ptr))
@@ -414,7 +450,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
414 int ret; 450 int ret;
415 451
416 /* 452 /*
417 * after this, page will be locked and unmapped 453 * make sure the data in this buffer is uptodate
418 */ 454 */
419 src = buf->ops->map(file, info, buf); 455 src = buf->ops->map(file, info, buf);
420 if (IS_ERR(src)) 456 if (IS_ERR(src))
@@ -424,7 +460,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
424 offset = sd->pos & ~PAGE_CACHE_MASK; 460 offset = sd->pos & ~PAGE_CACHE_MASK;
425 461
426 /* 462 /*
427 * reuse buf page, if SPLICE_F_MOVE is set 463 * Reuse buf page, if SPLICE_F_MOVE is set.
428 */ 464 */
429 if (sd->flags & SPLICE_F_MOVE) { 465 if (sd->flags & SPLICE_F_MOVE) {
430 /* 466 /*
@@ -434,6 +470,9 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
434 if (buf->ops->steal(info, buf)) 470 if (buf->ops->steal(info, buf))
435 goto find_page; 471 goto find_page;
436 472
473 /*
474 * this will also set the page locked
475 */
437 page = buf->page; 476 page = buf->page;
438 if (add_to_page_cache(page, mapping, index, gfp_mask)) 477 if (add_to_page_cache(page, mapping, index, gfp_mask))
439 goto find_page; 478 goto find_page;
@@ -445,7 +484,7 @@ find_page:
445 ret = -ENOMEM; 484 ret = -ENOMEM;
446 page = find_or_create_page(mapping, index, gfp_mask); 485 page = find_or_create_page(mapping, index, gfp_mask);
447 if (!page) 486 if (!page)
448 goto out; 487 goto out_nomem;
449 488
450 /* 489 /*
451 * If the page is uptodate, it is also locked. If it isn't 490 * If the page is uptodate, it is also locked. If it isn't
@@ -462,7 +501,7 @@ find_page:
462 501
463 if (!PageUptodate(page)) { 502 if (!PageUptodate(page)) {
464 /* 503 /*
465 * page got invalidated, repeat 504 * Page got invalidated, repeat.
466 */ 505 */
467 if (!page->mapping) { 506 if (!page->mapping) {
468 unlock_page(page); 507 unlock_page(page);
@@ -501,12 +540,14 @@ find_page:
501 } else if (ret) 540 } else if (ret)
502 goto out; 541 goto out;
503 542
543 mark_page_accessed(page);
504 balance_dirty_pages_ratelimited(mapping); 544 balance_dirty_pages_ratelimited(mapping);
505out: 545out:
506 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 546 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) {
507 page_cache_release(page); 547 page_cache_release(page);
508 unlock_page(page); 548 unlock_page(page);
509 } 549 }
550out_nomem:
510 buf->ops->unmap(info, buf); 551 buf->ops->unmap(info, buf);
511 return ret; 552 return ret;
512} 553}
@@ -519,11 +560,10 @@ typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
519 * key here is the 'actor' worker passed in that actually moves the data 560 * key here is the 'actor' worker passed in that actually moves the data
520 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 561 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
521 */ 562 */
522static ssize_t move_from_pipe(struct inode *inode, struct file *out, 563static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
523 size_t len, unsigned int flags, 564 size_t len, unsigned int flags,
524 splice_actor *actor) 565 splice_actor *actor)
525{ 566{
526 struct pipe_inode_info *info;
527 int ret, do_wakeup, err; 567 int ret, do_wakeup, err;
528 struct splice_desc sd; 568 struct splice_desc sd;
529 569
@@ -535,22 +575,19 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
535 sd.file = out; 575 sd.file = out;
536 sd.pos = out->f_pos; 576 sd.pos = out->f_pos;
537 577
538 mutex_lock(PIPE_MUTEX(*inode)); 578 if (pipe->inode)
579 mutex_lock(&pipe->inode->i_mutex);
539 580
540 info = inode->i_pipe;
541 for (;;) { 581 for (;;) {
542 int bufs = info->nrbufs; 582 if (pipe->nrbufs) {
543 583 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
544 if (bufs) {
545 int curbuf = info->curbuf;
546 struct pipe_buffer *buf = info->bufs + curbuf;
547 struct pipe_buf_operations *ops = buf->ops; 584 struct pipe_buf_operations *ops = buf->ops;
548 585
549 sd.len = buf->len; 586 sd.len = buf->len;
550 if (sd.len > sd.total_len) 587 if (sd.len > sd.total_len)
551 sd.len = sd.total_len; 588 sd.len = sd.total_len;
552 589
553 err = actor(info, buf, &sd); 590 err = actor(pipe, buf, &sd);
554 if (err) { 591 if (err) {
555 if (!ret && err != -ENODATA) 592 if (!ret && err != -ENODATA)
556 ret = err; 593 ret = err;
@@ -561,13 +598,14 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
561 ret += sd.len; 598 ret += sd.len;
562 buf->offset += sd.len; 599 buf->offset += sd.len;
563 buf->len -= sd.len; 600 buf->len -= sd.len;
601
564 if (!buf->len) { 602 if (!buf->len) {
565 buf->ops = NULL; 603 buf->ops = NULL;
566 ops->release(info, buf); 604 ops->release(pipe, buf);
567 curbuf = (curbuf + 1) & (PIPE_BUFFERS - 1); 605 pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
568 info->curbuf = curbuf; 606 pipe->nrbufs--;
569 info->nrbufs = --bufs; 607 if (pipe->inode)
570 do_wakeup = 1; 608 do_wakeup = 1;
571 } 609 }
572 610
573 sd.pos += sd.len; 611 sd.pos += sd.len;
@@ -576,11 +614,11 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
576 break; 614 break;
577 } 615 }
578 616
579 if (bufs) 617 if (pipe->nrbufs)
580 continue; 618 continue;
581 if (!PIPE_WRITERS(*inode)) 619 if (!pipe->writers)
582 break; 620 break;
583 if (!PIPE_WAITING_WRITERS(*inode)) { 621 if (!pipe->waiting_writers) {
584 if (ret) 622 if (ret)
585 break; 623 break;
586 } 624 }
@@ -598,31 +636,34 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
598 } 636 }
599 637
600 if (do_wakeup) { 638 if (do_wakeup) {
601 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 639 smp_mb();
602 kill_fasync(PIPE_FASYNC_WRITERS(*inode),SIGIO,POLL_OUT); 640 if (waitqueue_active(&pipe->wait))
641 wake_up_interruptible_sync(&pipe->wait);
642 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
603 do_wakeup = 0; 643 do_wakeup = 0;
604 } 644 }
605 645
606 pipe_wait(inode); 646 pipe_wait(pipe);
607 } 647 }
608 648
609 mutex_unlock(PIPE_MUTEX(*inode)); 649 if (pipe->inode)
650 mutex_unlock(&pipe->inode->i_mutex);
610 651
611 if (do_wakeup) { 652 if (do_wakeup) {
612 wake_up_interruptible(PIPE_WAIT(*inode)); 653 smp_mb();
613 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 654 if (waitqueue_active(&pipe->wait))
655 wake_up_interruptible(&pipe->wait);
656 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
614 } 657 }
615 658
616 mutex_lock(&out->f_mapping->host->i_mutex);
617 out->f_pos = sd.pos; 659 out->f_pos = sd.pos;
618 mutex_unlock(&out->f_mapping->host->i_mutex);
619 return ret; 660 return ret;
620 661
621} 662}
622 663
623/** 664/**
624 * generic_file_splice_write - splice data from a pipe to a file 665 * generic_file_splice_write - splice data from a pipe to a file
625 * @inode: pipe inode 666 * @pipe: pipe info
626 * @out: file to write to 667 * @out: file to write to
627 * @len: number of bytes to splice 668 * @len: number of bytes to splice
628 * @flags: splice modifier flags 669 * @flags: splice modifier flags
@@ -631,14 +672,17 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
631 * the given pipe inode to the given file. 672 * the given pipe inode to the given file.
632 * 673 *
633 */ 674 */
634ssize_t generic_file_splice_write(struct inode *inode, struct file *out, 675ssize_t
635 size_t len, unsigned int flags) 676generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
677 size_t len, unsigned int flags)
636{ 678{
637 struct address_space *mapping = out->f_mapping; 679 struct address_space *mapping = out->f_mapping;
638 ssize_t ret = move_from_pipe(inode, out, len, flags, pipe_to_file); 680 ssize_t ret;
681
682 ret = move_from_pipe(pipe, out, len, flags, pipe_to_file);
639 683
640 /* 684 /*
641 * if file or inode is SYNC and we actually wrote some data, sync it 685 * If file or inode is SYNC and we actually wrote some data, sync it.
642 */ 686 */
643 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host)) 687 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host))
644 && ret > 0) { 688 && ret > 0) {
@@ -647,7 +691,7 @@ ssize_t generic_file_splice_write(struct inode *inode, struct file *out,
647 691
648 mutex_lock(&inode->i_mutex); 692 mutex_lock(&inode->i_mutex);
649 err = generic_osync_inode(mapping->host, mapping, 693 err = generic_osync_inode(mapping->host, mapping,
650 OSYNC_METADATA|OSYNC_DATA); 694 OSYNC_METADATA|OSYNC_DATA);
651 mutex_unlock(&inode->i_mutex); 695 mutex_unlock(&inode->i_mutex);
652 696
653 if (err) 697 if (err)
@@ -670,10 +714,10 @@ EXPORT_SYMBOL(generic_file_splice_write);
670 * is involved. 714 * is involved.
671 * 715 *
672 */ 716 */
673ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, 717ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
674 size_t len, unsigned int flags) 718 size_t len, unsigned int flags)
675{ 719{
676 return move_from_pipe(inode, out, len, flags, pipe_to_sendpage); 720 return move_from_pipe(pipe, out, len, flags, pipe_to_sendpage);
677} 721}
678 722
679EXPORT_SYMBOL(generic_splice_sendpage); 723EXPORT_SYMBOL(generic_splice_sendpage);
@@ -681,19 +725,20 @@ EXPORT_SYMBOL(generic_splice_sendpage);
681/* 725/*
682 * Attempt to initiate a splice from pipe to file. 726 * Attempt to initiate a splice from pipe to file.
683 */ 727 */
684static long do_splice_from(struct inode *pipe, struct file *out, size_t len, 728static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
685 unsigned int flags) 729 size_t len, unsigned int flags)
686{ 730{
687 loff_t pos; 731 loff_t pos;
688 int ret; 732 int ret;
689 733
690 if (!out->f_op || !out->f_op->splice_write) 734 if (unlikely(!out->f_op || !out->f_op->splice_write))
691 return -EINVAL; 735 return -EINVAL;
692 736
693 if (!(out->f_mode & FMODE_WRITE)) 737 if (unlikely(!(out->f_mode & FMODE_WRITE)))
694 return -EBADF; 738 return -EBADF;
695 739
696 pos = out->f_pos; 740 pos = out->f_pos;
741
697 ret = rw_verify_area(WRITE, out, &pos, len); 742 ret = rw_verify_area(WRITE, out, &pos, len);
698 if (unlikely(ret < 0)) 743 if (unlikely(ret < 0))
699 return ret; 744 return ret;
@@ -704,19 +749,20 @@ static long do_splice_from(struct inode *pipe, struct file *out, size_t len,
704/* 749/*
705 * Attempt to initiate a splice from a file to a pipe. 750 * Attempt to initiate a splice from a file to a pipe.
706 */ 751 */
707static long do_splice_to(struct file *in, struct inode *pipe, size_t len, 752static long do_splice_to(struct file *in, struct pipe_inode_info *pipe,
708 unsigned int flags) 753 size_t len, unsigned int flags)
709{ 754{
710 loff_t pos, isize, left; 755 loff_t pos, isize, left;
711 int ret; 756 int ret;
712 757
713 if (!in->f_op || !in->f_op->splice_read) 758 if (unlikely(!in->f_op || !in->f_op->splice_read))
714 return -EINVAL; 759 return -EINVAL;
715 760
716 if (!(in->f_mode & FMODE_READ)) 761 if (unlikely(!(in->f_mode & FMODE_READ)))
717 return -EBADF; 762 return -EBADF;
718 763
719 pos = in->f_pos; 764 pos = in->f_pos;
765
720 ret = rw_verify_area(READ, in, &pos, len); 766 ret = rw_verify_area(READ, in, &pos, len);
721 if (unlikely(ret < 0)) 767 if (unlikely(ret < 0))
722 return ret; 768 return ret;
@@ -726,32 +772,168 @@ static long do_splice_to(struct file *in, struct inode *pipe, size_t len,
726 return 0; 772 return 0;
727 773
728 left = isize - in->f_pos; 774 left = isize - in->f_pos;
729 if (left < len) 775 if (unlikely(left < len))
730 len = left; 776 len = left;
731 777
732 return in->f_op->splice_read(in, pipe, len, flags); 778 return in->f_op->splice_read(in, pipe, len, flags);
733} 779}
734 780
781long do_splice_direct(struct file *in, struct file *out, size_t len,
782 unsigned int flags)
783{
784 struct pipe_inode_info *pipe;
785 long ret, bytes;
786 umode_t i_mode;
787 int i;
788
789 /*
790 * We require the input being a regular file, as we don't want to
791 * randomly drop data for eg socket -> socket splicing. Use the
792 * piped splicing for that!
793 */
794 i_mode = in->f_dentry->d_inode->i_mode;
795 if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
796 return -EINVAL;
797
798 /*
799 * neither in nor out is a pipe, setup an internal pipe attached to
800 * 'out' and transfer the wanted data from 'in' to 'out' through that
801 */
802 pipe = current->splice_pipe;
803 if (unlikely(!pipe)) {
804 pipe = alloc_pipe_info(NULL);
805 if (!pipe)
806 return -ENOMEM;
807
808 /*
809 * We don't have an immediate reader, but we'll read the stuff
810 * out of the pipe right after the move_to_pipe(). So set
811 * PIPE_READERS appropriately.
812 */
813 pipe->readers = 1;
814
815 current->splice_pipe = pipe;
816 }
817
818 /*
819 * Do the splice.
820 */
821 ret = 0;
822 bytes = 0;
823
824 while (len) {
825 size_t read_len, max_read_len;
826
827 /*
828 * Do at most PIPE_BUFFERS pages worth of transfer:
829 */
830 max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
831
832 ret = do_splice_to(in, pipe, max_read_len, flags);
833 if (unlikely(ret < 0))
834 goto out_release;
835
836 read_len = ret;
837
838 /*
839 * NOTE: nonblocking mode only applies to the input. We
840 * must not do the output in nonblocking mode as then we
841 * could get stuck data in the internal pipe:
842 */
843 ret = do_splice_from(pipe, out, read_len,
844 flags & ~SPLICE_F_NONBLOCK);
845 if (unlikely(ret < 0))
846 goto out_release;
847
848 bytes += ret;
849 len -= ret;
850
851 /*
852 * In nonblocking mode, if we got back a short read then
853 * that was due to either an IO error or due to the
854 * pagecache entry not being there. In the IO error case
855 * the _next_ splice attempt will produce a clean IO error
856 * return value (not a short read), so in both cases it's
857 * correct to break out of the loop here:
858 */
859 if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len))
860 break;
861 }
862
863 pipe->nrbufs = pipe->curbuf = 0;
864
865 return bytes;
866
867out_release:
868 /*
869 * If we did an incomplete transfer we must release
870 * the pipe buffers in question:
871 */
872 for (i = 0; i < PIPE_BUFFERS; i++) {
873 struct pipe_buffer *buf = pipe->bufs + i;
874
875 if (buf->ops) {
876 buf->ops->release(pipe, buf);
877 buf->ops = NULL;
878 }
879 }
880 pipe->nrbufs = pipe->curbuf = 0;
881
882 /*
883 * If we transferred some data, return the number of bytes:
884 */
885 if (bytes > 0)
886 return bytes;
887
888 return ret;
889}
890
891EXPORT_SYMBOL(do_splice_direct);
892
735/* 893/*
736 * Determine where to splice to/from. 894 * Determine where to splice to/from.
737 */ 895 */
738static long do_splice(struct file *in, struct file *out, size_t len, 896static long do_splice(struct file *in, loff_t __user *off_in,
739 unsigned int flags) 897 struct file *out, loff_t __user *off_out,
898 size_t len, unsigned int flags)
740{ 899{
741 struct inode *pipe; 900 struct pipe_inode_info *pipe;
901
902 pipe = in->f_dentry->d_inode->i_pipe;
903 if (pipe) {
904 if (off_in)
905 return -ESPIPE;
906 if (off_out) {
907 if (out->f_op->llseek == no_llseek)
908 return -EINVAL;
909 if (copy_from_user(&out->f_pos, off_out,
910 sizeof(loff_t)))
911 return -EFAULT;
912 }
742 913
743 pipe = in->f_dentry->d_inode;
744 if (pipe->i_pipe)
745 return do_splice_from(pipe, out, len, flags); 914 return do_splice_from(pipe, out, len, flags);
915 }
916
917 pipe = out->f_dentry->d_inode->i_pipe;
918 if (pipe) {
919 if (off_out)
920 return -ESPIPE;
921 if (off_in) {
922 if (in->f_op->llseek == no_llseek)
923 return -EINVAL;
924 if (copy_from_user(&in->f_pos, off_in, sizeof(loff_t)))
925 return -EFAULT;
926 }
746 927
747 pipe = out->f_dentry->d_inode;
748 if (pipe->i_pipe)
749 return do_splice_to(in, pipe, len, flags); 928 return do_splice_to(in, pipe, len, flags);
929 }
750 930
751 return -EINVAL; 931 return -EINVAL;
752} 932}
753 933
754asmlinkage long sys_splice(int fdin, int fdout, size_t len, unsigned int flags) 934asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
935 int fd_out, loff_t __user *off_out,
936 size_t len, unsigned int flags)
755{ 937{
756 long error; 938 long error;
757 struct file *in, *out; 939 struct file *in, *out;
@@ -761,13 +943,15 @@ asmlinkage long sys_splice(int fdin, int fdout, size_t len, unsigned int flags)
761 return 0; 943 return 0;
762 944
763 error = -EBADF; 945 error = -EBADF;
764 in = fget_light(fdin, &fput_in); 946 in = fget_light(fd_in, &fput_in);
765 if (in) { 947 if (in) {
766 if (in->f_mode & FMODE_READ) { 948 if (in->f_mode & FMODE_READ) {
767 out = fget_light(fdout, &fput_out); 949 out = fget_light(fd_out, &fput_out);
768 if (out) { 950 if (out) {
769 if (out->f_mode & FMODE_WRITE) 951 if (out->f_mode & FMODE_WRITE)
770 error = do_splice(in, out, len, flags); 952 error = do_splice(in, off_in,
953 out, off_out,
954 len, flags);
771 fput_light(out, fput_out); 955 fput_light(out, fput_out);
772 } 956 }
773 } 957 }