aboutsummaryrefslogtreecommitdiffstats
path: root/fs/splice.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/splice.c')
-rw-r--r--fs/splice.c202
1 files changed, 159 insertions, 43 deletions
diff --git a/fs/splice.c b/fs/splice.c
index 7c2bbf18d7a7..bfa42a277bb8 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -22,7 +22,10 @@
22#include <linux/pipe_fs_i.h> 22#include <linux/pipe_fs_i.h>
23#include <linux/mm_inline.h> 23#include <linux/mm_inline.h>
24#include <linux/swap.h> 24#include <linux/swap.h>
25#include <linux/writeback.h>
26#include <linux/buffer_head.h>
25#include <linux/module.h> 27#include <linux/module.h>
28#include <linux/syscalls.h>
26 29
27/* 30/*
28 * Passed to the actors 31 * Passed to the actors
@@ -34,28 +37,37 @@ struct splice_desc {
34 loff_t pos; /* file position */ 37 loff_t pos; /* file position */
35}; 38};
36 39
40/*
41 * Attempt to steal a page from a pipe buffer. This should perhaps go into
42 * a vm helper function, it's already simplified quite a bit by the
43 * addition of remove_mapping(). If success is returned, the caller may
44 * attempt to reuse this page for another destination.
45 */
37static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, 46static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
38 struct pipe_buffer *buf) 47 struct pipe_buffer *buf)
39{ 48{
40 struct page *page = buf->page; 49 struct page *page = buf->page;
50 struct address_space *mapping = page_mapping(page);
41 51
42 WARN_ON(!PageLocked(page)); 52 WARN_ON(!PageLocked(page));
43 WARN_ON(!PageUptodate(page)); 53 WARN_ON(!PageUptodate(page));
44 54
45 if (!remove_mapping(page_mapping(page), page)) 55 /*
46 return 1; 56 * At least for ext2 with nobh option, we need to wait on writeback
57 * completing on this page, since we'll remove it from the pagecache.
58 * Otherwise truncate wont wait on the page, allowing the disk
59 * blocks to be reused by someone else before we actually wrote our
60 * data to them. fs corruption ensues.
61 */
62 wait_on_page_writeback(page);
47 63
48 if (PageLRU(page)) { 64 if (PagePrivate(page))
49 struct zone *zone = page_zone(page); 65 try_to_release_page(page, mapping_gfp_mask(mapping));
50 66
51 spin_lock_irq(&zone->lru_lock); 67 if (!remove_mapping(mapping, page))
52 BUG_ON(!PageLRU(page)); 68 return 1;
53 __ClearPageLRU(page);
54 del_page_from_lru(zone, page);
55 spin_unlock_irq(&zone->lru_lock);
56 }
57 69
58 buf->stolen = 1; 70 buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU;
59 return 0; 71 return 0;
60} 72}
61 73
@@ -64,7 +76,7 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info,
64{ 76{
65 page_cache_release(buf->page); 77 page_cache_release(buf->page);
66 buf->page = NULL; 78 buf->page = NULL;
67 buf->stolen = 0; 79 buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU);
68} 80}
69 81
70static void *page_cache_pipe_buf_map(struct file *file, 82static void *page_cache_pipe_buf_map(struct file *file,
@@ -91,8 +103,7 @@ static void *page_cache_pipe_buf_map(struct file *file,
91static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 103static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
92 struct pipe_buffer *buf) 104 struct pipe_buffer *buf)
93{ 105{
94 if (!buf->stolen) 106 unlock_page(buf->page);
95 unlock_page(buf->page);
96 kunmap(buf->page); 107 kunmap(buf->page);
97} 108}
98 109
@@ -104,9 +115,13 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = {
104 .steal = page_cache_pipe_buf_steal, 115 .steal = page_cache_pipe_buf_steal,
105}; 116};
106 117
118/*
119 * Pipe output worker. This sets up our pipe format with the page cache
120 * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
121 */
107static ssize_t move_to_pipe(struct inode *inode, struct page **pages, 122static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
108 int nr_pages, unsigned long offset, 123 int nr_pages, unsigned long offset,
109 unsigned long len) 124 unsigned long len, unsigned int flags)
110{ 125{
111 struct pipe_inode_info *info; 126 struct pipe_inode_info *info;
112 int ret, do_wakeup, i; 127 int ret, do_wakeup, i;
@@ -159,6 +174,12 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
159 break; 174 break;
160 } 175 }
161 176
177 if (flags & SPLICE_F_NONBLOCK) {
178 if (!ret)
179 ret = -EAGAIN;
180 break;
181 }
182
162 if (signal_pending(current)) { 183 if (signal_pending(current)) {
163 if (!ret) 184 if (!ret)
164 ret = -ERESTARTSYS; 185 ret = -ERESTARTSYS;
@@ -191,7 +212,7 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
191} 212}
192 213
193static int __generic_file_splice_read(struct file *in, struct inode *pipe, 214static int __generic_file_splice_read(struct file *in, struct inode *pipe,
194 size_t len) 215 size_t len, unsigned int flags)
195{ 216{
196 struct address_space *mapping = in->f_mapping; 217 struct address_space *mapping = in->f_mapping;
197 unsigned int offset, nr_pages; 218 unsigned int offset, nr_pages;
@@ -231,9 +252,9 @@ static int __generic_file_splice_read(struct file *in, struct inode *pipe,
231 * fill shadow[] with pages at the right locations, so we only 252 * fill shadow[] with pages at the right locations, so we only
232 * have to fill holes 253 * have to fill holes
233 */ 254 */
234 memset(shadow, 0, i * sizeof(struct page *)); 255 memset(shadow, 0, nr_pages * sizeof(struct page *));
235 for (j = 0, pidx = index; j < i; pidx++, j++) 256 for (j = 0; j < i; j++)
236 shadow[pages[j]->index - pidx] = pages[j]; 257 shadow[pages[j]->index - index] = pages[j];
237 258
238 /* 259 /*
239 * now fill in the holes 260 * now fill in the holes
@@ -279,9 +300,19 @@ static int __generic_file_splice_read(struct file *in, struct inode *pipe,
279 * Now we splice them into the pipe.. 300 * Now we splice them into the pipe..
280 */ 301 */
281splice_them: 302splice_them:
282 return move_to_pipe(pipe, pages, i, offset, len); 303 return move_to_pipe(pipe, pages, i, offset, len, flags);
283} 304}
284 305
306/**
307 * generic_file_splice_read - splice data from file to a pipe
308 * @in: file to splice from
309 * @pipe: pipe to splice to
310 * @len: number of bytes to splice
311 * @flags: splice modifier flags
312 *
313 * Will read pages from given file and fill them into a pipe.
314 *
315 */
285ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, 316ssize_t generic_file_splice_read(struct file *in, struct inode *pipe,
286 size_t len, unsigned int flags) 317 size_t len, unsigned int flags)
287{ 318{
@@ -291,7 +322,7 @@ ssize_t generic_file_splice_read(struct file *in, struct inode *pipe,
291 ret = 0; 322 ret = 0;
292 spliced = 0; 323 spliced = 0;
293 while (len) { 324 while (len) {
294 ret = __generic_file_splice_read(in, pipe, len); 325 ret = __generic_file_splice_read(in, pipe, len, flags);
295 326
296 if (ret <= 0) 327 if (ret <= 0)
297 break; 328 break;
@@ -299,6 +330,11 @@ ssize_t generic_file_splice_read(struct file *in, struct inode *pipe,
299 in->f_pos += ret; 330 in->f_pos += ret;
300 len -= ret; 331 len -= ret;
301 spliced += ret; 332 spliced += ret;
333
334 if (!(flags & SPLICE_F_NONBLOCK))
335 continue;
336 ret = -EAGAIN;
337 break;
302 } 338 }
303 339
304 if (spliced) 340 if (spliced)
@@ -307,8 +343,11 @@ ssize_t generic_file_splice_read(struct file *in, struct inode *pipe,
307 return ret; 343 return ret;
308} 344}
309 345
346EXPORT_SYMBOL(generic_file_splice_read);
347
310/* 348/*
311 * Send 'len' bytes to socket from 'file' at position 'pos' using sendpage(). 349 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
350 * using sendpage().
312 */ 351 */
313static int pipe_to_sendpage(struct pipe_inode_info *info, 352static int pipe_to_sendpage(struct pipe_inode_info *info,
314 struct pipe_buffer *buf, struct splice_desc *sd) 353 struct pipe_buffer *buf, struct splice_desc *sd)
@@ -318,6 +357,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
318 unsigned int offset; 357 unsigned int offset;
319 ssize_t ret; 358 ssize_t ret;
320 void *ptr; 359 void *ptr;
360 int more;
321 361
322 /* 362 /*
323 * sub-optimal, but we are limited by the pipe ->map. we don't 363 * sub-optimal, but we are limited by the pipe ->map. we don't
@@ -330,9 +370,9 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
330 return PTR_ERR(ptr); 370 return PTR_ERR(ptr);
331 371
332 offset = pos & ~PAGE_CACHE_MASK; 372 offset = pos & ~PAGE_CACHE_MASK;
373 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
333 374
334 ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos, 375 ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more);
335 sd->len < sd->total_len);
336 376
337 buf->ops->unmap(info, buf); 377 buf->ops->unmap(info, buf);
338 if (ret == sd->len) 378 if (ret == sd->len)
@@ -354,16 +394,19 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
354 * - Destination page does not exist, we can add the pipe page to 394 * - Destination page does not exist, we can add the pipe page to
355 * the page cache and avoid the copy. 395 * the page cache and avoid the copy.
356 * 396 *
357 * For now we just do the slower thing and always copy pages over, it's 397 * If asked to move pages to the output file (SPLICE_F_MOVE is set in
358 * easier than migrating pages from the pipe to the target file. For the 398 * sd->flags), we attempt to migrate pages from the pipe to the output
359 * case of doing file | file splicing, the migrate approach had some LRU 399 * file address space page cache. This is possible if no one else has
360 * nastiness... 400 * the pipe page referenced outside of the pipe and page cache. If
401 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
402 * a new page in the output file page cache and fill/dirty that.
361 */ 403 */
362static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, 404static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
363 struct splice_desc *sd) 405 struct splice_desc *sd)
364{ 406{
365 struct file *file = sd->file; 407 struct file *file = sd->file;
366 struct address_space *mapping = file->f_mapping; 408 struct address_space *mapping = file->f_mapping;
409 gfp_t gfp_mask = mapping_gfp_mask(mapping);
367 unsigned int offset; 410 unsigned int offset;
368 struct page *page; 411 struct page *page;
369 pgoff_t index; 412 pgoff_t index;
@@ -384,18 +427,23 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
384 * reuse buf page, if SPLICE_F_MOVE is set 427 * reuse buf page, if SPLICE_F_MOVE is set
385 */ 428 */
386 if (sd->flags & SPLICE_F_MOVE) { 429 if (sd->flags & SPLICE_F_MOVE) {
430 /*
431 * If steal succeeds, buf->page is now pruned from the vm
432 * side (LRU and page cache) and we can reuse it.
433 */
387 if (buf->ops->steal(info, buf)) 434 if (buf->ops->steal(info, buf))
388 goto find_page; 435 goto find_page;
389 436
390 page = buf->page; 437 page = buf->page;
391 if (add_to_page_cache_lru(page, mapping, index, 438 if (add_to_page_cache(page, mapping, index, gfp_mask))
392 mapping_gfp_mask(mapping)))
393 goto find_page; 439 goto find_page;
440
441 if (!(buf->flags & PIPE_BUF_FLAG_LRU))
442 lru_cache_add(page);
394 } else { 443 } else {
395find_page: 444find_page:
396 ret = -ENOMEM; 445 ret = -ENOMEM;
397 page = find_or_create_page(mapping, index, 446 page = find_or_create_page(mapping, index, gfp_mask);
398 mapping_gfp_mask(mapping));
399 if (!page) 447 if (!page)
400 goto out; 448 goto out;
401 449
@@ -432,10 +480,13 @@ find_page:
432 } 480 }
433 481
434 ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); 482 ret = mapping->a_ops->prepare_write(file, page, 0, sd->len);
435 if (ret) 483 if (ret == AOP_TRUNCATED_PAGE) {
484 page_cache_release(page);
485 goto find_page;
486 } else if (ret)
436 goto out; 487 goto out;
437 488
438 if (!buf->stolen) { 489 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) {
439 char *dst = kmap_atomic(page, KM_USER0); 490 char *dst = kmap_atomic(page, KM_USER0);
440 491
441 memcpy(dst + offset, src + buf->offset, sd->len); 492 memcpy(dst + offset, src + buf->offset, sd->len);
@@ -444,16 +495,18 @@ find_page:
444 } 495 }
445 496
446 ret = mapping->a_ops->commit_write(file, page, 0, sd->len); 497 ret = mapping->a_ops->commit_write(file, page, 0, sd->len);
447 if (ret < 0) 498 if (ret == AOP_TRUNCATED_PAGE) {
499 page_cache_release(page);
500 goto find_page;
501 } else if (ret)
448 goto out; 502 goto out;
449 503
450 set_page_dirty(page); 504 balance_dirty_pages_ratelimited(mapping);
451 ret = write_one_page(page, 0);
452out: 505out:
453 if (ret < 0) 506 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) {
454 unlock_page(page);
455 if (!buf->stolen)
456 page_cache_release(page); 507 page_cache_release(page);
508 unlock_page(page);
509 }
457 buf->ops->unmap(info, buf); 510 buf->ops->unmap(info, buf);
458 return ret; 511 return ret;
459} 512}
@@ -461,6 +514,11 @@ out:
461typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, 514typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
462 struct splice_desc *); 515 struct splice_desc *);
463 516
517/*
518 * Pipe input worker. Most of this logic works like a regular pipe, the
519 * key here is the 'actor' worker passed in that actually moves the data
520 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
521 */
464static ssize_t move_from_pipe(struct inode *inode, struct file *out, 522static ssize_t move_from_pipe(struct inode *inode, struct file *out,
465 size_t len, unsigned int flags, 523 size_t len, unsigned int flags,
466 splice_actor *actor) 524 splice_actor *actor)
@@ -527,6 +585,12 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
527 break; 585 break;
528 } 586 }
529 587
588 if (flags & SPLICE_F_NONBLOCK) {
589 if (!ret)
590 ret = -EAGAIN;
591 break;
592 }
593
530 if (signal_pending(current)) { 594 if (signal_pending(current)) {
531 if (!ret) 595 if (!ret)
532 ret = -ERESTARTSYS; 596 ret = -ERESTARTSYS;
@@ -556,21 +620,67 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
556 620
557} 621}
558 622
623/**
624 * generic_file_splice_write - splice data from a pipe to a file
625 * @inode: pipe inode
626 * @out: file to write to
627 * @len: number of bytes to splice
628 * @flags: splice modifier flags
629 *
630 * Will either move or copy pages (determined by @flags options) from
631 * the given pipe inode to the given file.
632 *
633 */
559ssize_t generic_file_splice_write(struct inode *inode, struct file *out, 634ssize_t generic_file_splice_write(struct inode *inode, struct file *out,
560 size_t len, unsigned int flags) 635 size_t len, unsigned int flags)
561{ 636{
562 return move_from_pipe(inode, out, len, flags, pipe_to_file); 637 struct address_space *mapping = out->f_mapping;
638 ssize_t ret = move_from_pipe(inode, out, len, flags, pipe_to_file);
639
640 /*
641 * if file or inode is SYNC and we actually wrote some data, sync it
642 */
643 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host))
644 && ret > 0) {
645 struct inode *inode = mapping->host;
646 int err;
647
648 mutex_lock(&inode->i_mutex);
649 err = generic_osync_inode(mapping->host, mapping,
650 OSYNC_METADATA|OSYNC_DATA);
651 mutex_unlock(&inode->i_mutex);
652
653 if (err)
654 ret = err;
655 }
656
657 return ret;
563} 658}
564 659
660EXPORT_SYMBOL(generic_file_splice_write);
661
662/**
663 * generic_splice_sendpage - splice data from a pipe to a socket
664 * @inode: pipe inode
665 * @out: socket to write to
666 * @len: number of bytes to splice
667 * @flags: splice modifier flags
668 *
669 * Will send @len bytes from the pipe to a network socket. No data copying
670 * is involved.
671 *
672 */
565ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, 673ssize_t generic_splice_sendpage(struct inode *inode, struct file *out,
566 size_t len, unsigned int flags) 674 size_t len, unsigned int flags)
567{ 675{
568 return move_from_pipe(inode, out, len, flags, pipe_to_sendpage); 676 return move_from_pipe(inode, out, len, flags, pipe_to_sendpage);
569} 677}
570 678
571EXPORT_SYMBOL(generic_file_splice_write); 679EXPORT_SYMBOL(generic_splice_sendpage);
572EXPORT_SYMBOL(generic_file_splice_read);
573 680
681/*
682 * Attempt to initiate a splice from pipe to file.
683 */
574static long do_splice_from(struct inode *pipe, struct file *out, size_t len, 684static long do_splice_from(struct inode *pipe, struct file *out, size_t len,
575 unsigned int flags) 685 unsigned int flags)
576{ 686{
@@ -591,6 +701,9 @@ static long do_splice_from(struct inode *pipe, struct file *out, size_t len,
591 return out->f_op->splice_write(pipe, out, len, flags); 701 return out->f_op->splice_write(pipe, out, len, flags);
592} 702}
593 703
704/*
705 * Attempt to initiate a splice from a file to a pipe.
706 */
594static long do_splice_to(struct file *in, struct inode *pipe, size_t len, 707static long do_splice_to(struct file *in, struct inode *pipe, size_t len,
595 unsigned int flags) 708 unsigned int flags)
596{ 709{
@@ -619,6 +732,9 @@ static long do_splice_to(struct file *in, struct inode *pipe, size_t len,
619 return in->f_op->splice_read(in, pipe, len, flags); 732 return in->f_op->splice_read(in, pipe, len, flags);
620} 733}
621 734
735/*
736 * Determine where to splice to/from.
737 */
622static long do_splice(struct file *in, struct file *out, size_t len, 738static long do_splice(struct file *in, struct file *out, size_t len,
623 unsigned int flags) 739 unsigned int flags)
624{ 740{