diff options
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/pipe.c | 4 | ||||
| -rw-r--r-- | fs/splice.c | 177 |
2 files changed, 141 insertions, 40 deletions
| @@ -95,6 +95,8 @@ static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buff | |||
| 95 | { | 95 | { |
| 96 | struct page *page = buf->page; | 96 | struct page *page = buf->page; |
| 97 | 97 | ||
| 98 | buf->flags &= ~PIPE_BUF_FLAG_STOLEN; | ||
| 99 | |||
| 98 | /* | 100 | /* |
| 99 | * If nobody else uses this page, and we don't already have a | 101 | * If nobody else uses this page, and we don't already have a |
| 100 | * temporary page, let's keep track of it as a one-deep | 102 | * temporary page, let's keep track of it as a one-deep |
| @@ -124,7 +126,7 @@ static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer | |||
| 124 | static int anon_pipe_buf_steal(struct pipe_inode_info *info, | 126 | static int anon_pipe_buf_steal(struct pipe_inode_info *info, |
| 125 | struct pipe_buffer *buf) | 127 | struct pipe_buffer *buf) |
| 126 | { | 128 | { |
| 127 | buf->stolen = 1; | 129 | buf->flags |= PIPE_BUF_FLAG_STOLEN; |
| 128 | return 0; | 130 | return 0; |
| 129 | } | 131 | } |
| 130 | 132 | ||
diff --git a/fs/splice.c b/fs/splice.c index 6081cf7d2d1b..bfa42a277bb8 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
| @@ -22,7 +22,10 @@ | |||
| 22 | #include <linux/pipe_fs_i.h> | 22 | #include <linux/pipe_fs_i.h> |
| 23 | #include <linux/mm_inline.h> | 23 | #include <linux/mm_inline.h> |
| 24 | #include <linux/swap.h> | 24 | #include <linux/swap.h> |
| 25 | #include <linux/writeback.h> | ||
| 26 | #include <linux/buffer_head.h> | ||
| 25 | #include <linux/module.h> | 27 | #include <linux/module.h> |
| 28 | #include <linux/syscalls.h> | ||
| 26 | 29 | ||
| 27 | /* | 30 | /* |
| 28 | * Passed to the actors | 31 | * Passed to the actors |
| @@ -34,28 +37,37 @@ struct splice_desc { | |||
| 34 | loff_t pos; /* file position */ | 37 | loff_t pos; /* file position */ |
| 35 | }; | 38 | }; |
| 36 | 39 | ||
| 40 | /* | ||
| 41 | * Attempt to steal a page from a pipe buffer. This should perhaps go into | ||
| 42 | * a vm helper function, it's already simplified quite a bit by the | ||
| 43 | * addition of remove_mapping(). If success is returned, the caller may | ||
| 44 | * attempt to reuse this page for another destination. | ||
| 45 | */ | ||
| 37 | static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, | 46 | static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, |
| 38 | struct pipe_buffer *buf) | 47 | struct pipe_buffer *buf) |
| 39 | { | 48 | { |
| 40 | struct page *page = buf->page; | 49 | struct page *page = buf->page; |
| 50 | struct address_space *mapping = page_mapping(page); | ||
| 41 | 51 | ||
| 42 | WARN_ON(!PageLocked(page)); | 52 | WARN_ON(!PageLocked(page)); |
| 43 | WARN_ON(!PageUptodate(page)); | 53 | WARN_ON(!PageUptodate(page)); |
| 44 | 54 | ||
| 45 | if (!remove_mapping(page_mapping(page), page)) | 55 | /* |
| 46 | return 1; | 56 | * At least for ext2 with nobh option, we need to wait on writeback |
| 57 | * completing on this page, since we'll remove it from the pagecache. | ||
| 58 | * Otherwise truncate wont wait on the page, allowing the disk | ||
| 59 | * blocks to be reused by someone else before we actually wrote our | ||
| 60 | * data to them. fs corruption ensues. | ||
| 61 | */ | ||
| 62 | wait_on_page_writeback(page); | ||
| 47 | 63 | ||
| 48 | if (PageLRU(page)) { | 64 | if (PagePrivate(page)) |
| 49 | struct zone *zone = page_zone(page); | 65 | try_to_release_page(page, mapping_gfp_mask(mapping)); |
| 50 | 66 | ||
| 51 | spin_lock_irq(&zone->lru_lock); | 67 | if (!remove_mapping(mapping, page)) |
| 52 | BUG_ON(!PageLRU(page)); | 68 | return 1; |
| 53 | __ClearPageLRU(page); | ||
| 54 | del_page_from_lru(zone, page); | ||
| 55 | spin_unlock_irq(&zone->lru_lock); | ||
| 56 | } | ||
| 57 | 69 | ||
| 58 | buf->stolen = 1; | 70 | buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; |
| 59 | return 0; | 71 | return 0; |
| 60 | } | 72 | } |
| 61 | 73 | ||
| @@ -64,7 +76,7 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info, | |||
| 64 | { | 76 | { |
| 65 | page_cache_release(buf->page); | 77 | page_cache_release(buf->page); |
| 66 | buf->page = NULL; | 78 | buf->page = NULL; |
| 67 | buf->stolen = 0; | 79 | buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU); |
| 68 | } | 80 | } |
| 69 | 81 | ||
| 70 | static void *page_cache_pipe_buf_map(struct file *file, | 82 | static void *page_cache_pipe_buf_map(struct file *file, |
| @@ -91,8 +103,7 @@ static void *page_cache_pipe_buf_map(struct file *file, | |||
| 91 | static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, | 103 | static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, |
| 92 | struct pipe_buffer *buf) | 104 | struct pipe_buffer *buf) |
| 93 | { | 105 | { |
| 94 | if (!buf->stolen) | 106 | unlock_page(buf->page); |
| 95 | unlock_page(buf->page); | ||
| 96 | kunmap(buf->page); | 107 | kunmap(buf->page); |
| 97 | } | 108 | } |
| 98 | 109 | ||
| @@ -104,6 +115,10 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = { | |||
| 104 | .steal = page_cache_pipe_buf_steal, | 115 | .steal = page_cache_pipe_buf_steal, |
| 105 | }; | 116 | }; |
| 106 | 117 | ||
| 118 | /* | ||
| 119 | * Pipe output worker. This sets up our pipe format with the page cache | ||
| 120 | * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). | ||
| 121 | */ | ||
| 107 | static ssize_t move_to_pipe(struct inode *inode, struct page **pages, | 122 | static ssize_t move_to_pipe(struct inode *inode, struct page **pages, |
| 108 | int nr_pages, unsigned long offset, | 123 | int nr_pages, unsigned long offset, |
| 109 | unsigned long len, unsigned int flags) | 124 | unsigned long len, unsigned int flags) |
| @@ -237,9 +252,9 @@ static int __generic_file_splice_read(struct file *in, struct inode *pipe, | |||
| 237 | * fill shadow[] with pages at the right locations, so we only | 252 | * fill shadow[] with pages at the right locations, so we only |
| 238 | * have to fill holes | 253 | * have to fill holes |
| 239 | */ | 254 | */ |
| 240 | memset(shadow, 0, i * sizeof(struct page *)); | 255 | memset(shadow, 0, nr_pages * sizeof(struct page *)); |
| 241 | for (j = 0, pidx = index; j < i; pidx++, j++) | 256 | for (j = 0; j < i; j++) |
| 242 | shadow[pages[j]->index - pidx] = pages[j]; | 257 | shadow[pages[j]->index - index] = pages[j]; |
| 243 | 258 | ||
| 244 | /* | 259 | /* |
| 245 | * now fill in the holes | 260 | * now fill in the holes |
| @@ -288,6 +303,16 @@ splice_them: | |||
| 288 | return move_to_pipe(pipe, pages, i, offset, len, flags); | 303 | return move_to_pipe(pipe, pages, i, offset, len, flags); |
| 289 | } | 304 | } |
| 290 | 305 | ||
| 306 | /** | ||
| 307 | * generic_file_splice_read - splice data from file to a pipe | ||
| 308 | * @in: file to splice from | ||
| 309 | * @pipe: pipe to splice to | ||
| 310 | * @len: number of bytes to splice | ||
| 311 | * @flags: splice modifier flags | ||
| 312 | * | ||
| 313 | * Will read pages from given file and fill them into a pipe. | ||
| 314 | * | ||
| 315 | */ | ||
| 291 | ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, | 316 | ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, |
| 292 | size_t len, unsigned int flags) | 317 | size_t len, unsigned int flags) |
| 293 | { | 318 | { |
| @@ -318,8 +343,11 @@ ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, | |||
| 318 | return ret; | 343 | return ret; |
| 319 | } | 344 | } |
| 320 | 345 | ||
| 346 | EXPORT_SYMBOL(generic_file_splice_read); | ||
| 347 | |||
| 321 | /* | 348 | /* |
| 322 | * Send 'len' bytes to socket from 'file' at position 'pos' using sendpage(). | 349 | * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' |
| 350 | * using sendpage(). | ||
| 323 | */ | 351 | */ |
| 324 | static int pipe_to_sendpage(struct pipe_inode_info *info, | 352 | static int pipe_to_sendpage(struct pipe_inode_info *info, |
| 325 | struct pipe_buffer *buf, struct splice_desc *sd) | 353 | struct pipe_buffer *buf, struct splice_desc *sd) |
| @@ -329,6 +357,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *info, | |||
| 329 | unsigned int offset; | 357 | unsigned int offset; |
| 330 | ssize_t ret; | 358 | ssize_t ret; |
| 331 | void *ptr; | 359 | void *ptr; |
| 360 | int more; | ||
| 332 | 361 | ||
| 333 | /* | 362 | /* |
| 334 | * sub-optimal, but we are limited by the pipe ->map. we don't | 363 | * sub-optimal, but we are limited by the pipe ->map. we don't |
| @@ -341,9 +370,9 @@ static int pipe_to_sendpage(struct pipe_inode_info *info, | |||
| 341 | return PTR_ERR(ptr); | 370 | return PTR_ERR(ptr); |
| 342 | 371 | ||
| 343 | offset = pos & ~PAGE_CACHE_MASK; | 372 | offset = pos & ~PAGE_CACHE_MASK; |
| 373 | more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; | ||
| 344 | 374 | ||
| 345 | ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos, | 375 | ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); |
| 346 | sd->len < sd->total_len); | ||
| 347 | 376 | ||
| 348 | buf->ops->unmap(info, buf); | 377 | buf->ops->unmap(info, buf); |
| 349 | if (ret == sd->len) | 378 | if (ret == sd->len) |
| @@ -365,16 +394,19 @@ static int pipe_to_sendpage(struct pipe_inode_info *info, | |||
| 365 | * - Destination page does not exist, we can add the pipe page to | 394 | * - Destination page does not exist, we can add the pipe page to |
| 366 | * the page cache and avoid the copy. | 395 | * the page cache and avoid the copy. |
| 367 | * | 396 | * |
| 368 | * For now we just do the slower thing and always copy pages over, it's | 397 | * If asked to move pages to the output file (SPLICE_F_MOVE is set in |
| 369 | * easier than migrating pages from the pipe to the target file. For the | 398 | * sd->flags), we attempt to migrate pages from the pipe to the output |
| 370 | * case of doing file | file splicing, the migrate approach had some LRU | 399 | * file address space page cache. This is possible if no one else has |
| 371 | * nastiness... | 400 | * the pipe page referenced outside of the pipe and page cache. If |
| 401 | * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create | ||
| 402 | * a new page in the output file page cache and fill/dirty that. | ||
| 372 | */ | 403 | */ |
| 373 | static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, | 404 | static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, |
| 374 | struct splice_desc *sd) | 405 | struct splice_desc *sd) |
| 375 | { | 406 | { |
| 376 | struct file *file = sd->file; | 407 | struct file *file = sd->file; |
| 377 | struct address_space *mapping = file->f_mapping; | 408 | struct address_space *mapping = file->f_mapping; |
| 409 | gfp_t gfp_mask = mapping_gfp_mask(mapping); | ||
| 378 | unsigned int offset; | 410 | unsigned int offset; |
| 379 | struct page *page; | 411 | struct page *page; |
| 380 | pgoff_t index; | 412 | pgoff_t index; |
| @@ -395,18 +427,23 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, | |||
| 395 | * reuse buf page, if SPLICE_F_MOVE is set | 427 | * reuse buf page, if SPLICE_F_MOVE is set |
| 396 | */ | 428 | */ |
| 397 | if (sd->flags & SPLICE_F_MOVE) { | 429 | if (sd->flags & SPLICE_F_MOVE) { |
| 430 | /* | ||
| 431 | * If steal succeeds, buf->page is now pruned from the vm | ||
| 432 | * side (page cache; it may still be on the LRU) and we can reuse it. | ||
| 433 | */ | ||
| 398 | if (buf->ops->steal(info, buf)) | 434 | if (buf->ops->steal(info, buf)) |
| 399 | goto find_page; | 435 | goto find_page; |
| 400 | 436 | ||
| 401 | page = buf->page; | 437 | page = buf->page; |
| 402 | if (add_to_page_cache_lru(page, mapping, index, | 438 | if (add_to_page_cache(page, mapping, index, gfp_mask)) |
| 403 | mapping_gfp_mask(mapping))) | ||
| 404 | goto find_page; | 439 | goto find_page; |
| 440 | |||
| 441 | if (!(buf->flags & PIPE_BUF_FLAG_LRU)) | ||
| 442 | lru_cache_add(page); | ||
| 405 | } else { | 443 | } else { |
| 406 | find_page: | 444 | find_page: |
| 407 | ret = -ENOMEM; | 445 | ret = -ENOMEM; |
| 408 | page = find_or_create_page(mapping, index, | 446 | page = find_or_create_page(mapping, index, gfp_mask); |
| 409 | mapping_gfp_mask(mapping)); | ||
| 410 | if (!page) | 447 | if (!page) |
| 411 | goto out; | 448 | goto out; |
| 412 | 449 | ||
| @@ -443,10 +480,13 @@ find_page: | |||
| 443 | } | 480 | } |
| 444 | 481 | ||
| 445 | ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); | 482 | ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); |
| 446 | if (ret) | 483 | if (ret == AOP_TRUNCATED_PAGE) { |
| 484 | page_cache_release(page); | ||
| 485 | goto find_page; | ||
| 486 | } else if (ret) | ||
| 447 | goto out; | 487 | goto out; |
| 448 | 488 | ||
| 449 | if (!buf->stolen) { | 489 | if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { |
| 450 | char *dst = kmap_atomic(page, KM_USER0); | 490 | char *dst = kmap_atomic(page, KM_USER0); |
| 451 | 491 | ||
| 452 | memcpy(dst + offset, src + buf->offset, sd->len); | 492 | memcpy(dst + offset, src + buf->offset, sd->len); |
| @@ -455,16 +495,18 @@ find_page: | |||
| 455 | } | 495 | } |
| 456 | 496 | ||
| 457 | ret = mapping->a_ops->commit_write(file, page, 0, sd->len); | 497 | ret = mapping->a_ops->commit_write(file, page, 0, sd->len); |
| 458 | if (ret < 0) | 498 | if (ret == AOP_TRUNCATED_PAGE) { |
| 499 | page_cache_release(page); | ||
| 500 | goto find_page; | ||
| 501 | } else if (ret) | ||
| 459 | goto out; | 502 | goto out; |
| 460 | 503 | ||
| 461 | set_page_dirty(page); | 504 | balance_dirty_pages_ratelimited(mapping); |
| 462 | ret = write_one_page(page, 0); | ||
| 463 | out: | 505 | out: |
| 464 | if (ret < 0) | 506 | if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { |
| 465 | unlock_page(page); | ||
| 466 | if (!buf->stolen) | ||
| 467 | page_cache_release(page); | 507 | page_cache_release(page); |
| 508 | unlock_page(page); | ||
| 509 | } | ||
| 468 | buf->ops->unmap(info, buf); | 510 | buf->ops->unmap(info, buf); |
| 469 | return ret; | 511 | return ret; |
| 470 | } | 512 | } |
| @@ -472,6 +514,11 @@ out: | |||
| 472 | typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, | 514 | typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, |
| 473 | struct splice_desc *); | 515 | struct splice_desc *); |
| 474 | 516 | ||
| 517 | /* | ||
| 518 | * Pipe input worker. Most of this logic works like a regular pipe, the | ||
| 519 | * key here is the 'actor' worker passed in that actually moves the data | ||
| 520 | * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. | ||
| 521 | */ | ||
| 475 | static ssize_t move_from_pipe(struct inode *inode, struct file *out, | 522 | static ssize_t move_from_pipe(struct inode *inode, struct file *out, |
| 476 | size_t len, unsigned int flags, | 523 | size_t len, unsigned int flags, |
| 477 | splice_actor *actor) | 524 | splice_actor *actor) |
| @@ -573,21 +620,67 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out, | |||
| 573 | 620 | ||
| 574 | } | 621 | } |
| 575 | 622 | ||
| 623 | /** | ||
| 624 | * generic_file_splice_write - splice data from a pipe to a file | ||
| 625 | * @inode: pipe inode | ||
| 626 | * @out: file to write to | ||
| 627 | * @len: number of bytes to splice | ||
| 628 | * @flags: splice modifier flags | ||
| 629 | * | ||
| 630 | * Will either move or copy pages (determined by @flags options) from | ||
| 631 | * the given pipe inode to the given file. | ||
| 632 | * | ||
| 633 | */ | ||
| 576 | ssize_t generic_file_splice_write(struct inode *inode, struct file *out, | 634 | ssize_t generic_file_splice_write(struct inode *inode, struct file *out, |
| 577 | size_t len, unsigned int flags) | 635 | size_t len, unsigned int flags) |
| 578 | { | 636 | { |
| 579 | return move_from_pipe(inode, out, len, flags, pipe_to_file); | 637 | struct address_space *mapping = out->f_mapping; |
| 638 | ssize_t ret = move_from_pipe(inode, out, len, flags, pipe_to_file); | ||
| 639 | |||
| 640 | /* | ||
| 641 | * if file or inode is SYNC and we actually wrote some data, sync it | ||
| 642 | */ | ||
| 643 | if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host)) | ||
| 644 | && ret > 0) { | ||
| 645 | struct inode *inode = mapping->host; | ||
| 646 | int err; | ||
| 647 | |||
| 648 | mutex_lock(&inode->i_mutex); | ||
| 649 | err = generic_osync_inode(mapping->host, mapping, | ||
| 650 | OSYNC_METADATA|OSYNC_DATA); | ||
| 651 | mutex_unlock(&inode->i_mutex); | ||
| 652 | |||
| 653 | if (err) | ||
| 654 | ret = err; | ||
| 655 | } | ||
| 656 | |||
| 657 | return ret; | ||
| 580 | } | 658 | } |
| 581 | 659 | ||
| 660 | EXPORT_SYMBOL(generic_file_splice_write); | ||
| 661 | |||
| 662 | /** | ||
| 663 | * generic_splice_sendpage - splice data from a pipe to a socket | ||
| 664 | * @inode: pipe inode | ||
| 665 | * @out: socket to write to | ||
| 666 | * @len: number of bytes to splice | ||
| 667 | * @flags: splice modifier flags | ||
| 668 | * | ||
| 669 | * Will send @len bytes from the pipe to a network socket. No data copying | ||
| 670 | * is involved. | ||
| 671 | * | ||
| 672 | */ | ||
| 582 | ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, | 673 | ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, |
| 583 | size_t len, unsigned int flags) | 674 | size_t len, unsigned int flags) |
| 584 | { | 675 | { |
| 585 | return move_from_pipe(inode, out, len, flags, pipe_to_sendpage); | 676 | return move_from_pipe(inode, out, len, flags, pipe_to_sendpage); |
| 586 | } | 677 | } |
| 587 | 678 | ||
| 588 | EXPORT_SYMBOL(generic_file_splice_write); | 679 | EXPORT_SYMBOL(generic_splice_sendpage); |
| 589 | EXPORT_SYMBOL(generic_file_splice_read); | ||
| 590 | 680 | ||
| 681 | /* | ||
| 682 | * Attempt to initiate a splice from pipe to file. | ||
| 683 | */ | ||
| 591 | static long do_splice_from(struct inode *pipe, struct file *out, size_t len, | 684 | static long do_splice_from(struct inode *pipe, struct file *out, size_t len, |
| 592 | unsigned int flags) | 685 | unsigned int flags) |
| 593 | { | 686 | { |
| @@ -608,6 +701,9 @@ static long do_splice_from(struct inode *pipe, struct file *out, size_t len, | |||
| 608 | return out->f_op->splice_write(pipe, out, len, flags); | 701 | return out->f_op->splice_write(pipe, out, len, flags); |
| 609 | } | 702 | } |
| 610 | 703 | ||
| 704 | /* | ||
| 705 | * Attempt to initiate a splice from a file to a pipe. | ||
| 706 | */ | ||
| 611 | static long do_splice_to(struct file *in, struct inode *pipe, size_t len, | 707 | static long do_splice_to(struct file *in, struct inode *pipe, size_t len, |
| 612 | unsigned int flags) | 708 | unsigned int flags) |
| 613 | { | 709 | { |
| @@ -636,6 +732,9 @@ static long do_splice_to(struct file *in, struct inode *pipe, size_t len, | |||
| 636 | return in->f_op->splice_read(in, pipe, len, flags); | 732 | return in->f_op->splice_read(in, pipe, len, flags); |
| 637 | } | 733 | } |
| 638 | 734 | ||
| 735 | /* | ||
| 736 | * Determine where to splice to/from. | ||
| 737 | */ | ||
| 639 | static long do_splice(struct file *in, struct file *out, size_t len, | 738 | static long do_splice(struct file *in, struct file *out, size_t len, |
| 640 | unsigned int flags) | 739 | unsigned int flags) |
| 641 | { | 740 | { |
