diff options
author | Linus Torvalds <torvalds@g5.osdl.org> | 2006-04-02 17:22:06 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-04-02 17:22:06 -0400 |
commit | d69636157ad8fa6bb9fff452cefb34fcace851b5 (patch) | |
tree | dc9c0cc41150042224a367ffcab77cf7d315e3a7 /fs | |
parent | 1810b6cb162e0c19e0ecbbacbcfd66f578f335ec (diff) | |
parent | 3e7ee3e7b36fa4e2d88d8fb0a2577be95fc4636d (diff) |
Merge branch 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block
* 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block:
[PATCH] splice: fix page stealing LRU handling.
[PATCH] splice: page stealing needs to wait_on_page_writeback()
[PATCH] splice: export generic_splice_sendpage
[PATCH] splice: add a SPLICE_F_MORE flag
[PATCH] splice: add comments documenting more of the code
[PATCH] splice: improve writeback and clean up page stealing
[PATCH] splice: fix shadow[] filling logic
Diffstat (limited to 'fs')
-rw-r--r-- | fs/pipe.c | 4 | ||||
-rw-r--r-- | fs/splice.c | 177 |
2 files changed, 141 insertions, 40 deletions
@@ -95,6 +95,8 @@ static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buff | |||
95 | { | 95 | { |
96 | struct page *page = buf->page; | 96 | struct page *page = buf->page; |
97 | 97 | ||
98 | buf->flags &= ~PIPE_BUF_FLAG_STOLEN; | ||
99 | |||
98 | /* | 100 | /* |
99 | * If nobody else uses this page, and we don't already have a | 101 | * If nobody else uses this page, and we don't already have a |
100 | * temporary page, let's keep track of it as a one-deep | 102 | * temporary page, let's keep track of it as a one-deep |
@@ -124,7 +126,7 @@ static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer | |||
124 | static int anon_pipe_buf_steal(struct pipe_inode_info *info, | 126 | static int anon_pipe_buf_steal(struct pipe_inode_info *info, |
125 | struct pipe_buffer *buf) | 127 | struct pipe_buffer *buf) |
126 | { | 128 | { |
127 | buf->stolen = 1; | 129 | buf->flags |= PIPE_BUF_FLAG_STOLEN; |
128 | return 0; | 130 | return 0; |
129 | } | 131 | } |
130 | 132 | ||
diff --git a/fs/splice.c b/fs/splice.c index 6081cf7d2d1b..bfa42a277bb8 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -22,7 +22,10 @@ | |||
22 | #include <linux/pipe_fs_i.h> | 22 | #include <linux/pipe_fs_i.h> |
23 | #include <linux/mm_inline.h> | 23 | #include <linux/mm_inline.h> |
24 | #include <linux/swap.h> | 24 | #include <linux/swap.h> |
25 | #include <linux/writeback.h> | ||
26 | #include <linux/buffer_head.h> | ||
25 | #include <linux/module.h> | 27 | #include <linux/module.h> |
28 | #include <linux/syscalls.h> | ||
26 | 29 | ||
27 | /* | 30 | /* |
28 | * Passed to the actors | 31 | * Passed to the actors |
@@ -34,28 +37,37 @@ struct splice_desc { | |||
34 | loff_t pos; /* file position */ | 37 | loff_t pos; /* file position */ |
35 | }; | 38 | }; |
36 | 39 | ||
40 | /* | ||
41 | * Attempt to steal a page from a pipe buffer. This should perhaps go into | ||
42 | * a vm helper function, it's already simplified quite a bit by the | ||
43 | * addition of remove_mapping(). If success is returned, the caller may | ||
44 | * attempt to reuse this page for another destination. | ||
45 | */ | ||
37 | static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, | 46 | static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, |
38 | struct pipe_buffer *buf) | 47 | struct pipe_buffer *buf) |
39 | { | 48 | { |
40 | struct page *page = buf->page; | 49 | struct page *page = buf->page; |
50 | struct address_space *mapping = page_mapping(page); | ||
41 | 51 | ||
42 | WARN_ON(!PageLocked(page)); | 52 | WARN_ON(!PageLocked(page)); |
43 | WARN_ON(!PageUptodate(page)); | 53 | WARN_ON(!PageUptodate(page)); |
44 | 54 | ||
45 | if (!remove_mapping(page_mapping(page), page)) | 55 | /* |
46 | return 1; | 56 | * At least for ext2 with nobh option, we need to wait on writeback |
57 | * completing on this page, since we'll remove it from the pagecache. | ||
58 | * Otherwise truncate won't wait on the page, allowing the disk | ||
59 | * blocks to be reused by someone else before we actually wrote our | ||
60 | * data to them. fs corruption ensues. | ||
61 | */ | ||
62 | wait_on_page_writeback(page); | ||
47 | 63 | ||
48 | if (PageLRU(page)) { | 64 | if (PagePrivate(page)) |
49 | struct zone *zone = page_zone(page); | 65 | try_to_release_page(page, mapping_gfp_mask(mapping)); |
50 | 66 | ||
51 | spin_lock_irq(&zone->lru_lock); | 67 | if (!remove_mapping(mapping, page)) |
52 | BUG_ON(!PageLRU(page)); | 68 | return 1; |
53 | __ClearPageLRU(page); | ||
54 | del_page_from_lru(zone, page); | ||
55 | spin_unlock_irq(&zone->lru_lock); | ||
56 | } | ||
57 | 69 | ||
58 | buf->stolen = 1; | 70 | buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; |
59 | return 0; | 71 | return 0; |
60 | } | 72 | } |
61 | 73 | ||
@@ -64,7 +76,7 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info, | |||
64 | { | 76 | { |
65 | page_cache_release(buf->page); | 77 | page_cache_release(buf->page); |
66 | buf->page = NULL; | 78 | buf->page = NULL; |
67 | buf->stolen = 0; | 79 | buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU); |
68 | } | 80 | } |
69 | 81 | ||
70 | static void *page_cache_pipe_buf_map(struct file *file, | 82 | static void *page_cache_pipe_buf_map(struct file *file, |
@@ -91,8 +103,7 @@ static void *page_cache_pipe_buf_map(struct file *file, | |||
91 | static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, | 103 | static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, |
92 | struct pipe_buffer *buf) | 104 | struct pipe_buffer *buf) |
93 | { | 105 | { |
94 | if (!buf->stolen) | 106 | unlock_page(buf->page); |
95 | unlock_page(buf->page); | ||
96 | kunmap(buf->page); | 107 | kunmap(buf->page); |
97 | } | 108 | } |
98 | 109 | ||
@@ -104,6 +115,10 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = { | |||
104 | .steal = page_cache_pipe_buf_steal, | 115 | .steal = page_cache_pipe_buf_steal, |
105 | }; | 116 | }; |
106 | 117 | ||
118 | /* | ||
119 | * Pipe output worker. This sets up our pipe format with the page cache | ||
120 | * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). | ||
121 | */ | ||
107 | static ssize_t move_to_pipe(struct inode *inode, struct page **pages, | 122 | static ssize_t move_to_pipe(struct inode *inode, struct page **pages, |
108 | int nr_pages, unsigned long offset, | 123 | int nr_pages, unsigned long offset, |
109 | unsigned long len, unsigned int flags) | 124 | unsigned long len, unsigned int flags) |
@@ -237,9 +252,9 @@ static int __generic_file_splice_read(struct file *in, struct inode *pipe, | |||
237 | * fill shadow[] with pages at the right locations, so we only | 252 | * fill shadow[] with pages at the right locations, so we only |
238 | * have to fill holes | 253 | * have to fill holes |
239 | */ | 254 | */ |
240 | memset(shadow, 0, i * sizeof(struct page *)); | 255 | memset(shadow, 0, nr_pages * sizeof(struct page *)); |
241 | for (j = 0, pidx = index; j < i; pidx++, j++) | 256 | for (j = 0; j < i; j++) |
242 | shadow[pages[j]->index - pidx] = pages[j]; | 257 | shadow[pages[j]->index - index] = pages[j]; |
243 | 258 | ||
244 | /* | 259 | /* |
245 | * now fill in the holes | 260 | * now fill in the holes |
@@ -288,6 +303,16 @@ splice_them: | |||
288 | return move_to_pipe(pipe, pages, i, offset, len, flags); | 303 | return move_to_pipe(pipe, pages, i, offset, len, flags); |
289 | } | 304 | } |
290 | 305 | ||
306 | /** | ||
307 | * generic_file_splice_read - splice data from file to a pipe | ||
308 | * @in: file to splice from | ||
309 | * @pipe: pipe to splice to | ||
310 | * @len: number of bytes to splice | ||
311 | * @flags: splice modifier flags | ||
312 | * | ||
313 | * Will read pages from given file and fill them into a pipe. | ||
314 | * | ||
315 | */ | ||
291 | ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, | 316 | ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, |
292 | size_t len, unsigned int flags) | 317 | size_t len, unsigned int flags) |
293 | { | 318 | { |
@@ -318,8 +343,11 @@ ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, | |||
318 | return ret; | 343 | return ret; |
319 | } | 344 | } |
320 | 345 | ||
346 | EXPORT_SYMBOL(generic_file_splice_read); | ||
347 | |||
321 | /* | 348 | /* |
322 | * Send 'len' bytes to socket from 'file' at position 'pos' using sendpage(). | 349 | * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' |
350 | * using sendpage(). | ||
323 | */ | 351 | */ |
324 | static int pipe_to_sendpage(struct pipe_inode_info *info, | 352 | static int pipe_to_sendpage(struct pipe_inode_info *info, |
325 | struct pipe_buffer *buf, struct splice_desc *sd) | 353 | struct pipe_buffer *buf, struct splice_desc *sd) |
@@ -329,6 +357,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *info, | |||
329 | unsigned int offset; | 357 | unsigned int offset; |
330 | ssize_t ret; | 358 | ssize_t ret; |
331 | void *ptr; | 359 | void *ptr; |
360 | int more; | ||
332 | 361 | ||
333 | /* | 362 | /* |
334 | * sub-optimal, but we are limited by the pipe ->map. we don't | 363 | * sub-optimal, but we are limited by the pipe ->map. we don't |
@@ -341,9 +370,9 @@ static int pipe_to_sendpage(struct pipe_inode_info *info, | |||
341 | return PTR_ERR(ptr); | 370 | return PTR_ERR(ptr); |
342 | 371 | ||
343 | offset = pos & ~PAGE_CACHE_MASK; | 372 | offset = pos & ~PAGE_CACHE_MASK; |
373 | more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; | ||
344 | 374 | ||
345 | ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos, | 375 | ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); |
346 | sd->len < sd->total_len); | ||
347 | 376 | ||
348 | buf->ops->unmap(info, buf); | 377 | buf->ops->unmap(info, buf); |
349 | if (ret == sd->len) | 378 | if (ret == sd->len) |
@@ -365,16 +394,19 @@ static int pipe_to_sendpage(struct pipe_inode_info *info, | |||
365 | * - Destination page does not exist, we can add the pipe page to | 394 | * - Destination page does not exist, we can add the pipe page to |
366 | * the page cache and avoid the copy. | 395 | * the page cache and avoid the copy. |
367 | * | 396 | * |
368 | * For now we just do the slower thing and always copy pages over, it's | 397 | * If asked to move pages to the output file (SPLICE_F_MOVE is set in |
369 | * easier than migrating pages from the pipe to the target file. For the | 398 | * sd->flags), we attempt to migrate pages from the pipe to the output |
370 | * case of doing file | file splicing, the migrate approach had some LRU | 399 | * file address space page cache. This is possible if no one else has |
371 | * nastiness... | 400 | * the pipe page referenced outside of the pipe and page cache. If |
401 | * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create | ||
402 | * a new page in the output file page cache and fill/dirty that. | ||
372 | */ | 403 | */ |
373 | static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, | 404 | static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, |
374 | struct splice_desc *sd) | 405 | struct splice_desc *sd) |
375 | { | 406 | { |
376 | struct file *file = sd->file; | 407 | struct file *file = sd->file; |
377 | struct address_space *mapping = file->f_mapping; | 408 | struct address_space *mapping = file->f_mapping; |
409 | gfp_t gfp_mask = mapping_gfp_mask(mapping); | ||
378 | unsigned int offset; | 410 | unsigned int offset; |
379 | struct page *page; | 411 | struct page *page; |
380 | pgoff_t index; | 412 | pgoff_t index; |
@@ -395,18 +427,23 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, | |||
395 | * reuse buf page, if SPLICE_F_MOVE is set | 427 | * reuse buf page, if SPLICE_F_MOVE is set |
396 | */ | 428 | */ |
397 | if (sd->flags & SPLICE_F_MOVE) { | 429 | if (sd->flags & SPLICE_F_MOVE) { |
430 | /* | ||
431 | * If steal succeeds, buf->page is now pruned from the vm | ||
432 | * side (LRU and page cache) and we can reuse it. | ||
433 | */ | ||
398 | if (buf->ops->steal(info, buf)) | 434 | if (buf->ops->steal(info, buf)) |
399 | goto find_page; | 435 | goto find_page; |
400 | 436 | ||
401 | page = buf->page; | 437 | page = buf->page; |
402 | if (add_to_page_cache_lru(page, mapping, index, | 438 | if (add_to_page_cache(page, mapping, index, gfp_mask)) |
403 | mapping_gfp_mask(mapping))) | ||
404 | goto find_page; | 439 | goto find_page; |
440 | |||
441 | if (!(buf->flags & PIPE_BUF_FLAG_LRU)) | ||
442 | lru_cache_add(page); | ||
405 | } else { | 443 | } else { |
406 | find_page: | 444 | find_page: |
407 | ret = -ENOMEM; | 445 | ret = -ENOMEM; |
408 | page = find_or_create_page(mapping, index, | 446 | page = find_or_create_page(mapping, index, gfp_mask); |
409 | mapping_gfp_mask(mapping)); | ||
410 | if (!page) | 447 | if (!page) |
411 | goto out; | 448 | goto out; |
412 | 449 | ||
@@ -443,10 +480,13 @@ find_page: | |||
443 | } | 480 | } |
444 | 481 | ||
445 | ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); | 482 | ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); |
446 | if (ret) | 483 | if (ret == AOP_TRUNCATED_PAGE) { |
484 | page_cache_release(page); | ||
485 | goto find_page; | ||
486 | } else if (ret) | ||
447 | goto out; | 487 | goto out; |
448 | 488 | ||
449 | if (!buf->stolen) { | 489 | if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { |
450 | char *dst = kmap_atomic(page, KM_USER0); | 490 | char *dst = kmap_atomic(page, KM_USER0); |
451 | 491 | ||
452 | memcpy(dst + offset, src + buf->offset, sd->len); | 492 | memcpy(dst + offset, src + buf->offset, sd->len); |
@@ -455,16 +495,18 @@ find_page: | |||
455 | } | 495 | } |
456 | 496 | ||
457 | ret = mapping->a_ops->commit_write(file, page, 0, sd->len); | 497 | ret = mapping->a_ops->commit_write(file, page, 0, sd->len); |
458 | if (ret < 0) | 498 | if (ret == AOP_TRUNCATED_PAGE) { |
499 | page_cache_release(page); | ||
500 | goto find_page; | ||
501 | } else if (ret) | ||
459 | goto out; | 502 | goto out; |
460 | 503 | ||
461 | set_page_dirty(page); | 504 | balance_dirty_pages_ratelimited(mapping); |
462 | ret = write_one_page(page, 0); | ||
463 | out: | 505 | out: |
464 | if (ret < 0) | 506 | if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { |
465 | unlock_page(page); | ||
466 | if (!buf->stolen) | ||
467 | page_cache_release(page); | 507 | page_cache_release(page); |
508 | unlock_page(page); | ||
509 | } | ||
468 | buf->ops->unmap(info, buf); | 510 | buf->ops->unmap(info, buf); |
469 | return ret; | 511 | return ret; |
470 | } | 512 | } |
@@ -472,6 +514,11 @@ out: | |||
472 | typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, | 514 | typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, |
473 | struct splice_desc *); | 515 | struct splice_desc *); |
474 | 516 | ||
517 | /* | ||
518 | * Pipe input worker. Most of this logic works like a regular pipe, the | ||
519 | * key here is the 'actor' worker passed in that actually moves the data | ||
520 | * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. | ||
521 | */ | ||
475 | static ssize_t move_from_pipe(struct inode *inode, struct file *out, | 522 | static ssize_t move_from_pipe(struct inode *inode, struct file *out, |
476 | size_t len, unsigned int flags, | 523 | size_t len, unsigned int flags, |
477 | splice_actor *actor) | 524 | splice_actor *actor) |
@@ -573,21 +620,67 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out, | |||
573 | 620 | ||
574 | } | 621 | } |
575 | 622 | ||
623 | /** | ||
624 | * generic_file_splice_write - splice data from a pipe to a file | ||
625 | * @inode: pipe inode | ||
626 | * @out: file to write to | ||
627 | * @len: number of bytes to splice | ||
628 | * @flags: splice modifier flags | ||
629 | * | ||
630 | * Will either move or copy pages (determined by @flags options) from | ||
631 | * the given pipe inode to the given file. | ||
632 | * | ||
633 | */ | ||
576 | ssize_t generic_file_splice_write(struct inode *inode, struct file *out, | 634 | ssize_t generic_file_splice_write(struct inode *inode, struct file *out, |
577 | size_t len, unsigned int flags) | 635 | size_t len, unsigned int flags) |
578 | { | 636 | { |
579 | return move_from_pipe(inode, out, len, flags, pipe_to_file); | 637 | struct address_space *mapping = out->f_mapping; |
638 | ssize_t ret = move_from_pipe(inode, out, len, flags, pipe_to_file); | ||
639 | |||
640 | /* | ||
641 | * if file or inode is SYNC and we actually wrote some data, sync it | ||
642 | */ | ||
643 | if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host)) | ||
644 | && ret > 0) { | ||
645 | struct inode *inode = mapping->host; | ||
646 | int err; | ||
647 | |||
648 | mutex_lock(&inode->i_mutex); | ||
649 | err = generic_osync_inode(mapping->host, mapping, | ||
650 | OSYNC_METADATA|OSYNC_DATA); | ||
651 | mutex_unlock(&inode->i_mutex); | ||
652 | |||
653 | if (err) | ||
654 | ret = err; | ||
655 | } | ||
656 | |||
657 | return ret; | ||
580 | } | 658 | } |
581 | 659 | ||
660 | EXPORT_SYMBOL(generic_file_splice_write); | ||
661 | |||
662 | /** | ||
663 | * generic_splice_sendpage - splice data from a pipe to a socket | ||
664 | * @inode: pipe inode | ||
665 | * @out: socket to write to | ||
666 | * @len: number of bytes to splice | ||
667 | * @flags: splice modifier flags | ||
668 | * | ||
669 | * Will send @len bytes from the pipe to a network socket. No data copying | ||
670 | * is involved. | ||
671 | * | ||
672 | */ | ||
582 | ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, | 673 | ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, |
583 | size_t len, unsigned int flags) | 674 | size_t len, unsigned int flags) |
584 | { | 675 | { |
585 | return move_from_pipe(inode, out, len, flags, pipe_to_sendpage); | 676 | return move_from_pipe(inode, out, len, flags, pipe_to_sendpage); |
586 | } | 677 | } |
587 | 678 | ||
588 | EXPORT_SYMBOL(generic_file_splice_write); | 679 | EXPORT_SYMBOL(generic_splice_sendpage); |
589 | EXPORT_SYMBOL(generic_file_splice_read); | ||
590 | 680 | ||
681 | /* | ||
682 | * Attempt to initiate a splice from pipe to file. | ||
683 | */ | ||
591 | static long do_splice_from(struct inode *pipe, struct file *out, size_t len, | 684 | static long do_splice_from(struct inode *pipe, struct file *out, size_t len, |
592 | unsigned int flags) | 685 | unsigned int flags) |
593 | { | 686 | { |
@@ -608,6 +701,9 @@ static long do_splice_from(struct inode *pipe, struct file *out, size_t len, | |||
608 | return out->f_op->splice_write(pipe, out, len, flags); | 701 | return out->f_op->splice_write(pipe, out, len, flags); |
609 | } | 702 | } |
610 | 703 | ||
704 | /* | ||
705 | * Attempt to initiate a splice from a file to a pipe. | ||
706 | */ | ||
611 | static long do_splice_to(struct file *in, struct inode *pipe, size_t len, | 707 | static long do_splice_to(struct file *in, struct inode *pipe, size_t len, |
612 | unsigned int flags) | 708 | unsigned int flags) |
613 | { | 709 | { |
@@ -636,6 +732,9 @@ static long do_splice_to(struct file *in, struct inode *pipe, size_t len, | |||
636 | return in->f_op->splice_read(in, pipe, len, flags); | 732 | return in->f_op->splice_read(in, pipe, len, flags); |
637 | } | 733 | } |
638 | 734 | ||
735 | /* | ||
736 | * Determine where to splice to/from. | ||
737 | */ | ||
639 | static long do_splice(struct file *in, struct file *out, size_t len, | 738 | static long do_splice(struct file *in, struct file *out, size_t len, |
640 | unsigned int flags) | 739 | unsigned int flags) |
641 | { | 740 | { |