diff options
Diffstat (limited to 'fs/splice.c')
-rw-r--r-- | fs/splice.c | 185 |
1 files changed, 127 insertions, 58 deletions
diff --git a/fs/splice.c b/fs/splice.c index 8d57e89924a6..22fac87e90b3 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -50,7 +50,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, | |||
50 | struct page *page = buf->page; | 50 | struct page *page = buf->page; |
51 | struct address_space *mapping = page_mapping(page); | 51 | struct address_space *mapping = page_mapping(page); |
52 | 52 | ||
53 | WARN_ON(!PageLocked(page)); | 53 | lock_page(page); |
54 | |||
54 | WARN_ON(!PageUptodate(page)); | 55 | WARN_ON(!PageUptodate(page)); |
55 | 56 | ||
56 | /* | 57 | /* |
@@ -65,8 +66,10 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, | |||
65 | if (PagePrivate(page)) | 66 | if (PagePrivate(page)) |
66 | try_to_release_page(page, mapping_gfp_mask(mapping)); | 67 | try_to_release_page(page, mapping_gfp_mask(mapping)); |
67 | 68 | ||
68 | if (!remove_mapping(mapping, page)) | 69 | if (!remove_mapping(mapping, page)) { |
70 | unlock_page(page); | ||
69 | return 1; | 71 | return 1; |
72 | } | ||
70 | 73 | ||
71 | buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; | 74 | buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; |
72 | return 0; | 75 | return 0; |
@@ -145,8 +148,8 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = { | |||
145 | * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). | 148 | * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). |
146 | */ | 149 | */ |
147 | static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, | 150 | static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, |
148 | int nr_pages, unsigned long offset, | 151 | int nr_pages, unsigned long len, |
149 | unsigned long len, unsigned int flags) | 152 | unsigned int offset, unsigned int flags) |
150 | { | 153 | { |
151 | int ret, do_wakeup, i; | 154 | int ret, do_wakeup, i; |
152 | 155 | ||
@@ -243,14 +246,16 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
243 | unsigned int flags) | 246 | unsigned int flags) |
244 | { | 247 | { |
245 | struct address_space *mapping = in->f_mapping; | 248 | struct address_space *mapping = in->f_mapping; |
246 | unsigned int offset, nr_pages; | 249 | unsigned int loff, offset, nr_pages; |
247 | struct page *pages[PIPE_BUFFERS]; | 250 | struct page *pages[PIPE_BUFFERS]; |
248 | struct page *page; | 251 | struct page *page; |
249 | pgoff_t index; | 252 | pgoff_t index, end_index; |
253 | loff_t isize; | ||
254 | size_t bytes; | ||
250 | int i, error; | 255 | int i, error; |
251 | 256 | ||
252 | index = *ppos >> PAGE_CACHE_SHIFT; | 257 | index = *ppos >> PAGE_CACHE_SHIFT; |
253 | offset = *ppos & ~PAGE_CACHE_MASK; | 258 | loff = offset = *ppos & ~PAGE_CACHE_MASK; |
254 | nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 259 | nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
255 | 260 | ||
256 | if (nr_pages > PIPE_BUFFERS) | 261 | if (nr_pages > PIPE_BUFFERS) |
@@ -268,6 +273,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
268 | * Now fill in the holes: | 273 | * Now fill in the holes: |
269 | */ | 274 | */ |
270 | error = 0; | 275 | error = 0; |
276 | bytes = 0; | ||
271 | for (i = 0; i < nr_pages; i++, index++) { | 277 | for (i = 0; i < nr_pages; i++, index++) { |
272 | find_page: | 278 | find_page: |
273 | /* | 279 | /* |
@@ -276,14 +282,6 @@ find_page: | |||
276 | page = find_get_page(mapping, index); | 282 | page = find_get_page(mapping, index); |
277 | if (!page) { | 283 | if (!page) { |
278 | /* | 284 | /* |
279 | * If in nonblock mode then dont block on | ||
280 | * readpage (we've kicked readahead so there | ||
281 | * will be asynchronous progress): | ||
282 | */ | ||
283 | if (flags & SPLICE_F_NONBLOCK) | ||
284 | break; | ||
285 | |||
286 | /* | ||
287 | * page didn't exist, allocate one | 285 | * page didn't exist, allocate one |
288 | */ | 286 | */ |
289 | page = page_cache_alloc_cold(mapping); | 287 | page = page_cache_alloc_cold(mapping); |
@@ -304,6 +302,13 @@ find_page: | |||
304 | * If the page isn't uptodate, we may need to start io on it | 302 | * If the page isn't uptodate, we may need to start io on it |
305 | */ | 303 | */ |
306 | if (!PageUptodate(page)) { | 304 | if (!PageUptodate(page)) { |
305 | /* | ||
306 | * If in nonblock mode then don't block on waiting | ||
307 | * for an in-flight io page | ||
308 | */ | ||
309 | if (flags & SPLICE_F_NONBLOCK) | ||
310 | break; | ||
311 | |||
307 | lock_page(page); | 312 | lock_page(page); |
308 | 313 | ||
309 | /* | 314 | /* |
@@ -336,13 +341,41 @@ readpage: | |||
336 | goto find_page; | 341 | goto find_page; |
337 | break; | 342 | break; |
338 | } | 343 | } |
344 | |||
345 | /* | ||
346 | * i_size must be checked after ->readpage(). | ||
347 | */ | ||
348 | isize = i_size_read(mapping->host); | ||
349 | end_index = (isize - 1) >> PAGE_CACHE_SHIFT; | ||
350 | if (unlikely(!isize || index > end_index)) { | ||
351 | page_cache_release(page); | ||
352 | break; | ||
353 | } | ||
354 | |||
355 | /* | ||
356 | * if this is the last page, see if we need to shrink | ||
357 | * the length and stop | ||
358 | */ | ||
359 | if (end_index == index) { | ||
360 | loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK); | ||
361 | if (bytes + loff > isize) { | ||
362 | page_cache_release(page); | ||
363 | break; | ||
364 | } | ||
365 | /* | ||
366 | * force quit after adding this page | ||
367 | */ | ||
368 | nr_pages = i; | ||
369 | } | ||
339 | } | 370 | } |
340 | fill_it: | 371 | fill_it: |
341 | pages[i] = page; | 372 | pages[i] = page; |
373 | bytes += PAGE_CACHE_SIZE - loff; | ||
374 | loff = 0; | ||
342 | } | 375 | } |
343 | 376 | ||
344 | if (i) | 377 | if (i) |
345 | return move_to_pipe(pipe, pages, i, offset, len, flags); | 378 | return move_to_pipe(pipe, pages, i, bytes, offset, flags); |
346 | 379 | ||
347 | return error; | 380 | return error; |
348 | } | 381 | } |
@@ -369,17 +402,20 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, | |||
369 | while (len) { | 402 | while (len) { |
370 | ret = __generic_file_splice_read(in, ppos, pipe, len, flags); | 403 | ret = __generic_file_splice_read(in, ppos, pipe, len, flags); |
371 | 404 | ||
372 | if (ret <= 0) | 405 | if (ret < 0) |
373 | break; | 406 | break; |
407 | else if (!ret) { | ||
408 | if (spliced) | ||
409 | break; | ||
410 | if (flags & SPLICE_F_NONBLOCK) { | ||
411 | ret = -EAGAIN; | ||
412 | break; | ||
413 | } | ||
414 | } | ||
374 | 415 | ||
375 | *ppos += ret; | 416 | *ppos += ret; |
376 | len -= ret; | 417 | len -= ret; |
377 | spliced += ret; | 418 | spliced += ret; |
378 | |||
379 | if (!(flags & SPLICE_F_NONBLOCK)) | ||
380 | continue; | ||
381 | ret = -EAGAIN; | ||
382 | break; | ||
383 | } | 419 | } |
384 | 420 | ||
385 | if (spliced) | 421 | if (spliced) |
@@ -474,14 +510,12 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, | |||
474 | if (sd->flags & SPLICE_F_MOVE) { | 510 | if (sd->flags & SPLICE_F_MOVE) { |
475 | /* | 511 | /* |
476 | * If steal succeeds, buf->page is now pruned from the vm | 512 | * If steal succeeds, buf->page is now pruned from the vm |
477 | * side (LRU and page cache) and we can reuse it. | 513 | * side (LRU and page cache) and we can reuse it. The page |
514 | * will also be locked on successful return. | ||
478 | */ | 515 | */ |
479 | if (buf->ops->steal(info, buf)) | 516 | if (buf->ops->steal(info, buf)) |
480 | goto find_page; | 517 | goto find_page; |
481 | 518 | ||
482 | /* | ||
483 | * this will also set the page locked | ||
484 | */ | ||
485 | page = buf->page; | 519 | page = buf->page; |
486 | if (add_to_page_cache(page, mapping, index, gfp_mask)) | 520 | if (add_to_page_cache(page, mapping, index, gfp_mask)) |
487 | goto find_page; | 521 | goto find_page; |
@@ -490,15 +524,27 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, | |||
490 | lru_cache_add(page); | 524 | lru_cache_add(page); |
491 | } else { | 525 | } else { |
492 | find_page: | 526 | find_page: |
493 | ret = -ENOMEM; | 527 | page = find_lock_page(mapping, index); |
494 | page = find_or_create_page(mapping, index, gfp_mask); | 528 | if (!page) { |
495 | if (!page) | 529 | ret = -ENOMEM; |
496 | goto out_nomem; | 530 | page = page_cache_alloc_cold(mapping); |
531 | if (unlikely(!page)) | ||
532 | goto out_nomem; | ||
533 | |||
534 | /* | ||
535 | * This will also lock the page | ||
536 | */ | ||
537 | ret = add_to_page_cache_lru(page, mapping, index, | ||
538 | gfp_mask); | ||
539 | if (unlikely(ret)) | ||
540 | goto out; | ||
541 | } | ||
497 | 542 | ||
498 | /* | 543 | /* |
499 | * If the page is uptodate, it is also locked. If it isn't | 544 | * We get here with the page locked. If the page is also |
500 | * uptodate, we can mark it uptodate if we are filling the | 545 | * uptodate, we don't need to do more. If it isn't, we |
501 | * full page. Otherwise we need to read it in first... | 546 | * may need to bring it in if we are not going to overwrite |
547 | * the full page. | ||
502 | */ | 548 | */ |
503 | if (!PageUptodate(page)) { | 549 | if (!PageUptodate(page)) { |
504 | if (sd->len < PAGE_CACHE_SIZE) { | 550 | if (sd->len < PAGE_CACHE_SIZE) { |
@@ -520,10 +566,8 @@ find_page: | |||
520 | ret = -EIO; | 566 | ret = -EIO; |
521 | goto out; | 567 | goto out; |
522 | } | 568 | } |
523 | } else { | 569 | } else |
524 | WARN_ON(!PageLocked(page)); | ||
525 | SetPageUptodate(page); | 570 | SetPageUptodate(page); |
526 | } | ||
527 | } | 571 | } |
528 | } | 572 | } |
529 | 573 | ||
@@ -552,10 +596,10 @@ find_page: | |||
552 | mark_page_accessed(page); | 596 | mark_page_accessed(page); |
553 | balance_dirty_pages_ratelimited(mapping); | 597 | balance_dirty_pages_ratelimited(mapping); |
554 | out: | 598 | out: |
555 | if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { | 599 | if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) |
556 | page_cache_release(page); | 600 | page_cache_release(page); |
557 | unlock_page(page); | 601 | |
558 | } | 602 | unlock_page(page); |
559 | out_nomem: | 603 | out_nomem: |
560 | buf->ops->unmap(info, buf); | 604 | buf->ops->unmap(info, buf); |
561 | return ret; | 605 | return ret; |
@@ -687,22 +731,26 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
687 | ssize_t ret; | 731 | ssize_t ret; |
688 | 732 | ||
689 | ret = move_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); | 733 | ret = move_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); |
690 | 734 | if (ret > 0) { | |
691 | /* | ||
692 | * If file or inode is SYNC and we actually wrote some data, sync it. | ||
693 | */ | ||
694 | if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host)) | ||
695 | && ret > 0) { | ||
696 | struct inode *inode = mapping->host; | 735 | struct inode *inode = mapping->host; |
697 | int err; | ||
698 | 736 | ||
699 | mutex_lock(&inode->i_mutex); | 737 | *ppos += ret; |
700 | err = generic_osync_inode(mapping->host, mapping, | 738 | |
701 | OSYNC_METADATA|OSYNC_DATA); | 739 | /* |
702 | mutex_unlock(&inode->i_mutex); | 740 | * If file or inode is SYNC and we actually wrote some data, |
741 | * sync it. | ||
742 | */ | ||
743 | if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
744 | int err; | ||
745 | |||
746 | mutex_lock(&inode->i_mutex); | ||
747 | err = generic_osync_inode(inode, mapping, | ||
748 | OSYNC_METADATA|OSYNC_DATA); | ||
749 | mutex_unlock(&inode->i_mutex); | ||
703 | 750 | ||
704 | if (err) | 751 | if (err) |
705 | ret = err; | 752 | ret = err; |
753 | } | ||
706 | } | 754 | } |
707 | 755 | ||
708 | return ret; | 756 | return ret; |
@@ -904,6 +952,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
904 | { | 952 | { |
905 | struct pipe_inode_info *pipe; | 953 | struct pipe_inode_info *pipe; |
906 | loff_t offset, *off; | 954 | loff_t offset, *off; |
955 | long ret; | ||
907 | 956 | ||
908 | pipe = in->f_dentry->d_inode->i_pipe; | 957 | pipe = in->f_dentry->d_inode->i_pipe; |
909 | if (pipe) { | 958 | if (pipe) { |
@@ -918,7 +967,12 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
918 | } else | 967 | } else |
919 | off = &out->f_pos; | 968 | off = &out->f_pos; |
920 | 969 | ||
921 | return do_splice_from(pipe, out, off, len, flags); | 970 | ret = do_splice_from(pipe, out, off, len, flags); |
971 | |||
972 | if (off_out && copy_to_user(off_out, off, sizeof(loff_t))) | ||
973 | ret = -EFAULT; | ||
974 | |||
975 | return ret; | ||
922 | } | 976 | } |
923 | 977 | ||
924 | pipe = out->f_dentry->d_inode->i_pipe; | 978 | pipe = out->f_dentry->d_inode->i_pipe; |
@@ -934,7 +988,12 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
934 | } else | 988 | } else |
935 | off = &in->f_pos; | 989 | off = &in->f_pos; |
936 | 990 | ||
937 | return do_splice_to(in, off, pipe, len, flags); | 991 | ret = do_splice_to(in, off, pipe, len, flags); |
992 | |||
993 | if (off_in && copy_to_user(off_in, off, sizeof(loff_t))) | ||
994 | ret = -EFAULT; | ||
995 | |||
996 | return ret; | ||
938 | } | 997 | } |
939 | 998 | ||
940 | return -EINVAL; | 999 | return -EINVAL; |
@@ -979,7 +1038,9 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
979 | size_t len, unsigned int flags) | 1038 | size_t len, unsigned int flags) |
980 | { | 1039 | { |
981 | struct pipe_buffer *ibuf, *obuf; | 1040 | struct pipe_buffer *ibuf, *obuf; |
982 | int ret = 0, do_wakeup = 0, i; | 1041 | int ret, do_wakeup, i, ipipe_first; |
1042 | |||
1043 | ret = do_wakeup = ipipe_first = 0; | ||
983 | 1044 | ||
984 | /* | 1045 | /* |
985 | * Potential ABBA deadlock, work around it by ordering lock | 1046 | * Potential ABBA deadlock, work around it by ordering lock |
@@ -987,6 +1048,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
987 | * could deadlock (one doing tee from A -> B, the other from B -> A). | 1048 | * could deadlock (one doing tee from A -> B, the other from B -> A). |
988 | */ | 1049 | */ |
989 | if (ipipe->inode < opipe->inode) { | 1050 | if (ipipe->inode < opipe->inode) { |
1051 | ipipe_first = 1; | ||
990 | mutex_lock(&ipipe->inode->i_mutex); | 1052 | mutex_lock(&ipipe->inode->i_mutex); |
991 | mutex_lock(&opipe->inode->i_mutex); | 1053 | mutex_lock(&opipe->inode->i_mutex); |
992 | } else { | 1054 | } else { |
@@ -1035,9 +1097,11 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
1035 | 1097 | ||
1036 | /* | 1098 | /* |
1037 | * We have input available, but no output room. | 1099 | * We have input available, but no output room. |
1038 | * If we already copied data, return that. | 1100 | * If we already copied data, return that. If we |
1101 | * need to drop the opipe lock, it must be ordered | ||
1102 | * last to avoid deadlocks. | ||
1039 | */ | 1103 | */ |
1040 | if (flags & SPLICE_F_NONBLOCK) { | 1104 | if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) { |
1041 | if (!ret) | 1105 | if (!ret) |
1042 | ret = -EAGAIN; | 1106 | ret = -EAGAIN; |
1043 | break; | 1107 | break; |
@@ -1071,7 +1135,12 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
1071 | if (ret) | 1135 | if (ret) |
1072 | break; | 1136 | break; |
1073 | } | 1137 | } |
1074 | if (flags & SPLICE_F_NONBLOCK) { | 1138 | /* |
1139 | * pipe_wait() drops the ipipe mutex. To avoid deadlocks | ||
1140 | * with another process, we can only safely do that if | ||
1141 | * the ipipe lock is ordered last. | ||
1142 | */ | ||
1143 | if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) { | ||
1075 | if (!ret) | 1144 | if (!ret) |
1076 | ret = -EAGAIN; | 1145 | ret = -EAGAIN; |
1077 | break; | 1146 | break; |