diff options
Diffstat (limited to 'fs/splice.c')
-rw-r--r-- | fs/splice.c | 207 |
1 files changed, 111 insertions, 96 deletions
diff --git a/fs/splice.c b/fs/splice.c index a46ddd28561e..a285fd746dc0 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -51,7 +51,7 @@ struct splice_pipe_desc { | |||
51 | * addition of remove_mapping(). If success is returned, the caller may | 51 | * addition of remove_mapping(). If success is returned, the caller may |
52 | * attempt to reuse this page for another destination. | 52 | * attempt to reuse this page for another destination. |
53 | */ | 53 | */ |
54 | static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, | 54 | static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, |
55 | struct pipe_buffer *buf) | 55 | struct pipe_buffer *buf) |
56 | { | 56 | { |
57 | struct page *page = buf->page; | 57 | struct page *page = buf->page; |
@@ -78,21 +78,19 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, | |||
78 | return 1; | 78 | return 1; |
79 | } | 79 | } |
80 | 80 | ||
81 | buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; | 81 | buf->flags |= PIPE_BUF_FLAG_LRU; |
82 | return 0; | 82 | return 0; |
83 | } | 83 | } |
84 | 84 | ||
85 | static void page_cache_pipe_buf_release(struct pipe_inode_info *info, | 85 | static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, |
86 | struct pipe_buffer *buf) | 86 | struct pipe_buffer *buf) |
87 | { | 87 | { |
88 | page_cache_release(buf->page); | 88 | page_cache_release(buf->page); |
89 | buf->page = NULL; | 89 | buf->flags &= ~PIPE_BUF_FLAG_LRU; |
90 | buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU); | ||
91 | } | 90 | } |
92 | 91 | ||
93 | static void *page_cache_pipe_buf_map(struct file *file, | 92 | static int page_cache_pipe_buf_pin(struct pipe_inode_info *pipe, |
94 | struct pipe_inode_info *info, | 93 | struct pipe_buffer *buf) |
95 | struct pipe_buffer *buf) | ||
96 | { | 94 | { |
97 | struct page *page = buf->page; | 95 | struct page *page = buf->page; |
98 | int err; | 96 | int err; |
@@ -118,64 +116,45 @@ static void *page_cache_pipe_buf_map(struct file *file, | |||
118 | } | 116 | } |
119 | 117 | ||
120 | /* | 118 | /* |
121 | * Page is ok afterall, fall through to mapping. | 119 | * Page is ok afterall, we are done. |
122 | */ | 120 | */ |
123 | unlock_page(page); | 121 | unlock_page(page); |
124 | } | 122 | } |
125 | 123 | ||
126 | return kmap(page); | 124 | return 0; |
127 | error: | 125 | error: |
128 | unlock_page(page); | 126 | unlock_page(page); |
129 | return ERR_PTR(err); | 127 | return err; |
130 | } | ||
131 | |||
132 | static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, | ||
133 | struct pipe_buffer *buf) | ||
134 | { | ||
135 | kunmap(buf->page); | ||
136 | } | ||
137 | |||
138 | static void *user_page_pipe_buf_map(struct file *file, | ||
139 | struct pipe_inode_info *pipe, | ||
140 | struct pipe_buffer *buf) | ||
141 | { | ||
142 | return kmap(buf->page); | ||
143 | } | ||
144 | |||
145 | static void user_page_pipe_buf_unmap(struct pipe_inode_info *pipe, | ||
146 | struct pipe_buffer *buf) | ||
147 | { | ||
148 | kunmap(buf->page); | ||
149 | } | ||
150 | |||
151 | static void page_cache_pipe_buf_get(struct pipe_inode_info *info, | ||
152 | struct pipe_buffer *buf) | ||
153 | { | ||
154 | page_cache_get(buf->page); | ||
155 | } | 128 | } |
156 | 129 | ||
157 | static struct pipe_buf_operations page_cache_pipe_buf_ops = { | 130 | static struct pipe_buf_operations page_cache_pipe_buf_ops = { |
158 | .can_merge = 0, | 131 | .can_merge = 0, |
159 | .map = page_cache_pipe_buf_map, | 132 | .map = generic_pipe_buf_map, |
160 | .unmap = page_cache_pipe_buf_unmap, | 133 | .unmap = generic_pipe_buf_unmap, |
134 | .pin = page_cache_pipe_buf_pin, | ||
161 | .release = page_cache_pipe_buf_release, | 135 | .release = page_cache_pipe_buf_release, |
162 | .steal = page_cache_pipe_buf_steal, | 136 | .steal = page_cache_pipe_buf_steal, |
163 | .get = page_cache_pipe_buf_get, | 137 | .get = generic_pipe_buf_get, |
164 | }; | 138 | }; |
165 | 139 | ||
166 | static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, | 140 | static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, |
167 | struct pipe_buffer *buf) | 141 | struct pipe_buffer *buf) |
168 | { | 142 | { |
169 | return 1; | 143 | if (!(buf->flags & PIPE_BUF_FLAG_GIFT)) |
144 | return 1; | ||
145 | |||
146 | buf->flags |= PIPE_BUF_FLAG_LRU; | ||
147 | return generic_pipe_buf_steal(pipe, buf); | ||
170 | } | 148 | } |
171 | 149 | ||
172 | static struct pipe_buf_operations user_page_pipe_buf_ops = { | 150 | static struct pipe_buf_operations user_page_pipe_buf_ops = { |
173 | .can_merge = 0, | 151 | .can_merge = 0, |
174 | .map = user_page_pipe_buf_map, | 152 | .map = generic_pipe_buf_map, |
175 | .unmap = user_page_pipe_buf_unmap, | 153 | .unmap = generic_pipe_buf_unmap, |
154 | .pin = generic_pipe_buf_pin, | ||
176 | .release = page_cache_pipe_buf_release, | 155 | .release = page_cache_pipe_buf_release, |
177 | .steal = user_page_pipe_buf_steal, | 156 | .steal = user_page_pipe_buf_steal, |
178 | .get = page_cache_pipe_buf_get, | 157 | .get = generic_pipe_buf_get, |
179 | }; | 158 | }; |
180 | 159 | ||
181 | /* | 160 | /* |
@@ -210,6 +189,9 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
210 | buf->offset = spd->partial[page_nr].offset; | 189 | buf->offset = spd->partial[page_nr].offset; |
211 | buf->len = spd->partial[page_nr].len; | 190 | buf->len = spd->partial[page_nr].len; |
212 | buf->ops = spd->ops; | 191 | buf->ops = spd->ops; |
192 | if (spd->flags & SPLICE_F_GIFT) | ||
193 | buf->flags |= PIPE_BUF_FLAG_GIFT; | ||
194 | |||
213 | pipe->nrbufs++; | 195 | pipe->nrbufs++; |
214 | page_nr++; | 196 | page_nr++; |
215 | ret += buf->len; | 197 | ret += buf->len; |
@@ -326,6 +308,12 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
326 | page = find_get_page(mapping, index); | 308 | page = find_get_page(mapping, index); |
327 | if (!page) { | 309 | if (!page) { |
328 | /* | 310 | /* |
311 | * Make sure the read-ahead engine is notified | ||
312 | * about this failure. | ||
313 | */ | ||
314 | handle_ra_miss(mapping, &in->f_ra, index); | ||
315 | |||
316 | /* | ||
329 | * page didn't exist, allocate one. | 317 | * page didn't exist, allocate one. |
330 | */ | 318 | */ |
331 | page = page_cache_alloc_cold(mapping); | 319 | page = page_cache_alloc_cold(mapping); |
@@ -336,6 +324,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
336 | mapping_gfp_mask(mapping)); | 324 | mapping_gfp_mask(mapping)); |
337 | if (unlikely(error)) { | 325 | if (unlikely(error)) { |
338 | page_cache_release(page); | 326 | page_cache_release(page); |
327 | if (error == -EEXIST) | ||
328 | continue; | ||
339 | break; | 329 | break; |
340 | } | 330 | } |
341 | /* | 331 | /* |
@@ -512,31 +502,21 @@ EXPORT_SYMBOL(generic_file_splice_read); | |||
512 | * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' | 502 | * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' |
513 | * using sendpage(). Return the number of bytes sent. | 503 | * using sendpage(). Return the number of bytes sent. |
514 | */ | 504 | */ |
515 | static int pipe_to_sendpage(struct pipe_inode_info *info, | 505 | static int pipe_to_sendpage(struct pipe_inode_info *pipe, |
516 | struct pipe_buffer *buf, struct splice_desc *sd) | 506 | struct pipe_buffer *buf, struct splice_desc *sd) |
517 | { | 507 | { |
518 | struct file *file = sd->file; | 508 | struct file *file = sd->file; |
519 | loff_t pos = sd->pos; | 509 | loff_t pos = sd->pos; |
520 | ssize_t ret; | 510 | int ret, more; |
521 | void *ptr; | ||
522 | int more; | ||
523 | |||
524 | /* | ||
525 | * Sub-optimal, but we are limited by the pipe ->map. We don't | ||
526 | * need a kmap'ed buffer here, we just want to make sure we | ||
527 | * have the page pinned if the pipe page originates from the | ||
528 | * page cache. | ||
529 | */ | ||
530 | ptr = buf->ops->map(file, info, buf); | ||
531 | if (IS_ERR(ptr)) | ||
532 | return PTR_ERR(ptr); | ||
533 | 511 | ||
534 | more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; | 512 | ret = buf->ops->pin(pipe, buf); |
513 | if (!ret) { | ||
514 | more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; | ||
535 | 515 | ||
536 | ret = file->f_op->sendpage(file, buf->page, buf->offset, sd->len, | 516 | ret = file->f_op->sendpage(file, buf->page, buf->offset, |
537 | &pos, more); | 517 | sd->len, &pos, more); |
518 | } | ||
538 | 519 | ||
539 | buf->ops->unmap(info, buf); | ||
540 | return ret; | 520 | return ret; |
541 | } | 521 | } |
542 | 522 | ||
@@ -560,7 +540,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *info, | |||
560 | * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create | 540 | * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create |
561 | * a new page in the output file page cache and fill/dirty that. | 541 | * a new page in the output file page cache and fill/dirty that. |
562 | */ | 542 | */ |
563 | static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, | 543 | static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, |
564 | struct splice_desc *sd) | 544 | struct splice_desc *sd) |
565 | { | 545 | { |
566 | struct file *file = sd->file; | 546 | struct file *file = sd->file; |
@@ -569,15 +549,14 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, | |||
569 | unsigned int offset, this_len; | 549 | unsigned int offset, this_len; |
570 | struct page *page; | 550 | struct page *page; |
571 | pgoff_t index; | 551 | pgoff_t index; |
572 | char *src; | ||
573 | int ret; | 552 | int ret; |
574 | 553 | ||
575 | /* | 554 | /* |
576 | * make sure the data in this buffer is uptodate | 555 | * make sure the data in this buffer is uptodate |
577 | */ | 556 | */ |
578 | src = buf->ops->map(file, info, buf); | 557 | ret = buf->ops->pin(pipe, buf); |
579 | if (IS_ERR(src)) | 558 | if (unlikely(ret)) |
580 | return PTR_ERR(src); | 559 | return ret; |
581 | 560 | ||
582 | index = sd->pos >> PAGE_CACHE_SHIFT; | 561 | index = sd->pos >> PAGE_CACHE_SHIFT; |
583 | offset = sd->pos & ~PAGE_CACHE_MASK; | 562 | offset = sd->pos & ~PAGE_CACHE_MASK; |
@@ -587,20 +566,25 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, | |||
587 | this_len = PAGE_CACHE_SIZE - offset; | 566 | this_len = PAGE_CACHE_SIZE - offset; |
588 | 567 | ||
589 | /* | 568 | /* |
590 | * Reuse buf page, if SPLICE_F_MOVE is set. | 569 | * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full |
570 | * page. | ||
591 | */ | 571 | */ |
592 | if (sd->flags & SPLICE_F_MOVE) { | 572 | if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) { |
593 | /* | 573 | /* |
594 | * If steal succeeds, buf->page is now pruned from the vm | 574 | * If steal succeeds, buf->page is now pruned from the |
595 | * side (LRU and page cache) and we can reuse it. The page | 575 | * pagecache and we can reuse it. The page will also be |
596 | * will also be looked on successful return. | 576 | * locked on successful return. |
597 | */ | 577 | */ |
598 | if (buf->ops->steal(info, buf)) | 578 | if (buf->ops->steal(pipe, buf)) |
599 | goto find_page; | 579 | goto find_page; |
600 | 580 | ||
601 | page = buf->page; | 581 | page = buf->page; |
602 | if (add_to_page_cache(page, mapping, index, gfp_mask)) | 582 | if (add_to_page_cache(page, mapping, index, gfp_mask)) { |
583 | unlock_page(page); | ||
603 | goto find_page; | 584 | goto find_page; |
585 | } | ||
586 | |||
587 | page_cache_get(page); | ||
604 | 588 | ||
605 | if (!(buf->flags & PIPE_BUF_FLAG_LRU)) | 589 | if (!(buf->flags & PIPE_BUF_FLAG_LRU)) |
606 | lru_cache_add(page); | 590 | lru_cache_add(page); |
@@ -654,40 +638,55 @@ find_page: | |||
654 | } | 638 | } |
655 | 639 | ||
656 | ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len); | 640 | ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len); |
657 | if (ret == AOP_TRUNCATED_PAGE) { | 641 | if (unlikely(ret)) { |
642 | loff_t isize = i_size_read(mapping->host); | ||
643 | |||
644 | if (ret != AOP_TRUNCATED_PAGE) | ||
645 | unlock_page(page); | ||
658 | page_cache_release(page); | 646 | page_cache_release(page); |
659 | goto find_page; | 647 | if (ret == AOP_TRUNCATED_PAGE) |
660 | } else if (ret) | 648 | goto find_page; |
649 | |||
650 | /* | ||
651 | * prepare_write() may have instantiated a few blocks | ||
652 | * outside i_size. Trim these off again. | ||
653 | */ | ||
654 | if (sd->pos + this_len > isize) | ||
655 | vmtruncate(mapping->host, isize); | ||
656 | |||
661 | goto out; | 657 | goto out; |
658 | } | ||
662 | 659 | ||
663 | if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { | 660 | if (buf->page != page) { |
664 | char *dst = kmap_atomic(page, KM_USER0); | 661 | /* |
662 | * Careful, ->map() uses KM_USER0! | ||
663 | */ | ||
664 | char *src = buf->ops->map(pipe, buf, 1); | ||
665 | char *dst = kmap_atomic(page, KM_USER1); | ||
665 | 666 | ||
666 | memcpy(dst + offset, src + buf->offset, this_len); | 667 | memcpy(dst + offset, src + buf->offset, this_len); |
667 | flush_dcache_page(page); | 668 | flush_dcache_page(page); |
668 | kunmap_atomic(dst, KM_USER0); | 669 | kunmap_atomic(dst, KM_USER1); |
670 | buf->ops->unmap(pipe, buf, src); | ||
669 | } | 671 | } |
670 | 672 | ||
671 | ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); | 673 | ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); |
672 | if (ret == AOP_TRUNCATED_PAGE) { | 674 | if (!ret) { |
675 | /* | ||
676 | * Return the number of bytes written and mark page as | ||
677 | * accessed, we are now done! | ||
678 | */ | ||
679 | ret = this_len; | ||
680 | mark_page_accessed(page); | ||
681 | balance_dirty_pages_ratelimited(mapping); | ||
682 | } else if (ret == AOP_TRUNCATED_PAGE) { | ||
673 | page_cache_release(page); | 683 | page_cache_release(page); |
674 | goto find_page; | 684 | goto find_page; |
675 | } else if (ret) | 685 | } |
676 | goto out; | ||
677 | |||
678 | /* | ||
679 | * Return the number of bytes written. | ||
680 | */ | ||
681 | ret = this_len; | ||
682 | mark_page_accessed(page); | ||
683 | balance_dirty_pages_ratelimited(mapping); | ||
684 | out: | 686 | out: |
685 | if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) | 687 | page_cache_release(page); |
686 | page_cache_release(page); | ||
687 | |||
688 | unlock_page(page); | 688 | unlock_page(page); |
689 | out_nomem: | 689 | out_nomem: |
690 | buf->ops->unmap(info, buf); | ||
691 | return ret; | 690 | return ret; |
692 | } | 691 | } |
693 | 692 | ||
@@ -1095,7 +1094,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1095 | */ | 1094 | */ |
1096 | static int get_iovec_page_array(const struct iovec __user *iov, | 1095 | static int get_iovec_page_array(const struct iovec __user *iov, |
1097 | unsigned int nr_vecs, struct page **pages, | 1096 | unsigned int nr_vecs, struct page **pages, |
1098 | struct partial_page *partial) | 1097 | struct partial_page *partial, int aligned) |
1099 | { | 1098 | { |
1100 | int buffers = 0, error = 0; | 1099 | int buffers = 0, error = 0; |
1101 | 1100 | ||
@@ -1135,6 +1134,15 @@ static int get_iovec_page_array(const struct iovec __user *iov, | |||
1135 | * in the user pages. | 1134 | * in the user pages. |
1136 | */ | 1135 | */ |
1137 | off = (unsigned long) base & ~PAGE_MASK; | 1136 | off = (unsigned long) base & ~PAGE_MASK; |
1137 | |||
1138 | /* | ||
1139 | * If asked for alignment, the offset must be zero and the | ||
1140 | * length a multiple of the PAGE_SIZE. | ||
1141 | */ | ||
1142 | error = -EINVAL; | ||
1143 | if (aligned && (off || len & ~PAGE_MASK)) | ||
1144 | break; | ||
1145 | |||
1138 | npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 1146 | npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
1139 | if (npages > PIPE_BUFFERS - buffers) | 1147 | if (npages > PIPE_BUFFERS - buffers) |
1140 | npages = PIPE_BUFFERS - buffers; | 1148 | npages = PIPE_BUFFERS - buffers; |
@@ -1150,7 +1158,7 @@ static int get_iovec_page_array(const struct iovec __user *iov, | |||
1150 | * Fill this contiguous range into the partial page map. | 1158 | * Fill this contiguous range into the partial page map. |
1151 | */ | 1159 | */ |
1152 | for (i = 0; i < error; i++) { | 1160 | for (i = 0; i < error; i++) { |
1153 | const int plen = min_t(size_t, len, PAGE_SIZE) - off; | 1161 | const int plen = min_t(size_t, len, PAGE_SIZE - off); |
1154 | 1162 | ||
1155 | partial[buffers].offset = off; | 1163 | partial[buffers].offset = off; |
1156 | partial[buffers].len = plen; | 1164 | partial[buffers].len = plen; |
@@ -1228,7 +1236,8 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, | |||
1228 | else if (unlikely(!nr_segs)) | 1236 | else if (unlikely(!nr_segs)) |
1229 | return 0; | 1237 | return 0; |
1230 | 1238 | ||
1231 | spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial); | 1239 | spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, |
1240 | flags & SPLICE_F_GIFT); | ||
1232 | if (spd.nr_pages <= 0) | 1241 | if (spd.nr_pages <= 0) |
1233 | return spd.nr_pages; | 1242 | return spd.nr_pages; |
1234 | 1243 | ||
@@ -1336,6 +1345,12 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
1336 | obuf = opipe->bufs + nbuf; | 1345 | obuf = opipe->bufs + nbuf; |
1337 | *obuf = *ibuf; | 1346 | *obuf = *ibuf; |
1338 | 1347 | ||
1348 | /* | ||
1349 | * Don't inherit the gift flag, we need to | ||
1350 | * prevent multiple steals of this page. | ||
1351 | */ | ||
1352 | obuf->flags &= ~PIPE_BUF_FLAG_GIFT; | ||
1353 | |||
1339 | if (obuf->len > len) | 1354 | if (obuf->len > len) |
1340 | obuf->len = len; | 1355 | obuf->len = len; |
1341 | 1356 | ||