diff options
author | Jens Axboe <jens.axboe@oracle.com> | 2010-05-20 04:43:18 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2010-05-21 15:12:40 -0400 |
commit | 35f3d14dbbc58447c61e38a162ea10add6b31dc7 (patch) | |
tree | 3e03cd540b7dcdac82195c4e76862c0ce6daaaf0 | |
parent | 3d42b3612891baecf709d93f28655a6882a65d41 (diff) |
pipe: add support for shrinking and growing pipes
This patch adds F_GETPIPE_SZ and F_SETPIPE_SZ fcntl() actions for
growing and shrinking the size of a pipe and adjusts pipe.c and splice.c
(and relay and network splice) usage to work with these larger (or smaller)
pipes.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- | fs/fcntl.c | 5 | ||||
-rw-r--r-- | fs/pipe.c | 107 | ||||
-rw-r--r-- | fs/splice.c | 151 | ||||
-rw-r--r-- | include/linux/fcntl.h | 6 | ||||
-rw-r--r-- | include/linux/pipe_fs_i.h | 11 | ||||
-rw-r--r-- | include/linux/splice.h | 7 | ||||
-rw-r--r-- | kernel/relay.c | 15 | ||||
-rw-r--r-- | kernel/trace/trace.c | 60 | ||||
-rw-r--r-- | net/core/skbuff.c | 38 |
9 files changed, 292 insertions, 108 deletions
diff --git a/fs/fcntl.c b/fs/fcntl.c index 452d02f9075e..bcba960328fa 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/dnotify.h> | 14 | #include <linux/dnotify.h> |
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/pipe_fs_i.h> | ||
17 | #include <linux/security.h> | 18 | #include <linux/security.h> |
18 | #include <linux/ptrace.h> | 19 | #include <linux/ptrace.h> |
19 | #include <linux/signal.h> | 20 | #include <linux/signal.h> |
@@ -412,6 +413,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | |||
412 | case F_NOTIFY: | 413 | case F_NOTIFY: |
413 | err = fcntl_dirnotify(fd, filp, arg); | 414 | err = fcntl_dirnotify(fd, filp, arg); |
414 | break; | 415 | break; |
416 | case F_SETPIPE_SZ: | ||
417 | case F_GETPIPE_SZ: | ||
418 | err = pipe_fcntl(filp, cmd, arg); | ||
419 | break; | ||
415 | default: | 420 | default: |
416 | break; | 421 | break; |
417 | } | 422 | } |
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/init.h> | 12 | #include <linux/init.h> |
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
14 | #include <linux/log2.h> | ||
14 | #include <linux/mount.h> | 15 | #include <linux/mount.h> |
15 | #include <linux/pipe_fs_i.h> | 16 | #include <linux/pipe_fs_i.h> |
16 | #include <linux/uio.h> | 17 | #include <linux/uio.h> |
@@ -390,7 +391,7 @@ redo: | |||
390 | if (!buf->len) { | 391 | if (!buf->len) { |
391 | buf->ops = NULL; | 392 | buf->ops = NULL; |
392 | ops->release(pipe, buf); | 393 | ops->release(pipe, buf); |
393 | curbuf = (curbuf + 1) & (PIPE_BUFFERS-1); | 394 | curbuf = (curbuf + 1) & (pipe->buffers - 1); |
394 | pipe->curbuf = curbuf; | 395 | pipe->curbuf = curbuf; |
395 | pipe->nrbufs = --bufs; | 396 | pipe->nrbufs = --bufs; |
396 | do_wakeup = 1; | 397 | do_wakeup = 1; |
@@ -472,7 +473,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, | |||
472 | chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ | 473 | chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ |
473 | if (pipe->nrbufs && chars != 0) { | 474 | if (pipe->nrbufs && chars != 0) { |
474 | int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) & | 475 | int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) & |
475 | (PIPE_BUFFERS-1); | 476 | (pipe->buffers - 1); |
476 | struct pipe_buffer *buf = pipe->bufs + lastbuf; | 477 | struct pipe_buffer *buf = pipe->bufs + lastbuf; |
477 | const struct pipe_buf_operations *ops = buf->ops; | 478 | const struct pipe_buf_operations *ops = buf->ops; |
478 | int offset = buf->offset + buf->len; | 479 | int offset = buf->offset + buf->len; |
@@ -518,8 +519,8 @@ redo1: | |||
518 | break; | 519 | break; |
519 | } | 520 | } |
520 | bufs = pipe->nrbufs; | 521 | bufs = pipe->nrbufs; |
521 | if (bufs < PIPE_BUFFERS) { | 522 | if (bufs < pipe->buffers) { |
522 | int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1); | 523 | int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1); |
523 | struct pipe_buffer *buf = pipe->bufs + newbuf; | 524 | struct pipe_buffer *buf = pipe->bufs + newbuf; |
524 | struct page *page = pipe->tmp_page; | 525 | struct page *page = pipe->tmp_page; |
525 | char *src; | 526 | char *src; |
@@ -580,7 +581,7 @@ redo2: | |||
580 | if (!total_len) | 581 | if (!total_len) |
581 | break; | 582 | break; |
582 | } | 583 | } |
583 | if (bufs < PIPE_BUFFERS) | 584 | if (bufs < pipe->buffers) |
584 | continue; | 585 | continue; |
585 | if (filp->f_flags & O_NONBLOCK) { | 586 | if (filp->f_flags & O_NONBLOCK) { |
586 | if (!ret) | 587 | if (!ret) |
@@ -640,7 +641,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
640 | nrbufs = pipe->nrbufs; | 641 | nrbufs = pipe->nrbufs; |
641 | while (--nrbufs >= 0) { | 642 | while (--nrbufs >= 0) { |
642 | count += pipe->bufs[buf].len; | 643 | count += pipe->bufs[buf].len; |
643 | buf = (buf+1) & (PIPE_BUFFERS-1); | 644 | buf = (buf+1) & (pipe->buffers - 1); |
644 | } | 645 | } |
645 | mutex_unlock(&inode->i_mutex); | 646 | mutex_unlock(&inode->i_mutex); |
646 | 647 | ||
@@ -671,7 +672,7 @@ pipe_poll(struct file *filp, poll_table *wait) | |||
671 | } | 672 | } |
672 | 673 | ||
673 | if (filp->f_mode & FMODE_WRITE) { | 674 | if (filp->f_mode & FMODE_WRITE) { |
674 | mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0; | 675 | mask |= (nrbufs < pipe->buffers) ? POLLOUT | POLLWRNORM : 0; |
675 | /* | 676 | /* |
676 | * Most Unices do not set POLLERR for FIFOs but on Linux they | 677 | * Most Unices do not set POLLERR for FIFOs but on Linux they |
677 | * behave exactly like pipes for poll(). | 678 | * behave exactly like pipes for poll(). |
@@ -877,25 +878,32 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode) | |||
877 | 878 | ||
878 | pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); | 879 | pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); |
879 | if (pipe) { | 880 | if (pipe) { |
880 | init_waitqueue_head(&pipe->wait); | 881 | pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL); |
881 | pipe->r_counter = pipe->w_counter = 1; | 882 | if (pipe->bufs) { |
882 | pipe->inode = inode; | 883 | init_waitqueue_head(&pipe->wait); |
884 | pipe->r_counter = pipe->w_counter = 1; | ||
885 | pipe->inode = inode; | ||
886 | pipe->buffers = PIPE_DEF_BUFFERS; | ||
887 | return pipe; | ||
888 | } | ||
889 | kfree(pipe); | ||
883 | } | 890 | } |
884 | 891 | ||
885 | return pipe; | 892 | return NULL; |
886 | } | 893 | } |
887 | 894 | ||
888 | void __free_pipe_info(struct pipe_inode_info *pipe) | 895 | void __free_pipe_info(struct pipe_inode_info *pipe) |
889 | { | 896 | { |
890 | int i; | 897 | int i; |
891 | 898 | ||
892 | for (i = 0; i < PIPE_BUFFERS; i++) { | 899 | for (i = 0; i < pipe->buffers; i++) { |
893 | struct pipe_buffer *buf = pipe->bufs + i; | 900 | struct pipe_buffer *buf = pipe->bufs + i; |
894 | if (buf->ops) | 901 | if (buf->ops) |
895 | buf->ops->release(pipe, buf); | 902 | buf->ops->release(pipe, buf); |
896 | } | 903 | } |
897 | if (pipe->tmp_page) | 904 | if (pipe->tmp_page) |
898 | __free_page(pipe->tmp_page); | 905 | __free_page(pipe->tmp_page); |
906 | kfree(pipe->bufs); | ||
899 | kfree(pipe); | 907 | kfree(pipe); |
900 | } | 908 | } |
901 | 909 | ||
@@ -1094,6 +1102,81 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes) | |||
1094 | } | 1102 | } |
1095 | 1103 | ||
1096 | /* | 1104 | /* |
1105 | * Allocate a new array of pipe buffers and copy the info over. Returns the | ||
1106 | * pipe size if successful, or return -ERROR on error. | ||
1107 | */ | ||
1108 | static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) | ||
1109 | { | ||
1110 | struct pipe_buffer *bufs; | ||
1111 | |||
1112 | /* | ||
1113 | * Must be a power-of-2 currently | ||
1114 | */ | ||
1115 | if (!is_power_of_2(arg)) | ||
1116 | return -EINVAL; | ||
1117 | |||
1118 | /* | ||
1119 | * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't | ||
1120 | * expect a lot of shrink+grow operations, just free and allocate | ||
1121 | * again like we would do for growing. If the pipe currently | ||
1122 | * contains more buffers than arg, then return busy. | ||
1123 | */ | ||
1124 | if (arg < pipe->nrbufs) | ||
1125 | return -EBUSY; | ||
1126 | |||
1127 | bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL); | ||
1128 | if (unlikely(!bufs)) | ||
1129 | return -ENOMEM; | ||
1130 | |||
1131 | /* | ||
1132 | * The pipe array wraps around, so just start the new one at zero | ||
1133 | * and adjust the indexes. | ||
1134 | */ | ||
1135 | if (pipe->nrbufs) { | ||
1136 | const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1); | ||
1137 | const unsigned int head = pipe->nrbufs - tail; | ||
1138 | |||
1139 | if (head) | ||
1140 | memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer)); | ||
1141 | if (tail) | ||
1142 | memcpy(bufs + head, pipe->bufs + pipe->curbuf, tail * sizeof(struct pipe_buffer)); | ||
1143 | } | ||
1144 | |||
1145 | pipe->curbuf = 0; | ||
1146 | kfree(pipe->bufs); | ||
1147 | pipe->bufs = bufs; | ||
1148 | pipe->buffers = arg; | ||
1149 | return arg; | ||
1150 | } | ||
1151 | |||
1152 | long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | ||
1153 | { | ||
1154 | struct pipe_inode_info *pipe; | ||
1155 | long ret; | ||
1156 | |||
1157 | pipe = file->f_path.dentry->d_inode->i_pipe; | ||
1158 | if (!pipe) | ||
1159 | return -EBADF; | ||
1160 | |||
1161 | mutex_lock(&pipe->inode->i_mutex); | ||
1162 | |||
1163 | switch (cmd) { | ||
1164 | case F_SETPIPE_SZ: | ||
1165 | ret = pipe_set_size(pipe, arg); | ||
1166 | break; | ||
1167 | case F_GETPIPE_SZ: | ||
1168 | ret = pipe->buffers; | ||
1169 | break; | ||
1170 | default: | ||
1171 | ret = -EINVAL; | ||
1172 | break; | ||
1173 | } | ||
1174 | |||
1175 | mutex_unlock(&pipe->inode->i_mutex); | ||
1176 | return ret; | ||
1177 | } | ||
1178 | |||
1179 | /* | ||
1097 | * pipefs should _never_ be mounted by userland - too much of security hassle, | 1180 | * pipefs should _never_ be mounted by userland - too much of security hassle, |
1098 | * no real gain from having the whole whorehouse mounted. So we don't need | 1181 | * no real gain from having the whole whorehouse mounted. So we don't need |
1099 | * any operations on the root directory. However, we need a non-trivial | 1182 | * any operations on the root directory. However, we need a non-trivial |
diff --git a/fs/splice.c b/fs/splice.c index 9313b6124a2e..ac22b00d86c3 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -193,8 +193,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
193 | break; | 193 | break; |
194 | } | 194 | } |
195 | 195 | ||
196 | if (pipe->nrbufs < PIPE_BUFFERS) { | 196 | if (pipe->nrbufs < pipe->buffers) { |
197 | int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); | 197 | int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); |
198 | struct pipe_buffer *buf = pipe->bufs + newbuf; | 198 | struct pipe_buffer *buf = pipe->bufs + newbuf; |
199 | 199 | ||
200 | buf->page = spd->pages[page_nr]; | 200 | buf->page = spd->pages[page_nr]; |
@@ -214,7 +214,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
214 | 214 | ||
215 | if (!--spd->nr_pages) | 215 | if (!--spd->nr_pages) |
216 | break; | 216 | break; |
217 | if (pipe->nrbufs < PIPE_BUFFERS) | 217 | if (pipe->nrbufs < pipe->buffers) |
218 | continue; | 218 | continue; |
219 | 219 | ||
220 | break; | 220 | break; |
@@ -265,6 +265,36 @@ static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) | |||
265 | page_cache_release(spd->pages[i]); | 265 | page_cache_release(spd->pages[i]); |
266 | } | 266 | } |
267 | 267 | ||
268 | /* | ||
269 | * Check if we need to grow the arrays holding pages and partial page | ||
270 | * descriptions. | ||
271 | */ | ||
272 | int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) | ||
273 | { | ||
274 | if (pipe->buffers <= PIPE_DEF_BUFFERS) | ||
275 | return 0; | ||
276 | |||
277 | spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL); | ||
278 | spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL); | ||
279 | |||
280 | if (spd->pages && spd->partial) | ||
281 | return 0; | ||
282 | |||
283 | kfree(spd->pages); | ||
284 | kfree(spd->partial); | ||
285 | return -ENOMEM; | ||
286 | } | ||
287 | |||
288 | void splice_shrink_spd(struct pipe_inode_info *pipe, | ||
289 | struct splice_pipe_desc *spd) | ||
290 | { | ||
291 | if (pipe->buffers <= PIPE_DEF_BUFFERS) | ||
292 | return; | ||
293 | |||
294 | kfree(spd->pages); | ||
295 | kfree(spd->partial); | ||
296 | } | ||
297 | |||
268 | static int | 298 | static int |
269 | __generic_file_splice_read(struct file *in, loff_t *ppos, | 299 | __generic_file_splice_read(struct file *in, loff_t *ppos, |
270 | struct pipe_inode_info *pipe, size_t len, | 300 | struct pipe_inode_info *pipe, size_t len, |
@@ -272,8 +302,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
272 | { | 302 | { |
273 | struct address_space *mapping = in->f_mapping; | 303 | struct address_space *mapping = in->f_mapping; |
274 | unsigned int loff, nr_pages, req_pages; | 304 | unsigned int loff, nr_pages, req_pages; |
275 | struct page *pages[PIPE_BUFFERS]; | 305 | struct page *pages[PIPE_DEF_BUFFERS]; |
276 | struct partial_page partial[PIPE_BUFFERS]; | 306 | struct partial_page partial[PIPE_DEF_BUFFERS]; |
277 | struct page *page; | 307 | struct page *page; |
278 | pgoff_t index, end_index; | 308 | pgoff_t index, end_index; |
279 | loff_t isize; | 309 | loff_t isize; |
@@ -286,15 +316,18 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
286 | .spd_release = spd_release_page, | 316 | .spd_release = spd_release_page, |
287 | }; | 317 | }; |
288 | 318 | ||
319 | if (splice_grow_spd(pipe, &spd)) | ||
320 | return -ENOMEM; | ||
321 | |||
289 | index = *ppos >> PAGE_CACHE_SHIFT; | 322 | index = *ppos >> PAGE_CACHE_SHIFT; |
290 | loff = *ppos & ~PAGE_CACHE_MASK; | 323 | loff = *ppos & ~PAGE_CACHE_MASK; |
291 | req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 324 | req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
292 | nr_pages = min(req_pages, (unsigned)PIPE_BUFFERS); | 325 | nr_pages = min(req_pages, pipe->buffers); |
293 | 326 | ||
294 | /* | 327 | /* |
295 | * Lookup the (hopefully) full range of pages we need. | 328 | * Lookup the (hopefully) full range of pages we need. |
296 | */ | 329 | */ |
297 | spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages); | 330 | spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages); |
298 | index += spd.nr_pages; | 331 | index += spd.nr_pages; |
299 | 332 | ||
300 | /* | 333 | /* |
@@ -335,7 +368,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
335 | unlock_page(page); | 368 | unlock_page(page); |
336 | } | 369 | } |
337 | 370 | ||
338 | pages[spd.nr_pages++] = page; | 371 | spd.pages[spd.nr_pages++] = page; |
339 | index++; | 372 | index++; |
340 | } | 373 | } |
341 | 374 | ||
@@ -356,7 +389,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
356 | * this_len is the max we'll use from this page | 389 | * this_len is the max we'll use from this page |
357 | */ | 390 | */ |
358 | this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); | 391 | this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); |
359 | page = pages[page_nr]; | 392 | page = spd.pages[page_nr]; |
360 | 393 | ||
361 | if (PageReadahead(page)) | 394 | if (PageReadahead(page)) |
362 | page_cache_async_readahead(mapping, &in->f_ra, in, | 395 | page_cache_async_readahead(mapping, &in->f_ra, in, |
@@ -393,8 +426,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
393 | error = -ENOMEM; | 426 | error = -ENOMEM; |
394 | break; | 427 | break; |
395 | } | 428 | } |
396 | page_cache_release(pages[page_nr]); | 429 | page_cache_release(spd.pages[page_nr]); |
397 | pages[page_nr] = page; | 430 | spd.pages[page_nr] = page; |
398 | } | 431 | } |
399 | /* | 432 | /* |
400 | * page was already under io and is now done, great | 433 | * page was already under io and is now done, great |
@@ -451,8 +484,8 @@ fill_it: | |||
451 | len = this_len; | 484 | len = this_len; |
452 | } | 485 | } |
453 | 486 | ||
454 | partial[page_nr].offset = loff; | 487 | spd.partial[page_nr].offset = loff; |
455 | partial[page_nr].len = this_len; | 488 | spd.partial[page_nr].len = this_len; |
456 | len -= this_len; | 489 | len -= this_len; |
457 | loff = 0; | 490 | loff = 0; |
458 | spd.nr_pages++; | 491 | spd.nr_pages++; |
@@ -464,12 +497,13 @@ fill_it: | |||
464 | * we got, 'nr_pages' is how many pages are in the map. | 497 | * we got, 'nr_pages' is how many pages are in the map. |
465 | */ | 498 | */ |
466 | while (page_nr < nr_pages) | 499 | while (page_nr < nr_pages) |
467 | page_cache_release(pages[page_nr++]); | 500 | page_cache_release(spd.pages[page_nr++]); |
468 | in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; | 501 | in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; |
469 | 502 | ||
470 | if (spd.nr_pages) | 503 | if (spd.nr_pages) |
471 | return splice_to_pipe(pipe, &spd); | 504 | error = splice_to_pipe(pipe, &spd); |
472 | 505 | ||
506 | splice_shrink_spd(pipe, &spd); | ||
473 | return error; | 507 | return error; |
474 | } | 508 | } |
475 | 509 | ||
@@ -560,9 +594,9 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
560 | unsigned int nr_pages; | 594 | unsigned int nr_pages; |
561 | unsigned int nr_freed; | 595 | unsigned int nr_freed; |
562 | size_t offset; | 596 | size_t offset; |
563 | struct page *pages[PIPE_BUFFERS]; | 597 | struct page *pages[PIPE_DEF_BUFFERS]; |
564 | struct partial_page partial[PIPE_BUFFERS]; | 598 | struct partial_page partial[PIPE_DEF_BUFFERS]; |
565 | struct iovec vec[PIPE_BUFFERS]; | 599 | struct iovec *vec, __vec[PIPE_DEF_BUFFERS]; |
566 | pgoff_t index; | 600 | pgoff_t index; |
567 | ssize_t res; | 601 | ssize_t res; |
568 | size_t this_len; | 602 | size_t this_len; |
@@ -576,11 +610,22 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
576 | .spd_release = spd_release_page, | 610 | .spd_release = spd_release_page, |
577 | }; | 611 | }; |
578 | 612 | ||
613 | if (splice_grow_spd(pipe, &spd)) | ||
614 | return -ENOMEM; | ||
615 | |||
616 | res = -ENOMEM; | ||
617 | vec = __vec; | ||
618 | if (pipe->buffers > PIPE_DEF_BUFFERS) { | ||
619 | vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL); | ||
620 | if (!vec) | ||
621 | goto shrink_ret; | ||
622 | } | ||
623 | |||
579 | index = *ppos >> PAGE_CACHE_SHIFT; | 624 | index = *ppos >> PAGE_CACHE_SHIFT; |
580 | offset = *ppos & ~PAGE_CACHE_MASK; | 625 | offset = *ppos & ~PAGE_CACHE_MASK; |
581 | nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 626 | nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
582 | 627 | ||
583 | for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) { | 628 | for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) { |
584 | struct page *page; | 629 | struct page *page; |
585 | 630 | ||
586 | page = alloc_page(GFP_USER); | 631 | page = alloc_page(GFP_USER); |
@@ -591,7 +636,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
591 | this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset); | 636 | this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset); |
592 | vec[i].iov_base = (void __user *) page_address(page); | 637 | vec[i].iov_base = (void __user *) page_address(page); |
593 | vec[i].iov_len = this_len; | 638 | vec[i].iov_len = this_len; |
594 | pages[i] = page; | 639 | spd.pages[i] = page; |
595 | spd.nr_pages++; | 640 | spd.nr_pages++; |
596 | len -= this_len; | 641 | len -= this_len; |
597 | offset = 0; | 642 | offset = 0; |
@@ -610,11 +655,11 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
610 | nr_freed = 0; | 655 | nr_freed = 0; |
611 | for (i = 0; i < spd.nr_pages; i++) { | 656 | for (i = 0; i < spd.nr_pages; i++) { |
612 | this_len = min_t(size_t, vec[i].iov_len, res); | 657 | this_len = min_t(size_t, vec[i].iov_len, res); |
613 | partial[i].offset = 0; | 658 | spd.partial[i].offset = 0; |
614 | partial[i].len = this_len; | 659 | spd.partial[i].len = this_len; |
615 | if (!this_len) { | 660 | if (!this_len) { |
616 | __free_page(pages[i]); | 661 | __free_page(spd.pages[i]); |
617 | pages[i] = NULL; | 662 | spd.pages[i] = NULL; |
618 | nr_freed++; | 663 | nr_freed++; |
619 | } | 664 | } |
620 | res -= this_len; | 665 | res -= this_len; |
@@ -625,13 +670,18 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
625 | if (res > 0) | 670 | if (res > 0) |
626 | *ppos += res; | 671 | *ppos += res; |
627 | 672 | ||
673 | shrink_ret: | ||
674 | if (vec != __vec) | ||
675 | kfree(vec); | ||
676 | splice_shrink_spd(pipe, &spd); | ||
628 | return res; | 677 | return res; |
629 | 678 | ||
630 | err: | 679 | err: |
631 | for (i = 0; i < spd.nr_pages; i++) | 680 | for (i = 0; i < spd.nr_pages; i++) |
632 | __free_page(pages[i]); | 681 | __free_page(spd.pages[i]); |
633 | 682 | ||
634 | return error; | 683 | res = error; |
684 | goto shrink_ret; | ||
635 | } | 685 | } |
636 | EXPORT_SYMBOL(default_file_splice_read); | 686 | EXPORT_SYMBOL(default_file_splice_read); |
637 | 687 | ||
@@ -784,7 +834,7 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, | |||
784 | if (!buf->len) { | 834 | if (!buf->len) { |
785 | buf->ops = NULL; | 835 | buf->ops = NULL; |
786 | ops->release(pipe, buf); | 836 | ops->release(pipe, buf); |
787 | pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); | 837 | pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); |
788 | pipe->nrbufs--; | 838 | pipe->nrbufs--; |
789 | if (pipe->inode) | 839 | if (pipe->inode) |
790 | sd->need_wakeup = true; | 840 | sd->need_wakeup = true; |
@@ -1211,7 +1261,7 @@ out_release: | |||
1211 | * If we did an incomplete transfer we must release | 1261 | * If we did an incomplete transfer we must release |
1212 | * the pipe buffers in question: | 1262 | * the pipe buffers in question: |
1213 | */ | 1263 | */ |
1214 | for (i = 0; i < PIPE_BUFFERS; i++) { | 1264 | for (i = 0; i < pipe->buffers; i++) { |
1215 | struct pipe_buffer *buf = pipe->bufs + i; | 1265 | struct pipe_buffer *buf = pipe->bufs + i; |
1216 | 1266 | ||
1217 | if (buf->ops) { | 1267 | if (buf->ops) { |
@@ -1371,7 +1421,8 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1371 | */ | 1421 | */ |
1372 | static int get_iovec_page_array(const struct iovec __user *iov, | 1422 | static int get_iovec_page_array(const struct iovec __user *iov, |
1373 | unsigned int nr_vecs, struct page **pages, | 1423 | unsigned int nr_vecs, struct page **pages, |
1374 | struct partial_page *partial, int aligned) | 1424 | struct partial_page *partial, int aligned, |
1425 | unsigned int pipe_buffers) | ||
1375 | { | 1426 | { |
1376 | int buffers = 0, error = 0; | 1427 | int buffers = 0, error = 0; |
1377 | 1428 | ||
@@ -1414,8 +1465,8 @@ static int get_iovec_page_array(const struct iovec __user *iov, | |||
1414 | break; | 1465 | break; |
1415 | 1466 | ||
1416 | npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 1467 | npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
1417 | if (npages > PIPE_BUFFERS - buffers) | 1468 | if (npages > pipe_buffers - buffers) |
1418 | npages = PIPE_BUFFERS - buffers; | 1469 | npages = pipe_buffers - buffers; |
1419 | 1470 | ||
1420 | error = get_user_pages_fast((unsigned long)base, npages, | 1471 | error = get_user_pages_fast((unsigned long)base, npages, |
1421 | 0, &pages[buffers]); | 1472 | 0, &pages[buffers]); |
@@ -1450,7 +1501,7 @@ static int get_iovec_page_array(const struct iovec __user *iov, | |||
1450 | * or if we mapped the max number of pages that we have | 1501 | * or if we mapped the max number of pages that we have |
1451 | * room for. | 1502 | * room for. |
1452 | */ | 1503 | */ |
1453 | if (error < npages || buffers == PIPE_BUFFERS) | 1504 | if (error < npages || buffers == pipe_buffers) |
1454 | break; | 1505 | break; |
1455 | 1506 | ||
1456 | nr_vecs--; | 1507 | nr_vecs--; |
@@ -1593,8 +1644,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, | |||
1593 | unsigned long nr_segs, unsigned int flags) | 1644 | unsigned long nr_segs, unsigned int flags) |
1594 | { | 1645 | { |
1595 | struct pipe_inode_info *pipe; | 1646 | struct pipe_inode_info *pipe; |
1596 | struct page *pages[PIPE_BUFFERS]; | 1647 | struct page *pages[PIPE_DEF_BUFFERS]; |
1597 | struct partial_page partial[PIPE_BUFFERS]; | 1648 | struct partial_page partial[PIPE_DEF_BUFFERS]; |
1598 | struct splice_pipe_desc spd = { | 1649 | struct splice_pipe_desc spd = { |
1599 | .pages = pages, | 1650 | .pages = pages, |
1600 | .partial = partial, | 1651 | .partial = partial, |
@@ -1602,17 +1653,25 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, | |||
1602 | .ops = &user_page_pipe_buf_ops, | 1653 | .ops = &user_page_pipe_buf_ops, |
1603 | .spd_release = spd_release_page, | 1654 | .spd_release = spd_release_page, |
1604 | }; | 1655 | }; |
1656 | long ret; | ||
1605 | 1657 | ||
1606 | pipe = pipe_info(file->f_path.dentry->d_inode); | 1658 | pipe = pipe_info(file->f_path.dentry->d_inode); |
1607 | if (!pipe) | 1659 | if (!pipe) |
1608 | return -EBADF; | 1660 | return -EBADF; |
1609 | 1661 | ||
1610 | spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, | 1662 | if (splice_grow_spd(pipe, &spd)) |
1611 | flags & SPLICE_F_GIFT); | 1663 | return -ENOMEM; |
1664 | |||
1665 | spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages, | ||
1666 | spd.partial, flags & SPLICE_F_GIFT, | ||
1667 | pipe->buffers); | ||
1612 | if (spd.nr_pages <= 0) | 1668 | if (spd.nr_pages <= 0) |
1613 | return spd.nr_pages; | 1669 | ret = spd.nr_pages; |
1670 | else | ||
1671 | ret = splice_to_pipe(pipe, &spd); | ||
1614 | 1672 | ||
1615 | return splice_to_pipe(pipe, &spd); | 1673 | splice_shrink_spd(pipe, &spd); |
1674 | return ret; | ||
1616 | } | 1675 | } |
1617 | 1676 | ||
1618 | /* | 1677 | /* |
@@ -1738,13 +1797,13 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | |||
1738 | * Check ->nrbufs without the inode lock first. This function | 1797 | * Check ->nrbufs without the inode lock first. This function |
1739 | * is speculative anyways, so missing one is ok. | 1798 | * is speculative anyways, so missing one is ok. |
1740 | */ | 1799 | */ |
1741 | if (pipe->nrbufs < PIPE_BUFFERS) | 1800 | if (pipe->nrbufs < pipe->buffers) |
1742 | return 0; | 1801 | return 0; |
1743 | 1802 | ||
1744 | ret = 0; | 1803 | ret = 0; |
1745 | pipe_lock(pipe); | 1804 | pipe_lock(pipe); |
1746 | 1805 | ||
1747 | while (pipe->nrbufs >= PIPE_BUFFERS) { | 1806 | while (pipe->nrbufs >= pipe->buffers) { |
1748 | if (!pipe->readers) { | 1807 | if (!pipe->readers) { |
1749 | send_sig(SIGPIPE, current, 0); | 1808 | send_sig(SIGPIPE, current, 0); |
1750 | ret = -EPIPE; | 1809 | ret = -EPIPE; |
@@ -1810,7 +1869,7 @@ retry: | |||
1810 | * Cannot make any progress, because either the input | 1869 | * Cannot make any progress, because either the input |
1811 | * pipe is empty or the output pipe is full. | 1870 | * pipe is empty or the output pipe is full. |
1812 | */ | 1871 | */ |
1813 | if (!ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) { | 1872 | if (!ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) { |
1814 | /* Already processed some buffers, break */ | 1873 | /* Already processed some buffers, break */ |
1815 | if (ret) | 1874 | if (ret) |
1816 | break; | 1875 | break; |
@@ -1831,7 +1890,7 @@ retry: | |||
1831 | } | 1890 | } |
1832 | 1891 | ||
1833 | ibuf = ipipe->bufs + ipipe->curbuf; | 1892 | ibuf = ipipe->bufs + ipipe->curbuf; |
1834 | nbuf = (opipe->curbuf + opipe->nrbufs) % PIPE_BUFFERS; | 1893 | nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1); |
1835 | obuf = opipe->bufs + nbuf; | 1894 | obuf = opipe->bufs + nbuf; |
1836 | 1895 | ||
1837 | if (len >= ibuf->len) { | 1896 | if (len >= ibuf->len) { |
@@ -1841,7 +1900,7 @@ retry: | |||
1841 | *obuf = *ibuf; | 1900 | *obuf = *ibuf; |
1842 | ibuf->ops = NULL; | 1901 | ibuf->ops = NULL; |
1843 | opipe->nrbufs++; | 1902 | opipe->nrbufs++; |
1844 | ipipe->curbuf = (ipipe->curbuf + 1) % PIPE_BUFFERS; | 1903 | ipipe->curbuf = (ipipe->curbuf + 1) & (ipipe->buffers - 1); |
1845 | ipipe->nrbufs--; | 1904 | ipipe->nrbufs--; |
1846 | input_wakeup = true; | 1905 | input_wakeup = true; |
1847 | } else { | 1906 | } else { |
@@ -1914,11 +1973,11 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
1914 | * If we have iterated all input buffers or ran out of | 1973 | * If we have iterated all input buffers or ran out of |
1915 | * output room, break. | 1974 | * output room, break. |
1916 | */ | 1975 | */ |
1917 | if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) | 1976 | if (i >= ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) |
1918 | break; | 1977 | break; |
1919 | 1978 | ||
1920 | ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); | 1979 | ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (ipipe->buffers-1)); |
1921 | nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); | 1980 | nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1); |
1922 | 1981 | ||
1923 | /* | 1982 | /* |
1924 | * Get a reference to this pipe buffer, | 1983 | * Get a reference to this pipe buffer, |
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h index 86037400a6e3..afc00af3229b 100644 --- a/include/linux/fcntl.h +++ b/include/linux/fcntl.h | |||
@@ -22,6 +22,12 @@ | |||
22 | #define F_NOTIFY (F_LINUX_SPECIFIC_BASE+2) | 22 | #define F_NOTIFY (F_LINUX_SPECIFIC_BASE+2) |
23 | 23 | ||
24 | /* | 24 | /* |
25 | * Set and get the size of the pipe buffer array | ||
26 | */ | ||
27 | #define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) | ||
28 | #define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8) | ||
29 | |||
30 | /* | ||
25 | * Types of directory notifications that may be requested. | 31 | * Types of directory notifications that may be requested. |
26 | */ | 32 | */ |
27 | #define DN_ACCESS 0x00000001 /* File accessed */ | 33 | #define DN_ACCESS 0x00000001 /* File accessed */ |
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index b43a9e039059..65f4282fcbaf 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h | |||
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | #define PIPEFS_MAGIC 0x50495045 | 4 | #define PIPEFS_MAGIC 0x50495045 |
5 | 5 | ||
6 | #define PIPE_BUFFERS (16) | 6 | #define PIPE_DEF_BUFFERS 16 |
7 | 7 | ||
8 | #define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ | 8 | #define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ |
9 | #define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */ | 9 | #define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */ |
@@ -44,17 +44,17 @@ struct pipe_buffer { | |||
44 | **/ | 44 | **/ |
45 | struct pipe_inode_info { | 45 | struct pipe_inode_info { |
46 | wait_queue_head_t wait; | 46 | wait_queue_head_t wait; |
47 | unsigned int nrbufs, curbuf; | 47 | unsigned int nrbufs, curbuf, buffers; |
48 | struct page *tmp_page; | ||
49 | unsigned int readers; | 48 | unsigned int readers; |
50 | unsigned int writers; | 49 | unsigned int writers; |
51 | unsigned int waiting_writers; | 50 | unsigned int waiting_writers; |
52 | unsigned int r_counter; | 51 | unsigned int r_counter; |
53 | unsigned int w_counter; | 52 | unsigned int w_counter; |
53 | struct page *tmp_page; | ||
54 | struct fasync_struct *fasync_readers; | 54 | struct fasync_struct *fasync_readers; |
55 | struct fasync_struct *fasync_writers; | 55 | struct fasync_struct *fasync_writers; |
56 | struct inode *inode; | 56 | struct inode *inode; |
57 | struct pipe_buffer bufs[PIPE_BUFFERS]; | 57 | struct pipe_buffer *bufs; |
58 | }; | 58 | }; |
59 | 59 | ||
60 | /* | 60 | /* |
@@ -154,4 +154,7 @@ int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); | |||
154 | int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); | 154 | int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); |
155 | void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); | 155 | void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); |
156 | 156 | ||
157 | /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ | ||
158 | long pipe_fcntl(struct file *, unsigned int, unsigned long arg); | ||
159 | |||
157 | #endif | 160 | #endif |
diff --git a/include/linux/splice.h b/include/linux/splice.h index 18e7c7c0cae6..997c3b4c212b 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h | |||
@@ -82,4 +82,11 @@ extern ssize_t splice_to_pipe(struct pipe_inode_info *, | |||
82 | extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, | 82 | extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, |
83 | splice_direct_actor *); | 83 | splice_direct_actor *); |
84 | 84 | ||
85 | /* | ||
86 | * for dynamic pipe sizing | ||
87 | */ | ||
88 | extern int splice_grow_spd(struct pipe_inode_info *, struct splice_pipe_desc *); | ||
89 | extern void splice_shrink_spd(struct pipe_inode_info *, | ||
90 | struct splice_pipe_desc *); | ||
91 | |||
85 | #endif | 92 | #endif |
diff --git a/kernel/relay.c b/kernel/relay.c index 3d97f2821611..4268287148c1 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
@@ -1231,8 +1231,8 @@ static ssize_t subbuf_splice_actor(struct file *in, | |||
1231 | size_t read_subbuf = read_start / subbuf_size; | 1231 | size_t read_subbuf = read_start / subbuf_size; |
1232 | size_t padding = rbuf->padding[read_subbuf]; | 1232 | size_t padding = rbuf->padding[read_subbuf]; |
1233 | size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding; | 1233 | size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding; |
1234 | struct page *pages[PIPE_BUFFERS]; | 1234 | struct page *pages[PIPE_DEF_BUFFERS]; |
1235 | struct partial_page partial[PIPE_BUFFERS]; | 1235 | struct partial_page partial[PIPE_DEF_BUFFERS]; |
1236 | struct splice_pipe_desc spd = { | 1236 | struct splice_pipe_desc spd = { |
1237 | .pages = pages, | 1237 | .pages = pages, |
1238 | .nr_pages = 0, | 1238 | .nr_pages = 0, |
@@ -1245,6 +1245,8 @@ static ssize_t subbuf_splice_actor(struct file *in, | |||
1245 | 1245 | ||
1246 | if (rbuf->subbufs_produced == rbuf->subbufs_consumed) | 1246 | if (rbuf->subbufs_produced == rbuf->subbufs_consumed) |
1247 | return 0; | 1247 | return 0; |
1248 | if (splice_grow_spd(pipe, &spd)) | ||
1249 | return -ENOMEM; | ||
1248 | 1250 | ||
1249 | /* | 1251 | /* |
1250 | * Adjust read len, if longer than what is available | 1252 | * Adjust read len, if longer than what is available |
@@ -1255,7 +1257,7 @@ static ssize_t subbuf_splice_actor(struct file *in, | |||
1255 | subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT; | 1257 | subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT; |
1256 | pidx = (read_start / PAGE_SIZE) % subbuf_pages; | 1258 | pidx = (read_start / PAGE_SIZE) % subbuf_pages; |
1257 | poff = read_start & ~PAGE_MASK; | 1259 | poff = read_start & ~PAGE_MASK; |
1258 | nr_pages = min_t(unsigned int, subbuf_pages, PIPE_BUFFERS); | 1260 | nr_pages = min_t(unsigned int, subbuf_pages, pipe->buffers); |
1259 | 1261 | ||
1260 | for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) { | 1262 | for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) { |
1261 | unsigned int this_len, this_end, private; | 1263 | unsigned int this_len, this_end, private; |
@@ -1289,16 +1291,19 @@ static ssize_t subbuf_splice_actor(struct file *in, | |||
1289 | } | 1291 | } |
1290 | } | 1292 | } |
1291 | 1293 | ||
1294 | ret = 0; | ||
1292 | if (!spd.nr_pages) | 1295 | if (!spd.nr_pages) |
1293 | return 0; | 1296 | goto out; |
1294 | 1297 | ||
1295 | ret = *nonpad_ret = splice_to_pipe(pipe, &spd); | 1298 | ret = *nonpad_ret = splice_to_pipe(pipe, &spd); |
1296 | if (ret < 0 || ret < total_len) | 1299 | if (ret < 0 || ret < total_len) |
1297 | return ret; | 1300 | goto out; |
1298 | 1301 | ||
1299 | if (read_start + ret == nonpad_end) | 1302 | if (read_start + ret == nonpad_end) |
1300 | ret += padding; | 1303 | ret += padding; |
1301 | 1304 | ||
1305 | out: | ||
1306 | splice_shrink_spd(pipe, &spd); | ||
1302 | return ret; | 1307 | return ret; |
1303 | } | 1308 | } |
1304 | 1309 | ||
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 44f916a04065..7b155a0e6f31 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -3269,12 +3269,12 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
3269 | size_t len, | 3269 | size_t len, |
3270 | unsigned int flags) | 3270 | unsigned int flags) |
3271 | { | 3271 | { |
3272 | struct page *pages[PIPE_BUFFERS]; | 3272 | struct page *pages_def[PIPE_DEF_BUFFERS]; |
3273 | struct partial_page partial[PIPE_BUFFERS]; | 3273 | struct partial_page partial_def[PIPE_DEF_BUFFERS]; |
3274 | struct trace_iterator *iter = filp->private_data; | 3274 | struct trace_iterator *iter = filp->private_data; |
3275 | struct splice_pipe_desc spd = { | 3275 | struct splice_pipe_desc spd = { |
3276 | .pages = pages, | 3276 | .pages = pages_def, |
3277 | .partial = partial, | 3277 | .partial = partial_def, |
3278 | .nr_pages = 0, /* This gets updated below. */ | 3278 | .nr_pages = 0, /* This gets updated below. */ |
3279 | .flags = flags, | 3279 | .flags = flags, |
3280 | .ops = &tracing_pipe_buf_ops, | 3280 | .ops = &tracing_pipe_buf_ops, |
@@ -3285,6 +3285,9 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
3285 | size_t rem; | 3285 | size_t rem; |
3286 | unsigned int i; | 3286 | unsigned int i; |
3287 | 3287 | ||
3288 | if (splice_grow_spd(pipe, &spd)) | ||
3289 | return -ENOMEM; | ||
3290 | |||
3288 | /* copy the tracer to avoid using a global lock all around */ | 3291 | /* copy the tracer to avoid using a global lock all around */ |
3289 | mutex_lock(&trace_types_lock); | 3292 | mutex_lock(&trace_types_lock); |
3290 | if (unlikely(old_tracer != current_trace && current_trace)) { | 3293 | if (unlikely(old_tracer != current_trace && current_trace)) { |
@@ -3315,23 +3318,23 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
3315 | trace_access_lock(iter->cpu_file); | 3318 | trace_access_lock(iter->cpu_file); |
3316 | 3319 | ||
3317 | /* Fill as many pages as possible. */ | 3320 | /* Fill as many pages as possible. */ |
3318 | for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { | 3321 | for (i = 0, rem = len; i < pipe->buffers && rem; i++) { |
3319 | pages[i] = alloc_page(GFP_KERNEL); | 3322 | spd.pages[i] = alloc_page(GFP_KERNEL); |
3320 | if (!pages[i]) | 3323 | if (!spd.pages[i]) |
3321 | break; | 3324 | break; |
3322 | 3325 | ||
3323 | rem = tracing_fill_pipe_page(rem, iter); | 3326 | rem = tracing_fill_pipe_page(rem, iter); |
3324 | 3327 | ||
3325 | /* Copy the data into the page, so we can start over. */ | 3328 | /* Copy the data into the page, so we can start over. */ |
3326 | ret = trace_seq_to_buffer(&iter->seq, | 3329 | ret = trace_seq_to_buffer(&iter->seq, |
3327 | page_address(pages[i]), | 3330 | page_address(spd.pages[i]), |
3328 | iter->seq.len); | 3331 | iter->seq.len); |
3329 | if (ret < 0) { | 3332 | if (ret < 0) { |
3330 | __free_page(pages[i]); | 3333 | __free_page(spd.pages[i]); |
3331 | break; | 3334 | break; |
3332 | } | 3335 | } |
3333 | partial[i].offset = 0; | 3336 | spd.partial[i].offset = 0; |
3334 | partial[i].len = iter->seq.len; | 3337 | spd.partial[i].len = iter->seq.len; |
3335 | 3338 | ||
3336 | trace_seq_init(&iter->seq); | 3339 | trace_seq_init(&iter->seq); |
3337 | } | 3340 | } |
@@ -3342,12 +3345,14 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
3342 | 3345 | ||
3343 | spd.nr_pages = i; | 3346 | spd.nr_pages = i; |
3344 | 3347 | ||
3345 | return splice_to_pipe(pipe, &spd); | 3348 | ret = splice_to_pipe(pipe, &spd); |
3349 | out: | ||
3350 | splice_shrink_spd(pipe, &spd); | ||
3351 | return ret; | ||
3346 | 3352 | ||
3347 | out_err: | 3353 | out_err: |
3348 | mutex_unlock(&iter->mutex); | 3354 | mutex_unlock(&iter->mutex); |
3349 | 3355 | goto out; | |
3350 | return ret; | ||
3351 | } | 3356 | } |
3352 | 3357 | ||
3353 | static ssize_t | 3358 | static ssize_t |
@@ -3746,11 +3751,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
3746 | unsigned int flags) | 3751 | unsigned int flags) |
3747 | { | 3752 | { |
3748 | struct ftrace_buffer_info *info = file->private_data; | 3753 | struct ftrace_buffer_info *info = file->private_data; |
3749 | struct partial_page partial[PIPE_BUFFERS]; | 3754 | struct partial_page partial_def[PIPE_DEF_BUFFERS]; |
3750 | struct page *pages[PIPE_BUFFERS]; | 3755 | struct page *pages_def[PIPE_DEF_BUFFERS]; |
3751 | struct splice_pipe_desc spd = { | 3756 | struct splice_pipe_desc spd = { |
3752 | .pages = pages, | 3757 | .pages = pages_def, |
3753 | .partial = partial, | 3758 | .partial = partial_def, |
3754 | .flags = flags, | 3759 | .flags = flags, |
3755 | .ops = &buffer_pipe_buf_ops, | 3760 | .ops = &buffer_pipe_buf_ops, |
3756 | .spd_release = buffer_spd_release, | 3761 | .spd_release = buffer_spd_release, |
@@ -3759,22 +3764,28 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
3759 | int entries, size, i; | 3764 | int entries, size, i; |
3760 | size_t ret; | 3765 | size_t ret; |
3761 | 3766 | ||
3767 | if (splice_grow_spd(pipe, &spd)) | ||
3768 | return -ENOMEM; | ||
3769 | |||
3762 | if (*ppos & (PAGE_SIZE - 1)) { | 3770 | if (*ppos & (PAGE_SIZE - 1)) { |
3763 | WARN_ONCE(1, "Ftrace: previous read must page-align\n"); | 3771 | WARN_ONCE(1, "Ftrace: previous read must page-align\n"); |
3764 | return -EINVAL; | 3772 | ret = -EINVAL; |
3773 | goto out; | ||
3765 | } | 3774 | } |
3766 | 3775 | ||
3767 | if (len & (PAGE_SIZE - 1)) { | 3776 | if (len & (PAGE_SIZE - 1)) { |
3768 | WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); | 3777 | WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); |
3769 | if (len < PAGE_SIZE) | 3778 | if (len < PAGE_SIZE) { |
3770 | return -EINVAL; | 3779 | ret = -EINVAL; |
3780 | goto out; | ||
3781 | } | ||
3771 | len &= PAGE_MASK; | 3782 | len &= PAGE_MASK; |
3772 | } | 3783 | } |
3773 | 3784 | ||
3774 | trace_access_lock(info->cpu); | 3785 | trace_access_lock(info->cpu); |
3775 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); | 3786 | entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); |
3776 | 3787 | ||
3777 | for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { | 3788 | for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) { |
3778 | struct page *page; | 3789 | struct page *page; |
3779 | int r; | 3790 | int r; |
3780 | 3791 | ||
@@ -3829,11 +3840,12 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
3829 | else | 3840 | else |
3830 | ret = 0; | 3841 | ret = 0; |
3831 | /* TODO: block */ | 3842 | /* TODO: block */ |
3832 | return ret; | 3843 | goto out; |
3833 | } | 3844 | } |
3834 | 3845 | ||
3835 | ret = splice_to_pipe(pipe, &spd); | 3846 | ret = splice_to_pipe(pipe, &spd); |
3836 | 3847 | splice_shrink_spd(pipe, &spd); | |
3848 | out: | ||
3837 | return ret; | 3849 | return ret; |
3838 | } | 3850 | } |
3839 | 3851 | ||
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 93c4e060c91e..931981774b1a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -1417,12 +1417,13 @@ new_page: | |||
1417 | /* | 1417 | /* |
1418 | * Fill page/offset/length into spd, if it can hold more pages. | 1418 | * Fill page/offset/length into spd, if it can hold more pages. |
1419 | */ | 1419 | */ |
1420 | static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, | 1420 | static inline int spd_fill_page(struct splice_pipe_desc *spd, |
1421 | struct pipe_inode_info *pipe, struct page *page, | ||
1421 | unsigned int *len, unsigned int offset, | 1422 | unsigned int *len, unsigned int offset, |
1422 | struct sk_buff *skb, int linear, | 1423 | struct sk_buff *skb, int linear, |
1423 | struct sock *sk) | 1424 | struct sock *sk) |
1424 | { | 1425 | { |
1425 | if (unlikely(spd->nr_pages == PIPE_BUFFERS)) | 1426 | if (unlikely(spd->nr_pages == pipe->buffers)) |
1426 | return 1; | 1427 | return 1; |
1427 | 1428 | ||
1428 | if (linear) { | 1429 | if (linear) { |
@@ -1458,7 +1459,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff, | |||
1458 | unsigned int plen, unsigned int *off, | 1459 | unsigned int plen, unsigned int *off, |
1459 | unsigned int *len, struct sk_buff *skb, | 1460 | unsigned int *len, struct sk_buff *skb, |
1460 | struct splice_pipe_desc *spd, int linear, | 1461 | struct splice_pipe_desc *spd, int linear, |
1461 | struct sock *sk) | 1462 | struct sock *sk, |
1463 | struct pipe_inode_info *pipe) | ||
1462 | { | 1464 | { |
1463 | if (!*len) | 1465 | if (!*len) |
1464 | return 1; | 1466 | return 1; |
@@ -1481,7 +1483,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff, | |||
1481 | /* the linear region may spread across several pages */ | 1483 | /* the linear region may spread across several pages */ |
1482 | flen = min_t(unsigned int, flen, PAGE_SIZE - poff); | 1484 | flen = min_t(unsigned int, flen, PAGE_SIZE - poff); |
1483 | 1485 | ||
1484 | if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk)) | 1486 | if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk)) |
1485 | return 1; | 1487 | return 1; |
1486 | 1488 | ||
1487 | __segment_seek(&page, &poff, &plen, flen); | 1489 | __segment_seek(&page, &poff, &plen, flen); |
@@ -1496,9 +1498,9 @@ static inline int __splice_segment(struct page *page, unsigned int poff, | |||
1496 | * Map linear and fragment data from the skb to spd. It reports failure if the | 1498 | * Map linear and fragment data from the skb to spd. It reports failure if the |
1497 | * pipe is full or if we already spliced the requested length. | 1499 | * pipe is full or if we already spliced the requested length. |
1498 | */ | 1500 | */ |
1499 | static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, | 1501 | static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, |
1500 | unsigned int *len, struct splice_pipe_desc *spd, | 1502 | unsigned int *offset, unsigned int *len, |
1501 | struct sock *sk) | 1503 | struct splice_pipe_desc *spd, struct sock *sk) |
1502 | { | 1504 | { |
1503 | int seg; | 1505 | int seg; |
1504 | 1506 | ||
@@ -1508,7 +1510,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, | |||
1508 | if (__splice_segment(virt_to_page(skb->data), | 1510 | if (__splice_segment(virt_to_page(skb->data), |
1509 | (unsigned long) skb->data & (PAGE_SIZE - 1), | 1511 | (unsigned long) skb->data & (PAGE_SIZE - 1), |
1510 | skb_headlen(skb), | 1512 | skb_headlen(skb), |
1511 | offset, len, skb, spd, 1, sk)) | 1513 | offset, len, skb, spd, 1, sk, pipe)) |
1512 | return 1; | 1514 | return 1; |
1513 | 1515 | ||
1514 | /* | 1516 | /* |
@@ -1518,7 +1520,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, | |||
1518 | const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; | 1520 | const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; |
1519 | 1521 | ||
1520 | if (__splice_segment(f->page, f->page_offset, f->size, | 1522 | if (__splice_segment(f->page, f->page_offset, f->size, |
1521 | offset, len, skb, spd, 0, sk)) | 1523 | offset, len, skb, spd, 0, sk, pipe)) |
1522 | return 1; | 1524 | return 1; |
1523 | } | 1525 | } |
1524 | 1526 | ||
@@ -1535,8 +1537,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, | |||
1535 | struct pipe_inode_info *pipe, unsigned int tlen, | 1537 | struct pipe_inode_info *pipe, unsigned int tlen, |
1536 | unsigned int flags) | 1538 | unsigned int flags) |
1537 | { | 1539 | { |
1538 | struct partial_page partial[PIPE_BUFFERS]; | 1540 | struct partial_page partial[PIPE_DEF_BUFFERS]; |
1539 | struct page *pages[PIPE_BUFFERS]; | 1541 | struct page *pages[PIPE_DEF_BUFFERS]; |
1540 | struct splice_pipe_desc spd = { | 1542 | struct splice_pipe_desc spd = { |
1541 | .pages = pages, | 1543 | .pages = pages, |
1542 | .partial = partial, | 1544 | .partial = partial, |
@@ -1546,12 +1548,16 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, | |||
1546 | }; | 1548 | }; |
1547 | struct sk_buff *frag_iter; | 1549 | struct sk_buff *frag_iter; |
1548 | struct sock *sk = skb->sk; | 1550 | struct sock *sk = skb->sk; |
1551 | int ret = 0; | ||
1552 | |||
1553 | if (splice_grow_spd(pipe, &spd)) | ||
1554 | return -ENOMEM; | ||
1549 | 1555 | ||
1550 | /* | 1556 | /* |
1551 | * __skb_splice_bits() only fails if the output has no room left, | 1557 | * __skb_splice_bits() only fails if the output has no room left, |
1552 | * so no point in going over the frag_list for the error case. | 1558 | * so no point in going over the frag_list for the error case. |
1553 | */ | 1559 | */ |
1554 | if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk)) | 1560 | if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk)) |
1555 | goto done; | 1561 | goto done; |
1556 | else if (!tlen) | 1562 | else if (!tlen) |
1557 | goto done; | 1563 | goto done; |
@@ -1562,14 +1568,12 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, | |||
1562 | skb_walk_frags(skb, frag_iter) { | 1568 | skb_walk_frags(skb, frag_iter) { |
1563 | if (!tlen) | 1569 | if (!tlen) |
1564 | break; | 1570 | break; |
1565 | if (__skb_splice_bits(frag_iter, &offset, &tlen, &spd, sk)) | 1571 | if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk)) |
1566 | break; | 1572 | break; |
1567 | } | 1573 | } |
1568 | 1574 | ||
1569 | done: | 1575 | done: |
1570 | if (spd.nr_pages) { | 1576 | if (spd.nr_pages) { |
1571 | int ret; | ||
1572 | |||
1573 | /* | 1577 | /* |
1574 | * Drop the socket lock, otherwise we have reverse | 1578 | * Drop the socket lock, otherwise we have reverse |
1575 | * locking dependencies between sk_lock and i_mutex | 1579 | * locking dependencies between sk_lock and i_mutex |
@@ -1582,10 +1586,10 @@ done: | |||
1582 | release_sock(sk); | 1586 | release_sock(sk); |
1583 | ret = splice_to_pipe(pipe, &spd); | 1587 | ret = splice_to_pipe(pipe, &spd); |
1584 | lock_sock(sk); | 1588 | lock_sock(sk); |
1585 | return ret; | ||
1586 | } | 1589 | } |
1587 | 1590 | ||
1588 | return 0; | 1591 | splice_shrink_spd(pipe, &spd); |
1592 | return ret; | ||
1589 | } | 1593 | } |
1590 | 1594 | ||
1591 | /** | 1595 | /** |