diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2016-09-22 16:33:12 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2016-10-05 18:23:36 -0400 |
commit | 241699cd72a8489c9446ae3910ddd243e9b9061b (patch) | |
tree | efcd8d62f5788ab843cd3457cf2e85f3be7b5296 | |
parent | d82718e348fee15dbce8f578ff2588982b7cc7ca (diff) |
new iov_iter flavour: pipe-backed
iov_iter variant for passing data into pipe. copy_to_iter()
copies data into page(s) it has allocated and stuffs them into
the pipe; copy_page_to_iter() instead stuffs a reference to the page
given to it into the pipe. Both will try to coalesce if possible.
iov_iter_zero() is similar to copy_to_iter(); iov_iter_get_pages()
and friends will do as copy_to_iter() would have and return the
pages where the data would've been copied. iov_iter_advance()
will truncate everything past the spot it has advanced to.
New primitive: iov_iter_pipe(), used for initializing those.
pipe should be locked all along.
Running out of space acts as a fault would for iovec-backed ones;
in other words, giving it to ->read_iter() may result in short
read if the pipe overflows, or -EFAULT if it happens with nothing
copied there.
In other words, ->read_iter() on those acts pretty much like
->splice_read(). Moreover, all generic_file_splice_read() users,
as well as many other ->splice_read() instances can be switched
to that scheme - that'll happen in the next commit.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | fs/splice.c | 2 | ||||
-rw-r--r-- | include/linux/splice.h | 1 | ||||
-rw-r--r-- | include/linux/uio.h | 14 | ||||
-rw-r--r-- | lib/iov_iter.c | 397 |
4 files changed, 408 insertions, 6 deletions
diff --git a/fs/splice.c b/fs/splice.c index e13d93531554..589a1d52bb98 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -524,7 +524,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, | |||
524 | } | 524 | } |
525 | EXPORT_SYMBOL(generic_file_splice_read); | 525 | EXPORT_SYMBOL(generic_file_splice_read); |
526 | 526 | ||
527 | static const struct pipe_buf_operations default_pipe_buf_ops = { | 527 | const struct pipe_buf_operations default_pipe_buf_ops = { |
528 | .can_merge = 0, | 528 | .can_merge = 0, |
529 | .confirm = generic_pipe_buf_confirm, | 529 | .confirm = generic_pipe_buf_confirm, |
530 | .release = generic_pipe_buf_release, | 530 | .release = generic_pipe_buf_release, |
diff --git a/include/linux/splice.h b/include/linux/splice.h index 58b300f37534..00a21166e268 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h | |||
@@ -85,4 +85,5 @@ extern void splice_shrink_spd(struct splice_pipe_desc *); | |||
85 | extern void spd_release_page(struct splice_pipe_desc *, unsigned int); | 85 | extern void spd_release_page(struct splice_pipe_desc *, unsigned int); |
86 | 86 | ||
87 | extern const struct pipe_buf_operations page_cache_pipe_buf_ops; | 87 | extern const struct pipe_buf_operations page_cache_pipe_buf_ops; |
88 | extern const struct pipe_buf_operations default_pipe_buf_ops; | ||
88 | #endif | 89 | #endif |
diff --git a/include/linux/uio.h b/include/linux/uio.h index 75b4aaf31a9d..b5ebe6dca404 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <uapi/linux/uio.h> | 13 | #include <uapi/linux/uio.h> |
14 | 14 | ||
15 | struct page; | 15 | struct page; |
16 | struct pipe_inode_info; | ||
16 | 17 | ||
17 | struct kvec { | 18 | struct kvec { |
18 | void *iov_base; /* and that should *never* hold a userland pointer */ | 19 | void *iov_base; /* and that should *never* hold a userland pointer */ |
@@ -23,6 +24,7 @@ enum { | |||
23 | ITER_IOVEC = 0, | 24 | ITER_IOVEC = 0, |
24 | ITER_KVEC = 2, | 25 | ITER_KVEC = 2, |
25 | ITER_BVEC = 4, | 26 | ITER_BVEC = 4, |
27 | ITER_PIPE = 8, | ||
26 | }; | 28 | }; |
27 | 29 | ||
28 | struct iov_iter { | 30 | struct iov_iter { |
@@ -33,8 +35,12 @@ struct iov_iter { | |||
33 | const struct iovec *iov; | 35 | const struct iovec *iov; |
34 | const struct kvec *kvec; | 36 | const struct kvec *kvec; |
35 | const struct bio_vec *bvec; | 37 | const struct bio_vec *bvec; |
38 | struct pipe_inode_info *pipe; | ||
39 | }; | ||
40 | union { | ||
41 | unsigned long nr_segs; | ||
42 | int idx; | ||
36 | }; | 43 | }; |
37 | unsigned long nr_segs; | ||
38 | }; | 44 | }; |
39 | 45 | ||
40 | /* | 46 | /* |
@@ -64,7 +70,7 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) | |||
64 | } | 70 | } |
65 | 71 | ||
66 | #define iov_for_each(iov, iter, start) \ | 72 | #define iov_for_each(iov, iter, start) \ |
67 | if (!((start).type & ITER_BVEC)) \ | 73 | if (!((start).type & (ITER_BVEC | ITER_PIPE))) \ |
68 | for (iter = (start); \ | 74 | for (iter = (start); \ |
69 | (iter).count && \ | 75 | (iter).count && \ |
70 | ((iov = iov_iter_iovec(&(iter))), 1); \ | 76 | ((iov = iov_iter_iovec(&(iter))), 1); \ |
@@ -94,6 +100,8 @@ void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec, | |||
94 | unsigned long nr_segs, size_t count); | 100 | unsigned long nr_segs, size_t count); |
95 | void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec, | 101 | void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec, |
96 | unsigned long nr_segs, size_t count); | 102 | unsigned long nr_segs, size_t count); |
103 | void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe, | ||
104 | size_t count); | ||
97 | ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, | 105 | ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, |
98 | size_t maxsize, unsigned maxpages, size_t *start); | 106 | size_t maxsize, unsigned maxpages, size_t *start); |
99 | ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, | 107 | ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, |
@@ -109,7 +117,7 @@ static inline size_t iov_iter_count(struct iov_iter *i) | |||
109 | 117 | ||
110 | static inline bool iter_is_iovec(struct iov_iter *i) | 118 | static inline bool iter_is_iovec(struct iov_iter *i) |
111 | { | 119 | { |
112 | return !(i->type & (ITER_BVEC | ITER_KVEC)); | 120 | return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE)); |
113 | } | 121 | } |
114 | 122 | ||
115 | /* | 123 | /* |
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 7e3138cfc8c9..659eaafcde65 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c | |||
@@ -3,8 +3,11 @@ | |||
3 | #include <linux/pagemap.h> | 3 | #include <linux/pagemap.h> |
4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
5 | #include <linux/vmalloc.h> | 5 | #include <linux/vmalloc.h> |
6 | #include <linux/splice.h> | ||
6 | #include <net/checksum.h> | 7 | #include <net/checksum.h> |
7 | 8 | ||
9 | #define PIPE_PARANOIA /* for now */ | ||
10 | |||
8 | #define iterate_iovec(i, n, __v, __p, skip, STEP) { \ | 11 | #define iterate_iovec(i, n, __v, __p, skip, STEP) { \ |
9 | size_t left; \ | 12 | size_t left; \ |
10 | size_t wanted = n; \ | 13 | size_t wanted = n; \ |
@@ -290,6 +293,93 @@ done: | |||
290 | return wanted - bytes; | 293 | return wanted - bytes; |
291 | } | 294 | } |
292 | 295 | ||
296 | #ifdef PIPE_PARANOIA | ||
297 | static bool sanity(const struct iov_iter *i) | ||
298 | { | ||
299 | struct pipe_inode_info *pipe = i->pipe; | ||
300 | int idx = i->idx; | ||
301 | int next = pipe->curbuf + pipe->nrbufs; | ||
302 | if (i->iov_offset) { | ||
303 | struct pipe_buffer *p; | ||
304 | if (unlikely(!pipe->nrbufs)) | ||
305 | goto Bad; // pipe must be non-empty | ||
306 | if (unlikely(idx != ((next - 1) & (pipe->buffers - 1)))) | ||
307 | goto Bad; // must be at the last buffer... | ||
308 | |||
309 | p = &pipe->bufs[idx]; | ||
310 | if (unlikely(p->offset + p->len != i->iov_offset)) | ||
311 | goto Bad; // ... at the end of segment | ||
312 | } else { | ||
313 | if (idx != (next & (pipe->buffers - 1))) | ||
314 | goto Bad; // must be right after the last buffer | ||
315 | } | ||
316 | return true; | ||
317 | Bad: | ||
318 | printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset); | ||
319 | printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n", | ||
320 | pipe->curbuf, pipe->nrbufs, pipe->buffers); | ||
321 | for (idx = 0; idx < pipe->buffers; idx++) | ||
322 | printk(KERN_ERR "[%p %p %d %d]\n", | ||
323 | pipe->bufs[idx].ops, | ||
324 | pipe->bufs[idx].page, | ||
325 | pipe->bufs[idx].offset, | ||
326 | pipe->bufs[idx].len); | ||
327 | WARN_ON(1); | ||
328 | return false; | ||
329 | } | ||
330 | #else | ||
331 | #define sanity(i) true | ||
332 | #endif | ||
333 | |||
334 | static inline int next_idx(int idx, struct pipe_inode_info *pipe) | ||
335 | { | ||
336 | return (idx + 1) & (pipe->buffers - 1); | ||
337 | } | ||
338 | |||
339 | static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, | ||
340 | struct iov_iter *i) | ||
341 | { | ||
342 | struct pipe_inode_info *pipe = i->pipe; | ||
343 | struct pipe_buffer *buf; | ||
344 | size_t off; | ||
345 | int idx; | ||
346 | |||
347 | if (unlikely(bytes > i->count)) | ||
348 | bytes = i->count; | ||
349 | |||
350 | if (unlikely(!bytes)) | ||
351 | return 0; | ||
352 | |||
353 | if (!sanity(i)) | ||
354 | return 0; | ||
355 | |||
356 | off = i->iov_offset; | ||
357 | idx = i->idx; | ||
358 | buf = &pipe->bufs[idx]; | ||
359 | if (off) { | ||
360 | if (offset == off && buf->page == page) { | ||
361 | /* merge with the last one */ | ||
362 | buf->len += bytes; | ||
363 | i->iov_offset += bytes; | ||
364 | goto out; | ||
365 | } | ||
366 | idx = next_idx(idx, pipe); | ||
367 | buf = &pipe->bufs[idx]; | ||
368 | } | ||
369 | if (idx == pipe->curbuf && pipe->nrbufs) | ||
370 | return 0; | ||
371 | pipe->nrbufs++; | ||
372 | buf->ops = &page_cache_pipe_buf_ops; | ||
373 | get_page(buf->page = page); | ||
374 | buf->offset = offset; | ||
375 | buf->len = bytes; | ||
376 | i->iov_offset = offset + bytes; | ||
377 | i->idx = idx; | ||
378 | out: | ||
379 | i->count -= bytes; | ||
380 | return bytes; | ||
381 | } | ||
382 | |||
293 | /* | 383 | /* |
294 | * Fault in one or more iovecs of the given iov_iter, to a maximum length of | 384 | * Fault in one or more iovecs of the given iov_iter, to a maximum length of |
295 | * bytes. For each iovec, fault in each page that constitutes the iovec. | 385 | * bytes. For each iovec, fault in each page that constitutes the iovec. |
@@ -356,9 +446,98 @@ static void memzero_page(struct page *page, size_t offset, size_t len) | |||
356 | kunmap_atomic(addr); | 446 | kunmap_atomic(addr); |
357 | } | 447 | } |
358 | 448 | ||
449 | static inline bool allocated(struct pipe_buffer *buf) | ||
450 | { | ||
451 | return buf->ops == &default_pipe_buf_ops; | ||
452 | } | ||
453 | |||
454 | static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp) | ||
455 | { | ||
456 | size_t off = i->iov_offset; | ||
457 | int idx = i->idx; | ||
458 | if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) { | ||
459 | idx = next_idx(idx, i->pipe); | ||
460 | off = 0; | ||
461 | } | ||
462 | *idxp = idx; | ||
463 | *offp = off; | ||
464 | } | ||
465 | |||
466 | static size_t push_pipe(struct iov_iter *i, size_t size, | ||
467 | int *idxp, size_t *offp) | ||
468 | { | ||
469 | struct pipe_inode_info *pipe = i->pipe; | ||
470 | size_t off; | ||
471 | int idx; | ||
472 | ssize_t left; | ||
473 | |||
474 | if (unlikely(size > i->count)) | ||
475 | size = i->count; | ||
476 | if (unlikely(!size)) | ||
477 | return 0; | ||
478 | |||
479 | left = size; | ||
480 | data_start(i, &idx, &off); | ||
481 | *idxp = idx; | ||
482 | *offp = off; | ||
483 | if (off) { | ||
484 | left -= PAGE_SIZE - off; | ||
485 | if (left <= 0) { | ||
486 | pipe->bufs[idx].len += size; | ||
487 | return size; | ||
488 | } | ||
489 | pipe->bufs[idx].len = PAGE_SIZE; | ||
490 | idx = next_idx(idx, pipe); | ||
491 | } | ||
492 | while (idx != pipe->curbuf || !pipe->nrbufs) { | ||
493 | struct page *page = alloc_page(GFP_USER); | ||
494 | if (!page) | ||
495 | break; | ||
496 | pipe->nrbufs++; | ||
497 | pipe->bufs[idx].ops = &default_pipe_buf_ops; | ||
498 | pipe->bufs[idx].page = page; | ||
499 | pipe->bufs[idx].offset = 0; | ||
500 | if (left <= PAGE_SIZE) { | ||
501 | pipe->bufs[idx].len = left; | ||
502 | return size; | ||
503 | } | ||
504 | pipe->bufs[idx].len = PAGE_SIZE; | ||
505 | left -= PAGE_SIZE; | ||
506 | idx = next_idx(idx, pipe); | ||
507 | } | ||
508 | return size - left; | ||
509 | } | ||
510 | |||
511 | static size_t copy_pipe_to_iter(const void *addr, size_t bytes, | ||
512 | struct iov_iter *i) | ||
513 | { | ||
514 | struct pipe_inode_info *pipe = i->pipe; | ||
515 | size_t n, off; | ||
516 | int idx; | ||
517 | |||
518 | if (!sanity(i)) | ||
519 | return 0; | ||
520 | |||
521 | bytes = n = push_pipe(i, bytes, &idx, &off); | ||
522 | if (unlikely(!n)) | ||
523 | return 0; | ||
524 | for ( ; n; idx = next_idx(idx, pipe), off = 0) { | ||
525 | size_t chunk = min_t(size_t, n, PAGE_SIZE - off); | ||
526 | memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk); | ||
527 | i->idx = idx; | ||
528 | i->iov_offset = off + chunk; | ||
529 | n -= chunk; | ||
530 | addr += chunk; | ||
531 | } | ||
532 | i->count -= bytes; | ||
533 | return bytes; | ||
534 | } | ||
535 | |||
359 | size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) | 536 | size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) |
360 | { | 537 | { |
361 | const char *from = addr; | 538 | const char *from = addr; |
539 | if (unlikely(i->type & ITER_PIPE)) | ||
540 | return copy_pipe_to_iter(addr, bytes, i); | ||
362 | iterate_and_advance(i, bytes, v, | 541 | iterate_and_advance(i, bytes, v, |
363 | __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, | 542 | __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, |
364 | v.iov_len), | 543 | v.iov_len), |
@@ -374,6 +553,10 @@ EXPORT_SYMBOL(copy_to_iter); | |||
374 | size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) | 553 | size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) |
375 | { | 554 | { |
376 | char *to = addr; | 555 | char *to = addr; |
556 | if (unlikely(i->type & ITER_PIPE)) { | ||
557 | WARN_ON(1); | ||
558 | return 0; | ||
559 | } | ||
377 | iterate_and_advance(i, bytes, v, | 560 | iterate_and_advance(i, bytes, v, |
378 | __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, | 561 | __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, |
379 | v.iov_len), | 562 | v.iov_len), |
@@ -389,6 +572,10 @@ EXPORT_SYMBOL(copy_from_iter); | |||
389 | size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) | 572 | size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) |
390 | { | 573 | { |
391 | char *to = addr; | 574 | char *to = addr; |
575 | if (unlikely(i->type & ITER_PIPE)) { | ||
576 | WARN_ON(1); | ||
577 | return 0; | ||
578 | } | ||
392 | iterate_and_advance(i, bytes, v, | 579 | iterate_and_advance(i, bytes, v, |
393 | __copy_from_user_nocache((to += v.iov_len) - v.iov_len, | 580 | __copy_from_user_nocache((to += v.iov_len) - v.iov_len, |
394 | v.iov_base, v.iov_len), | 581 | v.iov_base, v.iov_len), |
@@ -409,14 +596,20 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, | |||
409 | size_t wanted = copy_to_iter(kaddr + offset, bytes, i); | 596 | size_t wanted = copy_to_iter(kaddr + offset, bytes, i); |
410 | kunmap_atomic(kaddr); | 597 | kunmap_atomic(kaddr); |
411 | return wanted; | 598 | return wanted; |
412 | } else | 599 | } else if (likely(!(i->type & ITER_PIPE))) |
413 | return copy_page_to_iter_iovec(page, offset, bytes, i); | 600 | return copy_page_to_iter_iovec(page, offset, bytes, i); |
601 | else | ||
602 | return copy_page_to_iter_pipe(page, offset, bytes, i); | ||
414 | } | 603 | } |
415 | EXPORT_SYMBOL(copy_page_to_iter); | 604 | EXPORT_SYMBOL(copy_page_to_iter); |
416 | 605 | ||
417 | size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, | 606 | size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, |
418 | struct iov_iter *i) | 607 | struct iov_iter *i) |
419 | { | 608 | { |
609 | if (unlikely(i->type & ITER_PIPE)) { | ||
610 | WARN_ON(1); | ||
611 | return 0; | ||
612 | } | ||
420 | if (i->type & (ITER_BVEC|ITER_KVEC)) { | 613 | if (i->type & (ITER_BVEC|ITER_KVEC)) { |
421 | void *kaddr = kmap_atomic(page); | 614 | void *kaddr = kmap_atomic(page); |
422 | size_t wanted = copy_from_iter(kaddr + offset, bytes, i); | 615 | size_t wanted = copy_from_iter(kaddr + offset, bytes, i); |
@@ -427,8 +620,34 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, | |||
427 | } | 620 | } |
428 | EXPORT_SYMBOL(copy_page_from_iter); | 621 | EXPORT_SYMBOL(copy_page_from_iter); |
429 | 622 | ||
623 | static size_t pipe_zero(size_t bytes, struct iov_iter *i) | ||
624 | { | ||
625 | struct pipe_inode_info *pipe = i->pipe; | ||
626 | size_t n, off; | ||
627 | int idx; | ||
628 | |||
629 | if (!sanity(i)) | ||
630 | return 0; | ||
631 | |||
632 | bytes = n = push_pipe(i, bytes, &idx, &off); | ||
633 | if (unlikely(!n)) | ||
634 | return 0; | ||
635 | |||
636 | for ( ; n; idx = next_idx(idx, pipe), off = 0) { | ||
637 | size_t chunk = min_t(size_t, n, PAGE_SIZE - off); | ||
638 | memzero_page(pipe->bufs[idx].page, off, chunk); | ||
639 | i->idx = idx; | ||
640 | i->iov_offset = off + chunk; | ||
641 | n -= chunk; | ||
642 | } | ||
643 | i->count -= bytes; | ||
644 | return bytes; | ||
645 | } | ||
646 | |||
430 | size_t iov_iter_zero(size_t bytes, struct iov_iter *i) | 647 | size_t iov_iter_zero(size_t bytes, struct iov_iter *i) |
431 | { | 648 | { |
649 | if (unlikely(i->type & ITER_PIPE)) | ||
650 | return pipe_zero(bytes, i); | ||
432 | iterate_and_advance(i, bytes, v, | 651 | iterate_and_advance(i, bytes, v, |
433 | __clear_user(v.iov_base, v.iov_len), | 652 | __clear_user(v.iov_base, v.iov_len), |
434 | memzero_page(v.bv_page, v.bv_offset, v.bv_len), | 653 | memzero_page(v.bv_page, v.bv_offset, v.bv_len), |
@@ -443,6 +662,11 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, | |||
443 | struct iov_iter *i, unsigned long offset, size_t bytes) | 662 | struct iov_iter *i, unsigned long offset, size_t bytes) |
444 | { | 663 | { |
445 | char *kaddr = kmap_atomic(page), *p = kaddr + offset; | 664 | char *kaddr = kmap_atomic(page), *p = kaddr + offset; |
665 | if (unlikely(i->type & ITER_PIPE)) { | ||
666 | kunmap_atomic(kaddr); | ||
667 | WARN_ON(1); | ||
668 | return 0; | ||
669 | } | ||
446 | iterate_all_kinds(i, bytes, v, | 670 | iterate_all_kinds(i, bytes, v, |
447 | __copy_from_user_inatomic((p += v.iov_len) - v.iov_len, | 671 | __copy_from_user_inatomic((p += v.iov_len) - v.iov_len, |
448 | v.iov_base, v.iov_len), | 672 | v.iov_base, v.iov_len), |
@@ -455,8 +679,51 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, | |||
455 | } | 679 | } |
456 | EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); | 680 | EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); |
457 | 681 | ||
682 | static void pipe_advance(struct iov_iter *i, size_t size) | ||
683 | { | ||
684 | struct pipe_inode_info *pipe = i->pipe; | ||
685 | struct pipe_buffer *buf; | ||
686 | int idx = i->idx; | ||
687 | size_t off = i->iov_offset; | ||
688 | |||
689 | if (unlikely(i->count < size)) | ||
690 | size = i->count; | ||
691 | |||
692 | if (size) { | ||
693 | if (off) /* make it relative to the beginning of buffer */ | ||
694 | size += off - pipe->bufs[idx].offset; | ||
695 | while (1) { | ||
696 | buf = &pipe->bufs[idx]; | ||
697 | if (size <= buf->len) | ||
698 | break; | ||
699 | size -= buf->len; | ||
700 | idx = next_idx(idx, pipe); | ||
701 | } | ||
702 | buf->len = size; | ||
703 | i->idx = idx; | ||
704 | off = i->iov_offset = buf->offset + size; | ||
705 | } | ||
706 | if (off) | ||
707 | idx = next_idx(idx, pipe); | ||
708 | if (pipe->nrbufs) { | ||
709 | int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); | ||
710 | /* [curbuf,unused) is in use. Free [idx,unused) */ | ||
711 | while (idx != unused) { | ||
712 | buf = &pipe->bufs[idx]; | ||
713 | buf->ops->release(pipe, buf); | ||
714 | buf->ops = NULL; | ||
715 | idx = next_idx(idx, pipe); | ||
716 | pipe->nrbufs--; | ||
717 | } | ||
718 | } | ||
719 | } | ||
720 | |||
458 | void iov_iter_advance(struct iov_iter *i, size_t size) | 721 | void iov_iter_advance(struct iov_iter *i, size_t size) |
459 | { | 722 | { |
723 | if (unlikely(i->type & ITER_PIPE)) { | ||
724 | pipe_advance(i, size); | ||
725 | return; | ||
726 | } | ||
460 | iterate_and_advance(i, size, v, 0, 0, 0) | 727 | iterate_and_advance(i, size, v, 0, 0, 0) |
461 | } | 728 | } |
462 | EXPORT_SYMBOL(iov_iter_advance); | 729 | EXPORT_SYMBOL(iov_iter_advance); |
@@ -466,6 +733,8 @@ EXPORT_SYMBOL(iov_iter_advance); | |||
466 | */ | 733 | */ |
467 | size_t iov_iter_single_seg_count(const struct iov_iter *i) | 734 | size_t iov_iter_single_seg_count(const struct iov_iter *i) |
468 | { | 735 | { |
736 | if (unlikely(i->type & ITER_PIPE)) | ||
737 | return i->count; // it is a silly place, anyway | ||
469 | if (i->nr_segs == 1) | 738 | if (i->nr_segs == 1) |
470 | return i->count; | 739 | return i->count; |
471 | else if (i->type & ITER_BVEC) | 740 | else if (i->type & ITER_BVEC) |
@@ -501,6 +770,19 @@ void iov_iter_bvec(struct iov_iter *i, int direction, | |||
501 | } | 770 | } |
502 | EXPORT_SYMBOL(iov_iter_bvec); | 771 | EXPORT_SYMBOL(iov_iter_bvec); |
503 | 772 | ||
773 | void iov_iter_pipe(struct iov_iter *i, int direction, | ||
774 | struct pipe_inode_info *pipe, | ||
775 | size_t count) | ||
776 | { | ||
777 | BUG_ON(direction != ITER_PIPE); | ||
778 | i->type = direction; | ||
779 | i->pipe = pipe; | ||
780 | i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); | ||
781 | i->iov_offset = 0; | ||
782 | i->count = count; | ||
783 | } | ||
784 | EXPORT_SYMBOL(iov_iter_pipe); | ||
785 | |||
504 | unsigned long iov_iter_alignment(const struct iov_iter *i) | 786 | unsigned long iov_iter_alignment(const struct iov_iter *i) |
505 | { | 787 | { |
506 | unsigned long res = 0; | 788 | unsigned long res = 0; |
@@ -509,6 +791,11 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) | |||
509 | if (!size) | 791 | if (!size) |
510 | return 0; | 792 | return 0; |
511 | 793 | ||
794 | if (unlikely(i->type & ITER_PIPE)) { | ||
795 | if (i->iov_offset && allocated(&i->pipe->bufs[i->idx])) | ||
796 | return size | i->iov_offset; | ||
797 | return size; | ||
798 | } | ||
512 | iterate_all_kinds(i, size, v, | 799 | iterate_all_kinds(i, size, v, |
513 | (res |= (unsigned long)v.iov_base | v.iov_len, 0), | 800 | (res |= (unsigned long)v.iov_base | v.iov_len, 0), |
514 | res |= v.bv_offset | v.bv_len, | 801 | res |= v.bv_offset | v.bv_len, |
@@ -525,6 +812,11 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) | |||
525 | if (!size) | 812 | if (!size) |
526 | return 0; | 813 | return 0; |
527 | 814 | ||
815 | if (unlikely(i->type & ITER_PIPE)) { | ||
816 | WARN_ON(1); | ||
817 | return ~0U; | ||
818 | } | ||
819 | |||
528 | iterate_all_kinds(i, size, v, | 820 | iterate_all_kinds(i, size, v, |
529 | (res |= (!res ? 0 : (unsigned long)v.iov_base) | | 821 | (res |= (!res ? 0 : (unsigned long)v.iov_base) | |
530 | (size != v.iov_len ? size : 0), 0), | 822 | (size != v.iov_len ? size : 0), 0), |
@@ -537,6 +829,47 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) | |||
537 | } | 829 | } |
538 | EXPORT_SYMBOL(iov_iter_gap_alignment); | 830 | EXPORT_SYMBOL(iov_iter_gap_alignment); |
539 | 831 | ||
832 | static inline size_t __pipe_get_pages(struct iov_iter *i, | ||
833 | size_t maxsize, | ||
834 | struct page **pages, | ||
835 | int idx, | ||
836 | size_t *start) | ||
837 | { | ||
838 | struct pipe_inode_info *pipe = i->pipe; | ||
839 | size_t n = push_pipe(i, maxsize, &idx, start); | ||
840 | if (!n) | ||
841 | return -EFAULT; | ||
842 | |||
843 | maxsize = n; | ||
844 | n += *start; | ||
845 | while (n >= PAGE_SIZE) { | ||
846 | get_page(*pages++ = pipe->bufs[idx].page); | ||
847 | idx = next_idx(idx, pipe); | ||
848 | n -= PAGE_SIZE; | ||
849 | } | ||
850 | |||
851 | return maxsize; | ||
852 | } | ||
853 | |||
854 | static ssize_t pipe_get_pages(struct iov_iter *i, | ||
855 | struct page **pages, size_t maxsize, unsigned maxpages, | ||
856 | size_t *start) | ||
857 | { | ||
858 | unsigned npages; | ||
859 | size_t capacity; | ||
860 | int idx; | ||
861 | |||
862 | if (!sanity(i)) | ||
863 | return -EFAULT; | ||
864 | |||
865 | data_start(i, &idx, start); | ||
866 | /* some of this one + all after this one */ | ||
867 | npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; | ||
868 | capacity = min(npages,maxpages) * PAGE_SIZE - *start; | ||
869 | |||
870 | return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start); | ||
871 | } | ||
872 | |||
540 | ssize_t iov_iter_get_pages(struct iov_iter *i, | 873 | ssize_t iov_iter_get_pages(struct iov_iter *i, |
541 | struct page **pages, size_t maxsize, unsigned maxpages, | 874 | struct page **pages, size_t maxsize, unsigned maxpages, |
542 | size_t *start) | 875 | size_t *start) |
@@ -547,6 +880,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, | |||
547 | if (!maxsize) | 880 | if (!maxsize) |
548 | return 0; | 881 | return 0; |
549 | 882 | ||
883 | if (unlikely(i->type & ITER_PIPE)) | ||
884 | return pipe_get_pages(i, pages, maxsize, maxpages, start); | ||
550 | iterate_all_kinds(i, maxsize, v, ({ | 885 | iterate_all_kinds(i, maxsize, v, ({ |
551 | unsigned long addr = (unsigned long)v.iov_base; | 886 | unsigned long addr = (unsigned long)v.iov_base; |
552 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); | 887 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); |
@@ -582,6 +917,37 @@ static struct page **get_pages_array(size_t n) | |||
582 | return p; | 917 | return p; |
583 | } | 918 | } |
584 | 919 | ||
920 | static ssize_t pipe_get_pages_alloc(struct iov_iter *i, | ||
921 | struct page ***pages, size_t maxsize, | ||
922 | size_t *start) | ||
923 | { | ||
924 | struct page **p; | ||
925 | size_t n; | ||
926 | int idx; | ||
927 | int npages; | ||
928 | |||
929 | if (!sanity(i)) | ||
930 | return -EFAULT; | ||
931 | |||
932 | data_start(i, &idx, start); | ||
933 | /* some of this one + all after this one */ | ||
934 | npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; | ||
935 | n = npages * PAGE_SIZE - *start; | ||
936 | if (maxsize > n) | ||
937 | maxsize = n; | ||
938 | else | ||
939 | npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); | ||
940 | p = get_pages_array(npages); | ||
941 | if (!p) | ||
942 | return -ENOMEM; | ||
943 | n = __pipe_get_pages(i, maxsize, p, idx, start); | ||
944 | if (n > 0) | ||
945 | *pages = p; | ||
946 | else | ||
947 | kvfree(p); | ||
948 | return n; | ||
949 | } | ||
950 | |||
585 | ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, | 951 | ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, |
586 | struct page ***pages, size_t maxsize, | 952 | struct page ***pages, size_t maxsize, |
587 | size_t *start) | 953 | size_t *start) |
@@ -594,6 +960,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, | |||
594 | if (!maxsize) | 960 | if (!maxsize) |
595 | return 0; | 961 | return 0; |
596 | 962 | ||
963 | if (unlikely(i->type & ITER_PIPE)) | ||
964 | return pipe_get_pages_alloc(i, pages, maxsize, start); | ||
597 | iterate_all_kinds(i, maxsize, v, ({ | 965 | iterate_all_kinds(i, maxsize, v, ({ |
598 | unsigned long addr = (unsigned long)v.iov_base; | 966 | unsigned long addr = (unsigned long)v.iov_base; |
599 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); | 967 | size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); |
@@ -635,6 +1003,10 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, | |||
635 | __wsum sum, next; | 1003 | __wsum sum, next; |
636 | size_t off = 0; | 1004 | size_t off = 0; |
637 | sum = *csum; | 1005 | sum = *csum; |
1006 | if (unlikely(i->type & ITER_PIPE)) { | ||
1007 | WARN_ON(1); | ||
1008 | return 0; | ||
1009 | } | ||
638 | iterate_and_advance(i, bytes, v, ({ | 1010 | iterate_and_advance(i, bytes, v, ({ |
639 | int err = 0; | 1011 | int err = 0; |
640 | next = csum_and_copy_from_user(v.iov_base, | 1012 | next = csum_and_copy_from_user(v.iov_base, |
@@ -673,6 +1045,10 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, | |||
673 | __wsum sum, next; | 1045 | __wsum sum, next; |
674 | size_t off = 0; | 1046 | size_t off = 0; |
675 | sum = *csum; | 1047 | sum = *csum; |
1048 | if (unlikely(i->type & ITER_PIPE)) { | ||
1049 | WARN_ON(1); /* for now */ | ||
1050 | return 0; | ||
1051 | } | ||
676 | iterate_and_advance(i, bytes, v, ({ | 1052 | iterate_and_advance(i, bytes, v, ({ |
677 | int err = 0; | 1053 | int err = 0; |
678 | next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, | 1054 | next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, |
@@ -712,7 +1088,20 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) | |||
712 | if (!size) | 1088 | if (!size) |
713 | return 0; | 1089 | return 0; |
714 | 1090 | ||
715 | iterate_all_kinds(i, size, v, ({ | 1091 | if (unlikely(i->type & ITER_PIPE)) { |
1092 | struct pipe_inode_info *pipe = i->pipe; | ||
1093 | size_t off; | ||
1094 | int idx; | ||
1095 | |||
1096 | if (!sanity(i)) | ||
1097 | return 0; | ||
1098 | |||
1099 | data_start(i, &idx, &off); | ||
1100 | /* some of this one + all after this one */ | ||
1101 | npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1; | ||
1102 | if (npages >= maxpages) | ||
1103 | return maxpages; | ||
1104 | } else iterate_all_kinds(i, size, v, ({ | ||
716 | unsigned long p = (unsigned long)v.iov_base; | 1105 | unsigned long p = (unsigned long)v.iov_base; |
717 | npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) | 1106 | npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) |
718 | - p / PAGE_SIZE; | 1107 | - p / PAGE_SIZE; |
@@ -737,6 +1126,10 @@ EXPORT_SYMBOL(iov_iter_npages); | |||
737 | const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) | 1126 | const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) |
738 | { | 1127 | { |
739 | *new = *old; | 1128 | *new = *old; |
1129 | if (unlikely(new->type & ITER_PIPE)) { | ||
1130 | WARN_ON(1); | ||
1131 | return NULL; | ||
1132 | } | ||
740 | if (new->type & ITER_BVEC) | 1133 | if (new->type & ITER_BVEC) |
741 | return new->bvec = kmemdup(new->bvec, | 1134 | return new->bvec = kmemdup(new->bvec, |
742 | new->nr_segs * sizeof(struct bio_vec), | 1135 | new->nr_segs * sizeof(struct bio_vec), |