aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@g5.osdl.org>2006-04-26 10:47:55 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-04-26 10:47:55 -0400
commit7b97ebfb931887be63bfa29b6c143e9e9da3f5e8 (patch)
treefaaf77f68ef4192ac1a3c23f43fed122a080d341
parent07db8696f5d484485dde77138ff87d19c8628a75 (diff)
parent1ebd32fc54bd04de6b3944587f25513c0681f98e (diff)
Merge branch 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block
* 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block: [PATCH] splice: add ->splice_write support for /dev/null [PATCH] splice: rearrange moving to/from pipe helpers [PATCH] Add support for the sys_vmsplice syscall [PATCH] splice: fix offset problems [PATCH] splice: fix min() warning
-rw-r--r--arch/ia64/kernel/entry.S1
-rw-r--r--arch/powerpc/kernel/systbl.S1
-rw-r--r--arch/powerpc/platforms/cell/spu_callbacks.c1
-rw-r--r--drivers/char/mem.c14
-rw-r--r--fs/splice.c355
-rw-r--r--include/asm-i386/unistd.h3
-rw-r--r--include/asm-ia64/unistd.h3
-rw-r--r--include/asm-powerpc/unistd.h3
-rw-r--r--include/asm-x86_64/unistd.h4
-rw-r--r--include/linux/pipe_fs_i.h17
-rw-r--r--include/linux/syscalls.h3
11 files changed, 328 insertions, 77 deletions
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index e30798811216..bcb80ca5cf40 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1610,5 +1610,6 @@ sys_call_table:
1610 data8 sys_get_robust_list 1610 data8 sys_get_robust_list
1611 data8 sys_sync_file_range // 1300 1611 data8 sys_sync_file_range // 1300
1612 data8 sys_tee 1612 data8 sys_tee
1613 data8 sys_vmsplice
1613 1614
1614 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls 1615 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 8d1522690501..0b98eea73c5e 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -324,6 +324,7 @@ COMPAT_SYS(ppoll)
324SYSCALL(unshare) 324SYSCALL(unshare)
325SYSCALL(splice) 325SYSCALL(splice)
326SYSCALL(tee) 326SYSCALL(tee)
327SYSCALL(vmsplice)
327 328
328/* 329/*
329 * please add new calls to arch/powerpc/platforms/cell/spu_callbacks.c 330 * please add new calls to arch/powerpc/platforms/cell/spu_callbacks.c
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c
index deb3afb94484..b283380a2a18 100644
--- a/arch/powerpc/platforms/cell/spu_callbacks.c
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -318,6 +318,7 @@ void *spu_syscall_table[] = {
318 [__NR_unshare] sys_unshare, 318 [__NR_unshare] sys_unshare,
319 [__NR_splice] sys_splice, 319 [__NR_splice] sys_splice,
320 [__NR_tee] sys_tee, 320 [__NR_tee] sys_tee,
321 [__NR_vmsplice] sys_vmsplice,
321}; 322};
322 323
323long spu_sys_callback(struct spu_syscall_block *s) 324long spu_sys_callback(struct spu_syscall_block *s)
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 66719f9d294c..1fa9fa157c12 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -27,6 +27,7 @@
27#include <linux/crash_dump.h> 27#include <linux/crash_dump.h>
28#include <linux/backing-dev.h> 28#include <linux/backing-dev.h>
29#include <linux/bootmem.h> 29#include <linux/bootmem.h>
30#include <linux/pipe_fs_i.h>
30 31
31#include <asm/uaccess.h> 32#include <asm/uaccess.h>
32#include <asm/io.h> 33#include <asm/io.h>
@@ -578,6 +579,18 @@ static ssize_t write_null(struct file * file, const char __user * buf,
578 return count; 579 return count;
579} 580}
580 581
582static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf,
583 struct splice_desc *sd)
584{
585 return sd->len;
586}
587
588static ssize_t splice_write_null(struct pipe_inode_info *pipe,struct file *out,
589 loff_t *ppos, size_t len, unsigned int flags)
590{
591 return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
592}
593
581#ifdef CONFIG_MMU 594#ifdef CONFIG_MMU
582/* 595/*
583 * For fun, we are using the MMU for this. 596 * For fun, we are using the MMU for this.
@@ -785,6 +798,7 @@ static struct file_operations null_fops = {
785 .llseek = null_lseek, 798 .llseek = null_lseek,
786 .read = read_null, 799 .read = read_null,
787 .write = write_null, 800 .write = write_null,
801 .splice_write = splice_write_null,
788}; 802};
789 803
790#if defined(CONFIG_ISA) || !defined(__mc68000__) 804#if defined(CONFIG_ISA) || !defined(__mc68000__)
diff --git a/fs/splice.c b/fs/splice.c
index 0559e7577a04..447ebc0a37f3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -27,15 +27,22 @@
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/uio.h>
31
32struct partial_page {
33 unsigned int offset;
34 unsigned int len;
35};
30 36
31/* 37/*
32 * Passed to the actors 38 * Passed to splice_to_pipe
33 */ 39 */
34struct splice_desc { 40struct splice_pipe_desc {
35 unsigned int len, total_len; /* current and remaining length */ 41 struct page **pages; /* page map */
42 struct partial_page *partial; /* pages[] may not be contig */
43 int nr_pages; /* number of pages in map */
36 unsigned int flags; /* splice flags */ 44 unsigned int flags; /* splice flags */
37 struct file *file; /* file to read/write */ 45 struct pipe_buf_operations *ops;/* ops associated with output pipe */
38 loff_t pos; /* file position */
39}; 46};
40 47
41/* 48/*
@@ -128,6 +135,19 @@ static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
128 kunmap(buf->page); 135 kunmap(buf->page);
129} 136}
130 137
138static void *user_page_pipe_buf_map(struct file *file,
139 struct pipe_inode_info *pipe,
140 struct pipe_buffer *buf)
141{
142 return kmap(buf->page);
143}
144
145static void user_page_pipe_buf_unmap(struct pipe_inode_info *pipe,
146 struct pipe_buffer *buf)
147{
148 kunmap(buf->page);
149}
150
131static void page_cache_pipe_buf_get(struct pipe_inode_info *info, 151static void page_cache_pipe_buf_get(struct pipe_inode_info *info,
132 struct pipe_buffer *buf) 152 struct pipe_buffer *buf)
133{ 153{
@@ -143,19 +163,33 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = {
143 .get = page_cache_pipe_buf_get, 163 .get = page_cache_pipe_buf_get,
144}; 164};
145 165
166static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
167 struct pipe_buffer *buf)
168{
169 return 1;
170}
171
172static struct pipe_buf_operations user_page_pipe_buf_ops = {
173 .can_merge = 0,
174 .map = user_page_pipe_buf_map,
175 .unmap = user_page_pipe_buf_unmap,
176 .release = page_cache_pipe_buf_release,
177 .steal = user_page_pipe_buf_steal,
178 .get = page_cache_pipe_buf_get,
179};
180
146/* 181/*
147 * Pipe output worker. This sets up our pipe format with the page cache 182 * Pipe output worker. This sets up our pipe format with the page cache
148 * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 183 * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
149 */ 184 */
150static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, 185static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
151 int nr_pages, unsigned long len, 186 struct splice_pipe_desc *spd)
152 unsigned int offset, unsigned int flags)
153{ 187{
154 int ret, do_wakeup, i; 188 int ret, do_wakeup, page_nr;
155 189
156 ret = 0; 190 ret = 0;
157 do_wakeup = 0; 191 do_wakeup = 0;
158 i = 0; 192 page_nr = 0;
159 193
160 if (pipe->inode) 194 if (pipe->inode)
161 mutex_lock(&pipe->inode->i_mutex); 195 mutex_lock(&pipe->inode->i_mutex);
@@ -171,27 +205,19 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
171 if (pipe->nrbufs < PIPE_BUFFERS) { 205 if (pipe->nrbufs < PIPE_BUFFERS) {
172 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); 206 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
173 struct pipe_buffer *buf = pipe->bufs + newbuf; 207 struct pipe_buffer *buf = pipe->bufs + newbuf;
174 struct page *page = pages[i++];
175 unsigned long this_len;
176 208
177 this_len = PAGE_CACHE_SIZE - offset; 209 buf->page = spd->pages[page_nr];
178 if (this_len > len) 210 buf->offset = spd->partial[page_nr].offset;
179 this_len = len; 211 buf->len = spd->partial[page_nr].len;
180 212 buf->ops = spd->ops;
181 buf->page = page;
182 buf->offset = offset;
183 buf->len = this_len;
184 buf->ops = &page_cache_pipe_buf_ops;
185 pipe->nrbufs++; 213 pipe->nrbufs++;
214 page_nr++;
215 ret += buf->len;
216
186 if (pipe->inode) 217 if (pipe->inode)
187 do_wakeup = 1; 218 do_wakeup = 1;
188 219
189 ret += this_len; 220 if (!--spd->nr_pages)
190 len -= this_len;
191 offset = 0;
192 if (!--nr_pages)
193 break;
194 if (!len)
195 break; 221 break;
196 if (pipe->nrbufs < PIPE_BUFFERS) 222 if (pipe->nrbufs < PIPE_BUFFERS)
197 continue; 223 continue;
@@ -199,7 +225,7 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
199 break; 225 break;
200 } 226 }
201 227
202 if (flags & SPLICE_F_NONBLOCK) { 228 if (spd->flags & SPLICE_F_NONBLOCK) {
203 if (!ret) 229 if (!ret)
204 ret = -EAGAIN; 230 ret = -EAGAIN;
205 break; 231 break;
@@ -234,8 +260,8 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
234 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 260 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
235 } 261 }
236 262
237 while (i < nr_pages) 263 while (page_nr < spd->nr_pages)
238 page_cache_release(pages[i++]); 264 page_cache_release(spd->pages[page_nr++]);
239 265
240 return ret; 266 return ret;
241} 267}
@@ -246,17 +272,24 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
246 unsigned int flags) 272 unsigned int flags)
247{ 273{
248 struct address_space *mapping = in->f_mapping; 274 struct address_space *mapping = in->f_mapping;
249 unsigned int loff, offset, nr_pages; 275 unsigned int loff, nr_pages;
250 struct page *pages[PIPE_BUFFERS]; 276 struct page *pages[PIPE_BUFFERS];
277 struct partial_page partial[PIPE_BUFFERS];
251 struct page *page; 278 struct page *page;
252 pgoff_t index, end_index; 279 pgoff_t index, end_index;
253 loff_t isize; 280 loff_t isize;
254 size_t bytes; 281 size_t total_len;
255 int i, error; 282 int error;
283 struct splice_pipe_desc spd = {
284 .pages = pages,
285 .partial = partial,
286 .flags = flags,
287 .ops = &page_cache_pipe_buf_ops,
288 };
256 289
257 index = *ppos >> PAGE_CACHE_SHIFT; 290 index = *ppos >> PAGE_CACHE_SHIFT;
258 loff = offset = *ppos & ~PAGE_CACHE_MASK; 291 loff = *ppos & ~PAGE_CACHE_MASK;
259 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 292 nr_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
260 293
261 if (nr_pages > PIPE_BUFFERS) 294 if (nr_pages > PIPE_BUFFERS)
262 nr_pages = PIPE_BUFFERS; 295 nr_pages = PIPE_BUFFERS;
@@ -266,15 +299,15 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
266 * read-ahead if this is a non-zero offset (we are likely doing small 299 * read-ahead if this is a non-zero offset (we are likely doing small
267 * chunk splice and the page is already there) for a single page. 300 * chunk splice and the page is already there) for a single page.
268 */ 301 */
269 if (!offset || nr_pages > 1) 302 if (!loff || spd.nr_pages > 1)
270 do_page_cache_readahead(mapping, in, index, nr_pages); 303 do_page_cache_readahead(mapping, in, index, spd.nr_pages);
271 304
272 /* 305 /*
273 * Now fill in the holes: 306 * Now fill in the holes:
274 */ 307 */
275 error = 0; 308 error = 0;
276 bytes = 0; 309 total_len = 0;
277 for (i = 0; i < nr_pages; i++, index++) { 310 for (spd.nr_pages = 0; spd.nr_pages < nr_pages; spd.nr_pages++, index++) {
278 unsigned int this_len; 311 unsigned int this_len;
279 312
280 if (!len) 313 if (!len)
@@ -283,7 +316,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
283 /* 316 /*
284 * this_len is the max we'll use from this page 317 * this_len is the max we'll use from this page
285 */ 318 */
286 this_len = min(len, PAGE_CACHE_SIZE - loff); 319 this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
287find_page: 320find_page:
288 /* 321 /*
289 * lookup the page for this index 322 * lookup the page for this index
@@ -367,26 +400,29 @@ readpage:
367 */ 400 */
368 if (end_index == index) { 401 if (end_index == index) {
369 loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK); 402 loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK);
370 if (bytes + loff > isize) { 403 if (total_len + loff > isize) {
371 page_cache_release(page); 404 page_cache_release(page);
372 break; 405 break;
373 } 406 }
374 /* 407 /*
375 * force quit after adding this page 408 * force quit after adding this page
376 */ 409 */
377 nr_pages = i; 410 nr_pages = spd.nr_pages;
378 this_len = min(this_len, loff); 411 this_len = min(this_len, loff);
412 loff = 0;
379 } 413 }
380 } 414 }
381fill_it: 415fill_it:
382 pages[i] = page; 416 pages[spd.nr_pages] = page;
383 bytes += this_len; 417 partial[spd.nr_pages].offset = loff;
418 partial[spd.nr_pages].len = this_len;
384 len -= this_len; 419 len -= this_len;
420 total_len += this_len;
385 loff = 0; 421 loff = 0;
386 } 422 }
387 423
388 if (i) 424 if (spd.nr_pages)
389 return move_to_pipe(pipe, pages, i, bytes, offset, flags); 425 return splice_to_pipe(pipe, &spd);
390 426
391 return error; 427 return error;
392} 428}
@@ -439,14 +475,13 @@ EXPORT_SYMBOL(generic_file_splice_read);
439 475
440/* 476/*
441 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' 477 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
442 * using sendpage(). 478 * using sendpage(). Return the number of bytes sent.
443 */ 479 */
444static int pipe_to_sendpage(struct pipe_inode_info *info, 480static int pipe_to_sendpage(struct pipe_inode_info *info,
445 struct pipe_buffer *buf, struct splice_desc *sd) 481 struct pipe_buffer *buf, struct splice_desc *sd)
446{ 482{
447 struct file *file = sd->file; 483 struct file *file = sd->file;
448 loff_t pos = sd->pos; 484 loff_t pos = sd->pos;
449 unsigned int offset;
450 ssize_t ret; 485 ssize_t ret;
451 void *ptr; 486 void *ptr;
452 int more; 487 int more;
@@ -461,16 +496,13 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
461 if (IS_ERR(ptr)) 496 if (IS_ERR(ptr))
462 return PTR_ERR(ptr); 497 return PTR_ERR(ptr);
463 498
464 offset = pos & ~PAGE_CACHE_MASK;
465 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 499 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
466 500
467 ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); 501 ret = file->f_op->sendpage(file, buf->page, buf->offset, sd->len,
502 &pos, more);
468 503
469 buf->ops->unmap(info, buf); 504 buf->ops->unmap(info, buf);
470 if (ret == sd->len) 505 return ret;
471 return 0;
472
473 return -EIO;
474} 506}
475 507
476/* 508/*
@@ -499,7 +531,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
499 struct file *file = sd->file; 531 struct file *file = sd->file;
500 struct address_space *mapping = file->f_mapping; 532 struct address_space *mapping = file->f_mapping;
501 gfp_t gfp_mask = mapping_gfp_mask(mapping); 533 gfp_t gfp_mask = mapping_gfp_mask(mapping);
502 unsigned int offset; 534 unsigned int offset, this_len;
503 struct page *page; 535 struct page *page;
504 pgoff_t index; 536 pgoff_t index;
505 char *src; 537 char *src;
@@ -515,6 +547,10 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
515 index = sd->pos >> PAGE_CACHE_SHIFT; 547 index = sd->pos >> PAGE_CACHE_SHIFT;
516 offset = sd->pos & ~PAGE_CACHE_MASK; 548 offset = sd->pos & ~PAGE_CACHE_MASK;
517 549
550 this_len = sd->len;
551 if (this_len + offset > PAGE_CACHE_SIZE)
552 this_len = PAGE_CACHE_SIZE - offset;
553
518 /* 554 /*
519 * Reuse buf page, if SPLICE_F_MOVE is set. 555 * Reuse buf page, if SPLICE_F_MOVE is set.
520 */ 556 */
@@ -558,7 +594,7 @@ find_page:
558 * the full page. 594 * the full page.
559 */ 595 */
560 if (!PageUptodate(page)) { 596 if (!PageUptodate(page)) {
561 if (sd->len < PAGE_CACHE_SIZE) { 597 if (this_len < PAGE_CACHE_SIZE) {
562 ret = mapping->a_ops->readpage(file, page); 598 ret = mapping->a_ops->readpage(file, page);
563 if (unlikely(ret)) 599 if (unlikely(ret))
564 goto out; 600 goto out;
@@ -582,7 +618,7 @@ find_page:
582 } 618 }
583 } 619 }
584 620
585 ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); 621 ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
586 if (ret == AOP_TRUNCATED_PAGE) { 622 if (ret == AOP_TRUNCATED_PAGE) {
587 page_cache_release(page); 623 page_cache_release(page);
588 goto find_page; 624 goto find_page;
@@ -592,18 +628,22 @@ find_page:
592 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 628 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) {
593 char *dst = kmap_atomic(page, KM_USER0); 629 char *dst = kmap_atomic(page, KM_USER0);
594 630
595 memcpy(dst + offset, src + buf->offset, sd->len); 631 memcpy(dst + offset, src + buf->offset, this_len);
596 flush_dcache_page(page); 632 flush_dcache_page(page);
597 kunmap_atomic(dst, KM_USER0); 633 kunmap_atomic(dst, KM_USER0);
598 } 634 }
599 635
600 ret = mapping->a_ops->commit_write(file, page, 0, sd->len); 636 ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
601 if (ret == AOP_TRUNCATED_PAGE) { 637 if (ret == AOP_TRUNCATED_PAGE) {
602 page_cache_release(page); 638 page_cache_release(page);
603 goto find_page; 639 goto find_page;
604 } else if (ret) 640 } else if (ret)
605 goto out; 641 goto out;
606 642
643 /*
644 * Return the number of bytes written.
645 */
646 ret = this_len;
607 mark_page_accessed(page); 647 mark_page_accessed(page);
608 balance_dirty_pages_ratelimited(mapping); 648 balance_dirty_pages_ratelimited(mapping);
609out: 649out:
@@ -616,17 +656,14 @@ out_nomem:
616 return ret; 656 return ret;
617} 657}
618 658
619typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
620 struct splice_desc *);
621
622/* 659/*
623 * Pipe input worker. Most of this logic works like a regular pipe, the 660 * Pipe input worker. Most of this logic works like a regular pipe, the
624 * key here is the 'actor' worker passed in that actually moves the data 661 * key here is the 'actor' worker passed in that actually moves the data
625 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 662 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
626 */ 663 */
627static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, 664ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
628 loff_t *ppos, size_t len, unsigned int flags, 665 loff_t *ppos, size_t len, unsigned int flags,
629 splice_actor *actor) 666 splice_actor *actor)
630{ 667{
631 int ret, do_wakeup, err; 668 int ret, do_wakeup, err;
632 struct splice_desc sd; 669 struct splice_desc sd;
@@ -652,16 +689,22 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
652 sd.len = sd.total_len; 689 sd.len = sd.total_len;
653 690
654 err = actor(pipe, buf, &sd); 691 err = actor(pipe, buf, &sd);
655 if (err) { 692 if (err <= 0) {
656 if (!ret && err != -ENODATA) 693 if (!ret && err != -ENODATA)
657 ret = err; 694 ret = err;
658 695
659 break; 696 break;
660 } 697 }
661 698
662 ret += sd.len; 699 ret += err;
663 buf->offset += sd.len; 700 buf->offset += err;
664 buf->len -= sd.len; 701 buf->len -= err;
702
703 sd.len -= err;
704 sd.pos += err;
705 sd.total_len -= err;
706 if (sd.len)
707 continue;
665 708
666 if (!buf->len) { 709 if (!buf->len) {
667 buf->ops = NULL; 710 buf->ops = NULL;
@@ -672,8 +715,6 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
672 do_wakeup = 1; 715 do_wakeup = 1;
673 } 716 }
674 717
675 sd.pos += sd.len;
676 sd.total_len -= sd.len;
677 if (!sd.total_len) 718 if (!sd.total_len)
678 break; 719 break;
679 } 720 }
@@ -741,7 +782,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
741 struct address_space *mapping = out->f_mapping; 782 struct address_space *mapping = out->f_mapping;
742 ssize_t ret; 783 ssize_t ret;
743 784
744 ret = move_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); 785 ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
745 if (ret > 0) { 786 if (ret > 0) {
746 struct inode *inode = mapping->host; 787 struct inode *inode = mapping->host;
747 788
@@ -783,7 +824,7 @@ EXPORT_SYMBOL(generic_file_splice_write);
783ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, 824ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
784 loff_t *ppos, size_t len, unsigned int flags) 825 loff_t *ppos, size_t len, unsigned int flags)
785{ 826{
786 return move_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); 827 return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
787} 828}
788 829
789EXPORT_SYMBOL(generic_splice_sendpage); 830EXPORT_SYMBOL(generic_splice_sendpage);
@@ -870,7 +911,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
870 911
871 /* 912 /*
872 * We don't have an immediate reader, but we'll read the stuff 913 * We don't have an immediate reader, but we'll read the stuff
873 * out of the pipe right after the move_to_pipe(). So set 914 * out of the pipe right after the splice_to_pipe(). So set
874 * PIPE_READERS appropriately. 915 * PIPE_READERS appropriately.
875 */ 916 */
876 pipe->readers = 1; 917 pipe->readers = 1;
@@ -1010,6 +1051,174 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1010 return -EINVAL; 1051 return -EINVAL;
1011} 1052}
1012 1053
1054/*
1055 * Map an iov into an array of pages and offset/length tupples. With the
1056 * partial_page structure, we can map several non-contiguous ranges into
1057 * our ones pages[] map instead of splitting that operation into pieces.
1058 * Could easily be exported as a generic helper for other users, in which
1059 * case one would probably want to add a 'max_nr_pages' parameter as well.
1060 */
1061static int get_iovec_page_array(const struct iovec __user *iov,
1062 unsigned int nr_vecs, struct page **pages,
1063 struct partial_page *partial)
1064{
1065 int buffers = 0, error = 0;
1066
1067 /*
1068 * It's ok to take the mmap_sem for reading, even
1069 * across a "get_user()".
1070 */
1071 down_read(&current->mm->mmap_sem);
1072
1073 while (nr_vecs) {
1074 unsigned long off, npages;
1075 void __user *base;
1076 size_t len;
1077 int i;
1078
1079 /*
1080 * Get user address base and length for this iovec.
1081 */
1082 error = get_user(base, &iov->iov_base);
1083 if (unlikely(error))
1084 break;
1085 error = get_user(len, &iov->iov_len);
1086 if (unlikely(error))
1087 break;
1088
1089 /*
1090 * Sanity check this iovec. 0 read succeeds.
1091 */
1092 if (unlikely(!len))
1093 break;
1094 error = -EFAULT;
1095 if (unlikely(!base))
1096 break;
1097
1098 /*
1099 * Get this base offset and number of pages, then map
1100 * in the user pages.
1101 */
1102 off = (unsigned long) base & ~PAGE_MASK;
1103 npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1104 if (npages > PIPE_BUFFERS - buffers)
1105 npages = PIPE_BUFFERS - buffers;
1106
1107 error = get_user_pages(current, current->mm,
1108 (unsigned long) base, npages, 0, 0,
1109 &pages[buffers], NULL);
1110
1111 if (unlikely(error <= 0))
1112 break;
1113
1114 /*
1115 * Fill this contiguous range into the partial page map.
1116 */
1117 for (i = 0; i < error; i++) {
1118 const int plen = min_t(size_t, len, PAGE_SIZE) - off;
1119
1120 partial[buffers].offset = off;
1121 partial[buffers].len = plen;
1122
1123 off = 0;
1124 len -= plen;
1125 buffers++;
1126 }
1127
1128 /*
1129 * We didn't complete this iov, stop here since it probably
1130 * means we have to move some of this into a pipe to
1131 * be able to continue.
1132 */
1133 if (len)
1134 break;
1135
1136 /*
1137 * Don't continue if we mapped fewer pages than we asked for,
1138 * or if we mapped the max number of pages that we have
1139 * room for.
1140 */
1141 if (error < npages || buffers == PIPE_BUFFERS)
1142 break;
1143
1144 nr_vecs--;
1145 iov++;
1146 }
1147
1148 up_read(&current->mm->mmap_sem);
1149
1150 if (buffers)
1151 return buffers;
1152
1153 return error;
1154}
1155
1156/*
1157 * vmsplice splices a user address range into a pipe. It can be thought of
1158 * as splice-from-memory, where the regular splice is splice-from-file (or
1159 * to file). In both cases the output is a pipe, naturally.
1160 *
1161 * Note that vmsplice only supports splicing _from_ user memory to a pipe,
1162 * not the other way around. Splicing from user memory is a simple operation
1163 * that can be supported without any funky alignment restrictions or nasty
1164 * vm tricks. We simply map in the user memory and fill them into a pipe.
1165 * The reverse isn't quite as easy, though. There are two possible solutions
1166 * for that:
1167 *
1168 * - memcpy() the data internally, at which point we might as well just
1169 * do a regular read() on the buffer anyway.
1170 * - Lots of nasty vm tricks, that are neither fast nor flexible (it
1171 * has restriction limitations on both ends of the pipe).
1172 *
1173 * Alas, it isn't here.
1174 *
1175 */
1176static long do_vmsplice(struct file *file, const struct iovec __user *iov,
1177 unsigned long nr_segs, unsigned int flags)
1178{
1179 struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe;
1180 struct page *pages[PIPE_BUFFERS];
1181 struct partial_page partial[PIPE_BUFFERS];
1182 struct splice_pipe_desc spd = {
1183 .pages = pages,
1184 .partial = partial,
1185 .flags = flags,
1186 .ops = &user_page_pipe_buf_ops,
1187 };
1188
1189 if (unlikely(!pipe))
1190 return -EBADF;
1191 if (unlikely(nr_segs > UIO_MAXIOV))
1192 return -EINVAL;
1193 else if (unlikely(!nr_segs))
1194 return 0;
1195
1196 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial);
1197 if (spd.nr_pages <= 0)
1198 return spd.nr_pages;
1199
1200 return splice_to_pipe(pipe, &spd);
1201}
1202
1203asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
1204 unsigned long nr_segs, unsigned int flags)
1205{
1206 struct file *file;
1207 long error;
1208 int fput;
1209
1210 error = -EBADF;
1211 file = fget_light(fd, &fput);
1212 if (file) {
1213 if (file->f_mode & FMODE_WRITE)
1214 error = do_vmsplice(file, iov, nr_segs, flags);
1215
1216 fput_light(file, fput);
1217 }
1218
1219 return error;
1220}
1221
1013asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, 1222asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
1014 int fd_out, loff_t __user *off_out, 1223 int fd_out, loff_t __user *off_out,
1015 size_t len, unsigned int flags) 1224 size_t len, unsigned int flags)
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index d81d6cfc1bb4..eb4b152c82fc 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -321,8 +321,9 @@
321#define __NR_splice 313 321#define __NR_splice 313
322#define __NR_sync_file_range 314 322#define __NR_sync_file_range 314
323#define __NR_tee 315 323#define __NR_tee 315
324#define __NR_vmsplice 316
324 325
325#define NR_syscalls 316 326#define NR_syscalls 317
326 327
327/* 328/*
328 * user-visible error numbers are in the range -1 - -128: see 329 * user-visible error numbers are in the range -1 - -128: see
diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h
index a40ebec6aeeb..7107763168bf 100644
--- a/include/asm-ia64/unistd.h
+++ b/include/asm-ia64/unistd.h
@@ -290,12 +290,13 @@
290#define __NR_get_robust_list 1299 290#define __NR_get_robust_list 1299
291#define __NR_sync_file_range 1300 291#define __NR_sync_file_range 1300
292#define __NR_tee 1301 292#define __NR_tee 1301
293#define __NR_vmsplice 1302
293 294
294#ifdef __KERNEL__ 295#ifdef __KERNEL__
295 296
296#include <linux/config.h> 297#include <linux/config.h>
297 298
298#define NR_syscalls 278 /* length of syscall table */ 299#define NR_syscalls 279 /* length of syscall table */
299 300
300#define __ARCH_WANT_SYS_RT_SIGACTION 301#define __ARCH_WANT_SYS_RT_SIGACTION
301 302
diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h
index c612f1a62772..34325e292596 100644
--- a/include/asm-powerpc/unistd.h
+++ b/include/asm-powerpc/unistd.h
@@ -303,8 +303,9 @@
303#define __NR_unshare 282 303#define __NR_unshare 282
304#define __NR_splice 283 304#define __NR_splice 283
305#define __NR_tee 284 305#define __NR_tee 284
306#define __NR_vmsplice 285
306 307
307#define __NR_syscalls 285 308#define __NR_syscalls 286
308 309
309#ifdef __KERNEL__ 310#ifdef __KERNEL__
310#define __NR__exit __NR_exit 311#define __NR__exit __NR_exit
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index 98c36eae567c..feb77cb8c044 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -615,8 +615,10 @@ __SYSCALL(__NR_splice, sys_splice)
615__SYSCALL(__NR_tee, sys_tee) 615__SYSCALL(__NR_tee, sys_tee)
616#define __NR_sync_file_range 277 616#define __NR_sync_file_range 277
617__SYSCALL(__NR_sync_file_range, sys_sync_file_range) 617__SYSCALL(__NR_sync_file_range, sys_sync_file_range)
618#define __NR_vmsplice 278
619__SYSCALL(__NR_vmsplice, sys_vmsplice)
618 620
619#define __NR_syscall_max __NR_sync_file_range 621#define __NR_syscall_max __NR_vmsplice
620 622
621#ifndef __NO_STUBS 623#ifndef __NO_STUBS
622 624
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index ef7f33c0be19..0008d4bd4059 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -61,4 +61,21 @@ void __free_pipe_info(struct pipe_inode_info *);
61 /* from/to, of course */ 61 /* from/to, of course */
62#define SPLICE_F_MORE (0x04) /* expect more data */ 62#define SPLICE_F_MORE (0x04) /* expect more data */
63 63
64/*
65 * Passed to the actors
66 */
67struct splice_desc {
68 unsigned int len, total_len; /* current and remaining length */
69 unsigned int flags; /* splice flags */
70 struct file *file; /* file to read/write */
71 loff_t pos; /* file position */
72};
73
74typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
75 struct splice_desc *);
76
77extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
78 loff_t *, size_t, unsigned int,
79 splice_actor *);
80
64#endif 81#endif
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index d3ebc0e68b2b..3996960fc565 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -574,6 +574,9 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
574 int fd_out, loff_t __user *off_out, 574 int fd_out, loff_t __user *off_out,
575 size_t len, unsigned int flags); 575 size_t len, unsigned int flags);
576 576
577asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
578 unsigned long nr_segs, unsigned int flags);
579
577asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags); 580asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags);
578 581
579asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, 582asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,