aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-07-10 16:51:06 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-07-10 16:51:06 -0400
commit 01370f0603f8435d415a19f7e62d1bab826c3589 (patch)
tree d3ce7c36c6f9e33bd1d8328ef58f2fca41a18cb3
parent 5cbc39a726eafa1198c18adb3cf56ccee371dba1 (diff)
parent 0845718dafea3e16041d270c256e8516acf4e13d (diff)
Merge branch 'splice-2.6.23' of git://git.kernel.dk/data/git/linux-2.6-block
* 'splice-2.6.23' of git://git.kernel.dk/data/git/linux-2.6-block:
  pipe: add documentation and comments
  pipe: change the ->pin() operation to ->confirm()
  Remove remnants of sendfile()
  xip sendfile removal
  splice: completely document external interface with kerneldoc
  sendfile: remove bad_sendfile() from bad_file_ops
  shmem: convert to using splice instead of sendfile()
  relay: use splice_to_pipe() instead of open-coding the pipe loop
  pipe: allow passing around of ops private pointer
  splice: divorce the splice structure/function definitions from the pipe header
  splice: relay support
  sendfile: convert nfsd to splice_direct_to_actor()
  sendfile: convert nfs to using splice_read()
  loop: convert to using splice_direct_to_actor() instead of sendfile()
  splice: add void cookie to the actor data
  sendfile: kill generic_file_sendfile()
  sendfile: remove .sendfile from filesystems that use generic_file_sendfile()
  sys_sendfile: switch to using ->splice_read, if available
  vmsplice: add vmsplice-to-user support
  splice: abstract out actor data
-rw-r--r--Documentation/DocBook/kernel-api.tmpl11
-rw-r--r--drivers/block/loop.c64
-rw-r--r--drivers/char/mem.c2
-rw-r--r--fs/adfs/file.c2
-rw-r--r--fs/affs/file.c2
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/bad_inode.c7
-rw-r--r--fs/bfs/file.c2
-rw-r--r--fs/block_dev.c1
-rw-r--r--fs/cifs/cifsfs.c8
-rw-r--r--fs/coda/file.c11
-rw-r--r--fs/ecryptfs/file.c15
-rw-r--r--fs/ext2/file.c2
-rw-r--r--fs/ext3/file.c1
-rw-r--r--fs/ext4/file.c1
-rw-r--r--fs/fat/file.c2
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/gfs2/ops_file.c1
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hpfs/file.c2
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jfs/file.c1
-rw-r--r--fs/minix/file.c2
-rw-r--r--fs/nfs/file.c15
-rw-r--r--fs/nfsd/vfs.c47
-rw-r--r--fs/ntfs/file.c2
-rw-r--r--fs/ocfs2/file.c18
-rw-r--r--fs/pipe.c70
-rw-r--r--fs/qnx4/file.c2
-rw-r--r--fs/ramfs/file-mmu.c2
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/read_write.c20
-rw-r--r--fs/reiserfs/file.c1
-rw-r--r--fs/smbfs/file.c9
-rw-r--r--fs/splice.c413
-rw-r--r--fs/sysv/file.c2
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/ufs/file.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c26
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c44
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h6
-rw-r--r--fs/xfs/xfs_vnodeops.c3
-rw-r--r--include/linux/fs.h7
-rw-r--r--include/linux/pipe_fs_i.h117
-rw-r--r--include/linux/splice.h73
-rw-r--r--include/linux/sunrpc/svc.h2
-rw-r--r--kernel/relay.c205
-rw-r--r--mm/filemap.c20
-rw-r--r--mm/filemap_xip.c22
-rw-r--r--mm/shmem.c42
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/svc.c2
56 files changed, 874 insertions, 459 deletions
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index 38f88b6ae405..8c5698a8c2e1 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -643,4 +643,15 @@ X!Idrivers/video/console/fonts.c
643!Edrivers/spi/spi.c 643!Edrivers/spi/spi.c
644 </chapter> 644 </chapter>
645 645
646 <chapter id="splice">
647 <title>splice API</title>
648 <para>
649 splice is a method for moving blocks of data around inside the
650 kernel, without continually transferring it between the kernel
651 and user space.
652 </para>
653!Iinclude/linux/splice.h
654!Ffs/splice.c
655 </chapter>
656
646</book> 657</book>
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 0ed5470d2533..4503290da407 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -74,6 +74,7 @@
74#include <linux/highmem.h> 74#include <linux/highmem.h>
75#include <linux/gfp.h> 75#include <linux/gfp.h>
76#include <linux/kthread.h> 76#include <linux/kthread.h>
77#include <linux/splice.h>
77 78
78#include <asm/uaccess.h> 79#include <asm/uaccess.h>
79 80
@@ -401,50 +402,73 @@ struct lo_read_data {
401}; 402};
402 403
403static int 404static int
404lo_read_actor(read_descriptor_t *desc, struct page *page, 405lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
405 unsigned long offset, unsigned long size) 406 struct splice_desc *sd)
406{ 407{
407 unsigned long count = desc->count; 408 struct lo_read_data *p = sd->u.data;
408 struct lo_read_data *p = desc->arg.data;
409 struct loop_device *lo = p->lo; 409 struct loop_device *lo = p->lo;
410 struct page *page = buf->page;
410 sector_t IV; 411 sector_t IV;
412 size_t size;
413 int ret;
411 414
412 IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9); 415 ret = buf->ops->confirm(pipe, buf);
416 if (unlikely(ret))
417 return ret;
413 418
414 if (size > count) 419 IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
415 size = count; 420 (buf->offset >> 9);
421 size = sd->len;
422 if (size > p->bsize)
423 size = p->bsize;
416 424
417 if (lo_do_transfer(lo, READ, page, offset, p->page, p->offset, size, IV)) { 425 if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) {
418 size = 0;
419 printk(KERN_ERR "loop: transfer error block %ld\n", 426 printk(KERN_ERR "loop: transfer error block %ld\n",
420 page->index); 427 page->index);
421 desc->error = -EINVAL; 428 size = -EINVAL;
422 } 429 }
423 430
424 flush_dcache_page(p->page); 431 flush_dcache_page(p->page);
425 432
426 desc->count = count - size; 433 if (size > 0)
427 desc->written += size; 434 p->offset += size;
428 p->offset += size; 435
429 return size; 436 return size;
430} 437}
431 438
432static int 439static int
440lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
441{
442 return __splice_from_pipe(pipe, sd, lo_splice_actor);
443}
444
445static int
433do_lo_receive(struct loop_device *lo, 446do_lo_receive(struct loop_device *lo,
434 struct bio_vec *bvec, int bsize, loff_t pos) 447 struct bio_vec *bvec, int bsize, loff_t pos)
435{ 448{
436 struct lo_read_data cookie; 449 struct lo_read_data cookie;
450 struct splice_desc sd;
437 struct file *file; 451 struct file *file;
438 int retval; 452 long retval;
439 453
440 cookie.lo = lo; 454 cookie.lo = lo;
441 cookie.page = bvec->bv_page; 455 cookie.page = bvec->bv_page;
442 cookie.offset = bvec->bv_offset; 456 cookie.offset = bvec->bv_offset;
443 cookie.bsize = bsize; 457 cookie.bsize = bsize;
458
459 sd.len = 0;
460 sd.total_len = bvec->bv_len;
461 sd.flags = 0;
462 sd.pos = pos;
463 sd.u.data = &cookie;
464
444 file = lo->lo_backing_file; 465 file = lo->lo_backing_file;
445 retval = file->f_op->sendfile(file, &pos, bvec->bv_len, 466 retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);
446 lo_read_actor, &cookie); 467
447 return (retval < 0)? retval: 0; 468 if (retval < 0)
469 return retval;
470
471 return 0;
448} 472}
449 473
450static int 474static int
@@ -679,8 +703,8 @@ static int loop_change_fd(struct loop_device *lo, struct file *lo_file,
679 if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) 703 if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
680 goto out_putf; 704 goto out_putf;
681 705
682 /* new backing store needs to support loop (eg sendfile) */ 706 /* new backing store needs to support loop (eg splice_read) */
683 if (!inode->i_fop->sendfile) 707 if (!inode->i_fop->splice_read)
684 goto out_putf; 708 goto out_putf;
685 709
686 /* size of the new backing store needs to be the same */ 710 /* size of the new backing store needs to be the same */
@@ -760,7 +784,7 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
760 * If we can't read - sorry. If we only can't write - well, 784 * If we can't read - sorry. If we only can't write - well,
761 * it's going to be read-only. 785 * it's going to be read-only.
762 */ 786 */
763 if (!file->f_op->sendfile) 787 if (!file->f_op->splice_read)
764 goto out_putf; 788 goto out_putf;
765 if (aops->prepare_write && aops->commit_write) 789 if (aops->prepare_write && aops->commit_write)
766 lo_flags |= LO_FLAGS_USE_AOPS; 790 lo_flags |= LO_FLAGS_USE_AOPS;
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index cc9a9d0df979..d2e4cfd79f27 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -24,7 +24,7 @@
24#include <linux/crash_dump.h> 24#include <linux/crash_dump.h>
25#include <linux/backing-dev.h> 25#include <linux/backing-dev.h>
26#include <linux/bootmem.h> 26#include <linux/bootmem.h>
27#include <linux/pipe_fs_i.h> 27#include <linux/splice.h>
28#include <linux/pfn.h> 28#include <linux/pfn.h>
29 29
30#include <asm/uaccess.h> 30#include <asm/uaccess.h>
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index f544a2855923..36e381c6a99a 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -33,7 +33,7 @@ const struct file_operations adfs_file_operations = {
33 .fsync = file_fsync, 33 .fsync = file_fsync,
34 .write = do_sync_write, 34 .write = do_sync_write,
35 .aio_write = generic_file_aio_write, 35 .aio_write = generic_file_aio_write,
36 .sendfile = generic_file_sendfile, 36 .splice_read = generic_file_splice_read,
37}; 37};
38 38
39const struct inode_operations adfs_file_inode_operations = { 39const struct inode_operations adfs_file_inode_operations = {
diff --git a/fs/affs/file.c b/fs/affs/file.c
index c8796906f584..c314a35f0918 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -35,7 +35,7 @@ const struct file_operations affs_file_operations = {
35 .open = affs_file_open, 35 .open = affs_file_open,
36 .release = affs_file_release, 36 .release = affs_file_release,
37 .fsync = file_fsync, 37 .fsync = file_fsync,
38 .sendfile = generic_file_sendfile, 38 .splice_read = generic_file_splice_read,
39}; 39};
40 40
41const struct inode_operations affs_file_inode_operations = { 41const struct inode_operations affs_file_inode_operations = {
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 9c0e721d9fc2..aede7eb66dd4 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -32,7 +32,7 @@ const struct file_operations afs_file_operations = {
32 .aio_read = generic_file_aio_read, 32 .aio_read = generic_file_aio_read,
33 .aio_write = afs_file_write, 33 .aio_write = afs_file_write,
34 .mmap = generic_file_readonly_mmap, 34 .mmap = generic_file_readonly_mmap,
35 .sendfile = generic_file_sendfile, 35 .splice_read = generic_file_splice_read,
36 .fsync = afs_fsync, 36 .fsync = afs_fsync,
37}; 37};
38 38
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 329ee473eede..521ff7caadbd 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -114,12 +114,6 @@ static int bad_file_lock(struct file *file, int cmd, struct file_lock *fl)
114 return -EIO; 114 return -EIO;
115} 115}
116 116
117static ssize_t bad_file_sendfile(struct file *in_file, loff_t *ppos,
118 size_t count, read_actor_t actor, void *target)
119{
120 return -EIO;
121}
122
123static ssize_t bad_file_sendpage(struct file *file, struct page *page, 117static ssize_t bad_file_sendpage(struct file *file, struct page *page,
124 int off, size_t len, loff_t *pos, int more) 118 int off, size_t len, loff_t *pos, int more)
125{ 119{
@@ -182,7 +176,6 @@ static const struct file_operations bad_file_ops =
182 .aio_fsync = bad_file_aio_fsync, 176 .aio_fsync = bad_file_aio_fsync,
183 .fasync = bad_file_fasync, 177 .fasync = bad_file_fasync,
184 .lock = bad_file_lock, 178 .lock = bad_file_lock,
185 .sendfile = bad_file_sendfile,
186 .sendpage = bad_file_sendpage, 179 .sendpage = bad_file_sendpage,
187 .get_unmapped_area = bad_file_get_unmapped_area, 180 .get_unmapped_area = bad_file_get_unmapped_area,
188 .check_flags = bad_file_check_flags, 181 .check_flags = bad_file_check_flags,
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index ef4d1fa04e65..24310e9ee05a 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -24,7 +24,7 @@ const struct file_operations bfs_file_operations = {
24 .write = do_sync_write, 24 .write = do_sync_write,
25 .aio_write = generic_file_aio_write, 25 .aio_write = generic_file_aio_write,
26 .mmap = generic_file_mmap, 26 .mmap = generic_file_mmap,
27 .sendfile = generic_file_sendfile, 27 .splice_read = generic_file_splice_read,
28}; 28};
29 29
30static int bfs_move_block(unsigned long from, unsigned long to, struct super_block *sb) 30static int bfs_move_block(unsigned long from, unsigned long to, struct super_block *sb)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ea1480a16f51..b3e9bfa748cf 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1346,7 +1346,6 @@ const struct file_operations def_blk_fops = {
1346#ifdef CONFIG_COMPAT 1346#ifdef CONFIG_COMPAT
1347 .compat_ioctl = compat_blkdev_ioctl, 1347 .compat_ioctl = compat_blkdev_ioctl,
1348#endif 1348#endif
1349 .sendfile = generic_file_sendfile,
1350 .splice_read = generic_file_splice_read, 1349 .splice_read = generic_file_splice_read,
1351 .splice_write = generic_file_splice_write, 1350 .splice_write = generic_file_splice_write,
1352}; 1351};
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 7c04752b76cb..8b0cbf4a4ad0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -616,7 +616,7 @@ const struct file_operations cifs_file_ops = {
616 .fsync = cifs_fsync, 616 .fsync = cifs_fsync,
617 .flush = cifs_flush, 617 .flush = cifs_flush,
618 .mmap = cifs_file_mmap, 618 .mmap = cifs_file_mmap,
619 .sendfile = generic_file_sendfile, 619 .splice_read = generic_file_splice_read,
620 .llseek = cifs_llseek, 620 .llseek = cifs_llseek,
621#ifdef CONFIG_CIFS_POSIX 621#ifdef CONFIG_CIFS_POSIX
622 .ioctl = cifs_ioctl, 622 .ioctl = cifs_ioctl,
@@ -637,7 +637,7 @@ const struct file_operations cifs_file_direct_ops = {
637 .lock = cifs_lock, 637 .lock = cifs_lock,
638 .fsync = cifs_fsync, 638 .fsync = cifs_fsync,
639 .flush = cifs_flush, 639 .flush = cifs_flush,
640 .sendfile = generic_file_sendfile, /* BB removeme BB */ 640 .splice_read = generic_file_splice_read,
641#ifdef CONFIG_CIFS_POSIX 641#ifdef CONFIG_CIFS_POSIX
642 .ioctl = cifs_ioctl, 642 .ioctl = cifs_ioctl,
643#endif /* CONFIG_CIFS_POSIX */ 643#endif /* CONFIG_CIFS_POSIX */
@@ -656,7 +656,7 @@ const struct file_operations cifs_file_nobrl_ops = {
656 .fsync = cifs_fsync, 656 .fsync = cifs_fsync,
657 .flush = cifs_flush, 657 .flush = cifs_flush,
658 .mmap = cifs_file_mmap, 658 .mmap = cifs_file_mmap,
659 .sendfile = generic_file_sendfile, 659 .splice_read = generic_file_splice_read,
660 .llseek = cifs_llseek, 660 .llseek = cifs_llseek,
661#ifdef CONFIG_CIFS_POSIX 661#ifdef CONFIG_CIFS_POSIX
662 .ioctl = cifs_ioctl, 662 .ioctl = cifs_ioctl,
@@ -676,7 +676,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
676 .release = cifs_close, 676 .release = cifs_close,
677 .fsync = cifs_fsync, 677 .fsync = cifs_fsync,
678 .flush = cifs_flush, 678 .flush = cifs_flush,
679 .sendfile = generic_file_sendfile, /* BB removeme BB */ 679 .splice_read = generic_file_splice_read,
680#ifdef CONFIG_CIFS_POSIX 680#ifdef CONFIG_CIFS_POSIX
681 .ioctl = cifs_ioctl, 681 .ioctl = cifs_ioctl,
682#endif /* CONFIG_CIFS_POSIX */ 682#endif /* CONFIG_CIFS_POSIX */
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 5ef2b609ec7d..99dbe866816d 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -47,8 +47,9 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p
47} 47}
48 48
49static ssize_t 49static ssize_t
50coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count, 50coda_file_splice_read(struct file *coda_file, loff_t *ppos,
51 read_actor_t actor, void *target) 51 struct pipe_inode_info *pipe, size_t count,
52 unsigned int flags)
52{ 53{
53 struct coda_file_info *cfi; 54 struct coda_file_info *cfi;
54 struct file *host_file; 55 struct file *host_file;
@@ -57,10 +58,10 @@ coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count,
57 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); 58 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
58 host_file = cfi->cfi_container; 59 host_file = cfi->cfi_container;
59 60
60 if (!host_file->f_op || !host_file->f_op->sendfile) 61 if (!host_file->f_op || !host_file->f_op->splice_read)
61 return -EINVAL; 62 return -EINVAL;
62 63
63 return host_file->f_op->sendfile(host_file, ppos, count, actor, target); 64 return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags);
64} 65}
65 66
66static ssize_t 67static ssize_t
@@ -295,6 +296,6 @@ const struct file_operations coda_file_operations = {
295 .flush = coda_flush, 296 .flush = coda_flush,
296 .release = coda_release, 297 .release = coda_release,
297 .fsync = coda_fsync, 298 .fsync = coda_fsync,
298 .sendfile = coda_file_sendfile, 299 .splice_read = coda_file_splice_read,
299}; 300};
300 301
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 59288d817078..94f456fe4d9b 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -338,16 +338,17 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
338 return rc; 338 return rc;
339} 339}
340 340
341static ssize_t ecryptfs_sendfile(struct file *file, loff_t * ppos, 341static ssize_t ecryptfs_splice_read(struct file *file, loff_t * ppos,
342 size_t count, read_actor_t actor, void *target) 342 struct pipe_inode_info *pipe, size_t count,
343 unsigned int flags)
343{ 344{
344 struct file *lower_file = NULL; 345 struct file *lower_file = NULL;
345 int rc = -EINVAL; 346 int rc = -EINVAL;
346 347
347 lower_file = ecryptfs_file_to_lower(file); 348 lower_file = ecryptfs_file_to_lower(file);
348 if (lower_file->f_op && lower_file->f_op->sendfile) 349 if (lower_file->f_op && lower_file->f_op->splice_read)
349 rc = lower_file->f_op->sendfile(lower_file, ppos, count, 350 rc = lower_file->f_op->splice_read(lower_file, ppos, pipe,
350 actor, target); 351 count, flags);
351 352
352 return rc; 353 return rc;
353} 354}
@@ -364,7 +365,7 @@ const struct file_operations ecryptfs_dir_fops = {
364 .release = ecryptfs_release, 365 .release = ecryptfs_release,
365 .fsync = ecryptfs_fsync, 366 .fsync = ecryptfs_fsync,
366 .fasync = ecryptfs_fasync, 367 .fasync = ecryptfs_fasync,
367 .sendfile = ecryptfs_sendfile, 368 .splice_read = ecryptfs_splice_read,
368}; 369};
369 370
370const struct file_operations ecryptfs_main_fops = { 371const struct file_operations ecryptfs_main_fops = {
@@ -381,7 +382,7 @@ const struct file_operations ecryptfs_main_fops = {
381 .release = ecryptfs_release, 382 .release = ecryptfs_release,
382 .fsync = ecryptfs_fsync, 383 .fsync = ecryptfs_fsync,
383 .fasync = ecryptfs_fasync, 384 .fasync = ecryptfs_fasync,
384 .sendfile = ecryptfs_sendfile, 385 .splice_read = ecryptfs_splice_read,
385}; 386};
386 387
387static int 388static int
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 566d4e2d3852..04afeecaaef3 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -53,7 +53,6 @@ const struct file_operations ext2_file_operations = {
53 .open = generic_file_open, 53 .open = generic_file_open,
54 .release = ext2_release_file, 54 .release = ext2_release_file,
55 .fsync = ext2_sync_file, 55 .fsync = ext2_sync_file,
56 .sendfile = generic_file_sendfile,
57 .splice_read = generic_file_splice_read, 56 .splice_read = generic_file_splice_read,
58 .splice_write = generic_file_splice_write, 57 .splice_write = generic_file_splice_write,
59}; 58};
@@ -71,7 +70,6 @@ const struct file_operations ext2_xip_file_operations = {
71 .open = generic_file_open, 70 .open = generic_file_open,
72 .release = ext2_release_file, 71 .release = ext2_release_file,
73 .fsync = ext2_sync_file, 72 .fsync = ext2_sync_file,
74 .sendfile = xip_file_sendfile,
75}; 73};
76#endif 74#endif
77 75
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 1e6f13864536..acc4913d3019 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -120,7 +120,6 @@ const struct file_operations ext3_file_operations = {
120 .open = generic_file_open, 120 .open = generic_file_open,
121 .release = ext3_release_file, 121 .release = ext3_release_file,
122 .fsync = ext3_sync_file, 122 .fsync = ext3_sync_file,
123 .sendfile = generic_file_sendfile,
124 .splice_read = generic_file_splice_read, 123 .splice_read = generic_file_splice_read,
125 .splice_write = generic_file_splice_write, 124 .splice_write = generic_file_splice_write,
126}; 125};
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3c6c1fd2be90..d4c8186aed64 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -120,7 +120,6 @@ const struct file_operations ext4_file_operations = {
120 .open = generic_file_open, 120 .open = generic_file_open,
121 .release = ext4_release_file, 121 .release = ext4_release_file,
122 .fsync = ext4_sync_file, 122 .fsync = ext4_sync_file,
123 .sendfile = generic_file_sendfile,
124 .splice_read = generic_file_splice_read, 123 .splice_read = generic_file_splice_read,
125 .splice_write = generic_file_splice_write, 124 .splice_write = generic_file_splice_write,
126}; 125};
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 55d3c7461c5b..69a83b59dce8 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -134,7 +134,7 @@ const struct file_operations fat_file_operations = {
134 .release = fat_file_release, 134 .release = fat_file_release,
135 .ioctl = fat_generic_ioctl, 135 .ioctl = fat_generic_ioctl,
136 .fsync = file_fsync, 136 .fsync = file_fsync,
137 .sendfile = generic_file_sendfile, 137 .splice_read = generic_file_splice_read,
138}; 138};
139 139
140static int fat_cont_expand(struct inode *inode, loff_t size) 140static int fat_cont_expand(struct inode *inode, loff_t size)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index adf7995232b8..f79de7c8cdfa 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -802,7 +802,7 @@ static const struct file_operations fuse_file_operations = {
802 .release = fuse_release, 802 .release = fuse_release,
803 .fsync = fuse_fsync, 803 .fsync = fuse_fsync,
804 .lock = fuse_file_lock, 804 .lock = fuse_file_lock,
805 .sendfile = generic_file_sendfile, 805 .splice_read = generic_file_splice_read,
806}; 806};
807 807
808static const struct file_operations fuse_direct_io_file_operations = { 808static const struct file_operations fuse_direct_io_file_operations = {
@@ -814,7 +814,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
814 .release = fuse_release, 814 .release = fuse_release,
815 .fsync = fuse_fsync, 815 .fsync = fuse_fsync,
816 .lock = fuse_file_lock, 816 .lock = fuse_file_lock,
817 /* no mmap and sendfile */ 817 /* no mmap and splice_read */
818}; 818};
819 819
820static const struct address_space_operations fuse_file_aops = { 820static const struct address_space_operations fuse_file_aops = {
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 064df8804582..7dc3be108204 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -635,7 +635,6 @@ const struct file_operations gfs2_file_fops = {
635 .release = gfs2_close, 635 .release = gfs2_close,
636 .fsync = gfs2_fsync, 636 .fsync = gfs2_fsync,
637 .lock = gfs2_lock, 637 .lock = gfs2_lock,
638 .sendfile = generic_file_sendfile,
639 .flock = gfs2_flock, 638 .flock = gfs2_flock,
640 .splice_read = generic_file_splice_read, 639 .splice_read = generic_file_splice_read,
641 .splice_write = generic_file_splice_write, 640 .splice_write = generic_file_splice_write,
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 9a934db0bd8a..bc835f272a6e 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -607,7 +607,7 @@ static const struct file_operations hfs_file_operations = {
607 .write = do_sync_write, 607 .write = do_sync_write,
608 .aio_write = generic_file_aio_write, 608 .aio_write = generic_file_aio_write,
609 .mmap = generic_file_mmap, 609 .mmap = generic_file_mmap,
610 .sendfile = generic_file_sendfile, 610 .splice_read = generic_file_splice_read,
611 .fsync = file_fsync, 611 .fsync = file_fsync,
612 .open = hfs_file_open, 612 .open = hfs_file_open,
613 .release = hfs_file_release, 613 .release = hfs_file_release,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 45dab5d6cc10..409ce5429c91 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -288,7 +288,7 @@ static const struct file_operations hfsplus_file_operations = {
288 .write = do_sync_write, 288 .write = do_sync_write,
289 .aio_write = generic_file_aio_write, 289 .aio_write = generic_file_aio_write,
290 .mmap = generic_file_mmap, 290 .mmap = generic_file_mmap,
291 .sendfile = generic_file_sendfile, 291 .splice_read = generic_file_splice_read,
292 .fsync = file_fsync, 292 .fsync = file_fsync,
293 .open = hfsplus_file_open, 293 .open = hfsplus_file_open,
294 .release = hfsplus_file_release, 294 .release = hfsplus_file_release,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 8286491dbf31..c77862032e84 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -390,7 +390,7 @@ int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
390static const struct file_operations hostfs_file_fops = { 390static const struct file_operations hostfs_file_fops = {
391 .llseek = generic_file_llseek, 391 .llseek = generic_file_llseek,
392 .read = do_sync_read, 392 .read = do_sync_read,
393 .sendfile = generic_file_sendfile, 393 .splice_read = generic_file_splice_read,
394 .aio_read = generic_file_aio_read, 394 .aio_read = generic_file_aio_read,
395 .aio_write = generic_file_aio_write, 395 .aio_write = generic_file_aio_write,
396 .write = do_sync_write, 396 .write = do_sync_write,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index b4eafc0f1e54..5b53e5c5d8df 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -129,7 +129,7 @@ const struct file_operations hpfs_file_ops =
129 .mmap = generic_file_mmap, 129 .mmap = generic_file_mmap,
130 .release = hpfs_file_release, 130 .release = hpfs_file_release,
131 .fsync = hpfs_file_fsync, 131 .fsync = hpfs_file_fsync,
132 .sendfile = generic_file_sendfile, 132 .splice_read = generic_file_splice_read,
133}; 133};
134 134
135const struct inode_operations hpfs_file_iops = 135const struct inode_operations hpfs_file_iops =
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 99871279a1ed..c2530197be0c 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -47,7 +47,7 @@ const struct file_operations jffs2_file_operations =
47 .ioctl = jffs2_ioctl, 47 .ioctl = jffs2_ioctl,
48 .mmap = generic_file_readonly_mmap, 48 .mmap = generic_file_readonly_mmap,
49 .fsync = jffs2_fsync, 49 .fsync = jffs2_fsync,
50 .sendfile = generic_file_sendfile 50 .splice_read = generic_file_splice_read,
51}; 51};
52 52
53/* jffs2_file_inode_operations */ 53/* jffs2_file_inode_operations */
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index f7f8eff19b7b..87eb93694af7 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -108,7 +108,6 @@ const struct file_operations jfs_file_operations = {
108 .aio_read = generic_file_aio_read, 108 .aio_read = generic_file_aio_read,
109 .aio_write = generic_file_aio_write, 109 .aio_write = generic_file_aio_write,
110 .mmap = generic_file_mmap, 110 .mmap = generic_file_mmap,
111 .sendfile = generic_file_sendfile,
112 .splice_read = generic_file_splice_read, 111 .splice_read = generic_file_splice_read,
113 .splice_write = generic_file_splice_write, 112 .splice_write = generic_file_splice_write,
114 .fsync = jfs_fsync, 113 .fsync = jfs_fsync,
diff --git a/fs/minix/file.c b/fs/minix/file.c
index f92baa1d7570..17765f697e50 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -23,7 +23,7 @@ const struct file_operations minix_file_operations = {
23 .aio_write = generic_file_aio_write, 23 .aio_write = generic_file_aio_write,
24 .mmap = generic_file_mmap, 24 .mmap = generic_file_mmap,
25 .fsync = minix_sync_file, 25 .fsync = minix_sync_file,
26 .sendfile = generic_file_sendfile, 26 .splice_read = generic_file_splice_read,
27}; 27};
28 28
29const struct inode_operations minix_file_inode_operations = { 29const struct inode_operations minix_file_inode_operations = {
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 9eb8eb4e4a08..8689b736fdd9 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -41,7 +41,9 @@ static int nfs_file_open(struct inode *, struct file *);
41static int nfs_file_release(struct inode *, struct file *); 41static int nfs_file_release(struct inode *, struct file *);
42static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin); 42static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
43static int nfs_file_mmap(struct file *, struct vm_area_struct *); 43static int nfs_file_mmap(struct file *, struct vm_area_struct *);
44static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); 44static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos,
45 struct pipe_inode_info *pipe,
46 size_t count, unsigned int flags);
45static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov, 47static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
46 unsigned long nr_segs, loff_t pos); 48 unsigned long nr_segs, loff_t pos);
47static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, 49static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
@@ -65,7 +67,7 @@ const struct file_operations nfs_file_operations = {
65 .fsync = nfs_fsync, 67 .fsync = nfs_fsync,
66 .lock = nfs_lock, 68 .lock = nfs_lock,
67 .flock = nfs_flock, 69 .flock = nfs_flock,
68 .sendfile = nfs_file_sendfile, 70 .splice_read = nfs_file_splice_read,
69 .check_flags = nfs_check_flags, 71 .check_flags = nfs_check_flags,
70}; 72};
71 73
@@ -224,20 +226,21 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
224} 226}
225 227
226static ssize_t 228static ssize_t
227nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count, 229nfs_file_splice_read(struct file *filp, loff_t *ppos,
228 read_actor_t actor, void *target) 230 struct pipe_inode_info *pipe, size_t count,
231 unsigned int flags)
229{ 232{
230 struct dentry *dentry = filp->f_path.dentry; 233 struct dentry *dentry = filp->f_path.dentry;
231 struct inode *inode = dentry->d_inode; 234 struct inode *inode = dentry->d_inode;
232 ssize_t res; 235 ssize_t res;
233 236
234 dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n", 237 dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n",
235 dentry->d_parent->d_name.name, dentry->d_name.name, 238 dentry->d_parent->d_name.name, dentry->d_name.name,
236 (unsigned long) count, (unsigned long long) *ppos); 239 (unsigned long) count, (unsigned long long) *ppos);
237 240
238 res = nfs_revalidate_mapping(inode, filp->f_mapping); 241 res = nfs_revalidate_mapping(inode, filp->f_mapping);
239 if (!res) 242 if (!res)
240 res = generic_file_sendfile(filp, ppos, count, actor, target); 243 res = generic_file_splice_read(filp, ppos, pipe, count, flags);
241 return res; 244 return res;
242} 245}
243 246
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7e6aa245b5d5..8604e35bd48e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -23,7 +23,7 @@
23#include <linux/file.h> 23#include <linux/file.h>
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <linux/major.h> 25#include <linux/major.h>
26#include <linux/ext2_fs.h> 26#include <linux/splice.h>
27#include <linux/proc_fs.h> 27#include <linux/proc_fs.h>
28#include <linux/stat.h> 28#include <linux/stat.h>
29#include <linux/fcntl.h> 29#include <linux/fcntl.h>
@@ -801,26 +801,32 @@ found:
801} 801}
802 802
803/* 803/*
804 * Grab and keep cached pages assosiated with a file in the svc_rqst 804 * Grab and keep cached pages associated with a file in the svc_rqst
805 * so that they can be passed to the netowork sendmsg/sendpage routines 805 * so that they can be passed to the network sendmsg/sendpage routines
806 * directrly. They will be released after the sending has completed. 806 * directly. They will be released after the sending has completed.
807 */ 807 */
808static int 808static int
809nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset , unsigned long size) 809nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
810 struct splice_desc *sd)
810{ 811{
811 unsigned long count = desc->count; 812 struct svc_rqst *rqstp = sd->u.data;
812 struct svc_rqst *rqstp = desc->arg.data;
813 struct page **pp = rqstp->rq_respages + rqstp->rq_resused; 813 struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
814 struct page *page = buf->page;
815 size_t size;
816 int ret;
817
818 ret = buf->ops->confirm(pipe, buf);
819 if (unlikely(ret))
820 return ret;
814 821
815 if (size > count) 822 size = sd->len;
816 size = count;
817 823
818 if (rqstp->rq_res.page_len == 0) { 824 if (rqstp->rq_res.page_len == 0) {
819 get_page(page); 825 get_page(page);
820 put_page(*pp); 826 put_page(*pp);
821 *pp = page; 827 *pp = page;
822 rqstp->rq_resused++; 828 rqstp->rq_resused++;
823 rqstp->rq_res.page_base = offset; 829 rqstp->rq_res.page_base = buf->offset;
824 rqstp->rq_res.page_len = size; 830 rqstp->rq_res.page_len = size;
825 } else if (page != pp[-1]) { 831 } else if (page != pp[-1]) {
826 get_page(page); 832 get_page(page);
@@ -832,11 +838,15 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
832 } else 838 } else
833 rqstp->rq_res.page_len += size; 839 rqstp->rq_res.page_len += size;
834 840
835 desc->count = count - size;
836 desc->written += size;
837 return size; 841 return size;
838} 842}
839 843
844static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
845 struct splice_desc *sd)
846{
847 return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
848}
849
840static __be32 850static __be32
841nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 851nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
842 loff_t offset, struct kvec *vec, int vlen, unsigned long *count) 852 loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
@@ -861,10 +871,15 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
861 if (ra && ra->p_set) 871 if (ra && ra->p_set)
862 file->f_ra = ra->p_ra; 872 file->f_ra = ra->p_ra;
863 873
864 if (file->f_op->sendfile && rqstp->rq_sendfile_ok) { 874 if (file->f_op->splice_read && rqstp->rq_splice_ok) {
865 rqstp->rq_resused = 1; 875 struct splice_desc sd = {
866 host_err = file->f_op->sendfile(file, &offset, *count, 876 .len = 0,
867 nfsd_read_actor, rqstp); 877 .total_len = *count,
878 .pos = offset,
879 .u.data = rqstp,
880 };
881
882 host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
868 } else { 883 } else {
869 oldfs = get_fs(); 884 oldfs = get_fs();
870 set_fs(KERNEL_DS); 885 set_fs(KERNEL_DS);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 7ed56390b582..ffcc504a1667 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2276,7 +2276,7 @@ const struct file_operations ntfs_file_ops = {
2276 mounted filesystem. */ 2276 mounted filesystem. */
2277 .mmap = generic_file_mmap, /* Mmap file. */ 2277 .mmap = generic_file_mmap, /* Mmap file. */
2278 .open = ntfs_file_open, /* Open file. */ 2278 .open = ntfs_file_open, /* Open file. */
2279 .sendfile = generic_file_sendfile, /* Zero-copy data send with 2279 .splice_read = generic_file_splice_read /* Zero-copy data send with
2280 the data source being on 2280 the data source being on
2281 the ntfs partition. We do 2281 the ntfs partition. We do
2282 not need to care about the 2282 not need to care about the
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ac6c96431bbc..4979b6675717 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -31,7 +31,7 @@
31#include <linux/pagemap.h> 31#include <linux/pagemap.h>
32#include <linux/uio.h> 32#include <linux/uio.h>
33#include <linux/sched.h> 33#include <linux/sched.h>
34#include <linux/pipe_fs_i.h> 34#include <linux/splice.h>
35#include <linux/mount.h> 35#include <linux/mount.h>
36#include <linux/writeback.h> 36#include <linux/writeback.h>
37 37
@@ -1583,7 +1583,7 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
1583 ssize_t copied = 0; 1583 ssize_t copied = 0;
1584 struct ocfs2_splice_write_priv sp; 1584 struct ocfs2_splice_write_priv sp;
1585 1585
1586 ret = buf->ops->pin(pipe, buf); 1586 ret = buf->ops->confirm(pipe, buf);
1587 if (ret) 1587 if (ret)
1588 goto out; 1588 goto out;
1589 1589
@@ -1604,7 +1604,7 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
1604 * might enter ocfs2_buffered_write_cluster() more 1604 * might enter ocfs2_buffered_write_cluster() more
1605 * than once, so keep track of our progress here. 1605 * than once, so keep track of our progress here.
1606 */ 1606 */
1607 copied = ocfs2_buffered_write_cluster(sd->file, 1607 copied = ocfs2_buffered_write_cluster(sd->u.file,
1608 (loff_t)sd->pos + total, 1608 (loff_t)sd->pos + total,
1609 count, 1609 count,
1610 ocfs2_map_and_write_splice_data, 1610 ocfs2_map_and_write_splice_data,
@@ -1636,9 +1636,14 @@ static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
1636 int ret, err; 1636 int ret, err;
1637 struct address_space *mapping = out->f_mapping; 1637 struct address_space *mapping = out->f_mapping;
1638 struct inode *inode = mapping->host; 1638 struct inode *inode = mapping->host;
1639 1639 struct splice_desc sd = {
1640 ret = __splice_from_pipe(pipe, out, ppos, len, flags, 1640 .total_len = len,
1641 ocfs2_splice_write_actor); 1641 .flags = flags,
1642 .pos = *ppos,
1643 .u.file = out,
1644 };
1645
1646 ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor);
1642 if (ret > 0) { 1647 if (ret > 0) {
1643 *ppos += ret; 1648 *ppos += ret;
1644 1649
@@ -1817,7 +1822,6 @@ const struct inode_operations ocfs2_special_file_iops = {
1817const struct file_operations ocfs2_fops = { 1822const struct file_operations ocfs2_fops = {
1818 .read = do_sync_read, 1823 .read = do_sync_read,
1819 .write = do_sync_write, 1824 .write = do_sync_write,
1820 .sendfile = generic_file_sendfile,
1821 .mmap = ocfs2_mmap, 1825 .mmap = ocfs2_mmap,
1822 .fsync = ocfs2_sync_file, 1826 .fsync = ocfs2_sync_file,
1823 .release = ocfs2_file_release, 1827 .release = ocfs2_file_release,
diff --git a/fs/pipe.c b/fs/pipe.c
index 3a89592bdf57..d007830d9c87 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -164,6 +164,20 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
164 page_cache_release(page); 164 page_cache_release(page);
165} 165}
166 166
167/**
168 * generic_pipe_buf_map - virtually map a pipe buffer
169 * @pipe: the pipe that the buffer belongs to
170 * @buf: the buffer that should be mapped
171 * @atomic: whether to use an atomic map
172 *
173 * Description:
174 * This function returns a kernel virtual address mapping for the
175 * passed in @pipe_buffer. If @atomic is set, an atomic map is provided
176 * and the caller has to be careful not to fault before calling
177 * the unmap function.
178 *
179 * Note that this function occupies KM_USER0 if @atomic != 0.
180 */
167void *generic_pipe_buf_map(struct pipe_inode_info *pipe, 181void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
168 struct pipe_buffer *buf, int atomic) 182 struct pipe_buffer *buf, int atomic)
169{ 183{
@@ -175,6 +189,15 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
175 return kmap(buf->page); 189 return kmap(buf->page);
176} 190}
177 191
192/**
193 * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
194 * @pipe: the pipe that the buffer belongs to
195 * @buf: the buffer that should be unmapped
196 * @map_data: the data that the mapping function returned
197 *
198 * Description:
199 * This function undoes the mapping that ->map() provided.
200 */
178void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, 201void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
179 struct pipe_buffer *buf, void *map_data) 202 struct pipe_buffer *buf, void *map_data)
180{ 203{
@@ -185,11 +208,28 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
185 kunmap(buf->page); 208 kunmap(buf->page);
186} 209}
187 210
211/**
212 * generic_pipe_buf_steal - attempt to take ownership of a @pipe_buffer
213 * @pipe: the pipe that the buffer belongs to
214 * @buf: the buffer to attempt to steal
215 *
216 * Description:
217 * This function attempts to steal the @struct page attached to
218 * @buf. If successful, this function returns 0 and returns with
219 * the page locked. The caller may then reuse the page for whatever
220 * he wishes, the typical use is insertion into a different file
221 * page cache.
222 */
188int generic_pipe_buf_steal(struct pipe_inode_info *pipe, 223int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
189 struct pipe_buffer *buf) 224 struct pipe_buffer *buf)
190{ 225{
191 struct page *page = buf->page; 226 struct page *page = buf->page;
192 227
228 /*
229 * A reference of one is golden, that means that the owner of this
230 * page is the only one holding a reference to it. Lock the page
231 * and return OK.
232 */
193 if (page_count(page) == 1) { 233 if (page_count(page) == 1) {
194 lock_page(page); 234 lock_page(page);
195 return 0; 235 return 0;
@@ -198,12 +238,32 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
198 return 1; 238 return 1;
199} 239}
200 240
201void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf) 241/**
242 * generic_pipe_buf_get - get a reference to a @struct pipe_buffer
243 * @pipe: the pipe that the buffer belongs to
244 * @buf: the buffer to get a reference to
245 *
246 * Description:
247 * This function grabs an extra reference to @buf. It's used in
248 * the tee() system call, when we duplicate the buffers in one
249 * pipe into another.
250 */
251void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
202{ 252{
203 page_cache_get(buf->page); 253 page_cache_get(buf->page);
204} 254}
205 255
206int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf) 256/**
257 * generic_pipe_buf_confirm - verify contents of the pipe buffer
258 * @info: the pipe that the buffer belongs to
259 * @buf: the buffer to confirm
260 *
261 * Description:
262 * This function does nothing, because the generic pipe code uses
263 * pages that are always good when inserted into the pipe.
264 */
265int generic_pipe_buf_confirm(struct pipe_inode_info *info,
266 struct pipe_buffer *buf)
207{ 267{
208 return 0; 268 return 0;
209} 269}
@@ -212,7 +272,7 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
212 .can_merge = 1, 272 .can_merge = 1,
213 .map = generic_pipe_buf_map, 273 .map = generic_pipe_buf_map,
214 .unmap = generic_pipe_buf_unmap, 274 .unmap = generic_pipe_buf_unmap,
215 .pin = generic_pipe_buf_pin, 275 .confirm = generic_pipe_buf_confirm,
216 .release = anon_pipe_buf_release, 276 .release = anon_pipe_buf_release,
217 .steal = generic_pipe_buf_steal, 277 .steal = generic_pipe_buf_steal,
218 .get = generic_pipe_buf_get, 278 .get = generic_pipe_buf_get,
@@ -252,7 +312,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
252 if (chars > total_len) 312 if (chars > total_len)
253 chars = total_len; 313 chars = total_len;
254 314
255 error = ops->pin(pipe, buf); 315 error = ops->confirm(pipe, buf);
256 if (error) { 316 if (error) {
257 if (!ret) 317 if (!ret)
258 error = ret; 318 error = ret;
@@ -373,7 +433,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
373 int error, atomic = 1; 433 int error, atomic = 1;
374 void *addr; 434 void *addr;
375 435
376 error = ops->pin(pipe, buf); 436 error = ops->confirm(pipe, buf);
377 if (error) 437 if (error)
378 goto out; 438 goto out;
379 439
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index 44649981bbc8..867f42b02035 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -25,7 +25,7 @@ const struct file_operations qnx4_file_operations =
25 .read = do_sync_read, 25 .read = do_sync_read,
26 .aio_read = generic_file_aio_read, 26 .aio_read = generic_file_aio_read,
27 .mmap = generic_file_mmap, 27 .mmap = generic_file_mmap,
28 .sendfile = generic_file_sendfile, 28 .splice_read = generic_file_splice_read,
29#ifdef CONFIG_QNX4FS_RW 29#ifdef CONFIG_QNX4FS_RW
30 .write = do_sync_write, 30 .write = do_sync_write,
31 .aio_write = generic_file_aio_write, 31 .aio_write = generic_file_aio_write,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 2f14774a124f..97bdc0b2f9d2 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -41,7 +41,7 @@ const struct file_operations ramfs_file_operations = {
41 .aio_write = generic_file_aio_write, 41 .aio_write = generic_file_aio_write,
42 .mmap = generic_file_mmap, 42 .mmap = generic_file_mmap,
43 .fsync = simple_sync_file, 43 .fsync = simple_sync_file,
44 .sendfile = generic_file_sendfile, 44 .splice_read = generic_file_splice_read,
45 .llseek = generic_file_llseek, 45 .llseek = generic_file_llseek,
46}; 46};
47 47
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 5d258c40a2fd..cad2b7ace630 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -42,7 +42,7 @@ const struct file_operations ramfs_file_operations = {
42 .write = do_sync_write, 42 .write = do_sync_write,
43 .aio_write = generic_file_aio_write, 43 .aio_write = generic_file_aio_write,
44 .fsync = simple_sync_file, 44 .fsync = simple_sync_file,
45 .sendfile = generic_file_sendfile, 45 .splice_read = generic_file_splice_read,
46 .llseek = generic_file_llseek, 46 .llseek = generic_file_llseek,
47}; 47};
48 48
diff --git a/fs/read_write.c b/fs/read_write.c
index 4d03008f015b..507ddff48a9a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -15,6 +15,7 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/syscalls.h> 16#include <linux/syscalls.h>
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include <linux/splice.h>
18#include "read_write.h" 19#include "read_write.h"
19 20
20#include <asm/uaccess.h> 21#include <asm/uaccess.h>
@@ -25,7 +26,7 @@ const struct file_operations generic_ro_fops = {
25 .read = do_sync_read, 26 .read = do_sync_read,
26 .aio_read = generic_file_aio_read, 27 .aio_read = generic_file_aio_read,
27 .mmap = generic_file_readonly_mmap, 28 .mmap = generic_file_readonly_mmap,
28 .sendfile = generic_file_sendfile, 29 .splice_read = generic_file_splice_read,
29}; 30};
30 31
31EXPORT_SYMBOL(generic_ro_fops); 32EXPORT_SYMBOL(generic_ro_fops);
@@ -708,7 +709,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
708 struct inode * in_inode, * out_inode; 709 struct inode * in_inode, * out_inode;
709 loff_t pos; 710 loff_t pos;
710 ssize_t retval; 711 ssize_t retval;
711 int fput_needed_in, fput_needed_out; 712 int fput_needed_in, fput_needed_out, fl;
712 713
713 /* 714 /*
714 * Get input file, and verify that it is ok.. 715 * Get input file, and verify that it is ok..
@@ -723,7 +724,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
723 in_inode = in_file->f_path.dentry->d_inode; 724 in_inode = in_file->f_path.dentry->d_inode;
724 if (!in_inode) 725 if (!in_inode)
725 goto fput_in; 726 goto fput_in;
726 if (!in_file->f_op || !in_file->f_op->sendfile) 727 if (!in_file->f_op || !in_file->f_op->splice_read)
727 goto fput_in; 728 goto fput_in;
728 retval = -ESPIPE; 729 retval = -ESPIPE;
729 if (!ppos) 730 if (!ppos)
@@ -776,7 +777,18 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
776 count = max - pos; 777 count = max - pos;
777 } 778 }
778 779
779 retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); 780 fl = 0;
781#if 0
782 /*
783 * We need to debate whether we can enable this or not. The
784 * man page documents EAGAIN return for the output at least,
785 * and the application is arguably buggy if it doesn't expect
786 * EAGAIN on a non-blocking file descriptor.
787 */
788 if (in_file->f_flags & O_NONBLOCK)
789 fl = SPLICE_F_NONBLOCK;
790#endif
791 retval = do_splice_direct(in_file, ppos, out_file, count, fl);
780 792
781 if (retval > 0) { 793 if (retval > 0) {
782 add_rchar(current, retval); 794 add_rchar(current, retval);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 9e451a68580f..30eebfb1b2d8 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1531,7 +1531,6 @@ const struct file_operations reiserfs_file_operations = {
1531 .open = generic_file_open, 1531 .open = generic_file_open,
1532 .release = reiserfs_file_release, 1532 .release = reiserfs_file_release,
1533 .fsync = reiserfs_sync_file, 1533 .fsync = reiserfs_sync_file,
1534 .sendfile = generic_file_sendfile,
1535 .aio_read = generic_file_aio_read, 1534 .aio_read = generic_file_aio_read,
1536 .aio_write = generic_file_aio_write, 1535 .aio_write = generic_file_aio_write,
1537 .splice_read = generic_file_splice_read, 1536 .splice_read = generic_file_splice_read,
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index aea3f8aa54c0..c5d78a7e492b 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -262,8 +262,9 @@ out:
262} 262}
263 263
264static ssize_t 264static ssize_t
265smb_file_sendfile(struct file *file, loff_t *ppos, 265smb_file_splice_read(struct file *file, loff_t *ppos,
266 size_t count, read_actor_t actor, void *target) 266 struct pipe_inode_info *pipe, size_t count,
267 unsigned int flags)
267{ 268{
268 struct dentry *dentry = file->f_path.dentry; 269 struct dentry *dentry = file->f_path.dentry;
269 ssize_t status; 270 ssize_t status;
@@ -277,7 +278,7 @@ smb_file_sendfile(struct file *file, loff_t *ppos,
277 DENTRY_PATH(dentry), status); 278 DENTRY_PATH(dentry), status);
278 goto out; 279 goto out;
279 } 280 }
280 status = generic_file_sendfile(file, ppos, count, actor, target); 281 status = generic_file_splice_read(file, ppos, pipe, count, flags);
281out: 282out:
282 return status; 283 return status;
283} 284}
@@ -416,7 +417,7 @@ const struct file_operations smb_file_operations =
416 .open = smb_file_open, 417 .open = smb_file_open,
417 .release = smb_file_release, 418 .release = smb_file_release,
418 .fsync = smb_fsync, 419 .fsync = smb_fsync,
419 .sendfile = smb_file_sendfile, 420 .splice_read = smb_file_splice_read,
420}; 421};
421 422
422const struct inode_operations smb_file_inode_operations = 423const struct inode_operations smb_file_inode_operations =
diff --git a/fs/splice.c b/fs/splice.c
index e7d7080de2f9..ed2ce995475c 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -20,7 +20,7 @@
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/file.h> 21#include <linux/file.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/pipe_fs_i.h> 23#include <linux/splice.h>
24#include <linux/mm_inline.h> 24#include <linux/mm_inline.h>
25#include <linux/swap.h> 25#include <linux/swap.h>
26#include <linux/writeback.h> 26#include <linux/writeback.h>
@@ -29,22 +29,6 @@
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/uio.h> 30#include <linux/uio.h>
31 31
32struct partial_page {
33 unsigned int offset;
34 unsigned int len;
35};
36
37/*
38 * Passed to splice_to_pipe
39 */
40struct splice_pipe_desc {
41 struct page **pages; /* page map */
42 struct partial_page *partial; /* pages[] may not be contig */
43 int nr_pages; /* number of pages in map */
44 unsigned int flags; /* splice flags */
45 const struct pipe_buf_operations *ops;/* ops associated with output pipe */
46};
47
48/* 32/*
49 * Attempt to steal a page from a pipe buffer. This should perhaps go into 33 * Attempt to steal a page from a pipe buffer. This should perhaps go into
50 * a vm helper function, it's already simplified quite a bit by the 34 * a vm helper function, it's already simplified quite a bit by the
@@ -101,8 +85,12 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
101 buf->flags &= ~PIPE_BUF_FLAG_LRU; 85 buf->flags &= ~PIPE_BUF_FLAG_LRU;
102} 86}
103 87
104static int page_cache_pipe_buf_pin(struct pipe_inode_info *pipe, 88/*
105 struct pipe_buffer *buf) 89 * Check whether the contents of buf is OK to access. Since the content
90 * is a page cache page, IO may be in flight.
91 */
92static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
93 struct pipe_buffer *buf)
106{ 94{
107 struct page *page = buf->page; 95 struct page *page = buf->page;
108 int err; 96 int err;
@@ -143,7 +131,7 @@ static const struct pipe_buf_operations page_cache_pipe_buf_ops = {
143 .can_merge = 0, 131 .can_merge = 0,
144 .map = generic_pipe_buf_map, 132 .map = generic_pipe_buf_map,
145 .unmap = generic_pipe_buf_unmap, 133 .unmap = generic_pipe_buf_unmap,
146 .pin = page_cache_pipe_buf_pin, 134 .confirm = page_cache_pipe_buf_confirm,
147 .release = page_cache_pipe_buf_release, 135 .release = page_cache_pipe_buf_release,
148 .steal = page_cache_pipe_buf_steal, 136 .steal = page_cache_pipe_buf_steal,
149 .get = generic_pipe_buf_get, 137 .get = generic_pipe_buf_get,
@@ -163,18 +151,25 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
163 .can_merge = 0, 151 .can_merge = 0,
164 .map = generic_pipe_buf_map, 152 .map = generic_pipe_buf_map,
165 .unmap = generic_pipe_buf_unmap, 153 .unmap = generic_pipe_buf_unmap,
166 .pin = generic_pipe_buf_pin, 154 .confirm = generic_pipe_buf_confirm,
167 .release = page_cache_pipe_buf_release, 155 .release = page_cache_pipe_buf_release,
168 .steal = user_page_pipe_buf_steal, 156 .steal = user_page_pipe_buf_steal,
169 .get = generic_pipe_buf_get, 157 .get = generic_pipe_buf_get,
170}; 158};
171 159
172/* 160/**
173 * Pipe output worker. This sets up our pipe format with the page cache 161 * splice_to_pipe - fill passed data into a pipe
174 * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 162 * @pipe: pipe to fill
163 * @spd: data to fill
164 *
165 * Description:
166 * @spd contains a map of pages and len/offset tuples, along with
167 * the struct pipe_buf_operations associated with these pages. This
168 * function will link that data to the pipe.
169 *
175 */ 170 */
176static ssize_t splice_to_pipe(struct pipe_inode_info *pipe, 171ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
177 struct splice_pipe_desc *spd) 172 struct splice_pipe_desc *spd)
178{ 173{
179 unsigned int spd_pages = spd->nr_pages; 174 unsigned int spd_pages = spd->nr_pages;
180 int ret, do_wakeup, page_nr; 175 int ret, do_wakeup, page_nr;
@@ -201,6 +196,7 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
201 buf->page = spd->pages[page_nr]; 196 buf->page = spd->pages[page_nr];
202 buf->offset = spd->partial[page_nr].offset; 197 buf->offset = spd->partial[page_nr].offset;
203 buf->len = spd->partial[page_nr].len; 198 buf->len = spd->partial[page_nr].len;
199 buf->private = spd->partial[page_nr].private;
204 buf->ops = spd->ops; 200 buf->ops = spd->ops;
205 if (spd->flags & SPLICE_F_GIFT) 201 if (spd->flags & SPLICE_F_GIFT)
206 buf->flags |= PIPE_BUF_FLAG_GIFT; 202 buf->flags |= PIPE_BUF_FLAG_GIFT;
@@ -296,19 +292,15 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
296 page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages); 292 page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages);
297 293
298 /* 294 /*
299 * Now fill in the holes:
300 */
301 error = 0;
302
303 /*
304 * Lookup the (hopefully) full range of pages we need. 295 * Lookup the (hopefully) full range of pages we need.
305 */ 296 */
306 spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages); 297 spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages);
307 298
308 /* 299 /*
309 * If find_get_pages_contig() returned fewer pages than we needed, 300 * If find_get_pages_contig() returned fewer pages than we needed,
310 * allocate the rest. 301 * allocate the rest and fill in the holes.
311 */ 302 */
303 error = 0;
312 index += spd.nr_pages; 304 index += spd.nr_pages;
313 while (spd.nr_pages < nr_pages) { 305 while (spd.nr_pages < nr_pages) {
314 /* 306 /*
@@ -470,11 +462,16 @@ fill_it:
470/** 462/**
471 * generic_file_splice_read - splice data from file to a pipe 463 * generic_file_splice_read - splice data from file to a pipe
472 * @in: file to splice from 464 * @in: file to splice from
465 * @ppos: position in @in
473 * @pipe: pipe to splice to 466 * @pipe: pipe to splice to
474 * @len: number of bytes to splice 467 * @len: number of bytes to splice
475 * @flags: splice modifier flags 468 * @flags: splice modifier flags
476 * 469 *
477 * Will read pages from given file and fill them into a pipe. 470 * Description:
471 * Will read pages from given file and fill them into a pipe. Can be
472 * used as long as the address_space operations for the source implement
473 * a readpage() hook.
474 *
478 */ 475 */
479ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, 476ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
480 struct pipe_inode_info *pipe, size_t len, 477 struct pipe_inode_info *pipe, size_t len,
@@ -528,11 +525,11 @@ EXPORT_SYMBOL(generic_file_splice_read);
528static int pipe_to_sendpage(struct pipe_inode_info *pipe, 525static int pipe_to_sendpage(struct pipe_inode_info *pipe,
529 struct pipe_buffer *buf, struct splice_desc *sd) 526 struct pipe_buffer *buf, struct splice_desc *sd)
530{ 527{
531 struct file *file = sd->file; 528 struct file *file = sd->u.file;
532 loff_t pos = sd->pos; 529 loff_t pos = sd->pos;
533 int ret, more; 530 int ret, more;
534 531
535 ret = buf->ops->pin(pipe, buf); 532 ret = buf->ops->confirm(pipe, buf);
536 if (!ret) { 533 if (!ret) {
537 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 534 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
538 535
@@ -566,7 +563,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
566static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 563static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
567 struct splice_desc *sd) 564 struct splice_desc *sd)
568{ 565{
569 struct file *file = sd->file; 566 struct file *file = sd->u.file;
570 struct address_space *mapping = file->f_mapping; 567 struct address_space *mapping = file->f_mapping;
571 unsigned int offset, this_len; 568 unsigned int offset, this_len;
572 struct page *page; 569 struct page *page;
@@ -576,7 +573,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
576 /* 573 /*
577 * make sure the data in this buffer is uptodate 574 * make sure the data in this buffer is uptodate
578 */ 575 */
579 ret = buf->ops->pin(pipe, buf); 576 ret = buf->ops->confirm(pipe, buf);
580 if (unlikely(ret)) 577 if (unlikely(ret))
581 return ret; 578 return ret;
582 579
@@ -663,36 +660,37 @@ out_ret:
663 return ret; 660 return ret;
664} 661}
665 662
666/* 663/**
667 * Pipe input worker. Most of this logic works like a regular pipe, the 664 * __splice_from_pipe - splice data from a pipe to given actor
668 * key here is the 'actor' worker passed in that actually moves the data 665 * @pipe: pipe to splice from
669 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 666 * @sd: information to @actor
667 * @actor: handler that splices the data
668 *
669 * Description:
670 * This function does little more than loop over the pipe and call
671 * @actor to do the actual moving of a single struct pipe_buffer to
672 * the desired destination. See pipe_to_file, pipe_to_sendpage, or
673 * pipe_to_user.
674 *
670 */ 675 */
671ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, 676ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
672 struct file *out, loff_t *ppos, size_t len, 677 splice_actor *actor)
673 unsigned int flags, splice_actor *actor)
674{ 678{
675 int ret, do_wakeup, err; 679 int ret, do_wakeup, err;
676 struct splice_desc sd;
677 680
678 ret = 0; 681 ret = 0;
679 do_wakeup = 0; 682 do_wakeup = 0;
680 683
681 sd.total_len = len;
682 sd.flags = flags;
683 sd.file = out;
684 sd.pos = *ppos;
685
686 for (;;) { 684 for (;;) {
687 if (pipe->nrbufs) { 685 if (pipe->nrbufs) {
688 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; 686 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
689 const struct pipe_buf_operations *ops = buf->ops; 687 const struct pipe_buf_operations *ops = buf->ops;
690 688
691 sd.len = buf->len; 689 sd->len = buf->len;
692 if (sd.len > sd.total_len) 690 if (sd->len > sd->total_len)
693 sd.len = sd.total_len; 691 sd->len = sd->total_len;
694 692
695 err = actor(pipe, buf, &sd); 693 err = actor(pipe, buf, sd);
696 if (err <= 0) { 694 if (err <= 0) {
697 if (!ret && err != -ENODATA) 695 if (!ret && err != -ENODATA)
698 ret = err; 696 ret = err;
@@ -704,10 +702,10 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
704 buf->offset += err; 702 buf->offset += err;
705 buf->len -= err; 703 buf->len -= err;
706 704
707 sd.len -= err; 705 sd->len -= err;
708 sd.pos += err; 706 sd->pos += err;
709 sd.total_len -= err; 707 sd->total_len -= err;
710 if (sd.len) 708 if (sd->len)
711 continue; 709 continue;
712 710
713 if (!buf->len) { 711 if (!buf->len) {
@@ -719,7 +717,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
719 do_wakeup = 1; 717 do_wakeup = 1;
720 } 718 }
721 719
722 if (!sd.total_len) 720 if (!sd->total_len)
723 break; 721 break;
724 } 722 }
725 723
@@ -732,7 +730,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
732 break; 730 break;
733 } 731 }
734 732
735 if (flags & SPLICE_F_NONBLOCK) { 733 if (sd->flags & SPLICE_F_NONBLOCK) {
736 if (!ret) 734 if (!ret)
737 ret = -EAGAIN; 735 ret = -EAGAIN;
738 break; 736 break;
@@ -766,12 +764,32 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
766} 764}
767EXPORT_SYMBOL(__splice_from_pipe); 765EXPORT_SYMBOL(__splice_from_pipe);
768 766
767/**
768 * splice_from_pipe - splice data from a pipe to a file
769 * @pipe: pipe to splice from
770 * @out: file to splice to
771 * @ppos: position in @out
772 * @len: how many bytes to splice
773 * @flags: splice modifier flags
774 * @actor: handler that splices the data
775 *
776 * Description:
777 * See __splice_from_pipe. This function locks the input and output inodes,
778 * otherwise it's identical to __splice_from_pipe().
779 *
780 */
769ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, 781ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
770 loff_t *ppos, size_t len, unsigned int flags, 782 loff_t *ppos, size_t len, unsigned int flags,
771 splice_actor *actor) 783 splice_actor *actor)
772{ 784{
773 ssize_t ret; 785 ssize_t ret;
774 struct inode *inode = out->f_mapping->host; 786 struct inode *inode = out->f_mapping->host;
787 struct splice_desc sd = {
788 .total_len = len,
789 .flags = flags,
790 .pos = *ppos,
791 .u.file = out,
792 };
775 793
776 /* 794 /*
777 * The actor worker might be calling ->prepare_write and 795 * The actor worker might be calling ->prepare_write and
@@ -780,7 +798,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
780 * pipe->inode, we have to order lock acquiry here. 798 * pipe->inode, we have to order lock acquiry here.
781 */ 799 */
782 inode_double_lock(inode, pipe->inode); 800 inode_double_lock(inode, pipe->inode);
783 ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor); 801 ret = __splice_from_pipe(pipe, &sd, actor);
784 inode_double_unlock(inode, pipe->inode); 802 inode_double_unlock(inode, pipe->inode);
785 803
786 return ret; 804 return ret;
@@ -790,12 +808,14 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
790 * generic_file_splice_write_nolock - generic_file_splice_write without mutexes 808 * generic_file_splice_write_nolock - generic_file_splice_write without mutexes
791 * @pipe: pipe info 809 * @pipe: pipe info
792 * @out: file to write to 810 * @out: file to write to
811 * @ppos: position in @out
793 * @len: number of bytes to splice 812 * @len: number of bytes to splice
794 * @flags: splice modifier flags 813 * @flags: splice modifier flags
795 * 814 *
796 * Will either move or copy pages (determined by @flags options) from 815 * Description:
797 * the given pipe inode to the given file. The caller is responsible 816 * Will either move or copy pages (determined by @flags options) from
798 * for acquiring i_mutex on both inodes. 817 * the given pipe inode to the given file. The caller is responsible
818 * for acquiring i_mutex on both inodes.
799 * 819 *
800 */ 820 */
801ssize_t 821ssize_t
@@ -804,6 +824,12 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
804{ 824{
805 struct address_space *mapping = out->f_mapping; 825 struct address_space *mapping = out->f_mapping;
806 struct inode *inode = mapping->host; 826 struct inode *inode = mapping->host;
827 struct splice_desc sd = {
828 .total_len = len,
829 .flags = flags,
830 .pos = *ppos,
831 .u.file = out,
832 };
807 ssize_t ret; 833 ssize_t ret;
808 int err; 834 int err;
809 835
@@ -811,7 +837,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
811 if (unlikely(err)) 837 if (unlikely(err))
812 return err; 838 return err;
813 839
814 ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); 840 ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
815 if (ret > 0) { 841 if (ret > 0) {
816 unsigned long nr_pages; 842 unsigned long nr_pages;
817 843
@@ -841,11 +867,13 @@ EXPORT_SYMBOL(generic_file_splice_write_nolock);
841 * generic_file_splice_write - splice data from a pipe to a file 867 * generic_file_splice_write - splice data from a pipe to a file
842 * @pipe: pipe info 868 * @pipe: pipe info
843 * @out: file to write to 869 * @out: file to write to
870 * @ppos: position in @out
844 * @len: number of bytes to splice 871 * @len: number of bytes to splice
845 * @flags: splice modifier flags 872 * @flags: splice modifier flags
846 * 873 *
847 * Will either move or copy pages (determined by @flags options) from 874 * Description:
848 * the given pipe inode to the given file. 875 * Will either move or copy pages (determined by @flags options) from
876 * the given pipe inode to the given file.
849 * 877 *
850 */ 878 */
851ssize_t 879ssize_t
@@ -896,13 +924,15 @@ EXPORT_SYMBOL(generic_file_splice_write);
896 924
897/** 925/**
898 * generic_splice_sendpage - splice data from a pipe to a socket 926 * generic_splice_sendpage - splice data from a pipe to a socket
899 * @inode: pipe inode 927 * @pipe: pipe to splice from
900 * @out: socket to write to 928 * @out: socket to write to
929 * @ppos: position in @out
901 * @len: number of bytes to splice 930 * @len: number of bytes to splice
902 * @flags: splice modifier flags 931 * @flags: splice modifier flags
903 * 932 *
904 * Will send @len bytes from the pipe to a network socket. No data copying 933 * Description:
905 * is involved. 934 * Will send @len bytes from the pipe to a network socket. No data copying
935 * is involved.
906 * 936 *
907 */ 937 */
908ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, 938ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
@@ -956,14 +986,27 @@ static long do_splice_to(struct file *in, loff_t *ppos,
956 return in->f_op->splice_read(in, ppos, pipe, len, flags); 986 return in->f_op->splice_read(in, ppos, pipe, len, flags);
957} 987}
958 988
959long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, 989/**
960 size_t len, unsigned int flags) 990 * splice_direct_to_actor - splices data directly between two non-pipes
991 * @in: file to splice from
992 * @sd: actor information on where to splice to
993 * @actor: handles the data splicing
994 *
995 * Description:
996 * This is a special case helper to splice directly between two
997 * points, without requiring an explicit pipe. Internally an allocated
998 * pipe is cached in the process, and reused during the life time of
999 * that process.
1000 *
1001 */
1002ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1003 splice_direct_actor *actor)
961{ 1004{
962 struct pipe_inode_info *pipe; 1005 struct pipe_inode_info *pipe;
963 long ret, bytes; 1006 long ret, bytes;
964 loff_t out_off;
965 umode_t i_mode; 1007 umode_t i_mode;
966 int i; 1008 size_t len;
1009 int i, flags;
967 1010
968 /* 1011 /*
969 * We require the input being a regular file, as we don't want to 1012 * We require the input being a regular file, as we don't want to
@@ -999,7 +1042,13 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
999 */ 1042 */
1000 ret = 0; 1043 ret = 0;
1001 bytes = 0; 1044 bytes = 0;
1002 out_off = 0; 1045 len = sd->total_len;
1046 flags = sd->flags;
1047
1048 /*
1049 * Don't block on output, we have to drain the direct pipe.
1050 */
1051 sd->flags &= ~SPLICE_F_NONBLOCK;
1003 1052
1004 while (len) { 1053 while (len) {
1005 size_t read_len, max_read_len; 1054 size_t read_len, max_read_len;
@@ -1009,19 +1058,19 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1009 */ 1058 */
1010 max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE)); 1059 max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
1011 1060
1012 ret = do_splice_to(in, ppos, pipe, max_read_len, flags); 1061 ret = do_splice_to(in, &sd->pos, pipe, max_read_len, flags);
1013 if (unlikely(ret < 0)) 1062 if (unlikely(ret < 0))
1014 goto out_release; 1063 goto out_release;
1015 1064
1016 read_len = ret; 1065 read_len = ret;
1066 sd->total_len = read_len;
1017 1067
1018 /* 1068 /*
1019 * NOTE: nonblocking mode only applies to the input. We 1069 * NOTE: nonblocking mode only applies to the input. We
1020 * must not do the output in nonblocking mode as then we 1070 * must not do the output in nonblocking mode as then we
1021 * could get stuck data in the internal pipe: 1071 * could get stuck data in the internal pipe:
1022 */ 1072 */
1023 ret = do_splice_from(pipe, out, &out_off, read_len, 1073 ret = actor(pipe, sd);
1024 flags & ~SPLICE_F_NONBLOCK);
1025 if (unlikely(ret < 0)) 1074 if (unlikely(ret < 0))
1026 goto out_release; 1075 goto out_release;
1027 1076
@@ -1066,6 +1115,48 @@ out_release:
1066 return bytes; 1115 return bytes;
1067 1116
1068 return ret; 1117 return ret;
1118
1119}
1120EXPORT_SYMBOL(splice_direct_to_actor);
1121
1122static int direct_splice_actor(struct pipe_inode_info *pipe,
1123 struct splice_desc *sd)
1124{
1125 struct file *file = sd->u.file;
1126
1127 return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
1128}
1129
1130/**
1131 * do_splice_direct - splices data directly between two files
1132 * @in: file to splice from
1133 * @ppos: input file offset
1134 * @out: file to splice to
1135 * @len: number of bytes to splice
1136 * @flags: splice modifier flags
1137 *
1138 * Description:
1139 * For use by do_sendfile(). splice can easily emulate sendfile, but
1140 * doing it in the application would incur an extra system call
1141 * (splice in + splice out, as compared to just sendfile()). So this helper
1142 * can splice directly through a process-private pipe.
1143 *
1144 */
1145long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1146 size_t len, unsigned int flags)
1147{
1148 struct splice_desc sd = {
1149 .len = len,
1150 .total_len = len,
1151 .flags = flags,
1152 .pos = *ppos,
1153 .u.file = out,
1154 };
1155 size_t ret;
1156
1157 ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
1158 *ppos = sd.pos;
1159 return ret;
1069} 1160}
1070 1161
1071/* 1162/*
@@ -1248,28 +1339,131 @@ static int get_iovec_page_array(const struct iovec __user *iov,
1248 return error; 1339 return error;
1249} 1340}
1250 1341
1342static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
1343 struct splice_desc *sd)
1344{
1345 char *src;
1346 int ret;
1347
1348 ret = buf->ops->confirm(pipe, buf);
1349 if (unlikely(ret))
1350 return ret;
1351
1352 /*
1353 * See if we can use the atomic maps, by prefaulting in the
1354 * pages and doing an atomic copy
1355 */
1356 if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
1357 src = buf->ops->map(pipe, buf, 1);
1358 ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
1359 sd->len);
1360 buf->ops->unmap(pipe, buf, src);
1361 if (!ret) {
1362 ret = sd->len;
1363 goto out;
1364 }
1365 }
1366
1367 /*
1368 * No dice, use slow non-atomic map and copy
1369 */
1370 src = buf->ops->map(pipe, buf, 0);
1371
1372 ret = sd->len;
1373 if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
1374 ret = -EFAULT;
1375
1376out:
1377 if (ret > 0)
1378 sd->u.userptr += ret;
1379 buf->ops->unmap(pipe, buf, src);
1380 return ret;
1381}
1382
1383/*
1384 * For lack of a better implementation, implement vmsplice() to userspace
1385 * as a simple copy of the pipes pages to the user iov.
1386 */
1387static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
1388 unsigned long nr_segs, unsigned int flags)
1389{
1390 struct pipe_inode_info *pipe;
1391 struct splice_desc sd;
1392 ssize_t size;
1393 int error;
1394 long ret;
1395
1396 pipe = pipe_info(file->f_path.dentry->d_inode);
1397 if (!pipe)
1398 return -EBADF;
1399
1400 if (pipe->inode)
1401 mutex_lock(&pipe->inode->i_mutex);
1402
1403 error = ret = 0;
1404 while (nr_segs) {
1405 void __user *base;
1406 size_t len;
1407
1408 /*
1409 * Get user address base and length for this iovec.
1410 */
1411 error = get_user(base, &iov->iov_base);
1412 if (unlikely(error))
1413 break;
1414 error = get_user(len, &iov->iov_len);
1415 if (unlikely(error))
1416 break;
1417
1418 /*
1419 * Sanity check this iovec. 0 read succeeds.
1420 */
1421 if (unlikely(!len))
1422 break;
1423 if (unlikely(!base)) {
1424 error = -EFAULT;
1425 break;
1426 }
1427
1428 sd.len = 0;
1429 sd.total_len = len;
1430 sd.flags = flags;
1431 sd.u.userptr = base;
1432 sd.pos = 0;
1433
1434 size = __splice_from_pipe(pipe, &sd, pipe_to_user);
1435 if (size < 0) {
1436 if (!ret)
1437 ret = size;
1438
1439 break;
1440 }
1441
1442 ret += size;
1443
1444 if (size < len)
1445 break;
1446
1447 nr_segs--;
1448 iov++;
1449 }
1450
1451 if (pipe->inode)
1452 mutex_unlock(&pipe->inode->i_mutex);
1453
1454 if (!ret)
1455 ret = error;
1456
1457 return ret;
1458}
1459
1251/* 1460/*
1252 * vmsplice splices a user address range into a pipe. It can be thought of 1461 * vmsplice splices a user address range into a pipe. It can be thought of
1253 * as splice-from-memory, where the regular splice is splice-from-file (or 1462 * as splice-from-memory, where the regular splice is splice-from-file (or
1254 * to file). In both cases the output is a pipe, naturally. 1463 * to file). In both cases the output is a pipe, naturally.
1255 *
1256 * Note that vmsplice only supports splicing _from_ user memory to a pipe,
1257 * not the other way around. Splicing from user memory is a simple operation
1258 * that can be supported without any funky alignment restrictions or nasty
1259 * vm tricks. We simply map in the user memory and fill them into a pipe.
1260 * The reverse isn't quite as easy, though. There are two possible solutions
1261 * for that:
1262 *
1263 * - memcpy() the data internally, at which point we might as well just
1264 * do a regular read() on the buffer anyway.
1265 * - Lots of nasty vm tricks, that are neither fast nor flexible (it
1266 * has restriction limitations on both ends of the pipe).
1267 *
1268 * Alas, it isn't here.
1269 *
1270 */ 1464 */
1271static long do_vmsplice(struct file *file, const struct iovec __user *iov, 1465static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
1272 unsigned long nr_segs, unsigned int flags) 1466 unsigned long nr_segs, unsigned int flags)
1273{ 1467{
1274 struct pipe_inode_info *pipe; 1468 struct pipe_inode_info *pipe;
1275 struct page *pages[PIPE_BUFFERS]; 1469 struct page *pages[PIPE_BUFFERS];
@@ -1284,10 +1478,6 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
1284 pipe = pipe_info(file->f_path.dentry->d_inode); 1478 pipe = pipe_info(file->f_path.dentry->d_inode);
1285 if (!pipe) 1479 if (!pipe)
1286 return -EBADF; 1480 return -EBADF;
1287 if (unlikely(nr_segs > UIO_MAXIOV))
1288 return -EINVAL;
1289 else if (unlikely(!nr_segs))
1290 return 0;
1291 1481
1292 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, 1482 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
1293 flags & SPLICE_F_GIFT); 1483 flags & SPLICE_F_GIFT);
@@ -1297,6 +1487,22 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
1297 return splice_to_pipe(pipe, &spd); 1487 return splice_to_pipe(pipe, &spd);
1298} 1488}
1299 1489
1490/*
1491 * Note that vmsplice only really supports true splicing _from_ user memory
1492 * to a pipe, not the other way around. Splicing from user memory is a simple
1493 * operation that can be supported without any funky alignment restrictions
1494 * or nasty vm tricks. We simply map in the user memory and fill them into
1495 * a pipe. The reverse isn't quite as easy, though. There are two possible
1496 * solutions for that:
1497 *
1498 * - memcpy() the data internally, at which point we might as well just
1499 * do a regular read() on the buffer anyway.
1500 * - Lots of nasty vm tricks, that are neither fast nor flexible (it
1501 * has restriction limitations on both ends of the pipe).
1502 *
1503 * Currently we punt and implement it as a normal copy, see pipe_to_user().
1504 *
1505 */
1300asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, 1506asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
1301 unsigned long nr_segs, unsigned int flags) 1507 unsigned long nr_segs, unsigned int flags)
1302{ 1508{
@@ -1304,11 +1510,18 @@ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
1304 long error; 1510 long error;
1305 int fput; 1511 int fput;
1306 1512
1513 if (unlikely(nr_segs > UIO_MAXIOV))
1514 return -EINVAL;
1515 else if (unlikely(!nr_segs))
1516 return 0;
1517
1307 error = -EBADF; 1518 error = -EBADF;
1308 file = fget_light(fd, &fput); 1519 file = fget_light(fd, &fput);
1309 if (file) { 1520 if (file) {
1310 if (file->f_mode & FMODE_WRITE) 1521 if (file->f_mode & FMODE_WRITE)
1311 error = do_vmsplice(file, iov, nr_segs, flags); 1522 error = vmsplice_to_pipe(file, iov, nr_segs, flags);
1523 else if (file->f_mode & FMODE_READ)
1524 error = vmsplice_to_user(file, iov, nr_segs, flags);
1312 1525
1313 fput_light(file, fput); 1526 fput_light(file, fput);
1314 } 1527 }
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 0732ddb9020b..589be21d884e 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -27,7 +27,7 @@ const struct file_operations sysv_file_operations = {
27 .aio_write = generic_file_aio_write, 27 .aio_write = generic_file_aio_write,
28 .mmap = generic_file_mmap, 28 .mmap = generic_file_mmap,
29 .fsync = sysv_sync_file, 29 .fsync = sysv_sync_file,
30 .sendfile = generic_file_sendfile, 30 .splice_read = generic_file_splice_read,
31}; 31};
32 32
33const struct inode_operations sysv_file_inode_operations = { 33const struct inode_operations sysv_file_inode_operations = {
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 51b5764685e7..df070bee8d4f 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -261,7 +261,7 @@ const struct file_operations udf_file_operations = {
261 .aio_write = udf_file_aio_write, 261 .aio_write = udf_file_aio_write,
262 .release = udf_release_file, 262 .release = udf_release_file,
263 .fsync = udf_fsync_file, 263 .fsync = udf_fsync_file,
264 .sendfile = generic_file_sendfile, 264 .splice_read = generic_file_splice_read,
265}; 265};
266 266
267const struct inode_operations udf_file_inode_operations = { 267const struct inode_operations udf_file_inode_operations = {
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 1e096323bad4..6705d74c6d2d 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -60,5 +60,5 @@ const struct file_operations ufs_file_operations = {
60 .mmap = generic_file_mmap, 60 .mmap = generic_file_mmap,
61 .open = generic_file_open, 61 .open = generic_file_open,
62 .fsync = ufs_sync_file, 62 .fsync = ufs_sync_file,
63 .sendfile = generic_file_sendfile, 63 .splice_read = generic_file_splice_read,
64}; 64};
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index cb51dc961355..8c43cd2e237a 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -124,30 +124,6 @@ xfs_file_aio_write_invis(
124} 124}
125 125
126STATIC ssize_t 126STATIC ssize_t
127xfs_file_sendfile(
128 struct file *filp,
129 loff_t *pos,
130 size_t count,
131 read_actor_t actor,
132 void *target)
133{
134 return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
135 filp, pos, 0, count, actor, target, NULL);
136}
137
138STATIC ssize_t
139xfs_file_sendfile_invis(
140 struct file *filp,
141 loff_t *pos,
142 size_t count,
143 read_actor_t actor,
144 void *target)
145{
146 return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
147 filp, pos, IO_INVIS, count, actor, target, NULL);
148}
149
150STATIC ssize_t
151xfs_file_splice_read( 127xfs_file_splice_read(
152 struct file *infilp, 128 struct file *infilp,
153 loff_t *ppos, 129 loff_t *ppos,
@@ -452,7 +428,6 @@ const struct file_operations xfs_file_operations = {
452 .write = do_sync_write, 428 .write = do_sync_write,
453 .aio_read = xfs_file_aio_read, 429 .aio_read = xfs_file_aio_read,
454 .aio_write = xfs_file_aio_write, 430 .aio_write = xfs_file_aio_write,
455 .sendfile = xfs_file_sendfile,
456 .splice_read = xfs_file_splice_read, 431 .splice_read = xfs_file_splice_read,
457 .splice_write = xfs_file_splice_write, 432 .splice_write = xfs_file_splice_write,
458 .unlocked_ioctl = xfs_file_ioctl, 433 .unlocked_ioctl = xfs_file_ioctl,
@@ -475,7 +450,6 @@ const struct file_operations xfs_invis_file_operations = {
475 .write = do_sync_write, 450 .write = do_sync_write,
476 .aio_read = xfs_file_aio_read_invis, 451 .aio_read = xfs_file_aio_read_invis,
477 .aio_write = xfs_file_aio_write_invis, 452 .aio_write = xfs_file_aio_write_invis,
478 .sendfile = xfs_file_sendfile_invis,
479 .splice_read = xfs_file_splice_read_invis, 453 .splice_read = xfs_file_splice_read_invis,
480 .splice_write = xfs_file_splice_write_invis, 454 .splice_write = xfs_file_splice_write_invis,
481 .unlocked_ioctl = xfs_file_ioctl_invis, 455 .unlocked_ioctl = xfs_file_ioctl_invis,
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 715adad7dd4d..af24a457d3a3 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -101,7 +101,6 @@
101 * Feature macros (disable/enable) 101 * Feature macros (disable/enable)
102 */ 102 */
103#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ 103#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */
104#define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */
105#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */ 104#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */
106#ifdef CONFIG_SMP 105#ifdef CONFIG_SMP
107#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ 106#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index ed90403f0ee7..765ec16a6e39 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -287,50 +287,6 @@ xfs_read(
287} 287}
288 288
289ssize_t 289ssize_t
290xfs_sendfile(
291 bhv_desc_t *bdp,
292 struct file *filp,
293 loff_t *offset,
294 int ioflags,
295 size_t count,
296 read_actor_t actor,
297 void *target,
298 cred_t *credp)
299{
300 xfs_inode_t *ip = XFS_BHVTOI(bdp);
301 xfs_mount_t *mp = ip->i_mount;
302 ssize_t ret;
303
304 XFS_STATS_INC(xs_read_calls);
305 if (XFS_FORCED_SHUTDOWN(mp))
306 return -EIO;
307
308 xfs_ilock(ip, XFS_IOLOCK_SHARED);
309
310 if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
311 (!(ioflags & IO_INVIS))) {
312 bhv_vrwlock_t locktype = VRWLOCK_READ;
313 int error;
314
315 error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
316 *offset, count,
317 FILP_DELAY_FLAG(filp), &locktype);
318 if (error) {
319 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
320 return -error;
321 }
322 }
323 xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
324 (void *)(unsigned long)target, count, *offset, ioflags);
325 ret = generic_file_sendfile(filp, offset, count, actor, target);
326 if (ret > 0)
327 XFS_STATS_ADD(xs_read_bytes, ret);
328
329 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
330 return ret;
331}
332
333ssize_t
334xfs_splice_read( 290xfs_splice_read(
335 bhv_desc_t *bdp, 291 bhv_desc_t *bdp,
336 struct file *infilp, 292 struct file *infilp,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index 7ac51b1d2161..7c60a1eed88b 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -90,9 +90,6 @@ extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
90extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *, 90extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
91 const struct iovec *, unsigned int, 91 const struct iovec *, unsigned int,
92 loff_t *, int, struct cred *); 92 loff_t *, int, struct cred *);
93extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
94 loff_t *, int, size_t, read_actor_t,
95 void *, struct cred *);
96extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, loff_t *, 93extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, loff_t *,
97 struct pipe_inode_info *, size_t, int, int, 94 struct pipe_inode_info *, size_t, int, int,
98 struct cred *); 95 struct cred *);
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index d1b2d01843d1..013048a92643 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -139,9 +139,6 @@ typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
139typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *, 139typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
140 const struct iovec *, unsigned int, 140 const struct iovec *, unsigned int,
141 loff_t *, int, struct cred *); 141 loff_t *, int, struct cred *);
142typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
143 loff_t *, int, size_t, read_actor_t,
144 void *, struct cred *);
145typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, loff_t *, 142typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, loff_t *,
146 struct pipe_inode_info *, size_t, int, int, 143 struct pipe_inode_info *, size_t, int, int,
147 struct cred *); 144 struct cred *);
@@ -206,7 +203,6 @@ typedef struct bhv_vnodeops {
206 vop_close_t vop_close; 203 vop_close_t vop_close;
207 vop_read_t vop_read; 204 vop_read_t vop_read;
208 vop_write_t vop_write; 205 vop_write_t vop_write;
209 vop_sendfile_t vop_sendfile;
210 vop_splice_read_t vop_splice_read; 206 vop_splice_read_t vop_splice_read;
211 vop_splice_write_t vop_splice_write; 207 vop_splice_write_t vop_splice_write;
212 vop_ioctl_t vop_ioctl; 208 vop_ioctl_t vop_ioctl;
@@ -254,8 +250,6 @@ typedef struct bhv_vnodeops {
254 VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr) 250 VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
255#define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr) \ 251#define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr) \
256 VOP(vop_write, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr) 252 VOP(vop_write, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
257#define bhv_vop_sendfile(vp,f,off,ioflags,cnt,act,targ,cr) \
258 VOP(vop_sendfile, vp)(VNHEAD(vp),f,off,ioflags,cnt,act,targ,cr)
259#define bhv_vop_splice_read(vp,f,o,pipe,cnt,fl,iofl,cr) \ 253#define bhv_vop_splice_read(vp,f,o,pipe,cnt,fl,iofl,cr) \
260 VOP(vop_splice_read, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr) 254 VOP(vop_splice_read, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr)
261#define bhv_vop_splice_write(vp,f,o,pipe,cnt,fl,iofl,cr) \ 255#define bhv_vop_splice_write(vp,f,o,pipe,cnt,fl,iofl,cr) \
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index de17aed578f0..70bc82f65311 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -4680,9 +4680,6 @@ bhv_vnodeops_t xfs_vnodeops = {
4680 .vop_open = xfs_open, 4680 .vop_open = xfs_open,
4681 .vop_close = xfs_close, 4681 .vop_close = xfs_close,
4682 .vop_read = xfs_read, 4682 .vop_read = xfs_read,
4683#ifdef HAVE_SENDFILE
4684 .vop_sendfile = xfs_sendfile,
4685#endif
4686#ifdef HAVE_SPLICE 4683#ifdef HAVE_SPLICE
4687 .vop_splice_read = xfs_splice_read, 4684 .vop_splice_read = xfs_splice_read,
4688 .vop_splice_write = xfs_splice_write, 4685 .vop_splice_write = xfs_splice_write,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6a41f4cab14c..4f0b3bf5983c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1054,7 +1054,7 @@ struct block_device_operations {
1054}; 1054};
1055 1055
1056/* 1056/*
1057 * "descriptor" for what we're up to with a read for sendfile(). 1057 * "descriptor" for what we're up to with a read.
1058 * This allows us to use the same read code yet 1058 * This allows us to use the same read code yet
1059 * have multiple different users of the data that 1059 * have multiple different users of the data that
1060 * we read from a file. 1060 * we read from a file.
@@ -1105,7 +1105,6 @@ struct file_operations {
1105 int (*aio_fsync) (struct kiocb *, int datasync); 1105 int (*aio_fsync) (struct kiocb *, int datasync);
1106 int (*fasync) (int, struct file *, int); 1106 int (*fasync) (int, struct file *, int);
1107 int (*lock) (struct file *, int, struct file_lock *); 1107 int (*lock) (struct file *, int, struct file_lock *);
1108 ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *);
1109 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); 1108 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
1110 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); 1109 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
1111 int (*check_flags)(int); 1110 int (*check_flags)(int);
@@ -1762,7 +1761,6 @@ extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
1762 unsigned long, loff_t, loff_t *, size_t, ssize_t); 1761 unsigned long, loff_t, loff_t *, size_t, ssize_t);
1763extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); 1762extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
1764extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); 1763extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
1765extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
1766extern void do_generic_mapping_read(struct address_space *mapping, 1764extern void do_generic_mapping_read(struct address_space *mapping,
1767 struct file_ra_state *, struct file *, 1765 struct file_ra_state *, struct file *,
1768 loff_t *, read_descriptor_t *, read_actor_t); 1766 loff_t *, read_descriptor_t *, read_actor_t);
@@ -1792,9 +1790,6 @@ extern int nonseekable_open(struct inode * inode, struct file * filp);
1792#ifdef CONFIG_FS_XIP 1790#ifdef CONFIG_FS_XIP
1793extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, 1791extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len,
1794 loff_t *ppos); 1792 loff_t *ppos);
1795extern ssize_t xip_file_sendfile(struct file *in_file, loff_t *ppos,
1796 size_t count, read_actor_t actor,
1797 void *target);
1798extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); 1793extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma);
1799extern ssize_t xip_file_write(struct file *filp, const char __user *buf, 1794extern ssize_t xip_file_write(struct file *filp, const char __user *buf,
1800 size_t len, loff_t *ppos); 1795 size_t len, loff_t *ppos);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index c8884f971228..8e4120285f72 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -9,13 +9,39 @@
9#define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */ 9#define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */
10#define PIPE_BUF_FLAG_GIFT 0x04 /* page is a gift */ 10#define PIPE_BUF_FLAG_GIFT 0x04 /* page is a gift */
11 11
12/**
13 * struct pipe_buffer - a linux kernel pipe buffer
14 * @page: the page containing the data for the pipe buffer
15 * @offset: offset of data inside the @page
16 * @len: length of data inside the @page
17 * @ops: operations associated with this buffer. See @pipe_buf_operations.
18 * @flags: pipe buffer flags. See above.
19 * @private: private data owned by the ops.
20 **/
12struct pipe_buffer { 21struct pipe_buffer {
13 struct page *page; 22 struct page *page;
14 unsigned int offset, len; 23 unsigned int offset, len;
15 const struct pipe_buf_operations *ops; 24 const struct pipe_buf_operations *ops;
16 unsigned int flags; 25 unsigned int flags;
26 unsigned long private;
17}; 27};
18 28
29/**
30 * struct pipe_inode_info - a linux kernel pipe
31 * @wait: reader/writer wait point in case of empty/full pipe
32 * @nrbufs: the number of non-empty pipe buffers in this pipe
33 * @curbuf: the current pipe buffer entry
34 * @tmp_page: cached released page
35 * @readers: number of current readers of this pipe
36 * @writers: number of current writers of this pipe
37 * @waiting_writers: number of writers blocked waiting for room
38 * @r_counter: reader counter
39 * @w_counter: writer counter
40 * @fasync_readers: reader side fasync
41 * @fasync_writers: writer side fasync
42 * @inode: inode this pipe is attached to
43 * @bufs: the circular array of pipe buffers
44 **/
19struct pipe_inode_info { 45struct pipe_inode_info {
20 wait_queue_head_t wait; 46 wait_queue_head_t wait;
21 unsigned int nrbufs, curbuf; 47 unsigned int nrbufs, curbuf;
@@ -34,22 +60,73 @@ struct pipe_inode_info {
34/* 60/*
35 * Note on the nesting of these functions: 61 * Note on the nesting of these functions:
36 * 62 *
37 * ->pin() 63 * ->confirm()
38 * ->steal() 64 * ->steal()
39 * ... 65 * ...
40 * ->map() 66 * ->map()
41 * ... 67 * ...
42 * ->unmap() 68 * ->unmap()
43 * 69 *
44 * That is, ->map() must be called on a pinned buffer, same goes for ->steal(). 70 * That is, ->map() must be called on a confirmed buffer,
71 * same goes for ->steal(). See below for the meaning of each
72 * operation. Also see kerneldoc in fs/pipe.c for the pipe
73 * and generic variants of these hooks.
45 */ 74 */
46struct pipe_buf_operations { 75struct pipe_buf_operations {
76 /*
77 * This is set to 1, if the generic pipe read/write may coalesce
78 * data into an existing buffer. If this is set to 0, a new pipe
79 * page segment is always used for new data.
80 */
47 int can_merge; 81 int can_merge;
82
83 /*
84 * ->map() returns a virtual address mapping of the pipe buffer.
85 * The last integer flag reflects whether this should be an atomic
86 * mapping or not. The atomic map is faster, however you can't take
87 * page faults before calling ->unmap() again. So if you need to, e.g.,
88 * access user data through copy_to/from_user(), then you must get
89 * a non-atomic map. ->map() uses the KM_USER0 atomic slot for
90 * atomic maps, so you can't map more than one pipe_buffer at once
91 * and you have to be careful if mapping another page as source
92 * or destination for a copy (IOW, it has to use something else
93 * than KM_USER0).
94 */
48 void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int); 95 void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int);
96
97 /*
98 * Undoes ->map(), finishes the virtual mapping of the pipe buffer.
99 */
49 void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *); 100 void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *);
50 int (*pin)(struct pipe_inode_info *, struct pipe_buffer *); 101
102 /*
103 * ->confirm() verifies that the data in the pipe buffer is there
104 * and that the contents are good. If the pages in the pipe belong
105 * to a file system, we may need to wait for IO completion in this
106 * hook. Returns 0 for good, or a negative error value in case of
107 * error.
108 */
109 int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *);
110
111 /*
112 * When the contents of this pipe buffer have been completely
113 * consumed by a reader, ->release() is called.
114 */
51 void (*release)(struct pipe_inode_info *, struct pipe_buffer *); 115 void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
116
117 /*
118 * Attempt to take ownership of the pipe buffer and its contents.
119 * ->steal() returns 0 for success, in which case the contents
120 * of the pipe (the buf->page) is locked and now completely owned
121 * by the caller. The page may then be transferred to a different
122 * mapping, the most often used case is insertion into a different
123 * file address space cache.
124 */
52 int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); 125 int (*steal)(struct pipe_inode_info *, struct pipe_buffer *);
126
127 /*
128 * Get a reference to the pipe buffer.
129 */
53 void (*get)(struct pipe_inode_info *, struct pipe_buffer *); 130 void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
54}; 131};
55 132
@@ -68,39 +145,7 @@ void __free_pipe_info(struct pipe_inode_info *);
68void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int); 145void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int);
69void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *); 146void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *);
70void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); 147void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
71int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *); 148int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
72int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); 149int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
73 150
74/*
75 * splice is tied to pipes as a transport (at least for now), so we'll just
76 * add the splice flags here.
77 */
78#define SPLICE_F_MOVE (0x01) /* move pages instead of copying */
79#define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */
80 /* we may still block on the fd we splice */
81 /* from/to, of course */
82#define SPLICE_F_MORE (0x04) /* expect more data */
83#define SPLICE_F_GIFT (0x08) /* pages passed in are a gift */
84
85/*
86 * Passed to the actors
87 */
88struct splice_desc {
89 unsigned int len, total_len; /* current and remaining length */
90 unsigned int flags; /* splice flags */
91 struct file *file; /* file to read/write */
92 loff_t pos; /* file position */
93};
94
95typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
96 struct splice_desc *);
97
98extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
99 loff_t *, size_t, unsigned int,
100 splice_actor *);
101
102extern ssize_t __splice_from_pipe(struct pipe_inode_info *, struct file *,
103 loff_t *, size_t, unsigned int,
104 splice_actor *);
105
106#endif 151#endif
diff --git a/include/linux/splice.h b/include/linux/splice.h
new file mode 100644
index 000000000000..33e447f98a54
--- /dev/null
+++ b/include/linux/splice.h
@@ -0,0 +1,73 @@
1/*
2 * Function declarations and data structures related to the splice
3 * implementation.
4 *
5 * Copyright (C) 2007 Jens Axboe <jens.axboe@oracle.com>
6 *
7 */
8#ifndef SPLICE_H
9#define SPLICE_H
10
11#include <linux/pipe_fs_i.h>
12
13/*
14 * splice is tied to pipes as a transport (at least for now), so we'll just
15 * add the splice flags here.
16 */
17#define SPLICE_F_MOVE (0x01) /* move pages instead of copying */
18#define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */
19 /* we may still block on the fd we splice */
20 /* from/to, of course) */
21#define SPLICE_F_MORE (0x04) /* expect more data */
22#define SPLICE_F_GIFT (0x08) /* pages passed in are a gift */
23
24/*
25 * Passed to the actors
26 */
27struct splice_desc {
28 unsigned int len, total_len; /* current and remaining length */
29 unsigned int flags; /* splice flags */
30 /*
31 * actor() private data
32 */
33 union {
34 void __user *userptr; /* memory to write to */
35 struct file *file; /* file to read/write */
36 void *data; /* cookie */
37 } u;
38 loff_t pos; /* file position */
39};
40
41struct partial_page {
42 unsigned int offset;
43 unsigned int len;
44 unsigned long private;
45};
46
47/*
48 * Passed to splice_to_pipe
49 */
50struct splice_pipe_desc {
51 struct page **pages; /* page map */
52 struct partial_page *partial; /* pages[] may not be contig */
53 int nr_pages; /* number of pages in map */
54 unsigned int flags; /* splice flags */
55 const struct pipe_buf_operations *ops;/* ops associated with output pipe */
56};
57
58typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
59 struct splice_desc *);
60typedef int (splice_direct_actor)(struct pipe_inode_info *,
61 struct splice_desc *);
62
63extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
64 loff_t *, size_t, unsigned int,
65 splice_actor *);
66extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
67 struct splice_desc *, splice_actor *);
68extern ssize_t splice_to_pipe(struct pipe_inode_info *,
69 struct splice_pipe_desc *);
70extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
71 splice_direct_actor *);
72
73#endif
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 4a7ae8ab6eb8..129d50f2225c 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -253,7 +253,7 @@ struct svc_rqst {
253 * determine what device number 253 * determine what device number
254 * to report (real or virtual) 254 * to report (real or virtual)
255 */ 255 */
256 int rq_sendfile_ok; /* turned off in gss privacy 256 int rq_splice_ok; /* turned off in gss privacy
257 * to prevent encrypting page 257 * to prevent encrypting page
258 * cache pages */ 258 * cache pages */
259 wait_queue_head_t rq_wait; /* synchronization */ 259 wait_queue_head_t rq_wait; /* synchronization */
diff --git a/kernel/relay.c b/kernel/relay.c
index 95db8c79fe8f..3b299fb3855c 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -21,6 +21,7 @@
21#include <linux/vmalloc.h> 21#include <linux/vmalloc.h>
22#include <linux/mm.h> 22#include <linux/mm.h>
23#include <linux/cpu.h> 23#include <linux/cpu.h>
24#include <linux/splice.h>
24 25
25/* list of open channels, for cpu hotplug */ 26/* list of open channels, for cpu hotplug */
26static DEFINE_MUTEX(relay_channels_mutex); 27static DEFINE_MUTEX(relay_channels_mutex);
@@ -121,6 +122,7 @@ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size)
121 buf->page_array[i] = alloc_page(GFP_KERNEL); 122 buf->page_array[i] = alloc_page(GFP_KERNEL);
122 if (unlikely(!buf->page_array[i])) 123 if (unlikely(!buf->page_array[i]))
123 goto depopulate; 124 goto depopulate;
125 set_page_private(buf->page_array[i], (unsigned long)buf);
124 } 126 }
125 mem = vmap(buf->page_array, n_pages, VM_MAP, PAGE_KERNEL); 127 mem = vmap(buf->page_array, n_pages, VM_MAP, PAGE_KERNEL);
126 if (!mem) 128 if (!mem)
@@ -970,43 +972,6 @@ static int subbuf_read_actor(size_t read_start,
970 return ret; 972 return ret;
971} 973}
972 974
973/*
974 * subbuf_send_actor - send up to one subbuf's worth of data
975 */
976static int subbuf_send_actor(size_t read_start,
977 struct rchan_buf *buf,
978 size_t avail,
979 read_descriptor_t *desc,
980 read_actor_t actor)
981{
982 unsigned long pidx, poff;
983 unsigned int subbuf_pages;
984 int ret = 0;
985
986 subbuf_pages = buf->chan->alloc_size >> PAGE_SHIFT;
987 pidx = (read_start / PAGE_SIZE) % subbuf_pages;
988 poff = read_start & ~PAGE_MASK;
989 while (avail) {
990 struct page *p = buf->page_array[pidx];
991 unsigned int len;
992
993 len = PAGE_SIZE - poff;
994 if (len > avail)
995 len = avail;
996
997 len = actor(desc, p, poff, len);
998 if (desc->error)
999 break;
1000
1001 avail -= len;
1002 ret += len;
1003 poff = 0;
1004 pidx = (pidx + 1) % subbuf_pages;
1005 }
1006
1007 return ret;
1008}
1009
1010typedef int (*subbuf_actor_t) (size_t read_start, 975typedef int (*subbuf_actor_t) (size_t read_start,
1011 struct rchan_buf *buf, 976 struct rchan_buf *buf,
1012 size_t avail, 977 size_t avail,
@@ -1067,19 +1032,159 @@ static ssize_t relay_file_read(struct file *filp,
1067 NULL, &desc); 1032 NULL, &desc);
1068} 1033}
1069 1034
1070static ssize_t relay_file_sendfile(struct file *filp, 1035static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed)
1071 loff_t *ppos,
1072 size_t count,
1073 read_actor_t actor,
1074 void *target)
1075{ 1036{
1076 read_descriptor_t desc; 1037 rbuf->bytes_consumed += bytes_consumed;
1077 desc.written = 0; 1038
1078 desc.count = count; 1039 if (rbuf->bytes_consumed >= rbuf->chan->subbuf_size) {
1079 desc.arg.data = target; 1040 relay_subbufs_consumed(rbuf->chan, rbuf->cpu, 1);
1080 desc.error = 0; 1041 rbuf->bytes_consumed %= rbuf->chan->subbuf_size;
1081 return relay_file_read_subbufs(filp, ppos, subbuf_send_actor, 1042 }
1082 actor, &desc); 1043}
1044
1045static void relay_pipe_buf_release(struct pipe_inode_info *pipe,
1046 struct pipe_buffer *buf)
1047{
1048 struct rchan_buf *rbuf;
1049
1050 rbuf = (struct rchan_buf *)page_private(buf->page);
1051 relay_consume_bytes(rbuf, buf->private);
1052}
1053
1054static struct pipe_buf_operations relay_pipe_buf_ops = {
1055 .can_merge = 0,
1056 .map = generic_pipe_buf_map,
1057 .unmap = generic_pipe_buf_unmap,
1058 .confirm = generic_pipe_buf_confirm,
1059 .release = relay_pipe_buf_release,
1060 .steal = generic_pipe_buf_steal,
1061 .get = generic_pipe_buf_get,
1062};
1063
1064/**
1065 * subbuf_splice_actor - splice up to one subbuf's worth of data
1066 */
1067static int subbuf_splice_actor(struct file *in,
1068 loff_t *ppos,
1069 struct pipe_inode_info *pipe,
1070 size_t len,
1071 unsigned int flags,
1072 int *nonpad_ret)
1073{
1074 unsigned int pidx, poff, total_len, subbuf_pages, ret;
1075 struct rchan_buf *rbuf = in->private_data;
1076 unsigned int subbuf_size = rbuf->chan->subbuf_size;
1077 size_t read_start = ((size_t)*ppos) % rbuf->chan->alloc_size;
1078 size_t read_subbuf = read_start / subbuf_size;
1079 size_t padding = rbuf->padding[read_subbuf];
1080 size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding;
1081 struct page *pages[PIPE_BUFFERS];
1082 struct partial_page partial[PIPE_BUFFERS];
1083 struct splice_pipe_desc spd = {
1084 .pages = pages,
1085 .nr_pages = 0,
1086 .partial = partial,
1087 .flags = flags,
1088 .ops = &relay_pipe_buf_ops,
1089 };
1090
1091 if (rbuf->subbufs_produced == rbuf->subbufs_consumed)
1092 return 0;
1093
1094 /*
1095 * Adjust read len, if longer than what is available
1096 */
1097 if (len > (subbuf_size - read_start % subbuf_size))
1098 len = subbuf_size - read_start % subbuf_size;
1099
1100 subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT;
1101 pidx = (read_start / PAGE_SIZE) % subbuf_pages;
1102 poff = read_start & ~PAGE_MASK;
1103
1104 for (total_len = 0; spd.nr_pages < subbuf_pages; spd.nr_pages++) {
1105 unsigned int this_len, this_end, private;
1106 unsigned int cur_pos = read_start + total_len;
1107
1108 if (!len)
1109 break;
1110
1111 this_len = min_t(unsigned long, len, PAGE_SIZE - poff);
1112 private = this_len;
1113
1114 spd.pages[spd.nr_pages] = rbuf->page_array[pidx];
1115 spd.partial[spd.nr_pages].offset = poff;
1116
1117 this_end = cur_pos + this_len;
1118 if (this_end >= nonpad_end) {
1119 this_len = nonpad_end - cur_pos;
1120 private = this_len + padding;
1121 }
1122 spd.partial[spd.nr_pages].len = this_len;
1123 spd.partial[spd.nr_pages].private = private;
1124
1125 len -= this_len;
1126 total_len += this_len;
1127 poff = 0;
1128 pidx = (pidx + 1) % subbuf_pages;
1129
1130 if (this_end >= nonpad_end) {
1131 spd.nr_pages++;
1132 break;
1133 }
1134 }
1135
1136 if (!spd.nr_pages)
1137 return 0;
1138
1139 ret = *nonpad_ret = splice_to_pipe(pipe, &spd);
1140 if (ret < 0 || ret < total_len)
1141 return ret;
1142
1143 if (read_start + ret == nonpad_end)
1144 ret += padding;
1145
1146 return ret;
1147}
1148
1149static ssize_t relay_file_splice_read(struct file *in,
1150 loff_t *ppos,
1151 struct pipe_inode_info *pipe,
1152 size_t len,
1153 unsigned int flags)
1154{
1155 ssize_t spliced;
1156 int ret;
1157 int nonpad_ret = 0;
1158
1159 ret = 0;
1160 spliced = 0;
1161
1162 while (len) {
1163 ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret);
1164 if (ret < 0)
1165 break;
1166 else if (!ret) {
1167 if (spliced)
1168 break;
1169 if (flags & SPLICE_F_NONBLOCK) {
1170 ret = -EAGAIN;
1171 break;
1172 }
1173 }
1174
1175 *ppos += ret;
1176 if (ret > len)
1177 len = 0;
1178 else
1179 len -= ret;
1180 spliced += nonpad_ret;
1181 nonpad_ret = 0;
1182 }
1183
1184 if (spliced)
1185 return spliced;
1186
1187 return ret;
1083} 1188}
1084 1189
1085const struct file_operations relay_file_operations = { 1190const struct file_operations relay_file_operations = {
@@ -1089,7 +1194,7 @@ const struct file_operations relay_file_operations = {
1089 .read = relay_file_read, 1194 .read = relay_file_read,
1090 .llseek = no_llseek, 1195 .llseek = no_llseek,
1091 .release = relay_file_release, 1196 .release = relay_file_release,
1092 .sendfile = relay_file_sendfile, 1197 .splice_read = relay_file_splice_read,
1093}; 1198};
1094EXPORT_SYMBOL_GPL(relay_file_operations); 1199EXPORT_SYMBOL_GPL(relay_file_operations);
1095 1200
diff --git a/mm/filemap.c b/mm/filemap.c
index d1d9814f99dd..c6ebd9f912ab 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1245,26 +1245,6 @@ int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long o
1245 return written; 1245 return written;
1246} 1246}
1247 1247
1248ssize_t generic_file_sendfile(struct file *in_file, loff_t *ppos,
1249 size_t count, read_actor_t actor, void *target)
1250{
1251 read_descriptor_t desc;
1252
1253 if (!count)
1254 return 0;
1255
1256 desc.written = 0;
1257 desc.count = count;
1258 desc.arg.data = target;
1259 desc.error = 0;
1260
1261 do_generic_file_read(in_file, ppos, &desc, actor);
1262 if (desc.written)
1263 return desc.written;
1264 return desc.error;
1265}
1266EXPORT_SYMBOL(generic_file_sendfile);
1267
1268static ssize_t 1248static ssize_t
1269do_readahead(struct address_space *mapping, struct file *filp, 1249do_readahead(struct address_space *mapping, struct file *filp,
1270 unsigned long index, unsigned long nr) 1250 unsigned long index, unsigned long nr)
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index fa360e566d88..65ffc321f0c0 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -159,28 +159,6 @@ xip_file_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
159} 159}
160EXPORT_SYMBOL_GPL(xip_file_read); 160EXPORT_SYMBOL_GPL(xip_file_read);
161 161
162ssize_t
163xip_file_sendfile(struct file *in_file, loff_t *ppos,
164 size_t count, read_actor_t actor, void *target)
165{
166 read_descriptor_t desc;
167
168 if (!count)
169 return 0;
170
171 desc.written = 0;
172 desc.count = count;
173 desc.arg.data = target;
174 desc.error = 0;
175
176 do_xip_mapping_read(in_file->f_mapping, &in_file->f_ra, in_file,
177 ppos, &desc, actor);
178 if (desc.written)
179 return desc.written;
180 return desc.error;
181}
182EXPORT_SYMBOL_GPL(xip_file_sendfile);
183
184/* 162/*
185 * __xip_unmap is invoked from xip_unmap and 163 * __xip_unmap is invoked from xip_unmap and
186 * xip_write 164 * xip_write
diff --git a/mm/shmem.c b/mm/shmem.c
index b6aae2b33393..0493e4d0bcaa 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1100,9 +1100,9 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
1100 * Normally, filepage is NULL on entry, and either found 1100 * Normally, filepage is NULL on entry, and either found
1101 * uptodate immediately, or allocated and zeroed, or read 1101 * uptodate immediately, or allocated and zeroed, or read
1102 * in under swappage, which is then assigned to filepage. 1102 * in under swappage, which is then assigned to filepage.
1103 * But shmem_prepare_write passes in a locked filepage, 1103 * But shmem_readpage and shmem_prepare_write pass in a locked
1104 * which may be found not uptodate by other callers too, 1104 * filepage, which may be found not uptodate by other callers
1105 * and may need to be copied from the swappage read in. 1105 * too, and may need to be copied from the swappage read in.
1106 */ 1106 */
1107repeat: 1107repeat:
1108 if (!filepage) 1108 if (!filepage)
@@ -1485,9 +1485,18 @@ static const struct inode_operations shmem_symlink_inode_operations;
1485static const struct inode_operations shmem_symlink_inline_operations; 1485static const struct inode_operations shmem_symlink_inline_operations;
1486 1486
1487/* 1487/*
1488 * Normally tmpfs makes no use of shmem_prepare_write, but it 1488 * Normally tmpfs avoids the use of shmem_readpage and shmem_prepare_write;
1489 * lets a tmpfs file be used read-write below the loop driver. 1489 * but providing them allows a tmpfs file to be used for splice, sendfile, and
1490 * below the loop driver, in the generic fashion that many filesystems support.
1490 */ 1491 */
1492static int shmem_readpage(struct file *file, struct page *page)
1493{
1494 struct inode *inode = page->mapping->host;
1495 int error = shmem_getpage(inode, page->index, &page, SGP_CACHE, NULL);
1496 unlock_page(page);
1497 return error;
1498}
1499
1491static int 1500static int
1492shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) 1501shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
1493{ 1502{
@@ -1711,25 +1720,6 @@ static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count
1711 return desc.error; 1720 return desc.error;
1712} 1721}
1713 1722
1714static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos,
1715 size_t count, read_actor_t actor, void *target)
1716{
1717 read_descriptor_t desc;
1718
1719 if (!count)
1720 return 0;
1721
1722 desc.written = 0;
1723 desc.count = count;
1724 desc.arg.data = target;
1725 desc.error = 0;
1726
1727 do_shmem_file_read(in_file, ppos, &desc, actor);
1728 if (desc.written)
1729 return desc.written;
1730 return desc.error;
1731}
1732
1733static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) 1723static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
1734{ 1724{
1735 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); 1725 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
@@ -2386,6 +2376,7 @@ static const struct address_space_operations shmem_aops = {
2386 .writepage = shmem_writepage, 2376 .writepage = shmem_writepage,
2387 .set_page_dirty = __set_page_dirty_no_writeback, 2377 .set_page_dirty = __set_page_dirty_no_writeback,
2388#ifdef CONFIG_TMPFS 2378#ifdef CONFIG_TMPFS
2379 .readpage = shmem_readpage,
2389 .prepare_write = shmem_prepare_write, 2380 .prepare_write = shmem_prepare_write,
2390 .commit_write = simple_commit_write, 2381 .commit_write = simple_commit_write,
2391#endif 2382#endif
@@ -2399,7 +2390,8 @@ static const struct file_operations shmem_file_operations = {
2399 .read = shmem_file_read, 2390 .read = shmem_file_read,
2400 .write = shmem_file_write, 2391 .write = shmem_file_write,
2401 .fsync = simple_sync_file, 2392 .fsync = simple_sync_file,
2402 .sendfile = shmem_file_sendfile, 2393 .splice_read = generic_file_splice_read,
2394 .splice_write = generic_file_splice_write,
2403#endif 2395#endif
2404}; 2396};
2405 2397
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 099a983797da..c094583386fd 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -853,7 +853,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
853 u32 priv_len, maj_stat; 853 u32 priv_len, maj_stat;
854 int pad, saved_len, remaining_len, offset; 854 int pad, saved_len, remaining_len, offset;
855 855
856 rqstp->rq_sendfile_ok = 0; 856 rqstp->rq_splice_ok = 0;
857 857
858 priv_len = svc_getnl(&buf->head[0]); 858 priv_len = svc_getnl(&buf->head[0]);
859 if (rqstp->rq_deferred) { 859 if (rqstp->rq_deferred) {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index e673ef993904..55ea6df069de 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -814,7 +814,7 @@ svc_process(struct svc_rqst *rqstp)
814 rqstp->rq_res.tail[0].iov_base = NULL; 814 rqstp->rq_res.tail[0].iov_base = NULL;
815 rqstp->rq_res.tail[0].iov_len = 0; 815 rqstp->rq_res.tail[0].iov_len = 0;
816 /* Will be turned off only in gss privacy case: */ 816 /* Will be turned off only in gss privacy case: */
817 rqstp->rq_sendfile_ok = 1; 817 rqstp->rq_splice_ok = 1;
818 /* tcp needs a space for the record length... */ 818 /* tcp needs a space for the record length... */
819 if (rqstp->rq_prot == IPPROTO_TCP) 819 if (rqstp->rq_prot == IPPROTO_TCP)
820 svc_putnl(resv, 0); 820 svc_putnl(resv, 0);