about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author Wolfram Sang <wsa@the-dreams.de> 2014-06-17 08:36:41 -0400
committer Wolfram Sang <wsa@the-dreams.de> 2014-06-17 08:37:31 -0400
commit f0b1f6442b5090fed3529cb39f3acf8c91693d3d (patch)
tree bc5f62b017a82161c9a7f892f464813f6efd5bf3 /fs
parent 4632a93f015caf6d7db4352f37aab74a39e60d7a (diff)
parent 7171511eaec5bf23fb06078f59784a3a0626b38f (diff)
Merge tag 'v3.16-rc1' into i2c/for-next
Merge a stable base (Linux 3.16-rc1)

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Diffstat (limited to 'fs')
-rw-r--r-- fs/9p/vfs_addr.c 5
-rw-r--r-- fs/9p/vfs_file.c 14
-rw-r--r-- fs/adfs/file.c 8
-rw-r--r-- fs/affs/file.c 8
-rw-r--r-- fs/afs/file.c 8
-rw-r--r-- fs/afs/internal.h 3
-rw-r--r-- fs/afs/write.c 11
-rw-r--r-- fs/aio.c 84
-rw-r--r-- fs/bfs/file.c 8
-rw-r--r-- fs/block_dev.c 40
-rw-r--r-- fs/btrfs/extent_io.c 39
-rw-r--r-- fs/btrfs/extent_io.h 3
-rw-r--r-- fs/btrfs/file.c 51
-rw-r--r-- fs/btrfs/inode.c 47
-rw-r--r-- fs/btrfs/ioctl.c 147
-rw-r--r-- fs/btrfs/qgroup.c 4
-rw-r--r-- fs/btrfs/reada.c 9
-rw-r--r-- fs/btrfs/tests/btrfs-tests.c 2
-rw-r--r-- fs/btrfs/tests/qgroup-tests.c 2
-rw-r--r-- fs/btrfs/transaction.c 12
-rw-r--r-- fs/ceph/acl.c 6
-rw-r--r-- fs/ceph/addr.c 21
-rw-r--r-- fs/ceph/caps.c 246
-rw-r--r-- fs/ceph/export.c 2
-rw-r--r-- fs/ceph/file.c 185
-rw-r--r-- fs/ceph/inode.c 247
-rw-r--r-- fs/ceph/mds_client.c 9
-rw-r--r-- fs/ceph/mds_client.h 1
-rw-r--r-- fs/ceph/super.h 13
-rw-r--r-- fs/cifs/cifsfs.c 55
-rw-r--r-- fs/cifs/cifsfs.h 12
-rw-r--r-- fs/cifs/file.c 81
-rw-r--r-- fs/dcache.c 4
-rw-r--r-- fs/direct-io.c 164
-rw-r--r-- fs/dlm/lowcomms.c 5
-rw-r--r-- fs/ecryptfs/file.c 13
-rw-r--r-- fs/exec.c 7
-rw-r--r-- fs/exofs/file.c 10
-rw-r--r-- fs/exofs/inode.c 2
-rw-r--r-- fs/ext2/file.c 10
-rw-r--r-- fs/ext2/inode.c 10
-rw-r--r-- fs/ext3/file.c 10
-rw-r--r-- fs/ext3/inode.c 15
-rw-r--r-- fs/ext4/ext4.h 3
-rw-r--r-- fs/ext4/file.c 35
-rw-r--r-- fs/ext4/indirect.c 14
-rw-r--r-- fs/ext4/inode.c 24
-rw-r--r-- fs/f2fs/data.c 17
-rw-r--r-- fs/f2fs/file.c 10
-rw-r--r-- fs/fat/file.c 8
-rw-r--r-- fs/fat/inode.c 12
-rw-r--r-- fs/file.c 11
-rw-r--r-- fs/file_table.c 6
-rw-r--r-- fs/fuse/cuse.c 8
-rw-r--r-- fs/fuse/file.c 154
-rw-r--r-- fs/fuse/fuse_i.h 5
-rw-r--r-- fs/gfs2/aops.c 11
-rw-r--r-- fs/gfs2/file.c 30
-rw-r--r-- fs/hfs/inode.c 16
-rw-r--r-- fs/hfsplus/inode.c 15
-rw-r--r-- fs/hostfs/hostfs_kern.c 8
-rw-r--r-- fs/hpfs/file.c 8
-rw-r--r-- fs/jffs2/file.c 8
-rw-r--r-- fs/jfs/file.c 10
-rw-r--r-- fs/jfs/inode.c 8
-rw-r--r-- fs/logfs/file.c 8
-rw-r--r-- fs/minix/file.c 8
-rw-r--r-- fs/nfs/direct.c 326
-rw-r--r-- fs/nfs/file.c 65
-rw-r--r-- fs/nfs/internal.h 6
-rw-r--r-- fs/nfs/nfs4file.c 10
-rw-r--r-- fs/nilfs2/file.c 8
-rw-r--r-- fs/nilfs2/inode.c 9
-rw-r--r-- fs/ntfs/file.c 9
-rw-r--r-- fs/ocfs2/aops.c 7
-rw-r--r-- fs/ocfs2/file.c 138
-rw-r--r-- fs/omfs/file.c 8
-rw-r--r-- fs/open.c 6
-rw-r--r-- fs/pipe.c 145
-rw-r--r-- fs/ramfs/file-mmu.c 10
-rw-r--r-- fs/ramfs/file-nommu.c 10
-rw-r--r-- fs/read_write.c 108
-rw-r--r-- fs/reiserfs/file.c 10
-rw-r--r-- fs/reiserfs/inode.c 10
-rw-r--r-- fs/romfs/mmap-nommu.c 4
-rw-r--r-- fs/splice.c 195
-rw-r--r-- fs/sysv/file.c 8
-rw-r--r-- fs/ubifs/file.c 25
-rw-r--r-- fs/udf/file.c 19
-rw-r--r-- fs/udf/inode.c 10
-rw-r--r-- fs/ufs/file.c 8
-rw-r--r-- fs/xfs/xfs_aops.c 17
-rw-r--r-- fs/xfs/xfs_file.c 119
-rw-r--r-- fs/xfs/xfs_trace.h 1
94 files changed, 1541 insertions, 1820 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index c71e88602ff4..cc1cfae726b3 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -259,8 +259,7 @@ static int v9fs_launder_page(struct page *page)
259 * 259 *
260 */ 260 */
261static ssize_t 261static ssize_t
262v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 262v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
263 loff_t pos, unsigned long nr_segs)
264{ 263{
265 /* 264 /*
266 * FIXME 265 * FIXME
@@ -269,7 +268,7 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
269 */ 268 */
270 p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n", 269 p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n",
271 iocb->ki_filp->f_path.dentry->d_name.name, 270 iocb->ki_filp->f_path.dentry->d_name.name,
272 (long long)pos, nr_segs); 271 (long long)pos, iter->nr_segs);
273 272
274 return -EINVAL; 273 return -EINVAL;
275} 274}
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 96e550760699..520c11c2dcca 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -692,7 +692,7 @@ v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
692{ 692{
693 if (filp->f_flags & O_DIRECT) 693 if (filp->f_flags & O_DIRECT)
694 return v9fs_direct_read(filp, data, count, offset); 694 return v9fs_direct_read(filp, data, count, offset);
695 return do_sync_read(filp, data, count, offset); 695 return new_sync_read(filp, data, count, offset);
696} 696}
697 697
698/** 698/**
@@ -760,7 +760,7 @@ err_out:
760 760
761buff_write: 761buff_write:
762 mutex_unlock(&inode->i_mutex); 762 mutex_unlock(&inode->i_mutex);
763 return do_sync_write(filp, data, count, offsetp); 763 return new_sync_write(filp, data, count, offsetp);
764} 764}
765 765
766/** 766/**
@@ -778,7 +778,7 @@ v9fs_cached_file_write(struct file *filp, const char __user * data,
778 778
779 if (filp->f_flags & O_DIRECT) 779 if (filp->f_flags & O_DIRECT)
780 return v9fs_direct_write(filp, data, count, offset); 780 return v9fs_direct_write(filp, data, count, offset);
781 return do_sync_write(filp, data, count, offset); 781 return new_sync_write(filp, data, count, offset);
782} 782}
783 783
784 784
@@ -847,8 +847,8 @@ const struct file_operations v9fs_cached_file_operations = {
847 .llseek = generic_file_llseek, 847 .llseek = generic_file_llseek,
848 .read = v9fs_cached_file_read, 848 .read = v9fs_cached_file_read,
849 .write = v9fs_cached_file_write, 849 .write = v9fs_cached_file_write,
850 .aio_read = generic_file_aio_read, 850 .read_iter = generic_file_read_iter,
851 .aio_write = generic_file_aio_write, 851 .write_iter = generic_file_write_iter,
852 .open = v9fs_file_open, 852 .open = v9fs_file_open,
853 .release = v9fs_dir_release, 853 .release = v9fs_dir_release,
854 .lock = v9fs_file_lock, 854 .lock = v9fs_file_lock,
@@ -860,8 +860,8 @@ const struct file_operations v9fs_cached_file_operations_dotl = {
860 .llseek = generic_file_llseek, 860 .llseek = generic_file_llseek,
861 .read = v9fs_cached_file_read, 861 .read = v9fs_cached_file_read,
862 .write = v9fs_cached_file_write, 862 .write = v9fs_cached_file_write,
863 .aio_read = generic_file_aio_read, 863 .read_iter = generic_file_read_iter,
864 .aio_write = generic_file_aio_write, 864 .write_iter = generic_file_write_iter,
865 .open = v9fs_file_open, 865 .open = v9fs_file_open,
866 .release = v9fs_dir_release, 866 .release = v9fs_dir_release,
867 .lock = v9fs_file_lock_dotl, 867 .lock = v9fs_file_lock_dotl,
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index a36da5382b40..07c9edce5aa7 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -23,12 +23,12 @@
23 23
24const struct file_operations adfs_file_operations = { 24const struct file_operations adfs_file_operations = {
25 .llseek = generic_file_llseek, 25 .llseek = generic_file_llseek,
26 .read = do_sync_read, 26 .read = new_sync_read,
27 .aio_read = generic_file_aio_read, 27 .read_iter = generic_file_read_iter,
28 .mmap = generic_file_mmap, 28 .mmap = generic_file_mmap,
29 .fsync = generic_file_fsync, 29 .fsync = generic_file_fsync,
30 .write = do_sync_write, 30 .write = new_sync_write,
31 .aio_write = generic_file_aio_write, 31 .write_iter = generic_file_write_iter,
32 .splice_read = generic_file_splice_read, 32 .splice_read = generic_file_splice_read,
33}; 33};
34 34
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 0270303388ee..a7fe57d2cd9a 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -27,10 +27,10 @@ static int affs_file_release(struct inode *inode, struct file *filp);
27 27
28const struct file_operations affs_file_operations = { 28const struct file_operations affs_file_operations = {
29 .llseek = generic_file_llseek, 29 .llseek = generic_file_llseek,
30 .read = do_sync_read, 30 .read = new_sync_read,
31 .aio_read = generic_file_aio_read, 31 .read_iter = generic_file_read_iter,
32 .write = do_sync_write, 32 .write = new_sync_write,
33 .aio_write = generic_file_aio_write, 33 .write_iter = generic_file_write_iter,
34 .mmap = generic_file_mmap, 34 .mmap = generic_file_mmap,
35 .open = affs_file_open, 35 .open = affs_file_open,
36 .release = affs_file_release, 36 .release = affs_file_release,
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 66d50fe2ee45..932ce07948b3 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -31,10 +31,10 @@ const struct file_operations afs_file_operations = {
31 .open = afs_open, 31 .open = afs_open,
32 .release = afs_release, 32 .release = afs_release,
33 .llseek = generic_file_llseek, 33 .llseek = generic_file_llseek,
34 .read = do_sync_read, 34 .read = new_sync_read,
35 .write = do_sync_write, 35 .write = new_sync_write,
36 .aio_read = generic_file_aio_read, 36 .read_iter = generic_file_read_iter,
37 .aio_write = afs_file_write, 37 .write_iter = afs_file_write,
38 .mmap = generic_file_readonly_mmap, 38 .mmap = generic_file_readonly_mmap,
39 .splice_read = generic_file_splice_read, 39 .splice_read = generic_file_splice_read,
40 .fsync = afs_fsync, 40 .fsync = afs_fsync,
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 590b55f46d61..71d5982312f3 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -747,8 +747,7 @@ extern int afs_write_end(struct file *file, struct address_space *mapping,
747extern int afs_writepage(struct page *, struct writeback_control *); 747extern int afs_writepage(struct page *, struct writeback_control *);
748extern int afs_writepages(struct address_space *, struct writeback_control *); 748extern int afs_writepages(struct address_space *, struct writeback_control *);
749extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); 749extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
750extern ssize_t afs_file_write(struct kiocb *, const struct iovec *, 750extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
751 unsigned long, loff_t);
752extern int afs_writeback_all(struct afs_vnode *); 751extern int afs_writeback_all(struct afs_vnode *);
753extern int afs_fsync(struct file *, loff_t, loff_t, int); 752extern int afs_fsync(struct file *, loff_t, loff_t, int);
754 753
diff --git a/fs/afs/write.c b/fs/afs/write.c
index a890db4b9898..ab6adfd52516 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -625,15 +625,14 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
625/* 625/*
626 * write to an AFS file 626 * write to an AFS file
627 */ 627 */
628ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov, 628ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
629 unsigned long nr_segs, loff_t pos)
630{ 629{
631 struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp)); 630 struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
632 ssize_t result; 631 ssize_t result;
633 size_t count = iov_length(iov, nr_segs); 632 size_t count = iov_iter_count(from);
634 633
635 _enter("{%x.%u},{%zu},%lu,", 634 _enter("{%x.%u},{%zu},",
636 vnode->fid.vid, vnode->fid.vnode, count, nr_segs); 635 vnode->fid.vid, vnode->fid.vnode, count);
637 636
638 if (IS_SWAPFILE(&vnode->vfs_inode)) { 637 if (IS_SWAPFILE(&vnode->vfs_inode)) {
639 printk(KERN_INFO 638 printk(KERN_INFO
@@ -644,7 +643,7 @@ ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
644 if (!count) 643 if (!count)
645 return 0; 644 return 0;
646 645
647 result = generic_file_aio_write(iocb, iov, nr_segs, pos); 646 result = generic_file_write_iter(iocb, from);
648 if (IS_ERR_VALUE(result)) { 647 if (IS_ERR_VALUE(result)) {
649 _leave(" = %zd", result); 648 _leave(" = %zd", result);
650 return result; 649 return result;
diff --git a/fs/aio.c b/fs/aio.c
index a0ed6c7d2cd2..4f078c054b41 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -477,7 +477,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
477} 477}
478EXPORT_SYMBOL(kiocb_set_cancel_fn); 478EXPORT_SYMBOL(kiocb_set_cancel_fn);
479 479
480static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb) 480static int kiocb_cancel(struct kiocb *kiocb)
481{ 481{
482 kiocb_cancel_fn *old, *cancel; 482 kiocb_cancel_fn *old, *cancel;
483 483
@@ -538,7 +538,7 @@ static void free_ioctx_users(struct percpu_ref *ref)
538 struct kiocb, ki_list); 538 struct kiocb, ki_list);
539 539
540 list_del_init(&req->ki_list); 540 list_del_init(&req->ki_list);
541 kiocb_cancel(ctx, req); 541 kiocb_cancel(req);
542 } 542 }
543 543
544 spin_unlock_irq(&ctx->ctx_lock); 544 spin_unlock_irq(&ctx->ctx_lock);
@@ -727,42 +727,42 @@ err:
727 * when the processes owning a context have all exited to encourage 727 * when the processes owning a context have all exited to encourage
728 * the rapid destruction of the kioctx. 728 * the rapid destruction of the kioctx.
729 */ 729 */
730static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, 730static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
731 struct completion *requests_done) 731 struct completion *requests_done)
732{ 732{
733 if (!atomic_xchg(&ctx->dead, 1)) { 733 struct kioctx_table *table;
734 struct kioctx_table *table;
735 734
736 spin_lock(&mm->ioctx_lock); 735 if (atomic_xchg(&ctx->dead, 1))
737 rcu_read_lock(); 736 return -EINVAL;
738 table = rcu_dereference(mm->ioctx_table);
739 737
740 WARN_ON(ctx != table->table[ctx->id]);
741 table->table[ctx->id] = NULL;
742 rcu_read_unlock();
743 spin_unlock(&mm->ioctx_lock);
744 738
745 /* percpu_ref_kill() will do the necessary call_rcu() */ 739 spin_lock(&mm->ioctx_lock);
746 wake_up_all(&ctx->wait); 740 rcu_read_lock();
741 table = rcu_dereference(mm->ioctx_table);
747 742
748 /* 743 WARN_ON(ctx != table->table[ctx->id]);
749 * It'd be more correct to do this in free_ioctx(), after all 744 table->table[ctx->id] = NULL;
750 * the outstanding kiocbs have finished - but by then io_destroy 745 rcu_read_unlock();
751 * has already returned, so io_setup() could potentially return 746 spin_unlock(&mm->ioctx_lock);
752 * -EAGAIN with no ioctxs actually in use (as far as userspace
753 * could tell).
754 */
755 aio_nr_sub(ctx->max_reqs);
756 747
757 if (ctx->mmap_size) 748 /* percpu_ref_kill() will do the necessary call_rcu() */
758 vm_munmap(ctx->mmap_base, ctx->mmap_size); 749 wake_up_all(&ctx->wait);
759 750
760 ctx->requests_done = requests_done; 751 /*
761 percpu_ref_kill(&ctx->users); 752 * It'd be more correct to do this in free_ioctx(), after all
762 } else { 753 * the outstanding kiocbs have finished - but by then io_destroy
763 if (requests_done) 754 * has already returned, so io_setup() could potentially return
764 complete(requests_done); 755 * -EAGAIN with no ioctxs actually in use (as far as userspace
765 } 756 * could tell).
757 */
758 aio_nr_sub(ctx->max_reqs);
759
760 if (ctx->mmap_size)
761 vm_munmap(ctx->mmap_base, ctx->mmap_size);
762
763 ctx->requests_done = requests_done;
764 percpu_ref_kill(&ctx->users);
765 return 0;
766} 766}
767 767
768/* wait_on_sync_kiocb: 768/* wait_on_sync_kiocb:
@@ -1219,21 +1219,23 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
1219 if (likely(NULL != ioctx)) { 1219 if (likely(NULL != ioctx)) {
1220 struct completion requests_done = 1220 struct completion requests_done =
1221 COMPLETION_INITIALIZER_ONSTACK(requests_done); 1221 COMPLETION_INITIALIZER_ONSTACK(requests_done);
1222 int ret;
1222 1223
1223 /* Pass requests_done to kill_ioctx() where it can be set 1224 /* Pass requests_done to kill_ioctx() where it can be set
1224 * in a thread-safe way. If we try to set it here then we have 1225 * in a thread-safe way. If we try to set it here then we have
1225 * a race condition if two io_destroy() called simultaneously. 1226 * a race condition if two io_destroy() called simultaneously.
1226 */ 1227 */
1227 kill_ioctx(current->mm, ioctx, &requests_done); 1228 ret = kill_ioctx(current->mm, ioctx, &requests_done);
1228 percpu_ref_put(&ioctx->users); 1229 percpu_ref_put(&ioctx->users);
1229 1230
1230 /* Wait until all IO for the context are done. Otherwise kernel 1231 /* Wait until all IO for the context are done. Otherwise kernel
1231 * keep using user-space buffers even if user thinks the context 1232 * keep using user-space buffers even if user thinks the context
1232 * is destroyed. 1233 * is destroyed.
1233 */ 1234 */
1234 wait_for_completion(&requests_done); 1235 if (!ret)
1236 wait_for_completion(&requests_done);
1235 1237
1236 return 0; 1238 return ret;
1237 } 1239 }
1238 pr_debug("EINVAL: io_destroy: invalid context id\n"); 1240 pr_debug("EINVAL: io_destroy: invalid context id\n");
1239 return -EINVAL; 1241 return -EINVAL;
@@ -1241,6 +1243,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
1241 1243
1242typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, 1244typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
1243 unsigned long, loff_t); 1245 unsigned long, loff_t);
1246typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
1244 1247
1245static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, 1248static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
1246 int rw, char __user *buf, 1249 int rw, char __user *buf,
@@ -1298,7 +1301,9 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
1298 int rw; 1301 int rw;
1299 fmode_t mode; 1302 fmode_t mode;
1300 aio_rw_op *rw_op; 1303 aio_rw_op *rw_op;
1304 rw_iter_op *iter_op;
1301 struct iovec inline_vec, *iovec = &inline_vec; 1305 struct iovec inline_vec, *iovec = &inline_vec;
1306 struct iov_iter iter;
1302 1307
1303 switch (opcode) { 1308 switch (opcode) {
1304 case IOCB_CMD_PREAD: 1309 case IOCB_CMD_PREAD:
@@ -1306,6 +1311,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
1306 mode = FMODE_READ; 1311 mode = FMODE_READ;
1307 rw = READ; 1312 rw = READ;
1308 rw_op = file->f_op->aio_read; 1313 rw_op = file->f_op->aio_read;
1314 iter_op = file->f_op->read_iter;
1309 goto rw_common; 1315 goto rw_common;
1310 1316
1311 case IOCB_CMD_PWRITE: 1317 case IOCB_CMD_PWRITE:
@@ -1313,12 +1319,13 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
1313 mode = FMODE_WRITE; 1319 mode = FMODE_WRITE;
1314 rw = WRITE; 1320 rw = WRITE;
1315 rw_op = file->f_op->aio_write; 1321 rw_op = file->f_op->aio_write;
1322 iter_op = file->f_op->write_iter;
1316 goto rw_common; 1323 goto rw_common;
1317rw_common: 1324rw_common:
1318 if (unlikely(!(file->f_mode & mode))) 1325 if (unlikely(!(file->f_mode & mode)))
1319 return -EBADF; 1326 return -EBADF;
1320 1327
1321 if (!rw_op) 1328 if (!rw_op && !iter_op)
1322 return -EINVAL; 1329 return -EINVAL;
1323 1330
1324 ret = (opcode == IOCB_CMD_PREADV || 1331 ret = (opcode == IOCB_CMD_PREADV ||
@@ -1347,7 +1354,12 @@ rw_common:
1347 if (rw == WRITE) 1354 if (rw == WRITE)
1348 file_start_write(file); 1355 file_start_write(file);
1349 1356
1350 ret = rw_op(req, iovec, nr_segs, req->ki_pos); 1357 if (iter_op) {
1358 iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
1359 ret = iter_op(req, &iter);
1360 } else {
1361 ret = rw_op(req, iovec, nr_segs, req->ki_pos);
1362 }
1351 1363
1352 if (rw == WRITE) 1364 if (rw == WRITE)
1353 file_end_write(file); 1365 file_end_write(file);
@@ -1585,7 +1597,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
1585 1597
1586 kiocb = lookup_kiocb(ctx, iocb, key); 1598 kiocb = lookup_kiocb(ctx, iocb, key);
1587 if (kiocb) 1599 if (kiocb)
1588 ret = kiocb_cancel(ctx, kiocb); 1600 ret = kiocb_cancel(kiocb);
1589 else 1601 else
1590 ret = -EINVAL; 1602 ret = -EINVAL;
1591 1603
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index ae2892218335..e7f88ace1a25 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -23,10 +23,10 @@
23 23
24const struct file_operations bfs_file_operations = { 24const struct file_operations bfs_file_operations = {
25 .llseek = generic_file_llseek, 25 .llseek = generic_file_llseek,
26 .read = do_sync_read, 26 .read = new_sync_read,
27 .aio_read = generic_file_aio_read, 27 .read_iter = generic_file_read_iter,
28 .write = do_sync_write, 28 .write = new_sync_write,
29 .aio_write = generic_file_aio_write, 29 .write_iter = generic_file_write_iter,
30 .mmap = generic_file_mmap, 30 .mmap = generic_file_mmap,
31 .splice_read = generic_file_splice_read, 31 .splice_read = generic_file_splice_read,
32}; 32};
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 83fba15cc394..6d7274619bf9 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -165,14 +165,15 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
165} 165}
166 166
167static ssize_t 167static ssize_t
168blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 168blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
169 loff_t offset, unsigned long nr_segs) 169 loff_t offset)
170{ 170{
171 struct file *file = iocb->ki_filp; 171 struct file *file = iocb->ki_filp;
172 struct inode *inode = file->f_mapping->host; 172 struct inode *inode = file->f_mapping->host;
173 173
174 return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset, 174 return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter,
175 nr_segs, blkdev_get_block, NULL, NULL, 0); 175 offset, blkdev_get_block,
176 NULL, NULL, 0);
176} 177}
177 178
178int __sync_blockdev(struct block_device *bdev, int wait) 179int __sync_blockdev(struct block_device *bdev, int wait)
@@ -1571,43 +1572,38 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1571 * Does not take i_mutex for the write and thus is not for general purpose 1572 * Does not take i_mutex for the write and thus is not for general purpose
1572 * use. 1573 * use.
1573 */ 1574 */
1574ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, 1575ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
1575 unsigned long nr_segs, loff_t pos)
1576{ 1576{
1577 struct file *file = iocb->ki_filp; 1577 struct file *file = iocb->ki_filp;
1578 struct blk_plug plug; 1578 struct blk_plug plug;
1579 ssize_t ret; 1579 ssize_t ret;
1580 1580
1581 BUG_ON(iocb->ki_pos != pos);
1582
1583 blk_start_plug(&plug); 1581 blk_start_plug(&plug);
1584 ret = __generic_file_aio_write(iocb, iov, nr_segs); 1582 ret = __generic_file_write_iter(iocb, from);
1585 if (ret > 0) { 1583 if (ret > 0) {
1586 ssize_t err; 1584 ssize_t err;
1587 1585 err = generic_write_sync(file, iocb->ki_pos - ret, ret);
1588 err = generic_write_sync(file, pos, ret);
1589 if (err < 0) 1586 if (err < 0)
1590 ret = err; 1587 ret = err;
1591 } 1588 }
1592 blk_finish_plug(&plug); 1589 blk_finish_plug(&plug);
1593 return ret; 1590 return ret;
1594} 1591}
1595EXPORT_SYMBOL_GPL(blkdev_aio_write); 1592EXPORT_SYMBOL_GPL(blkdev_write_iter);
1596 1593
1597static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, 1594static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
1598 unsigned long nr_segs, loff_t pos)
1599{ 1595{
1600 struct file *file = iocb->ki_filp; 1596 struct file *file = iocb->ki_filp;
1601 struct inode *bd_inode = file->f_mapping->host; 1597 struct inode *bd_inode = file->f_mapping->host;
1602 loff_t size = i_size_read(bd_inode); 1598 loff_t size = i_size_read(bd_inode);
1599 loff_t pos = iocb->ki_pos;
1603 1600
1604 if (pos >= size) 1601 if (pos >= size)
1605 return 0; 1602 return 0;
1606 1603
1607 size -= pos; 1604 size -= pos;
1608 if (size < iocb->ki_nbytes) 1605 iov_iter_truncate(to, size);
1609 nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size); 1606 return generic_file_read_iter(iocb, to);
1610 return generic_file_aio_read(iocb, iov, nr_segs, pos);
1611} 1607}
1612 1608
1613/* 1609/*
@@ -1639,10 +1635,10 @@ const struct file_operations def_blk_fops = {
1639 .open = blkdev_open, 1635 .open = blkdev_open,
1640 .release = blkdev_close, 1636 .release = blkdev_close,
1641 .llseek = block_llseek, 1637 .llseek = block_llseek,
1642 .read = do_sync_read, 1638 .read = new_sync_read,
1643 .write = do_sync_write, 1639 .write = new_sync_write,
1644 .aio_read = blkdev_aio_read, 1640 .read_iter = blkdev_read_iter,
1645 .aio_write = blkdev_aio_write, 1641 .write_iter = blkdev_write_iter,
1646 .mmap = generic_file_mmap, 1642 .mmap = generic_file_mmap,
1647 .fsync = blkdev_fsync, 1643 .fsync = blkdev_fsync,
1648 .unlocked_ioctl = block_ioctl, 1644 .unlocked_ioctl = block_ioctl,
@@ -1650,7 +1646,7 @@ const struct file_operations def_blk_fops = {
1650 .compat_ioctl = compat_blkdev_ioctl, 1646 .compat_ioctl = compat_blkdev_ioctl,
1651#endif 1647#endif
1652 .splice_read = generic_file_splice_read, 1648 .splice_read = generic_file_splice_read,
1653 .splice_write = generic_file_splice_write, 1649 .splice_write = iter_file_splice_write,
1654}; 1650};
1655 1651
1656int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) 1652int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f25a9092b946..a389820d158b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2354,7 +2354,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2354{ 2354{
2355 int uptodate = (err == 0); 2355 int uptodate = (err == 0);
2356 struct extent_io_tree *tree; 2356 struct extent_io_tree *tree;
2357 int ret; 2357 int ret = 0;
2358 2358
2359 tree = &BTRFS_I(page->mapping->host)->io_tree; 2359 tree = &BTRFS_I(page->mapping->host)->io_tree;
2360 2360
@@ -5068,6 +5068,43 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
5068 } 5068 }
5069} 5069}
5070 5070
5071int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
5072 unsigned long start,
5073 unsigned long len)
5074{
5075 size_t cur;
5076 size_t offset;
5077 struct page *page;
5078 char *kaddr;
5079 char __user *dst = (char __user *)dstv;
5080 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5081 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5082 int ret = 0;
5083
5084 WARN_ON(start > eb->len);
5085 WARN_ON(start + len > eb->start + eb->len);
5086
5087 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
5088
5089 while (len > 0) {
5090 page = extent_buffer_page(eb, i);
5091
5092 cur = min(len, (PAGE_CACHE_SIZE - offset));
5093 kaddr = page_address(page);
5094 if (copy_to_user(dst, kaddr + offset, cur)) {
5095 ret = -EFAULT;
5096 break;
5097 }
5098
5099 dst += cur;
5100 len -= cur;
5101 offset = 0;
5102 i++;
5103 }
5104
5105 return ret;
5106}
5107
5071int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, 5108int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
5072 unsigned long min_len, char **map, 5109 unsigned long min_len, char **map,
5073 unsigned long *map_start, 5110 unsigned long *map_start,
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 8b63f2d46518..15ce5f2a2b62 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -304,6 +304,9 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
304void read_extent_buffer(struct extent_buffer *eb, void *dst, 304void read_extent_buffer(struct extent_buffer *eb, void *dst,
305 unsigned long start, 305 unsigned long start,
306 unsigned long len); 306 unsigned long len);
307int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst,
308 unsigned long start,
309 unsigned long len);
307void write_extent_buffer(struct extent_buffer *eb, const void *src, 310void write_extent_buffer(struct extent_buffer *eb, const void *src,
308 unsigned long start, unsigned long len); 311 unsigned long start, unsigned long len);
309void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, 312void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e472441feb5d..1f2b99cb55ea 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -448,7 +448,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
448 write_bytes -= copied; 448 write_bytes -= copied;
449 total_copied += copied; 449 total_copied += copied;
450 450
451 /* Return to btrfs_file_aio_write to fault page */ 451 /* Return to btrfs_file_write_iter to fault page */
452 if (unlikely(copied == 0)) 452 if (unlikely(copied == 0))
453 break; 453 break;
454 454
@@ -1675,27 +1675,22 @@ again:
1675} 1675}
1676 1676
1677static ssize_t __btrfs_direct_write(struct kiocb *iocb, 1677static ssize_t __btrfs_direct_write(struct kiocb *iocb,
1678 const struct iovec *iov, 1678 struct iov_iter *from,
1679 unsigned long nr_segs, loff_t pos, 1679 loff_t pos)
1680 size_t count, size_t ocount)
1681{ 1680{
1682 struct file *file = iocb->ki_filp; 1681 struct file *file = iocb->ki_filp;
1683 struct iov_iter i;
1684 ssize_t written; 1682 ssize_t written;
1685 ssize_t written_buffered; 1683 ssize_t written_buffered;
1686 loff_t endbyte; 1684 loff_t endbyte;
1687 int err; 1685 int err;
1688 1686
1689 written = generic_file_direct_write(iocb, iov, &nr_segs, pos, 1687 written = generic_file_direct_write(iocb, from, pos);
1690 count, ocount);
1691 1688
1692 if (written < 0 || written == count) 1689 if (written < 0 || !iov_iter_count(from))
1693 return written; 1690 return written;
1694 1691
1695 pos += written; 1692 pos += written;
1696 count -= written; 1693 written_buffered = __btrfs_buffered_write(file, from, pos);
1697 iov_iter_init(&i, iov, nr_segs, count, written);
1698 written_buffered = __btrfs_buffered_write(file, &i, pos);
1699 if (written_buffered < 0) { 1694 if (written_buffered < 0) {
1700 err = written_buffered; 1695 err = written_buffered;
1701 goto out; 1696 goto out;
@@ -1730,9 +1725,8 @@ static void update_time_for_write(struct inode *inode)
1730 inode_inc_iversion(inode); 1725 inode_inc_iversion(inode);
1731} 1726}
1732 1727
1733static ssize_t btrfs_file_aio_write(struct kiocb *iocb, 1728static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1734 const struct iovec *iov, 1729 struct iov_iter *from)
1735 unsigned long nr_segs, loff_t pos)
1736{ 1730{
1737 struct file *file = iocb->ki_filp; 1731 struct file *file = iocb->ki_filp;
1738 struct inode *inode = file_inode(file); 1732 struct inode *inode = file_inode(file);
@@ -1741,18 +1735,12 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1741 u64 end_pos; 1735 u64 end_pos;
1742 ssize_t num_written = 0; 1736 ssize_t num_written = 0;
1743 ssize_t err = 0; 1737 ssize_t err = 0;
1744 size_t count, ocount; 1738 size_t count = iov_iter_count(from);
1745 bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); 1739 bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
1740 loff_t pos = iocb->ki_pos;
1746 1741
1747 mutex_lock(&inode->i_mutex); 1742 mutex_lock(&inode->i_mutex);
1748 1743
1749 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
1750 if (err) {
1751 mutex_unlock(&inode->i_mutex);
1752 goto out;
1753 }
1754 count = ocount;
1755
1756 current->backing_dev_info = inode->i_mapping->backing_dev_info; 1744 current->backing_dev_info = inode->i_mapping->backing_dev_info;
1757 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 1745 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1758 if (err) { 1746 if (err) {
@@ -1765,6 +1753,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1765 goto out; 1753 goto out;
1766 } 1754 }
1767 1755
1756 iov_iter_truncate(from, count);
1757
1768 err = file_remove_suid(file); 1758 err = file_remove_suid(file);
1769 if (err) { 1759 if (err) {
1770 mutex_unlock(&inode->i_mutex); 1760 mutex_unlock(&inode->i_mutex);
@@ -1806,14 +1796,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1806 atomic_inc(&BTRFS_I(inode)->sync_writers); 1796 atomic_inc(&BTRFS_I(inode)->sync_writers);
1807 1797
1808 if (unlikely(file->f_flags & O_DIRECT)) { 1798 if (unlikely(file->f_flags & O_DIRECT)) {
1809 num_written = __btrfs_direct_write(iocb, iov, nr_segs, 1799 num_written = __btrfs_direct_write(iocb, from, pos);
1810 pos, count, ocount);
1811 } else { 1800 } else {
1812 struct iov_iter i; 1801 num_written = __btrfs_buffered_write(file, from, pos);
1813
1814 iov_iter_init(&i, iov, nr_segs, count, num_written);
1815
1816 num_written = __btrfs_buffered_write(file, &i, pos);
1817 if (num_written > 0) 1802 if (num_written > 0)
1818 iocb->ki_pos = pos + num_written; 1803 iocb->ki_pos = pos + num_written;
1819 } 1804 }
@@ -2740,11 +2725,11 @@ out:
2740 2725
2741const struct file_operations btrfs_file_operations = { 2726const struct file_operations btrfs_file_operations = {
2742 .llseek = btrfs_file_llseek, 2727 .llseek = btrfs_file_llseek,
2743 .read = do_sync_read, 2728 .read = new_sync_read,
2744 .write = do_sync_write, 2729 .write = new_sync_write,
2745 .aio_read = generic_file_aio_read, 2730 .read_iter = generic_file_read_iter,
2746 .splice_read = generic_file_splice_read, 2731 .splice_read = generic_file_splice_read,
2747 .aio_write = btrfs_file_aio_write, 2732 .write_iter = btrfs_file_write_iter,
2748 .mmap = btrfs_file_mmap, 2733 .mmap = btrfs_file_mmap,
2749 .open = generic_file_open, 2734 .open = generic_file_open,
2750 .release = btrfs_release_file, 2735 .release = btrfs_release_file,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7fa5f7fd7bc7..8925f66a1411 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7445,39 +7445,30 @@ free_ordered:
7445} 7445}
7446 7446
7447static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, 7447static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
7448 const struct iovec *iov, loff_t offset, 7448 const struct iov_iter *iter, loff_t offset)
7449 unsigned long nr_segs)
7450{ 7449{
7451 int seg; 7450 int seg;
7452 int i; 7451 int i;
7453 size_t size;
7454 unsigned long addr;
7455 unsigned blocksize_mask = root->sectorsize - 1; 7452 unsigned blocksize_mask = root->sectorsize - 1;
7456 ssize_t retval = -EINVAL; 7453 ssize_t retval = -EINVAL;
7457 loff_t end = offset;
7458 7454
7459 if (offset & blocksize_mask) 7455 if (offset & blocksize_mask)
7460 goto out; 7456 goto out;
7461 7457
7462 /* Check the memory alignment. Blocks cannot straddle pages */ 7458 if (iov_iter_alignment(iter) & blocksize_mask)
7463 for (seg = 0; seg < nr_segs; seg++) { 7459 goto out;
7464 addr = (unsigned long)iov[seg].iov_base;
7465 size = iov[seg].iov_len;
7466 end += size;
7467 if ((addr & blocksize_mask) || (size & blocksize_mask))
7468 goto out;
7469
7470 /* If this is a write we don't need to check anymore */
7471 if (rw & WRITE)
7472 continue;
7473 7460
7474 /* 7461 /* If this is a write we don't need to check anymore */
7475 * Check to make sure we don't have duplicate iov_base's in this 7462 if (rw & WRITE)
7476 * iovec, if so return EINVAL, otherwise we'll get csum errors 7463 return 0;
7477 * when reading back. 7464 /*
7478 */ 7465 * Check to make sure we don't have duplicate iov_base's in this
7479 for (i = seg + 1; i < nr_segs; i++) { 7466 * iovec, if so return EINVAL, otherwise we'll get csum errors
7480 if (iov[seg].iov_base == iov[i].iov_base) 7467 * when reading back.
7468 */
7469 for (seg = 0; seg < iter->nr_segs; seg++) {
7470 for (i = seg + 1; i < iter->nr_segs; i++) {
7471 if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
7481 goto out; 7472 goto out;
7482 } 7473 }
7483 } 7474 }
@@ -7487,8 +7478,7 @@ out:
7487} 7478}
7488 7479
7489static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, 7480static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
7490 const struct iovec *iov, loff_t offset, 7481 struct iov_iter *iter, loff_t offset)
7491 unsigned long nr_segs)
7492{ 7482{
7493 struct file *file = iocb->ki_filp; 7483 struct file *file = iocb->ki_filp;
7494 struct inode *inode = file->f_mapping->host; 7484 struct inode *inode = file->f_mapping->host;
@@ -7498,8 +7488,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
7498 bool relock = false; 7488 bool relock = false;
7499 ssize_t ret; 7489 ssize_t ret;
7500 7490
7501 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, 7491 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset))
7502 offset, nr_segs))
7503 return 0; 7492 return 0;
7504 7493
7505 atomic_inc(&inode->i_dio_count); 7494 atomic_inc(&inode->i_dio_count);
@@ -7511,7 +7500,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
7511 * we need to flush the dirty pages again to make absolutely sure 7500 * we need to flush the dirty pages again to make absolutely sure
7512 * that any outstanding dirty pages are on disk. 7501 * that any outstanding dirty pages are on disk.
7513 */ 7502 */
7514 count = iov_length(iov, nr_segs); 7503 count = iov_iter_count(iter);
7515 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, 7504 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
7516 &BTRFS_I(inode)->runtime_flags)) 7505 &BTRFS_I(inode)->runtime_flags))
7517 filemap_fdatawrite_range(inode->i_mapping, offset, count); 7506 filemap_fdatawrite_range(inode->i_mapping, offset, count);
@@ -7538,7 +7527,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
7538 7527
7539 ret = __blockdev_direct_IO(rw, iocb, inode, 7528 ret = __blockdev_direct_IO(rw, iocb, inode,
7540 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, 7529 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
7541 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, 7530 iter, offset, btrfs_get_blocks_direct, NULL,
7542 btrfs_submit_direct, flags); 7531 btrfs_submit_direct, flags);
7543 if (rw & WRITE) { 7532 if (rw & WRITE) {
7544 if (ret < 0 && ret != -EIOCBQUEUED) 7533 if (ret < 0 && ret != -EIOCBQUEUED)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 82c18ba12e3f..0d321c23069a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1957,7 +1957,8 @@ static noinline int copy_to_sk(struct btrfs_root *root,
1957 struct btrfs_path *path, 1957 struct btrfs_path *path,
1958 struct btrfs_key *key, 1958 struct btrfs_key *key,
1959 struct btrfs_ioctl_search_key *sk, 1959 struct btrfs_ioctl_search_key *sk,
1960 char *buf, 1960 size_t *buf_size,
1961 char __user *ubuf,
1961 unsigned long *sk_offset, 1962 unsigned long *sk_offset,
1962 int *num_found) 1963 int *num_found)
1963{ 1964{
@@ -1989,13 +1990,25 @@ static noinline int copy_to_sk(struct btrfs_root *root,
1989 if (!key_in_sk(key, sk)) 1990 if (!key_in_sk(key, sk))
1990 continue; 1991 continue;
1991 1992
1992 if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE) 1993 if (sizeof(sh) + item_len > *buf_size) {
1994 if (*num_found) {
1995 ret = 1;
1996 goto out;
1997 }
1998
1999 /*
2000 * return one empty item back for v1, which does not
2001 * handle -EOVERFLOW
2002 */
2003
2004 *buf_size = sizeof(sh) + item_len;
1993 item_len = 0; 2005 item_len = 0;
2006 ret = -EOVERFLOW;
2007 }
1994 2008
1995 if (sizeof(sh) + item_len + *sk_offset > 2009 if (sizeof(sh) + item_len + *sk_offset > *buf_size) {
1996 BTRFS_SEARCH_ARGS_BUFSIZE) {
1997 ret = 1; 2010 ret = 1;
1998 goto overflow; 2011 goto out;
1999 } 2012 }
2000 2013
2001 sh.objectid = key->objectid; 2014 sh.objectid = key->objectid;
@@ -2005,20 +2018,33 @@ static noinline int copy_to_sk(struct btrfs_root *root,
2005 sh.transid = found_transid; 2018 sh.transid = found_transid;
2006 2019
2007 /* copy search result header */ 2020 /* copy search result header */
2008 memcpy(buf + *sk_offset, &sh, sizeof(sh)); 2021 if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
2022 ret = -EFAULT;
2023 goto out;
2024 }
2025
2009 *sk_offset += sizeof(sh); 2026 *sk_offset += sizeof(sh);
2010 2027
2011 if (item_len) { 2028 if (item_len) {
2012 char *p = buf + *sk_offset; 2029 char __user *up = ubuf + *sk_offset;
2013 /* copy the item */ 2030 /* copy the item */
2014 read_extent_buffer(leaf, p, 2031 if (read_extent_buffer_to_user(leaf, up,
2015 item_off, item_len); 2032 item_off, item_len)) {
2033 ret = -EFAULT;
2034 goto out;
2035 }
2036
2016 *sk_offset += item_len; 2037 *sk_offset += item_len;
2017 } 2038 }
2018 (*num_found)++; 2039 (*num_found)++;
2019 2040
2020 if (*num_found >= sk->nr_items) 2041 if (ret) /* -EOVERFLOW from above */
2021 break; 2042 goto out;
2043
2044 if (*num_found >= sk->nr_items) {
2045 ret = 1;
2046 goto out;
2047 }
2022 } 2048 }
2023advance_key: 2049advance_key:
2024 ret = 0; 2050 ret = 0;
@@ -2033,22 +2059,37 @@ advance_key:
2033 key->objectid++; 2059 key->objectid++;
2034 } else 2060 } else
2035 ret = 1; 2061 ret = 1;
2036overflow: 2062out:
2063 /*
2064 * 0: all items from this leaf copied, continue with next
2065 * 1: * more items can be copied, but unused buffer is too small
2066 * * all items were found
2067 * Either way, it will stops the loop which iterates to the next
2068 * leaf
2069 * -EOVERFLOW: item was to large for buffer
2070 * -EFAULT: could not copy extent buffer back to userspace
2071 */
2037 return ret; 2072 return ret;
2038} 2073}
2039 2074
2040static noinline int search_ioctl(struct inode *inode, 2075static noinline int search_ioctl(struct inode *inode,
2041 struct btrfs_ioctl_search_args *args) 2076 struct btrfs_ioctl_search_key *sk,
2077 size_t *buf_size,
2078 char __user *ubuf)
2042{ 2079{
2043 struct btrfs_root *root; 2080 struct btrfs_root *root;
2044 struct btrfs_key key; 2081 struct btrfs_key key;
2045 struct btrfs_path *path; 2082 struct btrfs_path *path;
2046 struct btrfs_ioctl_search_key *sk = &args->key;
2047 struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; 2083 struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
2048 int ret; 2084 int ret;
2049 int num_found = 0; 2085 int num_found = 0;
2050 unsigned long sk_offset = 0; 2086 unsigned long sk_offset = 0;
2051 2087
2088 if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) {
2089 *buf_size = sizeof(struct btrfs_ioctl_search_header);
2090 return -EOVERFLOW;
2091 }
2092
2052 path = btrfs_alloc_path(); 2093 path = btrfs_alloc_path();
2053 if (!path) 2094 if (!path)
2054 return -ENOMEM; 2095 return -ENOMEM;
@@ -2082,14 +2123,15 @@ static noinline int search_ioctl(struct inode *inode,
2082 ret = 0; 2123 ret = 0;
2083 goto err; 2124 goto err;
2084 } 2125 }
2085 ret = copy_to_sk(root, path, &key, sk, args->buf, 2126 ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf,
2086 &sk_offset, &num_found); 2127 &sk_offset, &num_found);
2087 btrfs_release_path(path); 2128 btrfs_release_path(path);
2088 if (ret || num_found >= sk->nr_items) 2129 if (ret)
2089 break; 2130 break;
2090 2131
2091 } 2132 }
2092 ret = 0; 2133 if (ret > 0)
2134 ret = 0;
2093err: 2135err:
2094 sk->nr_items = num_found; 2136 sk->nr_items = num_found;
2095 btrfs_free_path(path); 2137 btrfs_free_path(path);
@@ -2099,22 +2141,73 @@ err:
2099static noinline int btrfs_ioctl_tree_search(struct file *file, 2141static noinline int btrfs_ioctl_tree_search(struct file *file,
2100 void __user *argp) 2142 void __user *argp)
2101{ 2143{
2102 struct btrfs_ioctl_search_args *args; 2144 struct btrfs_ioctl_search_args __user *uargs;
2103 struct inode *inode; 2145 struct btrfs_ioctl_search_key sk;
2104 int ret; 2146 struct inode *inode;
2147 int ret;
2148 size_t buf_size;
2105 2149
2106 if (!capable(CAP_SYS_ADMIN)) 2150 if (!capable(CAP_SYS_ADMIN))
2107 return -EPERM; 2151 return -EPERM;
2108 2152
2109 args = memdup_user(argp, sizeof(*args)); 2153 uargs = (struct btrfs_ioctl_search_args __user *)argp;
2110 if (IS_ERR(args)) 2154
2111 return PTR_ERR(args); 2155 if (copy_from_user(&sk, &uargs->key, sizeof(sk)))
2156 return -EFAULT;
2157
2158 buf_size = sizeof(uargs->buf);
2112 2159
2113 inode = file_inode(file); 2160 inode = file_inode(file);
2114 ret = search_ioctl(inode, args); 2161 ret = search_ioctl(inode, &sk, &buf_size, uargs->buf);
2115 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 2162
2163 /*
2164 * In the origin implementation an overflow is handled by returning a
2165 * search header with a len of zero, so reset ret.
2166 */
2167 if (ret == -EOVERFLOW)
2168 ret = 0;
2169
2170 if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk)))
2116 ret = -EFAULT; 2171 ret = -EFAULT;
2117 kfree(args); 2172 return ret;
2173}
2174
2175static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
2176 void __user *argp)
2177{
2178 struct btrfs_ioctl_search_args_v2 __user *uarg;
2179 struct btrfs_ioctl_search_args_v2 args;
2180 struct inode *inode;
2181 int ret;
2182 size_t buf_size;
2183 const size_t buf_limit = 16 * 1024 * 1024;
2184
2185 if (!capable(CAP_SYS_ADMIN))
2186 return -EPERM;
2187
2188 /* copy search header and buffer size */
2189 uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp;
2190 if (copy_from_user(&args, uarg, sizeof(args)))
2191 return -EFAULT;
2192
2193 buf_size = args.buf_size;
2194
2195 if (buf_size < sizeof(struct btrfs_ioctl_search_header))
2196 return -EOVERFLOW;
2197
2198 /* limit result size to 16MB */
2199 if (buf_size > buf_limit)
2200 buf_size = buf_limit;
2201
2202 inode = file_inode(file);
2203 ret = search_ioctl(inode, &args.key, &buf_size,
2204 (char *)(&uarg->buf[0]));
2205 if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
2206 ret = -EFAULT;
2207 else if (ret == -EOVERFLOW &&
2208 copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size)))
2209 ret = -EFAULT;
2210
2118 return ret; 2211 return ret;
2119} 2212}
2120 2213
@@ -5198,6 +5291,8 @@ long btrfs_ioctl(struct file *file, unsigned int
5198 return btrfs_ioctl_trans_end(file); 5291 return btrfs_ioctl_trans_end(file);
5199 case BTRFS_IOC_TREE_SEARCH: 5292 case BTRFS_IOC_TREE_SEARCH:
5200 return btrfs_ioctl_tree_search(file, argp); 5293 return btrfs_ioctl_tree_search(file, argp);
5294 case BTRFS_IOC_TREE_SEARCH_V2:
5295 return btrfs_ioctl_tree_search_v2(file, argp);
5201 case BTRFS_IOC_INO_LOOKUP: 5296 case BTRFS_IOC_INO_LOOKUP:
5202 return btrfs_ioctl_ino_lookup(file, argp); 5297 return btrfs_ioctl_ino_lookup(file, argp);
5203 case BTRFS_IOC_INO_PATHS: 5298 case BTRFS_IOC_INO_PATHS:
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index cf5aead95a7f..98cb6b2630f9 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1798,8 +1798,10 @@ static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
1798 return -ENOMEM; 1798 return -ENOMEM;
1799 1799
1800 tmp = ulist_alloc(GFP_NOFS); 1800 tmp = ulist_alloc(GFP_NOFS);
1801 if (!tmp) 1801 if (!tmp) {
1802 ulist_free(qgroups);
1802 return -ENOMEM; 1803 return -ENOMEM;
1804 }
1803 1805
1804 btrfs_get_tree_mod_seq(fs_info, &elem); 1806 btrfs_get_tree_mod_seq(fs_info, &elem);
1805 ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq, 1807 ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 30947f923620..09230cf3a244 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -428,8 +428,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
428 continue; 428 continue;
429 } 429 }
430 if (!dev->bdev) { 430 if (!dev->bdev) {
431 /* cannot read ahead on missing device */ 431 /*
432 continue; 432 * cannot read ahead on missing device, but for RAID5/6,
433 * REQ_GET_READ_MIRRORS return 1. So don't skip missing
434 * device for such case.
435 */
436 if (nzones > 1)
437 continue;
433 } 438 }
434 if (dev_replace_is_ongoing && 439 if (dev_replace_is_ongoing &&
435 dev == fs_info->dev_replace.tgtdev) { 440 dev == fs_info->dev_replace.tgtdev) {
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index a5dcacb5df9c..9626252ee6b4 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -135,7 +135,7 @@ restart:
135 radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) { 135 radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
136 struct extent_buffer *eb; 136 struct extent_buffer *eb;
137 137
138 eb = radix_tree_deref_slot(slot); 138 eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock);
139 if (!eb) 139 if (!eb)
140 continue; 140 continue;
141 /* Shouldn't happen but that kind of thinking creates CVE's */ 141 /* Shouldn't happen but that kind of thinking creates CVE's */
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index fa691b754aaf..ec3dcb202357 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -415,6 +415,8 @@ int btrfs_test_qgroups(void)
415 ret = -ENOMEM; 415 ret = -ENOMEM;
416 goto out; 416 goto out;
417 } 417 }
418 btrfs_set_header_level(root->node, 0);
419 btrfs_set_header_nritems(root->node, 0);
418 root->alloc_bytenr += 8192; 420 root->alloc_bytenr += 8192;
419 421
420 tmp_root = btrfs_alloc_dummy_root(); 422 tmp_root = btrfs_alloc_dummy_root();
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 9630f10f8e1e..511839c04f11 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1284,11 +1284,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1284 goto fail; 1284 goto fail;
1285 } 1285 }
1286 1286
1287 pending->error = btrfs_qgroup_inherit(trans, fs_info, 1287 ret = btrfs_qgroup_inherit(trans, fs_info,
1288 root->root_key.objectid, 1288 root->root_key.objectid,
1289 objectid, pending->inherit); 1289 objectid, pending->inherit);
1290 if (pending->error) 1290 if (ret) {
1291 goto no_free_objectid; 1291 btrfs_abort_transaction(trans, root, ret);
1292 goto fail;
1293 }
1292 1294
1293 /* see comments in should_cow_block() */ 1295 /* see comments in should_cow_block() */
1294 set_bit(BTRFS_ROOT_FORCE_COW, &root->state); 1296 set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 21887d63dad5..469f2e8657e8 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -104,12 +104,6 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
104 umode_t new_mode = inode->i_mode, old_mode = inode->i_mode; 104 umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
105 struct dentry *dentry; 105 struct dentry *dentry;
106 106
107 if (acl) {
108 ret = posix_acl_valid(acl);
109 if (ret < 0)
110 goto out;
111 }
112
113 switch (type) { 107 switch (type) {
114 case ACL_TYPE_ACCESS: 108 case ACL_TYPE_ACCESS:
115 name = POSIX_ACL_XATTR_ACCESS; 109 name = POSIX_ACL_XATTR_ACCESS;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 65a30e817dd8..90b3954d48ed 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -211,18 +211,15 @@ static int readpage_nounlock(struct file *filp, struct page *page)
211 SetPageError(page); 211 SetPageError(page);
212 ceph_fscache_readpage_cancel(inode, page); 212 ceph_fscache_readpage_cancel(inode, page);
213 goto out; 213 goto out;
214 } else {
215 if (err < PAGE_CACHE_SIZE) {
216 /* zero fill remainder of page */
217 zero_user_segment(page, err, PAGE_CACHE_SIZE);
218 } else {
219 flush_dcache_page(page);
220 }
221 } 214 }
222 SetPageUptodate(page); 215 if (err < PAGE_CACHE_SIZE)
216 /* zero fill remainder of page */
217 zero_user_segment(page, err, PAGE_CACHE_SIZE);
218 else
219 flush_dcache_page(page);
223 220
224 if (err >= 0) 221 SetPageUptodate(page);
225 ceph_readpage_to_fscache(inode, page); 222 ceph_readpage_to_fscache(inode, page);
226 223
227out: 224out:
228 return err < 0 ? err : 0; 225 return err < 0 ? err : 0;
@@ -1187,8 +1184,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
1187 * never get called. 1184 * never get called.
1188 */ 1185 */
1189static ssize_t ceph_direct_io(int rw, struct kiocb *iocb, 1186static ssize_t ceph_direct_io(int rw, struct kiocb *iocb,
1190 const struct iovec *iov, 1187 struct iov_iter *iter,
1191 loff_t pos, unsigned long nr_segs) 1188 loff_t pos)
1192{ 1189{
1193 WARN_ON(1); 1190 WARN_ON(1);
1194 return -EINVAL; 1191 return -EINVAL;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index c561b628ebce..1fde164b74b5 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -221,8 +221,8 @@ int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
221 return 0; 221 return 0;
222} 222}
223 223
224static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc, 224struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
225 struct ceph_cap_reservation *ctx) 225 struct ceph_cap_reservation *ctx)
226{ 226{
227 struct ceph_cap *cap = NULL; 227 struct ceph_cap *cap = NULL;
228 228
@@ -508,15 +508,14 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
508 * it is < 0. (This is so we can atomically add the cap and add an 508 * it is < 0. (This is so we can atomically add the cap and add an
509 * open file reference to it.) 509 * open file reference to it.)
510 */ 510 */
511int ceph_add_cap(struct inode *inode, 511void ceph_add_cap(struct inode *inode,
512 struct ceph_mds_session *session, u64 cap_id, 512 struct ceph_mds_session *session, u64 cap_id,
513 int fmode, unsigned issued, unsigned wanted, 513 int fmode, unsigned issued, unsigned wanted,
514 unsigned seq, unsigned mseq, u64 realmino, int flags, 514 unsigned seq, unsigned mseq, u64 realmino, int flags,
515 struct ceph_cap_reservation *caps_reservation) 515 struct ceph_cap **new_cap)
516{ 516{
517 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; 517 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
518 struct ceph_inode_info *ci = ceph_inode(inode); 518 struct ceph_inode_info *ci = ceph_inode(inode);
519 struct ceph_cap *new_cap = NULL;
520 struct ceph_cap *cap; 519 struct ceph_cap *cap;
521 int mds = session->s_mds; 520 int mds = session->s_mds;
522 int actual_wanted; 521 int actual_wanted;
@@ -531,20 +530,10 @@ int ceph_add_cap(struct inode *inode,
531 if (fmode >= 0) 530 if (fmode >= 0)
532 wanted |= ceph_caps_for_mode(fmode); 531 wanted |= ceph_caps_for_mode(fmode);
533 532
534retry:
535 spin_lock(&ci->i_ceph_lock);
536 cap = __get_cap_for_mds(ci, mds); 533 cap = __get_cap_for_mds(ci, mds);
537 if (!cap) { 534 if (!cap) {
538 if (new_cap) { 535 cap = *new_cap;
539 cap = new_cap; 536 *new_cap = NULL;
540 new_cap = NULL;
541 } else {
542 spin_unlock(&ci->i_ceph_lock);
543 new_cap = get_cap(mdsc, caps_reservation);
544 if (new_cap == NULL)
545 return -ENOMEM;
546 goto retry;
547 }
548 537
549 cap->issued = 0; 538 cap->issued = 0;
550 cap->implemented = 0; 539 cap->implemented = 0;
@@ -562,9 +551,6 @@ retry:
562 session->s_nr_caps++; 551 session->s_nr_caps++;
563 spin_unlock(&session->s_cap_lock); 552 spin_unlock(&session->s_cap_lock);
564 } else { 553 } else {
565 if (new_cap)
566 ceph_put_cap(mdsc, new_cap);
567
568 /* 554 /*
569 * auth mds of the inode changed. we received the cap export 555 * auth mds of the inode changed. we received the cap export
570 * message, but still haven't received the cap import message. 556 * message, but still haven't received the cap import message.
@@ -626,7 +612,6 @@ retry:
626 ci->i_auth_cap = cap; 612 ci->i_auth_cap = cap;
627 cap->mds_wanted = wanted; 613 cap->mds_wanted = wanted;
628 } 614 }
629 ci->i_cap_exporting_issued = 0;
630 } else { 615 } else {
631 WARN_ON(ci->i_auth_cap == cap); 616 WARN_ON(ci->i_auth_cap == cap);
632 } 617 }
@@ -648,9 +633,6 @@ retry:
648 633
649 if (fmode >= 0) 634 if (fmode >= 0)
650 __ceph_get_fmode(ci, fmode); 635 __ceph_get_fmode(ci, fmode);
651 spin_unlock(&ci->i_ceph_lock);
652 wake_up_all(&ci->i_cap_wq);
653 return 0;
654} 636}
655 637
656/* 638/*
@@ -685,7 +667,7 @@ static int __cap_is_valid(struct ceph_cap *cap)
685 */ 667 */
686int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) 668int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
687{ 669{
688 int have = ci->i_snap_caps | ci->i_cap_exporting_issued; 670 int have = ci->i_snap_caps;
689 struct ceph_cap *cap; 671 struct ceph_cap *cap;
690 struct rb_node *p; 672 struct rb_node *p;
691 673
@@ -900,7 +882,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
900 */ 882 */
901static int __ceph_is_any_caps(struct ceph_inode_info *ci) 883static int __ceph_is_any_caps(struct ceph_inode_info *ci)
902{ 884{
903 return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_issued; 885 return !RB_EMPTY_ROOT(&ci->i_caps);
904} 886}
905 887
906int ceph_is_any_caps(struct inode *inode) 888int ceph_is_any_caps(struct inode *inode)
@@ -2397,32 +2379,30 @@ static void invalidate_aliases(struct inode *inode)
2397 * actually be a revocation if it specifies a smaller cap set.) 2379 * actually be a revocation if it specifies a smaller cap set.)
2398 * 2380 *
2399 * caller holds s_mutex and i_ceph_lock, we drop both. 2381 * caller holds s_mutex and i_ceph_lock, we drop both.
2400 *
2401 * return value:
2402 * 0 - ok
2403 * 1 - check_caps on auth cap only (writeback)
2404 * 2 - check_caps (ack revoke)
2405 */ 2382 */
2406static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, 2383static void handle_cap_grant(struct ceph_mds_client *mdsc,
2384 struct inode *inode, struct ceph_mds_caps *grant,
2385 void *snaptrace, int snaptrace_len,
2386 struct ceph_buffer *xattr_buf,
2407 struct ceph_mds_session *session, 2387 struct ceph_mds_session *session,
2408 struct ceph_cap *cap, 2388 struct ceph_cap *cap, int issued)
2409 struct ceph_buffer *xattr_buf) 2389 __releases(ci->i_ceph_lock)
2410 __releases(ci->i_ceph_lock)
2411{ 2390{
2412 struct ceph_inode_info *ci = ceph_inode(inode); 2391 struct ceph_inode_info *ci = ceph_inode(inode);
2413 int mds = session->s_mds; 2392 int mds = session->s_mds;
2414 int seq = le32_to_cpu(grant->seq); 2393 int seq = le32_to_cpu(grant->seq);
2415 int newcaps = le32_to_cpu(grant->caps); 2394 int newcaps = le32_to_cpu(grant->caps);
2416 int issued, implemented, used, wanted, dirty; 2395 int used, wanted, dirty;
2417 u64 size = le64_to_cpu(grant->size); 2396 u64 size = le64_to_cpu(grant->size);
2418 u64 max_size = le64_to_cpu(grant->max_size); 2397 u64 max_size = le64_to_cpu(grant->max_size);
2419 struct timespec mtime, atime, ctime; 2398 struct timespec mtime, atime, ctime;
2420 int check_caps = 0; 2399 int check_caps = 0;
2421 int wake = 0; 2400 bool wake = 0;
2422 int writeback = 0; 2401 bool writeback = 0;
2423 int queue_invalidate = 0; 2402 bool queue_trunc = 0;
2424 int deleted_inode = 0; 2403 bool queue_invalidate = 0;
2425 int queue_revalidate = 0; 2404 bool queue_revalidate = 0;
2405 bool deleted_inode = 0;
2426 2406
2427 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", 2407 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
2428 inode, cap, mds, seq, ceph_cap_string(newcaps)); 2408 inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2466,16 +2446,13 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2466 } 2446 }
2467 2447
2468 /* side effects now are allowed */ 2448 /* side effects now are allowed */
2469
2470 issued = __ceph_caps_issued(ci, &implemented);
2471 issued |= implemented | __ceph_caps_dirty(ci);
2472
2473 cap->cap_gen = session->s_cap_gen; 2449 cap->cap_gen = session->s_cap_gen;
2474 cap->seq = seq; 2450 cap->seq = seq;
2475 2451
2476 __check_cap_issue(ci, cap, newcaps); 2452 __check_cap_issue(ci, cap, newcaps);
2477 2453
2478 if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { 2454 if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
2455 (issued & CEPH_CAP_AUTH_EXCL) == 0) {
2479 inode->i_mode = le32_to_cpu(grant->mode); 2456 inode->i_mode = le32_to_cpu(grant->mode);
2480 inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); 2457 inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
2481 inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); 2458 inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
@@ -2484,7 +2461,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2484 from_kgid(&init_user_ns, inode->i_gid)); 2461 from_kgid(&init_user_ns, inode->i_gid));
2485 } 2462 }
2486 2463
2487 if ((issued & CEPH_CAP_LINK_EXCL) == 0) { 2464 if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
2465 (issued & CEPH_CAP_LINK_EXCL) == 0) {
2488 set_nlink(inode, le32_to_cpu(grant->nlink)); 2466 set_nlink(inode, le32_to_cpu(grant->nlink));
2489 if (inode->i_nlink == 0 && 2467 if (inode->i_nlink == 0 &&
2490 (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL))) 2468 (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
@@ -2511,30 +2489,35 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2511 if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1) 2489 if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1)
2512 queue_revalidate = 1; 2490 queue_revalidate = 1;
2513 2491
2514 /* size/ctime/mtime/atime? */ 2492 if (newcaps & CEPH_CAP_ANY_RD) {
2515 ceph_fill_file_size(inode, issued, 2493 /* ctime/mtime/atime? */
2516 le32_to_cpu(grant->truncate_seq), 2494 ceph_decode_timespec(&mtime, &grant->mtime);
2517 le64_to_cpu(grant->truncate_size), size); 2495 ceph_decode_timespec(&atime, &grant->atime);
2518 ceph_decode_timespec(&mtime, &grant->mtime); 2496 ceph_decode_timespec(&ctime, &grant->ctime);
2519 ceph_decode_timespec(&atime, &grant->atime); 2497 ceph_fill_file_time(inode, issued,
2520 ceph_decode_timespec(&ctime, &grant->ctime); 2498 le32_to_cpu(grant->time_warp_seq),
2521 ceph_fill_file_time(inode, issued, 2499 &ctime, &mtime, &atime);
2522 le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, 2500 }
2523 &atime); 2501
2524 2502 if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
2525 2503 /* file layout may have changed */
2526 /* file layout may have changed */ 2504 ci->i_layout = grant->layout;
2527 ci->i_layout = grant->layout; 2505 /* size/truncate_seq? */
2528 2506 queue_trunc = ceph_fill_file_size(inode, issued,
2529 /* max size increase? */ 2507 le32_to_cpu(grant->truncate_seq),
2530 if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { 2508 le64_to_cpu(grant->truncate_size),
2531 dout("max_size %lld -> %llu\n", ci->i_max_size, max_size); 2509 size);
2532 ci->i_max_size = max_size; 2510 /* max size increase? */
2533 if (max_size >= ci->i_wanted_max_size) { 2511 if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
2534 ci->i_wanted_max_size = 0; /* reset */ 2512 dout("max_size %lld -> %llu\n",
2535 ci->i_requested_max_size = 0; 2513 ci->i_max_size, max_size);
2514 ci->i_max_size = max_size;
2515 if (max_size >= ci->i_wanted_max_size) {
2516 ci->i_wanted_max_size = 0; /* reset */
2517 ci->i_requested_max_size = 0;
2518 }
2519 wake = 1;
2536 } 2520 }
2537 wake = 1;
2538 } 2521 }
2539 2522
2540 /* check cap bits */ 2523 /* check cap bits */
@@ -2595,6 +2578,23 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2595 2578
2596 spin_unlock(&ci->i_ceph_lock); 2579 spin_unlock(&ci->i_ceph_lock);
2597 2580
2581 if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
2582 down_write(&mdsc->snap_rwsem);
2583 ceph_update_snap_trace(mdsc, snaptrace,
2584 snaptrace + snaptrace_len, false);
2585 downgrade_write(&mdsc->snap_rwsem);
2586 kick_flushing_inode_caps(mdsc, session, inode);
2587 up_read(&mdsc->snap_rwsem);
2588 if (newcaps & ~issued)
2589 wake = 1;
2590 }
2591
2592 if (queue_trunc) {
2593 ceph_queue_vmtruncate(inode);
2594 ceph_queue_revalidate(inode);
2595 } else if (queue_revalidate)
2596 ceph_queue_revalidate(inode);
2597
2598 if (writeback) 2598 if (writeback)
2599 /* 2599 /*
2600 * queue inode for writeback: we can't actually call 2600 * queue inode for writeback: we can't actually call
@@ -2606,8 +2606,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2606 ceph_queue_invalidate(inode); 2606 ceph_queue_invalidate(inode);
2607 if (deleted_inode) 2607 if (deleted_inode)
2608 invalidate_aliases(inode); 2608 invalidate_aliases(inode);
2609 if (queue_revalidate)
2610 ceph_queue_revalidate(inode);
2611 if (wake) 2609 if (wake)
2612 wake_up_all(&ci->i_cap_wq); 2610 wake_up_all(&ci->i_cap_wq);
2613 2611
@@ -2784,7 +2782,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2784{ 2782{
2785 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; 2783 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
2786 struct ceph_mds_session *tsession = NULL; 2784 struct ceph_mds_session *tsession = NULL;
2787 struct ceph_cap *cap, *tcap; 2785 struct ceph_cap *cap, *tcap, *new_cap = NULL;
2788 struct ceph_inode_info *ci = ceph_inode(inode); 2786 struct ceph_inode_info *ci = ceph_inode(inode);
2789 u64 t_cap_id; 2787 u64 t_cap_id;
2790 unsigned mseq = le32_to_cpu(ex->migrate_seq); 2788 unsigned mseq = le32_to_cpu(ex->migrate_seq);
@@ -2807,7 +2805,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2807retry: 2805retry:
2808 spin_lock(&ci->i_ceph_lock); 2806 spin_lock(&ci->i_ceph_lock);
2809 cap = __get_cap_for_mds(ci, mds); 2807 cap = __get_cap_for_mds(ci, mds);
2810 if (!cap) 2808 if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id))
2811 goto out_unlock; 2809 goto out_unlock;
2812 2810
2813 if (target < 0) { 2811 if (target < 0) {
@@ -2846,15 +2844,14 @@ retry:
2846 } 2844 }
2847 __ceph_remove_cap(cap, false); 2845 __ceph_remove_cap(cap, false);
2848 goto out_unlock; 2846 goto out_unlock;
2849 } 2847 } else if (tsession) {
2850
2851 if (tsession) {
2852 int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
2853 spin_unlock(&ci->i_ceph_lock);
2854 /* add placeholder for the export tagert */ 2848 /* add placeholder for the export tagert */
2849 int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
2855 ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0, 2850 ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
2856 t_seq - 1, t_mseq, (u64)-1, flag, NULL); 2851 t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
2857 goto retry; 2852
2853 __ceph_remove_cap(cap, false);
2854 goto out_unlock;
2858 } 2855 }
2859 2856
2860 spin_unlock(&ci->i_ceph_lock); 2857 spin_unlock(&ci->i_ceph_lock);
@@ -2873,6 +2870,7 @@ retry:
2873 SINGLE_DEPTH_NESTING); 2870 SINGLE_DEPTH_NESTING);
2874 } 2871 }
2875 ceph_add_cap_releases(mdsc, tsession); 2872 ceph_add_cap_releases(mdsc, tsession);
2873 new_cap = ceph_get_cap(mdsc, NULL);
2876 } else { 2874 } else {
2877 WARN_ON(1); 2875 WARN_ON(1);
2878 tsession = NULL; 2876 tsession = NULL;
@@ -2887,24 +2885,27 @@ out_unlock:
2887 mutex_unlock(&tsession->s_mutex); 2885 mutex_unlock(&tsession->s_mutex);
2888 ceph_put_mds_session(tsession); 2886 ceph_put_mds_session(tsession);
2889 } 2887 }
2888 if (new_cap)
2889 ceph_put_cap(mdsc, new_cap);
2890} 2890}
2891 2891
2892/* 2892/*
2893 * Handle cap IMPORT. If there are temp bits from an older EXPORT, 2893 * Handle cap IMPORT.
2894 * clean them up.
2895 * 2894 *
2896 * caller holds s_mutex. 2895 * caller holds s_mutex. acquires i_ceph_lock
2897 */ 2896 */
2898static void handle_cap_import(struct ceph_mds_client *mdsc, 2897static void handle_cap_import(struct ceph_mds_client *mdsc,
2899 struct inode *inode, struct ceph_mds_caps *im, 2898 struct inode *inode, struct ceph_mds_caps *im,
2900 struct ceph_mds_cap_peer *ph, 2899 struct ceph_mds_cap_peer *ph,
2901 struct ceph_mds_session *session, 2900 struct ceph_mds_session *session,
2902 void *snaptrace, int snaptrace_len) 2901 struct ceph_cap **target_cap, int *old_issued)
2902 __acquires(ci->i_ceph_lock)
2903{ 2903{
2904 struct ceph_inode_info *ci = ceph_inode(inode); 2904 struct ceph_inode_info *ci = ceph_inode(inode);
2905 struct ceph_cap *cap; 2905 struct ceph_cap *cap, *ocap, *new_cap = NULL;
2906 int mds = session->s_mds; 2906 int mds = session->s_mds;
2907 unsigned issued = le32_to_cpu(im->caps); 2907 int issued;
2908 unsigned caps = le32_to_cpu(im->caps);
2908 unsigned wanted = le32_to_cpu(im->wanted); 2909 unsigned wanted = le32_to_cpu(im->wanted);
2909 unsigned seq = le32_to_cpu(im->seq); 2910 unsigned seq = le32_to_cpu(im->seq);
2910 unsigned mseq = le32_to_cpu(im->migrate_seq); 2911 unsigned mseq = le32_to_cpu(im->migrate_seq);
@@ -2924,40 +2925,52 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
2924 dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n", 2925 dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n",
2925 inode, ci, mds, mseq, peer); 2926 inode, ci, mds, mseq, peer);
2926 2927
2928retry:
2927 spin_lock(&ci->i_ceph_lock); 2929 spin_lock(&ci->i_ceph_lock);
2928 cap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; 2930 cap = __get_cap_for_mds(ci, mds);
2929 if (cap && cap->cap_id == p_cap_id) { 2931 if (!cap) {
2932 if (!new_cap) {
2933 spin_unlock(&ci->i_ceph_lock);
2934 new_cap = ceph_get_cap(mdsc, NULL);
2935 goto retry;
2936 }
2937 cap = new_cap;
2938 } else {
2939 if (new_cap) {
2940 ceph_put_cap(mdsc, new_cap);
2941 new_cap = NULL;
2942 }
2943 }
2944
2945 __ceph_caps_issued(ci, &issued);
2946 issued |= __ceph_caps_dirty(ci);
2947
2948 ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq,
2949 realmino, CEPH_CAP_FLAG_AUTH, &new_cap);
2950
2951 ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL;
2952 if (ocap && ocap->cap_id == p_cap_id) {
2930 dout(" remove export cap %p mds%d flags %d\n", 2953 dout(" remove export cap %p mds%d flags %d\n",
2931 cap, peer, ph->flags); 2954 ocap, peer, ph->flags);
2932 if ((ph->flags & CEPH_CAP_FLAG_AUTH) && 2955 if ((ph->flags & CEPH_CAP_FLAG_AUTH) &&
2933 (cap->seq != le32_to_cpu(ph->seq) || 2956 (ocap->seq != le32_to_cpu(ph->seq) ||
2934 cap->mseq != le32_to_cpu(ph->mseq))) { 2957 ocap->mseq != le32_to_cpu(ph->mseq))) {
2935 pr_err("handle_cap_import: mismatched seq/mseq: " 2958 pr_err("handle_cap_import: mismatched seq/mseq: "
2936 "ino (%llx.%llx) mds%d seq %d mseq %d " 2959 "ino (%llx.%llx) mds%d seq %d mseq %d "
2937 "importer mds%d has peer seq %d mseq %d\n", 2960 "importer mds%d has peer seq %d mseq %d\n",
2938 ceph_vinop(inode), peer, cap->seq, 2961 ceph_vinop(inode), peer, ocap->seq,
2939 cap->mseq, mds, le32_to_cpu(ph->seq), 2962 ocap->mseq, mds, le32_to_cpu(ph->seq),
2940 le32_to_cpu(ph->mseq)); 2963 le32_to_cpu(ph->mseq));
2941 } 2964 }
2942 ci->i_cap_exporting_issued = cap->issued; 2965 __ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
2943 __ceph_remove_cap(cap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
2944 } 2966 }
2945 2967
2946 /* make sure we re-request max_size, if necessary */ 2968 /* make sure we re-request max_size, if necessary */
2947 ci->i_wanted_max_size = 0; 2969 ci->i_wanted_max_size = 0;
2948 ci->i_requested_max_size = 0; 2970 ci->i_requested_max_size = 0;
2949 spin_unlock(&ci->i_ceph_lock);
2950
2951 down_write(&mdsc->snap_rwsem);
2952 ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len,
2953 false);
2954 downgrade_write(&mdsc->snap_rwsem);
2955 ceph_add_cap(inode, session, cap_id, -1,
2956 issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH,
2957 NULL /* no caps context */);
2958 kick_flushing_inode_caps(mdsc, session, inode);
2959 up_read(&mdsc->snap_rwsem);
2960 2971
2972 *old_issued = issued;
2973 *target_cap = cap;
2961} 2974}
2962 2975
2963/* 2976/*
@@ -2977,7 +2990,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2977 struct ceph_mds_caps *h; 2990 struct ceph_mds_caps *h;
2978 struct ceph_mds_cap_peer *peer = NULL; 2991 struct ceph_mds_cap_peer *peer = NULL;
2979 int mds = session->s_mds; 2992 int mds = session->s_mds;
2980 int op; 2993 int op, issued;
2981 u32 seq, mseq; 2994 u32 seq, mseq;
2982 struct ceph_vino vino; 2995 struct ceph_vino vino;
2983 u64 cap_id; 2996 u64 cap_id;
@@ -3069,7 +3082,10 @@ void ceph_handle_caps(struct ceph_mds_session *session,
3069 3082
3070 case CEPH_CAP_OP_IMPORT: 3083 case CEPH_CAP_OP_IMPORT:
3071 handle_cap_import(mdsc, inode, h, peer, session, 3084 handle_cap_import(mdsc, inode, h, peer, session,
3072 snaptrace, snaptrace_len); 3085 &cap, &issued);
3086 handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len,
3087 msg->middle, session, cap, issued);
3088 goto done_unlocked;
3073 } 3089 }
3074 3090
3075 /* the rest require a cap */ 3091 /* the rest require a cap */
@@ -3086,8 +3102,10 @@ void ceph_handle_caps(struct ceph_mds_session *session,
3086 switch (op) { 3102 switch (op) {
3087 case CEPH_CAP_OP_REVOKE: 3103 case CEPH_CAP_OP_REVOKE:
3088 case CEPH_CAP_OP_GRANT: 3104 case CEPH_CAP_OP_GRANT:
3089 case CEPH_CAP_OP_IMPORT: 3105 __ceph_caps_issued(ci, &issued);
3090 handle_cap_grant(inode, h, session, cap, msg->middle); 3106 issued |= __ceph_caps_dirty(ci);
3107 handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle,
3108 session, cap, issued);
3091 goto done_unlocked; 3109 goto done_unlocked;
3092 3110
3093 case CEPH_CAP_OP_FLUSH_ACK: 3111 case CEPH_CAP_OP_FLUSH_ACK:
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 00d6af6a32ec..8d7d782f4382 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -169,7 +169,7 @@ static struct dentry *__get_parent(struct super_block *sb,
169 return dentry; 169 return dentry;
170} 170}
171 171
172struct dentry *ceph_get_parent(struct dentry *child) 172static struct dentry *ceph_get_parent(struct dentry *child)
173{ 173{
174 /* don't re-export snaps */ 174 /* don't re-export snaps */
175 if (ceph_snap(child->d_inode) != CEPH_NOSNAP) 175 if (ceph_snap(child->d_inode) != CEPH_NOSNAP)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 88a6df4cbe6d..302085100c28 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -418,7 +418,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
418 struct page **pages; 418 struct page **pages;
419 u64 off = iocb->ki_pos; 419 u64 off = iocb->ki_pos;
420 int num_pages, ret; 420 int num_pages, ret;
421 size_t len = i->count; 421 size_t len = iov_iter_count(i);
422 422
423 dout("sync_read on file %p %llu~%u %s\n", file, off, 423 dout("sync_read on file %p %llu~%u %s\n", file, off,
424 (unsigned)len, 424 (unsigned)len,
@@ -436,25 +436,26 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
436 436
437 if (file->f_flags & O_DIRECT) { 437 if (file->f_flags & O_DIRECT) {
438 while (iov_iter_count(i)) { 438 while (iov_iter_count(i)) {
439 void __user *data = i->iov[0].iov_base + i->iov_offset; 439 size_t start;
440 size_t len = i->iov[0].iov_len - i->iov_offset; 440 ssize_t n;
441 441
442 num_pages = calc_pages_for((unsigned long)data, len); 442 n = iov_iter_get_pages_alloc(i, &pages, INT_MAX, &start);
443 pages = ceph_get_direct_page_vector(data, 443 if (n < 0)
444 num_pages, true); 444 return n;
445 if (IS_ERR(pages))
446 return PTR_ERR(pages);
447 445
448 ret = striped_read(inode, off, len, 446 num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE;
447
448 ret = striped_read(inode, off, n,
449 pages, num_pages, checkeof, 449 pages, num_pages, checkeof,
450 1, (unsigned long)data & ~PAGE_MASK); 450 1, start);
451
451 ceph_put_page_vector(pages, num_pages, true); 452 ceph_put_page_vector(pages, num_pages, true);
452 453
453 if (ret <= 0) 454 if (ret <= 0)
454 break; 455 break;
455 off += ret; 456 off += ret;
456 iov_iter_advance(i, ret); 457 iov_iter_advance(i, ret);
457 if (ret < len) 458 if (ret < n)
458 break; 459 break;
459 } 460 }
460 } else { 461 } else {
@@ -466,25 +467,14 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
466 num_pages, checkeof, 0, 0); 467 num_pages, checkeof, 0, 0);
467 if (ret > 0) { 468 if (ret > 0) {
468 int l, k = 0; 469 int l, k = 0;
469 size_t left = len = ret; 470 size_t left = ret;
470 471
471 while (left) { 472 while (left) {
472 void __user *data = i->iov[0].iov_base 473 int copy = min_t(size_t, PAGE_SIZE, left);
473 + i->iov_offset; 474 l = copy_page_to_iter(pages[k++], 0, copy, i);
474 l = min(i->iov[0].iov_len - i->iov_offset, 475 off += l;
475 left); 476 left -= l;
476 477 if (l < copy)
477 ret = ceph_copy_page_vector_to_user(&pages[k],
478 data, off,
479 l);
480 if (ret > 0) {
481 iov_iter_advance(i, ret);
482 left -= ret;
483 off += ret;
484 k = calc_pages_for(iocb->ki_pos,
485 len - left + 1) - 1;
486 BUG_ON(k >= num_pages && left);
487 } else
488 break; 478 break;
489 } 479 }
490 } 480 }
@@ -541,8 +531,7 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
541 * objects, rollback on failure, etc.) 531 * objects, rollback on failure, etc.)
542 */ 532 */
543static ssize_t 533static ssize_t
544ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov, 534ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from)
545 unsigned long nr_segs, size_t count)
546{ 535{
547 struct file *file = iocb->ki_filp; 536 struct file *file = iocb->ki_filp;
548 struct inode *inode = file_inode(file); 537 struct inode *inode = file_inode(file);
@@ -556,11 +545,10 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
556 int written = 0; 545 int written = 0;
557 int flags; 546 int flags;
558 int check_caps = 0; 547 int check_caps = 0;
559 int page_align;
560 int ret; 548 int ret;
561 struct timespec mtime = CURRENT_TIME; 549 struct timespec mtime = CURRENT_TIME;
562 loff_t pos = iocb->ki_pos; 550 loff_t pos = iocb->ki_pos;
563 struct iov_iter i; 551 size_t count = iov_iter_count(from);
564 552
565 if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) 553 if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
566 return -EROFS; 554 return -EROFS;
@@ -582,13 +570,10 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
582 CEPH_OSD_FLAG_ONDISK | 570 CEPH_OSD_FLAG_ONDISK |
583 CEPH_OSD_FLAG_WRITE; 571 CEPH_OSD_FLAG_WRITE;
584 572
585 iov_iter_init(&i, iov, nr_segs, count, 0); 573 while (iov_iter_count(from) > 0) {
586 574 u64 len = iov_iter_single_seg_count(from);
587 while (iov_iter_count(&i) > 0) { 575 size_t start;
588 void __user *data = i.iov->iov_base + i.iov_offset; 576 ssize_t n;
589 u64 len = i.iov->iov_len - i.iov_offset;
590
591 page_align = (unsigned long)data & ~PAGE_MASK;
592 577
593 snapc = ci->i_snap_realm->cached_context; 578 snapc = ci->i_snap_realm->cached_context;
594 vino = ceph_vino(inode); 579 vino = ceph_vino(inode);
@@ -604,20 +589,21 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
604 break; 589 break;
605 } 590 }
606 591
607 num_pages = calc_pages_for(page_align, len); 592 n = iov_iter_get_pages_alloc(from, &pages, len, &start);
608 pages = ceph_get_direct_page_vector(data, num_pages, false); 593 if (unlikely(n < 0)) {
609 if (IS_ERR(pages)) { 594 ret = n;
610 ret = PTR_ERR(pages); 595 ceph_osdc_put_request(req);
611 goto out; 596 break;
612 } 597 }
613 598
599 num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE;
614 /* 600 /*
615 * throw out any page cache pages in this range. this 601 * throw out any page cache pages in this range. this
616 * may block. 602 * may block.
617 */ 603 */
618 truncate_inode_pages_range(inode->i_mapping, pos, 604 truncate_inode_pages_range(inode->i_mapping, pos,
619 (pos+len) | (PAGE_CACHE_SIZE-1)); 605 (pos+n) | (PAGE_CACHE_SIZE-1));
620 osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align, 606 osd_req_op_extent_osd_data_pages(req, 0, pages, n, start,
621 false, false); 607 false, false);
622 608
623 /* BUG_ON(vino.snap != CEPH_NOSNAP); */ 609 /* BUG_ON(vino.snap != CEPH_NOSNAP); */
@@ -629,22 +615,20 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
629 615
630 ceph_put_page_vector(pages, num_pages, false); 616 ceph_put_page_vector(pages, num_pages, false);
631 617
632out:
633 ceph_osdc_put_request(req); 618 ceph_osdc_put_request(req);
634 if (ret == 0) { 619 if (ret)
635 pos += len;
636 written += len;
637 iov_iter_advance(&i, (size_t)len);
638
639 if (pos > i_size_read(inode)) {
640 check_caps = ceph_inode_set_size(inode, pos);
641 if (check_caps)
642 ceph_check_caps(ceph_inode(inode),
643 CHECK_CAPS_AUTHONLY,
644 NULL);
645 }
646 } else
647 break; 620 break;
621 pos += n;
622 written += n;
623 iov_iter_advance(from, n);
624
625 if (pos > i_size_read(inode)) {
626 check_caps = ceph_inode_set_size(inode, pos);
627 if (check_caps)
628 ceph_check_caps(ceph_inode(inode),
629 CHECK_CAPS_AUTHONLY,
630 NULL);
631 }
648 } 632 }
649 633
650 if (ret != -EOLDSNAPC && written > 0) { 634 if (ret != -EOLDSNAPC && written > 0) {
@@ -662,8 +646,7 @@ out:
662 * correct atomic write, we should e.g. take write locks on all 646 * correct atomic write, we should e.g. take write locks on all
663 * objects, rollback on failure, etc.) 647 * objects, rollback on failure, etc.)
664 */ 648 */
665static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov, 649static ssize_t ceph_sync_write(struct kiocb *iocb, struct iov_iter *from)
666 unsigned long nr_segs, size_t count)
667{ 650{
668 struct file *file = iocb->ki_filp; 651 struct file *file = iocb->ki_filp;
669 struct inode *inode = file_inode(file); 652 struct inode *inode = file_inode(file);
@@ -681,7 +664,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
681 int ret; 664 int ret;
682 struct timespec mtime = CURRENT_TIME; 665 struct timespec mtime = CURRENT_TIME;
683 loff_t pos = iocb->ki_pos; 666 loff_t pos = iocb->ki_pos;
684 struct iov_iter i; 667 size_t count = iov_iter_count(from);
685 668
686 if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) 669 if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
687 return -EROFS; 670 return -EROFS;
@@ -703,9 +686,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
703 CEPH_OSD_FLAG_WRITE | 686 CEPH_OSD_FLAG_WRITE |
704 CEPH_OSD_FLAG_ACK; 687 CEPH_OSD_FLAG_ACK;
705 688
706 iov_iter_init(&i, iov, nr_segs, count, 0); 689 while ((len = iov_iter_count(from)) > 0) {
707
708 while ((len = iov_iter_count(&i)) > 0) {
709 size_t left; 690 size_t left;
710 int n; 691 int n;
711 692
@@ -737,13 +718,12 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
737 left = len; 718 left = len;
738 for (n = 0; n < num_pages; n++) { 719 for (n = 0; n < num_pages; n++) {
739 size_t plen = min_t(size_t, left, PAGE_SIZE); 720 size_t plen = min_t(size_t, left, PAGE_SIZE);
740 ret = iov_iter_copy_from_user(pages[n], &i, 0, plen); 721 ret = copy_page_from_iter(pages[n], 0, plen, from);
741 if (ret != plen) { 722 if (ret != plen) {
742 ret = -EFAULT; 723 ret = -EFAULT;
743 break; 724 break;
744 } 725 }
745 left -= ret; 726 left -= ret;
746 iov_iter_advance(&i, ret);
747 } 727 }
748 728
749 if (ret < 0) { 729 if (ret < 0) {
@@ -796,8 +776,7 @@ out:
796 * 776 *
797 * Hmm, the sync read case isn't actually async... should it be? 777 * Hmm, the sync read case isn't actually async... should it be?
798 */ 778 */
799static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, 779static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
800 unsigned long nr_segs, loff_t pos)
801{ 780{
802 struct file *filp = iocb->ki_filp; 781 struct file *filp = iocb->ki_filp;
803 struct ceph_file_info *fi = filp->private_data; 782 struct ceph_file_info *fi = filp->private_data;
@@ -823,40 +802,20 @@ again:
823 if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || 802 if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
824 (iocb->ki_filp->f_flags & O_DIRECT) || 803 (iocb->ki_filp->f_flags & O_DIRECT) ||
825 (fi->flags & CEPH_F_SYNC)) { 804 (fi->flags & CEPH_F_SYNC)) {
826 struct iov_iter i;
827 805
828 dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n", 806 dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n",
829 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, 807 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
830 ceph_cap_string(got)); 808 ceph_cap_string(got));
831 809
832 if (!read) {
833 ret = generic_segment_checks(iov, &nr_segs,
834 &len, VERIFY_WRITE);
835 if (ret)
836 goto out;
837 }
838
839 iov_iter_init(&i, iov, nr_segs, len, read);
840
841 /* hmm, this isn't really async... */ 810 /* hmm, this isn't really async... */
842 ret = ceph_sync_read(iocb, &i, &checkeof); 811 ret = ceph_sync_read(iocb, to, &checkeof);
843 } else { 812 } else {
844 /*
845 * We can't modify the content of iov,
846 * so we only read from beginning.
847 */
848 if (read) {
849 iocb->ki_pos = pos;
850 len = iocb->ki_nbytes;
851 read = 0;
852 }
853 dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", 813 dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
854 inode, ceph_vinop(inode), pos, (unsigned)len, 814 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
855 ceph_cap_string(got)); 815 ceph_cap_string(got));
856 816
857 ret = generic_file_aio_read(iocb, iov, nr_segs, pos); 817 ret = generic_file_read_iter(iocb, to);
858 } 818 }
859out:
860 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", 819 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
861 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); 820 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
862 ceph_put_cap_refs(ci, got); 821 ceph_put_cap_refs(ci, got);
@@ -872,6 +831,7 @@ out:
872 ", reading more\n", iocb->ki_pos, 831 ", reading more\n", iocb->ki_pos,
873 inode->i_size); 832 inode->i_size);
874 833
834 iov_iter_advance(to, ret);
875 read += ret; 835 read += ret;
876 len -= ret; 836 len -= ret;
877 checkeof = 0; 837 checkeof = 0;
@@ -895,8 +855,7 @@ out:
895 * 855 *
896 * If we are near ENOSPC, write synchronously. 856 * If we are near ENOSPC, write synchronously.
897 */ 857 */
898static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, 858static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
899 unsigned long nr_segs, loff_t pos)
900{ 859{
901 struct file *file = iocb->ki_filp; 860 struct file *file = iocb->ki_filp;
902 struct ceph_file_info *fi = file->private_data; 861 struct ceph_file_info *fi = file->private_data;
@@ -904,18 +863,15 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
904 struct ceph_inode_info *ci = ceph_inode(inode); 863 struct ceph_inode_info *ci = ceph_inode(inode);
905 struct ceph_osd_client *osdc = 864 struct ceph_osd_client *osdc =
906 &ceph_sb_to_client(inode->i_sb)->client->osdc; 865 &ceph_sb_to_client(inode->i_sb)->client->osdc;
907 ssize_t count, written = 0; 866 ssize_t count = iov_iter_count(from), written = 0;
908 int err, want, got; 867 int err, want, got;
868 loff_t pos = iocb->ki_pos;
909 869
910 if (ceph_snap(inode) != CEPH_NOSNAP) 870 if (ceph_snap(inode) != CEPH_NOSNAP)
911 return -EROFS; 871 return -EROFS;
912 872
913 mutex_lock(&inode->i_mutex); 873 mutex_lock(&inode->i_mutex);
914 874
915 err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
916 if (err)
917 goto out;
918
919 /* We can write back this queue in page reclaim */ 875 /* We can write back this queue in page reclaim */
920 current->backing_dev_info = file->f_mapping->backing_dev_info; 876 current->backing_dev_info = file->f_mapping->backing_dev_info;
921 877
@@ -925,6 +881,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
925 881
926 if (count == 0) 882 if (count == 0)
927 goto out; 883 goto out;
884 iov_iter_truncate(from, count);
928 885
929 err = file_remove_suid(file); 886 err = file_remove_suid(file);
930 if (err) 887 if (err)
@@ -956,23 +913,26 @@ retry_snap:
956 913
957 if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || 914 if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
958 (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) { 915 (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
916 struct iov_iter data;
959 mutex_unlock(&inode->i_mutex); 917 mutex_unlock(&inode->i_mutex);
918 /* we might need to revert back to that point */
919 data = *from;
960 if (file->f_flags & O_DIRECT) 920 if (file->f_flags & O_DIRECT)
961 written = ceph_sync_direct_write(iocb, iov, 921 written = ceph_sync_direct_write(iocb, &data);
962 nr_segs, count);
963 else 922 else
964 written = ceph_sync_write(iocb, iov, nr_segs, count); 923 written = ceph_sync_write(iocb, &data);
965 if (written == -EOLDSNAPC) { 924 if (written == -EOLDSNAPC) {
966 dout("aio_write %p %llx.%llx %llu~%u" 925 dout("aio_write %p %llx.%llx %llu~%u"
967 "got EOLDSNAPC, retrying\n", 926 "got EOLDSNAPC, retrying\n",
968 inode, ceph_vinop(inode), 927 inode, ceph_vinop(inode),
969 pos, (unsigned)iov->iov_len); 928 pos, (unsigned)count);
970 mutex_lock(&inode->i_mutex); 929 mutex_lock(&inode->i_mutex);
971 goto retry_snap; 930 goto retry_snap;
972 } 931 }
932 if (written > 0)
933 iov_iter_advance(from, written);
973 } else { 934 } else {
974 loff_t old_size = inode->i_size; 935 loff_t old_size = inode->i_size;
975 struct iov_iter from;
976 /* 936 /*
977 * No need to acquire the i_truncate_mutex. Because 937 * No need to acquire the i_truncate_mutex. Because
978 * the MDS revokes Fwb caps before sending truncate 938 * the MDS revokes Fwb caps before sending truncate
@@ -980,8 +940,7 @@ retry_snap:
980 * are pending vmtruncate. So write and vmtruncate 940 * are pending vmtruncate. So write and vmtruncate
981 * can not run at the same time 941 * can not run at the same time
982 */ 942 */
983 iov_iter_init(&from, iov, nr_segs, count, 0); 943 written = generic_perform_write(file, from, pos);
984 written = generic_perform_write(file, &from, pos);
985 if (likely(written >= 0)) 944 if (likely(written >= 0))
986 iocb->ki_pos = pos + written; 945 iocb->ki_pos = pos + written;
987 if (inode->i_size > old_size) 946 if (inode->i_size > old_size)
@@ -999,7 +958,7 @@ retry_snap:
999 } 958 }
1000 959
1001 dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", 960 dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n",
1002 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, 961 inode, ceph_vinop(inode), pos, (unsigned)count,
1003 ceph_cap_string(got)); 962 ceph_cap_string(got));
1004 ceph_put_cap_refs(ci, got); 963 ceph_put_cap_refs(ci, got);
1005 964
@@ -1276,16 +1235,16 @@ const struct file_operations ceph_file_fops = {
1276 .open = ceph_open, 1235 .open = ceph_open,
1277 .release = ceph_release, 1236 .release = ceph_release,
1278 .llseek = ceph_llseek, 1237 .llseek = ceph_llseek,
1279 .read = do_sync_read, 1238 .read = new_sync_read,
1280 .write = do_sync_write, 1239 .write = new_sync_write,
1281 .aio_read = ceph_aio_read, 1240 .read_iter = ceph_read_iter,
1282 .aio_write = ceph_aio_write, 1241 .write_iter = ceph_write_iter,
1283 .mmap = ceph_mmap, 1242 .mmap = ceph_mmap,
1284 .fsync = ceph_fsync, 1243 .fsync = ceph_fsync,
1285 .lock = ceph_lock, 1244 .lock = ceph_lock,
1286 .flock = ceph_flock, 1245 .flock = ceph_flock,
1287 .splice_read = generic_file_splice_read, 1246 .splice_read = generic_file_splice_read,
1288 .splice_write = generic_file_splice_write, 1247 .splice_write = iter_file_splice_write,
1289 .unlocked_ioctl = ceph_ioctl, 1248 .unlocked_ioctl = ceph_ioctl,
1290 .compat_ioctl = ceph_ioctl, 1249 .compat_ioctl = ceph_ioctl,
1291 .fallocate = ceph_fallocate, 1250 .fallocate = ceph_fallocate,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e4fff9ff1c27..04c89c266cec 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -10,6 +10,7 @@
10#include <linux/writeback.h> 10#include <linux/writeback.h>
11#include <linux/vmalloc.h> 11#include <linux/vmalloc.h>
12#include <linux/posix_acl.h> 12#include <linux/posix_acl.h>
13#include <linux/random.h>
13 14
14#include "super.h" 15#include "super.h"
15#include "mds_client.h" 16#include "mds_client.h"
@@ -179,9 +180,8 @@ struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f)
179 * specified, copy the frag delegation info to the caller if 180 * specified, copy the frag delegation info to the caller if
180 * it is present. 181 * it is present.
181 */ 182 */
182u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, 183static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
183 struct ceph_inode_frag *pfrag, 184 struct ceph_inode_frag *pfrag, int *found)
184 int *found)
185{ 185{
186 u32 t = ceph_frag_make(0, 0); 186 u32 t = ceph_frag_make(0, 0);
187 struct ceph_inode_frag *frag; 187 struct ceph_inode_frag *frag;
@@ -191,7 +191,6 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
191 if (found) 191 if (found)
192 *found = 0; 192 *found = 0;
193 193
194 mutex_lock(&ci->i_fragtree_mutex);
195 while (1) { 194 while (1) {
196 WARN_ON(!ceph_frag_contains_value(t, v)); 195 WARN_ON(!ceph_frag_contains_value(t, v));
197 frag = __ceph_find_frag(ci, t); 196 frag = __ceph_find_frag(ci, t);
@@ -220,10 +219,19 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
220 } 219 }
221 dout("choose_frag(%x) = %x\n", v, t); 220 dout("choose_frag(%x) = %x\n", v, t);
222 221
223 mutex_unlock(&ci->i_fragtree_mutex);
224 return t; 222 return t;
225} 223}
226 224
225u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
226 struct ceph_inode_frag *pfrag, int *found)
227{
228 u32 ret;
229 mutex_lock(&ci->i_fragtree_mutex);
230 ret = __ceph_choose_frag(ci, v, pfrag, found);
231 mutex_unlock(&ci->i_fragtree_mutex);
232 return ret;
233}
234
227/* 235/*
228 * Process dirfrag (delegation) info from the mds. Include leaf 236 * Process dirfrag (delegation) info from the mds. Include leaf
229 * fragment in tree ONLY if ndist > 0. Otherwise, only 237 * fragment in tree ONLY if ndist > 0. Otherwise, only
@@ -237,11 +245,17 @@ static int ceph_fill_dirfrag(struct inode *inode,
237 u32 id = le32_to_cpu(dirinfo->frag); 245 u32 id = le32_to_cpu(dirinfo->frag);
238 int mds = le32_to_cpu(dirinfo->auth); 246 int mds = le32_to_cpu(dirinfo->auth);
239 int ndist = le32_to_cpu(dirinfo->ndist); 247 int ndist = le32_to_cpu(dirinfo->ndist);
248 int diri_auth = -1;
240 int i; 249 int i;
241 int err = 0; 250 int err = 0;
242 251
252 spin_lock(&ci->i_ceph_lock);
253 if (ci->i_auth_cap)
254 diri_auth = ci->i_auth_cap->mds;
255 spin_unlock(&ci->i_ceph_lock);
256
243 mutex_lock(&ci->i_fragtree_mutex); 257 mutex_lock(&ci->i_fragtree_mutex);
244 if (ndist == 0) { 258 if (ndist == 0 && mds == diri_auth) {
245 /* no delegation info needed. */ 259 /* no delegation info needed. */
246 frag = __ceph_find_frag(ci, id); 260 frag = __ceph_find_frag(ci, id);
247 if (!frag) 261 if (!frag)
@@ -286,6 +300,75 @@ out:
286 return err; 300 return err;
287} 301}
288 302
303static int ceph_fill_fragtree(struct inode *inode,
304 struct ceph_frag_tree_head *fragtree,
305 struct ceph_mds_reply_dirfrag *dirinfo)
306{
307 struct ceph_inode_info *ci = ceph_inode(inode);
308 struct ceph_inode_frag *frag;
309 struct rb_node *rb_node;
310 int i;
311 u32 id, nsplits;
312 bool update = false;
313
314 mutex_lock(&ci->i_fragtree_mutex);
315 nsplits = le32_to_cpu(fragtree->nsplits);
316 if (nsplits) {
317 i = prandom_u32() % nsplits;
318 id = le32_to_cpu(fragtree->splits[i].frag);
319 if (!__ceph_find_frag(ci, id))
320 update = true;
321 } else if (!RB_EMPTY_ROOT(&ci->i_fragtree)) {
322 rb_node = rb_first(&ci->i_fragtree);
323 frag = rb_entry(rb_node, struct ceph_inode_frag, node);
324 if (frag->frag != ceph_frag_make(0, 0) || rb_next(rb_node))
325 update = true;
326 }
327 if (!update && dirinfo) {
328 id = le32_to_cpu(dirinfo->frag);
329 if (id != __ceph_choose_frag(ci, id, NULL, NULL))
330 update = true;
331 }
332 if (!update)
333 goto out_unlock;
334
335 dout("fill_fragtree %llx.%llx\n", ceph_vinop(inode));
336 rb_node = rb_first(&ci->i_fragtree);
337 for (i = 0; i < nsplits; i++) {
338 id = le32_to_cpu(fragtree->splits[i].frag);
339 frag = NULL;
340 while (rb_node) {
341 frag = rb_entry(rb_node, struct ceph_inode_frag, node);
342 if (ceph_frag_compare(frag->frag, id) >= 0) {
343 if (frag->frag != id)
344 frag = NULL;
345 else
346 rb_node = rb_next(rb_node);
347 break;
348 }
349 rb_node = rb_next(rb_node);
350 rb_erase(&frag->node, &ci->i_fragtree);
351 kfree(frag);
352 frag = NULL;
353 }
354 if (!frag) {
355 frag = __get_or_create_frag(ci, id);
356 if (IS_ERR(frag))
357 continue;
358 }
359 frag->split_by = le32_to_cpu(fragtree->splits[i].by);
360 dout(" frag %x split by %d\n", frag->frag, frag->split_by);
361 }
362 while (rb_node) {
363 frag = rb_entry(rb_node, struct ceph_inode_frag, node);
364 rb_node = rb_next(rb_node);
365 rb_erase(&frag->node, &ci->i_fragtree);
366 kfree(frag);
367 }
368out_unlock:
369 mutex_unlock(&ci->i_fragtree_mutex);
370 return 0;
371}
289 372
290/* 373/*
291 * initialize a newly allocated inode. 374 * initialize a newly allocated inode.
@@ -341,7 +424,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
341 INIT_LIST_HEAD(&ci->i_cap_snaps); 424 INIT_LIST_HEAD(&ci->i_cap_snaps);
342 ci->i_head_snapc = NULL; 425 ci->i_head_snapc = NULL;
343 ci->i_snap_caps = 0; 426 ci->i_snap_caps = 0;
344 ci->i_cap_exporting_issued = 0;
345 427
346 for (i = 0; i < CEPH_FILE_MODE_NUM; i++) 428 for (i = 0; i < CEPH_FILE_MODE_NUM; i++)
347 ci->i_nr_by_mode[i] = 0; 429 ci->i_nr_by_mode[i] = 0;
@@ -407,7 +489,7 @@ void ceph_destroy_inode(struct inode *inode)
407 489
408 /* 490 /*
409 * we may still have a snap_realm reference if there are stray 491 * we may still have a snap_realm reference if there are stray
410 * caps in i_cap_exporting_issued or i_snap_caps. 492 * caps in i_snap_caps.
411 */ 493 */
412 if (ci->i_snap_realm) { 494 if (ci->i_snap_realm) {
413 struct ceph_mds_client *mdsc = 495 struct ceph_mds_client *mdsc =
@@ -582,22 +664,26 @@ static int fill_inode(struct inode *inode,
582 unsigned long ttl_from, int cap_fmode, 664 unsigned long ttl_from, int cap_fmode,
583 struct ceph_cap_reservation *caps_reservation) 665 struct ceph_cap_reservation *caps_reservation)
584{ 666{
667 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
585 struct ceph_mds_reply_inode *info = iinfo->in; 668 struct ceph_mds_reply_inode *info = iinfo->in;
586 struct ceph_inode_info *ci = ceph_inode(inode); 669 struct ceph_inode_info *ci = ceph_inode(inode);
587 int i; 670 int issued = 0, implemented, new_issued;
588 int issued = 0, implemented;
589 struct timespec mtime, atime, ctime; 671 struct timespec mtime, atime, ctime;
590 u32 nsplits;
591 struct ceph_inode_frag *frag;
592 struct rb_node *rb_node;
593 struct ceph_buffer *xattr_blob = NULL; 672 struct ceph_buffer *xattr_blob = NULL;
673 struct ceph_cap *new_cap = NULL;
594 int err = 0; 674 int err = 0;
595 int queue_trunc = 0; 675 bool wake = false;
676 bool queue_trunc = false;
677 bool new_version = false;
596 678
597 dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", 679 dout("fill_inode %p ino %llx.%llx v %llu had %llu\n",
598 inode, ceph_vinop(inode), le64_to_cpu(info->version), 680 inode, ceph_vinop(inode), le64_to_cpu(info->version),
599 ci->i_version); 681 ci->i_version);
600 682
683 /* prealloc new cap struct */
684 if (info->cap.caps && ceph_snap(inode) == CEPH_NOSNAP)
685 new_cap = ceph_get_cap(mdsc, caps_reservation);
686
601 /* 687 /*
602 * prealloc xattr data, if it looks like we'll need it. only 688 * prealloc xattr data, if it looks like we'll need it. only
603 * if len > 4 (meaning there are actually xattrs; the first 4 689 * if len > 4 (meaning there are actually xattrs; the first 4
@@ -623,19 +709,23 @@ static int fill_inode(struct inode *inode,
623 * 3 2 skip 709 * 3 2 skip
624 * 3 3 update 710 * 3 3 update
625 */ 711 */
626 if (le64_to_cpu(info->version) > 0 && 712 if (ci->i_version == 0 ||
627 (ci->i_version & ~1) >= le64_to_cpu(info->version)) 713 ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
628 goto no_change; 714 le64_to_cpu(info->version) > (ci->i_version & ~1)))
629 715 new_version = true;
716
630 issued = __ceph_caps_issued(ci, &implemented); 717 issued = __ceph_caps_issued(ci, &implemented);
631 issued |= implemented | __ceph_caps_dirty(ci); 718 issued |= implemented | __ceph_caps_dirty(ci);
719 new_issued = ~issued & le32_to_cpu(info->cap.caps);
632 720
633 /* update inode */ 721 /* update inode */
634 ci->i_version = le64_to_cpu(info->version); 722 ci->i_version = le64_to_cpu(info->version);
635 inode->i_version++; 723 inode->i_version++;
636 inode->i_rdev = le32_to_cpu(info->rdev); 724 inode->i_rdev = le32_to_cpu(info->rdev);
725 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
637 726
638 if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { 727 if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
728 (issued & CEPH_CAP_AUTH_EXCL) == 0) {
639 inode->i_mode = le32_to_cpu(info->mode); 729 inode->i_mode = le32_to_cpu(info->mode);
640 inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); 730 inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid));
641 inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); 731 inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid));
@@ -644,23 +734,35 @@ static int fill_inode(struct inode *inode,
644 from_kgid(&init_user_ns, inode->i_gid)); 734 from_kgid(&init_user_ns, inode->i_gid));
645 } 735 }
646 736
647 if ((issued & CEPH_CAP_LINK_EXCL) == 0) 737 if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) &&
738 (issued & CEPH_CAP_LINK_EXCL) == 0)
648 set_nlink(inode, le32_to_cpu(info->nlink)); 739 set_nlink(inode, le32_to_cpu(info->nlink));
649 740
650 /* be careful with mtime, atime, size */ 741 if (new_version || (new_issued & CEPH_CAP_ANY_RD)) {
651 ceph_decode_timespec(&atime, &info->atime); 742 /* be careful with mtime, atime, size */
652 ceph_decode_timespec(&mtime, &info->mtime); 743 ceph_decode_timespec(&atime, &info->atime);
653 ceph_decode_timespec(&ctime, &info->ctime); 744 ceph_decode_timespec(&mtime, &info->mtime);
654 queue_trunc = ceph_fill_file_size(inode, issued, 745 ceph_decode_timespec(&ctime, &info->ctime);
655 le32_to_cpu(info->truncate_seq), 746 ceph_fill_file_time(inode, issued,
656 le64_to_cpu(info->truncate_size), 747 le32_to_cpu(info->time_warp_seq),
657 le64_to_cpu(info->size)); 748 &ctime, &mtime, &atime);
658 ceph_fill_file_time(inode, issued, 749 }
659 le32_to_cpu(info->time_warp_seq), 750
660 &ctime, &mtime, &atime); 751 if (new_version ||
661 752 (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
662 ci->i_layout = info->layout; 753 ci->i_layout = info->layout;
663 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; 754 queue_trunc = ceph_fill_file_size(inode, issued,
755 le32_to_cpu(info->truncate_seq),
756 le64_to_cpu(info->truncate_size),
757 le64_to_cpu(info->size));
758 /* only update max_size on auth cap */
759 if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
760 ci->i_max_size != le64_to_cpu(info->max_size)) {
761 dout("max_size %lld -> %llu\n", ci->i_max_size,
762 le64_to_cpu(info->max_size));
763 ci->i_max_size = le64_to_cpu(info->max_size);
764 }
765 }
664 766
665 /* xattrs */ 767 /* xattrs */
666 /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */ 768 /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */
@@ -745,58 +847,6 @@ static int fill_inode(struct inode *inode,
745 dout(" marking %p complete (empty)\n", inode); 847 dout(" marking %p complete (empty)\n", inode);
746 __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); 848 __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
747 } 849 }
748no_change:
749 /* only update max_size on auth cap */
750 if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
751 ci->i_max_size != le64_to_cpu(info->max_size)) {
752 dout("max_size %lld -> %llu\n", ci->i_max_size,
753 le64_to_cpu(info->max_size));
754 ci->i_max_size = le64_to_cpu(info->max_size);
755 }
756
757 spin_unlock(&ci->i_ceph_lock);
758
759 /* queue truncate if we saw i_size decrease */
760 if (queue_trunc)
761 ceph_queue_vmtruncate(inode);
762
763 /* populate frag tree */
764 /* FIXME: move me up, if/when version reflects fragtree changes */
765 nsplits = le32_to_cpu(info->fragtree.nsplits);
766 mutex_lock(&ci->i_fragtree_mutex);
767 rb_node = rb_first(&ci->i_fragtree);
768 for (i = 0; i < nsplits; i++) {
769 u32 id = le32_to_cpu(info->fragtree.splits[i].frag);
770 frag = NULL;
771 while (rb_node) {
772 frag = rb_entry(rb_node, struct ceph_inode_frag, node);
773 if (ceph_frag_compare(frag->frag, id) >= 0) {
774 if (frag->frag != id)
775 frag = NULL;
776 else
777 rb_node = rb_next(rb_node);
778 break;
779 }
780 rb_node = rb_next(rb_node);
781 rb_erase(&frag->node, &ci->i_fragtree);
782 kfree(frag);
783 frag = NULL;
784 }
785 if (!frag) {
786 frag = __get_or_create_frag(ci, id);
787 if (IS_ERR(frag))
788 continue;
789 }
790 frag->split_by = le32_to_cpu(info->fragtree.splits[i].by);
791 dout(" frag %x split by %d\n", frag->frag, frag->split_by);
792 }
793 while (rb_node) {
794 frag = rb_entry(rb_node, struct ceph_inode_frag, node);
795 rb_node = rb_next(rb_node);
796 rb_erase(&frag->node, &ci->i_fragtree);
797 kfree(frag);
798 }
799 mutex_unlock(&ci->i_fragtree_mutex);
800 850
801 /* were we issued a capability? */ 851 /* were we issued a capability? */
802 if (info->cap.caps) { 852 if (info->cap.caps) {
@@ -809,30 +859,41 @@ no_change:
809 le32_to_cpu(info->cap.seq), 859 le32_to_cpu(info->cap.seq),
810 le32_to_cpu(info->cap.mseq), 860 le32_to_cpu(info->cap.mseq),
811 le64_to_cpu(info->cap.realm), 861 le64_to_cpu(info->cap.realm),
812 info->cap.flags, 862 info->cap.flags, &new_cap);
813 caps_reservation); 863 wake = true;
814 } else { 864 } else {
815 spin_lock(&ci->i_ceph_lock);
816 dout(" %p got snap_caps %s\n", inode, 865 dout(" %p got snap_caps %s\n", inode,
817 ceph_cap_string(le32_to_cpu(info->cap.caps))); 866 ceph_cap_string(le32_to_cpu(info->cap.caps)));
818 ci->i_snap_caps |= le32_to_cpu(info->cap.caps); 867 ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
819 if (cap_fmode >= 0) 868 if (cap_fmode >= 0)
820 __ceph_get_fmode(ci, cap_fmode); 869 __ceph_get_fmode(ci, cap_fmode);
821 spin_unlock(&ci->i_ceph_lock);
822 } 870 }
823 } else if (cap_fmode >= 0) { 871 } else if (cap_fmode >= 0) {
824 pr_warn("mds issued no caps on %llx.%llx\n", 872 pr_warn("mds issued no caps on %llx.%llx\n",
825 ceph_vinop(inode)); 873 ceph_vinop(inode));
826 __ceph_get_fmode(ci, cap_fmode); 874 __ceph_get_fmode(ci, cap_fmode);
827 } 875 }
876 spin_unlock(&ci->i_ceph_lock);
877
878 if (wake)
879 wake_up_all(&ci->i_cap_wq);
880
881 /* queue truncate if we saw i_size decrease */
882 if (queue_trunc)
883 ceph_queue_vmtruncate(inode);
884
885 /* populate frag tree */
886 if (S_ISDIR(inode->i_mode))
887 ceph_fill_fragtree(inode, &info->fragtree, dirinfo);
828 888
829 /* update delegation info? */ 889 /* update delegation info? */
830 if (dirinfo) 890 if (dirinfo)
831 ceph_fill_dirfrag(inode, dirinfo); 891 ceph_fill_dirfrag(inode, dirinfo);
832 892
833 err = 0; 893 err = 0;
834
835out: 894out:
895 if (new_cap)
896 ceph_put_cap(mdsc, new_cap);
836 if (xattr_blob) 897 if (xattr_blob)
837 ceph_buffer_put(xattr_blob); 898 ceph_buffer_put(xattr_blob);
838 return err; 899 return err;
@@ -1485,7 +1546,7 @@ static void ceph_invalidate_work(struct work_struct *work)
1485 orig_gen = ci->i_rdcache_gen; 1546 orig_gen = ci->i_rdcache_gen;
1486 spin_unlock(&ci->i_ceph_lock); 1547 spin_unlock(&ci->i_ceph_lock);
1487 1548
1488 truncate_inode_pages(inode->i_mapping, 0); 1549 truncate_pagecache(inode, 0);
1489 1550
1490 spin_lock(&ci->i_ceph_lock); 1551 spin_lock(&ci->i_ceph_lock);
1491 if (orig_gen == ci->i_rdcache_gen && 1552 if (orig_gen == ci->i_rdcache_gen &&
@@ -1588,7 +1649,7 @@ retry:
1588 ci->i_truncate_pending, to); 1649 ci->i_truncate_pending, to);
1589 spin_unlock(&ci->i_ceph_lock); 1650 spin_unlock(&ci->i_ceph_lock);
1590 1651
1591 truncate_inode_pages(inode->i_mapping, to); 1652 truncate_pagecache(inode, to);
1592 1653
1593 spin_lock(&ci->i_ceph_lock); 1654 spin_lock(&ci->i_ceph_lock);
1594 if (to == ci->i_truncate_size) { 1655 if (to == ci->i_truncate_size) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9a33b98cb000..92a2548278fc 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1558,6 +1558,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
1558 init_completion(&req->r_safe_completion); 1558 init_completion(&req->r_safe_completion);
1559 INIT_LIST_HEAD(&req->r_unsafe_item); 1559 INIT_LIST_HEAD(&req->r_unsafe_item);
1560 1560
1561 req->r_stamp = CURRENT_TIME;
1562
1561 req->r_op = op; 1563 req->r_op = op;
1562 req->r_direct_mode = mode; 1564 req->r_direct_mode = mode;
1563 return req; 1565 return req;
@@ -1783,7 +1785,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1783 } 1785 }
1784 1786
1785 len = sizeof(*head) + 1787 len = sizeof(*head) +
1786 pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)); 1788 pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
1789 sizeof(struct timespec);
1787 1790
1788 /* calculate (max) length for cap releases */ 1791 /* calculate (max) length for cap releases */
1789 len += sizeof(struct ceph_mds_request_release) * 1792 len += sizeof(struct ceph_mds_request_release) *
@@ -1800,6 +1803,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1800 goto out_free2; 1803 goto out_free2;
1801 } 1804 }
1802 1805
1806 msg->hdr.version = 2;
1803 msg->hdr.tid = cpu_to_le64(req->r_tid); 1807 msg->hdr.tid = cpu_to_le64(req->r_tid);
1804 1808
1805 head = msg->front.iov_base; 1809 head = msg->front.iov_base;
@@ -1836,6 +1840,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1836 mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); 1840 mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
1837 head->num_releases = cpu_to_le16(releases); 1841 head->num_releases = cpu_to_le16(releases);
1838 1842
1843 /* time stamp */
1844 ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp));
1845
1839 BUG_ON(p > end); 1846 BUG_ON(p > end);
1840 msg->front.iov_len = p - msg->front.iov_base; 1847 msg->front.iov_len = p - msg->front.iov_base;
1841 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 1848 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index e90cfccf93bd..e00737cf523c 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -194,6 +194,7 @@ struct ceph_mds_request {
194 int r_fmode; /* file mode, if expecting cap */ 194 int r_fmode; /* file mode, if expecting cap */
195 kuid_t r_uid; 195 kuid_t r_uid;
196 kgid_t r_gid; 196 kgid_t r_gid;
197 struct timespec r_stamp;
197 198
198 /* for choosing which mds to send this request to */ 199 /* for choosing which mds to send this request to */
199 int r_direct_mode; 200 int r_direct_mode;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index ead05cc1f447..12b20744e386 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -292,7 +292,6 @@ struct ceph_inode_info {
292 struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or 292 struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or
293 dirty|flushing caps */ 293 dirty|flushing caps */
294 unsigned i_snap_caps; /* cap bits for snapped files */ 294 unsigned i_snap_caps; /* cap bits for snapped files */
295 unsigned i_cap_exporting_issued;
296 295
297 int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ 296 int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */
298 297
@@ -775,11 +774,13 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode)
775extern const char *ceph_cap_string(int c); 774extern const char *ceph_cap_string(int c);
776extern void ceph_handle_caps(struct ceph_mds_session *session, 775extern void ceph_handle_caps(struct ceph_mds_session *session,
777 struct ceph_msg *msg); 776 struct ceph_msg *msg);
778extern int ceph_add_cap(struct inode *inode, 777extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
779 struct ceph_mds_session *session, u64 cap_id, 778 struct ceph_cap_reservation *ctx);
780 int fmode, unsigned issued, unsigned wanted, 779extern void ceph_add_cap(struct inode *inode,
781 unsigned cap, unsigned seq, u64 realmino, int flags, 780 struct ceph_mds_session *session, u64 cap_id,
782 struct ceph_cap_reservation *caps_reservation); 781 int fmode, unsigned issued, unsigned wanted,
782 unsigned cap, unsigned seq, u64 realmino, int flags,
783 struct ceph_cap **new_cap);
783extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); 784extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
784extern void ceph_put_cap(struct ceph_mds_client *mdsc, 785extern void ceph_put_cap(struct ceph_mds_client *mdsc,
785 struct ceph_cap *cap); 786 struct ceph_cap *cap);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 6aaa8112c538..2c90d07c0b3a 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -725,8 +725,7 @@ out_nls:
725 goto out; 725 goto out;
726} 726}
727 727
728static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 728static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
729 unsigned long nr_segs, loff_t pos)
730{ 729{
731 struct inode *inode = file_inode(iocb->ki_filp); 730 struct inode *inode = file_inode(iocb->ki_filp);
732 struct cifsInodeInfo *cinode = CIFS_I(inode); 731 struct cifsInodeInfo *cinode = CIFS_I(inode);
@@ -737,14 +736,14 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
737 if (written) 736 if (written)
738 return written; 737 return written;
739 738
740 written = generic_file_aio_write(iocb, iov, nr_segs, pos); 739 written = generic_file_write_iter(iocb, from);
741 740
742 if (CIFS_CACHE_WRITE(CIFS_I(inode))) 741 if (CIFS_CACHE_WRITE(CIFS_I(inode)))
743 goto out; 742 goto out;
744 743
745 rc = filemap_fdatawrite(inode->i_mapping); 744 rc = filemap_fdatawrite(inode->i_mapping);
746 if (rc) 745 if (rc)
747 cifs_dbg(FYI, "cifs_file_aio_write: %d rc on %p inode\n", 746 cifs_dbg(FYI, "cifs_file_write_iter: %d rc on %p inode\n",
748 rc, inode); 747 rc, inode);
749 748
750out: 749out:
@@ -880,10 +879,10 @@ const struct inode_operations cifs_symlink_inode_ops = {
880}; 879};
881 880
882const struct file_operations cifs_file_ops = { 881const struct file_operations cifs_file_ops = {
883 .read = do_sync_read, 882 .read = new_sync_read,
884 .write = do_sync_write, 883 .write = new_sync_write,
885 .aio_read = generic_file_aio_read, 884 .read_iter = generic_file_read_iter,
886 .aio_write = cifs_file_aio_write, 885 .write_iter = cifs_file_write_iter,
887 .open = cifs_open, 886 .open = cifs_open,
888 .release = cifs_close, 887 .release = cifs_close,
889 .lock = cifs_lock, 888 .lock = cifs_lock,
@@ -899,10 +898,10 @@ const struct file_operations cifs_file_ops = {
899}; 898};
900 899
901const struct file_operations cifs_file_strict_ops = { 900const struct file_operations cifs_file_strict_ops = {
902 .read = do_sync_read, 901 .read = new_sync_read,
903 .write = do_sync_write, 902 .write = new_sync_write,
904 .aio_read = cifs_strict_readv, 903 .read_iter = cifs_strict_readv,
905 .aio_write = cifs_strict_writev, 904 .write_iter = cifs_strict_writev,
906 .open = cifs_open, 905 .open = cifs_open,
907 .release = cifs_close, 906 .release = cifs_close,
908 .lock = cifs_lock, 907 .lock = cifs_lock,
@@ -919,10 +918,10 @@ const struct file_operations cifs_file_strict_ops = {
919 918
920const struct file_operations cifs_file_direct_ops = { 919const struct file_operations cifs_file_direct_ops = {
921 /* BB reevaluate whether they can be done with directio, no cache */ 920 /* BB reevaluate whether they can be done with directio, no cache */
922 .read = do_sync_read, 921 .read = new_sync_read,
923 .write = do_sync_write, 922 .write = new_sync_write,
924 .aio_read = cifs_user_readv, 923 .read_iter = cifs_user_readv,
925 .aio_write = cifs_user_writev, 924 .write_iter = cifs_user_writev,
926 .open = cifs_open, 925 .open = cifs_open,
927 .release = cifs_close, 926 .release = cifs_close,
928 .lock = cifs_lock, 927 .lock = cifs_lock,
@@ -938,10 +937,10 @@ const struct file_operations cifs_file_direct_ops = {
938}; 937};
939 938
940const struct file_operations cifs_file_nobrl_ops = { 939const struct file_operations cifs_file_nobrl_ops = {
941 .read = do_sync_read, 940 .read = new_sync_read,
942 .write = do_sync_write, 941 .write = new_sync_write,
943 .aio_read = generic_file_aio_read, 942 .read_iter = generic_file_read_iter,
944 .aio_write = cifs_file_aio_write, 943 .write_iter = cifs_file_write_iter,
945 .open = cifs_open, 944 .open = cifs_open,
946 .release = cifs_close, 945 .release = cifs_close,
947 .fsync = cifs_fsync, 946 .fsync = cifs_fsync,
@@ -956,10 +955,10 @@ const struct file_operations cifs_file_nobrl_ops = {
956}; 955};
957 956
958const struct file_operations cifs_file_strict_nobrl_ops = { 957const struct file_operations cifs_file_strict_nobrl_ops = {
959 .read = do_sync_read, 958 .read = new_sync_read,
960 .write = do_sync_write, 959 .write = new_sync_write,
961 .aio_read = cifs_strict_readv, 960 .read_iter = cifs_strict_readv,
962 .aio_write = cifs_strict_writev, 961 .write_iter = cifs_strict_writev,
963 .open = cifs_open, 962 .open = cifs_open,
964 .release = cifs_close, 963 .release = cifs_close,
965 .fsync = cifs_strict_fsync, 964 .fsync = cifs_strict_fsync,
@@ -975,10 +974,10 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
975 974
976const struct file_operations cifs_file_direct_nobrl_ops = { 975const struct file_operations cifs_file_direct_nobrl_ops = {
977 /* BB reevaluate whether they can be done with directio, no cache */ 976 /* BB reevaluate whether they can be done with directio, no cache */
978 .read = do_sync_read, 977 .read = new_sync_read,
979 .write = do_sync_write, 978 .write = new_sync_write,
980 .aio_read = cifs_user_readv, 979 .read_iter = cifs_user_readv,
981 .aio_write = cifs_user_writev, 980 .write_iter = cifs_user_writev,
982 .open = cifs_open, 981 .open = cifs_open,
983 .release = cifs_close, 982 .release = cifs_close,
984 .fsync = cifs_fsync, 983 .fsync = cifs_fsync,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 8fe51166d6e3..70f178a7c759 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -95,14 +95,10 @@ extern const struct file_operations cifs_file_strict_nobrl_ops;
95extern int cifs_open(struct inode *inode, struct file *file); 95extern int cifs_open(struct inode *inode, struct file *file);
96extern int cifs_close(struct inode *inode, struct file *file); 96extern int cifs_close(struct inode *inode, struct file *file);
97extern int cifs_closedir(struct inode *inode, struct file *file); 97extern int cifs_closedir(struct inode *inode, struct file *file);
98extern ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov, 98extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to);
99 unsigned long nr_segs, loff_t pos); 99extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
100extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, 100extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from);
101 unsigned long nr_segs, loff_t pos); 101extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from);
102extern ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
103 unsigned long nr_segs, loff_t pos);
104extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
105 unsigned long nr_segs, loff_t pos);
106extern int cifs_lock(struct file *, int, struct file_lock *); 102extern int cifs_lock(struct file *, int, struct file_lock *);
107extern int cifs_fsync(struct file *, loff_t, loff_t, int); 103extern int cifs_fsync(struct file *, loff_t, loff_t, int);
108extern int cifs_strict_fsync(struct file *, loff_t, loff_t, int); 104extern int cifs_strict_fsync(struct file *, loff_t, loff_t, int);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 208f56eca4bf..e90a1e9aa627 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2385,14 +2385,12 @@ cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2385} 2385}
2386 2386
2387static ssize_t 2387static ssize_t
2388cifs_iovec_write(struct file *file, const struct iovec *iov, 2388cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
2389 unsigned long nr_segs, loff_t *poffset)
2390{ 2389{
2391 unsigned long nr_pages, i; 2390 unsigned long nr_pages, i;
2392 size_t bytes, copied, len, cur_len; 2391 size_t bytes, copied, len, cur_len;
2393 ssize_t total_written = 0; 2392 ssize_t total_written = 0;
2394 loff_t offset; 2393 loff_t offset;
2395 struct iov_iter it;
2396 struct cifsFileInfo *open_file; 2394 struct cifsFileInfo *open_file;
2397 struct cifs_tcon *tcon; 2395 struct cifs_tcon *tcon;
2398 struct cifs_sb_info *cifs_sb; 2396 struct cifs_sb_info *cifs_sb;
@@ -2401,14 +2399,16 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
2401 int rc; 2399 int rc;
2402 pid_t pid; 2400 pid_t pid;
2403 2401
2404 len = iov_length(iov, nr_segs); 2402 len = iov_iter_count(from);
2405 if (!len)
2406 return 0;
2407
2408 rc = generic_write_checks(file, poffset, &len, 0); 2403 rc = generic_write_checks(file, poffset, &len, 0);
2409 if (rc) 2404 if (rc)
2410 return rc; 2405 return rc;
2411 2406
2407 if (!len)
2408 return 0;
2409
2410 iov_iter_truncate(from, len);
2411
2412 INIT_LIST_HEAD(&wdata_list); 2412 INIT_LIST_HEAD(&wdata_list);
2413 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 2413 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2414 open_file = file->private_data; 2414 open_file = file->private_data;
@@ -2424,7 +2424,6 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
2424 else 2424 else
2425 pid = current->tgid; 2425 pid = current->tgid;
2426 2426
2427 iov_iter_init(&it, iov, nr_segs, len, 0);
2428 do { 2427 do {
2429 size_t save_len; 2428 size_t save_len;
2430 2429
@@ -2444,11 +2443,10 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
2444 2443
2445 save_len = cur_len; 2444 save_len = cur_len;
2446 for (i = 0; i < nr_pages; i++) { 2445 for (i = 0; i < nr_pages; i++) {
2447 bytes = min_t(const size_t, cur_len, PAGE_SIZE); 2446 bytes = min_t(size_t, cur_len, PAGE_SIZE);
2448 copied = iov_iter_copy_from_user(wdata->pages[i], &it, 2447 copied = copy_page_from_iter(wdata->pages[i], 0, bytes,
2449 0, bytes); 2448 from);
2450 cur_len -= copied; 2449 cur_len -= copied;
2451 iov_iter_advance(&it, copied);
2452 /* 2450 /*
2453 * If we didn't copy as much as we expected, then that 2451 * If we didn't copy as much as we expected, then that
2454 * may mean we trod into an unmapped area. Stop copying 2452 * may mean we trod into an unmapped area. Stop copying
@@ -2546,11 +2544,11 @@ restart_loop:
2546 return total_written ? total_written : (ssize_t)rc; 2544 return total_written ? total_written : (ssize_t)rc;
2547} 2545}
2548 2546
2549ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, 2547ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2550 unsigned long nr_segs, loff_t pos)
2551{ 2548{
2552 ssize_t written; 2549 ssize_t written;
2553 struct inode *inode; 2550 struct inode *inode;
2551 loff_t pos = iocb->ki_pos;
2554 2552
2555 inode = file_inode(iocb->ki_filp); 2553 inode = file_inode(iocb->ki_filp);
2556 2554
@@ -2560,7 +2558,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2560 * write request. 2558 * write request.
2561 */ 2559 */
2562 2560
2563 written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos); 2561 written = cifs_iovec_write(iocb->ki_filp, from, &pos);
2564 if (written > 0) { 2562 if (written > 0) {
2565 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags); 2563 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags);
2566 iocb->ki_pos = pos; 2564 iocb->ki_pos = pos;
@@ -2570,8 +2568,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2570} 2568}
2571 2569
2572static ssize_t 2570static ssize_t
2573cifs_writev(struct kiocb *iocb, const struct iovec *iov, 2571cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2574 unsigned long nr_segs, loff_t pos)
2575{ 2572{
2576 struct file *file = iocb->ki_filp; 2573 struct file *file = iocb->ki_filp;
2577 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 2574 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
@@ -2589,10 +2586,10 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2589 mutex_lock(&inode->i_mutex); 2586 mutex_lock(&inode->i_mutex);
2590 if (file->f_flags & O_APPEND) 2587 if (file->f_flags & O_APPEND)
2591 lock_pos = i_size_read(inode); 2588 lock_pos = i_size_read(inode);
2592 if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs), 2589 if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from),
2593 server->vals->exclusive_lock_type, NULL, 2590 server->vals->exclusive_lock_type, NULL,
2594 CIFS_WRITE_OP)) { 2591 CIFS_WRITE_OP)) {
2595 rc = __generic_file_aio_write(iocb, iov, nr_segs); 2592 rc = __generic_file_write_iter(iocb, from);
2596 mutex_unlock(&inode->i_mutex); 2593 mutex_unlock(&inode->i_mutex);
2597 2594
2598 if (rc > 0) { 2595 if (rc > 0) {
@@ -2610,8 +2607,7 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2610} 2607}
2611 2608
2612ssize_t 2609ssize_t
2613cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, 2610cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2614 unsigned long nr_segs, loff_t pos)
2615{ 2611{
2616 struct inode *inode = file_inode(iocb->ki_filp); 2612 struct inode *inode = file_inode(iocb->ki_filp);
2617 struct cifsInodeInfo *cinode = CIFS_I(inode); 2613 struct cifsInodeInfo *cinode = CIFS_I(inode);
@@ -2629,11 +2625,10 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2629 if (cap_unix(tcon->ses) && 2625 if (cap_unix(tcon->ses) &&
2630 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) 2626 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2631 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { 2627 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2632 written = generic_file_aio_write( 2628 written = generic_file_write_iter(iocb, from);
2633 iocb, iov, nr_segs, pos);
2634 goto out; 2629 goto out;
2635 } 2630 }
2636 written = cifs_writev(iocb, iov, nr_segs, pos); 2631 written = cifs_writev(iocb, from);
2637 goto out; 2632 goto out;
2638 } 2633 }
2639 /* 2634 /*
@@ -2642,7 +2637,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2642 * affected pages because it may cause a error with mandatory locks on 2637 * affected pages because it may cause a error with mandatory locks on
2643 * these pages but not on the region from pos to ppos+len-1. 2638 * these pages but not on the region from pos to ppos+len-1.
2644 */ 2639 */
2645 written = cifs_user_writev(iocb, iov, nr_segs, pos); 2640 written = cifs_user_writev(iocb, from);
2646 if (written > 0 && CIFS_CACHE_READ(cinode)) { 2641 if (written > 0 && CIFS_CACHE_READ(cinode)) {
2647 /* 2642 /*
2648 * Windows 7 server can delay breaking level2 oplock if a write 2643 * Windows 7 server can delay breaking level2 oplock if a write
@@ -2831,32 +2826,25 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2831 return total_read > 0 ? total_read : result; 2826 return total_read > 0 ? total_read : result;
2832} 2827}
2833 2828
2834ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov, 2829ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
2835 unsigned long nr_segs, loff_t pos)
2836{ 2830{
2837 struct file *file = iocb->ki_filp; 2831 struct file *file = iocb->ki_filp;
2838 ssize_t rc; 2832 ssize_t rc;
2839 size_t len, cur_len; 2833 size_t len, cur_len;
2840 ssize_t total_read = 0; 2834 ssize_t total_read = 0;
2841 loff_t offset = pos; 2835 loff_t offset = iocb->ki_pos;
2842 unsigned int npages; 2836 unsigned int npages;
2843 struct cifs_sb_info *cifs_sb; 2837 struct cifs_sb_info *cifs_sb;
2844 struct cifs_tcon *tcon; 2838 struct cifs_tcon *tcon;
2845 struct cifsFileInfo *open_file; 2839 struct cifsFileInfo *open_file;
2846 struct cifs_readdata *rdata, *tmp; 2840 struct cifs_readdata *rdata, *tmp;
2847 struct list_head rdata_list; 2841 struct list_head rdata_list;
2848 struct iov_iter to;
2849 pid_t pid; 2842 pid_t pid;
2850 2843
2851 if (!nr_segs) 2844 len = iov_iter_count(to);
2852 return 0;
2853
2854 len = iov_length(iov, nr_segs);
2855 if (!len) 2845 if (!len)
2856 return 0; 2846 return 0;
2857 2847
2858 iov_iter_init(&to, iov, nr_segs, len, 0);
2859
2860 INIT_LIST_HEAD(&rdata_list); 2848 INIT_LIST_HEAD(&rdata_list);
2861 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 2849 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2862 open_file = file->private_data; 2850 open_file = file->private_data;
@@ -2914,7 +2902,7 @@ error:
2914 if (!list_empty(&rdata_list)) 2902 if (!list_empty(&rdata_list))
2915 rc = 0; 2903 rc = 0;
2916 2904
2917 len = iov_iter_count(&to); 2905 len = iov_iter_count(to);
2918 /* the loop below should proceed in the order of increasing offsets */ 2906 /* the loop below should proceed in the order of increasing offsets */
2919 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) { 2907 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2920 again: 2908 again:
@@ -2931,7 +2919,7 @@ error:
2931 goto again; 2919 goto again;
2932 } 2920 }
2933 } else { 2921 } else {
2934 rc = cifs_readdata_to_iov(rdata, &to); 2922 rc = cifs_readdata_to_iov(rdata, to);
2935 } 2923 }
2936 2924
2937 } 2925 }
@@ -2939,7 +2927,7 @@ error:
2939 kref_put(&rdata->refcount, cifs_uncached_readdata_release); 2927 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2940 } 2928 }
2941 2929
2942 total_read = len - iov_iter_count(&to); 2930 total_read = len - iov_iter_count(to);
2943 2931
2944 cifs_stats_bytes_read(tcon, total_read); 2932 cifs_stats_bytes_read(tcon, total_read);
2945 2933
@@ -2948,15 +2936,14 @@ error:
2948 rc = 0; 2936 rc = 0;
2949 2937
2950 if (total_read) { 2938 if (total_read) {
2951 iocb->ki_pos = pos + total_read; 2939 iocb->ki_pos += total_read;
2952 return total_read; 2940 return total_read;
2953 } 2941 }
2954 return rc; 2942 return rc;
2955} 2943}
2956 2944
2957ssize_t 2945ssize_t
2958cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, 2946cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
2959 unsigned long nr_segs, loff_t pos)
2960{ 2947{
2961 struct inode *inode = file_inode(iocb->ki_filp); 2948 struct inode *inode = file_inode(iocb->ki_filp);
2962 struct cifsInodeInfo *cinode = CIFS_I(inode); 2949 struct cifsInodeInfo *cinode = CIFS_I(inode);
@@ -2975,22 +2962,22 @@ cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2975 * pos+len-1. 2962 * pos+len-1.
2976 */ 2963 */
2977 if (!CIFS_CACHE_READ(cinode)) 2964 if (!CIFS_CACHE_READ(cinode))
2978 return cifs_user_readv(iocb, iov, nr_segs, pos); 2965 return cifs_user_readv(iocb, to);
2979 2966
2980 if (cap_unix(tcon->ses) && 2967 if (cap_unix(tcon->ses) &&
2981 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2968 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2982 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2969 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2983 return generic_file_aio_read(iocb, iov, nr_segs, pos); 2970 return generic_file_read_iter(iocb, to);
2984 2971
2985 /* 2972 /*
2986 * We need to hold the sem to be sure nobody modifies lock list 2973 * We need to hold the sem to be sure nobody modifies lock list
2987 * with a brlock that prevents reading. 2974 * with a brlock that prevents reading.
2988 */ 2975 */
2989 down_read(&cinode->lock_sem); 2976 down_read(&cinode->lock_sem);
2990 if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), 2977 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
2991 tcon->ses->server->vals->shared_lock_type, 2978 tcon->ses->server->vals->shared_lock_type,
2992 NULL, CIFS_READ_OP)) 2979 NULL, CIFS_READ_OP))
2993 rc = generic_file_aio_read(iocb, iov, nr_segs, pos); 2980 rc = generic_file_read_iter(iocb, to);
2994 up_read(&cinode->lock_sem); 2981 up_read(&cinode->lock_sem);
2995 return rc; 2982 return rc;
2996} 2983}
@@ -3703,8 +3690,8 @@ void cifs_oplock_break(struct work_struct *work)
3703 * Direct IO is not yet supported in the cached mode. 3690 * Direct IO is not yet supported in the cached mode.
3704 */ 3691 */
3705static ssize_t 3692static ssize_t
3706cifs_direct_io(int rw, struct kiocb *iocb, const struct iovec *iov, 3693cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter,
3707 loff_t pos, unsigned long nr_segs) 3694 loff_t pos)
3708{ 3695{
3709 /* 3696 /*
3710 * FIXME 3697 * FIXME
diff --git a/fs/dcache.c b/fs/dcache.c
index 1792d6075b4f..06f65857a855 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -532,10 +532,12 @@ static inline struct dentry *lock_parent(struct dentry *dentry)
532 struct dentry *parent = dentry->d_parent; 532 struct dentry *parent = dentry->d_parent;
533 if (IS_ROOT(dentry)) 533 if (IS_ROOT(dentry))
534 return NULL; 534 return NULL;
535 if (unlikely((int)dentry->d_lockref.count < 0))
536 return NULL;
535 if (likely(spin_trylock(&parent->d_lock))) 537 if (likely(spin_trylock(&parent->d_lock)))
536 return parent; 538 return parent;
537 spin_unlock(&dentry->d_lock);
538 rcu_read_lock(); 539 rcu_read_lock();
540 spin_unlock(&dentry->d_lock);
539again: 541again:
540 parent = ACCESS_ONCE(dentry->d_parent); 542 parent = ACCESS_ONCE(dentry->d_parent);
541 spin_lock(&parent->d_lock); 543 spin_lock(&parent->d_lock);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 31ba0935e32e..98040ba388ac 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -77,7 +77,6 @@ struct dio_submit {
77 unsigned blocks_available; /* At block_in_file. changes */ 77 unsigned blocks_available; /* At block_in_file. changes */
78 int reap_counter; /* rate limit reaping */ 78 int reap_counter; /* rate limit reaping */
79 sector_t final_block_in_request;/* doesn't change */ 79 sector_t final_block_in_request;/* doesn't change */
80 unsigned first_block_in_page; /* doesn't change, Used only once */
81 int boundary; /* prev block is at a boundary */ 80 int boundary; /* prev block is at a boundary */
82 get_block_t *get_block; /* block mapping function */ 81 get_block_t *get_block; /* block mapping function */
83 dio_submit_t *submit_io; /* IO submition function */ 82 dio_submit_t *submit_io; /* IO submition function */
@@ -98,19 +97,14 @@ struct dio_submit {
98 sector_t cur_page_block; /* Where it starts */ 97 sector_t cur_page_block; /* Where it starts */
99 loff_t cur_page_fs_offset; /* Offset in file */ 98 loff_t cur_page_fs_offset; /* Offset in file */
100 99
101 /* 100 struct iov_iter *iter;
102 * Page fetching state. These variables belong to dio_refill_pages().
103 */
104 int curr_page; /* changes */
105 int total_pages; /* doesn't change */
106 unsigned long curr_user_address;/* changes */
107
108 /* 101 /*
109 * Page queue. These variables belong to dio_refill_pages() and 102 * Page queue. These variables belong to dio_refill_pages() and
110 * dio_get_page(). 103 * dio_get_page().
111 */ 104 */
112 unsigned head; /* next page to process */ 105 unsigned head; /* next page to process */
113 unsigned tail; /* last valid page + 1 */ 106 unsigned tail; /* last valid page + 1 */
107 size_t from, to;
114}; 108};
115 109
116/* dio_state communicated between submission path and end_io */ 110/* dio_state communicated between submission path and end_io */
@@ -163,15 +157,10 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio)
163 */ 157 */
164static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) 158static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
165{ 159{
166 int ret; 160 ssize_t ret;
167 int nr_pages;
168 161
169 nr_pages = min(sdio->total_pages - sdio->curr_page, DIO_PAGES); 162 ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE,
170 ret = get_user_pages_fast( 163 &sdio->from);
171 sdio->curr_user_address, /* Where from? */
172 nr_pages, /* How many pages? */
173 dio->rw == READ, /* Write to memory? */
174 &dio->pages[0]); /* Put results here */
175 164
176 if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) { 165 if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) {
177 struct page *page = ZERO_PAGE(0); 166 struct page *page = ZERO_PAGE(0);
@@ -186,18 +175,19 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
186 dio->pages[0] = page; 175 dio->pages[0] = page;
187 sdio->head = 0; 176 sdio->head = 0;
188 sdio->tail = 1; 177 sdio->tail = 1;
189 ret = 0; 178 sdio->from = 0;
190 goto out; 179 sdio->to = PAGE_SIZE;
180 return 0;
191 } 181 }
192 182
193 if (ret >= 0) { 183 if (ret >= 0) {
194 sdio->curr_user_address += ret * PAGE_SIZE; 184 iov_iter_advance(sdio->iter, ret);
195 sdio->curr_page += ret; 185 ret += sdio->from;
196 sdio->head = 0; 186 sdio->head = 0;
197 sdio->tail = ret; 187 sdio->tail = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
198 ret = 0; 188 sdio->to = ((ret - 1) & (PAGE_SIZE - 1)) + 1;
189 return 0;
199 } 190 }
200out:
201 return ret; 191 return ret;
202} 192}
203 193
@@ -208,8 +198,9 @@ out:
208 * L1 cache. 198 * L1 cache.
209 */ 199 */
210static inline struct page *dio_get_page(struct dio *dio, 200static inline struct page *dio_get_page(struct dio *dio,
211 struct dio_submit *sdio) 201 struct dio_submit *sdio, size_t *from, size_t *to)
212{ 202{
203 int n;
213 if (dio_pages_present(sdio) == 0) { 204 if (dio_pages_present(sdio) == 0) {
214 int ret; 205 int ret;
215 206
@@ -218,7 +209,10 @@ static inline struct page *dio_get_page(struct dio *dio,
218 return ERR_PTR(ret); 209 return ERR_PTR(ret);
219 BUG_ON(dio_pages_present(sdio) == 0); 210 BUG_ON(dio_pages_present(sdio) == 0);
220 } 211 }
221 return dio->pages[sdio->head++]; 212 n = sdio->head++;
213 *from = n ? 0 : sdio->from;
214 *to = (n == sdio->tail - 1) ? sdio->to : PAGE_SIZE;
215 return dio->pages[n];
222} 216}
223 217
224/** 218/**
@@ -422,8 +416,8 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
422 */ 416 */
423static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio) 417static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
424{ 418{
425 while (dio_pages_present(sdio)) 419 while (sdio->head < sdio->tail)
426 page_cache_release(dio_get_page(dio, sdio)); 420 page_cache_release(dio->pages[sdio->head++]);
427} 421}
428 422
429/* 423/*
@@ -912,23 +906,18 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
912 struct buffer_head *map_bh) 906 struct buffer_head *map_bh)
913{ 907{
914 const unsigned blkbits = sdio->blkbits; 908 const unsigned blkbits = sdio->blkbits;
915 const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
916 struct page *page;
917 unsigned block_in_page;
918 int ret = 0; 909 int ret = 0;
919 910
920 /* The I/O can start at any block offset within the first page */
921 block_in_page = sdio->first_block_in_page;
922
923 while (sdio->block_in_file < sdio->final_block_in_request) { 911 while (sdio->block_in_file < sdio->final_block_in_request) {
924 page = dio_get_page(dio, sdio); 912 struct page *page;
913 size_t from, to;
914 page = dio_get_page(dio, sdio, &from, &to);
925 if (IS_ERR(page)) { 915 if (IS_ERR(page)) {
926 ret = PTR_ERR(page); 916 ret = PTR_ERR(page);
927 goto out; 917 goto out;
928 } 918 }
929 919
930 while (block_in_page < blocks_per_page) { 920 while (from < to) {
931 unsigned offset_in_page = block_in_page << blkbits;
932 unsigned this_chunk_bytes; /* # of bytes mapped */ 921 unsigned this_chunk_bytes; /* # of bytes mapped */
933 unsigned this_chunk_blocks; /* # of blocks */ 922 unsigned this_chunk_blocks; /* # of blocks */
934 unsigned u; 923 unsigned u;
@@ -999,10 +988,10 @@ do_holes:
999 page_cache_release(page); 988 page_cache_release(page);
1000 goto out; 989 goto out;
1001 } 990 }
1002 zero_user(page, block_in_page << blkbits, 991 zero_user(page, from, 1 << blkbits);
1003 1 << blkbits);
1004 sdio->block_in_file++; 992 sdio->block_in_file++;
1005 block_in_page++; 993 from += 1 << blkbits;
994 dio->result += 1 << blkbits;
1006 goto next_block; 995 goto next_block;
1007 } 996 }
1008 997
@@ -1019,7 +1008,7 @@ do_holes:
1019 * can add to this page 1008 * can add to this page
1020 */ 1009 */
1021 this_chunk_blocks = sdio->blocks_available; 1010 this_chunk_blocks = sdio->blocks_available;
1022 u = (PAGE_SIZE - offset_in_page) >> blkbits; 1011 u = (to - from) >> blkbits;
1023 if (this_chunk_blocks > u) 1012 if (this_chunk_blocks > u)
1024 this_chunk_blocks = u; 1013 this_chunk_blocks = u;
1025 u = sdio->final_block_in_request - sdio->block_in_file; 1014 u = sdio->final_block_in_request - sdio->block_in_file;
@@ -1031,7 +1020,7 @@ do_holes:
1031 if (this_chunk_blocks == sdio->blocks_available) 1020 if (this_chunk_blocks == sdio->blocks_available)
1032 sdio->boundary = buffer_boundary(map_bh); 1021 sdio->boundary = buffer_boundary(map_bh);
1033 ret = submit_page_section(dio, sdio, page, 1022 ret = submit_page_section(dio, sdio, page,
1034 offset_in_page, 1023 from,
1035 this_chunk_bytes, 1024 this_chunk_bytes,
1036 sdio->next_block_for_io, 1025 sdio->next_block_for_io,
1037 map_bh); 1026 map_bh);
@@ -1042,7 +1031,8 @@ do_holes:
1042 sdio->next_block_for_io += this_chunk_blocks; 1031 sdio->next_block_for_io += this_chunk_blocks;
1043 1032
1044 sdio->block_in_file += this_chunk_blocks; 1033 sdio->block_in_file += this_chunk_blocks;
1045 block_in_page += this_chunk_blocks; 1034 from += this_chunk_bytes;
1035 dio->result += this_chunk_bytes;
1046 sdio->blocks_available -= this_chunk_blocks; 1036 sdio->blocks_available -= this_chunk_blocks;
1047next_block: 1037next_block:
1048 BUG_ON(sdio->block_in_file > sdio->final_block_in_request); 1038 BUG_ON(sdio->block_in_file > sdio->final_block_in_request);
@@ -1052,7 +1042,6 @@ next_block:
1052 1042
1053 /* Drop the ref which was taken in get_user_pages() */ 1043 /* Drop the ref which was taken in get_user_pages() */
1054 page_cache_release(page); 1044 page_cache_release(page);
1055 block_in_page = 0;
1056 } 1045 }
1057out: 1046out:
1058 return ret; 1047 return ret;
@@ -1107,24 +1096,20 @@ static inline int drop_refcount(struct dio *dio)
1107 */ 1096 */
1108static inline ssize_t 1097static inline ssize_t
1109do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1098do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1110 struct block_device *bdev, const struct iovec *iov, loff_t offset, 1099 struct block_device *bdev, struct iov_iter *iter, loff_t offset,
1111 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 1100 get_block_t get_block, dio_iodone_t end_io,
1112 dio_submit_t submit_io, int flags) 1101 dio_submit_t submit_io, int flags)
1113{ 1102{
1114 int seg;
1115 size_t size;
1116 unsigned long addr;
1117 unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); 1103 unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits);
1118 unsigned blkbits = i_blkbits; 1104 unsigned blkbits = i_blkbits;
1119 unsigned blocksize_mask = (1 << blkbits) - 1; 1105 unsigned blocksize_mask = (1 << blkbits) - 1;
1120 ssize_t retval = -EINVAL; 1106 ssize_t retval = -EINVAL;
1121 loff_t end = offset; 1107 loff_t end = offset + iov_iter_count(iter);
1122 struct dio *dio; 1108 struct dio *dio;
1123 struct dio_submit sdio = { 0, }; 1109 struct dio_submit sdio = { 0, };
1124 unsigned long user_addr;
1125 size_t bytes;
1126 struct buffer_head map_bh = { 0, }; 1110 struct buffer_head map_bh = { 0, };
1127 struct blk_plug plug; 1111 struct blk_plug plug;
1112 unsigned long align = offset | iov_iter_alignment(iter);
1128 1113
1129 if (rw & WRITE) 1114 if (rw & WRITE)
1130 rw = WRITE_ODIRECT; 1115 rw = WRITE_ODIRECT;
@@ -1134,32 +1119,16 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1134 * the early prefetch in the caller enough time. 1119 * the early prefetch in the caller enough time.
1135 */ 1120 */
1136 1121
1137 if (offset & blocksize_mask) { 1122 if (align & blocksize_mask) {
1138 if (bdev) 1123 if (bdev)
1139 blkbits = blksize_bits(bdev_logical_block_size(bdev)); 1124 blkbits = blksize_bits(bdev_logical_block_size(bdev));
1140 blocksize_mask = (1 << blkbits) - 1; 1125 blocksize_mask = (1 << blkbits) - 1;
1141 if (offset & blocksize_mask) 1126 if (align & blocksize_mask)
1142 goto out; 1127 goto out;
1143 } 1128 }
1144 1129
1145 /* Check the memory alignment. Blocks cannot straddle pages */
1146 for (seg = 0; seg < nr_segs; seg++) {
1147 addr = (unsigned long)iov[seg].iov_base;
1148 size = iov[seg].iov_len;
1149 end += size;
1150 if (unlikely((addr & blocksize_mask) ||
1151 (size & blocksize_mask))) {
1152 if (bdev)
1153 blkbits = blksize_bits(
1154 bdev_logical_block_size(bdev));
1155 blocksize_mask = (1 << blkbits) - 1;
1156 if ((addr & blocksize_mask) || (size & blocksize_mask))
1157 goto out;
1158 }
1159 }
1160
1161 /* watch out for a 0 len io from a tricksy fs */ 1130 /* watch out for a 0 len io from a tricksy fs */
1162 if (rw == READ && end == offset) 1131 if (rw == READ && !iov_iter_count(iter))
1163 return 0; 1132 return 0;
1164 1133
1165 dio = kmem_cache_alloc(dio_cache, GFP_KERNEL); 1134 dio = kmem_cache_alloc(dio_cache, GFP_KERNEL);
@@ -1249,6 +1218,10 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1249 spin_lock_init(&dio->bio_lock); 1218 spin_lock_init(&dio->bio_lock);
1250 dio->refcount = 1; 1219 dio->refcount = 1;
1251 1220
1221 sdio.iter = iter;
1222 sdio.final_block_in_request =
1223 (offset + iov_iter_count(iter)) >> blkbits;
1224
1252 /* 1225 /*
1253 * In case of non-aligned buffers, we may need 2 more 1226 * In case of non-aligned buffers, we may need 2 more
1254 * pages since we need to zero out first and last block. 1227 * pages since we need to zero out first and last block.
@@ -1256,47 +1229,13 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1256 if (unlikely(sdio.blkfactor)) 1229 if (unlikely(sdio.blkfactor))
1257 sdio.pages_in_io = 2; 1230 sdio.pages_in_io = 2;
1258 1231
1259 for (seg = 0; seg < nr_segs; seg++) { 1232 sdio.pages_in_io += iov_iter_npages(iter, INT_MAX);
1260 user_addr = (unsigned long)iov[seg].iov_base;
1261 sdio.pages_in_io +=
1262 ((user_addr + iov[seg].iov_len + PAGE_SIZE-1) /
1263 PAGE_SIZE - user_addr / PAGE_SIZE);
1264 }
1265 1233
1266 blk_start_plug(&plug); 1234 blk_start_plug(&plug);
1267 1235
1268 for (seg = 0; seg < nr_segs; seg++) { 1236 retval = do_direct_IO(dio, &sdio, &map_bh);
1269 user_addr = (unsigned long)iov[seg].iov_base; 1237 if (retval)
1270 sdio.size += bytes = iov[seg].iov_len; 1238 dio_cleanup(dio, &sdio);
1271
1272 /* Index into the first page of the first block */
1273 sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits;
1274 sdio.final_block_in_request = sdio.block_in_file +
1275 (bytes >> blkbits);
1276 /* Page fetching state */
1277 sdio.head = 0;
1278 sdio.tail = 0;
1279 sdio.curr_page = 0;
1280
1281 sdio.total_pages = 0;
1282 if (user_addr & (PAGE_SIZE-1)) {
1283 sdio.total_pages++;
1284 bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1));
1285 }
1286 sdio.total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
1287 sdio.curr_user_address = user_addr;
1288
1289 retval = do_direct_IO(dio, &sdio, &map_bh);
1290
1291 dio->result += iov[seg].iov_len -
1292 ((sdio.final_block_in_request - sdio.block_in_file) <<
1293 blkbits);
1294
1295 if (retval) {
1296 dio_cleanup(dio, &sdio);
1297 break;
1298 }
1299 } /* end iovec loop */
1300 1239
1301 if (retval == -ENOTBLK) { 1240 if (retval == -ENOTBLK) {
1302 /* 1241 /*
@@ -1365,8 +1304,8 @@ out:
1365 1304
1366ssize_t 1305ssize_t
1367__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1306__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1368 struct block_device *bdev, const struct iovec *iov, loff_t offset, 1307 struct block_device *bdev, struct iov_iter *iter, loff_t offset,
1369 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 1308 get_block_t get_block, dio_iodone_t end_io,
1370 dio_submit_t submit_io, int flags) 1309 dio_submit_t submit_io, int flags)
1371{ 1310{
1372 /* 1311 /*
@@ -1381,9 +1320,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1381 prefetch(bdev->bd_queue); 1320 prefetch(bdev->bd_queue);
1382 prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); 1321 prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
1383 1322
1384 return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, 1323 return do_blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset,
1385 nr_segs, get_block, end_io, 1324 get_block, end_io, submit_io, flags);
1386 submit_io, flags);
1387} 1325}
1388 1326
1389EXPORT_SYMBOL(__blockdev_direct_IO); 1327EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 1e5b45359509..d08e079ea5d3 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -617,6 +617,11 @@ static void retry_failed_sctp_send(struct connection *recv_con,
617 int nodeid = sn_send_failed->ssf_info.sinfo_ppid; 617 int nodeid = sn_send_failed->ssf_info.sinfo_ppid;
618 618
619 log_print("Retry sending %d bytes to node id %d", len, nodeid); 619 log_print("Retry sending %d bytes to node id %d", len, nodeid);
620
621 if (!nodeid) {
622 log_print("Shouldn't resend data via listening connection.");
623 return;
624 }
620 625
621 con = nodeid2con(nodeid, 0); 626 con = nodeid2con(nodeid, 0);
622 if (!con) { 627 if (!con) {
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index b1eaa7a1f82c..db0fad3269c0 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -45,14 +45,13 @@
45 * The function to be used for directory reads is ecryptfs_read. 45 * The function to be used for directory reads is ecryptfs_read.
46 */ 46 */
47static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, 47static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
48 const struct iovec *iov, 48 struct iov_iter *to)
49 unsigned long nr_segs, loff_t pos)
50{ 49{
51 ssize_t rc; 50 ssize_t rc;
52 struct path *path; 51 struct path *path;
53 struct file *file = iocb->ki_filp; 52 struct file *file = iocb->ki_filp;
54 53
55 rc = generic_file_aio_read(iocb, iov, nr_segs, pos); 54 rc = generic_file_read_iter(iocb, to);
56 /* 55 /*
57 * Even though this is a async interface, we need to wait 56 * Even though this is a async interface, we need to wait
58 * for IO to finish to update atime 57 * for IO to finish to update atime
@@ -352,10 +351,10 @@ const struct file_operations ecryptfs_dir_fops = {
352 351
353const struct file_operations ecryptfs_main_fops = { 352const struct file_operations ecryptfs_main_fops = {
354 .llseek = generic_file_llseek, 353 .llseek = generic_file_llseek,
355 .read = do_sync_read, 354 .read = new_sync_read,
356 .aio_read = ecryptfs_read_update_atime, 355 .read_iter = ecryptfs_read_update_atime,
357 .write = do_sync_write, 356 .write = new_sync_write,
358 .aio_write = generic_file_aio_write, 357 .write_iter = generic_file_write_iter,
359 .iterate = ecryptfs_readdir, 358 .iterate = ecryptfs_readdir,
360 .unlocked_ioctl = ecryptfs_unlocked_ioctl, 359 .unlocked_ioctl = ecryptfs_unlocked_ioctl,
361#ifdef CONFIG_COMPAT 360#ifdef CONFIG_COMPAT
diff --git a/fs/exec.c b/fs/exec.c
index 238b7aa26f68..a3d33fe592d6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1046,13 +1046,13 @@ EXPORT_SYMBOL_GPL(get_task_comm);
1046 * so that a new one can be started 1046 * so that a new one can be started
1047 */ 1047 */
1048 1048
1049void set_task_comm(struct task_struct *tsk, const char *buf) 1049void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
1050{ 1050{
1051 task_lock(tsk); 1051 task_lock(tsk);
1052 trace_task_rename(tsk, buf); 1052 trace_task_rename(tsk, buf);
1053 strlcpy(tsk->comm, buf, sizeof(tsk->comm)); 1053 strlcpy(tsk->comm, buf, sizeof(tsk->comm));
1054 task_unlock(tsk); 1054 task_unlock(tsk);
1055 perf_event_comm(tsk); 1055 perf_event_comm(tsk, exec);
1056} 1056}
1057 1057
1058int flush_old_exec(struct linux_binprm * bprm) 1058int flush_old_exec(struct linux_binprm * bprm)
@@ -1110,7 +1110,8 @@ void setup_new_exec(struct linux_binprm * bprm)
1110 else 1110 else
1111 set_dumpable(current->mm, suid_dumpable); 1111 set_dumpable(current->mm, suid_dumpable);
1112 1112
1113 set_task_comm(current, kbasename(bprm->filename)); 1113 perf_event_exec();
1114 __set_task_comm(current, kbasename(bprm->filename), true);
1114 1115
1115 /* Set the new mm task size. We have to do that late because it may 1116 /* Set the new mm task size. We have to do that late because it may
1116 * depend on TIF_32BIT which is only updated in flush_thread() on 1117 * depend on TIF_32BIT which is only updated in flush_thread() on
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 491c6c078e7f..71bf8e4fb5d4 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -67,17 +67,17 @@ static int exofs_flush(struct file *file, fl_owner_t id)
67 67
68const struct file_operations exofs_file_operations = { 68const struct file_operations exofs_file_operations = {
69 .llseek = generic_file_llseek, 69 .llseek = generic_file_llseek,
70 .read = do_sync_read, 70 .read = new_sync_read,
71 .write = do_sync_write, 71 .write = new_sync_write,
72 .aio_read = generic_file_aio_read, 72 .read_iter = generic_file_read_iter,
73 .aio_write = generic_file_aio_write, 73 .write_iter = generic_file_write_iter,
74 .mmap = generic_file_mmap, 74 .mmap = generic_file_mmap,
75 .open = generic_file_open, 75 .open = generic_file_open,
76 .release = exofs_release_file, 76 .release = exofs_release_file,
77 .fsync = exofs_file_fsync, 77 .fsync = exofs_file_fsync,
78 .flush = exofs_flush, 78 .flush = exofs_flush,
79 .splice_read = generic_file_splice_read, 79 .splice_read = generic_file_splice_read,
80 .splice_write = generic_file_splice_write, 80 .splice_write = iter_file_splice_write,
81}; 81};
82 82
83const struct inode_operations exofs_file_inode_operations = { 83const struct inode_operations exofs_file_inode_operations = {
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index d1c244d67667..3f9cafd73931 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -964,7 +964,7 @@ static void exofs_invalidatepage(struct page *page, unsigned int offset,
964 964
965 /* TODO: Should be easy enough to do proprly */ 965 /* TODO: Should be easy enough to do proprly */
966static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb, 966static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb,
967 const struct iovec *iov, loff_t offset, unsigned long nr_segs) 967 struct iov_iter *iter, loff_t offset)
968{ 968{
969 return 0; 969 return 0;
970} 970}
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 44c36e590765..7c87b22a7228 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -62,10 +62,10 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
62 */ 62 */
63const struct file_operations ext2_file_operations = { 63const struct file_operations ext2_file_operations = {
64 .llseek = generic_file_llseek, 64 .llseek = generic_file_llseek,
65 .read = do_sync_read, 65 .read = new_sync_read,
66 .write = do_sync_write, 66 .write = new_sync_write,
67 .aio_read = generic_file_aio_read, 67 .read_iter = generic_file_read_iter,
68 .aio_write = generic_file_aio_write, 68 .write_iter = generic_file_write_iter,
69 .unlocked_ioctl = ext2_ioctl, 69 .unlocked_ioctl = ext2_ioctl,
70#ifdef CONFIG_COMPAT 70#ifdef CONFIG_COMPAT
71 .compat_ioctl = ext2_compat_ioctl, 71 .compat_ioctl = ext2_compat_ioctl,
@@ -75,7 +75,7 @@ const struct file_operations ext2_file_operations = {
75 .release = ext2_release_file, 75 .release = ext2_release_file,
76 .fsync = ext2_fsync, 76 .fsync = ext2_fsync,
77 .splice_read = generic_file_splice_read, 77 .splice_read = generic_file_splice_read,
78 .splice_write = generic_file_splice_write, 78 .splice_write = iter_file_splice_write,
79}; 79};
80 80
81#ifdef CONFIG_EXT2_FS_XIP 81#ifdef CONFIG_EXT2_FS_XIP
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index b1d2a4675d42..36d35c36311d 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -850,18 +850,18 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
850} 850}
851 851
852static ssize_t 852static ssize_t
853ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 853ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
854 loff_t offset, unsigned long nr_segs) 854 loff_t offset)
855{ 855{
856 struct file *file = iocb->ki_filp; 856 struct file *file = iocb->ki_filp;
857 struct address_space *mapping = file->f_mapping; 857 struct address_space *mapping = file->f_mapping;
858 struct inode *inode = mapping->host; 858 struct inode *inode = mapping->host;
859 size_t count = iov_iter_count(iter);
859 ssize_t ret; 860 ssize_t ret;
860 861
861 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 862 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext2_get_block);
862 ext2_get_block);
863 if (ret < 0 && (rw & WRITE)) 863 if (ret < 0 && (rw & WRITE))
864 ext2_write_failed(mapping, offset + iov_length(iov, nr_segs)); 864 ext2_write_failed(mapping, offset + count);
865 return ret; 865 return ret;
866} 866}
867 867
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index aad05311392a..a062fa1e1b11 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -50,10 +50,10 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
50 50
51const struct file_operations ext3_file_operations = { 51const struct file_operations ext3_file_operations = {
52 .llseek = generic_file_llseek, 52 .llseek = generic_file_llseek,
53 .read = do_sync_read, 53 .read = new_sync_read,
54 .write = do_sync_write, 54 .write = new_sync_write,
55 .aio_read = generic_file_aio_read, 55 .read_iter = generic_file_read_iter,
56 .aio_write = generic_file_aio_write, 56 .write_iter = generic_file_write_iter,
57 .unlocked_ioctl = ext3_ioctl, 57 .unlocked_ioctl = ext3_ioctl,
58#ifdef CONFIG_COMPAT 58#ifdef CONFIG_COMPAT
59 .compat_ioctl = ext3_compat_ioctl, 59 .compat_ioctl = ext3_compat_ioctl,
@@ -63,7 +63,7 @@ const struct file_operations ext3_file_operations = {
63 .release = ext3_release_file, 63 .release = ext3_release_file,
64 .fsync = ext3_sync_file, 64 .fsync = ext3_sync_file,
65 .splice_read = generic_file_splice_read, 65 .splice_read = generic_file_splice_read,
66 .splice_write = generic_file_splice_write, 66 .splice_write = iter_file_splice_write,
67}; 67};
68 68
69const struct inode_operations ext3_file_inode_operations = { 69const struct inode_operations ext3_file_inode_operations = {
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 695abe738a24..2c6ccc49ba27 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1821,8 +1821,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
1821 * VFS code falls back into buffered path in that case so we are safe. 1821 * VFS code falls back into buffered path in that case so we are safe.
1822 */ 1822 */
1823static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, 1823static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
1824 const struct iovec *iov, loff_t offset, 1824 struct iov_iter *iter, loff_t offset)
1825 unsigned long nr_segs)
1826{ 1825{
1827 struct file *file = iocb->ki_filp; 1826 struct file *file = iocb->ki_filp;
1828 struct inode *inode = file->f_mapping->host; 1827 struct inode *inode = file->f_mapping->host;
@@ -1830,10 +1829,10 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
1830 handle_t *handle; 1829 handle_t *handle;
1831 ssize_t ret; 1830 ssize_t ret;
1832 int orphan = 0; 1831 int orphan = 0;
1833 size_t count = iov_length(iov, nr_segs); 1832 size_t count = iov_iter_count(iter);
1834 int retries = 0; 1833 int retries = 0;
1835 1834
1836 trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); 1835 trace_ext3_direct_IO_enter(inode, offset, count, rw);
1837 1836
1838 if (rw == WRITE) { 1837 if (rw == WRITE) {
1839 loff_t final_size = offset + count; 1838 loff_t final_size = offset + count;
@@ -1857,15 +1856,14 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
1857 } 1856 }
1858 1857
1859retry: 1858retry:
1860 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 1859 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext3_get_block);
1861 ext3_get_block);
1862 /* 1860 /*
1863 * In case of error extending write may have instantiated a few 1861 * In case of error extending write may have instantiated a few
1864 * blocks outside i_size. Trim these off again. 1862 * blocks outside i_size. Trim these off again.
1865 */ 1863 */
1866 if (unlikely((rw & WRITE) && ret < 0)) { 1864 if (unlikely((rw & WRITE) && ret < 0)) {
1867 loff_t isize = i_size_read(inode); 1865 loff_t isize = i_size_read(inode);
1868 loff_t end = offset + iov_length(iov, nr_segs); 1866 loff_t end = offset + count;
1869 1867
1870 if (end > isize) 1868 if (end > isize)
1871 ext3_truncate_failed_direct_write(inode); 1869 ext3_truncate_failed_direct_write(inode);
@@ -1910,8 +1908,7 @@ retry:
1910 ret = err; 1908 ret = err;
1911 } 1909 }
1912out: 1910out:
1913 trace_ext3_direct_IO_exit(inode, offset, 1911 trace_ext3_direct_IO_exit(inode, offset, count, rw, ret);
1914 iov_length(iov, nr_segs), rw, ret);
1915 return ret; 1912 return ret;
1916} 1913}
1917 1914
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1479e2ae00d2..7cc5a0e23688 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2140,8 +2140,7 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
2140extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, 2140extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
2141 struct ext4_map_blocks *map, int flags); 2141 struct ext4_map_blocks *map, int flags);
2142extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, 2142extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
2143 const struct iovec *iov, loff_t offset, 2143 struct iov_iter *iter, loff_t offset);
2144 unsigned long nr_segs);
2145extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); 2144extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
2146extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); 2145extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
2147extern void ext4_ind_truncate(handle_t *, struct inode *inode); 2146extern void ext4_ind_truncate(handle_t *, struct inode *inode);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 4e8bc284ec0e..8695f70af1ef 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -74,26 +74,22 @@ static void ext4_unwritten_wait(struct inode *inode)
74 * or one thread will zero the other's data, causing corruption. 74 * or one thread will zero the other's data, causing corruption.
75 */ 75 */
76static int 76static int
77ext4_unaligned_aio(struct inode *inode, const struct iovec *iov, 77ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
78 unsigned long nr_segs, loff_t pos)
79{ 78{
80 struct super_block *sb = inode->i_sb; 79 struct super_block *sb = inode->i_sb;
81 int blockmask = sb->s_blocksize - 1; 80 int blockmask = sb->s_blocksize - 1;
82 size_t count = iov_length(iov, nr_segs);
83 loff_t final_size = pos + count;
84 81
85 if (pos >= i_size_read(inode)) 82 if (pos >= i_size_read(inode))
86 return 0; 83 return 0;
87 84
88 if ((pos & blockmask) || (final_size & blockmask)) 85 if ((pos | iov_iter_alignment(from)) & blockmask)
89 return 1; 86 return 1;
90 87
91 return 0; 88 return 0;
92} 89}
93 90
94static ssize_t 91static ssize_t
95ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 92ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
96 unsigned long nr_segs, loff_t pos)
97{ 93{
98 struct file *file = iocb->ki_filp; 94 struct file *file = iocb->ki_filp;
99 struct inode *inode = file_inode(iocb->ki_filp); 95 struct inode *inode = file_inode(iocb->ki_filp);
@@ -101,10 +97,9 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
101 struct blk_plug plug; 97 struct blk_plug plug;
102 int o_direct = file->f_flags & O_DIRECT; 98 int o_direct = file->f_flags & O_DIRECT;
103 int overwrite = 0; 99 int overwrite = 0;
104 size_t length = iov_length(iov, nr_segs); 100 size_t length = iov_iter_count(from);
105 ssize_t ret; 101 ssize_t ret;
106 102 loff_t pos = iocb->ki_pos;
107 BUG_ON(iocb->ki_pos != pos);
108 103
109 /* 104 /*
110 * Unaligned direct AIO must be serialized; see comment above 105 * Unaligned direct AIO must be serialized; see comment above
@@ -114,7 +109,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
114 ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && 109 ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
115 !is_sync_kiocb(iocb) && 110 !is_sync_kiocb(iocb) &&
116 (file->f_flags & O_APPEND || 111 (file->f_flags & O_APPEND ||
117 ext4_unaligned_aio(inode, iov, nr_segs, pos))) { 112 ext4_unaligned_aio(inode, from, pos))) {
118 aio_mutex = ext4_aio_mutex(inode); 113 aio_mutex = ext4_aio_mutex(inode);
119 mutex_lock(aio_mutex); 114 mutex_lock(aio_mutex);
120 ext4_unwritten_wait(inode); 115 ext4_unwritten_wait(inode);
@@ -138,10 +133,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
138 goto errout; 133 goto errout;
139 } 134 }
140 135
141 if (pos + length > sbi->s_bitmap_maxbytes) { 136 if (pos + length > sbi->s_bitmap_maxbytes)
142 nr_segs = iov_shorten((struct iovec *)iov, nr_segs, 137 iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos);
143 sbi->s_bitmap_maxbytes - pos);
144 }
145 } 138 }
146 139
147 if (o_direct) { 140 if (o_direct) {
@@ -179,7 +172,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
179 } 172 }
180 } 173 }
181 174
182 ret = __generic_file_aio_write(iocb, iov, nr_segs); 175 ret = __generic_file_write_iter(iocb, from);
183 mutex_unlock(&inode->i_mutex); 176 mutex_unlock(&inode->i_mutex);
184 177
185 if (ret > 0) { 178 if (ret > 0) {
@@ -594,10 +587,10 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
594 587
595const struct file_operations ext4_file_operations = { 588const struct file_operations ext4_file_operations = {
596 .llseek = ext4_llseek, 589 .llseek = ext4_llseek,
597 .read = do_sync_read, 590 .read = new_sync_read,
598 .write = do_sync_write, 591 .write = new_sync_write,
599 .aio_read = generic_file_aio_read, 592 .read_iter = generic_file_read_iter,
600 .aio_write = ext4_file_write, 593 .write_iter = ext4_file_write_iter,
601 .unlocked_ioctl = ext4_ioctl, 594 .unlocked_ioctl = ext4_ioctl,
602#ifdef CONFIG_COMPAT 595#ifdef CONFIG_COMPAT
603 .compat_ioctl = ext4_compat_ioctl, 596 .compat_ioctl = ext4_compat_ioctl,
@@ -607,7 +600,7 @@ const struct file_operations ext4_file_operations = {
607 .release = ext4_release_file, 600 .release = ext4_release_file,
608 .fsync = ext4_sync_file, 601 .fsync = ext4_sync_file,
609 .splice_read = generic_file_splice_read, 602 .splice_read = generic_file_splice_read,
610 .splice_write = generic_file_splice_write, 603 .splice_write = iter_file_splice_write,
611 .fallocate = ext4_fallocate, 604 .fallocate = ext4_fallocate,
612}; 605};
613 606
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 594009f5f523..8a57e9fcd1b9 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -639,8 +639,7 @@ out:
639 * VFS code falls back into buffered path in that case so we are safe. 639 * VFS code falls back into buffered path in that case so we are safe.
640 */ 640 */
641ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, 641ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
642 const struct iovec *iov, loff_t offset, 642 struct iov_iter *iter, loff_t offset)
643 unsigned long nr_segs)
644{ 643{
645 struct file *file = iocb->ki_filp; 644 struct file *file = iocb->ki_filp;
646 struct inode *inode = file->f_mapping->host; 645 struct inode *inode = file->f_mapping->host;
@@ -648,7 +647,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
648 handle_t *handle; 647 handle_t *handle;
649 ssize_t ret; 648 ssize_t ret;
650 int orphan = 0; 649 int orphan = 0;
651 size_t count = iov_length(iov, nr_segs); 650 size_t count = iov_iter_count(iter);
652 int retries = 0; 651 int retries = 0;
653 652
654 if (rw == WRITE) { 653 if (rw == WRITE) {
@@ -687,18 +686,17 @@ retry:
687 goto locked; 686 goto locked;
688 } 687 }
689 ret = __blockdev_direct_IO(rw, iocb, inode, 688 ret = __blockdev_direct_IO(rw, iocb, inode,
690 inode->i_sb->s_bdev, iov, 689 inode->i_sb->s_bdev, iter, offset,
691 offset, nr_segs,
692 ext4_get_block, NULL, NULL, 0); 690 ext4_get_block, NULL, NULL, 0);
693 inode_dio_done(inode); 691 inode_dio_done(inode);
694 } else { 692 } else {
695locked: 693locked:
696 ret = blockdev_direct_IO(rw, iocb, inode, iov, 694 ret = blockdev_direct_IO(rw, iocb, inode, iter,
697 offset, nr_segs, ext4_get_block); 695 offset, ext4_get_block);
698 696
699 if (unlikely((rw & WRITE) && ret < 0)) { 697 if (unlikely((rw & WRITE) && ret < 0)) {
700 loff_t isize = i_size_read(inode); 698 loff_t isize = i_size_read(inode);
701 loff_t end = offset + iov_length(iov, nr_segs); 699 loff_t end = offset + count;
702 700
703 if (end > isize) 701 if (end > isize)
704 ext4_truncate_failed_write(inode); 702 ext4_truncate_failed_write(inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7fcd68ee9155..8a064734e6eb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3093,13 +3093,12 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3093 * 3093 *
3094 */ 3094 */
3095static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, 3095static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3096 const struct iovec *iov, loff_t offset, 3096 struct iov_iter *iter, loff_t offset)
3097 unsigned long nr_segs)
3098{ 3097{
3099 struct file *file = iocb->ki_filp; 3098 struct file *file = iocb->ki_filp;
3100 struct inode *inode = file->f_mapping->host; 3099 struct inode *inode = file->f_mapping->host;
3101 ssize_t ret; 3100 ssize_t ret;
3102 size_t count = iov_length(iov, nr_segs); 3101 size_t count = iov_iter_count(iter);
3103 int overwrite = 0; 3102 int overwrite = 0;
3104 get_block_t *get_block_func = NULL; 3103 get_block_t *get_block_func = NULL;
3105 int dio_flags = 0; 3104 int dio_flags = 0;
@@ -3108,7 +3107,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3108 3107
3109 /* Use the old path for reads and writes beyond i_size. */ 3108 /* Use the old path for reads and writes beyond i_size. */
3110 if (rw != WRITE || final_size > inode->i_size) 3109 if (rw != WRITE || final_size > inode->i_size)
3111 return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); 3110 return ext4_ind_direct_IO(rw, iocb, iter, offset);
3112 3111
3113 BUG_ON(iocb->private == NULL); 3112 BUG_ON(iocb->private == NULL);
3114 3113
@@ -3175,8 +3174,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3175 dio_flags = DIO_LOCKING; 3174 dio_flags = DIO_LOCKING;
3176 } 3175 }
3177 ret = __blockdev_direct_IO(rw, iocb, inode, 3176 ret = __blockdev_direct_IO(rw, iocb, inode,
3178 inode->i_sb->s_bdev, iov, 3177 inode->i_sb->s_bdev, iter,
3179 offset, nr_segs, 3178 offset,
3180 get_block_func, 3179 get_block_func,
3181 ext4_end_io_dio, 3180 ext4_end_io_dio,
3182 NULL, 3181 NULL,
@@ -3230,11 +3229,11 @@ retake_lock:
3230} 3229}
3231 3230
3232static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, 3231static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
3233 const struct iovec *iov, loff_t offset, 3232 struct iov_iter *iter, loff_t offset)
3234 unsigned long nr_segs)
3235{ 3233{
3236 struct file *file = iocb->ki_filp; 3234 struct file *file = iocb->ki_filp;
3237 struct inode *inode = file->f_mapping->host; 3235 struct inode *inode = file->f_mapping->host;
3236 size_t count = iov_iter_count(iter);
3238 ssize_t ret; 3237 ssize_t ret;
3239 3238
3240 /* 3239 /*
@@ -3247,13 +3246,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
3247 if (ext4_has_inline_data(inode)) 3246 if (ext4_has_inline_data(inode))
3248 return 0; 3247 return 0;
3249 3248
3250 trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); 3249 trace_ext4_direct_IO_enter(inode, offset, count, rw);
3251 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3250 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3252 ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); 3251 ret = ext4_ext_direct_IO(rw, iocb, iter, offset);
3253 else 3252 else
3254 ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); 3253 ret = ext4_ind_direct_IO(rw, iocb, iter, offset);
3255 trace_ext4_direct_IO_exit(inode, offset, 3254 trace_ext4_direct_IO_exit(inode, offset, count, rw, ret);
3256 iov_length(iov, nr_segs), rw, ret);
3257 return ret; 3255 return ret;
3258} 3256}
3259 3257
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index c1fb6dd10911..0924521306b4 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1017,10 +1017,9 @@ static int f2fs_write_end(struct file *file,
1017} 1017}
1018 1018
1019static int check_direct_IO(struct inode *inode, int rw, 1019static int check_direct_IO(struct inode *inode, int rw,
1020 const struct iovec *iov, loff_t offset, unsigned long nr_segs) 1020 struct iov_iter *iter, loff_t offset)
1021{ 1021{
1022 unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; 1022 unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
1023 int i;
1024 1023
1025 if (rw == READ) 1024 if (rw == READ)
1026 return 0; 1025 return 0;
@@ -1028,14 +1027,14 @@ static int check_direct_IO(struct inode *inode, int rw,
1028 if (offset & blocksize_mask) 1027 if (offset & blocksize_mask)
1029 return -EINVAL; 1028 return -EINVAL;
1030 1029
1031 for (i = 0; i < nr_segs; i++) 1030 if (iov_iter_alignment(iter) & blocksize_mask)
1032 if (iov[i].iov_len & blocksize_mask) 1031 return -EINVAL;
1033 return -EINVAL; 1032
1034 return 0; 1033 return 0;
1035} 1034}
1036 1035
1037static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, 1036static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1038 const struct iovec *iov, loff_t offset, unsigned long nr_segs) 1037 struct iov_iter *iter, loff_t offset)
1039{ 1038{
1040 struct file *file = iocb->ki_filp; 1039 struct file *file = iocb->ki_filp;
1041 struct inode *inode = file->f_mapping->host; 1040 struct inode *inode = file->f_mapping->host;
@@ -1044,14 +1043,14 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1044 if (f2fs_has_inline_data(inode)) 1043 if (f2fs_has_inline_data(inode))
1045 return 0; 1044 return 0;
1046 1045
1047 if (check_direct_IO(inode, rw, iov, offset, nr_segs)) 1046 if (check_direct_IO(inode, rw, iter, offset))
1048 return 0; 1047 return 0;
1049 1048
1050 /* clear fsync mark to recover these blocks */ 1049 /* clear fsync mark to recover these blocks */
1051 fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); 1050 fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
1052 1051
1053 return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 1052 return blockdev_direct_IO(rw, iocb, inode, iter, offset,
1054 get_data_block); 1053 get_data_block);
1055} 1054}
1056 1055
1057static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, 1056static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 9c49c593d8eb..c58e33075719 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -808,10 +808,10 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
808 808
809const struct file_operations f2fs_file_operations = { 809const struct file_operations f2fs_file_operations = {
810 .llseek = f2fs_llseek, 810 .llseek = f2fs_llseek,
811 .read = do_sync_read, 811 .read = new_sync_read,
812 .write = do_sync_write, 812 .write = new_sync_write,
813 .aio_read = generic_file_aio_read, 813 .read_iter = generic_file_read_iter,
814 .aio_write = generic_file_aio_write, 814 .write_iter = generic_file_write_iter,
815 .open = generic_file_open, 815 .open = generic_file_open,
816 .mmap = f2fs_file_mmap, 816 .mmap = f2fs_file_mmap,
817 .fsync = f2fs_sync_file, 817 .fsync = f2fs_sync_file,
@@ -821,5 +821,5 @@ const struct file_operations f2fs_file_operations = {
821 .compat_ioctl = f2fs_compat_ioctl, 821 .compat_ioctl = f2fs_compat_ioctl,
822#endif 822#endif
823 .splice_read = generic_file_splice_read, 823 .splice_read = generic_file_splice_read,
824 .splice_write = generic_file_splice_write, 824 .splice_write = iter_file_splice_write,
825}; 825};
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 9b104f543056..85f79a89e747 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -170,10 +170,10 @@ int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
170 170
171const struct file_operations fat_file_operations = { 171const struct file_operations fat_file_operations = {
172 .llseek = generic_file_llseek, 172 .llseek = generic_file_llseek,
173 .read = do_sync_read, 173 .read = new_sync_read,
174 .write = do_sync_write, 174 .write = new_sync_write,
175 .aio_read = generic_file_aio_read, 175 .read_iter = generic_file_read_iter,
176 .aio_write = generic_file_aio_write, 176 .write_iter = generic_file_write_iter,
177 .mmap = generic_file_mmap, 177 .mmap = generic_file_mmap,
178 .release = fat_file_release, 178 .release = fat_file_release,
179 .unlocked_ioctl = fat_generic_ioctl, 179 .unlocked_ioctl = fat_generic_ioctl,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 9c83594d7fb5..756aead10d96 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -247,12 +247,13 @@ static int fat_write_end(struct file *file, struct address_space *mapping,
247} 247}
248 248
249static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, 249static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
250 const struct iovec *iov, 250 struct iov_iter *iter,
251 loff_t offset, unsigned long nr_segs) 251 loff_t offset)
252{ 252{
253 struct file *file = iocb->ki_filp; 253 struct file *file = iocb->ki_filp;
254 struct address_space *mapping = file->f_mapping; 254 struct address_space *mapping = file->f_mapping;
255 struct inode *inode = mapping->host; 255 struct inode *inode = mapping->host;
256 size_t count = iov_iter_count(iter);
256 ssize_t ret; 257 ssize_t ret;
257 258
258 if (rw == WRITE) { 259 if (rw == WRITE) {
@@ -265,7 +266,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
265 * 266 *
266 * Return 0, and fallback to normal buffered write. 267 * Return 0, and fallback to normal buffered write.
267 */ 268 */
268 loff_t size = offset + iov_length(iov, nr_segs); 269 loff_t size = offset + count;
269 if (MSDOS_I(inode)->mmu_private < size) 270 if (MSDOS_I(inode)->mmu_private < size)
270 return 0; 271 return 0;
271 } 272 }
@@ -274,10 +275,9 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
274 * FAT need to use the DIO_LOCKING for avoiding the race 275 * FAT need to use the DIO_LOCKING for avoiding the race
275 * condition of fat_get_block() and ->truncate(). 276 * condition of fat_get_block() and ->truncate().
276 */ 277 */
277 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 278 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, fat_get_block);
278 fat_get_block);
279 if (ret < 0 && (rw & WRITE)) 279 if (ret < 0 && (rw & WRITE))
280 fat_write_failed(mapping, offset + iov_length(iov, nr_segs)); 280 fat_write_failed(mapping, offset + count);
281 281
282 return ret; 282 return ret;
283} 283}
diff --git a/fs/file.c b/fs/file.c
index 8f294cfac697..66923fe3176e 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -44,15 +44,10 @@ static void *alloc_fdmem(size_t size)
44 return vmalloc(size); 44 return vmalloc(size);
45} 45}
46 46
47static void free_fdmem(void *ptr)
48{
49 is_vmalloc_addr(ptr) ? vfree(ptr) : kfree(ptr);
50}
51
52static void __free_fdtable(struct fdtable *fdt) 47static void __free_fdtable(struct fdtable *fdt)
53{ 48{
54 free_fdmem(fdt->fd); 49 kvfree(fdt->fd);
55 free_fdmem(fdt->open_fds); 50 kvfree(fdt->open_fds);
56 kfree(fdt); 51 kfree(fdt);
57} 52}
58 53
@@ -130,7 +125,7 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
130 return fdt; 125 return fdt;
131 126
132out_arr: 127out_arr:
133 free_fdmem(fdt->fd); 128 kvfree(fdt->fd);
134out_fdt: 129out_fdt:
135 kfree(fdt); 130 kfree(fdt);
136out: 131out:
diff --git a/fs/file_table.c b/fs/file_table.c
index 40bf4660f0a3..385bfd31512a 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -175,6 +175,12 @@ struct file *alloc_file(struct path *path, fmode_t mode,
175 file->f_path = *path; 175 file->f_path = *path;
176 file->f_inode = path->dentry->d_inode; 176 file->f_inode = path->dentry->d_inode;
177 file->f_mapping = path->dentry->d_inode->i_mapping; 177 file->f_mapping = path->dentry->d_inode->i_mapping;
178 if ((mode & FMODE_READ) &&
179 likely(fop->read || fop->aio_read || fop->read_iter))
180 mode |= FMODE_CAN_READ;
181 if ((mode & FMODE_WRITE) &&
182 likely(fop->write || fop->aio_write || fop->write_iter))
183 mode |= FMODE_CAN_WRITE;
178 file->f_mode = mode; 184 file->f_mode = mode;
179 file->f_op = fop; 185 file->f_op = fop;
180 if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) 186 if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 13b691a8a7d2..966ace8b243f 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -94,8 +94,10 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
94 loff_t pos = 0; 94 loff_t pos = 0;
95 struct iovec iov = { .iov_base = buf, .iov_len = count }; 95 struct iovec iov = { .iov_base = buf, .iov_len = count };
96 struct fuse_io_priv io = { .async = 0, .file = file }; 96 struct fuse_io_priv io = { .async = 0, .file = file };
97 struct iov_iter ii;
98 iov_iter_init(&ii, READ, &iov, 1, count);
97 99
98 return fuse_direct_io(&io, &iov, 1, count, &pos, FUSE_DIO_CUSE); 100 return fuse_direct_io(&io, &ii, &pos, FUSE_DIO_CUSE);
99} 101}
100 102
101static ssize_t cuse_write(struct file *file, const char __user *buf, 103static ssize_t cuse_write(struct file *file, const char __user *buf,
@@ -104,12 +106,14 @@ static ssize_t cuse_write(struct file *file, const char __user *buf,
104 loff_t pos = 0; 106 loff_t pos = 0;
105 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; 107 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
106 struct fuse_io_priv io = { .async = 0, .file = file }; 108 struct fuse_io_priv io = { .async = 0, .file = file };
109 struct iov_iter ii;
110 iov_iter_init(&ii, WRITE, &iov, 1, count);
107 111
108 /* 112 /*
109 * No locking or generic_write_checks(), the server is 113 * No locking or generic_write_checks(), the server is
110 * responsible for locking and sanity checks. 114 * responsible for locking and sanity checks.
111 */ 115 */
112 return fuse_direct_io(&io, &iov, 1, count, &pos, 116 return fuse_direct_io(&io, &ii, &pos,
113 FUSE_DIO_WRITE | FUSE_DIO_CUSE); 117 FUSE_DIO_WRITE | FUSE_DIO_CUSE);
114} 118}
115 119
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 903cbc9cd6bd..6e16dad13e9b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -933,8 +933,7 @@ out:
933 return err; 933 return err;
934} 934}
935 935
936static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, 936static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
937 unsigned long nr_segs, loff_t pos)
938{ 937{
939 struct inode *inode = iocb->ki_filp->f_mapping->host; 938 struct inode *inode = iocb->ki_filp->f_mapping->host;
940 struct fuse_conn *fc = get_fuse_conn(inode); 939 struct fuse_conn *fc = get_fuse_conn(inode);
@@ -945,14 +944,14 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
945 * i_size is up to date). 944 * i_size is up to date).
946 */ 945 */
947 if (fc->auto_inval_data || 946 if (fc->auto_inval_data ||
948 (pos + iov_length(iov, nr_segs) > i_size_read(inode))) { 947 (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) {
949 int err; 948 int err;
950 err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL); 949 err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL);
951 if (err) 950 if (err)
952 return err; 951 return err;
953 } 952 }
954 953
955 return generic_file_aio_read(iocb, iov, nr_segs, pos); 954 return generic_file_read_iter(iocb, to);
956} 955}
957 956
958static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, 957static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
@@ -1181,19 +1180,17 @@ static ssize_t fuse_perform_write(struct file *file,
1181 return res > 0 ? res : err; 1180 return res > 0 ? res : err;
1182} 1181}
1183 1182
1184static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 1183static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1185 unsigned long nr_segs, loff_t pos)
1186{ 1184{
1187 struct file *file = iocb->ki_filp; 1185 struct file *file = iocb->ki_filp;
1188 struct address_space *mapping = file->f_mapping; 1186 struct address_space *mapping = file->f_mapping;
1189 size_t count = 0; 1187 size_t count = iov_iter_count(from);
1190 size_t ocount = 0;
1191 ssize_t written = 0; 1188 ssize_t written = 0;
1192 ssize_t written_buffered = 0; 1189 ssize_t written_buffered = 0;
1193 struct inode *inode = mapping->host; 1190 struct inode *inode = mapping->host;
1194 ssize_t err; 1191 ssize_t err;
1195 struct iov_iter i;
1196 loff_t endbyte = 0; 1192 loff_t endbyte = 0;
1193 loff_t pos = iocb->ki_pos;
1197 1194
1198 if (get_fuse_conn(inode)->writeback_cache) { 1195 if (get_fuse_conn(inode)->writeback_cache) {
1199 /* Update size (EOF optimization) and mode (SUID clearing) */ 1196 /* Update size (EOF optimization) and mode (SUID clearing) */
@@ -1201,17 +1198,9 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1201 if (err) 1198 if (err)
1202 return err; 1199 return err;
1203 1200
1204 return generic_file_aio_write(iocb, iov, nr_segs, pos); 1201 return generic_file_write_iter(iocb, from);
1205 } 1202 }
1206 1203
1207 WARN_ON(iocb->ki_pos != pos);
1208
1209 ocount = 0;
1210 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
1211 if (err)
1212 return err;
1213
1214 count = ocount;
1215 mutex_lock(&inode->i_mutex); 1204 mutex_lock(&inode->i_mutex);
1216 1205
1217 /* We can write back this queue in page reclaim */ 1206 /* We can write back this queue in page reclaim */
@@ -1224,6 +1213,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1224 if (count == 0) 1213 if (count == 0)
1225 goto out; 1214 goto out;
1226 1215
1216 iov_iter_truncate(from, count);
1227 err = file_remove_suid(file); 1217 err = file_remove_suid(file);
1228 if (err) 1218 if (err)
1229 goto out; 1219 goto out;
@@ -1233,16 +1223,13 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1233 goto out; 1223 goto out;
1234 1224
1235 if (file->f_flags & O_DIRECT) { 1225 if (file->f_flags & O_DIRECT) {
1236 written = generic_file_direct_write(iocb, iov, &nr_segs, pos, 1226 written = generic_file_direct_write(iocb, from, pos);
1237 count, ocount); 1227 if (written < 0 || !iov_iter_count(from))
1238 if (written < 0 || written == count)
1239 goto out; 1228 goto out;
1240 1229
1241 pos += written; 1230 pos += written;
1242 count -= written;
1243 1231
1244 iov_iter_init(&i, iov, nr_segs, count, written); 1232 written_buffered = fuse_perform_write(file, mapping, from, pos);
1245 written_buffered = fuse_perform_write(file, mapping, &i, pos);
1246 if (written_buffered < 0) { 1233 if (written_buffered < 0) {
1247 err = written_buffered; 1234 err = written_buffered;
1248 goto out; 1235 goto out;
@@ -1261,8 +1248,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1261 written += written_buffered; 1248 written += written_buffered;
1262 iocb->ki_pos = pos + written_buffered; 1249 iocb->ki_pos = pos + written_buffered;
1263 } else { 1250 } else {
1264 iov_iter_init(&i, iov, nr_segs, count, 0); 1251 written = fuse_perform_write(file, mapping, from, pos);
1265 written = fuse_perform_write(file, mapping, &i, pos);
1266 if (written >= 0) 1252 if (written >= 0)
1267 iocb->ki_pos = pos + written; 1253 iocb->ki_pos = pos + written;
1268 } 1254 }
@@ -1300,7 +1286,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
1300 size_t nbytes = 0; /* # bytes already packed in req */ 1286 size_t nbytes = 0; /* # bytes already packed in req */
1301 1287
1302 /* Special case for kernel I/O: can copy directly into the buffer */ 1288 /* Special case for kernel I/O: can copy directly into the buffer */
1303 if (segment_eq(get_fs(), KERNEL_DS)) { 1289 if (ii->type & ITER_KVEC) {
1304 unsigned long user_addr = fuse_get_user_addr(ii); 1290 unsigned long user_addr = fuse_get_user_addr(ii);
1305 size_t frag_size = fuse_get_frag_size(ii, *nbytesp); 1291 size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
1306 1292
@@ -1316,35 +1302,26 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
1316 1302
1317 while (nbytes < *nbytesp && req->num_pages < req->max_pages) { 1303 while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
1318 unsigned npages; 1304 unsigned npages;
1319 unsigned long user_addr = fuse_get_user_addr(ii); 1305 size_t start;
1320 unsigned offset = user_addr & ~PAGE_MASK;
1321 size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes);
1322 int ret;
1323
1324 unsigned n = req->max_pages - req->num_pages; 1306 unsigned n = req->max_pages - req->num_pages;
1325 frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT); 1307 ssize_t ret = iov_iter_get_pages(ii,
1326 1308 &req->pages[req->num_pages],
1327 npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; 1309 n * PAGE_SIZE, &start);
1328 npages = clamp(npages, 1U, n);
1329
1330 ret = get_user_pages_fast(user_addr, npages, !write,
1331 &req->pages[req->num_pages]);
1332 if (ret < 0) 1310 if (ret < 0)
1333 return ret; 1311 return ret;
1334 1312
1335 npages = ret; 1313 iov_iter_advance(ii, ret);
1336 frag_size = min_t(size_t, frag_size, 1314 nbytes += ret;
1337 (npages << PAGE_SHIFT) - offset); 1315
1338 iov_iter_advance(ii, frag_size); 1316 ret += start;
1317 npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
1339 1318
1340 req->page_descs[req->num_pages].offset = offset; 1319 req->page_descs[req->num_pages].offset = start;
1341 fuse_page_descs_length_init(req, req->num_pages, npages); 1320 fuse_page_descs_length_init(req, req->num_pages, npages);
1342 1321
1343 req->num_pages += npages; 1322 req->num_pages += npages;
1344 req->page_descs[req->num_pages - 1].length -= 1323 req->page_descs[req->num_pages - 1].length -=
1345 (npages << PAGE_SHIFT) - offset - frag_size; 1324 (PAGE_SIZE - ret) & (PAGE_SIZE - 1);
1346
1347 nbytes += frag_size;
1348 } 1325 }
1349 1326
1350 if (write) 1327 if (write)
@@ -1359,24 +1336,11 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
1359 1336
1360static inline int fuse_iter_npages(const struct iov_iter *ii_p) 1337static inline int fuse_iter_npages(const struct iov_iter *ii_p)
1361{ 1338{
1362 struct iov_iter ii = *ii_p; 1339 return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ);
1363 int npages = 0;
1364
1365 while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) {
1366 unsigned long user_addr = fuse_get_user_addr(&ii);
1367 unsigned offset = user_addr & ~PAGE_MASK;
1368 size_t frag_size = iov_iter_single_seg_count(&ii);
1369
1370 npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1371 iov_iter_advance(&ii, frag_size);
1372 }
1373
1374 return min(npages, FUSE_MAX_PAGES_PER_REQ);
1375} 1340}
1376 1341
1377ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, 1342ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
1378 unsigned long nr_segs, size_t count, loff_t *ppos, 1343 loff_t *ppos, int flags)
1379 int flags)
1380{ 1344{
1381 int write = flags & FUSE_DIO_WRITE; 1345 int write = flags & FUSE_DIO_WRITE;
1382 int cuse = flags & FUSE_DIO_CUSE; 1346 int cuse = flags & FUSE_DIO_CUSE;
@@ -1386,18 +1350,16 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
1386 struct fuse_conn *fc = ff->fc; 1350 struct fuse_conn *fc = ff->fc;
1387 size_t nmax = write ? fc->max_write : fc->max_read; 1351 size_t nmax = write ? fc->max_write : fc->max_read;
1388 loff_t pos = *ppos; 1352 loff_t pos = *ppos;
1353 size_t count = iov_iter_count(iter);
1389 pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT; 1354 pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT;
1390 pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT; 1355 pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT;
1391 ssize_t res = 0; 1356 ssize_t res = 0;
1392 struct fuse_req *req; 1357 struct fuse_req *req;
1393 struct iov_iter ii;
1394
1395 iov_iter_init(&ii, iov, nr_segs, count, 0);
1396 1358
1397 if (io->async) 1359 if (io->async)
1398 req = fuse_get_req_for_background(fc, fuse_iter_npages(&ii)); 1360 req = fuse_get_req_for_background(fc, fuse_iter_npages(iter));
1399 else 1361 else
1400 req = fuse_get_req(fc, fuse_iter_npages(&ii)); 1362 req = fuse_get_req(fc, fuse_iter_npages(iter));
1401 if (IS_ERR(req)) 1363 if (IS_ERR(req))
1402 return PTR_ERR(req); 1364 return PTR_ERR(req);
1403 1365
@@ -1413,7 +1375,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
1413 size_t nres; 1375 size_t nres;
1414 fl_owner_t owner = current->files; 1376 fl_owner_t owner = current->files;
1415 size_t nbytes = min(count, nmax); 1377 size_t nbytes = min(count, nmax);
1416 int err = fuse_get_user_pages(req, &ii, &nbytes, write); 1378 int err = fuse_get_user_pages(req, iter, &nbytes, write);
1417 if (err) { 1379 if (err) {
1418 res = err; 1380 res = err;
1419 break; 1381 break;
@@ -1443,9 +1405,9 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
1443 fuse_put_request(fc, req); 1405 fuse_put_request(fc, req);
1444 if (io->async) 1406 if (io->async)
1445 req = fuse_get_req_for_background(fc, 1407 req = fuse_get_req_for_background(fc,
1446 fuse_iter_npages(&ii)); 1408 fuse_iter_npages(iter));
1447 else 1409 else
1448 req = fuse_get_req(fc, fuse_iter_npages(&ii)); 1410 req = fuse_get_req(fc, fuse_iter_npages(iter));
1449 if (IS_ERR(req)) 1411 if (IS_ERR(req))
1450 break; 1412 break;
1451 } 1413 }
@@ -1460,9 +1422,8 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
1460EXPORT_SYMBOL_GPL(fuse_direct_io); 1422EXPORT_SYMBOL_GPL(fuse_direct_io);
1461 1423
1462static ssize_t __fuse_direct_read(struct fuse_io_priv *io, 1424static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
1463 const struct iovec *iov, 1425 struct iov_iter *iter,
1464 unsigned long nr_segs, loff_t *ppos, 1426 loff_t *ppos)
1465 size_t count)
1466{ 1427{
1467 ssize_t res; 1428 ssize_t res;
1468 struct file *file = io->file; 1429 struct file *file = io->file;
@@ -1471,7 +1432,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
1471 if (is_bad_inode(inode)) 1432 if (is_bad_inode(inode))
1472 return -EIO; 1433 return -EIO;
1473 1434
1474 res = fuse_direct_io(io, iov, nr_segs, count, ppos, 0); 1435 res = fuse_direct_io(io, iter, ppos, 0);
1475 1436
1476 fuse_invalidate_attr(inode); 1437 fuse_invalidate_attr(inode);
1477 1438
@@ -1483,22 +1444,26 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
1483{ 1444{
1484 struct fuse_io_priv io = { .async = 0, .file = file }; 1445 struct fuse_io_priv io = { .async = 0, .file = file };
1485 struct iovec iov = { .iov_base = buf, .iov_len = count }; 1446 struct iovec iov = { .iov_base = buf, .iov_len = count };
1486 return __fuse_direct_read(&io, &iov, 1, ppos, count); 1447 struct iov_iter ii;
1448 iov_iter_init(&ii, READ, &iov, 1, count);
1449 return __fuse_direct_read(&io, &ii, ppos);
1487} 1450}
1488 1451
1489static ssize_t __fuse_direct_write(struct fuse_io_priv *io, 1452static ssize_t __fuse_direct_write(struct fuse_io_priv *io,
1490 const struct iovec *iov, 1453 struct iov_iter *iter,
1491 unsigned long nr_segs, loff_t *ppos) 1454 loff_t *ppos)
1492{ 1455{
1493 struct file *file = io->file; 1456 struct file *file = io->file;
1494 struct inode *inode = file_inode(file); 1457 struct inode *inode = file_inode(file);
1495 size_t count = iov_length(iov, nr_segs); 1458 size_t count = iov_iter_count(iter);
1496 ssize_t res; 1459 ssize_t res;
1497 1460
1461
1498 res = generic_write_checks(file, ppos, &count, 0); 1462 res = generic_write_checks(file, ppos, &count, 0);
1499 if (!res) 1463 if (!res) {
1500 res = fuse_direct_io(io, iov, nr_segs, count, ppos, 1464 iov_iter_truncate(iter, count);
1501 FUSE_DIO_WRITE); 1465 res = fuse_direct_io(io, iter, ppos, FUSE_DIO_WRITE);
1466 }
1502 1467
1503 fuse_invalidate_attr(inode); 1468 fuse_invalidate_attr(inode);
1504 1469
@@ -1512,13 +1477,15 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1512 struct inode *inode = file_inode(file); 1477 struct inode *inode = file_inode(file);
1513 ssize_t res; 1478 ssize_t res;
1514 struct fuse_io_priv io = { .async = 0, .file = file }; 1479 struct fuse_io_priv io = { .async = 0, .file = file };
1480 struct iov_iter ii;
1481 iov_iter_init(&ii, WRITE, &iov, 1, count);
1515 1482
1516 if (is_bad_inode(inode)) 1483 if (is_bad_inode(inode))
1517 return -EIO; 1484 return -EIO;
1518 1485
1519 /* Don't allow parallel writes to the same file */ 1486 /* Don't allow parallel writes to the same file */
1520 mutex_lock(&inode->i_mutex); 1487 mutex_lock(&inode->i_mutex);
1521 res = __fuse_direct_write(&io, &iov, 1, ppos); 1488 res = __fuse_direct_write(&io, &ii, ppos);
1522 if (res > 0) 1489 if (res > 0)
1523 fuse_write_update_size(inode, *ppos); 1490 fuse_write_update_size(inode, *ppos);
1524 mutex_unlock(&inode->i_mutex); 1491 mutex_unlock(&inode->i_mutex);
@@ -2372,7 +2339,7 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
2372 if (!bytes) 2339 if (!bytes)
2373 return 0; 2340 return 0;
2374 2341
2375 iov_iter_init(&ii, iov, nr_segs, bytes, 0); 2342 iov_iter_init(&ii, to_user ? READ : WRITE, iov, nr_segs, bytes);
2376 2343
2377 while (iov_iter_count(&ii)) { 2344 while (iov_iter_count(&ii)) {
2378 struct page *page = pages[page_idx++]; 2345 struct page *page = pages[page_idx++];
@@ -2894,8 +2861,8 @@ static inline loff_t fuse_round_up(loff_t off)
2894} 2861}
2895 2862
2896static ssize_t 2863static ssize_t
2897fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 2864fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
2898 loff_t offset, unsigned long nr_segs) 2865 loff_t offset)
2899{ 2866{
2900 ssize_t ret = 0; 2867 ssize_t ret = 0;
2901 struct file *file = iocb->ki_filp; 2868 struct file *file = iocb->ki_filp;
@@ -2904,7 +2871,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2904 loff_t pos = 0; 2871 loff_t pos = 0;
2905 struct inode *inode; 2872 struct inode *inode;
2906 loff_t i_size; 2873 loff_t i_size;
2907 size_t count = iov_length(iov, nr_segs); 2874 size_t count = iov_iter_count(iter);
2908 struct fuse_io_priv *io; 2875 struct fuse_io_priv *io;
2909 2876
2910 pos = offset; 2877 pos = offset;
@@ -2919,6 +2886,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2919 if (offset >= i_size) 2886 if (offset >= i_size)
2920 return 0; 2887 return 0;
2921 count = min_t(loff_t, count, fuse_round_up(i_size - offset)); 2888 count = min_t(loff_t, count, fuse_round_up(i_size - offset));
2889 iov_iter_truncate(iter, count);
2922 } 2890 }
2923 2891
2924 io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); 2892 io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
@@ -2948,9 +2916,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2948 io->async = false; 2916 io->async = false;
2949 2917
2950 if (rw == WRITE) 2918 if (rw == WRITE)
2951 ret = __fuse_direct_write(io, iov, nr_segs, &pos); 2919 ret = __fuse_direct_write(io, iter, &pos);
2952 else 2920 else
2953 ret = __fuse_direct_read(io, iov, nr_segs, &pos, count); 2921 ret = __fuse_direct_read(io, iter, &pos);
2954 2922
2955 if (io->async) { 2923 if (io->async) {
2956 fuse_aio_complete(io, ret < 0 ? ret : 0, -1); 2924 fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
@@ -3061,10 +3029,10 @@ out:
3061 3029
3062static const struct file_operations fuse_file_operations = { 3030static const struct file_operations fuse_file_operations = {
3063 .llseek = fuse_file_llseek, 3031 .llseek = fuse_file_llseek,
3064 .read = do_sync_read, 3032 .read = new_sync_read,
3065 .aio_read = fuse_file_aio_read, 3033 .read_iter = fuse_file_read_iter,
3066 .write = do_sync_write, 3034 .write = new_sync_write,
3067 .aio_write = fuse_file_aio_write, 3035 .write_iter = fuse_file_write_iter,
3068 .mmap = fuse_file_mmap, 3036 .mmap = fuse_file_mmap,
3069 .open = fuse_open, 3037 .open = fuse_open,
3070 .flush = fuse_flush, 3038 .flush = fuse_flush,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 7aa5c75e0de1..e8e47a6ab518 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -880,9 +880,8 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
880/** CUSE pass fuse_direct_io() a file which f_mapping->host is not from FUSE */ 880/** CUSE pass fuse_direct_io() a file which f_mapping->host is not from FUSE */
881#define FUSE_DIO_CUSE (1 << 1) 881#define FUSE_DIO_CUSE (1 << 1)
882 882
883ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, 883ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
884 unsigned long nr_segs, size_t count, loff_t *ppos, 884 loff_t *ppos, int flags);
885 int flags);
886long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, 885long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
887 unsigned int flags); 886 unsigned int flags);
888long fuse_ioctl_common(struct file *file, unsigned int cmd, 887long fuse_ioctl_common(struct file *file, unsigned int cmd,
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 492123cda64a..805b37fed638 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1040,8 +1040,7 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
1040 1040
1041 1041
1042static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, 1042static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
1043 const struct iovec *iov, loff_t offset, 1043 struct iov_iter *iter, loff_t offset)
1044 unsigned long nr_segs)
1045{ 1044{
1046 struct file *file = iocb->ki_filp; 1045 struct file *file = iocb->ki_filp;
1047 struct inode *inode = file->f_mapping->host; 1046 struct inode *inode = file->f_mapping->host;
@@ -1081,7 +1080,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
1081 */ 1080 */
1082 if (mapping->nrpages) { 1081 if (mapping->nrpages) {
1083 loff_t lstart = offset & (PAGE_CACHE_SIZE - 1); 1082 loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
1084 loff_t len = iov_length(iov, nr_segs); 1083 loff_t len = iov_iter_count(iter);
1085 loff_t end = PAGE_ALIGN(offset + len) - 1; 1084 loff_t end = PAGE_ALIGN(offset + len) - 1;
1086 1085
1087 rv = 0; 1086 rv = 0;
@@ -1096,9 +1095,9 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
1096 truncate_inode_pages_range(mapping, lstart, end); 1095 truncate_inode_pages_range(mapping, lstart, end);
1097 } 1096 }
1098 1097
1099 rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 1098 rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
1100 offset, nr_segs, gfs2_get_block_direct, 1099 iter, offset,
1101 NULL, NULL, 0); 1100 gfs2_get_block_direct, NULL, NULL, 0);
1102out: 1101out:
1103 gfs2_glock_dq(&gh); 1102 gfs2_glock_dq(&gh);
1104 gfs2_holder_uninit(&gh); 1103 gfs2_holder_uninit(&gh);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6ab0cfb2e891..4fc3a3046174 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -684,7 +684,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
684} 684}
685 685
686/** 686/**
687 * gfs2_file_aio_write - Perform a write to a file 687 * gfs2_file_write_iter - Perform a write to a file
688 * @iocb: The io context 688 * @iocb: The io context
689 * @iov: The data to write 689 * @iov: The data to write
690 * @nr_segs: Number of @iov segments 690 * @nr_segs: Number of @iov segments
@@ -697,11 +697,9 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
697 * 697 *
698 */ 698 */
699 699
700static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 700static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
701 unsigned long nr_segs, loff_t pos)
702{ 701{
703 struct file *file = iocb->ki_filp; 702 struct file *file = iocb->ki_filp;
704 size_t writesize = iov_length(iov, nr_segs);
705 struct gfs2_inode *ip = GFS2_I(file_inode(file)); 703 struct gfs2_inode *ip = GFS2_I(file_inode(file));
706 int ret; 704 int ret;
707 705
@@ -709,7 +707,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
709 if (ret) 707 if (ret)
710 return ret; 708 return ret;
711 709
712 gfs2_size_hint(file, pos, writesize); 710 gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from));
713 711
714 if (file->f_flags & O_APPEND) { 712 if (file->f_flags & O_APPEND) {
715 struct gfs2_holder gh; 713 struct gfs2_holder gh;
@@ -720,7 +718,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
720 gfs2_glock_dq_uninit(&gh); 718 gfs2_glock_dq_uninit(&gh);
721 } 719 }
722 720
723 return generic_file_aio_write(iocb, iov, nr_segs, pos); 721 return generic_file_write_iter(iocb, from);
724} 722}
725 723
726static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, 724static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
@@ -1058,10 +1056,10 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
1058 1056
1059const struct file_operations gfs2_file_fops = { 1057const struct file_operations gfs2_file_fops = {
1060 .llseek = gfs2_llseek, 1058 .llseek = gfs2_llseek,
1061 .read = do_sync_read, 1059 .read = new_sync_read,
1062 .aio_read = generic_file_aio_read, 1060 .read_iter = generic_file_read_iter,
1063 .write = do_sync_write, 1061 .write = new_sync_write,
1064 .aio_write = gfs2_file_aio_write, 1062 .write_iter = gfs2_file_write_iter,
1065 .unlocked_ioctl = gfs2_ioctl, 1063 .unlocked_ioctl = gfs2_ioctl,
1066 .mmap = gfs2_mmap, 1064 .mmap = gfs2_mmap,
1067 .open = gfs2_open, 1065 .open = gfs2_open,
@@ -1070,7 +1068,7 @@ const struct file_operations gfs2_file_fops = {
1070 .lock = gfs2_lock, 1068 .lock = gfs2_lock,
1071 .flock = gfs2_flock, 1069 .flock = gfs2_flock,
1072 .splice_read = generic_file_splice_read, 1070 .splice_read = generic_file_splice_read,
1073 .splice_write = generic_file_splice_write, 1071 .splice_write = iter_file_splice_write,
1074 .setlease = gfs2_setlease, 1072 .setlease = gfs2_setlease,
1075 .fallocate = gfs2_fallocate, 1073 .fallocate = gfs2_fallocate,
1076}; 1074};
@@ -1090,17 +1088,17 @@ const struct file_operations gfs2_dir_fops = {
1090 1088
1091const struct file_operations gfs2_file_fops_nolock = { 1089const struct file_operations gfs2_file_fops_nolock = {
1092 .llseek = gfs2_llseek, 1090 .llseek = gfs2_llseek,
1093 .read = do_sync_read, 1091 .read = new_sync_read,
1094 .aio_read = generic_file_aio_read, 1092 .read_iter = generic_file_read_iter,
1095 .write = do_sync_write, 1093 .write = new_sync_write,
1096 .aio_write = gfs2_file_aio_write, 1094 .write_iter = gfs2_file_write_iter,
1097 .unlocked_ioctl = gfs2_ioctl, 1095 .unlocked_ioctl = gfs2_ioctl,
1098 .mmap = gfs2_mmap, 1096 .mmap = gfs2_mmap,
1099 .open = gfs2_open, 1097 .open = gfs2_open,
1100 .release = gfs2_release, 1098 .release = gfs2_release,
1101 .fsync = gfs2_fsync, 1099 .fsync = gfs2_fsync,
1102 .splice_read = generic_file_splice_read, 1100 .splice_read = generic_file_splice_read,
1103 .splice_write = generic_file_splice_write, 1101 .splice_write = iter_file_splice_write,
1104 .setlease = generic_setlease, 1102 .setlease = generic_setlease,
1105 .fallocate = gfs2_fallocate, 1103 .fallocate = gfs2_fallocate,
1106}; 1104};
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 9e2fecd62f62..d0929bc81782 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -125,15 +125,15 @@ static int hfs_releasepage(struct page *page, gfp_t mask)
125} 125}
126 126
127static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, 127static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
128 const struct iovec *iov, loff_t offset, unsigned long nr_segs) 128 struct iov_iter *iter, loff_t offset)
129{ 129{
130 struct file *file = iocb->ki_filp; 130 struct file *file = iocb->ki_filp;
131 struct address_space *mapping = file->f_mapping; 131 struct address_space *mapping = file->f_mapping;
132 struct inode *inode = file_inode(file)->i_mapping->host; 132 struct inode *inode = file_inode(file)->i_mapping->host;
133 size_t count = iov_iter_count(iter);
133 ssize_t ret; 134 ssize_t ret;
134 135
135 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 136 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, hfs_get_block);
136 hfs_get_block);
137 137
138 /* 138 /*
139 * In case of error extending write may have instantiated a few 139 * In case of error extending write may have instantiated a few
@@ -141,7 +141,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
141 */ 141 */
142 if (unlikely((rw & WRITE) && ret < 0)) { 142 if (unlikely((rw & WRITE) && ret < 0)) {
143 loff_t isize = i_size_read(inode); 143 loff_t isize = i_size_read(inode);
144 loff_t end = offset + iov_length(iov, nr_segs); 144 loff_t end = offset + count;
145 145
146 if (end > isize) 146 if (end > isize)
147 hfs_write_failed(mapping, end); 147 hfs_write_failed(mapping, end);
@@ -674,10 +674,10 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
674 674
675static const struct file_operations hfs_file_operations = { 675static const struct file_operations hfs_file_operations = {
676 .llseek = generic_file_llseek, 676 .llseek = generic_file_llseek,
677 .read = do_sync_read, 677 .read = new_sync_read,
678 .aio_read = generic_file_aio_read, 678 .read_iter = generic_file_read_iter,
679 .write = do_sync_write, 679 .write = new_sync_write,
680 .aio_write = generic_file_aio_write, 680 .write_iter = generic_file_write_iter,
681 .mmap = generic_file_mmap, 681 .mmap = generic_file_mmap,
682 .splice_read = generic_file_splice_read, 682 .splice_read = generic_file_splice_read,
683 .fsync = hfs_file_fsync, 683 .fsync = hfs_file_fsync,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index a4f45bd88a63..0cf786f2d046 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -123,14 +123,15 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
123} 123}
124 124
125static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, 125static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
126 const struct iovec *iov, loff_t offset, unsigned long nr_segs) 126 struct iov_iter *iter, loff_t offset)
127{ 127{
128 struct file *file = iocb->ki_filp; 128 struct file *file = iocb->ki_filp;
129 struct address_space *mapping = file->f_mapping; 129 struct address_space *mapping = file->f_mapping;
130 struct inode *inode = file_inode(file)->i_mapping->host; 130 struct inode *inode = file_inode(file)->i_mapping->host;
131 size_t count = iov_iter_count(iter);
131 ssize_t ret; 132 ssize_t ret;
132 133
133 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 134 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset,
134 hfsplus_get_block); 135 hfsplus_get_block);
135 136
136 /* 137 /*
@@ -139,7 +140,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
139 */ 140 */
140 if (unlikely((rw & WRITE) && ret < 0)) { 141 if (unlikely((rw & WRITE) && ret < 0)) {
141 loff_t isize = i_size_read(inode); 142 loff_t isize = i_size_read(inode);
142 loff_t end = offset + iov_length(iov, nr_segs); 143 loff_t end = offset + count;
143 144
144 if (end > isize) 145 if (end > isize)
145 hfsplus_write_failed(mapping, end); 146 hfsplus_write_failed(mapping, end);
@@ -340,10 +341,10 @@ static const struct inode_operations hfsplus_file_inode_operations = {
340 341
341static const struct file_operations hfsplus_file_operations = { 342static const struct file_operations hfsplus_file_operations = {
342 .llseek = generic_file_llseek, 343 .llseek = generic_file_llseek,
343 .read = do_sync_read, 344 .read = new_sync_read,
344 .aio_read = generic_file_aio_read, 345 .read_iter = generic_file_read_iter,
345 .write = do_sync_write, 346 .write = new_sync_write,
346 .aio_write = generic_file_aio_write, 347 .write_iter = generic_file_write_iter,
347 .mmap = generic_file_mmap, 348 .mmap = generic_file_mmap,
348 .splice_read = generic_file_splice_read, 349 .splice_read = generic_file_splice_read,
349 .fsync = hfsplus_file_fsync, 350 .fsync = hfsplus_file_fsync,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 9c470fde9878..bb529f3b7f2b 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -378,11 +378,11 @@ static int hostfs_fsync(struct file *file, loff_t start, loff_t end,
378 378
379static const struct file_operations hostfs_file_fops = { 379static const struct file_operations hostfs_file_fops = {
380 .llseek = generic_file_llseek, 380 .llseek = generic_file_llseek,
381 .read = do_sync_read, 381 .read = new_sync_read,
382 .splice_read = generic_file_splice_read, 382 .splice_read = generic_file_splice_read,
383 .aio_read = generic_file_aio_read, 383 .read_iter = generic_file_read_iter,
384 .aio_write = generic_file_aio_write, 384 .write_iter = generic_file_write_iter,
385 .write = do_sync_write, 385 .write = new_sync_write,
386 .mmap = generic_file_mmap, 386 .mmap = generic_file_mmap,
387 .open = hostfs_file_open, 387 .open = hostfs_file_open,
388 .release = hostfs_file_release, 388 .release = hostfs_file_release,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 67c1a61e0955..7f54e5f76cec 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -197,10 +197,10 @@ const struct address_space_operations hpfs_aops = {
197const struct file_operations hpfs_file_ops = 197const struct file_operations hpfs_file_ops =
198{ 198{
199 .llseek = generic_file_llseek, 199 .llseek = generic_file_llseek,
200 .read = do_sync_read, 200 .read = new_sync_read,
201 .aio_read = generic_file_aio_read, 201 .read_iter = generic_file_read_iter,
202 .write = do_sync_write, 202 .write = new_sync_write,
203 .aio_write = generic_file_aio_write, 203 .write_iter = generic_file_write_iter,
204 .mmap = generic_file_mmap, 204 .mmap = generic_file_mmap,
205 .release = hpfs_file_release, 205 .release = hpfs_file_release,
206 .fsync = hpfs_file_fsync, 206 .fsync = hpfs_file_fsync,
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 256cd19a3b78..64989ca9ba90 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -51,10 +51,10 @@ const struct file_operations jffs2_file_operations =
51{ 51{
52 .llseek = generic_file_llseek, 52 .llseek = generic_file_llseek,
53 .open = generic_file_open, 53 .open = generic_file_open,
54 .read = do_sync_read, 54 .read = new_sync_read,
55 .aio_read = generic_file_aio_read, 55 .read_iter = generic_file_read_iter,
56 .write = do_sync_write, 56 .write = new_sync_write,
57 .aio_write = generic_file_aio_write, 57 .write_iter = generic_file_write_iter,
58 .unlocked_ioctl=jffs2_ioctl, 58 .unlocked_ioctl=jffs2_ioctl,
59 .mmap = generic_file_readonly_mmap, 59 .mmap = generic_file_readonly_mmap,
60 .fsync = jffs2_fsync, 60 .fsync = jffs2_fsync,
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 794da944d5cd..33aa0cc1f8b8 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -151,13 +151,13 @@ const struct inode_operations jfs_file_inode_operations = {
151const struct file_operations jfs_file_operations = { 151const struct file_operations jfs_file_operations = {
152 .open = jfs_open, 152 .open = jfs_open,
153 .llseek = generic_file_llseek, 153 .llseek = generic_file_llseek,
154 .write = do_sync_write, 154 .write = new_sync_write,
155 .read = do_sync_read, 155 .read = new_sync_read,
156 .aio_read = generic_file_aio_read, 156 .read_iter = generic_file_read_iter,
157 .aio_write = generic_file_aio_write, 157 .write_iter = generic_file_write_iter,
158 .mmap = generic_file_mmap, 158 .mmap = generic_file_mmap,
159 .splice_read = generic_file_splice_read, 159 .splice_read = generic_file_splice_read,
160 .splice_write = generic_file_splice_write, 160 .splice_write = iter_file_splice_write,
161 .fsync = jfs_fsync, 161 .fsync = jfs_fsync,
162 .release = jfs_release, 162 .release = jfs_release,
163 .unlocked_ioctl = jfs_ioctl, 163 .unlocked_ioctl = jfs_ioctl,
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 6f8fe72c2a7a..bd3df1ca3c9b 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -331,15 +331,15 @@ static sector_t jfs_bmap(struct address_space *mapping, sector_t block)
331} 331}
332 332
333static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, 333static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
334 const struct iovec *iov, loff_t offset, unsigned long nr_segs) 334 struct iov_iter *iter, loff_t offset)
335{ 335{
336 struct file *file = iocb->ki_filp; 336 struct file *file = iocb->ki_filp;
337 struct address_space *mapping = file->f_mapping; 337 struct address_space *mapping = file->f_mapping;
338 struct inode *inode = file->f_mapping->host; 338 struct inode *inode = file->f_mapping->host;
339 size_t count = iov_iter_count(iter);
339 ssize_t ret; 340 ssize_t ret;
340 341
341 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 342 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, jfs_get_block);
342 jfs_get_block);
343 343
344 /* 344 /*
345 * In case of error extending write may have instantiated a few 345 * In case of error extending write may have instantiated a few
@@ -347,7 +347,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
347 */ 347 */
348 if (unlikely((rw & WRITE) && ret < 0)) { 348 if (unlikely((rw & WRITE) && ret < 0)) {
349 loff_t isize = i_size_read(inode); 349 loff_t isize = i_size_read(inode);
350 loff_t end = offset + iov_length(iov, nr_segs); 350 loff_t end = offset + count;
351 351
352 if (end > isize) 352 if (end > isize)
353 jfs_write_failed(mapping, end); 353 jfs_write_failed(mapping, end);
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 57914fc32b62..8538752df2f6 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -264,15 +264,15 @@ const struct inode_operations logfs_reg_iops = {
264}; 264};
265 265
266const struct file_operations logfs_reg_fops = { 266const struct file_operations logfs_reg_fops = {
267 .aio_read = generic_file_aio_read, 267 .read_iter = generic_file_read_iter,
268 .aio_write = generic_file_aio_write, 268 .write_iter = generic_file_write_iter,
269 .fsync = logfs_fsync, 269 .fsync = logfs_fsync,
270 .unlocked_ioctl = logfs_ioctl, 270 .unlocked_ioctl = logfs_ioctl,
271 .llseek = generic_file_llseek, 271 .llseek = generic_file_llseek,
272 .mmap = generic_file_readonly_mmap, 272 .mmap = generic_file_readonly_mmap,
273 .open = generic_file_open, 273 .open = generic_file_open,
274 .read = do_sync_read, 274 .read = new_sync_read,
275 .write = do_sync_write, 275 .write = new_sync_write,
276}; 276};
277 277
278const struct address_space_operations logfs_reg_aops = { 278const struct address_space_operations logfs_reg_aops = {
diff --git a/fs/minix/file.c b/fs/minix/file.c
index adc6f5494231..a967de085ac0 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -14,10 +14,10 @@
14 */ 14 */
15const struct file_operations minix_file_operations = { 15const struct file_operations minix_file_operations = {
16 .llseek = generic_file_llseek, 16 .llseek = generic_file_llseek,
17 .read = do_sync_read, 17 .read = new_sync_read,
18 .aio_read = generic_file_aio_read, 18 .read_iter = generic_file_read_iter,
19 .write = do_sync_write, 19 .write = new_sync_write,
20 .aio_write = generic_file_aio_write, 20 .write_iter = generic_file_write_iter,
21 .mmap = generic_file_mmap, 21 .mmap = generic_file_mmap,
22 .fsync = generic_file_fsync, 22 .fsync = generic_file_fsync,
23 .splice_read = generic_file_splice_read, 23 .splice_read = generic_file_splice_read,
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4ad7bc388679..8f98138cbc43 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -212,20 +212,20 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
212 * shunt off direct read and write requests before the VFS gets them, 212 * shunt off direct read and write requests before the VFS gets them,
213 * so this method is only ever called for swap. 213 * so this method is only ever called for swap.
214 */ 214 */
215ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) 215ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
216{ 216{
217#ifndef CONFIG_NFS_SWAP 217#ifndef CONFIG_NFS_SWAP
218 dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n", 218 dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n",
219 iocb->ki_filp, (long long) pos, nr_segs); 219 iocb->ki_filp, (long long) pos, iter->nr_segs);
220 220
221 return -EINVAL; 221 return -EINVAL;
222#else 222#else
223 VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); 223 VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
224 224
225 if (rw == READ || rw == KERNEL_READ) 225 if (rw == READ || rw == KERNEL_READ)
226 return nfs_file_direct_read(iocb, iov, nr_segs, pos, 226 return nfs_file_direct_read(iocb, iter, pos,
227 rw == READ ? true : false); 227 rw == READ ? true : false);
228 return nfs_file_direct_write(iocb, iov, nr_segs, pos, 228 return nfs_file_direct_write(iocb, iter, pos,
229 rw == WRITE ? true : false); 229 rw == WRITE ? true : false);
230#endif /* CONFIG_NFS_SWAP */ 230#endif /* CONFIG_NFS_SWAP */
231} 231}
@@ -414,60 +414,37 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
414 * handled automatically by nfs_direct_read_result(). Otherwise, if 414 * handled automatically by nfs_direct_read_result(). Otherwise, if
415 * no requests have been sent, just return an error. 415 * no requests have been sent, just return an error.
416 */ 416 */
417static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
418 const struct iovec *iov,
419 loff_t pos, bool uio)
420{
421 struct nfs_direct_req *dreq = desc->pg_dreq;
422 struct nfs_open_context *ctx = dreq->ctx;
423 struct inode *inode = ctx->dentry->d_inode;
424 unsigned long user_addr = (unsigned long)iov->iov_base;
425 size_t count = iov->iov_len;
426 size_t rsize = NFS_SERVER(inode)->rsize;
427 unsigned int pgbase;
428 int result;
429 ssize_t started = 0;
430 struct page **pagevec = NULL;
431 unsigned int npages;
432
433 do {
434 size_t bytes;
435 int i;
436 417
437 pgbase = user_addr & ~PAGE_MASK; 418static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
438 bytes = min(max_t(size_t, rsize, PAGE_SIZE), count); 419 struct iov_iter *iter,
420 loff_t pos)
421{
422 struct nfs_pageio_descriptor desc;
423 struct inode *inode = dreq->inode;
424 ssize_t result = -EINVAL;
425 size_t requested_bytes = 0;
426 size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE);
439 427
440 result = -ENOMEM; 428 nfs_pageio_init_read(&desc, dreq->inode, false,
441 npages = nfs_page_array_len(pgbase, bytes); 429 &nfs_direct_read_completion_ops);
442 if (!pagevec) 430 get_dreq(dreq);
443 pagevec = kmalloc(npages * sizeof(struct page *), 431 desc.pg_dreq = dreq;
444 GFP_KERNEL); 432 atomic_inc(&inode->i_dio_count);
445 if (!pagevec)
446 break;
447 if (uio) {
448 down_read(&current->mm->mmap_sem);
449 result = get_user_pages(current, current->mm, user_addr,
450 npages, 1, 0, pagevec, NULL);
451 up_read(&current->mm->mmap_sem);
452 if (result < 0)
453 break;
454 } else {
455 WARN_ON(npages != 1);
456 result = get_kernel_page(user_addr, 1, pagevec);
457 if (WARN_ON(result != 1))
458 break;
459 }
460 433
461 if ((unsigned)result < npages) { 434 while (iov_iter_count(iter)) {
462 bytes = result * PAGE_SIZE; 435 struct page **pagevec;
463 if (bytes <= pgbase) { 436 size_t bytes;
464 nfs_direct_release_pages(pagevec, result); 437 size_t pgbase;
465 break; 438 unsigned npages, i;
466 }
467 bytes -= pgbase;
468 npages = result;
469 }
470 439
440 result = iov_iter_get_pages_alloc(iter, &pagevec,
441 rsize, &pgbase);
442 if (result < 0)
443 break;
444
445 bytes = result;
446 iov_iter_advance(iter, bytes);
447 npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
471 for (i = 0; i < npages; i++) { 448 for (i = 0; i < npages; i++) {
472 struct nfs_page *req; 449 struct nfs_page *req;
473 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); 450 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
@@ -480,56 +457,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
480 } 457 }
481 req->wb_index = pos >> PAGE_SHIFT; 458 req->wb_index = pos >> PAGE_SHIFT;
482 req->wb_offset = pos & ~PAGE_MASK; 459 req->wb_offset = pos & ~PAGE_MASK;
483 if (!nfs_pageio_add_request(desc, req)) { 460 if (!nfs_pageio_add_request(&desc, req)) {
484 result = desc->pg_error; 461 result = desc.pg_error;
485 nfs_release_request(req); 462 nfs_release_request(req);
486 break; 463 break;
487 } 464 }
488 pgbase = 0; 465 pgbase = 0;
489 bytes -= req_len; 466 bytes -= req_len;
490 started += req_len; 467 requested_bytes += req_len;
491 user_addr += req_len;
492 pos += req_len; 468 pos += req_len;
493 count -= req_len;
494 dreq->bytes_left -= req_len; 469 dreq->bytes_left -= req_len;
495 } 470 }
496 /* The nfs_page now hold references to these pages */
497 nfs_direct_release_pages(pagevec, npages); 471 nfs_direct_release_pages(pagevec, npages);
498 } while (count != 0 && result >= 0); 472 kvfree(pagevec);
499
500 kfree(pagevec);
501
502 if (started)
503 return started;
504 return result < 0 ? (ssize_t) result : -EFAULT;
505}
506
507static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
508 const struct iovec *iov,
509 unsigned long nr_segs,
510 loff_t pos, bool uio)
511{
512 struct nfs_pageio_descriptor desc;
513 struct inode *inode = dreq->inode;
514 ssize_t result = -EINVAL;
515 size_t requested_bytes = 0;
516 unsigned long seg;
517
518 nfs_pageio_init_read(&desc, dreq->inode, false,
519 &nfs_direct_read_completion_ops);
520 get_dreq(dreq);
521 desc.pg_dreq = dreq;
522 atomic_inc(&inode->i_dio_count);
523
524 for (seg = 0; seg < nr_segs; seg++) {
525 const struct iovec *vec = &iov[seg];
526 result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
527 if (result < 0) 473 if (result < 0)
528 break; 474 break;
529 requested_bytes += result;
530 if ((size_t)result < vec->iov_len)
531 break;
532 pos += vec->iov_len;
533 } 475 }
534 476
535 nfs_pageio_complete(&desc); 477 nfs_pageio_complete(&desc);
@@ -552,8 +494,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
552/** 494/**
553 * nfs_file_direct_read - file direct read operation for NFS files 495 * nfs_file_direct_read - file direct read operation for NFS files
554 * @iocb: target I/O control block 496 * @iocb: target I/O control block
555 * @iov: vector of user buffers into which to read data 497 * @iter: vector of user buffers into which to read data
556 * @nr_segs: size of iov vector
557 * @pos: byte offset in file where reading starts 498 * @pos: byte offset in file where reading starts
558 * 499 *
559 * We use this function for direct reads instead of calling 500 * We use this function for direct reads instead of calling
@@ -570,8 +511,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
570 * client must read the updated atime from the server back into its 511 * client must read the updated atime from the server back into its
571 * cache. 512 * cache.
572 */ 513 */
573ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, 514ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
574 unsigned long nr_segs, loff_t pos, bool uio) 515 loff_t pos, bool uio)
575{ 516{
576 struct file *file = iocb->ki_filp; 517 struct file *file = iocb->ki_filp;
577 struct address_space *mapping = file->f_mapping; 518 struct address_space *mapping = file->f_mapping;
@@ -579,9 +520,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
579 struct nfs_direct_req *dreq; 520 struct nfs_direct_req *dreq;
580 struct nfs_lock_context *l_ctx; 521 struct nfs_lock_context *l_ctx;
581 ssize_t result = -EINVAL; 522 ssize_t result = -EINVAL;
582 size_t count; 523 size_t count = iov_iter_count(iter);
583
584 count = iov_length(iov, nr_segs);
585 nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); 524 nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
586 525
587 dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", 526 dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
@@ -604,7 +543,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
604 goto out_unlock; 543 goto out_unlock;
605 544
606 dreq->inode = inode; 545 dreq->inode = inode;
607 dreq->bytes_left = iov_length(iov, nr_segs); 546 dreq->bytes_left = count;
608 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 547 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
609 l_ctx = nfs_get_lock_context(dreq->ctx); 548 l_ctx = nfs_get_lock_context(dreq->ctx);
610 if (IS_ERR(l_ctx)) { 549 if (IS_ERR(l_ctx)) {
@@ -615,8 +554,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
615 if (!is_sync_kiocb(iocb)) 554 if (!is_sync_kiocb(iocb))
616 dreq->iocb = iocb; 555 dreq->iocb = iocb;
617 556
618 NFS_I(inode)->read_io += iov_length(iov, nr_segs); 557 NFS_I(inode)->read_io += count;
619 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); 558 result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
620 559
621 mutex_unlock(&inode->i_mutex); 560 mutex_unlock(&inode->i_mutex);
622 561
@@ -772,108 +711,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
772} 711}
773#endif 712#endif
774 713
775/*
776 * NB: Return the value of the first error return code. Subsequent
777 * errors after the first one are ignored.
778 */
779/*
780 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
781 * operation. If nfs_writedata_alloc() or get_user_pages() fails,
782 * bail and stop sending more writes. Write length accounting is
783 * handled automatically by nfs_direct_write_result(). Otherwise, if
784 * no requests have been sent, just return an error.
785 */
786static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
787 const struct iovec *iov,
788 loff_t pos, bool uio)
789{
790 struct nfs_direct_req *dreq = desc->pg_dreq;
791 struct nfs_open_context *ctx = dreq->ctx;
792 struct inode *inode = ctx->dentry->d_inode;
793 unsigned long user_addr = (unsigned long)iov->iov_base;
794 size_t count = iov->iov_len;
795 size_t wsize = NFS_SERVER(inode)->wsize;
796 unsigned int pgbase;
797 int result;
798 ssize_t started = 0;
799 struct page **pagevec = NULL;
800 unsigned int npages;
801
802 do {
803 size_t bytes;
804 int i;
805
806 pgbase = user_addr & ~PAGE_MASK;
807 bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
808
809 result = -ENOMEM;
810 npages = nfs_page_array_len(pgbase, bytes);
811 if (!pagevec)
812 pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
813 if (!pagevec)
814 break;
815
816 if (uio) {
817 down_read(&current->mm->mmap_sem);
818 result = get_user_pages(current, current->mm, user_addr,
819 npages, 0, 0, pagevec, NULL);
820 up_read(&current->mm->mmap_sem);
821 if (result < 0)
822 break;
823 } else {
824 WARN_ON(npages != 1);
825 result = get_kernel_page(user_addr, 0, pagevec);
826 if (WARN_ON(result != 1))
827 break;
828 }
829
830 if ((unsigned)result < npages) {
831 bytes = result * PAGE_SIZE;
832 if (bytes <= pgbase) {
833 nfs_direct_release_pages(pagevec, result);
834 break;
835 }
836 bytes -= pgbase;
837 npages = result;
838 }
839
840 for (i = 0; i < npages; i++) {
841 struct nfs_page *req;
842 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
843
844 req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
845 pgbase, req_len);
846 if (IS_ERR(req)) {
847 result = PTR_ERR(req);
848 break;
849 }
850 nfs_lock_request(req);
851 req->wb_index = pos >> PAGE_SHIFT;
852 req->wb_offset = pos & ~PAGE_MASK;
853 if (!nfs_pageio_add_request(desc, req)) {
854 result = desc->pg_error;
855 nfs_unlock_and_release_request(req);
856 break;
857 }
858 pgbase = 0;
859 bytes -= req_len;
860 started += req_len;
861 user_addr += req_len;
862 pos += req_len;
863 count -= req_len;
864 dreq->bytes_left -= req_len;
865 }
866 /* The nfs_page now hold references to these pages */
867 nfs_direct_release_pages(pagevec, npages);
868 } while (count != 0 && result >= 0);
869
870 kfree(pagevec);
871
872 if (started)
873 return started;
874 return result < 0 ? (ssize_t) result : -EFAULT;
875}
876
877static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) 714static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
878{ 715{
879 struct nfs_direct_req *dreq = hdr->dreq; 716 struct nfs_direct_req *dreq = hdr->dreq;
@@ -956,16 +793,27 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
956 .completion = nfs_direct_write_completion, 793 .completion = nfs_direct_write_completion,
957}; 794};
958 795
796
797/*
798 * NB: Return the value of the first error return code. Subsequent
799 * errors after the first one are ignored.
800 */
801/*
802 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
803 * operation. If nfs_writedata_alloc() or get_user_pages() fails,
804 * bail and stop sending more writes. Write length accounting is
805 * handled automatically by nfs_direct_write_result(). Otherwise, if
806 * no requests have been sent, just return an error.
807 */
959static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, 808static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
960 const struct iovec *iov, 809 struct iov_iter *iter,
961 unsigned long nr_segs, 810 loff_t pos)
962 loff_t pos, bool uio)
963{ 811{
964 struct nfs_pageio_descriptor desc; 812 struct nfs_pageio_descriptor desc;
965 struct inode *inode = dreq->inode; 813 struct inode *inode = dreq->inode;
966 ssize_t result = 0; 814 ssize_t result = 0;
967 size_t requested_bytes = 0; 815 size_t requested_bytes = 0;
968 unsigned long seg; 816 size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
969 817
970 nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, 818 nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
971 &nfs_direct_write_completion_ops); 819 &nfs_direct_write_completion_ops);
@@ -973,16 +821,49 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
973 get_dreq(dreq); 821 get_dreq(dreq);
974 atomic_inc(&inode->i_dio_count); 822 atomic_inc(&inode->i_dio_count);
975 823
976 NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs); 824 NFS_I(inode)->write_io += iov_iter_count(iter);
977 for (seg = 0; seg < nr_segs; seg++) { 825 while (iov_iter_count(iter)) {
978 const struct iovec *vec = &iov[seg]; 826 struct page **pagevec;
979 result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); 827 size_t bytes;
828 size_t pgbase;
829 unsigned npages, i;
830
831 result = iov_iter_get_pages_alloc(iter, &pagevec,
832 wsize, &pgbase);
980 if (result < 0) 833 if (result < 0)
981 break; 834 break;
982 requested_bytes += result; 835
983 if ((size_t)result < vec->iov_len) 836 bytes = result;
837 iov_iter_advance(iter, bytes);
838 npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
839 for (i = 0; i < npages; i++) {
840 struct nfs_page *req;
841 unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
842
843 req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
844 pgbase, req_len);
845 if (IS_ERR(req)) {
846 result = PTR_ERR(req);
847 break;
848 }
849 nfs_lock_request(req);
850 req->wb_index = pos >> PAGE_SHIFT;
851 req->wb_offset = pos & ~PAGE_MASK;
852 if (!nfs_pageio_add_request(&desc, req)) {
853 result = desc.pg_error;
854 nfs_unlock_and_release_request(req);
855 break;
856 }
857 pgbase = 0;
858 bytes -= req_len;
859 requested_bytes += req_len;
860 pos += req_len;
861 dreq->bytes_left -= req_len;
862 }
863 nfs_direct_release_pages(pagevec, npages);
864 kvfree(pagevec);
865 if (result < 0)
984 break; 866 break;
985 pos += vec->iov_len;
986 } 867 }
987 nfs_pageio_complete(&desc); 868 nfs_pageio_complete(&desc);
988 869
@@ -1004,8 +885,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
1004/** 885/**
1005 * nfs_file_direct_write - file direct write operation for NFS files 886 * nfs_file_direct_write - file direct write operation for NFS files
1006 * @iocb: target I/O control block 887 * @iocb: target I/O control block
1007 * @iov: vector of user buffers from which to write data 888 * @iter: vector of user buffers from which to write data
1008 * @nr_segs: size of iov vector
1009 * @pos: byte offset in file where writing starts 889 * @pos: byte offset in file where writing starts
1010 * 890 *
1011 * We use this function for direct writes instead of calling 891 * We use this function for direct writes instead of calling
@@ -1023,8 +903,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
1023 * Note that O_APPEND is not supported for NFS direct writes, as there 903 * Note that O_APPEND is not supported for NFS direct writes, as there
1024 * is no atomic O_APPEND write facility in the NFS protocol. 904 * is no atomic O_APPEND write facility in the NFS protocol.
1025 */ 905 */
1026ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, 906ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
1027 unsigned long nr_segs, loff_t pos, bool uio) 907 loff_t pos, bool uio)
1028{ 908{
1029 ssize_t result = -EINVAL; 909 ssize_t result = -EINVAL;
1030 struct file *file = iocb->ki_filp; 910 struct file *file = iocb->ki_filp;
@@ -1033,9 +913,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
1033 struct nfs_direct_req *dreq; 913 struct nfs_direct_req *dreq;
1034 struct nfs_lock_context *l_ctx; 914 struct nfs_lock_context *l_ctx;
1035 loff_t end; 915 loff_t end;
1036 size_t count; 916 size_t count = iov_iter_count(iter);
1037
1038 count = iov_length(iov, nr_segs);
1039 end = (pos + count - 1) >> PAGE_CACHE_SHIFT; 917 end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
1040 918
1041 nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); 919 nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
@@ -1086,7 +964,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
1086 if (!is_sync_kiocb(iocb)) 964 if (!is_sync_kiocb(iocb))
1087 dreq->iocb = iocb; 965 dreq->iocb = iocb;
1088 966
1089 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio); 967 result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
1090 968
1091 if (mapping->nrpages) { 969 if (mapping->nrpages) {
1092 invalidate_inode_pages2_range(mapping, 970 invalidate_inode_pages2_range(mapping,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index c1edf7336315..4042ff58fe3f 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -165,22 +165,21 @@ nfs_file_flush(struct file *file, fl_owner_t id)
165EXPORT_SYMBOL_GPL(nfs_file_flush); 165EXPORT_SYMBOL_GPL(nfs_file_flush);
166 166
167ssize_t 167ssize_t
168nfs_file_read(struct kiocb *iocb, const struct iovec *iov, 168nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
169 unsigned long nr_segs, loff_t pos)
170{ 169{
171 struct inode *inode = file_inode(iocb->ki_filp); 170 struct inode *inode = file_inode(iocb->ki_filp);
172 ssize_t result; 171 ssize_t result;
173 172
174 if (iocb->ki_filp->f_flags & O_DIRECT) 173 if (iocb->ki_filp->f_flags & O_DIRECT)
175 return nfs_file_direct_read(iocb, iov, nr_segs, pos, true); 174 return nfs_file_direct_read(iocb, to, iocb->ki_pos, true);
176 175
177 dprintk("NFS: read(%pD2, %lu@%lu)\n", 176 dprintk("NFS: read(%pD2, %zu@%lu)\n",
178 iocb->ki_filp, 177 iocb->ki_filp,
179 (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos); 178 iov_iter_count(to), (unsigned long) iocb->ki_pos);
180 179
181 result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); 180 result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
182 if (!result) { 181 if (!result) {
183 result = generic_file_aio_read(iocb, iov, nr_segs, pos); 182 result = generic_file_read_iter(iocb, to);
184 if (result > 0) 183 if (result > 0)
185 nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); 184 nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
186 } 185 }
@@ -635,24 +634,24 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode)
635 return 0; 634 return 0;
636} 635}
637 636
638ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, 637ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
639 unsigned long nr_segs, loff_t pos)
640{ 638{
641 struct file *file = iocb->ki_filp; 639 struct file *file = iocb->ki_filp;
642 struct inode *inode = file_inode(file); 640 struct inode *inode = file_inode(file);
643 unsigned long written = 0; 641 unsigned long written = 0;
644 ssize_t result; 642 ssize_t result;
645 size_t count = iov_length(iov, nr_segs); 643 size_t count = iov_iter_count(from);
644 loff_t pos = iocb->ki_pos;
646 645
647 result = nfs_key_timeout_notify(file, inode); 646 result = nfs_key_timeout_notify(file, inode);
648 if (result) 647 if (result)
649 return result; 648 return result;
650 649
651 if (file->f_flags & O_DIRECT) 650 if (file->f_flags & O_DIRECT)
652 return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); 651 return nfs_file_direct_write(iocb, from, pos, true);
653 652
654 dprintk("NFS: write(%pD2, %lu@%Ld)\n", 653 dprintk("NFS: write(%pD2, %zu@%Ld)\n",
655 file, (unsigned long) count, (long long) pos); 654 file, count, (long long) pos);
656 655
657 result = -EBUSY; 656 result = -EBUSY;
658 if (IS_SWAPFILE(inode)) 657 if (IS_SWAPFILE(inode))
@@ -670,7 +669,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
670 if (!count) 669 if (!count)
671 goto out; 670 goto out;
672 671
673 result = generic_file_aio_write(iocb, iov, nr_segs, pos); 672 result = generic_file_write_iter(iocb, from);
674 if (result > 0) 673 if (result > 0)
675 written = result; 674 written = result;
676 675
@@ -691,36 +690,6 @@ out_swapfile:
691} 690}
692EXPORT_SYMBOL_GPL(nfs_file_write); 691EXPORT_SYMBOL_GPL(nfs_file_write);
693 692
694ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
695 struct file *filp, loff_t *ppos,
696 size_t count, unsigned int flags)
697{
698 struct inode *inode = file_inode(filp);
699 unsigned long written = 0;
700 ssize_t ret;
701
702 dprintk("NFS splice_write(%pD2, %lu@%llu)\n",
703 filp, (unsigned long) count, (unsigned long long) *ppos);
704
705 /*
706 * The combination of splice and an O_APPEND destination is disallowed.
707 */
708
709 ret = generic_file_splice_write(pipe, filp, ppos, count, flags);
710 if (ret > 0)
711 written = ret;
712
713 if (ret >= 0 && nfs_need_sync_write(filp, inode)) {
714 int err = vfs_fsync(filp, 0);
715 if (err < 0)
716 ret = err;
717 }
718 if (ret > 0)
719 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
720 return ret;
721}
722EXPORT_SYMBOL_GPL(nfs_file_splice_write);
723
724static int 693static int
725do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) 694do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
726{ 695{
@@ -935,10 +904,10 @@ EXPORT_SYMBOL_GPL(nfs_setlease);
935 904
936const struct file_operations nfs_file_operations = { 905const struct file_operations nfs_file_operations = {
937 .llseek = nfs_file_llseek, 906 .llseek = nfs_file_llseek,
938 .read = do_sync_read, 907 .read = new_sync_read,
939 .write = do_sync_write, 908 .write = new_sync_write,
940 .aio_read = nfs_file_read, 909 .read_iter = nfs_file_read,
941 .aio_write = nfs_file_write, 910 .write_iter = nfs_file_write,
942 .mmap = nfs_file_mmap, 911 .mmap = nfs_file_mmap,
943 .open = nfs_file_open, 912 .open = nfs_file_open,
944 .flush = nfs_file_flush, 913 .flush = nfs_file_flush,
@@ -947,7 +916,7 @@ const struct file_operations nfs_file_operations = {
947 .lock = nfs_lock, 916 .lock = nfs_lock,
948 .flock = nfs_flock, 917 .flock = nfs_flock,
949 .splice_read = nfs_file_splice_read, 918 .splice_read = nfs_file_splice_read,
950 .splice_write = nfs_file_splice_write, 919 .splice_write = iter_file_splice_write,
951 .check_flags = nfs_check_flags, 920 .check_flags = nfs_check_flags,
952 .setlease = nfs_setlease, 921 .setlease = nfs_setlease,
953}; 922};
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 8b69cba1bb04..82ddbf46660e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -327,16 +327,14 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *)
327int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int); 327int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int);
328loff_t nfs_file_llseek(struct file *, loff_t, int); 328loff_t nfs_file_llseek(struct file *, loff_t, int);
329int nfs_file_flush(struct file *, fl_owner_t); 329int nfs_file_flush(struct file *, fl_owner_t);
330ssize_t nfs_file_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); 330ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
331ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, 331ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
332 size_t, unsigned int); 332 size_t, unsigned int);
333int nfs_file_mmap(struct file *, struct vm_area_struct *); 333int nfs_file_mmap(struct file *, struct vm_area_struct *);
334ssize_t nfs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); 334ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);
335int nfs_file_release(struct inode *, struct file *); 335int nfs_file_release(struct inode *, struct file *);
336int nfs_lock(struct file *, int, struct file_lock *); 336int nfs_lock(struct file *, int, struct file_lock *);
337int nfs_flock(struct file *, int, struct file_lock *); 337int nfs_flock(struct file *, int, struct file_lock *);
338ssize_t nfs_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *,
339 size_t, unsigned int);
340int nfs_check_flags(int); 338int nfs_check_flags(int);
341int nfs_setlease(struct file *, long, struct file_lock **); 339int nfs_setlease(struct file *, long, struct file_lock **);
342 340
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 464db9dd6318..a816f0627a6c 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -117,10 +117,10 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
117 117
118const struct file_operations nfs4_file_operations = { 118const struct file_operations nfs4_file_operations = {
119 .llseek = nfs_file_llseek, 119 .llseek = nfs_file_llseek,
120 .read = do_sync_read, 120 .read = new_sync_read,
121 .write = do_sync_write, 121 .write = new_sync_write,
122 .aio_read = nfs_file_read, 122 .read_iter = nfs_file_read,
123 .aio_write = nfs_file_write, 123 .write_iter = nfs_file_write,
124 .mmap = nfs_file_mmap, 124 .mmap = nfs_file_mmap,
125 .open = nfs4_file_open, 125 .open = nfs4_file_open,
126 .flush = nfs_file_flush, 126 .flush = nfs_file_flush,
@@ -129,7 +129,7 @@ const struct file_operations nfs4_file_operations = {
129 .lock = nfs_lock, 129 .lock = nfs_lock,
130 .flock = nfs_flock, 130 .flock = nfs_flock,
131 .splice_read = nfs_file_splice_read, 131 .splice_read = nfs_file_splice_read,
132 .splice_write = nfs_file_splice_write, 132 .splice_write = iter_file_splice_write,
133 .check_flags = nfs_check_flags, 133 .check_flags = nfs_check_flags,
134 .setlease = nfs_setlease, 134 .setlease = nfs_setlease,
135}; 135};
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index f3a82fbcae02..24978153c0c4 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -152,10 +152,10 @@ static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
152 */ 152 */
153const struct file_operations nilfs_file_operations = { 153const struct file_operations nilfs_file_operations = {
154 .llseek = generic_file_llseek, 154 .llseek = generic_file_llseek,
155 .read = do_sync_read, 155 .read = new_sync_read,
156 .write = do_sync_write, 156 .write = new_sync_write,
157 .aio_read = generic_file_aio_read, 157 .read_iter = generic_file_read_iter,
158 .aio_write = generic_file_aio_write, 158 .write_iter = generic_file_write_iter,
159 .unlocked_ioctl = nilfs_ioctl, 159 .unlocked_ioctl = nilfs_ioctl,
160#ifdef CONFIG_COMPAT 160#ifdef CONFIG_COMPAT
161 .compat_ioctl = nilfs_compat_ioctl, 161 .compat_ioctl = nilfs_compat_ioctl,
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index b9c5726120e3..6252b173a465 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -298,19 +298,20 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping,
298} 298}
299 299
300static ssize_t 300static ssize_t
301nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 301nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
302 loff_t offset, unsigned long nr_segs) 302 loff_t offset)
303{ 303{
304 struct file *file = iocb->ki_filp; 304 struct file *file = iocb->ki_filp;
305 struct address_space *mapping = file->f_mapping; 305 struct address_space *mapping = file->f_mapping;
306 struct inode *inode = file->f_mapping->host; 306 struct inode *inode = file->f_mapping->host;
307 size_t count = iov_iter_count(iter);
307 ssize_t size; 308 ssize_t size;
308 309
309 if (rw == WRITE) 310 if (rw == WRITE)
310 return 0; 311 return 0;
311 312
312 /* Needs synchronization with the cleaner */ 313 /* Needs synchronization with the cleaner */
313 size = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 314 size = blockdev_direct_IO(rw, iocb, inode, iter, offset,
314 nilfs_get_block); 315 nilfs_get_block);
315 316
316 /* 317 /*
@@ -319,7 +320,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
319 */ 320 */
320 if (unlikely((rw & WRITE) && size < 0)) { 321 if (unlikely((rw & WRITE) && size < 0)) {
321 loff_t isize = i_size_read(inode); 322 loff_t isize = i_size_read(inode);
322 loff_t end = offset + iov_length(iov, nr_segs); 323 loff_t end = offset + count;
323 324
324 if (end > isize) 325 if (end > isize)
325 nilfs_write_failed(mapping, end); 326 nilfs_write_failed(mapping, end);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 86ddab916b66..5c9e2c81cb11 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2090,10 +2090,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
2090 size_t count; /* after file limit checks */ 2090 size_t count; /* after file limit checks */
2091 ssize_t written, err; 2091 ssize_t written, err;
2092 2092
2093 count = 0; 2093 count = iov_length(iov, nr_segs);
2094 err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
2095 if (err)
2096 return err;
2097 pos = *ppos; 2094 pos = *ppos;
2098 /* We can write back this queue in page reclaim. */ 2095 /* We can write back this queue in page reclaim. */
2099 current->backing_dev_info = mapping->backing_dev_info; 2096 current->backing_dev_info = mapping->backing_dev_info;
@@ -2202,8 +2199,8 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
2202 2199
2203const struct file_operations ntfs_file_ops = { 2200const struct file_operations ntfs_file_ops = {
2204 .llseek = generic_file_llseek, /* Seek inside file. */ 2201 .llseek = generic_file_llseek, /* Seek inside file. */
2205 .read = do_sync_read, /* Read from file. */ 2202 .read = new_sync_read, /* Read from file. */
2206 .aio_read = generic_file_aio_read, /* Async read from file. */ 2203 .read_iter = generic_file_read_iter, /* Async read from file. */
2207#ifdef NTFS_RW 2204#ifdef NTFS_RW
2208 .write = do_sync_write, /* Write to file. */ 2205 .write = do_sync_write, /* Write to file. */
2209 .aio_write = ntfs_file_aio_write, /* Async write to file. */ 2206 .aio_write = ntfs_file_aio_write, /* Async write to file. */
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index d310d12a9adc..4a231a166cf8 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -599,9 +599,8 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait)
599 599
600static ssize_t ocfs2_direct_IO(int rw, 600static ssize_t ocfs2_direct_IO(int rw,
601 struct kiocb *iocb, 601 struct kiocb *iocb,
602 const struct iovec *iov, 602 struct iov_iter *iter,
603 loff_t offset, 603 loff_t offset)
604 unsigned long nr_segs)
605{ 604{
606 struct file *file = iocb->ki_filp; 605 struct file *file = iocb->ki_filp;
607 struct inode *inode = file_inode(file)->i_mapping->host; 606 struct inode *inode = file_inode(file)->i_mapping->host;
@@ -618,7 +617,7 @@ static ssize_t ocfs2_direct_IO(int rw,
618 return 0; 617 return 0;
619 618
620 return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, 619 return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
621 iov, offset, nr_segs, 620 iter, offset,
622 ocfs2_direct_IO_get_blocks, 621 ocfs2_direct_IO_get_blocks,
623 ocfs2_dio_end_io, NULL, 0); 622 ocfs2_dio_end_io, NULL, 0);
624} 623}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8eb6e5732d3b..2930e231f3f9 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2233,16 +2233,13 @@ out:
2233 return ret; 2233 return ret;
2234} 2234}
2235 2235
2236static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, 2236static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
2237 const struct iovec *iov, 2237 struct iov_iter *from)
2238 unsigned long nr_segs,
2239 loff_t pos)
2240{ 2238{
2241 int ret, direct_io, appending, rw_level, have_alloc_sem = 0; 2239 int ret, direct_io, appending, rw_level, have_alloc_sem = 0;
2242 int can_do_direct, has_refcount = 0; 2240 int can_do_direct, has_refcount = 0;
2243 ssize_t written = 0; 2241 ssize_t written = 0;
2244 size_t ocount; /* original count */ 2242 size_t count = iov_iter_count(from);
2245 size_t count; /* after file limit checks */
2246 loff_t old_size, *ppos = &iocb->ki_pos; 2243 loff_t old_size, *ppos = &iocb->ki_pos;
2247 u32 old_clusters; 2244 u32 old_clusters;
2248 struct file *file = iocb->ki_filp; 2245 struct file *file = iocb->ki_filp;
@@ -2256,7 +2253,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2256 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2253 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2257 file->f_path.dentry->d_name.len, 2254 file->f_path.dentry->d_name.len,
2258 file->f_path.dentry->d_name.name, 2255 file->f_path.dentry->d_name.name,
2259 (unsigned int)nr_segs); 2256 (unsigned int)from->nr_segs); /* GRRRRR */
2260 2257
2261 if (iocb->ki_nbytes == 0) 2258 if (iocb->ki_nbytes == 0)
2262 return 0; 2259 return 0;
@@ -2354,29 +2351,21 @@ relock:
2354 /* communicate with ocfs2_dio_end_io */ 2351 /* communicate with ocfs2_dio_end_io */
2355 ocfs2_iocb_set_rw_locked(iocb, rw_level); 2352 ocfs2_iocb_set_rw_locked(iocb, rw_level);
2356 2353
2357 ret = generic_segment_checks(iov, &nr_segs, &ocount,
2358 VERIFY_READ);
2359 if (ret)
2360 goto out_dio;
2361
2362 count = ocount;
2363 ret = generic_write_checks(file, ppos, &count, 2354 ret = generic_write_checks(file, ppos, &count,
2364 S_ISBLK(inode->i_mode)); 2355 S_ISBLK(inode->i_mode));
2365 if (ret) 2356 if (ret)
2366 goto out_dio; 2357 goto out_dio;
2367 2358
2359 iov_iter_truncate(from, count);
2368 if (direct_io) { 2360 if (direct_io) {
2369 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, 2361 written = generic_file_direct_write(iocb, from, *ppos);
2370 count, ocount);
2371 if (written < 0) { 2362 if (written < 0) {
2372 ret = written; 2363 ret = written;
2373 goto out_dio; 2364 goto out_dio;
2374 } 2365 }
2375 } else { 2366 } else {
2376 struct iov_iter from;
2377 iov_iter_init(&from, iov, nr_segs, count, 0);
2378 current->backing_dev_info = file->f_mapping->backing_dev_info; 2367 current->backing_dev_info = file->f_mapping->backing_dev_info;
2379 written = generic_perform_write(file, &from, *ppos); 2368 written = generic_perform_write(file, from, *ppos);
2380 if (likely(written >= 0)) 2369 if (likely(written >= 0))
2381 iocb->ki_pos = *ppos + written; 2370 iocb->ki_pos = *ppos + written;
2382 current->backing_dev_info = NULL; 2371 current->backing_dev_info = NULL;
@@ -2441,84 +2430,6 @@ out_sems:
2441 return ret; 2430 return ret;
2442} 2431}
2443 2432
2444static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
2445 struct file *out,
2446 struct splice_desc *sd)
2447{
2448 int ret;
2449
2450 ret = ocfs2_prepare_inode_for_write(out, &sd->pos,
2451 sd->total_len, 0, NULL, NULL);
2452 if (ret < 0) {
2453 mlog_errno(ret);
2454 return ret;
2455 }
2456
2457 return splice_from_pipe_feed(pipe, sd, pipe_to_file);
2458}
2459
2460static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
2461 struct file *out,
2462 loff_t *ppos,
2463 size_t len,
2464 unsigned int flags)
2465{
2466 int ret;
2467 struct address_space *mapping = out->f_mapping;
2468 struct inode *inode = mapping->host;
2469 struct splice_desc sd = {
2470 .total_len = len,
2471 .flags = flags,
2472 .pos = *ppos,
2473 .u.file = out,
2474 };
2475
2476
2477 trace_ocfs2_file_splice_write(inode, out, out->f_path.dentry,
2478 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2479 out->f_path.dentry->d_name.len,
2480 out->f_path.dentry->d_name.name, len);
2481
2482 pipe_lock(pipe);
2483
2484 splice_from_pipe_begin(&sd);
2485 do {
2486 ret = splice_from_pipe_next(pipe, &sd);
2487 if (ret <= 0)
2488 break;
2489
2490 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
2491 ret = ocfs2_rw_lock(inode, 1);
2492 if (ret < 0)
2493 mlog_errno(ret);
2494 else {
2495 ret = ocfs2_splice_to_file(pipe, out, &sd);
2496 ocfs2_rw_unlock(inode, 1);
2497 }
2498 mutex_unlock(&inode->i_mutex);
2499 } while (ret > 0);
2500 splice_from_pipe_end(pipe, &sd);
2501
2502 pipe_unlock(pipe);
2503
2504 if (sd.num_spliced)
2505 ret = sd.num_spliced;
2506
2507 if (ret > 0) {
2508 int err;
2509
2510 err = generic_write_sync(out, *ppos, ret);
2511 if (err)
2512 ret = err;
2513 else
2514 *ppos += ret;
2515
2516 balance_dirty_pages_ratelimited(mapping);
2517 }
2518
2519 return ret;
2520}
2521
2522static ssize_t ocfs2_file_splice_read(struct file *in, 2433static ssize_t ocfs2_file_splice_read(struct file *in,
2523 loff_t *ppos, 2434 loff_t *ppos,
2524 struct pipe_inode_info *pipe, 2435 struct pipe_inode_info *pipe,
@@ -2534,7 +2445,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
2534 in->f_path.dentry->d_name.name, len); 2445 in->f_path.dentry->d_name.name, len);
2535 2446
2536 /* 2447 /*
2537 * See the comment in ocfs2_file_aio_read() 2448 * See the comment in ocfs2_file_read_iter()
2538 */ 2449 */
2539 ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level); 2450 ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level);
2540 if (ret < 0) { 2451 if (ret < 0) {
@@ -2549,10 +2460,8 @@ bail:
2549 return ret; 2460 return ret;
2550} 2461}
2551 2462
2552static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, 2463static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
2553 const struct iovec *iov, 2464 struct iov_iter *to)
2554 unsigned long nr_segs,
2555 loff_t pos)
2556{ 2465{
2557 int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0; 2466 int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
2558 struct file *filp = iocb->ki_filp; 2467 struct file *filp = iocb->ki_filp;
@@ -2561,7 +2470,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2561 trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, 2470 trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
2562 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2471 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2563 filp->f_path.dentry->d_name.len, 2472 filp->f_path.dentry->d_name.len,
2564 filp->f_path.dentry->d_name.name, nr_segs); 2473 filp->f_path.dentry->d_name.name,
2474 to->nr_segs); /* GRRRRR */
2565 2475
2566 2476
2567 if (!inode) { 2477 if (!inode) {
@@ -2606,13 +2516,13 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2606 } 2516 }
2607 ocfs2_inode_unlock(inode, lock_level); 2517 ocfs2_inode_unlock(inode, lock_level);
2608 2518
2609 ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); 2519 ret = generic_file_read_iter(iocb, to);
2610 trace_generic_file_aio_read_ret(ret); 2520 trace_generic_file_aio_read_ret(ret);
2611 2521
2612 /* buffered aio wouldn't have proper lock coverage today */ 2522 /* buffered aio wouldn't have proper lock coverage today */
2613 BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); 2523 BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
2614 2524
2615 /* see ocfs2_file_aio_write */ 2525 /* see ocfs2_file_write_iter */
2616 if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { 2526 if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
2617 rw_level = -1; 2527 rw_level = -1;
2618 have_alloc_sem = 0; 2528 have_alloc_sem = 0;
@@ -2705,14 +2615,14 @@ const struct inode_operations ocfs2_special_file_iops = {
2705 */ 2615 */
2706const struct file_operations ocfs2_fops = { 2616const struct file_operations ocfs2_fops = {
2707 .llseek = ocfs2_file_llseek, 2617 .llseek = ocfs2_file_llseek,
2708 .read = do_sync_read, 2618 .read = new_sync_read,
2709 .write = do_sync_write, 2619 .write = new_sync_write,
2710 .mmap = ocfs2_mmap, 2620 .mmap = ocfs2_mmap,
2711 .fsync = ocfs2_sync_file, 2621 .fsync = ocfs2_sync_file,
2712 .release = ocfs2_file_release, 2622 .release = ocfs2_file_release,
2713 .open = ocfs2_file_open, 2623 .open = ocfs2_file_open,
2714 .aio_read = ocfs2_file_aio_read, 2624 .read_iter = ocfs2_file_read_iter,
2715 .aio_write = ocfs2_file_aio_write, 2625 .write_iter = ocfs2_file_write_iter,
2716 .unlocked_ioctl = ocfs2_ioctl, 2626 .unlocked_ioctl = ocfs2_ioctl,
2717#ifdef CONFIG_COMPAT 2627#ifdef CONFIG_COMPAT
2718 .compat_ioctl = ocfs2_compat_ioctl, 2628 .compat_ioctl = ocfs2_compat_ioctl,
@@ -2720,7 +2630,7 @@ const struct file_operations ocfs2_fops = {
2720 .lock = ocfs2_lock, 2630 .lock = ocfs2_lock,
2721 .flock = ocfs2_flock, 2631 .flock = ocfs2_flock,
2722 .splice_read = ocfs2_file_splice_read, 2632 .splice_read = ocfs2_file_splice_read,
2723 .splice_write = ocfs2_file_splice_write, 2633 .splice_write = iter_file_splice_write,
2724 .fallocate = ocfs2_fallocate, 2634 .fallocate = ocfs2_fallocate,
2725}; 2635};
2726 2636
@@ -2753,21 +2663,21 @@ const struct file_operations ocfs2_dops = {
2753 */ 2663 */
2754const struct file_operations ocfs2_fops_no_plocks = { 2664const struct file_operations ocfs2_fops_no_plocks = {
2755 .llseek = ocfs2_file_llseek, 2665 .llseek = ocfs2_file_llseek,
2756 .read = do_sync_read, 2666 .read = new_sync_read,
2757 .write = do_sync_write, 2667 .write = new_sync_write,
2758 .mmap = ocfs2_mmap, 2668 .mmap = ocfs2_mmap,
2759 .fsync = ocfs2_sync_file, 2669 .fsync = ocfs2_sync_file,
2760 .release = ocfs2_file_release, 2670 .release = ocfs2_file_release,
2761 .open = ocfs2_file_open, 2671 .open = ocfs2_file_open,
2762 .aio_read = ocfs2_file_aio_read, 2672 .read_iter = ocfs2_file_read_iter,
2763 .aio_write = ocfs2_file_aio_write, 2673 .write_iter = ocfs2_file_write_iter,
2764 .unlocked_ioctl = ocfs2_ioctl, 2674 .unlocked_ioctl = ocfs2_ioctl,
2765#ifdef CONFIG_COMPAT 2675#ifdef CONFIG_COMPAT
2766 .compat_ioctl = ocfs2_compat_ioctl, 2676 .compat_ioctl = ocfs2_compat_ioctl,
2767#endif 2677#endif
2768 .flock = ocfs2_flock, 2678 .flock = ocfs2_flock,
2769 .splice_read = ocfs2_file_splice_read, 2679 .splice_read = ocfs2_file_splice_read,
2770 .splice_write = ocfs2_file_splice_write, 2680 .splice_write = iter_file_splice_write,
2771 .fallocate = ocfs2_fallocate, 2681 .fallocate = ocfs2_fallocate,
2772}; 2682};
2773 2683
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 54d57d6ba68d..902e88527fce 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -337,10 +337,10 @@ static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
337 337
338const struct file_operations omfs_file_operations = { 338const struct file_operations omfs_file_operations = {
339 .llseek = generic_file_llseek, 339 .llseek = generic_file_llseek,
340 .read = do_sync_read, 340 .read = new_sync_read,
341 .write = do_sync_write, 341 .write = new_sync_write,
342 .aio_read = generic_file_aio_read, 342 .read_iter = generic_file_read_iter,
343 .aio_write = generic_file_aio_write, 343 .write_iter = generic_file_write_iter,
344 .mmap = generic_file_mmap, 344 .mmap = generic_file_mmap,
345 .fsync = generic_file_fsync, 345 .fsync = generic_file_fsync,
346 .splice_read = generic_file_splice_read, 346 .splice_read = generic_file_splice_read,
diff --git a/fs/open.c b/fs/open.c
index 9d64679cec73..36662d036237 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -725,6 +725,12 @@ static int do_dentry_open(struct file *f,
725 } 725 }
726 if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) 726 if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
727 i_readcount_inc(inode); 727 i_readcount_inc(inode);
728 if ((f->f_mode & FMODE_READ) &&
729 likely(f->f_op->read || f->f_op->aio_read || f->f_op->read_iter))
730 f->f_mode |= FMODE_CAN_READ;
731 if ((f->f_mode & FMODE_WRITE) &&
732 likely(f->f_op->write || f->f_op->aio_write || f->f_op->write_iter))
733 f->f_mode |= FMODE_CAN_WRITE;
728 734
729 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 735 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
730 736
diff --git a/fs/pipe.c b/fs/pipe.c
index 034bffac3f97..21981e58e2a6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -116,50 +116,6 @@ void pipe_wait(struct pipe_inode_info *pipe)
116 pipe_lock(pipe); 116 pipe_lock(pipe);
117} 117}
118 118
119static int
120pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
121 int atomic)
122{
123 unsigned long copy;
124
125 while (len > 0) {
126 while (!iov->iov_len)
127 iov++;
128 copy = min_t(unsigned long, len, iov->iov_len);
129
130 if (atomic) {
131 if (__copy_from_user_inatomic(to, iov->iov_base, copy))
132 return -EFAULT;
133 } else {
134 if (copy_from_user(to, iov->iov_base, copy))
135 return -EFAULT;
136 }
137 to += copy;
138 len -= copy;
139 iov->iov_base += copy;
140 iov->iov_len -= copy;
141 }
142 return 0;
143}
144
145/*
146 * Pre-fault in the user memory, so we can use atomic copies.
147 */
148static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
149{
150 while (!iov->iov_len)
151 iov++;
152
153 while (len > 0) {
154 unsigned long this_len;
155
156 this_len = min_t(unsigned long, len, iov->iov_len);
157 fault_in_pages_readable(iov->iov_base, this_len);
158 len -= this_len;
159 iov++;
160 }
161}
162
163static void anon_pipe_buf_release(struct pipe_inode_info *pipe, 119static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
164 struct pipe_buffer *buf) 120 struct pipe_buffer *buf)
165{ 121{
@@ -271,24 +227,18 @@ static const struct pipe_buf_operations packet_pipe_buf_ops = {
271}; 227};
272 228
273static ssize_t 229static ssize_t
274pipe_read(struct kiocb *iocb, const struct iovec *_iov, 230pipe_read(struct kiocb *iocb, struct iov_iter *to)
275 unsigned long nr_segs, loff_t pos)
276{ 231{
232 size_t total_len = iov_iter_count(to);
277 struct file *filp = iocb->ki_filp; 233 struct file *filp = iocb->ki_filp;
278 struct pipe_inode_info *pipe = filp->private_data; 234 struct pipe_inode_info *pipe = filp->private_data;
279 int do_wakeup; 235 int do_wakeup;
280 ssize_t ret; 236 ssize_t ret;
281 struct iovec *iov = (struct iovec *)_iov;
282 size_t total_len;
283 struct iov_iter iter;
284 237
285 total_len = iov_length(iov, nr_segs);
286 /* Null read succeeds. */ 238 /* Null read succeeds. */
287 if (unlikely(total_len == 0)) 239 if (unlikely(total_len == 0))
288 return 0; 240 return 0;
289 241
290 iov_iter_init(&iter, iov, nr_segs, total_len, 0);
291
292 do_wakeup = 0; 242 do_wakeup = 0;
293 ret = 0; 243 ret = 0;
294 __pipe_lock(pipe); 244 __pipe_lock(pipe);
@@ -312,7 +262,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
312 break; 262 break;
313 } 263 }
314 264
315 written = copy_page_to_iter(buf->page, buf->offset, chars, &iter); 265 written = copy_page_to_iter(buf->page, buf->offset, chars, to);
316 if (unlikely(written < chars)) { 266 if (unlikely(written < chars)) {
317 if (!ret) 267 if (!ret)
318 ret = -EFAULT; 268 ret = -EFAULT;
@@ -386,24 +336,19 @@ static inline int is_packetized(struct file *file)
386} 336}
387 337
388static ssize_t 338static ssize_t
389pipe_write(struct kiocb *iocb, const struct iovec *_iov, 339pipe_write(struct kiocb *iocb, struct iov_iter *from)
390 unsigned long nr_segs, loff_t ppos)
391{ 340{
392 struct file *filp = iocb->ki_filp; 341 struct file *filp = iocb->ki_filp;
393 struct pipe_inode_info *pipe = filp->private_data; 342 struct pipe_inode_info *pipe = filp->private_data;
394 ssize_t ret; 343 ssize_t ret = 0;
395 int do_wakeup; 344 int do_wakeup = 0;
396 struct iovec *iov = (struct iovec *)_iov; 345 size_t total_len = iov_iter_count(from);
397 size_t total_len;
398 ssize_t chars; 346 ssize_t chars;
399 347
400 total_len = iov_length(iov, nr_segs);
401 /* Null write succeeds. */ 348 /* Null write succeeds. */
402 if (unlikely(total_len == 0)) 349 if (unlikely(total_len == 0))
403 return 0; 350 return 0;
404 351
405 do_wakeup = 0;
406 ret = 0;
407 __pipe_lock(pipe); 352 __pipe_lock(pipe);
408 353
409 if (!pipe->readers) { 354 if (!pipe->readers) {
@@ -422,38 +367,19 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
422 int offset = buf->offset + buf->len; 367 int offset = buf->offset + buf->len;
423 368
424 if (ops->can_merge && offset + chars <= PAGE_SIZE) { 369 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
425 int error, atomic = 1; 370 int error = ops->confirm(pipe, buf);
426 void *addr;
427
428 error = ops->confirm(pipe, buf);
429 if (error) 371 if (error)
430 goto out; 372 goto out;
431 373
432 iov_fault_in_pages_read(iov, chars); 374 ret = copy_page_from_iter(buf->page, offset, chars, from);
433redo1: 375 if (unlikely(ret < chars)) {
434 if (atomic) 376 error = -EFAULT;
435 addr = kmap_atomic(buf->page);
436 else
437 addr = kmap(buf->page);
438 error = pipe_iov_copy_from_user(offset + addr, iov,
439 chars, atomic);
440 if (atomic)
441 kunmap_atomic(addr);
442 else
443 kunmap(buf->page);
444 ret = error;
445 do_wakeup = 1;
446 if (error) {
447 if (atomic) {
448 atomic = 0;
449 goto redo1;
450 }
451 goto out; 377 goto out;
452 } 378 }
379 do_wakeup = 1;
453 buf->len += chars; 380 buf->len += chars;
454 total_len -= chars;
455 ret = chars; 381 ret = chars;
456 if (!total_len) 382 if (!iov_iter_count(from))
457 goto out; 383 goto out;
458 } 384 }
459 } 385 }
@@ -472,8 +398,7 @@ redo1:
472 int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1); 398 int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1);
473 struct pipe_buffer *buf = pipe->bufs + newbuf; 399 struct pipe_buffer *buf = pipe->bufs + newbuf;
474 struct page *page = pipe->tmp_page; 400 struct page *page = pipe->tmp_page;
475 char *src; 401 int copied;
476 int error, atomic = 1;
477 402
478 if (!page) { 403 if (!page) {
479 page = alloc_page(GFP_HIGHUSER); 404 page = alloc_page(GFP_HIGHUSER);
@@ -489,40 +414,19 @@ redo1:
489 * FIXME! Is this really true? 414 * FIXME! Is this really true?
490 */ 415 */
491 do_wakeup = 1; 416 do_wakeup = 1;
492 chars = PAGE_SIZE; 417 copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
493 if (chars > total_len) 418 if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
494 chars = total_len;
495
496 iov_fault_in_pages_read(iov, chars);
497redo2:
498 if (atomic)
499 src = kmap_atomic(page);
500 else
501 src = kmap(page);
502
503 error = pipe_iov_copy_from_user(src, iov, chars,
504 atomic);
505 if (atomic)
506 kunmap_atomic(src);
507 else
508 kunmap(page);
509
510 if (unlikely(error)) {
511 if (atomic) {
512 atomic = 0;
513 goto redo2;
514 }
515 if (!ret) 419 if (!ret)
516 ret = error; 420 ret = -EFAULT;
517 break; 421 break;
518 } 422 }
519 ret += chars; 423 ret += copied;
520 424
521 /* Insert it into the buffer array */ 425 /* Insert it into the buffer array */
522 buf->page = page; 426 buf->page = page;
523 buf->ops = &anon_pipe_buf_ops; 427 buf->ops = &anon_pipe_buf_ops;
524 buf->offset = 0; 428 buf->offset = 0;
525 buf->len = chars; 429 buf->len = copied;
526 buf->flags = 0; 430 buf->flags = 0;
527 if (is_packetized(filp)) { 431 if (is_packetized(filp)) {
528 buf->ops = &packet_pipe_buf_ops; 432 buf->ops = &packet_pipe_buf_ops;
@@ -531,8 +435,7 @@ redo2:
531 pipe->nrbufs = ++bufs; 435 pipe->nrbufs = ++bufs;
532 pipe->tmp_page = NULL; 436 pipe->tmp_page = NULL;
533 437
534 total_len -= chars; 438 if (!iov_iter_count(from))
535 if (!total_len)
536 break; 439 break;
537 } 440 }
538 if (bufs < pipe->buffers) 441 if (bufs < pipe->buffers)
@@ -1044,10 +947,10 @@ err:
1044const struct file_operations pipefifo_fops = { 947const struct file_operations pipefifo_fops = {
1045 .open = fifo_open, 948 .open = fifo_open,
1046 .llseek = no_llseek, 949 .llseek = no_llseek,
1047 .read = do_sync_read, 950 .read = new_sync_read,
1048 .aio_read = pipe_read, 951 .read_iter = pipe_read,
1049 .write = do_sync_write, 952 .write = new_sync_write,
1050 .aio_write = pipe_write, 953 .write_iter = pipe_write,
1051 .poll = pipe_poll, 954 .poll = pipe_poll,
1052 .unlocked_ioctl = pipe_ioctl, 955 .unlocked_ioctl = pipe_ioctl,
1053 .release = pipe_release, 956 .release = pipe_release,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 1e56a4e8cf7c..4f56de822d2f 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -31,14 +31,14 @@
31#include "internal.h" 31#include "internal.h"
32 32
33const struct file_operations ramfs_file_operations = { 33const struct file_operations ramfs_file_operations = {
34 .read = do_sync_read, 34 .read = new_sync_read,
35 .aio_read = generic_file_aio_read, 35 .read_iter = generic_file_read_iter,
36 .write = do_sync_write, 36 .write = new_sync_write,
37 .aio_write = generic_file_aio_write, 37 .write_iter = generic_file_write_iter,
38 .mmap = generic_file_mmap, 38 .mmap = generic_file_mmap,
39 .fsync = noop_fsync, 39 .fsync = noop_fsync,
40 .splice_read = generic_file_splice_read, 40 .splice_read = generic_file_splice_read,
41 .splice_write = generic_file_splice_write, 41 .splice_write = iter_file_splice_write,
42 .llseek = generic_file_llseek, 42 .llseek = generic_file_llseek,
43}; 43};
44 44
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 0b3d8e4cb2fa..dda012ad4208 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -37,13 +37,13 @@ static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma);
37const struct file_operations ramfs_file_operations = { 37const struct file_operations ramfs_file_operations = {
38 .mmap = ramfs_nommu_mmap, 38 .mmap = ramfs_nommu_mmap,
39 .get_unmapped_area = ramfs_nommu_get_unmapped_area, 39 .get_unmapped_area = ramfs_nommu_get_unmapped_area,
40 .read = do_sync_read, 40 .read = new_sync_read,
41 .aio_read = generic_file_aio_read, 41 .read_iter = generic_file_read_iter,
42 .write = do_sync_write, 42 .write = new_sync_write,
43 .aio_write = generic_file_aio_write, 43 .write_iter = generic_file_write_iter,
44 .fsync = noop_fsync, 44 .fsync = noop_fsync,
45 .splice_read = generic_file_splice_read, 45 .splice_read = generic_file_splice_read,
46 .splice_write = generic_file_splice_write, 46 .splice_write = iter_file_splice_write,
47 .llseek = generic_file_llseek, 47 .llseek = generic_file_llseek,
48}; 48};
49 49
diff --git a/fs/read_write.c b/fs/read_write.c
index 31c6efa43183..009d8542a889 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -25,11 +25,12 @@
25typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); 25typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
26typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, 26typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
27 unsigned long, loff_t); 27 unsigned long, loff_t);
28typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
28 29
29const struct file_operations generic_ro_fops = { 30const struct file_operations generic_ro_fops = {
30 .llseek = generic_file_llseek, 31 .llseek = generic_file_llseek,
31 .read = do_sync_read, 32 .read = new_sync_read,
32 .aio_read = generic_file_aio_read, 33 .read_iter = generic_file_read_iter,
33 .mmap = generic_file_readonly_mmap, 34 .mmap = generic_file_readonly_mmap,
34 .splice_read = generic_file_splice_read, 35 .splice_read = generic_file_splice_read,
35}; 36};
@@ -390,13 +391,34 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
390 391
391EXPORT_SYMBOL(do_sync_read); 392EXPORT_SYMBOL(do_sync_read);
392 393
394ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
395{
396 struct iovec iov = { .iov_base = buf, .iov_len = len };
397 struct kiocb kiocb;
398 struct iov_iter iter;
399 ssize_t ret;
400
401 init_sync_kiocb(&kiocb, filp);
402 kiocb.ki_pos = *ppos;
403 kiocb.ki_nbytes = len;
404 iov_iter_init(&iter, READ, &iov, 1, len);
405
406 ret = filp->f_op->read_iter(&kiocb, &iter);
407 if (-EIOCBQUEUED == ret)
408 ret = wait_on_sync_kiocb(&kiocb);
409 *ppos = kiocb.ki_pos;
410 return ret;
411}
412
413EXPORT_SYMBOL(new_sync_read);
414
393ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 415ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
394{ 416{
395 ssize_t ret; 417 ssize_t ret;
396 418
397 if (!(file->f_mode & FMODE_READ)) 419 if (!(file->f_mode & FMODE_READ))
398 return -EBADF; 420 return -EBADF;
399 if (!file->f_op->read && !file->f_op->aio_read) 421 if (!(file->f_mode & FMODE_CAN_READ))
400 return -EINVAL; 422 return -EINVAL;
401 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 423 if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
402 return -EFAULT; 424 return -EFAULT;
@@ -406,8 +428,10 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
406 count = ret; 428 count = ret;
407 if (file->f_op->read) 429 if (file->f_op->read)
408 ret = file->f_op->read(file, buf, count, pos); 430 ret = file->f_op->read(file, buf, count, pos);
409 else 431 else if (file->f_op->aio_read)
410 ret = do_sync_read(file, buf, count, pos); 432 ret = do_sync_read(file, buf, count, pos);
433 else
434 ret = new_sync_read(file, buf, count, pos);
411 if (ret > 0) { 435 if (ret > 0) {
412 fsnotify_access(file); 436 fsnotify_access(file);
413 add_rchar(current, ret); 437 add_rchar(current, ret);
@@ -439,13 +463,34 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
439 463
440EXPORT_SYMBOL(do_sync_write); 464EXPORT_SYMBOL(do_sync_write);
441 465
466ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
467{
468 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
469 struct kiocb kiocb;
470 struct iov_iter iter;
471 ssize_t ret;
472
473 init_sync_kiocb(&kiocb, filp);
474 kiocb.ki_pos = *ppos;
475 kiocb.ki_nbytes = len;
476 iov_iter_init(&iter, WRITE, &iov, 1, len);
477
478 ret = filp->f_op->write_iter(&kiocb, &iter);
479 if (-EIOCBQUEUED == ret)
480 ret = wait_on_sync_kiocb(&kiocb);
481 *ppos = kiocb.ki_pos;
482 return ret;
483}
484
485EXPORT_SYMBOL(new_sync_write);
486
442ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) 487ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
443{ 488{
444 mm_segment_t old_fs; 489 mm_segment_t old_fs;
445 const char __user *p; 490 const char __user *p;
446 ssize_t ret; 491 ssize_t ret;
447 492
448 if (!file->f_op->write && !file->f_op->aio_write) 493 if (!(file->f_mode & FMODE_CAN_WRITE))
449 return -EINVAL; 494 return -EINVAL;
450 495
451 old_fs = get_fs(); 496 old_fs = get_fs();
@@ -455,8 +500,10 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t
455 count = MAX_RW_COUNT; 500 count = MAX_RW_COUNT;
456 if (file->f_op->write) 501 if (file->f_op->write)
457 ret = file->f_op->write(file, p, count, pos); 502 ret = file->f_op->write(file, p, count, pos);
458 else 503 else if (file->f_op->aio_write)
459 ret = do_sync_write(file, p, count, pos); 504 ret = do_sync_write(file, p, count, pos);
505 else
506 ret = new_sync_write(file, p, count, pos);
460 set_fs(old_fs); 507 set_fs(old_fs);
461 if (ret > 0) { 508 if (ret > 0) {
462 fsnotify_modify(file); 509 fsnotify_modify(file);
@@ -472,7 +519,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
472 519
473 if (!(file->f_mode & FMODE_WRITE)) 520 if (!(file->f_mode & FMODE_WRITE))
474 return -EBADF; 521 return -EBADF;
475 if (!file->f_op->write && !file->f_op->aio_write) 522 if (!(file->f_mode & FMODE_CAN_WRITE))
476 return -EINVAL; 523 return -EINVAL;
477 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 524 if (unlikely(!access_ok(VERIFY_READ, buf, count)))
478 return -EFAULT; 525 return -EFAULT;
@@ -483,8 +530,10 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
483 file_start_write(file); 530 file_start_write(file);
484 if (file->f_op->write) 531 if (file->f_op->write)
485 ret = file->f_op->write(file, buf, count, pos); 532 ret = file->f_op->write(file, buf, count, pos);
486 else 533 else if (file->f_op->aio_write)
487 ret = do_sync_write(file, buf, count, pos); 534 ret = do_sync_write(file, buf, count, pos);
535 else
536 ret = new_sync_write(file, buf, count, pos);
488 if (ret > 0) { 537 if (ret > 0) {
489 fsnotify_modify(file); 538 fsnotify_modify(file);
490 add_wchar(current, ret); 539 add_wchar(current, ret);
@@ -601,6 +650,25 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
601} 650}
602EXPORT_SYMBOL(iov_shorten); 651EXPORT_SYMBOL(iov_shorten);
603 652
653static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov,
654 unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn)
655{
656 struct kiocb kiocb;
657 struct iov_iter iter;
658 ssize_t ret;
659
660 init_sync_kiocb(&kiocb, filp);
661 kiocb.ki_pos = *ppos;
662 kiocb.ki_nbytes = len;
663
664 iov_iter_init(&iter, rw, iov, nr_segs, len);
665 ret = fn(&kiocb, &iter);
666 if (ret == -EIOCBQUEUED)
667 ret = wait_on_sync_kiocb(&kiocb);
668 *ppos = kiocb.ki_pos;
669 return ret;
670}
671
604static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, 672static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
605 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) 673 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
606{ 674{
@@ -738,6 +806,7 @@ static ssize_t do_readv_writev(int type, struct file *file,
738 ssize_t ret; 806 ssize_t ret;
739 io_fn_t fn; 807 io_fn_t fn;
740 iov_fn_t fnv; 808 iov_fn_t fnv;
809 iter_fn_t iter_fn;
741 810
742 ret = rw_copy_check_uvector(type, uvector, nr_segs, 811 ret = rw_copy_check_uvector(type, uvector, nr_segs,
743 ARRAY_SIZE(iovstack), iovstack, &iov); 812 ARRAY_SIZE(iovstack), iovstack, &iov);
@@ -753,13 +822,18 @@ static ssize_t do_readv_writev(int type, struct file *file,
753 if (type == READ) { 822 if (type == READ) {
754 fn = file->f_op->read; 823 fn = file->f_op->read;
755 fnv = file->f_op->aio_read; 824 fnv = file->f_op->aio_read;
825 iter_fn = file->f_op->read_iter;
756 } else { 826 } else {
757 fn = (io_fn_t)file->f_op->write; 827 fn = (io_fn_t)file->f_op->write;
758 fnv = file->f_op->aio_write; 828 fnv = file->f_op->aio_write;
829 iter_fn = file->f_op->write_iter;
759 file_start_write(file); 830 file_start_write(file);
760 } 831 }
761 832
762 if (fnv) 833 if (iter_fn)
834 ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
835 pos, iter_fn);
836 else if (fnv)
763 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 837 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
764 pos, fnv); 838 pos, fnv);
765 else 839 else
@@ -785,7 +859,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
785{ 859{
786 if (!(file->f_mode & FMODE_READ)) 860 if (!(file->f_mode & FMODE_READ))
787 return -EBADF; 861 return -EBADF;
788 if (!file->f_op->aio_read && !file->f_op->read) 862 if (!(file->f_mode & FMODE_CAN_READ))
789 return -EINVAL; 863 return -EINVAL;
790 864
791 return do_readv_writev(READ, file, vec, vlen, pos); 865 return do_readv_writev(READ, file, vec, vlen, pos);
@@ -798,7 +872,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
798{ 872{
799 if (!(file->f_mode & FMODE_WRITE)) 873 if (!(file->f_mode & FMODE_WRITE))
800 return -EBADF; 874 return -EBADF;
801 if (!file->f_op->aio_write && !file->f_op->write) 875 if (!(file->f_mode & FMODE_CAN_WRITE))
802 return -EINVAL; 876 return -EINVAL;
803 877
804 return do_readv_writev(WRITE, file, vec, vlen, pos); 878 return do_readv_writev(WRITE, file, vec, vlen, pos);
@@ -912,6 +986,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
912 ssize_t ret; 986 ssize_t ret;
913 io_fn_t fn; 987 io_fn_t fn;
914 iov_fn_t fnv; 988 iov_fn_t fnv;
989 iter_fn_t iter_fn;
915 990
916 ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, 991 ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
917 UIO_FASTIOV, iovstack, &iov); 992 UIO_FASTIOV, iovstack, &iov);
@@ -927,13 +1002,18 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
927 if (type == READ) { 1002 if (type == READ) {
928 fn = file->f_op->read; 1003 fn = file->f_op->read;
929 fnv = file->f_op->aio_read; 1004 fnv = file->f_op->aio_read;
1005 iter_fn = file->f_op->read_iter;
930 } else { 1006 } else {
931 fn = (io_fn_t)file->f_op->write; 1007 fn = (io_fn_t)file->f_op->write;
932 fnv = file->f_op->aio_write; 1008 fnv = file->f_op->aio_write;
1009 iter_fn = file->f_op->write_iter;
933 file_start_write(file); 1010 file_start_write(file);
934 } 1011 }
935 1012
936 if (fnv) 1013 if (iter_fn)
1014 ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
1015 pos, iter_fn);
1016 else if (fnv)
937 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 1017 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
938 pos, fnv); 1018 pos, fnv);
939 else 1019 else
@@ -964,7 +1044,7 @@ static size_t compat_readv(struct file *file,
964 goto out; 1044 goto out;
965 1045
966 ret = -EINVAL; 1046 ret = -EINVAL;
967 if (!file->f_op->aio_read && !file->f_op->read) 1047 if (!(file->f_mode & FMODE_CAN_READ))
968 goto out; 1048 goto out;
969 1049
970 ret = compat_do_readv_writev(READ, file, vec, vlen, pos); 1050 ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
@@ -1041,7 +1121,7 @@ static size_t compat_writev(struct file *file,
1041 goto out; 1121 goto out;
1042 1122
1043 ret = -EINVAL; 1123 ret = -EINVAL;
1044 if (!file->f_op->aio_write && !file->f_op->write) 1124 if (!(file->f_mode & FMODE_CAN_WRITE))
1045 goto out; 1125 goto out;
1046 1126
1047 ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); 1127 ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 5f6c32c668b6..db9e80ba53a0 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -243,8 +243,8 @@ drop_write_lock:
243} 243}
244 244
245const struct file_operations reiserfs_file_operations = { 245const struct file_operations reiserfs_file_operations = {
246 .read = do_sync_read, 246 .read = new_sync_read,
247 .write = do_sync_write, 247 .write = new_sync_write,
248 .unlocked_ioctl = reiserfs_ioctl, 248 .unlocked_ioctl = reiserfs_ioctl,
249#ifdef CONFIG_COMPAT 249#ifdef CONFIG_COMPAT
250 .compat_ioctl = reiserfs_compat_ioctl, 250 .compat_ioctl = reiserfs_compat_ioctl,
@@ -253,10 +253,10 @@ const struct file_operations reiserfs_file_operations = {
253 .open = reiserfs_file_open, 253 .open = reiserfs_file_open,
254 .release = reiserfs_file_release, 254 .release = reiserfs_file_release,
255 .fsync = reiserfs_sync_file, 255 .fsync = reiserfs_sync_file,
256 .aio_read = generic_file_aio_read, 256 .read_iter = generic_file_read_iter,
257 .aio_write = generic_file_aio_write, 257 .write_iter = generic_file_write_iter,
258 .splice_read = generic_file_splice_read, 258 .splice_read = generic_file_splice_read,
259 .splice_write = generic_file_splice_write, 259 .splice_write = iter_file_splice_write,
260 .llseek = generic_file_llseek, 260 .llseek = generic_file_llseek,
261}; 261};
262 262
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index e3ca04894919..63b2b0ec49e6 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3279,15 +3279,15 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
3279 * to do in this section of the code. 3279 * to do in this section of the code.
3280 */ 3280 */
3281static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, 3281static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3282 const struct iovec *iov, loff_t offset, 3282 struct iov_iter *iter, loff_t offset)
3283 unsigned long nr_segs)
3284{ 3283{
3285 struct file *file = iocb->ki_filp; 3284 struct file *file = iocb->ki_filp;
3286 struct inode *inode = file->f_mapping->host; 3285 struct inode *inode = file->f_mapping->host;
3286 size_t count = iov_iter_count(iter);
3287 ssize_t ret; 3287 ssize_t ret;
3288 3288
3289 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 3289 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset,
3290 reiserfs_get_blocks_direct_io); 3290 reiserfs_get_blocks_direct_io);
3291 3291
3292 /* 3292 /*
3293 * In case of error extending write may have instantiated a few 3293 * In case of error extending write may have instantiated a few
@@ -3295,7 +3295,7 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3295 */ 3295 */
3296 if (unlikely((rw & WRITE) && ret < 0)) { 3296 if (unlikely((rw & WRITE) && ret < 0)) {
3297 loff_t isize = i_size_read(inode); 3297 loff_t isize = i_size_read(inode);
3298 loff_t end = offset + iov_length(iov, nr_segs); 3298 loff_t end = offset + count;
3299 3299
3300 if ((end > isize) && inode_newsize_ok(inode, isize) == 0) { 3300 if ((end > isize) && inode_newsize_ok(inode, isize) == 0) {
3301 truncate_setsize(inode, isize); 3301 truncate_setsize(inode, isize);
diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c
index f373bde8f545..ea06c7554860 100644
--- a/fs/romfs/mmap-nommu.c
+++ b/fs/romfs/mmap-nommu.c
@@ -72,8 +72,8 @@ static int romfs_mmap(struct file *file, struct vm_area_struct *vma)
72 72
73const struct file_operations romfs_ro_fops = { 73const struct file_operations romfs_ro_fops = {
74 .llseek = generic_file_llseek, 74 .llseek = generic_file_llseek,
75 .read = do_sync_read, 75 .read = new_sync_read,
76 .aio_read = generic_file_aio_read, 76 .read_iter = generic_file_read_iter,
77 .splice_read = generic_file_splice_read, 77 .splice_read = generic_file_splice_read,
78 .mmap = romfs_mmap, 78 .mmap = romfs_mmap,
79 .get_unmapped_area = romfs_get_unmapped_area, 79 .get_unmapped_area = romfs_get_unmapped_area,
diff --git a/fs/splice.c b/fs/splice.c
index e246954ea48c..f5cb9ba84510 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -32,6 +32,7 @@
32#include <linux/gfp.h> 32#include <linux/gfp.h>
33#include <linux/socket.h> 33#include <linux/socket.h>
34#include <linux/compat.h> 34#include <linux/compat.h>
35#include <linux/aio.h>
35#include "internal.h" 36#include "internal.h"
36 37
37/* 38/*
@@ -717,63 +718,6 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
717 sd->len, &pos, more); 718 sd->len, &pos, more);
718} 719}
719 720
720/*
721 * This is a little more tricky than the file -> pipe splicing. There are
722 * basically three cases:
723 *
724 * - Destination page already exists in the address space and there
725 * are users of it. For that case we have no other option that
726 * copying the data. Tough luck.
727 * - Destination page already exists in the address space, but there
728 * are no users of it. Make sure it's uptodate, then drop it. Fall
729 * through to last case.
730 * - Destination page does not exist, we can add the pipe page to
731 * the page cache and avoid the copy.
732 *
733 * If asked to move pages to the output file (SPLICE_F_MOVE is set in
734 * sd->flags), we attempt to migrate pages from the pipe to the output
735 * file address space page cache. This is possible if no one else has
736 * the pipe page referenced outside of the pipe and page cache. If
737 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
738 * a new page in the output file page cache and fill/dirty that.
739 */
740int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
741 struct splice_desc *sd)
742{
743 struct file *file = sd->u.file;
744 struct address_space *mapping = file->f_mapping;
745 unsigned int offset, this_len;
746 struct page *page;
747 void *fsdata;
748 int ret;
749
750 offset = sd->pos & ~PAGE_CACHE_MASK;
751
752 this_len = sd->len;
753 if (this_len + offset > PAGE_CACHE_SIZE)
754 this_len = PAGE_CACHE_SIZE - offset;
755
756 ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
757 AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
758 if (unlikely(ret))
759 goto out;
760
761 if (buf->page != page) {
762 char *src = kmap_atomic(buf->page);
763 char *dst = kmap_atomic(page);
764
765 memcpy(dst + offset, src + buf->offset, this_len);
766 flush_dcache_page(page);
767 kunmap_atomic(dst);
768 kunmap_atomic(src);
769 }
770 ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
771 page, fsdata);
772out:
773 return ret;
774}
775EXPORT_SYMBOL(pipe_to_file);
776
777static void wakeup_pipe_writers(struct pipe_inode_info *pipe) 721static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
778{ 722{
779 smp_mb(); 723 smp_mb();
@@ -802,7 +746,7 @@ static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
802 * locking is required around copying the pipe buffers to the 746 * locking is required around copying the pipe buffers to the
803 * destination. 747 * destination.
804 */ 748 */
805int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, 749static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
806 splice_actor *actor) 750 splice_actor *actor)
807{ 751{
808 int ret; 752 int ret;
@@ -849,7 +793,6 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
849 793
850 return 1; 794 return 1;
851} 795}
852EXPORT_SYMBOL(splice_from_pipe_feed);
853 796
854/** 797/**
855 * splice_from_pipe_next - wait for some data to splice from 798 * splice_from_pipe_next - wait for some data to splice from
@@ -861,7 +804,7 @@ EXPORT_SYMBOL(splice_from_pipe_feed);
861 * value (one) if pipe buffers are available. It will return zero 804 * value (one) if pipe buffers are available. It will return zero
862 * or -errno if no more data needs to be spliced. 805 * or -errno if no more data needs to be spliced.
863 */ 806 */
864int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) 807static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
865{ 808{
866 while (!pipe->nrbufs) { 809 while (!pipe->nrbufs) {
867 if (!pipe->writers) 810 if (!pipe->writers)
@@ -886,7 +829,6 @@ int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
886 829
887 return 1; 830 return 1;
888} 831}
889EXPORT_SYMBOL(splice_from_pipe_next);
890 832
891/** 833/**
892 * splice_from_pipe_begin - start splicing from pipe 834 * splice_from_pipe_begin - start splicing from pipe
@@ -897,12 +839,11 @@ EXPORT_SYMBOL(splice_from_pipe_next);
897 * splice_from_pipe_next() and splice_from_pipe_feed() to 839 * splice_from_pipe_next() and splice_from_pipe_feed() to
898 * initialize the necessary fields of @sd. 840 * initialize the necessary fields of @sd.
899 */ 841 */
900void splice_from_pipe_begin(struct splice_desc *sd) 842static void splice_from_pipe_begin(struct splice_desc *sd)
901{ 843{
902 sd->num_spliced = 0; 844 sd->num_spliced = 0;
903 sd->need_wakeup = false; 845 sd->need_wakeup = false;
904} 846}
905EXPORT_SYMBOL(splice_from_pipe_begin);
906 847
907/** 848/**
908 * splice_from_pipe_end - finish splicing from pipe 849 * splice_from_pipe_end - finish splicing from pipe
@@ -914,12 +855,11 @@ EXPORT_SYMBOL(splice_from_pipe_begin);
914 * be called after a loop containing splice_from_pipe_next() and 855 * be called after a loop containing splice_from_pipe_next() and
915 * splice_from_pipe_feed(). 856 * splice_from_pipe_feed().
916 */ 857 */
917void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) 858static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
918{ 859{
919 if (sd->need_wakeup) 860 if (sd->need_wakeup)
920 wakeup_pipe_writers(pipe); 861 wakeup_pipe_writers(pipe);
921} 862}
922EXPORT_SYMBOL(splice_from_pipe_end);
923 863
924/** 864/**
925 * __splice_from_pipe - splice data from a pipe to given actor 865 * __splice_from_pipe - splice data from a pipe to given actor
@@ -985,7 +925,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
985} 925}
986 926
987/** 927/**
988 * generic_file_splice_write - splice data from a pipe to a file 928 * iter_file_splice_write - splice data from a pipe to a file
989 * @pipe: pipe info 929 * @pipe: pipe info
990 * @out: file to write to 930 * @out: file to write to
991 * @ppos: position in @out 931 * @ppos: position in @out
@@ -995,40 +935,122 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
995 * Description: 935 * Description:
996 * Will either move or copy pages (determined by @flags options) from 936 * Will either move or copy pages (determined by @flags options) from
997 * the given pipe inode to the given file. 937 * the given pipe inode to the given file.
938 * This one is ->write_iter-based.
998 * 939 *
999 */ 940 */
1000ssize_t 941ssize_t
1001generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, 942iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
1002 loff_t *ppos, size_t len, unsigned int flags) 943 loff_t *ppos, size_t len, unsigned int flags)
1003{ 944{
1004 struct address_space *mapping = out->f_mapping;
1005 struct inode *inode = mapping->host;
1006 struct splice_desc sd = { 945 struct splice_desc sd = {
1007 .total_len = len, 946 .total_len = len,
1008 .flags = flags, 947 .flags = flags,
1009 .pos = *ppos, 948 .pos = *ppos,
1010 .u.file = out, 949 .u.file = out,
1011 }; 950 };
951 int nbufs = pipe->buffers;
952 struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
953 GFP_KERNEL);
1012 ssize_t ret; 954 ssize_t ret;
1013 955
956 if (unlikely(!array))
957 return -ENOMEM;
958
1014 pipe_lock(pipe); 959 pipe_lock(pipe);
1015 960
1016 splice_from_pipe_begin(&sd); 961 splice_from_pipe_begin(&sd);
1017 do { 962 while (sd.total_len) {
963 struct iov_iter from;
964 struct kiocb kiocb;
965 size_t left;
966 int n, idx;
967
1018 ret = splice_from_pipe_next(pipe, &sd); 968 ret = splice_from_pipe_next(pipe, &sd);
1019 if (ret <= 0) 969 if (ret <= 0)
1020 break; 970 break;
1021 971
1022 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); 972 if (unlikely(nbufs < pipe->buffers)) {
1023 ret = file_remove_suid(out); 973 kfree(array);
1024 if (!ret) { 974 nbufs = pipe->buffers;
1025 ret = file_update_time(out); 975 array = kcalloc(nbufs, sizeof(struct bio_vec),
1026 if (!ret) 976 GFP_KERNEL);
1027 ret = splice_from_pipe_feed(pipe, &sd, 977 if (!array) {
1028 pipe_to_file); 978 ret = -ENOMEM;
979 break;
980 }
1029 } 981 }
1030 mutex_unlock(&inode->i_mutex); 982
1031 } while (ret > 0); 983 /* build the vector */
984 left = sd.total_len;
985 for (n = 0, idx = pipe->curbuf; left && n < pipe->nrbufs; n++, idx++) {
986 struct pipe_buffer *buf = pipe->bufs + idx;
987 size_t this_len = buf->len;
988
989 if (this_len > left)
990 this_len = left;
991
992 if (idx == pipe->buffers - 1)
993 idx = -1;
994
995 ret = buf->ops->confirm(pipe, buf);
996 if (unlikely(ret)) {
997 if (ret == -ENODATA)
998 ret = 0;
999 goto done;
1000 }
1001
1002 array[n].bv_page = buf->page;
1003 array[n].bv_len = this_len;
1004 array[n].bv_offset = buf->offset;
1005 left -= this_len;
1006 }
1007
1008 /* ... iov_iter */
1009 from.type = ITER_BVEC | WRITE;
1010 from.bvec = array;
1011 from.nr_segs = n;
1012 from.count = sd.total_len - left;
1013 from.iov_offset = 0;
1014
1015 /* ... and iocb */
1016 init_sync_kiocb(&kiocb, out);
1017 kiocb.ki_pos = sd.pos;
1018 kiocb.ki_nbytes = sd.total_len - left;
1019
1020 /* now, send it */
1021 ret = out->f_op->write_iter(&kiocb, &from);
1022 if (-EIOCBQUEUED == ret)
1023 ret = wait_on_sync_kiocb(&kiocb);
1024
1025 if (ret <= 0)
1026 break;
1027
1028 sd.num_spliced += ret;
1029 sd.total_len -= ret;
1030 *ppos = sd.pos = kiocb.ki_pos;
1031
1032 /* dismiss the fully eaten buffers, adjust the partial one */
1033 while (ret) {
1034 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
1035 if (ret >= buf->len) {
1036 const struct pipe_buf_operations *ops = buf->ops;
1037 ret -= buf->len;
1038 buf->len = 0;
1039 buf->ops = NULL;
1040 ops->release(pipe, buf);
1041 pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1042 pipe->nrbufs--;
1043 if (pipe->files)
1044 sd.need_wakeup = true;
1045 } else {
1046 buf->offset += ret;
1047 buf->len -= ret;
1048 ret = 0;
1049 }
1050 }
1051 }
1052done:
1053 kfree(array);
1032 splice_from_pipe_end(pipe, &sd); 1054 splice_from_pipe_end(pipe, &sd);
1033 1055
1034 pipe_unlock(pipe); 1056 pipe_unlock(pipe);
@@ -1036,21 +1058,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
1036 if (sd.num_spliced) 1058 if (sd.num_spliced)
1037 ret = sd.num_spliced; 1059 ret = sd.num_spliced;
1038 1060
1039 if (ret > 0) {
1040 int err;
1041
1042 err = generic_write_sync(out, *ppos, ret);
1043 if (err)
1044 ret = err;
1045 else
1046 *ppos += ret;
1047 balance_dirty_pages_ratelimited(mapping);
1048 }
1049
1050 return ret; 1061 return ret;
1051} 1062}
1052 1063
1053EXPORT_SYMBOL(generic_file_splice_write); 1064EXPORT_SYMBOL(iter_file_splice_write);
1054 1065
1055static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 1066static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
1056 struct splice_desc *sd) 1067 struct splice_desc *sd)
@@ -1549,7 +1560,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
1549 goto out; 1560 goto out;
1550 1561
1551 count = ret; 1562 count = ret;
1552 iov_iter_init(&iter, iov, nr_segs, count, 0); 1563 iov_iter_init(&iter, READ, iov, nr_segs, count);
1553 1564
1554 sd.len = 0; 1565 sd.len = 0;
1555 sd.total_len = count; 1566 sd.total_len = count;
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 9d4dc6831792..b00811c75b24 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -21,10 +21,10 @@
21 */ 21 */
22const struct file_operations sysv_file_operations = { 22const struct file_operations sysv_file_operations = {
23 .llseek = generic_file_llseek, 23 .llseek = generic_file_llseek,
24 .read = do_sync_read, 24 .read = new_sync_read,
25 .aio_read = generic_file_aio_read, 25 .read_iter = generic_file_read_iter,
26 .write = do_sync_write, 26 .write = new_sync_write,
27 .aio_write = generic_file_aio_write, 27 .write_iter = generic_file_write_iter,
28 .mmap = generic_file_mmap, 28 .mmap = generic_file_mmap,
29 .fsync = generic_file_fsync, 29 .fsync = generic_file_fsync,
30 .splice_read = generic_file_splice_read, 30 .splice_read = generic_file_splice_read,
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 0ab7f7dfb98b..b5b593c45270 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1364,17 +1364,17 @@ static inline int mctime_update_needed(const struct inode *inode,
1364 1364
1365/** 1365/**
1366 * update_ctime - update mtime and ctime of an inode. 1366 * update_ctime - update mtime and ctime of an inode.
1367 * @c: UBIFS file-system description object
1368 * @inode: inode to update 1367 * @inode: inode to update
1369 * 1368 *
1370 * This function updates mtime and ctime of the inode if it is not equivalent to 1369 * This function updates mtime and ctime of the inode if it is not equivalent to
1371 * current time. Returns zero in case of success and a negative error code in 1370 * current time. Returns zero in case of success and a negative error code in
1372 * case of failure. 1371 * case of failure.
1373 */ 1372 */
1374static int update_mctime(struct ubifs_info *c, struct inode *inode) 1373static int update_mctime(struct inode *inode)
1375{ 1374{
1376 struct timespec now = ubifs_current_time(inode); 1375 struct timespec now = ubifs_current_time(inode);
1377 struct ubifs_inode *ui = ubifs_inode(inode); 1376 struct ubifs_inode *ui = ubifs_inode(inode);
1377 struct ubifs_info *c = inode->i_sb->s_fs_info;
1378 1378
1379 if (mctime_update_needed(inode, &now)) { 1379 if (mctime_update_needed(inode, &now)) {
1380 int err, release; 1380 int err, release;
@@ -1397,18 +1397,13 @@ static int update_mctime(struct ubifs_info *c, struct inode *inode)
1397 return 0; 1397 return 0;
1398} 1398}
1399 1399
1400static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov, 1400static ssize_t ubifs_write_iter(struct kiocb *iocb, struct iov_iter *from)
1401 unsigned long nr_segs, loff_t pos)
1402{ 1401{
1403 int err; 1402 int err = update_mctime(file_inode(iocb->ki_filp));
1404 struct inode *inode = iocb->ki_filp->f_mapping->host;
1405 struct ubifs_info *c = inode->i_sb->s_fs_info;
1406
1407 err = update_mctime(c, inode);
1408 if (err) 1403 if (err)
1409 return err; 1404 return err;
1410 1405
1411 return generic_file_aio_write(iocb, iov, nr_segs, pos); 1406 return generic_file_write_iter(iocb, from);
1412} 1407}
1413 1408
1414static int ubifs_set_page_dirty(struct page *page) 1409static int ubifs_set_page_dirty(struct page *page)
@@ -1582,15 +1577,15 @@ const struct inode_operations ubifs_symlink_inode_operations = {
1582 1577
1583const struct file_operations ubifs_file_operations = { 1578const struct file_operations ubifs_file_operations = {
1584 .llseek = generic_file_llseek, 1579 .llseek = generic_file_llseek,
1585 .read = do_sync_read, 1580 .read = new_sync_read,
1586 .write = do_sync_write, 1581 .write = new_sync_write,
1587 .aio_read = generic_file_aio_read, 1582 .read_iter = generic_file_read_iter,
1588 .aio_write = ubifs_aio_write, 1583 .write_iter = ubifs_write_iter,
1589 .mmap = ubifs_file_mmap, 1584 .mmap = ubifs_file_mmap,
1590 .fsync = ubifs_fsync, 1585 .fsync = ubifs_fsync,
1591 .unlocked_ioctl = ubifs_ioctl, 1586 .unlocked_ioctl = ubifs_ioctl,
1592 .splice_read = generic_file_splice_read, 1587 .splice_read = generic_file_splice_read,
1593 .splice_write = generic_file_splice_write, 1588 .splice_write = iter_file_splice_write,
1594#ifdef CONFIG_COMPAT 1589#ifdef CONFIG_COMPAT
1595 .compat_ioctl = ubifs_compat_ioctl, 1590 .compat_ioctl = ubifs_compat_ioctl,
1596#endif 1591#endif
diff --git a/fs/udf/file.c b/fs/udf/file.c
index d2c170f8b035..d80738fdf424 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -119,8 +119,8 @@ static int udf_adinicb_write_end(struct file *file,
119} 119}
120 120
121static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb, 121static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb,
122 const struct iovec *iov, 122 struct iov_iter *iter,
123 loff_t offset, unsigned long nr_segs) 123 loff_t offset)
124{ 124{
125 /* Fallback to buffered I/O. */ 125 /* Fallback to buffered I/O. */
126 return 0; 126 return 0;
@@ -134,8 +134,7 @@ const struct address_space_operations udf_adinicb_aops = {
134 .direct_IO = udf_adinicb_direct_IO, 134 .direct_IO = udf_adinicb_direct_IO,
135}; 135};
136 136
137static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 137static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
138 unsigned long nr_segs, loff_t ppos)
139{ 138{
140 ssize_t retval; 139 ssize_t retval;
141 struct file *file = iocb->ki_filp; 140 struct file *file = iocb->ki_filp;
@@ -150,7 +149,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
150 if (file->f_flags & O_APPEND) 149 if (file->f_flags & O_APPEND)
151 pos = inode->i_size; 150 pos = inode->i_size;
152 else 151 else
153 pos = ppos; 152 pos = iocb->ki_pos;
154 153
155 if (inode->i_sb->s_blocksize < 154 if (inode->i_sb->s_blocksize <
156 (udf_file_entry_alloc_offset(inode) + 155 (udf_file_entry_alloc_offset(inode) +
@@ -171,7 +170,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
171 } else 170 } else
172 up_write(&iinfo->i_data_sem); 171 up_write(&iinfo->i_data_sem);
173 172
174 retval = __generic_file_aio_write(iocb, iov, nr_segs); 173 retval = __generic_file_write_iter(iocb, from);
175 mutex_unlock(&inode->i_mutex); 174 mutex_unlock(&inode->i_mutex);
176 175
177 if (retval > 0) { 176 if (retval > 0) {
@@ -252,13 +251,13 @@ static int udf_release_file(struct inode *inode, struct file *filp)
252} 251}
253 252
254const struct file_operations udf_file_operations = { 253const struct file_operations udf_file_operations = {
255 .read = do_sync_read, 254 .read = new_sync_read,
256 .aio_read = generic_file_aio_read, 255 .read_iter = generic_file_read_iter,
257 .unlocked_ioctl = udf_ioctl, 256 .unlocked_ioctl = udf_ioctl,
258 .open = generic_file_open, 257 .open = generic_file_open,
259 .mmap = generic_file_mmap, 258 .mmap = generic_file_mmap,
260 .write = do_sync_write, 259 .write = new_sync_write,
261 .aio_write = udf_file_aio_write, 260 .write_iter = udf_file_write_iter,
262 .release = udf_release_file, 261 .release = udf_release_file,
263 .fsync = generic_file_fsync, 262 .fsync = generic_file_fsync,
264 .splice_read = generic_file_splice_read, 263 .splice_read = generic_file_splice_read,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 5d643706212f..236cd48184c2 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -217,18 +217,18 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
217} 217}
218 218
219static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, 219static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
220 const struct iovec *iov, 220 struct iov_iter *iter,
221 loff_t offset, unsigned long nr_segs) 221 loff_t offset)
222{ 222{
223 struct file *file = iocb->ki_filp; 223 struct file *file = iocb->ki_filp;
224 struct address_space *mapping = file->f_mapping; 224 struct address_space *mapping = file->f_mapping;
225 struct inode *inode = mapping->host; 225 struct inode *inode = mapping->host;
226 size_t count = iov_iter_count(iter);
226 ssize_t ret; 227 ssize_t ret;
227 228
228 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 229 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, udf_get_block);
229 udf_get_block);
230 if (unlikely(ret < 0 && (rw & WRITE))) 230 if (unlikely(ret < 0 && (rw & WRITE)))
231 udf_write_failed(mapping, offset + iov_length(iov, nr_segs)); 231 udf_write_failed(mapping, offset + count);
232 return ret; 232 return ret;
233} 233}
234 234
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 33afa20d4509..c84ec010a676 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -35,10 +35,10 @@
35 35
36const struct file_operations ufs_file_operations = { 36const struct file_operations ufs_file_operations = {
37 .llseek = generic_file_llseek, 37 .llseek = generic_file_llseek,
38 .read = do_sync_read, 38 .read = new_sync_read,
39 .aio_read = generic_file_aio_read, 39 .read_iter = generic_file_read_iter,
40 .write = do_sync_write, 40 .write = new_sync_write,
41 .aio_write = generic_file_aio_write, 41 .write_iter = generic_file_write_iter,
42 .mmap = generic_file_mmap, 42 .mmap = generic_file_mmap,
43 .open = generic_file_open, 43 .open = generic_file_open,
44 .fsync = generic_file_fsync, 44 .fsync = generic_file_fsync,
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index e32640eedea6..faaf716e2080 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1486,9 +1486,8 @@ STATIC ssize_t
1486xfs_vm_direct_IO( 1486xfs_vm_direct_IO(
1487 int rw, 1487 int rw,
1488 struct kiocb *iocb, 1488 struct kiocb *iocb,
1489 const struct iovec *iov, 1489 struct iov_iter *iter,
1490 loff_t offset, 1490 loff_t offset)
1491 unsigned long nr_segs)
1492{ 1491{
1493 struct inode *inode = iocb->ki_filp->f_mapping->host; 1492 struct inode *inode = iocb->ki_filp->f_mapping->host;
1494 struct block_device *bdev = xfs_find_bdev_for_inode(inode); 1493 struct block_device *bdev = xfs_find_bdev_for_inode(inode);
@@ -1496,7 +1495,7 @@ xfs_vm_direct_IO(
1496 ssize_t ret; 1495 ssize_t ret;
1497 1496
1498 if (rw & WRITE) { 1497 if (rw & WRITE) {
1499 size_t size = iov_length(iov, nr_segs); 1498 size_t size = iov_iter_count(iter);
1500 1499
1501 /* 1500 /*
1502 * We cannot preallocate a size update transaction here as we 1501 * We cannot preallocate a size update transaction here as we
@@ -1508,17 +1507,15 @@ xfs_vm_direct_IO(
1508 if (offset + size > XFS_I(inode)->i_d.di_size) 1507 if (offset + size > XFS_I(inode)->i_d.di_size)
1509 ioend->io_isdirect = 1; 1508 ioend->io_isdirect = 1;
1510 1509
1511 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1510 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
1512 offset, nr_segs, 1511 offset, xfs_get_blocks_direct,
1513 xfs_get_blocks_direct,
1514 xfs_end_io_direct_write, NULL, 1512 xfs_end_io_direct_write, NULL,
1515 DIO_ASYNC_EXTEND); 1513 DIO_ASYNC_EXTEND);
1516 if (ret != -EIOCBQUEUED && iocb->private) 1514 if (ret != -EIOCBQUEUED && iocb->private)
1517 goto out_destroy_ioend; 1515 goto out_destroy_ioend;
1518 } else { 1516 } else {
1519 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1517 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
1520 offset, nr_segs, 1518 offset, xfs_get_blocks_direct,
1521 xfs_get_blocks_direct,
1522 NULL, NULL, 0); 1519 NULL, NULL, 0);
1523 } 1520 }
1524 1521
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 1b8160dc04d1..1f66779d7a46 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -229,34 +229,27 @@ xfs_file_fsync(
229} 229}
230 230
231STATIC ssize_t 231STATIC ssize_t
232xfs_file_aio_read( 232xfs_file_read_iter(
233 struct kiocb *iocb, 233 struct kiocb *iocb,
234 const struct iovec *iovp, 234 struct iov_iter *to)
235 unsigned long nr_segs,
236 loff_t pos)
237{ 235{
238 struct file *file = iocb->ki_filp; 236 struct file *file = iocb->ki_filp;
239 struct inode *inode = file->f_mapping->host; 237 struct inode *inode = file->f_mapping->host;
240 struct xfs_inode *ip = XFS_I(inode); 238 struct xfs_inode *ip = XFS_I(inode);
241 struct xfs_mount *mp = ip->i_mount; 239 struct xfs_mount *mp = ip->i_mount;
242 size_t size = 0; 240 size_t size = iov_iter_count(to);
243 ssize_t ret = 0; 241 ssize_t ret = 0;
244 int ioflags = 0; 242 int ioflags = 0;
245 xfs_fsize_t n; 243 xfs_fsize_t n;
244 loff_t pos = iocb->ki_pos;
246 245
247 XFS_STATS_INC(xs_read_calls); 246 XFS_STATS_INC(xs_read_calls);
248 247
249 BUG_ON(iocb->ki_pos != pos);
250
251 if (unlikely(file->f_flags & O_DIRECT)) 248 if (unlikely(file->f_flags & O_DIRECT))
252 ioflags |= IO_ISDIRECT; 249 ioflags |= IO_ISDIRECT;
253 if (file->f_mode & FMODE_NOCMTIME) 250 if (file->f_mode & FMODE_NOCMTIME)
254 ioflags |= IO_INVIS; 251 ioflags |= IO_INVIS;
255 252
256 ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE);
257 if (ret < 0)
258 return ret;
259
260 if (unlikely(ioflags & IO_ISDIRECT)) { 253 if (unlikely(ioflags & IO_ISDIRECT)) {
261 xfs_buftarg_t *target = 254 xfs_buftarg_t *target =
262 XFS_IS_REALTIME_INODE(ip) ? 255 XFS_IS_REALTIME_INODE(ip) ?
@@ -309,7 +302,7 @@ xfs_file_aio_read(
309 302
310 trace_xfs_file_read(ip, size, pos, ioflags); 303 trace_xfs_file_read(ip, size, pos, ioflags);
311 304
312 ret = generic_file_aio_read(iocb, iovp, nr_segs, pos); 305 ret = generic_file_read_iter(iocb, to);
313 if (ret > 0) 306 if (ret > 0)
314 XFS_STATS_ADD(xs_read_bytes, ret); 307 XFS_STATS_ADD(xs_read_bytes, ret);
315 308
@@ -350,47 +343,6 @@ xfs_file_splice_read(
350} 343}
351 344
352/* 345/*
353 * xfs_file_splice_write() does not use xfs_rw_ilock() because
354 * generic_file_splice_write() takes the i_mutex itself. This, in theory,
355 * couuld cause lock inversions between the aio_write path and the splice path
356 * if someone is doing concurrent splice(2) based writes and write(2) based
357 * writes to the same inode. The only real way to fix this is to re-implement
358 * the generic code here with correct locking orders.
359 */
360STATIC ssize_t
361xfs_file_splice_write(
362 struct pipe_inode_info *pipe,
363 struct file *outfilp,
364 loff_t *ppos,
365 size_t count,
366 unsigned int flags)
367{
368 struct inode *inode = outfilp->f_mapping->host;
369 struct xfs_inode *ip = XFS_I(inode);
370 int ioflags = 0;
371 ssize_t ret;
372
373 XFS_STATS_INC(xs_write_calls);
374
375 if (outfilp->f_mode & FMODE_NOCMTIME)
376 ioflags |= IO_INVIS;
377
378 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
379 return -EIO;
380
381 xfs_ilock(ip, XFS_IOLOCK_EXCL);
382
383 trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
384
385 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
386 if (ret > 0)
387 XFS_STATS_ADD(xs_write_bytes, ret);
388
389 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
390 return ret;
391}
392
393/*
394 * This routine is called to handle zeroing any space in the last block of the 346 * This routine is called to handle zeroing any space in the last block of the
395 * file that is beyond the EOF. We do this since the size is being increased 347 * file that is beyond the EOF. We do this since the size is being increased
396 * without writing anything to that block and we don't want to read the 348 * without writing anything to that block and we don't want to read the
@@ -625,10 +577,7 @@ restart:
625STATIC ssize_t 577STATIC ssize_t
626xfs_file_dio_aio_write( 578xfs_file_dio_aio_write(
627 struct kiocb *iocb, 579 struct kiocb *iocb,
628 const struct iovec *iovp, 580 struct iov_iter *from)
629 unsigned long nr_segs,
630 loff_t pos,
631 size_t ocount)
632{ 581{
633 struct file *file = iocb->ki_filp; 582 struct file *file = iocb->ki_filp;
634 struct address_space *mapping = file->f_mapping; 583 struct address_space *mapping = file->f_mapping;
@@ -636,9 +585,10 @@ xfs_file_dio_aio_write(
636 struct xfs_inode *ip = XFS_I(inode); 585 struct xfs_inode *ip = XFS_I(inode);
637 struct xfs_mount *mp = ip->i_mount; 586 struct xfs_mount *mp = ip->i_mount;
638 ssize_t ret = 0; 587 ssize_t ret = 0;
639 size_t count = ocount;
640 int unaligned_io = 0; 588 int unaligned_io = 0;
641 int iolock; 589 int iolock;
590 size_t count = iov_iter_count(from);
591 loff_t pos = iocb->ki_pos;
642 struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? 592 struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
643 mp->m_rtdev_targp : mp->m_ddev_targp; 593 mp->m_rtdev_targp : mp->m_ddev_targp;
644 594
@@ -677,6 +627,7 @@ xfs_file_dio_aio_write(
677 ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); 627 ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
678 if (ret) 628 if (ret)
679 goto out; 629 goto out;
630 iov_iter_truncate(from, count);
680 631
681 if (mapping->nrpages) { 632 if (mapping->nrpages) {
682 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 633 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
@@ -698,8 +649,7 @@ xfs_file_dio_aio_write(
698 } 649 }
699 650
700 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); 651 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
701 ret = generic_file_direct_write(iocb, iovp, 652 ret = generic_file_direct_write(iocb, from, pos);
702 &nr_segs, pos, count, ocount);
703 653
704out: 654out:
705 xfs_rw_iunlock(ip, iolock); 655 xfs_rw_iunlock(ip, iolock);
@@ -712,10 +662,7 @@ out:
712STATIC ssize_t 662STATIC ssize_t
713xfs_file_buffered_aio_write( 663xfs_file_buffered_aio_write(
714 struct kiocb *iocb, 664 struct kiocb *iocb,
715 const struct iovec *iovp, 665 struct iov_iter *from)
716 unsigned long nr_segs,
717 loff_t pos,
718 size_t count)
719{ 666{
720 struct file *file = iocb->ki_filp; 667 struct file *file = iocb->ki_filp;
721 struct address_space *mapping = file->f_mapping; 668 struct address_space *mapping = file->f_mapping;
@@ -724,7 +671,8 @@ xfs_file_buffered_aio_write(
724 ssize_t ret; 671 ssize_t ret;
725 int enospc = 0; 672 int enospc = 0;
726 int iolock = XFS_IOLOCK_EXCL; 673 int iolock = XFS_IOLOCK_EXCL;
727 struct iov_iter from; 674 loff_t pos = iocb->ki_pos;
675 size_t count = iov_iter_count(from);
728 676
729 xfs_rw_ilock(ip, iolock); 677 xfs_rw_ilock(ip, iolock);
730 678
@@ -732,13 +680,13 @@ xfs_file_buffered_aio_write(
732 if (ret) 680 if (ret)
733 goto out; 681 goto out;
734 682
735 iov_iter_init(&from, iovp, nr_segs, count, 0); 683 iov_iter_truncate(from, count);
736 /* We can write back this queue in page reclaim */ 684 /* We can write back this queue in page reclaim */
737 current->backing_dev_info = mapping->backing_dev_info; 685 current->backing_dev_info = mapping->backing_dev_info;
738 686
739write_retry: 687write_retry:
740 trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); 688 trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
741 ret = generic_perform_write(file, &from, pos); 689 ret = generic_perform_write(file, from, pos);
742 if (likely(ret >= 0)) 690 if (likely(ret >= 0))
743 iocb->ki_pos = pos + ret; 691 iocb->ki_pos = pos + ret;
744 /* 692 /*
@@ -759,40 +707,29 @@ out:
759} 707}
760 708
761STATIC ssize_t 709STATIC ssize_t
762xfs_file_aio_write( 710xfs_file_write_iter(
763 struct kiocb *iocb, 711 struct kiocb *iocb,
764 const struct iovec *iovp, 712 struct iov_iter *from)
765 unsigned long nr_segs,
766 loff_t pos)
767{ 713{
768 struct file *file = iocb->ki_filp; 714 struct file *file = iocb->ki_filp;
769 struct address_space *mapping = file->f_mapping; 715 struct address_space *mapping = file->f_mapping;
770 struct inode *inode = mapping->host; 716 struct inode *inode = mapping->host;
771 struct xfs_inode *ip = XFS_I(inode); 717 struct xfs_inode *ip = XFS_I(inode);
772 ssize_t ret; 718 ssize_t ret;
773 size_t ocount = 0; 719 size_t ocount = iov_iter_count(from);
774 720
775 XFS_STATS_INC(xs_write_calls); 721 XFS_STATS_INC(xs_write_calls);
776 722
777 BUG_ON(iocb->ki_pos != pos);
778
779 ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
780 if (ret)
781 return ret;
782
783 if (ocount == 0) 723 if (ocount == 0)
784 return 0; 724 return 0;
785 725
786 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 726 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
787 ret = -EIO; 727 return -EIO;
788 goto out;
789 }
790 728
791 if (unlikely(file->f_flags & O_DIRECT)) 729 if (unlikely(file->f_flags & O_DIRECT))
792 ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount); 730 ret = xfs_file_dio_aio_write(iocb, from);
793 else 731 else
794 ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, 732 ret = xfs_file_buffered_aio_write(iocb, from);
795 ocount);
796 733
797 if (ret > 0) { 734 if (ret > 0) {
798 ssize_t err; 735 ssize_t err;
@@ -804,8 +741,6 @@ xfs_file_aio_write(
804 if (err < 0) 741 if (err < 0)
805 ret = err; 742 ret = err;
806 } 743 }
807
808out:
809 return ret; 744 return ret;
810} 745}
811 746
@@ -1461,12 +1396,12 @@ xfs_file_llseek(
1461 1396
1462const struct file_operations xfs_file_operations = { 1397const struct file_operations xfs_file_operations = {
1463 .llseek = xfs_file_llseek, 1398 .llseek = xfs_file_llseek,
1464 .read = do_sync_read, 1399 .read = new_sync_read,
1465 .write = do_sync_write, 1400 .write = new_sync_write,
1466 .aio_read = xfs_file_aio_read, 1401 .read_iter = xfs_file_read_iter,
1467 .aio_write = xfs_file_aio_write, 1402 .write_iter = xfs_file_write_iter,
1468 .splice_read = xfs_file_splice_read, 1403 .splice_read = xfs_file_splice_read,
1469 .splice_write = xfs_file_splice_write, 1404 .splice_write = iter_file_splice_write,
1470 .unlocked_ioctl = xfs_file_ioctl, 1405 .unlocked_ioctl = xfs_file_ioctl,
1471#ifdef CONFIG_COMPAT 1406#ifdef CONFIG_COMPAT
1472 .compat_ioctl = xfs_file_compat_ioctl, 1407 .compat_ioctl = xfs_file_compat_ioctl,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 6910458915cf..152f82782630 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1118,7 +1118,6 @@ DEFINE_RW_EVENT(xfs_file_read);
1118DEFINE_RW_EVENT(xfs_file_buffered_write); 1118DEFINE_RW_EVENT(xfs_file_buffered_write);
1119DEFINE_RW_EVENT(xfs_file_direct_write); 1119DEFINE_RW_EVENT(xfs_file_direct_write);
1120DEFINE_RW_EVENT(xfs_file_splice_read); 1120DEFINE_RW_EVENT(xfs_file_splice_read);
1121DEFINE_RW_EVENT(xfs_file_splice_write);
1122 1121
1123DECLARE_EVENT_CLASS(xfs_page_class, 1122DECLARE_EVENT_CLASS(xfs_page_class,
1124 TP_PROTO(struct inode *inode, struct page *page, unsigned long off, 1123 TP_PROTO(struct inode *inode, struct page *page, unsigned long off,