aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2016-01-12 19:30:34 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-01-12 19:30:34 -0500
commit fce205e9da8e063aa1cf3d6583c1a9ed2b82f3f0 (patch)
tree 1c0ae581b71a2737e2bc2d94536f1e39266f3638
parent 065019a38feab5f2659cbd44080d528f8dff0b00 (diff)
parent 2b3909f8a7fe94e0234850aa9d120cca15b6e1f7 (diff)
Merge branch 'work.copy_file_range' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs copy_file_range updates from Al Viro:
 "Several series around copy_file_range/CLONE"

* 'work.copy_file_range' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  btrfs: use new dedupe data function pointer
  vfs: hoist the btrfs deduplication ioctl to the vfs
  vfs: wire up compat ioctl for CLONE/CLONE_RANGE
  cifs: avoid unused variable and label
  nfsd: implement the NFSv4.2 CLONE operation
  nfsd: Pass filehandle to nfs4_preprocess_stateid_op()
  vfs: pull btrfs clone API to vfs layer
  locks: new locks_mandatory_area calling convention
  vfs: Add vfs_copy_file_range() support for pagecache copies
  btrfs: add .copy_file_range file operation
  x86: add sys_copy_file_range to syscall tables
  vfs: add copy_file_range syscall and vfs helper
-rw-r--r-- arch/x86/entry/syscalls/syscall_32.tbl 1
-rw-r--r-- arch/x86/entry/syscalls/syscall_64.tbl 1
-rw-r--r-- fs/btrfs/ctree.h 8
-rw-r--r-- fs/btrfs/file.c 3
-rw-r--r-- fs/btrfs/ioctl.c 186
-rw-r--r-- fs/cifs/cifsfs.c 61
-rw-r--r-- fs/cifs/cifsfs.h 1
-rw-r--r-- fs/cifs/ioctl.c 126
-rw-r--r-- fs/compat_ioctl.c 5
-rw-r--r-- fs/ioctl.c 67
-rw-r--r-- fs/locks.c 22
-rw-r--r-- fs/nfs/nfs4file.c 87
-rw-r--r-- fs/nfsd/nfs4proc.c 63
-rw-r--r-- fs/nfsd/nfs4state.c 5
-rw-r--r-- fs/nfsd/nfs4xdr.c 21
-rw-r--r-- fs/nfsd/state.h 4
-rw-r--r-- fs/nfsd/vfs.c 8
-rw-r--r-- fs/nfsd/vfs.h 2
-rw-r--r-- fs/nfsd/xdr4.h 10
-rw-r--r-- fs/read_write.c 302
-rw-r--r-- include/linux/fs.h 42
-rw-r--r-- include/linux/nfs4.h 4
-rw-r--r-- include/linux/syscalls.h 3
-rw-r--r-- include/uapi/asm-generic/unistd.h 4
-rw-r--r-- include/uapi/linux/fs.h 39
-rw-r--r-- kernel/sys_ni.c 1
26 files changed, 733 insertions, 343 deletions
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index f17705e1332c..cb713df81180 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -383,3 +383,4 @@
383374 i386 userfaultfd sys_userfaultfd 383374 i386 userfaultfd sys_userfaultfd
384375 i386 membarrier sys_membarrier 384375 i386 membarrier sys_membarrier
385376 i386 mlock2 sys_mlock2 385376 i386 mlock2 sys_mlock2
386377 i386 copy_file_range sys_copy_file_range
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 314a90bfc09c..dc1040a50bdc 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -332,6 +332,7 @@
332323 common userfaultfd sys_userfaultfd 332323 common userfaultfd sys_userfaultfd
333324 common membarrier sys_membarrier 333324 common membarrier sys_membarrier
334325 common mlock2 sys_mlock2 334325 common mlock2 sys_mlock2
335326 common copy_file_range sys_copy_file_range
335 336
336# 337#
337# x32-specific system call numbers start at 512 to avoid cache impact 338# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 35489e7129a7..b7e4e344e8e0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -4024,7 +4024,8 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
4024 struct btrfs_ioctl_space_info *space); 4024 struct btrfs_ioctl_space_info *space);
4025void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, 4025void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
4026 struct btrfs_ioctl_balance_args *bargs); 4026 struct btrfs_ioctl_balance_args *bargs);
4027 4027ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
4028 struct file *dst_file, u64 dst_loff);
4028 4029
4029/* file.c */ 4030/* file.c */
4030int btrfs_auto_defrag_init(void); 4031int btrfs_auto_defrag_init(void);
@@ -4055,6 +4056,11 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
4055 loff_t pos, size_t write_bytes, 4056 loff_t pos, size_t write_bytes,
4056 struct extent_state **cached); 4057 struct extent_state **cached);
4057int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end); 4058int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
4059ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
4060 struct file *file_out, loff_t pos_out,
4061 size_t len, unsigned int flags);
4062int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
4063 struct file *file_out, loff_t pos_out, u64 len);
4058 4064
4059/* tree-defrag.c */ 4065/* tree-defrag.c */
4060int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, 4066int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0f09526aa7d9..e3d9022bfd4e 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2934,6 +2934,9 @@ const struct file_operations btrfs_file_operations = {
2934#ifdef CONFIG_COMPAT 2934#ifdef CONFIG_COMPAT
2935 .compat_ioctl = btrfs_ioctl, 2935 .compat_ioctl = btrfs_ioctl,
2936#endif 2936#endif
2937 .copy_file_range = btrfs_copy_file_range,
2938 .clone_file_range = btrfs_clone_file_range,
2939 .dedupe_file_range = btrfs_dedupe_file_range,
2937}; 2940};
2938 2941
2939void btrfs_auto_defrag_exit(void) 2942void btrfs_auto_defrag_exit(void)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index da94138eb85e..e21997385d14 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2962,7 +2962,7 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
2962 flush_dcache_page(dst_page); 2962 flush_dcache_page(dst_page);
2963 2963
2964 if (memcmp(addr, dst_addr, cmp_len)) 2964 if (memcmp(addr, dst_addr, cmp_len))
2965 ret = BTRFS_SAME_DATA_DIFFERS; 2965 ret = -EBADE;
2966 2966
2967 kunmap_atomic(addr); 2967 kunmap_atomic(addr);
2968 kunmap_atomic(dst_addr); 2968 kunmap_atomic(dst_addr);
@@ -3098,53 +3098,16 @@ out_unlock:
3098 3098
3099#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) 3099#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
3100 3100
3101static long btrfs_ioctl_file_extent_same(struct file *file, 3101ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
3102 struct btrfs_ioctl_same_args __user *argp) 3102 struct file *dst_file, u64 dst_loff)
3103{ 3103{
3104 struct btrfs_ioctl_same_args *same = NULL; 3104 struct inode *src = file_inode(src_file);
3105 struct btrfs_ioctl_same_extent_info *info; 3105 struct inode *dst = file_inode(dst_file);
3106 struct inode *src = file_inode(file);
3107 u64 off;
3108 u64 len;
3109 int i;
3110 int ret;
3111 unsigned long size;
3112 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; 3106 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
3113 bool is_admin = capable(CAP_SYS_ADMIN); 3107 ssize_t res;
3114 u16 count;
3115
3116 if (!(file->f_mode & FMODE_READ))
3117 return -EINVAL;
3118
3119 ret = mnt_want_write_file(file);
3120 if (ret)
3121 return ret;
3122
3123 if (get_user(count, &argp->dest_count)) {
3124 ret = -EFAULT;
3125 goto out;
3126 }
3127
3128 size = offsetof(struct btrfs_ioctl_same_args __user, info[count]);
3129
3130 same = memdup_user(argp, size);
3131
3132 if (IS_ERR(same)) {
3133 ret = PTR_ERR(same);
3134 same = NULL;
3135 goto out;
3136 }
3137 3108
3138 off = same->logical_offset; 3109 if (olen > BTRFS_MAX_DEDUPE_LEN)
3139 len = same->length; 3110 olen = BTRFS_MAX_DEDUPE_LEN;
3140
3141 /*
3142 * Limit the total length we will dedupe for each operation.
3143 * This is intended to bound the total time spent in this
3144 * ioctl to something sane.
3145 */
3146 if (len > BTRFS_MAX_DEDUPE_LEN)
3147 len = BTRFS_MAX_DEDUPE_LEN;
3148 3111
3149 if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) { 3112 if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) {
3150 /* 3113 /*
@@ -3152,58 +3115,13 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
3152 * result, btrfs_cmp_data() won't correctly handle 3115 * result, btrfs_cmp_data() won't correctly handle
3153 * this situation without an update. 3116 * this situation without an update.
3154 */ 3117 */
3155 ret = -EINVAL; 3118 return -EINVAL;
3156 goto out;
3157 }
3158
3159 ret = -EISDIR;
3160 if (S_ISDIR(src->i_mode))
3161 goto out;
3162
3163 ret = -EACCES;
3164 if (!S_ISREG(src->i_mode))
3165 goto out;
3166
3167 /* pre-format output fields to sane values */
3168 for (i = 0; i < count; i++) {
3169 same->info[i].bytes_deduped = 0ULL;
3170 same->info[i].status = 0;
3171 }
3172
3173 for (i = 0, info = same->info; i < count; i++, info++) {
3174 struct inode *dst;
3175 struct fd dst_file = fdget(info->fd);
3176 if (!dst_file.file) {
3177 info->status = -EBADF;
3178 continue;
3179 }
3180 dst = file_inode(dst_file.file);
3181
3182 if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) {
3183 info->status = -EINVAL;
3184 } else if (file->f_path.mnt != dst_file.file->f_path.mnt) {
3185 info->status = -EXDEV;
3186 } else if (S_ISDIR(dst->i_mode)) {
3187 info->status = -EISDIR;
3188 } else if (!S_ISREG(dst->i_mode)) {
3189 info->status = -EACCES;
3190 } else {
3191 info->status = btrfs_extent_same(src, off, len, dst,
3192 info->logical_offset);
3193 if (info->status == 0)
3194 info->bytes_deduped += len;
3195 }
3196 fdput(dst_file);
3197 } 3119 }
3198 3120
3199 ret = copy_to_user(argp, same, size); 3121 res = btrfs_extent_same(src, loff, olen, dst, dst_loff);
3200 if (ret) 3122 if (res)
3201 ret = -EFAULT; 3123 return res;
3202 3124 return olen;
3203out:
3204 mnt_drop_write_file(file);
3205 kfree(same);
3206 return ret;
3207} 3125}
3208 3126
3209static int clone_finish_inode_update(struct btrfs_trans_handle *trans, 3127static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
@@ -3779,17 +3697,16 @@ out:
3779 return ret; 3697 return ret;
3780} 3698}
3781 3699
3782static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 3700static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
3783 u64 off, u64 olen, u64 destoff) 3701 u64 off, u64 olen, u64 destoff)
3784{ 3702{
3785 struct inode *inode = file_inode(file); 3703 struct inode *inode = file_inode(file);
3704 struct inode *src = file_inode(file_src);
3786 struct btrfs_root *root = BTRFS_I(inode)->root; 3705 struct btrfs_root *root = BTRFS_I(inode)->root;
3787 struct fd src_file;
3788 struct inode *src;
3789 int ret; 3706 int ret;
3790 u64 len = olen; 3707 u64 len = olen;
3791 u64 bs = root->fs_info->sb->s_blocksize; 3708 u64 bs = root->fs_info->sb->s_blocksize;
3792 int same_inode = 0; 3709 int same_inode = src == inode;
3793 3710
3794 /* 3711 /*
3795 * TODO: 3712 * TODO:
@@ -3802,49 +3719,20 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
3802 * be either compressed or non-compressed. 3719 * be either compressed or non-compressed.
3803 */ 3720 */
3804 3721
3805 /* the destination must be opened for writing */
3806 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
3807 return -EINVAL;
3808
3809 if (btrfs_root_readonly(root)) 3722 if (btrfs_root_readonly(root))
3810 return -EROFS; 3723 return -EROFS;
3811 3724
3812 ret = mnt_want_write_file(file); 3725 if (file_src->f_path.mnt != file->f_path.mnt ||
3813 if (ret) 3726 src->i_sb != inode->i_sb)
3814 return ret; 3727 return -EXDEV;
3815
3816 src_file = fdget(srcfd);
3817 if (!src_file.file) {
3818 ret = -EBADF;
3819 goto out_drop_write;
3820 }
3821
3822 ret = -EXDEV;
3823 if (src_file.file->f_path.mnt != file->f_path.mnt)
3824 goto out_fput;
3825
3826 src = file_inode(src_file.file);
3827
3828 ret = -EINVAL;
3829 if (src == inode)
3830 same_inode = 1;
3831
3832 /* the src must be open for reading */
3833 if (!(src_file.file->f_mode & FMODE_READ))
3834 goto out_fput;
3835 3728
3836 /* don't make the dst file partly checksummed */ 3729 /* don't make the dst file partly checksummed */
3837 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != 3730 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
3838 (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) 3731 (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
3839 goto out_fput; 3732 return -EINVAL;
3840 3733
3841 ret = -EISDIR;
3842 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) 3734 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
3843 goto out_fput; 3735 return -EISDIR;
3844
3845 ret = -EXDEV;
3846 if (src->i_sb != inode->i_sb)
3847 goto out_fput;
3848 3736
3849 if (!same_inode) { 3737 if (!same_inode) {
3850 btrfs_double_inode_lock(src, inode); 3738 btrfs_double_inode_lock(src, inode);
@@ -3921,21 +3809,25 @@ out_unlock:
3921 btrfs_double_inode_unlock(src, inode); 3809 btrfs_double_inode_unlock(src, inode);
3922 else 3810 else
3923 mutex_unlock(&src->i_mutex); 3811 mutex_unlock(&src->i_mutex);
3924out_fput:
3925 fdput(src_file);
3926out_drop_write:
3927 mnt_drop_write_file(file);
3928 return ret; 3812 return ret;
3929} 3813}
3930 3814
3931static long btrfs_ioctl_clone_range(struct file *file, void __user *argp) 3815ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
3816 struct file *file_out, loff_t pos_out,
3817 size_t len, unsigned int flags)
3932{ 3818{
3933 struct btrfs_ioctl_clone_range_args args; 3819 ssize_t ret;
3934 3820
3935 if (copy_from_user(&args, argp, sizeof(args))) 3821 ret = btrfs_clone_files(file_out, file_in, pos_in, len, pos_out);
3936 return -EFAULT; 3822 if (ret == 0)
3937 return btrfs_ioctl_clone(file, args.src_fd, args.src_offset, 3823 ret = len;
3938 args.src_length, args.dest_offset); 3824 return ret;
3825}
3826
3827int btrfs_clone_file_range(struct file *src_file, loff_t off,
3828 struct file *dst_file, loff_t destoff, u64 len)
3829{
3830 return btrfs_clone_files(dst_file, src_file, off, len, destoff);
3939} 3831}
3940 3832
3941/* 3833/*
@@ -5485,10 +5377,6 @@ long btrfs_ioctl(struct file *file, unsigned int
5485 return btrfs_ioctl_dev_info(root, argp); 5377 return btrfs_ioctl_dev_info(root, argp);
5486 case BTRFS_IOC_BALANCE: 5378 case BTRFS_IOC_BALANCE:
5487 return btrfs_ioctl_balance(file, NULL); 5379 return btrfs_ioctl_balance(file, NULL);
5488 case BTRFS_IOC_CLONE:
5489 return btrfs_ioctl_clone(file, arg, 0, 0, 0);
5490 case BTRFS_IOC_CLONE_RANGE:
5491 return btrfs_ioctl_clone_range(file, argp);
5492 case BTRFS_IOC_TRANS_START: 5380 case BTRFS_IOC_TRANS_START:
5493 return btrfs_ioctl_trans_start(file); 5381 return btrfs_ioctl_trans_start(file);
5494 case BTRFS_IOC_TRANS_END: 5382 case BTRFS_IOC_TRANS_END:
@@ -5566,8 +5454,6 @@ long btrfs_ioctl(struct file *file, unsigned int
5566 return btrfs_ioctl_get_fslabel(file, argp); 5454 return btrfs_ioctl_get_fslabel(file, argp);
5567 case BTRFS_IOC_SET_FSLABEL: 5455 case BTRFS_IOC_SET_FSLABEL:
5568 return btrfs_ioctl_set_fslabel(file, argp); 5456 return btrfs_ioctl_set_fslabel(file, argp);
5569 case BTRFS_IOC_FILE_EXTENT_SAME:
5570 return btrfs_ioctl_file_extent_same(file, argp);
5571 case BTRFS_IOC_GET_SUPPORTED_FEATURES: 5457 case BTRFS_IOC_GET_SUPPORTED_FEATURES:
5572 return btrfs_ioctl_get_supported_features(file, argp); 5458 return btrfs_ioctl_get_supported_features(file, argp);
5573 case BTRFS_IOC_GET_FEATURES: 5459 case BTRFS_IOC_GET_FEATURES:
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 90e4e2b398b6..b7fcb3151103 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -913,6 +913,59 @@ const struct inode_operations cifs_symlink_inode_ops = {
913#endif 913#endif
914}; 914};
915 915
916static int cifs_clone_file_range(struct file *src_file, loff_t off,
917 struct file *dst_file, loff_t destoff, u64 len)
918{
919 struct inode *src_inode = file_inode(src_file);
920 struct inode *target_inode = file_inode(dst_file);
921 struct cifsFileInfo *smb_file_src = src_file->private_data;
922 struct cifsFileInfo *smb_file_target = dst_file->private_data;
923 struct cifs_tcon *target_tcon = tlink_tcon(smb_file_target->tlink);
924 unsigned int xid;
925 int rc;
926
927 cifs_dbg(FYI, "clone range\n");
928
929 xid = get_xid();
930
931 if (!src_file->private_data || !dst_file->private_data) {
932 rc = -EBADF;
933 cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
934 goto out;
935 }
936
937 /*
938 * Note: cifs case is easier than btrfs since server responsible for
939 * checks for proper open modes and file type and if it wants
940 * server could even support copy of range where source = target
941 */
942 lock_two_nondirectories(target_inode, src_inode);
943
944 if (len == 0)
945 len = src_inode->i_size - off;
946
947 cifs_dbg(FYI, "about to flush pages\n");
948 /* should we flush first and last page first */
949 truncate_inode_pages_range(&target_inode->i_data, destoff,
950 PAGE_CACHE_ALIGN(destoff + len)-1);
951
952 if (target_tcon->ses->server->ops->duplicate_extents)
953 rc = target_tcon->ses->server->ops->duplicate_extents(xid,
954 smb_file_src, smb_file_target, off, len, destoff);
955 else
956 rc = -EOPNOTSUPP;
957
958 /* force revalidate of size and timestamps of target file now
959 that target is updated on the server */
960 CIFS_I(target_inode)->time = 0;
961 /* although unlocking in the reverse order from locking is not
962 strictly necessary here it is a little cleaner to be consistent */
963 unlock_two_nondirectories(src_inode, target_inode);
964out:
965 free_xid(xid);
966 return rc;
967}
968
916const struct file_operations cifs_file_ops = { 969const struct file_operations cifs_file_ops = {
917 .read_iter = cifs_loose_read_iter, 970 .read_iter = cifs_loose_read_iter,
918 .write_iter = cifs_file_write_iter, 971 .write_iter = cifs_file_write_iter,
@@ -925,6 +978,7 @@ const struct file_operations cifs_file_ops = {
925 .splice_read = generic_file_splice_read, 978 .splice_read = generic_file_splice_read,
926 .llseek = cifs_llseek, 979 .llseek = cifs_llseek,
927 .unlocked_ioctl = cifs_ioctl, 980 .unlocked_ioctl = cifs_ioctl,
981 .clone_file_range = cifs_clone_file_range,
928 .setlease = cifs_setlease, 982 .setlease = cifs_setlease,
929 .fallocate = cifs_fallocate, 983 .fallocate = cifs_fallocate,
930}; 984};
@@ -941,6 +995,8 @@ const struct file_operations cifs_file_strict_ops = {
941 .splice_read = generic_file_splice_read, 995 .splice_read = generic_file_splice_read,
942 .llseek = cifs_llseek, 996 .llseek = cifs_llseek,
943 .unlocked_ioctl = cifs_ioctl, 997 .unlocked_ioctl = cifs_ioctl,
998 .clone_file_range = cifs_clone_file_range,
999 .clone_file_range = cifs_clone_file_range,
944 .setlease = cifs_setlease, 1000 .setlease = cifs_setlease,
945 .fallocate = cifs_fallocate, 1001 .fallocate = cifs_fallocate,
946}; 1002};
@@ -957,6 +1013,7 @@ const struct file_operations cifs_file_direct_ops = {
957 .mmap = cifs_file_mmap, 1013 .mmap = cifs_file_mmap,
958 .splice_read = generic_file_splice_read, 1014 .splice_read = generic_file_splice_read,
959 .unlocked_ioctl = cifs_ioctl, 1015 .unlocked_ioctl = cifs_ioctl,
1016 .clone_file_range = cifs_clone_file_range,
960 .llseek = cifs_llseek, 1017 .llseek = cifs_llseek,
961 .setlease = cifs_setlease, 1018 .setlease = cifs_setlease,
962 .fallocate = cifs_fallocate, 1019 .fallocate = cifs_fallocate,
@@ -973,6 +1030,7 @@ const struct file_operations cifs_file_nobrl_ops = {
973 .splice_read = generic_file_splice_read, 1030 .splice_read = generic_file_splice_read,
974 .llseek = cifs_llseek, 1031 .llseek = cifs_llseek,
975 .unlocked_ioctl = cifs_ioctl, 1032 .unlocked_ioctl = cifs_ioctl,
1033 .clone_file_range = cifs_clone_file_range,
976 .setlease = cifs_setlease, 1034 .setlease = cifs_setlease,
977 .fallocate = cifs_fallocate, 1035 .fallocate = cifs_fallocate,
978}; 1036};
@@ -988,6 +1046,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
988 .splice_read = generic_file_splice_read, 1046 .splice_read = generic_file_splice_read,
989 .llseek = cifs_llseek, 1047 .llseek = cifs_llseek,
990 .unlocked_ioctl = cifs_ioctl, 1048 .unlocked_ioctl = cifs_ioctl,
1049 .clone_file_range = cifs_clone_file_range,
991 .setlease = cifs_setlease, 1050 .setlease = cifs_setlease,
992 .fallocate = cifs_fallocate, 1051 .fallocate = cifs_fallocate,
993}; 1052};
@@ -1003,6 +1062,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
1003 .mmap = cifs_file_mmap, 1062 .mmap = cifs_file_mmap,
1004 .splice_read = generic_file_splice_read, 1063 .splice_read = generic_file_splice_read,
1005 .unlocked_ioctl = cifs_ioctl, 1064 .unlocked_ioctl = cifs_ioctl,
1065 .clone_file_range = cifs_clone_file_range,
1006 .llseek = cifs_llseek, 1066 .llseek = cifs_llseek,
1007 .setlease = cifs_setlease, 1067 .setlease = cifs_setlease,
1008 .fallocate = cifs_fallocate, 1068 .fallocate = cifs_fallocate,
@@ -1013,6 +1073,7 @@ const struct file_operations cifs_dir_ops = {
1013 .release = cifs_closedir, 1073 .release = cifs_closedir,
1014 .read = generic_read_dir, 1074 .read = generic_read_dir,
1015 .unlocked_ioctl = cifs_ioctl, 1075 .unlocked_ioctl = cifs_ioctl,
1076 .clone_file_range = cifs_clone_file_range,
1016 .llseek = generic_file_llseek, 1077 .llseek = generic_file_llseek,
1017}; 1078};
1018 1079
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 26a1187d4323..68c4547528c4 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -130,7 +130,6 @@ extern int cifs_setxattr(struct dentry *, const char *, const void *,
130extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); 130extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
131extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); 131extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
132extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); 132extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
133
134#ifdef CONFIG_CIFS_NFSD_EXPORT 133#ifdef CONFIG_CIFS_NFSD_EXPORT
135extern const struct export_operations cifs_export_ops; 134extern const struct export_operations cifs_export_ops;
136#endif /* CONFIG_CIFS_NFSD_EXPORT */ 135#endif /* CONFIG_CIFS_NFSD_EXPORT */
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 35cf990f87d3..7a3b84e300f8 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -34,73 +34,36 @@
34#include "cifs_ioctl.h" 34#include "cifs_ioctl.h"
35#include <linux/btrfs.h> 35#include <linux/btrfs.h>
36 36
37static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, 37static int cifs_file_clone_range(unsigned int xid, struct file *src_file,
38 unsigned long srcfd, u64 off, u64 len, u64 destoff, 38 struct file *dst_file)
39 bool dup_extents)
40{ 39{
41 int rc; 40 struct inode *src_inode = file_inode(src_file);
42 struct cifsFileInfo *smb_file_target = dst_file->private_data;
43 struct inode *target_inode = file_inode(dst_file); 41 struct inode *target_inode = file_inode(dst_file);
44 struct cifs_tcon *target_tcon;
45 struct fd src_file;
46 struct cifsFileInfo *smb_file_src; 42 struct cifsFileInfo *smb_file_src;
47 struct inode *src_inode; 43 struct cifsFileInfo *smb_file_target;
48 struct cifs_tcon *src_tcon; 44 struct cifs_tcon *src_tcon;
45 struct cifs_tcon *target_tcon;
46 int rc;
49 47
50 cifs_dbg(FYI, "ioctl clone range\n"); 48 cifs_dbg(FYI, "ioctl clone range\n");
51 /* the destination must be opened for writing */
52 if (!(dst_file->f_mode & FMODE_WRITE)) {
53 cifs_dbg(FYI, "file target not open for write\n");
54 return -EINVAL;
55 }
56 49
57 /* check if target volume is readonly and take reference */ 50 if (!src_file->private_data || !dst_file->private_data) {
58 rc = mnt_want_write_file(dst_file);
59 if (rc) {
60 cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc);
61 return rc;
62 }
63
64 src_file = fdget(srcfd);
65 if (!src_file.file) {
66 rc = -EBADF;
67 goto out_drop_write;
68 }
69
70 if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) {
71 rc = -EBADF;
72 cifs_dbg(VFS, "src file seems to be from a different filesystem type\n");
73 goto out_fput;
74 }
75
76 if ((!src_file.file->private_data) || (!dst_file->private_data)) {
77 rc = -EBADF; 51 rc = -EBADF;
78 cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); 52 cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
79 goto out_fput; 53 goto out;
80 } 54 }
81 55
82 rc = -EXDEV; 56 rc = -EXDEV;
83 smb_file_target = dst_file->private_data; 57 smb_file_target = dst_file->private_data;
84 smb_file_src = src_file.file->private_data; 58 smb_file_src = src_file->private_data;
85 src_tcon = tlink_tcon(smb_file_src->tlink); 59 src_tcon = tlink_tcon(smb_file_src->tlink);
86 target_tcon = tlink_tcon(smb_file_target->tlink); 60 target_tcon = tlink_tcon(smb_file_target->tlink);
87 61
88 /* check source and target on same server (or volume if dup_extents) */ 62 if (src_tcon->ses != target_tcon->ses) {
89 if (dup_extents && (src_tcon != target_tcon)) {
90 cifs_dbg(VFS, "source and target of copy not on same share\n");
91 goto out_fput;
92 }
93
94 if (!dup_extents && (src_tcon->ses != target_tcon->ses)) {
95 cifs_dbg(VFS, "source and target of copy not on same server\n"); 63 cifs_dbg(VFS, "source and target of copy not on same server\n");
96 goto out_fput; 64 goto out;
97 } 65 }
98 66
99 src_inode = file_inode(src_file.file);
100 rc = -EINVAL;
101 if (S_ISDIR(src_inode->i_mode))
102 goto out_fput;
103
104 /* 67 /*
105 * Note: cifs case is easier than btrfs since server responsible for 68 * Note: cifs case is easier than btrfs since server responsible for
106 * checks for proper open modes and file type and if it wants 69 * checks for proper open modes and file type and if it wants
@@ -108,34 +71,66 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
108 */ 71 */
109 lock_two_nondirectories(target_inode, src_inode); 72 lock_two_nondirectories(target_inode, src_inode);
110 73
111 /* determine range to clone */
112 rc = -EINVAL;
113 if (off + len > src_inode->i_size || off + len < off)
114 goto out_unlock;
115 if (len == 0)
116 len = src_inode->i_size - off;
117
118 cifs_dbg(FYI, "about to flush pages\n"); 74 cifs_dbg(FYI, "about to flush pages\n");
119 /* should we flush first and last page first */ 75 /* should we flush first and last page first */
120 truncate_inode_pages_range(&target_inode->i_data, destoff, 76 truncate_inode_pages(&target_inode->i_data, 0);
121 PAGE_CACHE_ALIGN(destoff + len)-1);
122 77
123 if (dup_extents && target_tcon->ses->server->ops->duplicate_extents) 78 if (target_tcon->ses->server->ops->clone_range)
124 rc = target_tcon->ses->server->ops->duplicate_extents(xid,
125 smb_file_src, smb_file_target, off, len, destoff);
126 else if (!dup_extents && target_tcon->ses->server->ops->clone_range)
127 rc = target_tcon->ses->server->ops->clone_range(xid, 79 rc = target_tcon->ses->server->ops->clone_range(xid,
128 smb_file_src, smb_file_target, off, len, destoff); 80 smb_file_src, smb_file_target, 0, src_inode->i_size, 0);
129 else 81 else
130 rc = -EOPNOTSUPP; 82 rc = -EOPNOTSUPP;
131 83
132 /* force revalidate of size and timestamps of target file now 84 /* force revalidate of size and timestamps of target file now
133 that target is updated on the server */ 85 that target is updated on the server */
134 CIFS_I(target_inode)->time = 0; 86 CIFS_I(target_inode)->time = 0;
135out_unlock:
136 /* although unlocking in the reverse order from locking is not 87 /* although unlocking in the reverse order from locking is not
137 strictly necessary here it is a little cleaner to be consistent */ 88 strictly necessary here it is a little cleaner to be consistent */
138 unlock_two_nondirectories(src_inode, target_inode); 89 unlock_two_nondirectories(src_inode, target_inode);
90out:
91 return rc;
92}
93
94static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
95 unsigned long srcfd)
96{
97 int rc;
98 struct fd src_file;
99 struct inode *src_inode;
100
101 cifs_dbg(FYI, "ioctl clone range\n");
102 /* the destination must be opened for writing */
103 if (!(dst_file->f_mode & FMODE_WRITE)) {
104 cifs_dbg(FYI, "file target not open for write\n");
105 return -EINVAL;
106 }
107
108 /* check if target volume is readonly and take reference */
109 rc = mnt_want_write_file(dst_file);
110 if (rc) {
111 cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc);
112 return rc;
113 }
114
115 src_file = fdget(srcfd);
116 if (!src_file.file) {
117 rc = -EBADF;
118 goto out_drop_write;
119 }
120
121 if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) {
122 rc = -EBADF;
123 cifs_dbg(VFS, "src file seems to be from a different filesystem type\n");
124 goto out_fput;
125 }
126
127 src_inode = file_inode(src_file.file);
128 rc = -EINVAL;
129 if (S_ISDIR(src_inode->i_mode))
130 goto out_fput;
131
132 rc = cifs_file_clone_range(xid, src_file.file, dst_file);
133
139out_fput: 134out_fput:
140 fdput(src_file); 135 fdput(src_file);
141out_drop_write: 136out_drop_write:
@@ -256,10 +251,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
256 } 251 }
257 break; 252 break;
258 case CIFS_IOC_COPYCHUNK_FILE: 253 case CIFS_IOC_COPYCHUNK_FILE:
259 rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, false); 254 rc = cifs_ioctl_clone(xid, filep, arg);
260 break;
261 case BTRFS_IOC_CLONE:
262 rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, true);
263 break; 255 break;
264 case CIFS_IOC_SET_INTEGRITY: 256 case CIFS_IOC_SET_INTEGRITY:
265 if (pSMBFile == NULL) 257 if (pSMBFile == NULL)
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 9144b779d10e..647ee0b03dc0 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1601,6 +1601,11 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
1601 goto out_fput; 1601 goto out_fput;
1602#endif 1602#endif
1603 1603
1604 case FICLONE:
1605 case FICLONERANGE:
1606 case FIDEDUPERANGE:
1607 goto do_ioctl;
1608
1604 case FIBMAP: 1609 case FIBMAP:
1605 case FIGETBSZ: 1610 case FIGETBSZ:
1606 case FIONREAD: 1611 case FIONREAD:
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 41c352e81193..29466c380958 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -215,6 +215,29 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
215 return error; 215 return error;
216} 216}
217 217
218static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
219 u64 off, u64 olen, u64 destoff)
220{
221 struct fd src_file = fdget(srcfd);
222 int ret;
223
224 if (!src_file.file)
225 return -EBADF;
226 ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen);
227 fdput(src_file);
228 return ret;
229}
230
231static long ioctl_file_clone_range(struct file *file, void __user *argp)
232{
233 struct file_clone_range args;
234
235 if (copy_from_user(&args, argp, sizeof(args)))
236 return -EFAULT;
237 return ioctl_file_clone(file, args.src_fd, args.src_offset,
238 args.src_length, args.dest_offset);
239}
240
218#ifdef CONFIG_BLOCK 241#ifdef CONFIG_BLOCK
219 242
220static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) 243static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
@@ -545,6 +568,41 @@ static int ioctl_fsthaw(struct file *filp)
545 return thaw_super(sb); 568 return thaw_super(sb);
546} 569}
547 570
571static long ioctl_file_dedupe_range(struct file *file, void __user *arg)
572{
573 struct file_dedupe_range __user *argp = arg;
574 struct file_dedupe_range *same = NULL;
575 int ret;
576 unsigned long size;
577 u16 count;
578
579 if (get_user(count, &argp->dest_count)) {
580 ret = -EFAULT;
581 goto out;
582 }
583
584 size = offsetof(struct file_dedupe_range __user, info[count]);
585
586 same = memdup_user(argp, size);
587 if (IS_ERR(same)) {
588 ret = PTR_ERR(same);
589 same = NULL;
590 goto out;
591 }
592
593 ret = vfs_dedupe_file_range(file, same);
594 if (ret)
595 goto out;
596
597 ret = copy_to_user(argp, same, size);
598 if (ret)
599 ret = -EFAULT;
600
601out:
602 kfree(same);
603 return ret;
604}
605
548/* 606/*
549 * When you add any new common ioctls to the switches above and below 607 * When you add any new common ioctls to the switches above and below
550 * please update compat_sys_ioctl() too. 608 * please update compat_sys_ioctl() too.
@@ -600,6 +658,15 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
600 case FIGETBSZ: 658 case FIGETBSZ:
601 return put_user(inode->i_sb->s_blocksize, argp); 659 return put_user(inode->i_sb->s_blocksize, argp);
602 660
661 case FICLONE:
662 return ioctl_file_clone(filp, arg, 0, 0, 0);
663
664 case FICLONERANGE:
665 return ioctl_file_clone_range(filp, argp);
666
667 case FIDEDUPERANGE:
668 return ioctl_file_dedupe_range(filp, argp);
669
603 default: 670 default:
604 if (S_ISREG(inode->i_mode)) 671 if (S_ISREG(inode->i_mode))
605 error = file_ioctl(filp, cmd, arg); 672 error = file_ioctl(filp, cmd, arg);
diff --git a/fs/locks.c b/fs/locks.c
index a91f4ab00a90..af1ed74a657f 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1258,20 +1258,16 @@ int locks_mandatory_locked(struct file *file)
1258 1258
1259/** 1259/**
1260 * locks_mandatory_area - Check for a conflicting lock 1260 * locks_mandatory_area - Check for a conflicting lock
1261 * @read_write: %FLOCK_VERIFY_WRITE for exclusive access, %FLOCK_VERIFY_READ 1261 * @inode: the file to check
1262 * for shared
1263 * @inode: the file to check
1264 * @filp: how the file was opened (if it was) 1262 * @filp: how the file was opened (if it was)
1265 * @offset: start of area to check 1263 * @start: first byte in the file to check
1266 * @count: length of area to check 1264 * @end: last byte in the file to check
1265 * @type: %F_WRLCK for a write lock, else %F_RDLCK
1267 * 1266 *
1268 * Searches the inode's list of locks to find any POSIX locks which conflict. 1267 * Searches the inode's list of locks to find any POSIX locks which conflict.
1269 * This function is called from rw_verify_area() and
1270 * locks_verify_truncate().
1271 */ 1268 */
1272int locks_mandatory_area(int read_write, struct inode *inode, 1269int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
1273 struct file *filp, loff_t offset, 1270 loff_t end, unsigned char type)
1274 size_t count)
1275{ 1271{
1276 struct file_lock fl; 1272 struct file_lock fl;
1277 int error; 1273 int error;
@@ -1283,9 +1279,9 @@ int locks_mandatory_area(int read_write, struct inode *inode,
1283 fl.fl_flags = FL_POSIX | FL_ACCESS; 1279 fl.fl_flags = FL_POSIX | FL_ACCESS;
1284 if (filp && !(filp->f_flags & O_NONBLOCK)) 1280 if (filp && !(filp->f_flags & O_NONBLOCK))
1285 sleep = true; 1281 sleep = true;
1286 fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; 1282 fl.fl_type = type;
1287 fl.fl_start = offset; 1283 fl.fl_start = start;
1288 fl.fl_end = offset + count - 1; 1284 fl.fl_end = end;
1289 1285
1290 for (;;) { 1286 for (;;) {
1291 if (filp) { 1287 if (filp) {
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index db9b5fea5b3e..26f9a23e2b25 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -195,65 +195,27 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
195 return nfs42_proc_allocate(filep, offset, len); 195 return nfs42_proc_allocate(filep, offset, len);
196} 196}
197 197
198static noinline long 198static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
199nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, 199 struct file *dst_file, loff_t dst_off, u64 count)
200 u64 src_off, u64 dst_off, u64 count)
201{ 200{
202 struct inode *dst_inode = file_inode(dst_file); 201 struct inode *dst_inode = file_inode(dst_file);
203 struct nfs_server *server = NFS_SERVER(dst_inode); 202 struct nfs_server *server = NFS_SERVER(dst_inode);
204 struct fd src_file; 203 struct inode *src_inode = file_inode(src_file);
205 struct inode *src_inode;
206 unsigned int bs = server->clone_blksize; 204 unsigned int bs = server->clone_blksize;
207 bool same_inode = false; 205 bool same_inode = false;
208 int ret; 206 int ret;
209 207
210 /* dst file must be opened for writing */
211 if (!(dst_file->f_mode & FMODE_WRITE))
212 return -EINVAL;
213
214 ret = mnt_want_write_file(dst_file);
215 if (ret)
216 return ret;
217
218 src_file = fdget(srcfd);
219 if (!src_file.file) {
220 ret = -EBADF;
221 goto out_drop_write;
222 }
223
224 src_inode = file_inode(src_file.file);
225
226 if (src_inode == dst_inode)
227 same_inode = true;
228
229 /* src file must be opened for reading */
230 if (!(src_file.file->f_mode & FMODE_READ))
231 goto out_fput;
232
233 /* src and dst must be regular files */
234 ret = -EISDIR;
235 if (!S_ISREG(src_inode->i_mode) || !S_ISREG(dst_inode->i_mode))
236 goto out_fput;
237
238 ret = -EXDEV;
239 if (src_file.file->f_path.mnt != dst_file->f_path.mnt ||
240 src_inode->i_sb != dst_inode->i_sb)
241 goto out_fput;
242
243 /* check alignment w.r.t. clone_blksize */ 208 /* check alignment w.r.t. clone_blksize */
244 ret = -EINVAL; 209 ret = -EINVAL;
245 if (bs) { 210 if (bs) {
246 if (!IS_ALIGNED(src_off, bs) || !IS_ALIGNED(dst_off, bs)) 211 if (!IS_ALIGNED(src_off, bs) || !IS_ALIGNED(dst_off, bs))
247 goto out_fput; 212 goto out;
248 if (!IS_ALIGNED(count, bs) && i_size_read(src_inode) != (src_off + count)) 213 if (!IS_ALIGNED(count, bs) && i_size_read(src_inode) != (src_off + count))
249 goto out_fput; 214 goto out;
250 } 215 }
251 216
252 /* verify if ranges are overlapped within the same file */ 217 if (src_inode == dst_inode)
253 if (same_inode) { 218 same_inode = true;
254 if (dst_off + count > src_off && dst_off < src_off + count)
255 goto out_fput;
256 }
257 219
258 /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */ 220 /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
259 if (same_inode) { 221 if (same_inode) {
@@ -275,7 +237,7 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
275 if (ret) 237 if (ret)
276 goto out_unlock; 238 goto out_unlock;
277 239
278 ret = nfs42_proc_clone(src_file.file, dst_file, src_off, dst_off, count); 240 ret = nfs42_proc_clone(src_file, dst_file, src_off, dst_off, count);
279 241
280 /* truncate inode page cache of the dst range so that future reads can fetch 242 /* truncate inode page cache of the dst range so that future reads can fetch
281 * new data from server */ 243 * new data from server */
@@ -292,37 +254,9 @@ out_unlock:
292 mutex_unlock(&dst_inode->i_mutex); 254 mutex_unlock(&dst_inode->i_mutex);
293 mutex_unlock(&src_inode->i_mutex); 255 mutex_unlock(&src_inode->i_mutex);
294 } 256 }
295out_fput: 257out:
296 fdput(src_file);
297out_drop_write:
298 mnt_drop_write_file(dst_file);
299 return ret; 258 return ret;
300} 259}
301
302static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp)
303{
304 struct btrfs_ioctl_clone_range_args args;
305
306 if (copy_from_user(&args, argp, sizeof(args)))
307 return -EFAULT;
308
309 return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_offset,
310 args.dest_offset, args.src_length);
311}
312
313long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
314{
315 void __user *argp = (void __user *)arg;
316
317 switch (cmd) {
318 case BTRFS_IOC_CLONE:
319 return nfs42_ioctl_clone(file, arg, 0, 0, 0);
320 case BTRFS_IOC_CLONE_RANGE:
321 return nfs42_ioctl_clone_range(file, argp);
322 }
323
324 return -ENOTTY;
325}
326#endif /* CONFIG_NFS_V4_2 */ 260#endif /* CONFIG_NFS_V4_2 */
327 261
328const struct file_operations nfs4_file_operations = { 262const struct file_operations nfs4_file_operations = {
@@ -342,8 +276,7 @@ const struct file_operations nfs4_file_operations = {
342#ifdef CONFIG_NFS_V4_2 276#ifdef CONFIG_NFS_V4_2
343 .llseek = nfs4_file_llseek, 277 .llseek = nfs4_file_llseek,
344 .fallocate = nfs42_fallocate, 278 .fallocate = nfs42_fallocate,
345 .unlocked_ioctl = nfs4_ioctl, 279 .clone_file_range = nfs42_clone_file_range,
346 .compat_ioctl = nfs4_ioctl,
347#else 280#else
348 .llseek = nfs_file_llseek, 281 .llseek = nfs_file_llseek,
349#endif 282#endif
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a9f096c7e99f..819ad812c71b 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -774,8 +774,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
774 clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); 774 clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
775 775
776 /* check stateid */ 776 /* check stateid */
777 status = nfs4_preprocess_stateid_op(rqstp, cstate, &read->rd_stateid, 777 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
778 RD_STATE, &read->rd_filp, &read->rd_tmp_file); 778 &read->rd_stateid, RD_STATE,
779 &read->rd_filp, &read->rd_tmp_file);
779 if (status) { 780 if (status) {
780 dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); 781 dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
781 goto out; 782 goto out;
@@ -921,7 +922,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
921 922
922 if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { 923 if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
923 status = nfs4_preprocess_stateid_op(rqstp, cstate, 924 status = nfs4_preprocess_stateid_op(rqstp, cstate,
924 &setattr->sa_stateid, WR_STATE, NULL, NULL); 925 &cstate->current_fh, &setattr->sa_stateid,
926 WR_STATE, NULL, NULL);
925 if (status) { 927 if (status) {
926 dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); 928 dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
927 return status; 929 return status;
@@ -985,8 +987,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
985 if (write->wr_offset >= OFFSET_MAX) 987 if (write->wr_offset >= OFFSET_MAX)
986 return nfserr_inval; 988 return nfserr_inval;
987 989
988 status = nfs4_preprocess_stateid_op(rqstp, cstate, stateid, WR_STATE, 990 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
989 &filp, NULL); 991 stateid, WR_STATE, &filp, NULL);
990 if (status) { 992 if (status) {
991 dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); 993 dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
992 return status; 994 return status;
@@ -1010,13 +1012,54 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1010} 1012}
1011 1013
1012static __be32 1014static __be32
1015nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1016 struct nfsd4_clone *clone)
1017{
1018 struct file *src, *dst;
1019 __be32 status;
1020
1021 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
1022 &clone->cl_src_stateid, RD_STATE,
1023 &src, NULL);
1024 if (status) {
1025 dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
1026 goto out;
1027 }
1028
1029 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
1030 &clone->cl_dst_stateid, WR_STATE,
1031 &dst, NULL);
1032 if (status) {
1033 dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
1034 goto out_put_src;
1035 }
1036
1037 /* fix up for NFS-specific error code */
1038 if (!S_ISREG(file_inode(src)->i_mode) ||
1039 !S_ISREG(file_inode(dst)->i_mode)) {
1040 status = nfserr_wrong_type;
1041 goto out_put_dst;
1042 }
1043
1044 status = nfsd4_clone_file_range(src, clone->cl_src_pos,
1045 dst, clone->cl_dst_pos, clone->cl_count);
1046
1047out_put_dst:
1048 fput(dst);
1049out_put_src:
1050 fput(src);
1051out:
1052 return status;
1053}
1054
1055static __be32
1013nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 1056nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1014 struct nfsd4_fallocate *fallocate, int flags) 1057 struct nfsd4_fallocate *fallocate, int flags)
1015{ 1058{
1016 __be32 status = nfserr_notsupp; 1059 __be32 status = nfserr_notsupp;
1017 struct file *file; 1060 struct file *file;
1018 1061
1019 status = nfs4_preprocess_stateid_op(rqstp, cstate, 1062 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
1020 &fallocate->falloc_stateid, 1063 &fallocate->falloc_stateid,
1021 WR_STATE, &file, NULL); 1064 WR_STATE, &file, NULL);
1022 if (status != nfs_ok) { 1065 if (status != nfs_ok) {
@@ -1055,7 +1098,7 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1055 __be32 status; 1098 __be32 status;
1056 struct file *file; 1099 struct file *file;
1057 1100
1058 status = nfs4_preprocess_stateid_op(rqstp, cstate, 1101 status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
1059 &seek->seek_stateid, 1102 &seek->seek_stateid,
1060 RD_STATE, &file, NULL); 1103 RD_STATE, &file, NULL);
1061 if (status) { 1104 if (status) {
@@ -2279,6 +2322,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
2279 .op_name = "OP_DEALLOCATE", 2322 .op_name = "OP_DEALLOCATE",
2280 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, 2323 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
2281 }, 2324 },
2325 [OP_CLONE] = {
2326 .op_func = (nfsd4op_func)nfsd4_clone,
2327 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
2328 .op_name = "OP_CLONE",
2329 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
2330 },
2282 [OP_SEEK] = { 2331 [OP_SEEK] = {
2283 .op_func = (nfsd4op_func)nfsd4_seek, 2332 .op_func = (nfsd4op_func)nfsd4_seek,
2284 .op_name = "OP_SEEK", 2333 .op_name = "OP_SEEK",
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6b800b5b8fed..df5dba687265 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4797,10 +4797,9 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
4797 */ 4797 */
4798__be32 4798__be32
4799nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, 4799nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
4800 struct nfsd4_compound_state *cstate, stateid_t *stateid, 4800 struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
4801 int flags, struct file **filpp, bool *tmp_file) 4801 stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file)
4802{ 4802{
4803 struct svc_fh *fhp = &cstate->current_fh;
4804 struct inode *ino = d_inode(fhp->fh_dentry); 4803 struct inode *ino = d_inode(fhp->fh_dentry);
4805 struct net *net = SVC_NET(rqstp); 4804 struct net *net = SVC_NET(rqstp);
4806 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 4805 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 51c9e9ca39a4..924416f91fdd 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1675,6 +1675,25 @@ nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
1675} 1675}
1676 1676
1677static __be32 1677static __be32
1678nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
1679{
1680 DECODE_HEAD;
1681
1682 status = nfsd4_decode_stateid(argp, &clone->cl_src_stateid);
1683 if (status)
1684 return status;
1685 status = nfsd4_decode_stateid(argp, &clone->cl_dst_stateid);
1686 if (status)
1687 return status;
1688
1689 READ_BUF(8 + 8 + 8);
1690 p = xdr_decode_hyper(p, &clone->cl_src_pos);
1691 p = xdr_decode_hyper(p, &clone->cl_dst_pos);
1692 p = xdr_decode_hyper(p, &clone->cl_count);
1693 DECODE_TAIL;
1694}
1695
1696static __be32
1678nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) 1697nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
1679{ 1698{
1680 DECODE_HEAD; 1699 DECODE_HEAD;
@@ -1785,6 +1804,7 @@ static nfsd4_dec nfsd4_dec_ops[] = {
1785 [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, 1804 [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp,
1786 [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, 1805 [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
1787 [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, 1806 [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
1807 [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone,
1788}; 1808};
1789 1809
1790static inline bool 1810static inline bool
@@ -4292,6 +4312,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
4292 [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop, 4312 [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop,
4293 [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, 4313 [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
4294 [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, 4314 [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
4315 [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop,
4295}; 4316};
4296 4317
4297/* 4318/*
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 77fdf4de91ba..99432b7ecb9c 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -578,8 +578,8 @@ struct nfsd4_compound_state;
578struct nfsd_net; 578struct nfsd_net;
579 579
580extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, 580extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
581 struct nfsd4_compound_state *cstate, stateid_t *stateid, 581 struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
582 int flags, struct file **filp, bool *tmp_file); 582 stateid_t *stateid, int flags, struct file **filp, bool *tmp_file);
583__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, 583__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
584 stateid_t *stateid, unsigned char typemask, 584 stateid_t *stateid, unsigned char typemask,
585 struct nfs4_stid **s, struct nfsd_net *nn); 585 struct nfs4_stid **s, struct nfsd_net *nn);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 994d66fbb446..5411bf09b810 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -36,6 +36,7 @@
36#endif /* CONFIG_NFSD_V3 */ 36#endif /* CONFIG_NFSD_V3 */
37 37
38#ifdef CONFIG_NFSD_V4 38#ifdef CONFIG_NFSD_V4
39#include "../internal.h"
39#include "acl.h" 40#include "acl.h"
40#include "idmap.h" 41#include "idmap.h"
41#endif /* CONFIG_NFSD_V4 */ 42#endif /* CONFIG_NFSD_V4 */
@@ -498,6 +499,13 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
498} 499}
499#endif 500#endif
500 501
502__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
503 u64 dst_pos, u64 count)
504{
505 return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
506 count));
507}
508
501__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, 509__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
502 struct file *file, loff_t offset, loff_t len, 510 struct file *file, loff_t offset, loff_t len,
503 int flags) 511 int flags)
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index fcfc48cbe136..c11ba316f23f 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -56,6 +56,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
56 struct xdr_netobj *); 56 struct xdr_netobj *);
57__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, 57__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
58 struct file *, loff_t, loff_t, int); 58 struct file *, loff_t, loff_t, int);
59__be32 nfsd4_clone_file_range(struct file *, u64, struct file *,
60 u64, u64);
59#endif /* CONFIG_NFSD_V4 */ 61#endif /* CONFIG_NFSD_V4 */
60__be32 nfsd_create(struct svc_rqst *, struct svc_fh *, 62__be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
61 char *name, int len, struct iattr *attrs, 63 char *name, int len, struct iattr *attrs,
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index ce7362c88b48..d9554813e58a 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -491,6 +491,15 @@ struct nfsd4_fallocate {
491 u64 falloc_length; 491 u64 falloc_length;
492}; 492};
493 493
494struct nfsd4_clone {
495 /* request */
496 stateid_t cl_src_stateid;
497 stateid_t cl_dst_stateid;
498 u64 cl_src_pos;
499 u64 cl_dst_pos;
500 u64 cl_count;
501};
502
494struct nfsd4_seek { 503struct nfsd4_seek {
495 /* request */ 504 /* request */
496 stateid_t seek_stateid; 505 stateid_t seek_stateid;
@@ -555,6 +564,7 @@ struct nfsd4_op {
555 /* NFSv4.2 */ 564 /* NFSv4.2 */
556 struct nfsd4_fallocate allocate; 565 struct nfsd4_fallocate allocate;
557 struct nfsd4_fallocate deallocate; 566 struct nfsd4_fallocate deallocate;
567 struct nfsd4_clone clone;
558 struct nfsd4_seek seek; 568 struct nfsd4_seek seek;
559 } u; 569 } u;
560 struct nfs4_replay * replay; 570 struct nfs4_replay * replay;
diff --git a/fs/read_write.c b/fs/read_write.c
index 819ef3faf1bb..2116e74a83d3 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -16,6 +16,7 @@
16#include <linux/pagemap.h> 16#include <linux/pagemap.h>
17#include <linux/splice.h> 17#include <linux/splice.h>
18#include <linux/compat.h> 18#include <linux/compat.h>
19#include <linux/mount.h>
19#include "internal.h" 20#include "internal.h"
20 21
21#include <asm/uaccess.h> 22#include <asm/uaccess.h>
@@ -395,9 +396,8 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
395 } 396 }
396 397
397 if (unlikely(inode->i_flctx && mandatory_lock(inode))) { 398 if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
398 retval = locks_mandatory_area( 399 retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
399 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 400 read_write == READ ? F_RDLCK : F_WRLCK);
400 inode, file, pos, count);
401 if (retval < 0) 401 if (retval < 0)
402 return retval; 402 return retval;
403 } 403 }
@@ -1327,3 +1327,299 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
1327 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1327 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1328} 1328}
1329#endif 1329#endif
1330
1331/*
1332 * copy_file_range() differs from regular file read and write in that it
1333 * specifically allows return partial success. When it does so is up to
1334 * the copy_file_range method.
1335 */
1336ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
1337 struct file *file_out, loff_t pos_out,
1338 size_t len, unsigned int flags)
1339{
1340 struct inode *inode_in = file_inode(file_in);
1341 struct inode *inode_out = file_inode(file_out);
1342 ssize_t ret;
1343
1344 if (flags != 0)
1345 return -EINVAL;
1346
1347 /* copy_file_range allows full ssize_t len, ignoring MAX_RW_COUNT */
1348 ret = rw_verify_area(READ, file_in, &pos_in, len);
1349 if (ret >= 0)
1350 ret = rw_verify_area(WRITE, file_out, &pos_out, len);
1351 if (ret < 0)
1352 return ret;
1353
1354 if (!(file_in->f_mode & FMODE_READ) ||
1355 !(file_out->f_mode & FMODE_WRITE) ||
1356 (file_out->f_flags & O_APPEND))
1357 return -EBADF;
1358
1359 /* this could be relaxed once a method supports cross-fs copies */
1360 if (inode_in->i_sb != inode_out->i_sb)
1361 return -EXDEV;
1362
1363 if (len == 0)
1364 return 0;
1365
1366 ret = mnt_want_write_file(file_out);
1367 if (ret)
1368 return ret;
1369
1370 ret = -EOPNOTSUPP;
1371 if (file_out->f_op->copy_file_range)
1372 ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
1373 pos_out, len, flags);
1374 if (ret == -EOPNOTSUPP)
1375 ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
1376 len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
1377
1378 if (ret > 0) {
1379 fsnotify_access(file_in);
1380 add_rchar(current, ret);
1381 fsnotify_modify(file_out);
1382 add_wchar(current, ret);
1383 }
1384 inc_syscr(current);
1385 inc_syscw(current);
1386
1387 mnt_drop_write_file(file_out);
1388
1389 return ret;
1390}
1391EXPORT_SYMBOL(vfs_copy_file_range);
1392
1393SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
1394 int, fd_out, loff_t __user *, off_out,
1395 size_t, len, unsigned int, flags)
1396{
1397 loff_t pos_in;
1398 loff_t pos_out;
1399 struct fd f_in;
1400 struct fd f_out;
1401 ssize_t ret = -EBADF;
1402
1403 f_in = fdget(fd_in);
1404 if (!f_in.file)
1405 goto out2;
1406
1407 f_out = fdget(fd_out);
1408 if (!f_out.file)
1409 goto out1;
1410
1411 ret = -EFAULT;
1412 if (off_in) {
1413 if (copy_from_user(&pos_in, off_in, sizeof(loff_t)))
1414 goto out;
1415 } else {
1416 pos_in = f_in.file->f_pos;
1417 }
1418
1419 if (off_out) {
1420 if (copy_from_user(&pos_out, off_out, sizeof(loff_t)))
1421 goto out;
1422 } else {
1423 pos_out = f_out.file->f_pos;
1424 }
1425
1426 ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
1427 flags);
1428 if (ret > 0) {
1429 pos_in += ret;
1430 pos_out += ret;
1431
1432 if (off_in) {
1433 if (copy_to_user(off_in, &pos_in, sizeof(loff_t)))
1434 ret = -EFAULT;
1435 } else {
1436 f_in.file->f_pos = pos_in;
1437 }
1438
1439 if (off_out) {
1440 if (copy_to_user(off_out, &pos_out, sizeof(loff_t)))
1441 ret = -EFAULT;
1442 } else {
1443 f_out.file->f_pos = pos_out;
1444 }
1445 }
1446
1447out:
1448 fdput(f_out);
1449out1:
1450 fdput(f_in);
1451out2:
1452 return ret;
1453}
1454
1455static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
1456{
1457 struct inode *inode = file_inode(file);
1458
1459 if (unlikely(pos < 0))
1460 return -EINVAL;
1461
1462 if (unlikely((loff_t) (pos + len) < 0))
1463 return -EINVAL;
1464
1465 if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
1466 loff_t end = len ? pos + len - 1 : OFFSET_MAX;
1467 int retval;
1468
1469 retval = locks_mandatory_area(inode, file, pos, end,
1470 write ? F_WRLCK : F_RDLCK);
1471 if (retval < 0)
1472 return retval;
1473 }
1474
1475 return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
1476}
1477
1478int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
1479 struct file *file_out, loff_t pos_out, u64 len)
1480{
1481 struct inode *inode_in = file_inode(file_in);
1482 struct inode *inode_out = file_inode(file_out);
1483 int ret;
1484
1485 if (inode_in->i_sb != inode_out->i_sb ||
1486 file_in->f_path.mnt != file_out->f_path.mnt)
1487 return -EXDEV;
1488
1489 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1490 return -EISDIR;
1491 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1492 return -EINVAL;
1493
1494 if (!(file_in->f_mode & FMODE_READ) ||
1495 !(file_out->f_mode & FMODE_WRITE) ||
1496 (file_out->f_flags & O_APPEND) ||
1497 !file_in->f_op->clone_file_range)
1498 return -EBADF;
1499
1500 ret = clone_verify_area(file_in, pos_in, len, false);
1501 if (ret)
1502 return ret;
1503
1504 ret = clone_verify_area(file_out, pos_out, len, true);
1505 if (ret)
1506 return ret;
1507
1508 if (pos_in + len > i_size_read(inode_in))
1509 return -EINVAL;
1510
1511 ret = mnt_want_write_file(file_out);
1512 if (ret)
1513 return ret;
1514
1515 ret = file_in->f_op->clone_file_range(file_in, pos_in,
1516 file_out, pos_out, len);
1517 if (!ret) {
1518 fsnotify_access(file_in);
1519 fsnotify_modify(file_out);
1520 }
1521
1522 mnt_drop_write_file(file_out);
1523 return ret;
1524}
1525EXPORT_SYMBOL(vfs_clone_file_range);
1526
1527int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
1528{
1529 struct file_dedupe_range_info *info;
1530 struct inode *src = file_inode(file);
1531 u64 off;
1532 u64 len;
1533 int i;
1534 int ret;
1535 bool is_admin = capable(CAP_SYS_ADMIN);
1536 u16 count = same->dest_count;
1537 struct file *dst_file;
1538 loff_t dst_off;
1539 ssize_t deduped;
1540
1541 if (!(file->f_mode & FMODE_READ))
1542 return -EINVAL;
1543
1544 if (same->reserved1 || same->reserved2)
1545 return -EINVAL;
1546
1547 off = same->src_offset;
1548 len = same->src_length;
1549
1550 ret = -EISDIR;
1551 if (S_ISDIR(src->i_mode))
1552 goto out;
1553
1554 ret = -EINVAL;
1555 if (!S_ISREG(src->i_mode))
1556 goto out;
1557
1558 ret = clone_verify_area(file, off, len, false);
1559 if (ret < 0)
1560 goto out;
1561 ret = 0;
1562
1563 /* pre-format output fields to sane values */
1564 for (i = 0; i < count; i++) {
1565 same->info[i].bytes_deduped = 0ULL;
1566 same->info[i].status = FILE_DEDUPE_RANGE_SAME;
1567 }
1568
1569 for (i = 0, info = same->info; i < count; i++, info++) {
1570 struct inode *dst;
1571 struct fd dst_fd = fdget(info->dest_fd);
1572
1573 dst_file = dst_fd.file;
1574 if (!dst_file) {
1575 info->status = -EBADF;
1576 goto next_loop;
1577 }
1578 dst = file_inode(dst_file);
1579
1580 ret = mnt_want_write_file(dst_file);
1581 if (ret) {
1582 info->status = ret;
1583 goto next_loop;
1584 }
1585
1586 dst_off = info->dest_offset;
1587 ret = clone_verify_area(dst_file, dst_off, len, true);
1588 if (ret < 0) {
1589 info->status = ret;
1590 goto next_file;
1591 }
1592 ret = 0;
1593
1594 if (info->reserved) {
1595 info->status = -EINVAL;
1596 } else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
1597 info->status = -EINVAL;
1598 } else if (file->f_path.mnt != dst_file->f_path.mnt) {
1599 info->status = -EXDEV;
1600 } else if (S_ISDIR(dst->i_mode)) {
1601 info->status = -EISDIR;
1602 } else if (dst_file->f_op->dedupe_file_range == NULL) {
1603 info->status = -EINVAL;
1604 } else {
1605 deduped = dst_file->f_op->dedupe_file_range(file, off,
1606 len, dst_file,
1607 info->dest_offset);
1608 if (deduped == -EBADE)
1609 info->status = FILE_DEDUPE_RANGE_DIFFERS;
1610 else if (deduped < 0)
1611 info->status = deduped;
1612 else
1613 info->bytes_deduped += deduped;
1614 }
1615
1616next_file:
1617 mnt_drop_write_file(dst_file);
1618next_loop:
1619 fdput(dst_fd);
1620 }
1621
1622out:
1623 return ret;
1624}
1625EXPORT_SYMBOL(vfs_dedupe_file_range);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 566f8e078ffc..ec43a24bf63d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1630,6 +1630,12 @@ struct file_operations {
1630#ifndef CONFIG_MMU 1630#ifndef CONFIG_MMU
1631 unsigned (*mmap_capabilities)(struct file *); 1631 unsigned (*mmap_capabilities)(struct file *);
1632#endif 1632#endif
1633 ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
1634 loff_t, size_t, unsigned int);
1635 int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
1636 u64);
1637 ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *,
1638 u64);
1633}; 1639};
1634 1640
1635struct inode_operations { 1641struct inode_operations {
@@ -1680,6 +1686,12 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
1680 unsigned long, loff_t *); 1686 unsigned long, loff_t *);
1681extern ssize_t vfs_writev(struct file *, const struct iovec __user *, 1687extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
1682 unsigned long, loff_t *); 1688 unsigned long, loff_t *);
1689extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
1690 loff_t, size_t, unsigned int);
1691extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
1692 struct file *file_out, loff_t pos_out, u64 len);
1693extern int vfs_dedupe_file_range(struct file *file,
1694 struct file_dedupe_range *same);
1683 1695
1684struct super_operations { 1696struct super_operations {
1685 struct inode *(*alloc_inode)(struct super_block *sb); 1697 struct inode *(*alloc_inode)(struct super_block *sb);
@@ -2027,12 +2039,9 @@ extern struct kobject *fs_kobj;
2027 2039
2028#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) 2040#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
2029 2041
2030#define FLOCK_VERIFY_READ 1
2031#define FLOCK_VERIFY_WRITE 2
2032
2033#ifdef CONFIG_MANDATORY_FILE_LOCKING 2042#ifdef CONFIG_MANDATORY_FILE_LOCKING
2034extern int locks_mandatory_locked(struct file *); 2043extern int locks_mandatory_locked(struct file *);
2035extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); 2044extern int locks_mandatory_area(struct inode *, struct file *, loff_t, loff_t, unsigned char);
2036 2045
2037/* 2046/*
2038 * Candidates for mandatory locking have the setgid bit set 2047 * Candidates for mandatory locking have the setgid bit set
@@ -2062,17 +2071,19 @@ static inline int locks_verify_locked(struct file *file)
2062} 2071}
2063 2072
2064static inline int locks_verify_truncate(struct inode *inode, 2073static inline int locks_verify_truncate(struct inode *inode,
2065 struct file *filp, 2074 struct file *f,
2066 loff_t size) 2075 loff_t size)
2067{ 2076{
2068 if (inode->i_flctx && mandatory_lock(inode)) 2077 if (!inode->i_flctx || !mandatory_lock(inode))
2069 return locks_mandatory_area( 2078 return 0;
2070 FLOCK_VERIFY_WRITE, inode, filp, 2079
2071 size < inode->i_size ? size : inode->i_size, 2080 if (size < inode->i_size) {
2072 (size < inode->i_size ? inode->i_size - size 2081 return locks_mandatory_area(inode, f, size, inode->i_size - 1,
2073 : size - inode->i_size) 2082 F_WRLCK);
2074 ); 2083 } else {
2075 return 0; 2084 return locks_mandatory_area(inode, f, inode->i_size, size - 1,
2085 F_WRLCK);
2086 }
2076} 2087}
2077 2088
2078#else /* !CONFIG_MANDATORY_FILE_LOCKING */ 2089#else /* !CONFIG_MANDATORY_FILE_LOCKING */
@@ -2082,9 +2093,8 @@ static inline int locks_mandatory_locked(struct file *file)
2082 return 0; 2093 return 0;
2083} 2094}
2084 2095
2085static inline int locks_mandatory_area(int rw, struct inode *inode, 2096static inline int locks_mandatory_area(struct inode *inode, struct file *filp,
2086 struct file *filp, loff_t offset, 2097 loff_t start, loff_t end, unsigned char type)
2087 size_t count)
2088{ 2098{
2089 return 0; 2099 return 0;
2090} 2100}
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index e7e78537aea2..43aeabd4b968 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -139,10 +139,10 @@ enum nfs_opnum4 {
139Needs to be updated if more operations are defined in future.*/ 139Needs to be updated if more operations are defined in future.*/
140 140
141#define FIRST_NFS4_OP OP_ACCESS 141#define FIRST_NFS4_OP OP_ACCESS
142#define LAST_NFS4_OP OP_WRITE_SAME
143#define LAST_NFS40_OP OP_RELEASE_LOCKOWNER 142#define LAST_NFS40_OP OP_RELEASE_LOCKOWNER
144#define LAST_NFS41_OP OP_RECLAIM_COMPLETE 143#define LAST_NFS41_OP OP_RECLAIM_COMPLETE
145#define LAST_NFS42_OP OP_WRITE_SAME 144#define LAST_NFS42_OP OP_CLONE
145#define LAST_NFS4_OP LAST_NFS42_OP
146 146
147enum nfsstat4 { 147enum nfsstat4 {
148 NFS4_OK = 0, 148 NFS4_OK = 0,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c2b66a277e98..185815c96433 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -886,6 +886,9 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename,
886 const char __user *const __user *envp, int flags); 886 const char __user *const __user *envp, int flags);
887 887
888asmlinkage long sys_membarrier(int cmd, int flags); 888asmlinkage long sys_membarrier(int cmd, int flags);
889asmlinkage long sys_copy_file_range(int fd_in, loff_t __user *off_in,
890 int fd_out, loff_t __user *off_out,
891 size_t len, unsigned int flags);
889 892
890asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags); 893asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags);
891 894
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 1324b0292ec2..2622b33fb2ec 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -715,9 +715,11 @@ __SYSCALL(__NR_userfaultfd, sys_userfaultfd)
715__SYSCALL(__NR_membarrier, sys_membarrier) 715__SYSCALL(__NR_membarrier, sys_membarrier)
716#define __NR_mlock2 284 716#define __NR_mlock2 284
717__SYSCALL(__NR_mlock2, sys_mlock2) 717__SYSCALL(__NR_mlock2, sys_mlock2)
718#define __NR_copy_file_range 285
719__SYSCALL(__NR_copy_file_range, sys_copy_file_range)
718 720
719#undef __NR_syscalls 721#undef __NR_syscalls
720#define __NR_syscalls 285 722#define __NR_syscalls 286
721 723
722/* 724/*
723 * All syscalls below here should go away really, 725 * All syscalls below here should go away really,
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index f15d980249b5..b38e647664a0 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -39,12 +39,48 @@
39#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ 39#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */
40#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ 40#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */
41 41
42struct file_clone_range {
43 __s64 src_fd;
44 __u64 src_offset;
45 __u64 src_length;
46 __u64 dest_offset;
47};
48
42struct fstrim_range { 49struct fstrim_range {
43 __u64 start; 50 __u64 start;
44 __u64 len; 51 __u64 len;
45 __u64 minlen; 52 __u64 minlen;
46}; 53};
47 54
55/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
56#define FILE_DEDUPE_RANGE_SAME 0
57#define FILE_DEDUPE_RANGE_DIFFERS 1
58
59/* from struct btrfs_ioctl_file_extent_same_info */
60struct file_dedupe_range_info {
61 __s64 dest_fd; /* in - destination file */
62 __u64 dest_offset; /* in - start of extent in destination */
63 __u64 bytes_deduped; /* out - total # of bytes we were able
64 * to dedupe from this file. */
65 /* status of this dedupe operation:
66 * < 0 for error
67 * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds
68 * == FILE_DEDUPE_RANGE_DIFFERS if data differs
69 */
70 __s32 status; /* out - see above description */
71 __u32 reserved; /* must be zero */
72};
73
74/* from struct btrfs_ioctl_file_extent_same_args */
75struct file_dedupe_range {
76 __u64 src_offset; /* in - start of extent in source */
77 __u64 src_length; /* in - length of extent */
78 __u16 dest_count; /* in - total elements in info array */
79 __u16 reserved1; /* must be zero */
80 __u32 reserved2; /* must be zero */
81 struct file_dedupe_range_info info[0];
82};
83
48/* And dynamically-tunable limits and defaults: */ 84/* And dynamically-tunable limits and defaults: */
49struct files_stat_struct { 85struct files_stat_struct {
50 unsigned long nr_files; /* read only */ 86 unsigned long nr_files; /* read only */
@@ -159,6 +195,9 @@ struct inodes_stat_t {
159#define FIFREEZE _IOWR('X', 119, int) /* Freeze */ 195#define FIFREEZE _IOWR('X', 119, int) /* Freeze */
160#define FITHAW _IOWR('X', 120, int) /* Thaw */ 196#define FITHAW _IOWR('X', 120, int) /* Thaw */
161#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ 197#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */
198#define FICLONE _IOW(0x94, 9, int)
199#define FICLONERANGE _IOW(0x94, 13, struct file_clone_range)
200#define FIDEDUPERANGE _IOWR(0x94, 54, struct file_dedupe_range)
162 201
163#define FS_IOC_GETFLAGS _IOR('f', 1, long) 202#define FS_IOC_GETFLAGS _IOR('f', 1, long)
164#define FS_IOC_SETFLAGS _IOW('f', 2, long) 203#define FS_IOC_SETFLAGS _IOW('f', 2, long)
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 0623787ec67a..2c5e3a8e00d7 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -174,6 +174,7 @@ cond_syscall(sys_setfsuid);
174cond_syscall(sys_setfsgid); 174cond_syscall(sys_setfsgid);
175cond_syscall(sys_capget); 175cond_syscall(sys_capget);
176cond_syscall(sys_capset); 176cond_syscall(sys_capset);
177cond_syscall(sys_copy_file_range);
177 178
178/* arch-specific weak syscall entries */ 179/* arch-specific weak syscall entries */
179cond_syscall(sys_pciconfig_read); 180cond_syscall(sys_pciconfig_read);