aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Christoph Hellwig <hch@lst.de> 2015-12-03 06:59:50 -0500
committer: Al Viro <viro@zeniv.linux.org.uk> 2015-12-07 23:11:33 -0500
commit: 04b38d601239b4d9be641b412cf4b7456a041c67 (patch)
tree: 196b5fa72848de2a98e09af86099d99da70f2833
parent: acc15575e78e534c12549d8057a692f490a50f61 (diff)
vfs: pull btrfs clone API to vfs layer
The btrfs clone ioctls are now adopted by other file systems, with NFS and CIFS already having support for them, and XFS being under active development. To avoid growth of various slightly incompatible implementations, add one to the VFS.

Note that clones are different from file copies in several ways:
 - they are atomic vs other writers
 - they support whole file clones
 - they support 64-bit length clones
 - they do not allow partial success (aka short writes)
 - clones are expected to be a fast metadata operation

Because of that it would be rather cumbersome to try to piggyback them on top of the recent copy_file_range infrastructure. The converse isn't true and the copy_file_range system call could try clone file range as a first attempt to copy, something that further patches will enable.

Based on earlier work from Peng Tao.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--  fs/btrfs/ctree.h  3
-rw-r--r--  fs/btrfs/file.c  1
-rw-r--r--  fs/btrfs/ioctl.c  49
-rw-r--r--  fs/cifs/cifsfs.c  63
-rw-r--r--  fs/cifs/cifsfs.h  1
-rw-r--r--  fs/cifs/ioctl.c  126
-rw-r--r--  fs/ioctl.c  29
-rw-r--r--  fs/nfs/nfs4file.c  87
-rw-r--r--  fs/read_write.c  72
-rw-r--r--  include/linux/fs.h  7
-rw-r--r--  include/uapi/linux/fs.h  9
11 files changed, 254 insertions, 193 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ede7277c167f..dd4733fa882c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -4025,7 +4025,6 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
4025void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, 4025void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
4026 struct btrfs_ioctl_balance_args *bargs); 4026 struct btrfs_ioctl_balance_args *bargs);
4027 4027
4028
4029/* file.c */ 4028/* file.c */
4030int btrfs_auto_defrag_init(void); 4029int btrfs_auto_defrag_init(void);
4031void btrfs_auto_defrag_exit(void); 4030void btrfs_auto_defrag_exit(void);
@@ -4058,6 +4057,8 @@ int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
4058ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in, 4057ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
4059 struct file *file_out, loff_t pos_out, 4058 struct file *file_out, loff_t pos_out,
4060 size_t len, unsigned int flags); 4059 size_t len, unsigned int flags);
4060int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
4061 struct file *file_out, loff_t pos_out, u64 len);
4061 4062
4062/* tree-defrag.c */ 4063/* tree-defrag.c */
4063int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, 4064int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e67fe6ab8c9e..232e300a6c93 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2925,6 +2925,7 @@ const struct file_operations btrfs_file_operations = {
2925 .compat_ioctl = btrfs_ioctl, 2925 .compat_ioctl = btrfs_ioctl,
2926#endif 2926#endif
2927 .copy_file_range = btrfs_copy_file_range, 2927 .copy_file_range = btrfs_copy_file_range,
2928 .clone_file_range = btrfs_clone_file_range,
2928}; 2929};
2929 2930
2930void btrfs_auto_defrag_exit(void) 2931void btrfs_auto_defrag_exit(void)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0f92735299d3..85b1caeeec85 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3906,49 +3906,10 @@ ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
3906 return ret; 3906 return ret;
3907} 3907}
3908 3908
3909static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 3909int btrfs_clone_file_range(struct file *src_file, loff_t off,
3910 u64 off, u64 olen, u64 destoff) 3910 struct file *dst_file, loff_t destoff, u64 len)
3911{ 3911{
3912 struct fd src_file; 3912 return btrfs_clone_files(dst_file, src_file, off, len, destoff);
3913 int ret;
3914
3915 /* the destination must be opened for writing */
3916 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
3917 return -EINVAL;
3918
3919 ret = mnt_want_write_file(file);
3920 if (ret)
3921 return ret;
3922
3923 src_file = fdget(srcfd);
3924 if (!src_file.file) {
3925 ret = -EBADF;
3926 goto out_drop_write;
3927 }
3928
3929 /* the src must be open for reading */
3930 if (!(src_file.file->f_mode & FMODE_READ)) {
3931 ret = -EINVAL;
3932 goto out_fput;
3933 }
3934
3935 ret = btrfs_clone_files(file, src_file.file, off, olen, destoff);
3936
3937out_fput:
3938 fdput(src_file);
3939out_drop_write:
3940 mnt_drop_write_file(file);
3941 return ret;
3942}
3943
3944static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
3945{
3946 struct btrfs_ioctl_clone_range_args args;
3947
3948 if (copy_from_user(&args, argp, sizeof(args)))
3949 return -EFAULT;
3950 return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
3951 args.src_length, args.dest_offset);
3952} 3913}
3953 3914
3954/* 3915/*
@@ -5498,10 +5459,6 @@ long btrfs_ioctl(struct file *file, unsigned int
5498 return btrfs_ioctl_dev_info(root, argp); 5459 return btrfs_ioctl_dev_info(root, argp);
5499 case BTRFS_IOC_BALANCE: 5460 case BTRFS_IOC_BALANCE:
5500 return btrfs_ioctl_balance(file, NULL); 5461 return btrfs_ioctl_balance(file, NULL);
5501 case BTRFS_IOC_CLONE:
5502 return btrfs_ioctl_clone(file, arg, 0, 0, 0);
5503 case BTRFS_IOC_CLONE_RANGE:
5504 return btrfs_ioctl_clone_range(file, argp);
5505 case BTRFS_IOC_TRANS_START: 5462 case BTRFS_IOC_TRANS_START:
5506 return btrfs_ioctl_trans_start(file); 5463 return btrfs_ioctl_trans_start(file);
5507 case BTRFS_IOC_TRANS_END: 5464 case BTRFS_IOC_TRANS_END:
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index cbc0f4bca0c0..e9b978f2e114 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -914,6 +914,61 @@ const struct inode_operations cifs_symlink_inode_ops = {
914#endif 914#endif
915}; 915};
916 916
917static int cifs_clone_file_range(struct file *src_file, loff_t off,
918 struct file *dst_file, loff_t destoff, u64 len)
919{
920 struct inode *src_inode = file_inode(src_file);
921 struct inode *target_inode = file_inode(dst_file);
922 struct cifsFileInfo *smb_file_src = src_file->private_data;
923 struct cifsFileInfo *smb_file_target = dst_file->private_data;
924 struct cifs_tcon *src_tcon = tlink_tcon(smb_file_src->tlink);
925 struct cifs_tcon *target_tcon = tlink_tcon(smb_file_target->tlink);
926 unsigned int xid;
927 int rc;
928
929 cifs_dbg(FYI, "clone range\n");
930
931 xid = get_xid();
932
933 if (!src_file->private_data || !dst_file->private_data) {
934 rc = -EBADF;
935 cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
936 goto out;
937 }
938
939 /*
940 * Note: cifs case is easier than btrfs since server responsible for
941 * checks for proper open modes and file type and if it wants
942 * server could even support copy of range where source = target
943 */
944 lock_two_nondirectories(target_inode, src_inode);
945
946 if (len == 0)
947 len = src_inode->i_size - off;
948
949 cifs_dbg(FYI, "about to flush pages\n");
950 /* should we flush first and last page first */
951 truncate_inode_pages_range(&target_inode->i_data, destoff,
952 PAGE_CACHE_ALIGN(destoff + len)-1);
953
954 if (target_tcon->ses->server->ops->duplicate_extents)
955 rc = target_tcon->ses->server->ops->duplicate_extents(xid,
956 smb_file_src, smb_file_target, off, len, destoff);
957 else
958 rc = -EOPNOTSUPP;
959
960 /* force revalidate of size and timestamps of target file now
961 that target is updated on the server */
962 CIFS_I(target_inode)->time = 0;
963out_unlock:
964 /* although unlocking in the reverse order from locking is not
965 strictly necessary here it is a little cleaner to be consistent */
966 unlock_two_nondirectories(src_inode, target_inode);
967out:
968 free_xid(xid);
969 return rc;
970}
971
917const struct file_operations cifs_file_ops = { 972const struct file_operations cifs_file_ops = {
918 .read_iter = cifs_loose_read_iter, 973 .read_iter = cifs_loose_read_iter,
919 .write_iter = cifs_file_write_iter, 974 .write_iter = cifs_file_write_iter,
@@ -926,6 +981,7 @@ const struct file_operations cifs_file_ops = {
926 .splice_read = generic_file_splice_read, 981 .splice_read = generic_file_splice_read,
927 .llseek = cifs_llseek, 982 .llseek = cifs_llseek,
928 .unlocked_ioctl = cifs_ioctl, 983 .unlocked_ioctl = cifs_ioctl,
984 .clone_file_range = cifs_clone_file_range,
929 .setlease = cifs_setlease, 985 .setlease = cifs_setlease,
930 .fallocate = cifs_fallocate, 986 .fallocate = cifs_fallocate,
931}; 987};
@@ -942,6 +998,8 @@ const struct file_operations cifs_file_strict_ops = {
942 .splice_read = generic_file_splice_read, 998 .splice_read = generic_file_splice_read,
943 .llseek = cifs_llseek, 999 .llseek = cifs_llseek,
944 .unlocked_ioctl = cifs_ioctl, 1000 .unlocked_ioctl = cifs_ioctl,
1001 .clone_file_range = cifs_clone_file_range,
1002 .clone_file_range = cifs_clone_file_range,
945 .setlease = cifs_setlease, 1003 .setlease = cifs_setlease,
946 .fallocate = cifs_fallocate, 1004 .fallocate = cifs_fallocate,
947}; 1005};
@@ -958,6 +1016,7 @@ const struct file_operations cifs_file_direct_ops = {
958 .mmap = cifs_file_mmap, 1016 .mmap = cifs_file_mmap,
959 .splice_read = generic_file_splice_read, 1017 .splice_read = generic_file_splice_read,
960 .unlocked_ioctl = cifs_ioctl, 1018 .unlocked_ioctl = cifs_ioctl,
1019 .clone_file_range = cifs_clone_file_range,
961 .llseek = cifs_llseek, 1020 .llseek = cifs_llseek,
962 .setlease = cifs_setlease, 1021 .setlease = cifs_setlease,
963 .fallocate = cifs_fallocate, 1022 .fallocate = cifs_fallocate,
@@ -974,6 +1033,7 @@ const struct file_operations cifs_file_nobrl_ops = {
974 .splice_read = generic_file_splice_read, 1033 .splice_read = generic_file_splice_read,
975 .llseek = cifs_llseek, 1034 .llseek = cifs_llseek,
976 .unlocked_ioctl = cifs_ioctl, 1035 .unlocked_ioctl = cifs_ioctl,
1036 .clone_file_range = cifs_clone_file_range,
977 .setlease = cifs_setlease, 1037 .setlease = cifs_setlease,
978 .fallocate = cifs_fallocate, 1038 .fallocate = cifs_fallocate,
979}; 1039};
@@ -989,6 +1049,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
989 .splice_read = generic_file_splice_read, 1049 .splice_read = generic_file_splice_read,
990 .llseek = cifs_llseek, 1050 .llseek = cifs_llseek,
991 .unlocked_ioctl = cifs_ioctl, 1051 .unlocked_ioctl = cifs_ioctl,
1052 .clone_file_range = cifs_clone_file_range,
992 .setlease = cifs_setlease, 1053 .setlease = cifs_setlease,
993 .fallocate = cifs_fallocate, 1054 .fallocate = cifs_fallocate,
994}; 1055};
@@ -1004,6 +1065,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
1004 .mmap = cifs_file_mmap, 1065 .mmap = cifs_file_mmap,
1005 .splice_read = generic_file_splice_read, 1066 .splice_read = generic_file_splice_read,
1006 .unlocked_ioctl = cifs_ioctl, 1067 .unlocked_ioctl = cifs_ioctl,
1068 .clone_file_range = cifs_clone_file_range,
1007 .llseek = cifs_llseek, 1069 .llseek = cifs_llseek,
1008 .setlease = cifs_setlease, 1070 .setlease = cifs_setlease,
1009 .fallocate = cifs_fallocate, 1071 .fallocate = cifs_fallocate,
@@ -1014,6 +1076,7 @@ const struct file_operations cifs_dir_ops = {
1014 .release = cifs_closedir, 1076 .release = cifs_closedir,
1015 .read = generic_read_dir, 1077 .read = generic_read_dir,
1016 .unlocked_ioctl = cifs_ioctl, 1078 .unlocked_ioctl = cifs_ioctl,
1079 .clone_file_range = cifs_clone_file_range,
1017 .llseek = generic_file_llseek, 1080 .llseek = generic_file_llseek,
1018}; 1081};
1019 1082
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index c3cc1609025f..c399513c3cbd 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -131,7 +131,6 @@ extern int cifs_setxattr(struct dentry *, const char *, const void *,
131extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); 131extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
132extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); 132extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
133extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); 133extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
134
135#ifdef CONFIG_CIFS_NFSD_EXPORT 134#ifdef CONFIG_CIFS_NFSD_EXPORT
136extern const struct export_operations cifs_export_ops; 135extern const struct export_operations cifs_export_ops;
137#endif /* CONFIG_CIFS_NFSD_EXPORT */ 136#endif /* CONFIG_CIFS_NFSD_EXPORT */
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 35cf990f87d3..7a3b84e300f8 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -34,73 +34,36 @@
34#include "cifs_ioctl.h" 34#include "cifs_ioctl.h"
35#include <linux/btrfs.h> 35#include <linux/btrfs.h>
36 36
37static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, 37static int cifs_file_clone_range(unsigned int xid, struct file *src_file,
38 unsigned long srcfd, u64 off, u64 len, u64 destoff, 38 struct file *dst_file)
39 bool dup_extents)
40{ 39{
41 int rc; 40 struct inode *src_inode = file_inode(src_file);
42 struct cifsFileInfo *smb_file_target = dst_file->private_data;
43 struct inode *target_inode = file_inode(dst_file); 41 struct inode *target_inode = file_inode(dst_file);
44 struct cifs_tcon *target_tcon;
45 struct fd src_file;
46 struct cifsFileInfo *smb_file_src; 42 struct cifsFileInfo *smb_file_src;
47 struct inode *src_inode; 43 struct cifsFileInfo *smb_file_target;
48 struct cifs_tcon *src_tcon; 44 struct cifs_tcon *src_tcon;
45 struct cifs_tcon *target_tcon;
46 int rc;
49 47
50 cifs_dbg(FYI, "ioctl clone range\n"); 48 cifs_dbg(FYI, "ioctl clone range\n");
51 /* the destination must be opened for writing */
52 if (!(dst_file->f_mode & FMODE_WRITE)) {
53 cifs_dbg(FYI, "file target not open for write\n");
54 return -EINVAL;
55 }
56 49
57 /* check if target volume is readonly and take reference */ 50 if (!src_file->private_data || !dst_file->private_data) {
58 rc = mnt_want_write_file(dst_file);
59 if (rc) {
60 cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc);
61 return rc;
62 }
63
64 src_file = fdget(srcfd);
65 if (!src_file.file) {
66 rc = -EBADF;
67 goto out_drop_write;
68 }
69
70 if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) {
71 rc = -EBADF;
72 cifs_dbg(VFS, "src file seems to be from a different filesystem type\n");
73 goto out_fput;
74 }
75
76 if ((!src_file.file->private_data) || (!dst_file->private_data)) {
77 rc = -EBADF; 51 rc = -EBADF;
78 cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); 52 cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
79 goto out_fput; 53 goto out;
80 } 54 }
81 55
82 rc = -EXDEV; 56 rc = -EXDEV;
83 smb_file_target = dst_file->private_data; 57 smb_file_target = dst_file->private_data;
84 smb_file_src = src_file.file->private_data; 58 smb_file_src = src_file->private_data;
85 src_tcon = tlink_tcon(smb_file_src->tlink); 59 src_tcon = tlink_tcon(smb_file_src->tlink);
86 target_tcon = tlink_tcon(smb_file_target->tlink); 60 target_tcon = tlink_tcon(smb_file_target->tlink);
87 61
88 /* check source and target on same server (or volume if dup_extents) */ 62 if (src_tcon->ses != target_tcon->ses) {
89 if (dup_extents && (src_tcon != target_tcon)) {
90 cifs_dbg(VFS, "source and target of copy not on same share\n");
91 goto out_fput;
92 }
93
94 if (!dup_extents && (src_tcon->ses != target_tcon->ses)) {
95 cifs_dbg(VFS, "source and target of copy not on same server\n"); 63 cifs_dbg(VFS, "source and target of copy not on same server\n");
96 goto out_fput; 64 goto out;
97 } 65 }
98 66
99 src_inode = file_inode(src_file.file);
100 rc = -EINVAL;
101 if (S_ISDIR(src_inode->i_mode))
102 goto out_fput;
103
104 /* 67 /*
105 * Note: cifs case is easier than btrfs since server responsible for 68 * Note: cifs case is easier than btrfs since server responsible for
106 * checks for proper open modes and file type and if it wants 69 * checks for proper open modes and file type and if it wants
@@ -108,34 +71,66 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
108 */ 71 */
109 lock_two_nondirectories(target_inode, src_inode); 72 lock_two_nondirectories(target_inode, src_inode);
110 73
111 /* determine range to clone */
112 rc = -EINVAL;
113 if (off + len > src_inode->i_size || off + len < off)
114 goto out_unlock;
115 if (len == 0)
116 len = src_inode->i_size - off;
117
118 cifs_dbg(FYI, "about to flush pages\n"); 74 cifs_dbg(FYI, "about to flush pages\n");
119 /* should we flush first and last page first */ 75 /* should we flush first and last page first */
120 truncate_inode_pages_range(&target_inode->i_data, destoff, 76 truncate_inode_pages(&target_inode->i_data, 0);
121 PAGE_CACHE_ALIGN(destoff + len)-1);
122 77
123 if (dup_extents && target_tcon->ses->server->ops->duplicate_extents) 78 if (target_tcon->ses->server->ops->clone_range)
124 rc = target_tcon->ses->server->ops->duplicate_extents(xid,
125 smb_file_src, smb_file_target, off, len, destoff);
126 else if (!dup_extents && target_tcon->ses->server->ops->clone_range)
127 rc = target_tcon->ses->server->ops->clone_range(xid, 79 rc = target_tcon->ses->server->ops->clone_range(xid,
128 smb_file_src, smb_file_target, off, len, destoff); 80 smb_file_src, smb_file_target, 0, src_inode->i_size, 0);
129 else 81 else
130 rc = -EOPNOTSUPP; 82 rc = -EOPNOTSUPP;
131 83
132 /* force revalidate of size and timestamps of target file now 84 /* force revalidate of size and timestamps of target file now
133 that target is updated on the server */ 85 that target is updated on the server */
134 CIFS_I(target_inode)->time = 0; 86 CIFS_I(target_inode)->time = 0;
135out_unlock:
136 /* although unlocking in the reverse order from locking is not 87 /* although unlocking in the reverse order from locking is not
137 strictly necessary here it is a little cleaner to be consistent */ 88 strictly necessary here it is a little cleaner to be consistent */
138 unlock_two_nondirectories(src_inode, target_inode); 89 unlock_two_nondirectories(src_inode, target_inode);
90out:
91 return rc;
92}
93
94static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
95 unsigned long srcfd)
96{
97 int rc;
98 struct fd src_file;
99 struct inode *src_inode;
100
101 cifs_dbg(FYI, "ioctl clone range\n");
102 /* the destination must be opened for writing */
103 if (!(dst_file->f_mode & FMODE_WRITE)) {
104 cifs_dbg(FYI, "file target not open for write\n");
105 return -EINVAL;
106 }
107
108 /* check if target volume is readonly and take reference */
109 rc = mnt_want_write_file(dst_file);
110 if (rc) {
111 cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc);
112 return rc;
113 }
114
115 src_file = fdget(srcfd);
116 if (!src_file.file) {
117 rc = -EBADF;
118 goto out_drop_write;
119 }
120
121 if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) {
122 rc = -EBADF;
123 cifs_dbg(VFS, "src file seems to be from a different filesystem type\n");
124 goto out_fput;
125 }
126
127 src_inode = file_inode(src_file.file);
128 rc = -EINVAL;
129 if (S_ISDIR(src_inode->i_mode))
130 goto out_fput;
131
132 rc = cifs_file_clone_range(xid, src_file.file, dst_file);
133
139out_fput: 134out_fput:
140 fdput(src_file); 135 fdput(src_file);
141out_drop_write: 136out_drop_write:
@@ -256,10 +251,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
256 } 251 }
257 break; 252 break;
258 case CIFS_IOC_COPYCHUNK_FILE: 253 case CIFS_IOC_COPYCHUNK_FILE:
259 rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, false); 254 rc = cifs_ioctl_clone(xid, filep, arg);
260 break;
261 case BTRFS_IOC_CLONE:
262 rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, true);
263 break; 255 break;
264 case CIFS_IOC_SET_INTEGRITY: 256 case CIFS_IOC_SET_INTEGRITY:
265 if (pSMBFile == NULL) 257 if (pSMBFile == NULL)
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 5d01d2638ca5..84c6e79829ab 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -215,6 +215,29 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
215 return error; 215 return error;
216} 216}
217 217
218static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
219 u64 off, u64 olen, u64 destoff)
220{
221 struct fd src_file = fdget(srcfd);
222 int ret;
223
224 if (!src_file.file)
225 return -EBADF;
226 ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen);
227 fdput(src_file);
228 return ret;
229}
230
231static long ioctl_file_clone_range(struct file *file, void __user *argp)
232{
233 struct file_clone_range args;
234
235 if (copy_from_user(&args, argp, sizeof(args)))
236 return -EFAULT;
237 return ioctl_file_clone(file, args.src_fd, args.src_offset,
238 args.src_length, args.dest_offset);
239}
240
218#ifdef CONFIG_BLOCK 241#ifdef CONFIG_BLOCK
219 242
220static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) 243static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
@@ -600,6 +623,12 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
600 case FIGETBSZ: 623 case FIGETBSZ:
601 return put_user(inode->i_sb->s_blocksize, argp); 624 return put_user(inode->i_sb->s_blocksize, argp);
602 625
626 case FICLONE:
627 return ioctl_file_clone(filp, arg, 0, 0, 0);
628
629 case FICLONERANGE:
630 return ioctl_file_clone_range(filp, argp);
631
603 default: 632 default:
604 if (S_ISREG(inode->i_mode)) 633 if (S_ISREG(inode->i_mode))
605 error = file_ioctl(filp, cmd, arg); 634 error = file_ioctl(filp, cmd, arg);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index db9b5fea5b3e..26f9a23e2b25 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -195,65 +195,27 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
195 return nfs42_proc_allocate(filep, offset, len); 195 return nfs42_proc_allocate(filep, offset, len);
196} 196}
197 197
198static noinline long 198static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
199nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, 199 struct file *dst_file, loff_t dst_off, u64 count)
200 u64 src_off, u64 dst_off, u64 count)
201{ 200{
202 struct inode *dst_inode = file_inode(dst_file); 201 struct inode *dst_inode = file_inode(dst_file);
203 struct nfs_server *server = NFS_SERVER(dst_inode); 202 struct nfs_server *server = NFS_SERVER(dst_inode);
204 struct fd src_file; 203 struct inode *src_inode = file_inode(src_file);
205 struct inode *src_inode;
206 unsigned int bs = server->clone_blksize; 204 unsigned int bs = server->clone_blksize;
207 bool same_inode = false; 205 bool same_inode = false;
208 int ret; 206 int ret;
209 207
210 /* dst file must be opened for writing */
211 if (!(dst_file->f_mode & FMODE_WRITE))
212 return -EINVAL;
213
214 ret = mnt_want_write_file(dst_file);
215 if (ret)
216 return ret;
217
218 src_file = fdget(srcfd);
219 if (!src_file.file) {
220 ret = -EBADF;
221 goto out_drop_write;
222 }
223
224 src_inode = file_inode(src_file.file);
225
226 if (src_inode == dst_inode)
227 same_inode = true;
228
229 /* src file must be opened for reading */
230 if (!(src_file.file->f_mode & FMODE_READ))
231 goto out_fput;
232
233 /* src and dst must be regular files */
234 ret = -EISDIR;
235 if (!S_ISREG(src_inode->i_mode) || !S_ISREG(dst_inode->i_mode))
236 goto out_fput;
237
238 ret = -EXDEV;
239 if (src_file.file->f_path.mnt != dst_file->f_path.mnt ||
240 src_inode->i_sb != dst_inode->i_sb)
241 goto out_fput;
242
243 /* check alignment w.r.t. clone_blksize */ 208 /* check alignment w.r.t. clone_blksize */
244 ret = -EINVAL; 209 ret = -EINVAL;
245 if (bs) { 210 if (bs) {
246 if (!IS_ALIGNED(src_off, bs) || !IS_ALIGNED(dst_off, bs)) 211 if (!IS_ALIGNED(src_off, bs) || !IS_ALIGNED(dst_off, bs))
247 goto out_fput; 212 goto out;
248 if (!IS_ALIGNED(count, bs) && i_size_read(src_inode) != (src_off + count)) 213 if (!IS_ALIGNED(count, bs) && i_size_read(src_inode) != (src_off + count))
249 goto out_fput; 214 goto out;
250 } 215 }
251 216
252 /* verify if ranges are overlapped within the same file */ 217 if (src_inode == dst_inode)
253 if (same_inode) { 218 same_inode = true;
254 if (dst_off + count > src_off && dst_off < src_off + count)
255 goto out_fput;
256 }
257 219
258 /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */ 220 /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
259 if (same_inode) { 221 if (same_inode) {
@@ -275,7 +237,7 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
275 if (ret) 237 if (ret)
276 goto out_unlock; 238 goto out_unlock;
277 239
278 ret = nfs42_proc_clone(src_file.file, dst_file, src_off, dst_off, count); 240 ret = nfs42_proc_clone(src_file, dst_file, src_off, dst_off, count);
279 241
280 /* truncate inode page cache of the dst range so that future reads can fetch 242 /* truncate inode page cache of the dst range so that future reads can fetch
281 * new data from server */ 243 * new data from server */
@@ -292,37 +254,9 @@ out_unlock:
292 mutex_unlock(&dst_inode->i_mutex); 254 mutex_unlock(&dst_inode->i_mutex);
293 mutex_unlock(&src_inode->i_mutex); 255 mutex_unlock(&src_inode->i_mutex);
294 } 256 }
295out_fput: 257out:
296 fdput(src_file);
297out_drop_write:
298 mnt_drop_write_file(dst_file);
299 return ret; 258 return ret;
300} 259}
301
302static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp)
303{
304 struct btrfs_ioctl_clone_range_args args;
305
306 if (copy_from_user(&args, argp, sizeof(args)))
307 return -EFAULT;
308
309 return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_offset,
310 args.dest_offset, args.src_length);
311}
312
313long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
314{
315 void __user *argp = (void __user *)arg;
316
317 switch (cmd) {
318 case BTRFS_IOC_CLONE:
319 return nfs42_ioctl_clone(file, arg, 0, 0, 0);
320 case BTRFS_IOC_CLONE_RANGE:
321 return nfs42_ioctl_clone_range(file, argp);
322 }
323
324 return -ENOTTY;
325}
326#endif /* CONFIG_NFS_V4_2 */ 260#endif /* CONFIG_NFS_V4_2 */
327 261
328const struct file_operations nfs4_file_operations = { 262const struct file_operations nfs4_file_operations = {
@@ -342,8 +276,7 @@ const struct file_operations nfs4_file_operations = {
342#ifdef CONFIG_NFS_V4_2 276#ifdef CONFIG_NFS_V4_2
343 .llseek = nfs4_file_llseek, 277 .llseek = nfs4_file_llseek,
344 .fallocate = nfs42_fallocate, 278 .fallocate = nfs42_fallocate,
345 .unlocked_ioctl = nfs4_ioctl, 279 .clone_file_range = nfs42_clone_file_range,
346 .compat_ioctl = nfs4_ioctl,
347#else 280#else
348 .llseek = nfs_file_llseek, 281 .llseek = nfs_file_llseek,
349#endif 282#endif
diff --git a/fs/read_write.c b/fs/read_write.c
index 6cfad4761fd8..c75d02cb13ec 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1451,3 +1451,75 @@ out1:
1451out2: 1451out2:
1452 return ret; 1452 return ret;
1453} 1453}
1454
1455static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
1456{
1457 struct inode *inode = file_inode(file);
1458
1459 if (unlikely(pos < 0))
1460 return -EINVAL;
1461
1462 if (unlikely((loff_t) (pos + len) < 0))
1463 return -EINVAL;
1464
1465 if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
1466 loff_t end = len ? pos + len - 1 : OFFSET_MAX;
1467 int retval;
1468
1469 retval = locks_mandatory_area(inode, file, pos, end,
1470 write ? F_WRLCK : F_RDLCK);
1471 if (retval < 0)
1472 return retval;
1473 }
1474
1475 return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
1476}
1477
1478int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
1479 struct file *file_out, loff_t pos_out, u64 len)
1480{
1481 struct inode *inode_in = file_inode(file_in);
1482 struct inode *inode_out = file_inode(file_out);
1483 int ret;
1484
1485 if (inode_in->i_sb != inode_out->i_sb ||
1486 file_in->f_path.mnt != file_out->f_path.mnt)
1487 return -EXDEV;
1488
1489 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1490 return -EISDIR;
1491 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1492 return -EOPNOTSUPP;
1493
1494 if (!(file_in->f_mode & FMODE_READ) ||
1495 !(file_out->f_mode & FMODE_WRITE) ||
1496 (file_out->f_flags & O_APPEND) ||
1497 !file_in->f_op->clone_file_range)
1498 return -EBADF;
1499
1500 ret = clone_verify_area(file_in, pos_in, len, false);
1501 if (ret)
1502 return ret;
1503
1504 ret = clone_verify_area(file_out, pos_out, len, true);
1505 if (ret)
1506 return ret;
1507
1508 if (pos_in + len > i_size_read(inode_in))
1509 return -EINVAL;
1510
1511 ret = mnt_want_write_file(file_out);
1512 if (ret)
1513 return ret;
1514
1515 ret = file_in->f_op->clone_file_range(file_in, pos_in,
1516 file_out, pos_out, len);
1517 if (!ret) {
1518 fsnotify_access(file_in);
1519 fsnotify_modify(file_out);
1520 }
1521
1522 mnt_drop_write_file(file_out);
1523 return ret;
1524}
1525EXPORT_SYMBOL(vfs_clone_file_range);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4377b2df991d..5d987aefcf1e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1629,7 +1629,10 @@ struct file_operations {
1629#ifndef CONFIG_MMU 1629#ifndef CONFIG_MMU
1630 unsigned (*mmap_capabilities)(struct file *); 1630 unsigned (*mmap_capabilities)(struct file *);
1631#endif 1631#endif
1632 ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); 1632 ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
1633 loff_t, size_t, unsigned int);
1634 int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
1635 u64);
1633}; 1636};
1634 1637
1635struct inode_operations { 1638struct inode_operations {
@@ -1683,6 +1686,8 @@ extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
1683 unsigned long, loff_t *); 1686 unsigned long, loff_t *);
1684extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, 1687extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
1685 loff_t, size_t, unsigned int); 1688 loff_t, size_t, unsigned int);
1689extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
1690 struct file *file_out, loff_t pos_out, u64 len);
1686 1691
1687struct super_operations { 1692struct super_operations {
1688 struct inode *(*alloc_inode)(struct super_block *sb); 1693 struct inode *(*alloc_inode)(struct super_block *sb);
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index f15d980249b5..cd5db7fb3cb7 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -39,6 +39,13 @@
39#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ 39#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */
40#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ 40#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */
41 41
42struct file_clone_range {
43 __s64 src_fd;
44 __u64 src_offset;
45 __u64 src_length;
46 __u64 dest_offset;
47};
48
42struct fstrim_range { 49struct fstrim_range {
43 __u64 start; 50 __u64 start;
44 __u64 len; 51 __u64 len;
@@ -159,6 +166,8 @@ struct inodes_stat_t {
159#define FIFREEZE _IOWR('X', 119, int) /* Freeze */ 166#define FIFREEZE _IOWR('X', 119, int) /* Freeze */
160#define FITHAW _IOWR('X', 120, int) /* Thaw */ 167#define FITHAW _IOWR('X', 120, int) /* Thaw */
161#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ 168#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */
169#define FICLONE _IOW(0x94, 9, int)
170#define FICLONERANGE _IOW(0x94, 13, struct file_clone_range)
162 171
163#define FS_IOC_GETFLAGS _IOR('f', 1, long) 172#define FS_IOC_GETFLAGS _IOR('f', 1, long)
164#define FS_IOC_SETFLAGS _IOW('f', 2, long) 173#define FS_IOC_SETFLAGS _IOW('f', 2, long)