summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-11-02 12:33:08 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-11-02 12:33:08 -0400
commitc2aa1a444cab2c673650ada80a7dffc4345ce2e6 (patch)
tree3efb7e2213cabd174780b021a8dab2cea0b03386 /fs
parentb69f9e17a57a50bc34d88975afce4425086e525d (diff)
parentbf4a1fcf0bc18d52cf0fce6571d6f327ab5eaf22 (diff)
Merge tag 'xfs-4.20-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull vfs dedup fixes from Dave Chinner: "This reworks the vfs data cloning infrastructure. We discovered many issues with these interfaces late in the 4.19 cycle - the worst of them (data corruption, setuid stripping) were fixed for XFS in 4.19-rc8, but a larger rework of the infrastructure fixing all the problems was needed. That rework is the contents of this pull request. Rework the vfs_clone_file_range and vfs_dedupe_file_range infrastructure to use a common .remap_file_range method and supply generic bounds and sanity checking functions that are shared with the data write path. The current VFS infrastructure has problems with rlimit, LFS file sizes, file time stamps, maximum filesystem file sizes, stripping setuid bits, etc., and so they are addressed in these commits. We also introduce the ability for the ->remap_file_range methods to return short clones so that clones for vfs_copy_file_range() don't get rejected if the entire range can't be cloned. It also allows filesystems to silently skip deduplication of partial EOF blocks if they are not capable of doing so without requiring errors to be thrown to userspace.
Existing filesystems are converted to use the new remap_file_range method, and both XFS and ocfs2 are modified to make use of the new generic checking infrastructure" * tag 'xfs-4.20-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (28 commits) xfs: remove [cm]time update from reflink calls xfs: remove xfs_reflink_remap_range xfs: remove redundant remap partial EOF block checks xfs: support returning partial reflink results xfs: clean up xfs_reflink_remap_blocks call site xfs: fix pagecache truncation prior to reflink ocfs2: remove ocfs2_reflink_remap_range ocfs2: support partial clone range and dedupe range ocfs2: fix pagecache truncation prior to reflink ocfs2: truncate page cache for clone destination file before remapping vfs: clean up generic_remap_file_range_prep return value vfs: hide file range comparison function vfs: enable remap callers that can handle short operations vfs: plumb remap flags through the vfs dedupe functions vfs: plumb remap flags through the vfs clone functions vfs: make remap_file_range functions take and return bytes completed vfs: remap helper should update destination inode metadata vfs: pass remap flags to generic_remap_checks vfs: pass remap flags to generic_remap_file_range_prep vfs: combine the clone and dedupe into a single remap_file_range ...
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.h8
-rw-r--r--fs/btrfs/file.c3
-rw-r--r--fs/btrfs/ioctl.c50
-rw-r--r--fs/cifs/cifsfs.c24
-rw-r--r--fs/ioctl.c10
-rw-r--r--fs/nfs/nfs4file.c12
-rw-r--r--fs/nfsd/vfs.c8
-rw-r--r--fs/ocfs2/file.c93
-rw-r--r--fs/ocfs2/refcounttree.c148
-rw-r--r--fs/ocfs2/refcounttree.h24
-rw-r--r--fs/overlayfs/copy_up.c6
-rw-r--r--fs/overlayfs/file.c43
-rw-r--r--fs/read_write.c405
-rw-r--r--fs/xfs/xfs_file.c82
-rw-r--r--fs/xfs/xfs_reflink.c173
-rw-r--r--fs/xfs/xfs_reflink.h15
16 files changed, 558 insertions, 546 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 68ca41dbbef3..80953528572d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3201,9 +3201,6 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
3201 struct btrfs_ioctl_space_info *space); 3201 struct btrfs_ioctl_space_info *space);
3202void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, 3202void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
3203 struct btrfs_ioctl_balance_args *bargs); 3203 struct btrfs_ioctl_balance_args *bargs);
3204int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
3205 struct file *dst_file, loff_t dst_loff,
3206 u64 olen);
3207 3204
3208/* file.c */ 3205/* file.c */
3209int __init btrfs_auto_defrag_init(void); 3206int __init btrfs_auto_defrag_init(void);
@@ -3233,8 +3230,9 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
3233 size_t num_pages, loff_t pos, size_t write_bytes, 3230 size_t num_pages, loff_t pos, size_t write_bytes,
3234 struct extent_state **cached); 3231 struct extent_state **cached);
3235int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end); 3232int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
3236int btrfs_clone_file_range(struct file *file_in, loff_t pos_in, 3233loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
3237 struct file *file_out, loff_t pos_out, u64 len); 3234 struct file *file_out, loff_t pos_out,
3235 loff_t len, unsigned int remap_flags);
3238 3236
3239/* tree-defrag.c */ 3237/* tree-defrag.c */
3240int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, 3238int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 97c7a086f7bd..a3c22e16509b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3298,8 +3298,7 @@ const struct file_operations btrfs_file_operations = {
3298#ifdef CONFIG_COMPAT 3298#ifdef CONFIG_COMPAT
3299 .compat_ioctl = btrfs_compat_ioctl, 3299 .compat_ioctl = btrfs_compat_ioctl,
3300#endif 3300#endif
3301 .clone_file_range = btrfs_clone_file_range, 3301 .remap_file_range = btrfs_remap_file_range,
3302 .dedupe_file_range = btrfs_dedupe_file_range,
3303}; 3302};
3304 3303
3305void __cold btrfs_auto_defrag_exit(void) 3304void __cold btrfs_auto_defrag_exit(void)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a990a9045139..3ca6943827ef 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3629,26 +3629,6 @@ out_unlock:
3629 return ret; 3629 return ret;
3630} 3630}
3631 3631
3632int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
3633 struct file *dst_file, loff_t dst_loff,
3634 u64 olen)
3635{
3636 struct inode *src = file_inode(src_file);
3637 struct inode *dst = file_inode(dst_file);
3638 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
3639
3640 if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
3641 /*
3642 * Btrfs does not support blocksize < page_size. As a
3643 * result, btrfs_cmp_data() won't correctly handle
3644 * this situation without an update.
3645 */
3646 return -EINVAL;
3647 }
3648
3649 return btrfs_extent_same(src, src_loff, olen, dst, dst_loff);
3650}
3651
3652static int clone_finish_inode_update(struct btrfs_trans_handle *trans, 3632static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
3653 struct inode *inode, 3633 struct inode *inode,
3654 u64 endoff, 3634 u64 endoff,
@@ -4350,10 +4330,34 @@ out_unlock:
4350 return ret; 4330 return ret;
4351} 4331}
4352 4332
4353int btrfs_clone_file_range(struct file *src_file, loff_t off, 4333loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
4354 struct file *dst_file, loff_t destoff, u64 len) 4334 struct file *dst_file, loff_t destoff, loff_t len,
4335 unsigned int remap_flags)
4355{ 4336{
4356 return btrfs_clone_files(dst_file, src_file, off, len, destoff); 4337 int ret;
4338
4339 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
4340 return -EINVAL;
4341
4342 if (remap_flags & REMAP_FILE_DEDUP) {
4343 struct inode *src = file_inode(src_file);
4344 struct inode *dst = file_inode(dst_file);
4345 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
4346
4347 if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
4348 /*
4349 * Btrfs does not support blocksize < page_size. As a
4350 * result, btrfs_cmp_data() won't correctly handle
4351 * this situation without an update.
4352 */
4353 return -EINVAL;
4354 }
4355
4356 ret = btrfs_extent_same(src, off, len, dst, destoff);
4357 } else {
4358 ret = btrfs_clone_files(dst_file, src_file, off, len, destoff);
4359 }
4360 return ret < 0 ? ret : len;
4357} 4361}
4358 4362
4359static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) 4363static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 7de9603c54f1..b7ac09e38159 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -992,8 +992,9 @@ const struct inode_operations cifs_symlink_inode_ops = {
992 .listxattr = cifs_listxattr, 992 .listxattr = cifs_listxattr,
993}; 993};
994 994
995static int cifs_clone_file_range(struct file *src_file, loff_t off, 995static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
996 struct file *dst_file, loff_t destoff, u64 len) 996 struct file *dst_file, loff_t destoff, loff_t len,
997 unsigned int remap_flags)
997{ 998{
998 struct inode *src_inode = file_inode(src_file); 999 struct inode *src_inode = file_inode(src_file);
999 struct inode *target_inode = file_inode(dst_file); 1000 struct inode *target_inode = file_inode(dst_file);
@@ -1003,6 +1004,9 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off,
1003 unsigned int xid; 1004 unsigned int xid;
1004 int rc; 1005 int rc;
1005 1006
1007 if (remap_flags & ~REMAP_FILE_ADVISORY)
1008 return -EINVAL;
1009
1006 cifs_dbg(FYI, "clone range\n"); 1010 cifs_dbg(FYI, "clone range\n");
1007 1011
1008 xid = get_xid(); 1012 xid = get_xid();
@@ -1042,7 +1046,7 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off,
1042 unlock_two_nondirectories(src_inode, target_inode); 1046 unlock_two_nondirectories(src_inode, target_inode);
1043out: 1047out:
1044 free_xid(xid); 1048 free_xid(xid);
1045 return rc; 1049 return rc < 0 ? rc : len;
1046} 1050}
1047 1051
1048ssize_t cifs_file_copychunk_range(unsigned int xid, 1052ssize_t cifs_file_copychunk_range(unsigned int xid,
@@ -1151,7 +1155,7 @@ const struct file_operations cifs_file_ops = {
1151 .llseek = cifs_llseek, 1155 .llseek = cifs_llseek,
1152 .unlocked_ioctl = cifs_ioctl, 1156 .unlocked_ioctl = cifs_ioctl,
1153 .copy_file_range = cifs_copy_file_range, 1157 .copy_file_range = cifs_copy_file_range,
1154 .clone_file_range = cifs_clone_file_range, 1158 .remap_file_range = cifs_remap_file_range,
1155 .setlease = cifs_setlease, 1159 .setlease = cifs_setlease,
1156 .fallocate = cifs_fallocate, 1160 .fallocate = cifs_fallocate,
1157}; 1161};
@@ -1170,7 +1174,7 @@ const struct file_operations cifs_file_strict_ops = {
1170 .llseek = cifs_llseek, 1174 .llseek = cifs_llseek,
1171 .unlocked_ioctl = cifs_ioctl, 1175 .unlocked_ioctl = cifs_ioctl,
1172 .copy_file_range = cifs_copy_file_range, 1176 .copy_file_range = cifs_copy_file_range,
1173 .clone_file_range = cifs_clone_file_range, 1177 .remap_file_range = cifs_remap_file_range,
1174 .setlease = cifs_setlease, 1178 .setlease = cifs_setlease,
1175 .fallocate = cifs_fallocate, 1179 .fallocate = cifs_fallocate,
1176}; 1180};
@@ -1189,7 +1193,7 @@ const struct file_operations cifs_file_direct_ops = {
1189 .splice_write = iter_file_splice_write, 1193 .splice_write = iter_file_splice_write,
1190 .unlocked_ioctl = cifs_ioctl, 1194 .unlocked_ioctl = cifs_ioctl,
1191 .copy_file_range = cifs_copy_file_range, 1195 .copy_file_range = cifs_copy_file_range,
1192 .clone_file_range = cifs_clone_file_range, 1196 .remap_file_range = cifs_remap_file_range,
1193 .llseek = cifs_llseek, 1197 .llseek = cifs_llseek,
1194 .setlease = cifs_setlease, 1198 .setlease = cifs_setlease,
1195 .fallocate = cifs_fallocate, 1199 .fallocate = cifs_fallocate,
@@ -1208,7 +1212,7 @@ const struct file_operations cifs_file_nobrl_ops = {
1208 .llseek = cifs_llseek, 1212 .llseek = cifs_llseek,
1209 .unlocked_ioctl = cifs_ioctl, 1213 .unlocked_ioctl = cifs_ioctl,
1210 .copy_file_range = cifs_copy_file_range, 1214 .copy_file_range = cifs_copy_file_range,
1211 .clone_file_range = cifs_clone_file_range, 1215 .remap_file_range = cifs_remap_file_range,
1212 .setlease = cifs_setlease, 1216 .setlease = cifs_setlease,
1213 .fallocate = cifs_fallocate, 1217 .fallocate = cifs_fallocate,
1214}; 1218};
@@ -1226,7 +1230,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
1226 .llseek = cifs_llseek, 1230 .llseek = cifs_llseek,
1227 .unlocked_ioctl = cifs_ioctl, 1231 .unlocked_ioctl = cifs_ioctl,
1228 .copy_file_range = cifs_copy_file_range, 1232 .copy_file_range = cifs_copy_file_range,
1229 .clone_file_range = cifs_clone_file_range, 1233 .remap_file_range = cifs_remap_file_range,
1230 .setlease = cifs_setlease, 1234 .setlease = cifs_setlease,
1231 .fallocate = cifs_fallocate, 1235 .fallocate = cifs_fallocate,
1232}; 1236};
@@ -1244,7 +1248,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
1244 .splice_write = iter_file_splice_write, 1248 .splice_write = iter_file_splice_write,
1245 .unlocked_ioctl = cifs_ioctl, 1249 .unlocked_ioctl = cifs_ioctl,
1246 .copy_file_range = cifs_copy_file_range, 1250 .copy_file_range = cifs_copy_file_range,
1247 .clone_file_range = cifs_clone_file_range, 1251 .remap_file_range = cifs_remap_file_range,
1248 .llseek = cifs_llseek, 1252 .llseek = cifs_llseek,
1249 .setlease = cifs_setlease, 1253 .setlease = cifs_setlease,
1250 .fallocate = cifs_fallocate, 1254 .fallocate = cifs_fallocate,
@@ -1256,7 +1260,7 @@ const struct file_operations cifs_dir_ops = {
1256 .read = generic_read_dir, 1260 .read = generic_read_dir,
1257 .unlocked_ioctl = cifs_ioctl, 1261 .unlocked_ioctl = cifs_ioctl,
1258 .copy_file_range = cifs_copy_file_range, 1262 .copy_file_range = cifs_copy_file_range,
1259 .clone_file_range = cifs_clone_file_range, 1263 .remap_file_range = cifs_remap_file_range,
1260 .llseek = generic_file_llseek, 1264 .llseek = generic_file_llseek,
1261 .fsync = cifs_dir_fsync, 1265 .fsync = cifs_dir_fsync,
1262}; 1266};
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 0400297c8d72..d64f622cac8b 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -223,6 +223,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
223 u64 off, u64 olen, u64 destoff) 223 u64 off, u64 olen, u64 destoff)
224{ 224{
225 struct fd src_file = fdget(srcfd); 225 struct fd src_file = fdget(srcfd);
226 loff_t cloned;
226 int ret; 227 int ret;
227 228
228 if (!src_file.file) 229 if (!src_file.file)
@@ -230,7 +231,14 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
230 ret = -EXDEV; 231 ret = -EXDEV;
231 if (src_file.file->f_path.mnt != dst_file->f_path.mnt) 232 if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
232 goto fdput; 233 goto fdput;
233 ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen); 234 cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff,
235 olen, 0);
236 if (cloned < 0)
237 ret = cloned;
238 else if (olen && cloned != olen)
239 ret = -EINVAL;
240 else
241 ret = 0;
234fdput: 242fdput:
235 fdput(src_file); 243 fdput(src_file);
236 return ret; 244 return ret;
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 4288a6ecaf75..46d691ba04bc 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -180,8 +180,9 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
180 return nfs42_proc_allocate(filep, offset, len); 180 return nfs42_proc_allocate(filep, offset, len);
181} 181}
182 182
183static int nfs42_clone_file_range(struct file *src_file, loff_t src_off, 183static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
184 struct file *dst_file, loff_t dst_off, u64 count) 184 struct file *dst_file, loff_t dst_off, loff_t count,
185 unsigned int remap_flags)
185{ 186{
186 struct inode *dst_inode = file_inode(dst_file); 187 struct inode *dst_inode = file_inode(dst_file);
187 struct nfs_server *server = NFS_SERVER(dst_inode); 188 struct nfs_server *server = NFS_SERVER(dst_inode);
@@ -190,6 +191,9 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
190 bool same_inode = false; 191 bool same_inode = false;
191 int ret; 192 int ret;
192 193
194 if (remap_flags & ~REMAP_FILE_ADVISORY)
195 return -EINVAL;
196
193 /* check alignment w.r.t. clone_blksize */ 197 /* check alignment w.r.t. clone_blksize */
194 ret = -EINVAL; 198 ret = -EINVAL;
195 if (bs) { 199 if (bs) {
@@ -240,7 +244,7 @@ out_unlock:
240 inode_unlock(src_inode); 244 inode_unlock(src_inode);
241 } 245 }
242out: 246out:
243 return ret; 247 return ret < 0 ? ret : count;
244} 248}
245#endif /* CONFIG_NFS_V4_2 */ 249#endif /* CONFIG_NFS_V4_2 */
246 250
@@ -262,7 +266,7 @@ const struct file_operations nfs4_file_operations = {
262 .copy_file_range = nfs4_copy_file_range, 266 .copy_file_range = nfs4_copy_file_range,
263 .llseek = nfs4_file_llseek, 267 .llseek = nfs4_file_llseek,
264 .fallocate = nfs42_fallocate, 268 .fallocate = nfs42_fallocate,
265 .clone_file_range = nfs42_clone_file_range, 269 .remap_file_range = nfs42_remap_file_range,
266#else 270#else
267 .llseek = nfs_file_llseek, 271 .llseek = nfs_file_llseek,
268#endif 272#endif
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index fb28be653014..eb67098117b4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -541,8 +541,12 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
541__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, 541__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
542 u64 dst_pos, u64 count) 542 u64 dst_pos, u64 count)
543{ 543{
544 return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos, 544 loff_t cloned;
545 count)); 545
546 cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
547 if (count && cloned != count)
548 cloned = -EINVAL;
549 return nfserrno(cloned < 0 ? cloned : 0);
546} 550}
547 551
548ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, 552ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9fa35cb6f6e0..fe570824b991 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2527,24 +2527,79 @@ out:
2527 return offset; 2527 return offset;
2528} 2528}
2529 2529
2530static int ocfs2_file_clone_range(struct file *file_in, 2530static loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in,
2531 loff_t pos_in, 2531 struct file *file_out, loff_t pos_out,
2532 struct file *file_out, 2532 loff_t len, unsigned int remap_flags)
2533 loff_t pos_out,
2534 u64 len)
2535{ 2533{
2536 return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, 2534 struct inode *inode_in = file_inode(file_in);
2537 len, false); 2535 struct inode *inode_out = file_inode(file_out);
2538} 2536 struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb);
2537 struct buffer_head *in_bh = NULL, *out_bh = NULL;
2538 bool same_inode = (inode_in == inode_out);
2539 loff_t remapped = 0;
2540 ssize_t ret;
2539 2541
2540static int ocfs2_file_dedupe_range(struct file *file_in, 2542 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
2541 loff_t pos_in, 2543 return -EINVAL;
2542 struct file *file_out, 2544 if (!ocfs2_refcount_tree(osb))
2543 loff_t pos_out, 2545 return -EOPNOTSUPP;
2544 u64 len) 2546 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
2545{ 2547 return -EROFS;
2546 return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, 2548
2547 len, true); 2549 /* Lock both files against IO */
2550 ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh);
2551 if (ret)
2552 return ret;
2553
2554 /* Check file eligibility and prepare for block sharing. */
2555 ret = -EINVAL;
2556 if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) ||
2557 (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
2558 goto out_unlock;
2559
2560 ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
2561 &len, remap_flags);
2562 if (ret < 0 || len == 0)
2563 goto out_unlock;
2564
2565 /* Lock out changes to the allocation maps and remap. */
2566 down_write(&OCFS2_I(inode_in)->ip_alloc_sem);
2567 if (!same_inode)
2568 down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem,
2569 SINGLE_DEPTH_NESTING);
2570
2571 /* Zap any page cache for the destination file's range. */
2572 truncate_inode_pages_range(&inode_out->i_data,
2573 round_down(pos_out, PAGE_SIZE),
2574 round_up(pos_out + len, PAGE_SIZE) - 1);
2575
2576 remapped = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in,
2577 inode_out, out_bh, pos_out, len);
2578 up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
2579 if (!same_inode)
2580 up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
2581 if (remapped < 0) {
2582 ret = remapped;
2583 mlog_errno(ret);
2584 goto out_unlock;
2585 }
2586
2587 /*
2588 * Empty the extent map so that we may get the right extent
2589 * record from the disk.
2590 */
2591 ocfs2_extent_map_trunc(inode_in, 0);
2592 ocfs2_extent_map_trunc(inode_out, 0);
2593
2594 ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len);
2595 if (ret) {
2596 mlog_errno(ret);
2597 goto out_unlock;
2598 }
2599
2600out_unlock:
2601 ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
2602 return remapped > 0 ? remapped : ret;
2548} 2603}
2549 2604
2550const struct inode_operations ocfs2_file_iops = { 2605const struct inode_operations ocfs2_file_iops = {
@@ -2586,8 +2641,7 @@ const struct file_operations ocfs2_fops = {
2586 .splice_read = generic_file_splice_read, 2641 .splice_read = generic_file_splice_read,
2587 .splice_write = iter_file_splice_write, 2642 .splice_write = iter_file_splice_write,
2588 .fallocate = ocfs2_fallocate, 2643 .fallocate = ocfs2_fallocate,
2589 .clone_file_range = ocfs2_file_clone_range, 2644 .remap_file_range = ocfs2_remap_file_range,
2590 .dedupe_file_range = ocfs2_file_dedupe_range,
2591}; 2645};
2592 2646
2593const struct file_operations ocfs2_dops = { 2647const struct file_operations ocfs2_dops = {
@@ -2633,8 +2687,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
2633 .splice_read = generic_file_splice_read, 2687 .splice_read = generic_file_splice_read,
2634 .splice_write = iter_file_splice_write, 2688 .splice_write = iter_file_splice_write,
2635 .fallocate = ocfs2_fallocate, 2689 .fallocate = ocfs2_fallocate,
2636 .clone_file_range = ocfs2_file_clone_range, 2690 .remap_file_range = ocfs2_remap_file_range,
2637 .dedupe_file_range = ocfs2_file_dedupe_range,
2638}; 2691};
2639 2692
2640const struct file_operations ocfs2_dops_no_plocks = { 2693const struct file_operations ocfs2_dops_no_plocks = {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 1114ef02e780..a35259eebc56 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4466,9 +4466,9 @@ out:
4466} 4466}
4467 4467
4468/* Update destination inode size, if necessary. */ 4468/* Update destination inode size, if necessary. */
4469static int ocfs2_reflink_update_dest(struct inode *dest, 4469int ocfs2_reflink_update_dest(struct inode *dest,
4470 struct buffer_head *d_bh, 4470 struct buffer_head *d_bh,
4471 loff_t newlen) 4471 loff_t newlen)
4472{ 4472{
4473 handle_t *handle; 4473 handle_t *handle;
4474 int ret; 4474 int ret;
@@ -4505,14 +4505,14 @@ out_commit:
4505} 4505}
4506 4506
4507/* Remap the range pos_in:len in s_inode to pos_out:len in t_inode. */ 4507/* Remap the range pos_in:len in s_inode to pos_out:len in t_inode. */
4508static int ocfs2_reflink_remap_extent(struct inode *s_inode, 4508static loff_t ocfs2_reflink_remap_extent(struct inode *s_inode,
4509 struct buffer_head *s_bh, 4509 struct buffer_head *s_bh,
4510 loff_t pos_in, 4510 loff_t pos_in,
4511 struct inode *t_inode, 4511 struct inode *t_inode,
4512 struct buffer_head *t_bh, 4512 struct buffer_head *t_bh,
4513 loff_t pos_out, 4513 loff_t pos_out,
4514 loff_t len, 4514 loff_t len,
4515 struct ocfs2_cached_dealloc_ctxt *dealloc) 4515 struct ocfs2_cached_dealloc_ctxt *dealloc)
4516{ 4516{
4517 struct ocfs2_extent_tree s_et; 4517 struct ocfs2_extent_tree s_et;
4518 struct ocfs2_extent_tree t_et; 4518 struct ocfs2_extent_tree t_et;
@@ -4520,8 +4520,9 @@ static int ocfs2_reflink_remap_extent(struct inode *s_inode,
4520 struct buffer_head *ref_root_bh = NULL; 4520 struct buffer_head *ref_root_bh = NULL;
4521 struct ocfs2_refcount_tree *ref_tree; 4521 struct ocfs2_refcount_tree *ref_tree;
4522 struct ocfs2_super *osb; 4522 struct ocfs2_super *osb;
4523 loff_t remapped_bytes = 0;
4523 loff_t pstart, plen; 4524 loff_t pstart, plen;
4524 u32 p_cluster, num_clusters, slast, spos, tpos; 4525 u32 p_cluster, num_clusters, slast, spos, tpos, remapped_clus = 0;
4525 unsigned int ext_flags; 4526 unsigned int ext_flags;
4526 int ret = 0; 4527 int ret = 0;
4527 4528
@@ -4603,30 +4604,34 @@ static int ocfs2_reflink_remap_extent(struct inode *s_inode,
4603next_loop: 4604next_loop:
4604 spos += num_clusters; 4605 spos += num_clusters;
4605 tpos += num_clusters; 4606 tpos += num_clusters;
4607 remapped_clus += num_clusters;
4606 } 4608 }
4607 4609
4608out: 4610 goto out;
4609 return ret;
4610out_unlock_refcount: 4611out_unlock_refcount:
4611 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 4612 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
4612 brelse(ref_root_bh); 4613 brelse(ref_root_bh);
4613 return ret; 4614out:
4615 remapped_bytes = ocfs2_clusters_to_bytes(t_inode->i_sb, remapped_clus);
4616 remapped_bytes = min_t(loff_t, len, remapped_bytes);
4617
4618 return remapped_bytes > 0 ? remapped_bytes : ret;
4614} 4619}
4615 4620
4616/* Set up refcount tree and remap s_inode to t_inode. */ 4621/* Set up refcount tree and remap s_inode to t_inode. */
4617static int ocfs2_reflink_remap_blocks(struct inode *s_inode, 4622loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode,
4618 struct buffer_head *s_bh, 4623 struct buffer_head *s_bh,
4619 loff_t pos_in, 4624 loff_t pos_in,
4620 struct inode *t_inode, 4625 struct inode *t_inode,
4621 struct buffer_head *t_bh, 4626 struct buffer_head *t_bh,
4622 loff_t pos_out, 4627 loff_t pos_out,
4623 loff_t len) 4628 loff_t len)
4624{ 4629{
4625 struct ocfs2_cached_dealloc_ctxt dealloc; 4630 struct ocfs2_cached_dealloc_ctxt dealloc;
4626 struct ocfs2_super *osb; 4631 struct ocfs2_super *osb;
4627 struct ocfs2_dinode *dis; 4632 struct ocfs2_dinode *dis;
4628 struct ocfs2_dinode *dit; 4633 struct ocfs2_dinode *dit;
4629 int ret; 4634 loff_t ret;
4630 4635
4631 osb = OCFS2_SB(s_inode->i_sb); 4636 osb = OCFS2_SB(s_inode->i_sb);
4632 dis = (struct ocfs2_dinode *)s_bh->b_data; 4637 dis = (struct ocfs2_dinode *)s_bh->b_data;
@@ -4698,7 +4703,7 @@ static int ocfs2_reflink_remap_blocks(struct inode *s_inode,
4698 /* Actually remap extents now. */ 4703 /* Actually remap extents now. */
4699 ret = ocfs2_reflink_remap_extent(s_inode, s_bh, pos_in, t_inode, t_bh, 4704 ret = ocfs2_reflink_remap_extent(s_inode, s_bh, pos_in, t_inode, t_bh,
4700 pos_out, len, &dealloc); 4705 pos_out, len, &dealloc);
4701 if (ret) { 4706 if (ret < 0) {
4702 mlog_errno(ret); 4707 mlog_errno(ret);
4703 goto out; 4708 goto out;
4704 } 4709 }
@@ -4713,10 +4718,10 @@ out:
4713} 4718}
4714 4719
4715/* Lock an inode and grab a bh pointing to the inode. */ 4720/* Lock an inode and grab a bh pointing to the inode. */
4716static int ocfs2_reflink_inodes_lock(struct inode *s_inode, 4721int ocfs2_reflink_inodes_lock(struct inode *s_inode,
4717 struct buffer_head **bh1, 4722 struct buffer_head **bh1,
4718 struct inode *t_inode, 4723 struct inode *t_inode,
4719 struct buffer_head **bh2) 4724 struct buffer_head **bh2)
4720{ 4725{
4721 struct inode *inode1; 4726 struct inode *inode1;
4722 struct inode *inode2; 4727 struct inode *inode2;
@@ -4801,10 +4806,10 @@ out_i1:
4801} 4806}
4802 4807
4803/* Unlock both inodes and release buffers. */ 4808/* Unlock both inodes and release buffers. */
4804static void ocfs2_reflink_inodes_unlock(struct inode *s_inode, 4809void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
4805 struct buffer_head *s_bh, 4810 struct buffer_head *s_bh,
4806 struct inode *t_inode, 4811 struct inode *t_inode,
4807 struct buffer_head *t_bh) 4812 struct buffer_head *t_bh)
4808{ 4813{
4809 ocfs2_inode_unlock(s_inode, 1); 4814 ocfs2_inode_unlock(s_inode, 1);
4810 ocfs2_rw_unlock(s_inode, 1); 4815 ocfs2_rw_unlock(s_inode, 1);
@@ -4816,82 +4821,3 @@ static void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
4816 } 4821 }
4817 unlock_two_nondirectories(s_inode, t_inode); 4822 unlock_two_nondirectories(s_inode, t_inode);
4818} 4823}
4819
4820/* Link a range of blocks from one file to another. */
4821int ocfs2_reflink_remap_range(struct file *file_in,
4822 loff_t pos_in,
4823 struct file *file_out,
4824 loff_t pos_out,
4825 u64 len,
4826 bool is_dedupe)
4827{
4828 struct inode *inode_in = file_inode(file_in);
4829 struct inode *inode_out = file_inode(file_out);
4830 struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb);
4831 struct buffer_head *in_bh = NULL, *out_bh = NULL;
4832 bool same_inode = (inode_in == inode_out);
4833 ssize_t ret;
4834
4835 if (!ocfs2_refcount_tree(osb))
4836 return -EOPNOTSUPP;
4837 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
4838 return -EROFS;
4839
4840 /* Lock both files against IO */
4841 ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh);
4842 if (ret)
4843 return ret;
4844
4845 /* Check file eligibility and prepare for block sharing. */
4846 ret = -EINVAL;
4847 if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) ||
4848 (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
4849 goto out_unlock;
4850
4851 ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
4852 &len, is_dedupe);
4853 if (ret <= 0)
4854 goto out_unlock;
4855
4856 /* Lock out changes to the allocation maps and remap. */
4857 down_write(&OCFS2_I(inode_in)->ip_alloc_sem);
4858 if (!same_inode)
4859 down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem,
4860 SINGLE_DEPTH_NESTING);
4861
4862 ret = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, inode_out,
4863 out_bh, pos_out, len);
4864
4865 /* Zap any page cache for the destination file's range. */
4866 if (!ret)
4867 truncate_inode_pages_range(&inode_out->i_data, pos_out,
4868 PAGE_ALIGN(pos_out + len) - 1);
4869
4870 up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
4871 if (!same_inode)
4872 up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
4873 if (ret) {
4874 mlog_errno(ret);
4875 goto out_unlock;
4876 }
4877
4878 /*
4879 * Empty the extent map so that we may get the right extent
4880 * record from the disk.
4881 */
4882 ocfs2_extent_map_trunc(inode_in, 0);
4883 ocfs2_extent_map_trunc(inode_out, 0);
4884
4885 ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len);
4886 if (ret) {
4887 mlog_errno(ret);
4888 goto out_unlock;
4889 }
4890
4891 ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
4892 return 0;
4893
4894out_unlock:
4895 ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
4896 return ret;
4897}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 4af55bf4b35b..e9e862be4a1e 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -115,11 +115,23 @@ int ocfs2_reflink_ioctl(struct inode *inode,
115 const char __user *oldname, 115 const char __user *oldname,
116 const char __user *newname, 116 const char __user *newname,
117 bool preserve); 117 bool preserve);
118int ocfs2_reflink_remap_range(struct file *file_in, 118loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode,
119 loff_t pos_in, 119 struct buffer_head *s_bh,
120 struct file *file_out, 120 loff_t pos_in,
121 loff_t pos_out, 121 struct inode *t_inode,
122 u64 len, 122 struct buffer_head *t_bh,
123 bool is_dedupe); 123 loff_t pos_out,
124 loff_t len);
125int ocfs2_reflink_inodes_lock(struct inode *s_inode,
126 struct buffer_head **bh1,
127 struct inode *t_inode,
128 struct buffer_head **bh2);
129void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
130 struct buffer_head *s_bh,
131 struct inode *t_inode,
132 struct buffer_head *t_bh);
133int ocfs2_reflink_update_dest(struct inode *dest,
134 struct buffer_head *d_bh,
135 loff_t newlen);
124 136
125#endif /* OCFS2_REFCOUNTTREE_H */ 137#endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index d6a3346e2672..9e62dcf06fc4 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -125,6 +125,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
125 struct file *new_file; 125 struct file *new_file;
126 loff_t old_pos = 0; 126 loff_t old_pos = 0;
127 loff_t new_pos = 0; 127 loff_t new_pos = 0;
128 loff_t cloned;
128 int error = 0; 129 int error = 0;
129 130
130 if (len == 0) 131 if (len == 0)
@@ -141,11 +142,10 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
141 } 142 }
142 143
143 /* Try to use clone_file_range to clone up within the same fs */ 144 /* Try to use clone_file_range to clone up within the same fs */
144 error = do_clone_file_range(old_file, 0, new_file, 0, len); 145 cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
145 if (!error) 146 if (cloned == len)
146 goto out; 147 goto out;
147 /* Couldn't clone, so now we try to copy the data */ 148 /* Couldn't clone, so now we try to copy the data */
148 error = 0;
149 149
150 /* FIXME: copy up sparse files efficiently */ 150 /* FIXME: copy up sparse files efficiently */
151 while (len) { 151 while (len) {
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 986313da0c88..84dd957efa24 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -434,14 +434,14 @@ enum ovl_copyop {
434 OVL_DEDUPE, 434 OVL_DEDUPE,
435}; 435};
436 436
437static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in, 437static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
438 struct file *file_out, loff_t pos_out, 438 struct file *file_out, loff_t pos_out,
439 u64 len, unsigned int flags, enum ovl_copyop op) 439 loff_t len, unsigned int flags, enum ovl_copyop op)
440{ 440{
441 struct inode *inode_out = file_inode(file_out); 441 struct inode *inode_out = file_inode(file_out);
442 struct fd real_in, real_out; 442 struct fd real_in, real_out;
443 const struct cred *old_cred; 443 const struct cred *old_cred;
444 ssize_t ret; 444 loff_t ret;
445 445
446 ret = ovl_real_fdget(file_out, &real_out); 446 ret = ovl_real_fdget(file_out, &real_out);
447 if (ret) 447 if (ret)
@@ -462,12 +462,13 @@ static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in,
462 462
463 case OVL_CLONE: 463 case OVL_CLONE:
464 ret = vfs_clone_file_range(real_in.file, pos_in, 464 ret = vfs_clone_file_range(real_in.file, pos_in,
465 real_out.file, pos_out, len); 465 real_out.file, pos_out, len, flags);
466 break; 466 break;
467 467
468 case OVL_DEDUPE: 468 case OVL_DEDUPE:
469 ret = vfs_dedupe_file_range_one(real_in.file, pos_in, 469 ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
470 real_out.file, pos_out, len); 470 real_out.file, pos_out, len,
471 flags);
471 break; 472 break;
472 } 473 }
473 revert_creds(old_cred); 474 revert_creds(old_cred);
@@ -489,26 +490,31 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
489 OVL_COPY); 490 OVL_COPY);
490} 491}
491 492
492static int ovl_clone_file_range(struct file *file_in, loff_t pos_in, 493static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
493 struct file *file_out, loff_t pos_out, u64 len) 494 struct file *file_out, loff_t pos_out,
495 loff_t len, unsigned int remap_flags)
494{ 496{
495 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0, 497 enum ovl_copyop op;
496 OVL_CLONE); 498
497} 499 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
500 return -EINVAL;
501
502 if (remap_flags & REMAP_FILE_DEDUP)
503 op = OVL_DEDUPE;
504 else
505 op = OVL_CLONE;
498 506
499static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in,
500 struct file *file_out, loff_t pos_out, u64 len)
501{
502 /* 507 /*
503 * Don't copy up because of a dedupe request, this wouldn't make sense 508 * Don't copy up because of a dedupe request, this wouldn't make sense
504 * most of the time (data would be duplicated instead of deduplicated). 509 * most of the time (data would be duplicated instead of deduplicated).
505 */ 510 */
506 if (!ovl_inode_upper(file_inode(file_in)) || 511 if (op == OVL_DEDUPE &&
507 !ovl_inode_upper(file_inode(file_out))) 512 (!ovl_inode_upper(file_inode(file_in)) ||
513 !ovl_inode_upper(file_inode(file_out))))
508 return -EPERM; 514 return -EPERM;
509 515
510 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0, 516 return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
511 OVL_DEDUPE); 517 remap_flags, op);
512} 518}
513 519
514const struct file_operations ovl_file_operations = { 520const struct file_operations ovl_file_operations = {
@@ -525,6 +531,5 @@ const struct file_operations ovl_file_operations = {
525 .compat_ioctl = ovl_compat_ioctl, 531 .compat_ioctl = ovl_compat_ioctl,
526 532
527 .copy_file_range = ovl_copy_file_range, 533 .copy_file_range = ovl_copy_file_range,
528 .clone_file_range = ovl_clone_file_range, 534 .remap_file_range = ovl_remap_file_range,
529 .dedupe_file_range = ovl_dedupe_file_range,
530}; 535};
diff --git a/fs/read_write.c b/fs/read_write.c
index 5a2ee488c5d2..bfcb4ced5664 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1587,11 +1587,15 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
1587 * Try cloning first, this is supported by more file systems, and 1587 * Try cloning first, this is supported by more file systems, and
1588 * more efficient if both clone and copy are supported (e.g. NFS). 1588 * more efficient if both clone and copy are supported (e.g. NFS).
1589 */ 1589 */
1590 if (file_in->f_op->clone_file_range) { 1590 if (file_in->f_op->remap_file_range) {
1591 ret = file_in->f_op->clone_file_range(file_in, pos_in, 1591 loff_t cloned;
1592 file_out, pos_out, len); 1592
1593 if (ret == 0) { 1593 cloned = file_in->f_op->remap_file_range(file_in, pos_in,
1594 ret = len; 1594 file_out, pos_out,
1595 min_t(loff_t, MAX_RW_COUNT, len),
1596 REMAP_FILE_CAN_SHORTEN);
1597 if (cloned > 0) {
1598 ret = cloned;
1595 goto done; 1599 goto done;
1596 } 1600 }
1597 } 1601 }
@@ -1685,11 +1689,12 @@ out2:
1685 return ret; 1689 return ret;
1686} 1690}
1687 1691
1688static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write) 1692static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
1693 bool write)
1689{ 1694{
1690 struct inode *inode = file_inode(file); 1695 struct inode *inode = file_inode(file);
1691 1696
1692 if (unlikely(pos < 0)) 1697 if (unlikely(pos < 0 || len < 0))
1693 return -EINVAL; 1698 return -EINVAL;
1694 1699
1695 if (unlikely((loff_t) (pos + len) < 0)) 1700 if (unlikely((loff_t) (pos + len) < 0))
@@ -1707,22 +1712,150 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
1707 1712
1708 return security_file_permission(file, write ? MAY_WRITE : MAY_READ); 1713 return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
1709} 1714}
1715/*
1716 * Ensure that we don't remap a partial EOF block in the middle of something
1717 * else. Assume that the offsets have already been checked for block
1718 * alignment.
1719 *
1720 * For deduplication we always scale down to the previous block because we
1721 * can't meaningfully compare post-EOF contents.
1722 *
1723 * For clone we only link a partial EOF block above the destination file's EOF.
1724 *
1725 * Shorten the request if possible.
1726 */
1727static int generic_remap_check_len(struct inode *inode_in,
1728 struct inode *inode_out,
1729 loff_t pos_out,
1730 loff_t *len,
1731 unsigned int remap_flags)
1732{
1733 u64 blkmask = i_blocksize(inode_in) - 1;
1734 loff_t new_len = *len;
1735
1736 if ((*len & blkmask) == 0)
1737 return 0;
1738
1739 if ((remap_flags & REMAP_FILE_DEDUP) ||
1740 pos_out + *len < i_size_read(inode_out))
1741 new_len &= ~blkmask;
1742
1743 if (new_len == *len)
1744 return 0;
1745
1746 if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
1747 *len = new_len;
1748 return 0;
1749 }
1750
1751 return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
1752}
1753
1754/*
1755 * Read a page's worth of file data into the page cache. Return the page
1756 * locked.
1757 */
1758static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
1759{
1760 struct page *page;
1761
1762 page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL);
1763 if (IS_ERR(page))
1764 return page;
1765 if (!PageUptodate(page)) {
1766 put_page(page);
1767 return ERR_PTR(-EIO);
1768 }
1769 lock_page(page);
1770 return page;
1771}
1772
1773/*
1774 * Compare extents of two files to see if they are the same.
1775 * Caller must have locked both inodes to prevent write races.
1776 */
1777static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
1778 struct inode *dest, loff_t destoff,
1779 loff_t len, bool *is_same)
1780{
1781 loff_t src_poff;
1782 loff_t dest_poff;
1783 void *src_addr;
1784 void *dest_addr;
1785 struct page *src_page;
1786 struct page *dest_page;
1787 loff_t cmp_len;
1788 bool same;
1789 int error;
1790
1791 error = -EINVAL;
1792 same = true;
1793 while (len) {
1794 src_poff = srcoff & (PAGE_SIZE - 1);
1795 dest_poff = destoff & (PAGE_SIZE - 1);
1796 cmp_len = min(PAGE_SIZE - src_poff,
1797 PAGE_SIZE - dest_poff);
1798 cmp_len = min(cmp_len, len);
1799 if (cmp_len <= 0)
1800 goto out_error;
1801
1802 src_page = vfs_dedupe_get_page(src, srcoff);
1803 if (IS_ERR(src_page)) {
1804 error = PTR_ERR(src_page);
1805 goto out_error;
1806 }
1807 dest_page = vfs_dedupe_get_page(dest, destoff);
1808 if (IS_ERR(dest_page)) {
1809 error = PTR_ERR(dest_page);
1810 unlock_page(src_page);
1811 put_page(src_page);
1812 goto out_error;
1813 }
1814 src_addr = kmap_atomic(src_page);
1815 dest_addr = kmap_atomic(dest_page);
1816
1817 flush_dcache_page(src_page);
1818 flush_dcache_page(dest_page);
1819
1820 if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
1821 same = false;
1822
1823 kunmap_atomic(dest_addr);
1824 kunmap_atomic(src_addr);
1825 unlock_page(dest_page);
1826 unlock_page(src_page);
1827 put_page(dest_page);
1828 put_page(src_page);
1829
1830 if (!same)
1831 break;
1832
1833 srcoff += cmp_len;
1834 destoff += cmp_len;
1835 len -= cmp_len;
1836 }
1837
1838 *is_same = same;
1839 return 0;
1840
1841out_error:
1842 return error;
1843}
1710 1844
1711/* 1845/*
1712 * Check that the two inodes are eligible for cloning, the ranges make 1846 * Check that the two inodes are eligible for cloning, the ranges make
1713 * sense, and then flush all dirty data. Caller must ensure that the 1847 * sense, and then flush all dirty data. Caller must ensure that the
1714 * inodes have been locked against any other modifications. 1848 * inodes have been locked against any other modifications.
1715 * 1849 *
1716 * Returns: 0 for "nothing to clone", 1 for "something to clone", or 1850 * If there's an error, then the usual negative error code is returned.
1717 * the usual negative error code. 1851 * Otherwise returns 0 with *len set to the request length.
1718 */ 1852 */
1719int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, 1853int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
1720 struct inode *inode_out, loff_t pos_out, 1854 struct file *file_out, loff_t pos_out,
1721 u64 *len, bool is_dedupe) 1855 loff_t *len, unsigned int remap_flags)
1722{ 1856{
1723 loff_t bs = inode_out->i_sb->s_blocksize; 1857 struct inode *inode_in = file_inode(file_in);
1724 loff_t blen; 1858 struct inode *inode_out = file_inode(file_out);
1725 loff_t isize;
1726 bool same_inode = (inode_in == inode_out); 1859 bool same_inode = (inode_in == inode_out);
1727 int ret; 1860 int ret;
1728 1861
@@ -1739,50 +1872,24 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
1739 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) 1872 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1740 return -EINVAL; 1873 return -EINVAL;
1741 1874
1742 /* Are we going all the way to the end? */
1743 isize = i_size_read(inode_in);
1744 if (isize == 0)
1745 return 0;
1746
1747 /* Zero length dedupe exits immediately; reflink goes to EOF. */ 1875 /* Zero length dedupe exits immediately; reflink goes to EOF. */
1748 if (*len == 0) { 1876 if (*len == 0) {
1749 if (is_dedupe || pos_in == isize) 1877 loff_t isize = i_size_read(inode_in);
1878
1879 if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize)
1750 return 0; 1880 return 0;
1751 if (pos_in > isize) 1881 if (pos_in > isize)
1752 return -EINVAL; 1882 return -EINVAL;
1753 *len = isize - pos_in; 1883 *len = isize - pos_in;
1884 if (*len == 0)
1885 return 0;
1754 } 1886 }
1755 1887
1756 /* Ensure offsets don't wrap and the input is inside i_size */ 1888 /* Check that we don't violate system file offset limits. */
1757 if (pos_in + *len < pos_in || pos_out + *len < pos_out || 1889 ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
1758 pos_in + *len > isize) 1890 remap_flags);
1759 return -EINVAL; 1891 if (ret)
1760 1892 return ret;
1761 /* Don't allow dedupe past EOF in the dest file */
1762 if (is_dedupe) {
1763 loff_t disize;
1764
1765 disize = i_size_read(inode_out);
1766 if (pos_out >= disize || pos_out + *len > disize)
1767 return -EINVAL;
1768 }
1769
1770 /* If we're linking to EOF, continue to the block boundary. */
1771 if (pos_in + *len == isize)
1772 blen = ALIGN(isize, bs) - pos_in;
1773 else
1774 blen = *len;
1775
1776 /* Only reflink if we're aligned to block boundaries */
1777 if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
1778 !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
1779 return -EINVAL;
1780
1781 /* Don't allow overlapped reflink within the same file */
1782 if (same_inode) {
1783 if (pos_out + blen > pos_in && pos_out < pos_in + blen)
1784 return -EINVAL;
1785 }
1786 1893
1787 /* Wait for the completion of any pending IOs on both files */ 1894 /* Wait for the completion of any pending IOs on both files */
1788 inode_dio_wait(inode_in); 1895 inode_dio_wait(inode_in);
@@ -1802,7 +1909,7 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
1802 /* 1909 /*
1803 * Check that the extents are the same. 1910 * Check that the extents are the same.
1804 */ 1911 */
1805 if (is_dedupe) { 1912 if (remap_flags & REMAP_FILE_DEDUP) {
1806 bool is_same = false; 1913 bool is_same = false;
1807 1914
1808 ret = vfs_dedupe_file_range_compare(inode_in, pos_in, 1915 ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
@@ -1813,16 +1920,43 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
1813 return -EBADE; 1920 return -EBADE;
1814 } 1921 }
1815 1922
1816 return 1; 1923 ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
1924 remap_flags);
1925 if (ret)
1926 return ret;
1927
1928 /* If can't alter the file contents, we're done. */
1929 if (!(remap_flags & REMAP_FILE_DEDUP)) {
1930 /* Update the timestamps, since we can alter file contents. */
1931 if (!(file_out->f_mode & FMODE_NOCMTIME)) {
1932 ret = file_update_time(file_out);
1933 if (ret)
1934 return ret;
1935 }
1936
1937 /*
1938 * Clear the security bits if the process is not being run by
1939 * root. This keeps people from modifying setuid and setgid
1940 * binaries.
1941 */
1942 ret = file_remove_privs(file_out);
1943 if (ret)
1944 return ret;
1945 }
1946
1947 return 0;
1817} 1948}
1818EXPORT_SYMBOL(vfs_clone_file_prep_inodes); 1949EXPORT_SYMBOL(generic_remap_file_range_prep);
1819 1950
1820int do_clone_file_range(struct file *file_in, loff_t pos_in, 1951loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
1821 struct file *file_out, loff_t pos_out, u64 len) 1952 struct file *file_out, loff_t pos_out,
1953 loff_t len, unsigned int remap_flags)
1822{ 1954{
1823 struct inode *inode_in = file_inode(file_in); 1955 struct inode *inode_in = file_inode(file_in);
1824 struct inode *inode_out = file_inode(file_out); 1956 struct inode *inode_out = file_inode(file_out);
1825 int ret; 1957 loff_t ret;
1958
1959 WARN_ON_ONCE(remap_flags);
1826 1960
1827 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) 1961 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1828 return -EISDIR; 1962 return -EISDIR;
@@ -1842,140 +1976,43 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in,
1842 (file_out->f_flags & O_APPEND)) 1976 (file_out->f_flags & O_APPEND))
1843 return -EBADF; 1977 return -EBADF;
1844 1978
1845 if (!file_in->f_op->clone_file_range) 1979 if (!file_in->f_op->remap_file_range)
1846 return -EOPNOTSUPP; 1980 return -EOPNOTSUPP;
1847 1981
1848 ret = clone_verify_area(file_in, pos_in, len, false); 1982 ret = remap_verify_area(file_in, pos_in, len, false);
1849 if (ret) 1983 if (ret)
1850 return ret; 1984 return ret;
1851 1985
1852 ret = clone_verify_area(file_out, pos_out, len, true); 1986 ret = remap_verify_area(file_out, pos_out, len, true);
1853 if (ret) 1987 if (ret)
1854 return ret; 1988 return ret;
1855 1989
1856 if (pos_in + len > i_size_read(inode_in)) 1990 ret = file_in->f_op->remap_file_range(file_in, pos_in,
1857 return -EINVAL; 1991 file_out, pos_out, len, remap_flags);
1858 1992 if (ret < 0)
1859 ret = file_in->f_op->clone_file_range(file_in, pos_in, 1993 return ret;
1860 file_out, pos_out, len);
1861 if (!ret) {
1862 fsnotify_access(file_in);
1863 fsnotify_modify(file_out);
1864 }
1865 1994
1995 fsnotify_access(file_in);
1996 fsnotify_modify(file_out);
1866 return ret; 1997 return ret;
1867} 1998}
1868EXPORT_SYMBOL(do_clone_file_range); 1999EXPORT_SYMBOL(do_clone_file_range);
1869 2000
1870int vfs_clone_file_range(struct file *file_in, loff_t pos_in, 2001loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
1871 struct file *file_out, loff_t pos_out, u64 len) 2002 struct file *file_out, loff_t pos_out,
2003 loff_t len, unsigned int remap_flags)
1872{ 2004{
1873 int ret; 2005 loff_t ret;
1874 2006
1875 file_start_write(file_out); 2007 file_start_write(file_out);
1876 ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len); 2008 ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
2009 remap_flags);
1877 file_end_write(file_out); 2010 file_end_write(file_out);
1878 2011
1879 return ret; 2012 return ret;
1880} 2013}
1881EXPORT_SYMBOL(vfs_clone_file_range); 2014EXPORT_SYMBOL(vfs_clone_file_range);
1882 2015
1883/*
1884 * Read a page's worth of file data into the page cache. Return the page
1885 * locked.
1886 */
1887static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
1888{
1889 struct address_space *mapping;
1890 struct page *page;
1891 pgoff_t n;
1892
1893 n = offset >> PAGE_SHIFT;
1894 mapping = inode->i_mapping;
1895 page = read_mapping_page(mapping, n, NULL);
1896 if (IS_ERR(page))
1897 return page;
1898 if (!PageUptodate(page)) {
1899 put_page(page);
1900 return ERR_PTR(-EIO);
1901 }
1902 lock_page(page);
1903 return page;
1904}
1905
1906/*
1907 * Compare extents of two files to see if they are the same.
1908 * Caller must have locked both inodes to prevent write races.
1909 */
1910int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
1911 struct inode *dest, loff_t destoff,
1912 loff_t len, bool *is_same)
1913{
1914 loff_t src_poff;
1915 loff_t dest_poff;
1916 void *src_addr;
1917 void *dest_addr;
1918 struct page *src_page;
1919 struct page *dest_page;
1920 loff_t cmp_len;
1921 bool same;
1922 int error;
1923
1924 error = -EINVAL;
1925 same = true;
1926 while (len) {
1927 src_poff = srcoff & (PAGE_SIZE - 1);
1928 dest_poff = destoff & (PAGE_SIZE - 1);
1929 cmp_len = min(PAGE_SIZE - src_poff,
1930 PAGE_SIZE - dest_poff);
1931 cmp_len = min(cmp_len, len);
1932 if (cmp_len <= 0)
1933 goto out_error;
1934
1935 src_page = vfs_dedupe_get_page(src, srcoff);
1936 if (IS_ERR(src_page)) {
1937 error = PTR_ERR(src_page);
1938 goto out_error;
1939 }
1940 dest_page = vfs_dedupe_get_page(dest, destoff);
1941 if (IS_ERR(dest_page)) {
1942 error = PTR_ERR(dest_page);
1943 unlock_page(src_page);
1944 put_page(src_page);
1945 goto out_error;
1946 }
1947 src_addr = kmap_atomic(src_page);
1948 dest_addr = kmap_atomic(dest_page);
1949
1950 flush_dcache_page(src_page);
1951 flush_dcache_page(dest_page);
1952
1953 if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
1954 same = false;
1955
1956 kunmap_atomic(dest_addr);
1957 kunmap_atomic(src_addr);
1958 unlock_page(dest_page);
1959 unlock_page(src_page);
1960 put_page(dest_page);
1961 put_page(src_page);
1962
1963 if (!same)
1964 break;
1965
1966 srcoff += cmp_len;
1967 destoff += cmp_len;
1968 len -= cmp_len;
1969 }
1970
1971 *is_same = same;
1972 return 0;
1973
1974out_error:
1975 return error;
1976}
1977EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
1978
1979/* Check whether we are allowed to dedupe the destination file */ 2016/* Check whether we are allowed to dedupe the destination file */
1980static bool allow_file_dedupe(struct file *file) 2017static bool allow_file_dedupe(struct file *file)
1981{ 2018{
@@ -1990,16 +2027,20 @@ static bool allow_file_dedupe(struct file *file)
1990 return false; 2027 return false;
1991} 2028}
1992 2029
1993int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, 2030loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
1994 struct file *dst_file, loff_t dst_pos, u64 len) 2031 struct file *dst_file, loff_t dst_pos,
2032 loff_t len, unsigned int remap_flags)
1995{ 2033{
1996 s64 ret; 2034 loff_t ret;
2035
2036 WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
2037 REMAP_FILE_CAN_SHORTEN));
1997 2038
1998 ret = mnt_want_write_file(dst_file); 2039 ret = mnt_want_write_file(dst_file);
1999 if (ret) 2040 if (ret)
2000 return ret; 2041 return ret;
2001 2042
2002 ret = clone_verify_area(dst_file, dst_pos, len, true); 2043 ret = remap_verify_area(dst_file, dst_pos, len, true);
2003 if (ret < 0) 2044 if (ret < 0)
2004 goto out_drop_write; 2045 goto out_drop_write;
2005 2046
@@ -2016,11 +2057,16 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
2016 goto out_drop_write; 2057 goto out_drop_write;
2017 2058
2018 ret = -EINVAL; 2059 ret = -EINVAL;
2019 if (!dst_file->f_op->dedupe_file_range) 2060 if (!dst_file->f_op->remap_file_range)
2020 goto out_drop_write; 2061 goto out_drop_write;
2021 2062
2022 ret = dst_file->f_op->dedupe_file_range(src_file, src_pos, 2063 if (len == 0) {
2023 dst_file, dst_pos, len); 2064 ret = 0;
2065 goto out_drop_write;
2066 }
2067
2068 ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file,
2069 dst_pos, len, remap_flags | REMAP_FILE_DEDUP);
2024out_drop_write: 2070out_drop_write:
2025 mnt_drop_write_file(dst_file); 2071 mnt_drop_write_file(dst_file);
2026 2072
@@ -2037,7 +2083,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
2037 int i; 2083 int i;
2038 int ret; 2084 int ret;
2039 u16 count = same->dest_count; 2085 u16 count = same->dest_count;
2040 int deduped; 2086 loff_t deduped;
2041 2087
2042 if (!(file->f_mode & FMODE_READ)) 2088 if (!(file->f_mode & FMODE_READ))
2043 return -EINVAL; 2089 return -EINVAL;
@@ -2056,7 +2102,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
2056 if (!S_ISREG(src->i_mode)) 2102 if (!S_ISREG(src->i_mode))
2057 goto out; 2103 goto out;
2058 2104
2059 ret = clone_verify_area(file, off, len, false); 2105 ret = remap_verify_area(file, off, len, false);
2060 if (ret < 0) 2106 if (ret < 0)
2061 goto out; 2107 goto out;
2062 ret = 0; 2108 ret = 0;
@@ -2088,7 +2134,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
2088 } 2134 }
2089 2135
2090 deduped = vfs_dedupe_file_range_one(file, off, dst_file, 2136 deduped = vfs_dedupe_file_range_one(file, off, dst_file,
2091 info->dest_offset, len); 2137 info->dest_offset, len,
2138 REMAP_FILE_CAN_SHORTEN);
2092 if (deduped == -EBADE) 2139 if (deduped == -EBADE)
2093 info->status = FILE_DEDUPE_RANGE_DIFFERS; 2140 info->status = FILE_DEDUPE_RANGE_DIFFERS;
2094 else if (deduped < 0) 2141 else if (deduped < 0)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 61a5ad2600e8..53c9ab8fb777 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -919,28 +919,67 @@ out_unlock:
919 return error; 919 return error;
920} 920}
921 921
922STATIC int
923xfs_file_clone_range(
924 struct file *file_in,
925 loff_t pos_in,
926 struct file *file_out,
927 loff_t pos_out,
928 u64 len)
929{
930 return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
931 len, false);
932}
933 922
934STATIC int 923loff_t
935xfs_file_dedupe_range( 924xfs_file_remap_range(
936 struct file *file_in, 925 struct file *file_in,
937 loff_t pos_in, 926 loff_t pos_in,
938 struct file *file_out, 927 struct file *file_out,
939 loff_t pos_out, 928 loff_t pos_out,
940 u64 len) 929 loff_t len,
930 unsigned int remap_flags)
941{ 931{
942 return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, 932 struct inode *inode_in = file_inode(file_in);
943 len, true); 933 struct xfs_inode *src = XFS_I(inode_in);
934 struct inode *inode_out = file_inode(file_out);
935 struct xfs_inode *dest = XFS_I(inode_out);
936 struct xfs_mount *mp = src->i_mount;
937 loff_t remapped = 0;
938 xfs_extlen_t cowextsize;
939 int ret;
940
941 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
942 return -EINVAL;
943
944 if (!xfs_sb_version_hasreflink(&mp->m_sb))
945 return -EOPNOTSUPP;
946
947 if (XFS_FORCED_SHUTDOWN(mp))
948 return -EIO;
949
950 /* Prepare and then clone file data. */
951 ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
952 &len, remap_flags);
953 if (ret < 0 || len == 0)
954 return ret;
955
956 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
957
958 ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
959 &remapped);
960 if (ret)
961 goto out_unlock;
962
963 /*
964 * Carry the cowextsize hint from src to dest if we're sharing the
965 * entire source file to the entire destination file, the source file
966 * has a cowextsize hint, and the destination file does not.
967 */
968 cowextsize = 0;
969 if (pos_in == 0 && len == i_size_read(inode_in) &&
970 (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
971 pos_out == 0 && len >= i_size_read(inode_out) &&
972 !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
973 cowextsize = src->i_d.di_cowextsize;
974
975 ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
976 remap_flags);
977
978out_unlock:
979 xfs_reflink_remap_unlock(file_in, file_out);
980 if (ret)
981 trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
982 return remapped > 0 ? remapped : ret;
944} 983}
945 984
946STATIC int 985STATIC int
@@ -1175,8 +1214,7 @@ const struct file_operations xfs_file_operations = {
1175 .fsync = xfs_file_fsync, 1214 .fsync = xfs_file_fsync,
1176 .get_unmapped_area = thp_get_unmapped_area, 1215 .get_unmapped_area = thp_get_unmapped_area,
1177 .fallocate = xfs_file_fallocate, 1216 .fallocate = xfs_file_fallocate,
1178 .clone_file_range = xfs_file_clone_range, 1217 .remap_file_range = xfs_file_remap_range,
1179 .dedupe_file_range = xfs_file_dedupe_range,
1180}; 1218};
1181 1219
1182const struct file_operations xfs_dir_file_operations = { 1220const struct file_operations xfs_dir_file_operations = {
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 8eaeec9d58ed..ecdb086bc23e 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -913,18 +913,18 @@ out_error:
913/* 913/*
914 * Update destination inode size & cowextsize hint, if necessary. 914 * Update destination inode size & cowextsize hint, if necessary.
915 */ 915 */
916STATIC int 916int
917xfs_reflink_update_dest( 917xfs_reflink_update_dest(
918 struct xfs_inode *dest, 918 struct xfs_inode *dest,
919 xfs_off_t newlen, 919 xfs_off_t newlen,
920 xfs_extlen_t cowextsize, 920 xfs_extlen_t cowextsize,
921 bool is_dedupe) 921 unsigned int remap_flags)
922{ 922{
923 struct xfs_mount *mp = dest->i_mount; 923 struct xfs_mount *mp = dest->i_mount;
924 struct xfs_trans *tp; 924 struct xfs_trans *tp;
925 int error; 925 int error;
926 926
927 if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) 927 if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
928 return 0; 928 return 0;
929 929
930 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); 930 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
@@ -945,10 +945,6 @@ xfs_reflink_update_dest(
945 dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; 945 dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
946 } 946 }
947 947
948 if (!is_dedupe) {
949 xfs_trans_ichgtime(tp, dest,
950 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
951 }
952 xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); 948 xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
953 949
954 error = xfs_trans_commit(tp); 950 error = xfs_trans_commit(tp);
@@ -1112,19 +1108,28 @@ out:
1112/* 1108/*
1113 * Iteratively remap one file's extents (and holes) to another's. 1109 * Iteratively remap one file's extents (and holes) to another's.
1114 */ 1110 */
1115STATIC int 1111int
1116xfs_reflink_remap_blocks( 1112xfs_reflink_remap_blocks(
1117 struct xfs_inode *src, 1113 struct xfs_inode *src,
1118 xfs_fileoff_t srcoff, 1114 loff_t pos_in,
1119 struct xfs_inode *dest, 1115 struct xfs_inode *dest,
1120 xfs_fileoff_t destoff, 1116 loff_t pos_out,
1121 xfs_filblks_t len, 1117 loff_t remap_len,
1122 xfs_off_t new_isize) 1118 loff_t *remapped)
1123{ 1119{
1124 struct xfs_bmbt_irec imap; 1120 struct xfs_bmbt_irec imap;
1121 xfs_fileoff_t srcoff;
1122 xfs_fileoff_t destoff;
1123 xfs_filblks_t len;
1124 xfs_filblks_t range_len;
1125 xfs_filblks_t remapped_len = 0;
1126 xfs_off_t new_isize = pos_out + remap_len;
1125 int nimaps; 1127 int nimaps;
1126 int error = 0; 1128 int error = 0;
1127 xfs_filblks_t range_len; 1129
1130 destoff = XFS_B_TO_FSBT(src->i_mount, pos_out);
1131 srcoff = XFS_B_TO_FSBT(src->i_mount, pos_in);
1132 len = XFS_B_TO_FSB(src->i_mount, remap_len);
1128 1133
1129 /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */ 1134 /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */
1130 while (len) { 1135 while (len) {
@@ -1139,7 +1144,7 @@ xfs_reflink_remap_blocks(
1139 error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0); 1144 error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0);
1140 xfs_iunlock(src, lock_mode); 1145 xfs_iunlock(src, lock_mode);
1141 if (error) 1146 if (error)
1142 goto err; 1147 break;
1143 ASSERT(nimaps == 1); 1148 ASSERT(nimaps == 1);
1144 1149
1145 trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_IO_OVERWRITE, 1150 trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_IO_OVERWRITE,
@@ -1153,23 +1158,24 @@ xfs_reflink_remap_blocks(
1153 error = xfs_reflink_remap_extent(dest, &imap, destoff, 1158 error = xfs_reflink_remap_extent(dest, &imap, destoff,
1154 new_isize); 1159 new_isize);
1155 if (error) 1160 if (error)
1156 goto err; 1161 break;
1157 1162
1158 if (fatal_signal_pending(current)) { 1163 if (fatal_signal_pending(current)) {
1159 error = -EINTR; 1164 error = -EINTR;
1160 goto err; 1165 break;
1161 } 1166 }
1162 1167
1163 /* Advance drange/srange */ 1168 /* Advance drange/srange */
1164 srcoff += range_len; 1169 srcoff += range_len;
1165 destoff += range_len; 1170 destoff += range_len;
1166 len -= range_len; 1171 len -= range_len;
1172 remapped_len += range_len;
1167 } 1173 }
1168 1174
1169 return 0; 1175 if (error)
1170 1176 trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_);
1171err: 1177 *remapped = min_t(loff_t, remap_len,
1172 trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_); 1178 XFS_FSB_TO_B(src->i_mount, remapped_len));
1173 return error; 1179 return error;
1174} 1180}
1175 1181
@@ -1218,7 +1224,7 @@ retry:
1218} 1224}
1219 1225
1220/* Unlock both inodes after they've been prepped for a range clone. */ 1226/* Unlock both inodes after they've been prepped for a range clone. */
1221STATIC void 1227void
1222xfs_reflink_remap_unlock( 1228xfs_reflink_remap_unlock(
1223 struct file *file_in, 1229 struct file *file_in,
1224 struct file *file_out) 1230 struct file *file_out)
@@ -1286,21 +1292,20 @@ xfs_reflink_zero_posteof(
1286 * stale data in the destination file. Hence we reject these clone attempts with 1292 * stale data in the destination file. Hence we reject these clone attempts with
1287 * -EINVAL in this case. 1293 * -EINVAL in this case.
1288 */ 1294 */
1289STATIC int 1295int
1290xfs_reflink_remap_prep( 1296xfs_reflink_remap_prep(
1291 struct file *file_in, 1297 struct file *file_in,
1292 loff_t pos_in, 1298 loff_t pos_in,
1293 struct file *file_out, 1299 struct file *file_out,
1294 loff_t pos_out, 1300 loff_t pos_out,
1295 u64 *len, 1301 loff_t *len,
1296 bool is_dedupe) 1302 unsigned int remap_flags)
1297{ 1303{
1298 struct inode *inode_in = file_inode(file_in); 1304 struct inode *inode_in = file_inode(file_in);
1299 struct xfs_inode *src = XFS_I(inode_in); 1305 struct xfs_inode *src = XFS_I(inode_in);
1300 struct inode *inode_out = file_inode(file_out); 1306 struct inode *inode_out = file_inode(file_out);
1301 struct xfs_inode *dest = XFS_I(inode_out); 1307 struct xfs_inode *dest = XFS_I(inode_out);
1302 bool same_inode = (inode_in == inode_out); 1308 bool same_inode = (inode_in == inode_out);
1303 u64 blkmask = i_blocksize(inode_in) - 1;
1304 ssize_t ret; 1309 ssize_t ret;
1305 1310
1306 /* Lock both files against IO */ 1311 /* Lock both files against IO */
@@ -1323,29 +1328,11 @@ xfs_reflink_remap_prep(
1323 if (IS_DAX(inode_in) || IS_DAX(inode_out)) 1328 if (IS_DAX(inode_in) || IS_DAX(inode_out))
1324 goto out_unlock; 1329 goto out_unlock;
1325 1330
1326 ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, 1331 ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
1327 len, is_dedupe); 1332 len, remap_flags);
1328 if (ret <= 0) 1333 if (ret < 0 || *len == 0)
1329 goto out_unlock; 1334 goto out_unlock;
1330 1335
1331 /*
1332 * If the dedupe data matches, chop off the partial EOF block
1333 * from the source file so we don't try to dedupe the partial
1334 * EOF block.
1335 */
1336 if (is_dedupe) {
1337 *len &= ~blkmask;
1338 } else if (*len & blkmask) {
1339 /*
1340 * The user is attempting to share a partial EOF block,
1341 * if it's inside the destination EOF then reject it.
1342 */
1343 if (pos_out + *len < i_size_read(inode_out)) {
1344 ret = -EINVAL;
1345 goto out_unlock;
1346 }
1347 }
1348
1349 /* Attach dquots to dest inode before changing block map */ 1336 /* Attach dquots to dest inode before changing block map */
1350 ret = xfs_qm_dqattach(dest); 1337 ret = xfs_qm_dqattach(dest);
1351 if (ret) 1338 if (ret)
@@ -1365,31 +1352,9 @@ xfs_reflink_remap_prep(
1365 goto out_unlock; 1352 goto out_unlock;
1366 1353
1367 /* Zap any page cache for the destination file's range. */ 1354 /* Zap any page cache for the destination file's range. */
1368 truncate_inode_pages_range(&inode_out->i_data, pos_out, 1355 truncate_inode_pages_range(&inode_out->i_data,
1369 PAGE_ALIGN(pos_out + *len) - 1); 1356 round_down(pos_out, PAGE_SIZE),
1370 1357 round_up(pos_out + *len, PAGE_SIZE) - 1);
1371 /* If we're altering the file contents... */
1372 if (!is_dedupe) {
1373 /*
1374 * ...update the timestamps (which will grab the ilock again
1375 * from xfs_fs_dirty_inode, so we have to call it before we
1376 * take the ilock).
1377 */
1378 if (!(file_out->f_mode & FMODE_NOCMTIME)) {
1379 ret = file_update_time(file_out);
1380 if (ret)
1381 goto out_unlock;
1382 }
1383
1384 /*
1385 * ...clear the security bits if the process is not being run
1386 * by root. This keeps people from modifying setuid and setgid
1387 * binaries.
1388 */
1389 ret = file_remove_privs(file_out);
1390 if (ret)
1391 goto out_unlock;
1392 }
1393 1358
1394 return 1; 1359 return 1;
1395out_unlock: 1360out_unlock:
@@ -1398,72 +1363,6 @@ out_unlock:
1398} 1363}
1399 1364
1400/* 1365/*
1401 * Link a range of blocks from one file to another.
1402 */
1403int
1404xfs_reflink_remap_range(
1405 struct file *file_in,
1406 loff_t pos_in,
1407 struct file *file_out,
1408 loff_t pos_out,
1409 u64 len,
1410 bool is_dedupe)
1411{
1412 struct inode *inode_in = file_inode(file_in);
1413 struct xfs_inode *src = XFS_I(inode_in);
1414 struct inode *inode_out = file_inode(file_out);
1415 struct xfs_inode *dest = XFS_I(inode_out);
1416 struct xfs_mount *mp = src->i_mount;
1417 xfs_fileoff_t sfsbno, dfsbno;
1418 xfs_filblks_t fsblen;
1419 xfs_extlen_t cowextsize;
1420 ssize_t ret;
1421
1422 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1423 return -EOPNOTSUPP;
1424
1425 if (XFS_FORCED_SHUTDOWN(mp))
1426 return -EIO;
1427
1428 /* Prepare and then clone file data. */
1429 ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
1430 &len, is_dedupe);
1431 if (ret <= 0)
1432 return ret;
1433
1434 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
1435
1436 dfsbno = XFS_B_TO_FSBT(mp, pos_out);
1437 sfsbno = XFS_B_TO_FSBT(mp, pos_in);
1438 fsblen = XFS_B_TO_FSB(mp, len);
1439 ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
1440 pos_out + len);
1441 if (ret)
1442 goto out_unlock;
1443
1444 /*
1445 * Carry the cowextsize hint from src to dest if we're sharing the
1446 * entire source file to the entire destination file, the source file
1447 * has a cowextsize hint, and the destination file does not.
1448 */
1449 cowextsize = 0;
1450 if (pos_in == 0 && len == i_size_read(inode_in) &&
1451 (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
1452 pos_out == 0 && len >= i_size_read(inode_out) &&
1453 !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
1454 cowextsize = src->i_d.di_cowextsize;
1455
1456 ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
1457 is_dedupe);
1458
1459out_unlock:
1460 xfs_reflink_remap_unlock(file_in, file_out);
1461 if (ret)
1462 trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
1463 return ret;
1464}
1465
1466/*
1467 * The user wants to preemptively CoW all shared blocks in this file, 1366 * The user wants to preemptively CoW all shared blocks in this file,
1468 * which enables us to turn off the reflink flag. Iterate all 1367 * which enables us to turn off the reflink flag. Iterate all
1469 * extents which are not prealloc/delalloc to see which ranges are 1368 * extents which are not prealloc/delalloc to see which ranges are
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 7f47202b5639..6d73daef1f13 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -27,13 +27,24 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
27extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, 27extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
28 xfs_off_t count); 28 xfs_off_t count);
29extern int xfs_reflink_recover_cow(struct xfs_mount *mp); 29extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
30extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, 30extern loff_t xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
31 struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe); 31 struct file *file_out, loff_t pos_out, loff_t len,
32 unsigned int remap_flags);
32extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp, 33extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp,
33 struct xfs_inode *ip, bool *has_shared); 34 struct xfs_inode *ip, bool *has_shared);
34extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, 35extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
35 struct xfs_trans **tpp); 36 struct xfs_trans **tpp);
36extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, 37extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
37 xfs_off_t len); 38 xfs_off_t len);
39extern int xfs_reflink_remap_prep(struct file *file_in, loff_t pos_in,
40 struct file *file_out, loff_t pos_out, loff_t *len,
41 unsigned int remap_flags);
42extern int xfs_reflink_remap_blocks(struct xfs_inode *src, loff_t pos_in,
43 struct xfs_inode *dest, loff_t pos_out, loff_t remap_len,
44 loff_t *remapped);
45extern int xfs_reflink_update_dest(struct xfs_inode *dest, xfs_off_t newlen,
46 xfs_extlen_t cowextsize, unsigned int remap_flags);
47extern void xfs_reflink_remap_unlock(struct file *file_in,
48 struct file *file_out);
38 49
39#endif /* __XFS_REFLINK_H */ 50#endif /* __XFS_REFLINK_H */