about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Darrick J. Wong <darrick.wong@oracle.com> 2016-10-03 12:11:41 -0400
committer: Darrick J. Wong <darrick.wong@oracle.com> 2016-10-05 19:26:26 -0400
commit: cc714660bb8b14dd897cd805bbcd8b76a7606289 (patch)
tree: 7c55c2dba94c899cc23f07e2333c99d2b9dc9313
parent: 9fe26045e98f8787999f6aa45aec35d16565c1bd (diff)
xfs: add dedupe range vfs function
Define a VFS function which allows userspace to request that the kernel reflink a range of blocks between two files if the ranges' contents match. The function fits the new VFS ioctl that standardizes the checking for the btrfs EXTENT SAME ioctl.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
-rw-r--r-- fs/xfs/xfs_file.c 48
-rw-r--r-- fs/xfs/xfs_reflink.c 127
-rw-r--r-- fs/xfs/xfs_reflink.h 5
3 files changed, 174 insertions, 6 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index cf24b61951e3..39fde9f51303 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1010,7 +1010,8 @@ xfs_file_share_range(
1010 loff_t pos_in, 1010 loff_t pos_in,
1011 struct file *file_out, 1011 struct file *file_out,
1012 loff_t pos_out, 1012 loff_t pos_out,
1013 u64 len) 1013 u64 len,
1014 bool is_dedupe)
1014{ 1015{
1015 struct inode *inode_in; 1016 struct inode *inode_in;
1016 struct inode *inode_out; 1017 struct inode *inode_out;
@@ -1019,6 +1020,7 @@ xfs_file_share_range(
1019 loff_t isize; 1020 loff_t isize;
1020 int same_inode; 1021 int same_inode;
1021 loff_t blen; 1022 loff_t blen;
1023 unsigned int flags = 0;
1022 1024
1023 inode_in = file_inode(file_in); 1025 inode_in = file_inode(file_in);
1024 inode_out = file_inode(file_out); 1026 inode_out = file_inode(file_out);
@@ -1056,6 +1058,15 @@ xfs_file_share_range(
1056 pos_in + len > isize) 1058 pos_in + len > isize)
1057 return -EINVAL; 1059 return -EINVAL;
1058 1060
1061 /* Don't allow dedupe past EOF in the dest file */
1062 if (is_dedupe) {
1063 loff_t disize;
1064
1065 disize = i_size_read(inode_out);
1066 if (pos_out >= disize || pos_out + len > disize)
1067 return -EINVAL;
1068 }
1069
1059 /* If we're linking to EOF, continue to the block boundary. */ 1070 /* If we're linking to EOF, continue to the block boundary. */
1060 if (pos_in + len == isize) 1071 if (pos_in + len == isize)
1061 blen = ALIGN(isize, bs) - pos_in; 1072 blen = ALIGN(isize, bs) - pos_in;
@@ -1079,8 +1090,10 @@ xfs_file_share_range(
1079 if (ret) 1090 if (ret)
1080 goto out_unlock; 1091 goto out_unlock;
1081 1092
1093 if (is_dedupe)
1094 flags |= XFS_REFLINK_DEDUPE;
1082 ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out), 1095 ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out),
1083 pos_out, len); 1096 pos_out, len, flags);
1084 if (ret < 0) 1097 if (ret < 0)
1085 goto out_unlock; 1098 goto out_unlock;
1086 1099
@@ -1100,7 +1113,7 @@ xfs_file_copy_range(
1100 int error; 1113 int error;
1101 1114
1102 error = xfs_file_share_range(file_in, pos_in, file_out, pos_out, 1115 error = xfs_file_share_range(file_in, pos_in, file_out, pos_out,
1103 len); 1116 len, false);
1104 if (error) 1117 if (error)
1105 return error; 1118 return error;
1106 return len; 1119 return len;
@@ -1115,7 +1128,33 @@ xfs_file_clone_range(
1115 u64 len) 1128 u64 len)
1116{ 1129{
1117 return xfs_file_share_range(file_in, pos_in, file_out, pos_out, 1130 return xfs_file_share_range(file_in, pos_in, file_out, pos_out,
1118 len); 1131 len, false);
1132}
1133
1134#define XFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
1135STATIC ssize_t
1136xfs_file_dedupe_range(
1137 struct file *src_file,
1138 u64 loff,
1139 u64 len,
1140 struct file *dst_file,
1141 u64 dst_loff)
1142{
1143 int error;
1144
1145 /*
1146 * Limit the total length we will dedupe for each operation.
1147 * This is intended to bound the total time spent in this
1148 * ioctl to something sane.
1149 */
1150 if (len > XFS_MAX_DEDUPE_LEN)
1151 len = XFS_MAX_DEDUPE_LEN;
1152
1153 error = xfs_file_share_range(src_file, loff, dst_file, dst_loff,
1154 len, true);
1155 if (error)
1156 return error;
1157 return len;
1119} 1158}
1120 1159
1121STATIC int 1160STATIC int
@@ -1779,6 +1818,7 @@ const struct file_operations xfs_file_operations = {
1779 .fallocate = xfs_file_fallocate, 1818 .fallocate = xfs_file_fallocate,
1780 .copy_file_range = xfs_file_copy_range, 1819 .copy_file_range = xfs_file_copy_range,
1781 .clone_file_range = xfs_file_clone_range, 1820 .clone_file_range = xfs_file_clone_range,
1821 .dedupe_file_range = xfs_file_dedupe_range,
1782}; 1822};
1783 1823
1784const struct file_operations xfs_dir_file_operations = { 1824const struct file_operations xfs_dir_file_operations = {
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index c1e98a43a937..6b22669421b2 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1150,6 +1150,111 @@ err:
1150} 1150}
1151 1151
1152/* 1152/*
1153 * Read a page's worth of file data into the page cache. Return the page
1154 * locked.
1155 */
1156static struct page *
1157xfs_get_page(
1158 struct inode *inode,
1159 xfs_off_t offset)
1160{
1161 struct address_space *mapping;
1162 struct page *page;
1163 pgoff_t n;
1164
1165 n = offset >> PAGE_SHIFT;
1166 mapping = inode->i_mapping;
1167 page = read_mapping_page(mapping, n, NULL);
1168 if (IS_ERR(page))
1169 return page;
1170 if (!PageUptodate(page)) {
1171 put_page(page);
1172 return ERR_PTR(-EIO);
1173 }
1174 lock_page(page);
1175 return page;
1176}
1177
1178/*
1179 * Compare extents of two files to see if they are the same.
1180 */
1181static int
1182xfs_compare_extents(
1183 struct inode *src,
1184 xfs_off_t srcoff,
1185 struct inode *dest,
1186 xfs_off_t destoff,
1187 xfs_off_t len,
1188 bool *is_same)
1189{
1190 xfs_off_t src_poff;
1191 xfs_off_t dest_poff;
1192 void *src_addr;
1193 void *dest_addr;
1194 struct page *src_page;
1195 struct page *dest_page;
1196 xfs_off_t cmp_len;
1197 bool same;
1198 int error;
1199
1200 error = -EINVAL;
1201 same = true;
1202 while (len) {
1203 src_poff = srcoff & (PAGE_SIZE - 1);
1204 dest_poff = destoff & (PAGE_SIZE - 1);
1205 cmp_len = min(PAGE_SIZE - src_poff,
1206 PAGE_SIZE - dest_poff);
1207 cmp_len = min(cmp_len, len);
1208 ASSERT(cmp_len > 0);
1209
1210 trace_xfs_reflink_compare_extents(XFS_I(src), srcoff, cmp_len,
1211 XFS_I(dest), destoff);
1212
1213 src_page = xfs_get_page(src, srcoff);
1214 if (IS_ERR(src_page)) {
1215 error = PTR_ERR(src_page);
1216 goto out_error;
1217 }
1218 dest_page = xfs_get_page(dest, destoff);
1219 if (IS_ERR(dest_page)) {
1220 error = PTR_ERR(dest_page);
1221 unlock_page(src_page);
1222 put_page(src_page);
1223 goto out_error;
1224 }
1225 src_addr = kmap_atomic(src_page);
1226 dest_addr = kmap_atomic(dest_page);
1227
1228 flush_dcache_page(src_page);
1229 flush_dcache_page(dest_page);
1230
1231 if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
1232 same = false;
1233
1234 kunmap_atomic(dest_addr);
1235 kunmap_atomic(src_addr);
1236 unlock_page(dest_page);
1237 unlock_page(src_page);
1238 put_page(dest_page);
1239 put_page(src_page);
1240
1241 if (!same)
1242 break;
1243
1244 srcoff += cmp_len;
1245 destoff += cmp_len;
1246 len -= cmp_len;
1247 }
1248
1249 *is_same = same;
1250 return 0;
1251
1252out_error:
1253 trace_xfs_reflink_compare_extents_error(XFS_I(dest), error, _RET_IP_);
1254 return error;
1255}
1256
1257/*
1153 * Link a range of blocks from one file to another. 1258 * Link a range of blocks from one file to another.
1154 */ 1259 */
1155int 1260int
@@ -1158,12 +1263,14 @@ xfs_reflink_remap_range(
1158 xfs_off_t srcoff, 1263 xfs_off_t srcoff,
1159 struct xfs_inode *dest, 1264 struct xfs_inode *dest,
1160 xfs_off_t destoff, 1265 xfs_off_t destoff,
1161 xfs_off_t len) 1266 xfs_off_t len,
1267 unsigned int flags)
1162{ 1268{
1163 struct xfs_mount *mp = src->i_mount; 1269 struct xfs_mount *mp = src->i_mount;
1164 xfs_fileoff_t sfsbno, dfsbno; 1270 xfs_fileoff_t sfsbno, dfsbno;
1165 xfs_filblks_t fsblen; 1271 xfs_filblks_t fsblen;
1166 int error; 1272 int error;
1273 bool is_same;
1167 1274
1168 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1275 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1169 return -EOPNOTSUPP; 1276 return -EOPNOTSUPP;
@@ -1175,6 +1282,9 @@ xfs_reflink_remap_range(
1175 if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) 1282 if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
1176 return -EINVAL; 1283 return -EINVAL;
1177 1284
1285 if (flags & ~XFS_REFLINK_ALL)
1286 return -EINVAL;
1287
1178 trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff); 1288 trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff);
1179 1289
1180 /* Lock both files against IO */ 1290 /* Lock both files against IO */
@@ -1186,6 +1296,21 @@ xfs_reflink_remap_range(
1186 xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); 1296 xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
1187 } 1297 }
1188 1298
1299 /*
1300 * Check that the extents are the same.
1301 */
1302 if (flags & XFS_REFLINK_DEDUPE) {
1303 is_same = false;
1304 error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest),
1305 destoff, len, &is_same);
1306 if (error)
1307 goto out_error;
1308 if (!is_same) {
1309 error = -EBADE;
1310 goto out_error;
1311 }
1312 }
1313
1189 error = xfs_reflink_set_inode_flag(src, dest); 1314 error = xfs_reflink_set_inode_flag(src, dest);
1190 if (error) 1315 if (error)
1191 goto out_error; 1316 goto out_error;
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index c35ce299281b..df82b2049187 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -43,7 +43,10 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
43extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, 43extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
44 xfs_off_t count); 44 xfs_off_t count);
45extern int xfs_reflink_recover_cow(struct xfs_mount *mp); 45extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
46#define XFS_REFLINK_DEDUPE 1 /* only reflink if contents match */
47#define XFS_REFLINK_ALL (XFS_REFLINK_DEDUPE)
46extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff, 48extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff,
47 struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len); 49 struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len,
50 unsigned int flags);
48 51
49#endif /* __XFS_REFLINK_H */ 52#endif /* __XFS_REFLINK_H */