diff options
author | Darrick J. Wong <darrick.wong@oracle.com> | 2016-10-03 12:11:41 -0400 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2016-10-05 19:26:26 -0400 |
commit | cc714660bb8b14dd897cd805bbcd8b76a7606289 (patch) | |
tree | 7c55c2dba94c899cc23f07e2333c99d2b9dc9313 | |
parent | 9fe26045e98f8787999f6aa45aec35d16565c1bd (diff) |
xfs: add dedupe range vfs function
Define a VFS function which allows userspace to request that the
kernel reflink a range of blocks between two files if the ranges'
contents match. The function implements the handler for the new VFS
dedupe ioctl, which standardizes the argument checking that was
originally done by the btrfs EXTENT_SAME ioctl.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
-rw-r--r-- | fs/xfs/xfs_file.c | 48 | ||||
-rw-r--r-- | fs/xfs/xfs_reflink.c | 127 | ||||
-rw-r--r-- | fs/xfs/xfs_reflink.h | 5 |
3 files changed, 174 insertions, 6 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index cf24b61951e3..39fde9f51303 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -1010,7 +1010,8 @@ xfs_file_share_range( | |||
1010 | loff_t pos_in, | 1010 | loff_t pos_in, |
1011 | struct file *file_out, | 1011 | struct file *file_out, |
1012 | loff_t pos_out, | 1012 | loff_t pos_out, |
1013 | u64 len) | 1013 | u64 len, |
1014 | bool is_dedupe) | ||
1014 | { | 1015 | { |
1015 | struct inode *inode_in; | 1016 | struct inode *inode_in; |
1016 | struct inode *inode_out; | 1017 | struct inode *inode_out; |
@@ -1019,6 +1020,7 @@ xfs_file_share_range( | |||
1019 | loff_t isize; | 1020 | loff_t isize; |
1020 | int same_inode; | 1021 | int same_inode; |
1021 | loff_t blen; | 1022 | loff_t blen; |
1023 | unsigned int flags = 0; | ||
1022 | 1024 | ||
1023 | inode_in = file_inode(file_in); | 1025 | inode_in = file_inode(file_in); |
1024 | inode_out = file_inode(file_out); | 1026 | inode_out = file_inode(file_out); |
@@ -1056,6 +1058,15 @@ xfs_file_share_range( | |||
1056 | pos_in + len > isize) | 1058 | pos_in + len > isize) |
1057 | return -EINVAL; | 1059 | return -EINVAL; |
1058 | 1060 | ||
1061 | /* Don't allow dedupe past EOF in the dest file */ | ||
1062 | if (is_dedupe) { | ||
1063 | loff_t disize; | ||
1064 | |||
1065 | disize = i_size_read(inode_out); | ||
1066 | if (pos_out >= disize || pos_out + len > disize) | ||
1067 | return -EINVAL; | ||
1068 | } | ||
1069 | |||
1059 | /* If we're linking to EOF, continue to the block boundary. */ | 1070 | /* If we're linking to EOF, continue to the block boundary. */ |
1060 | if (pos_in + len == isize) | 1071 | if (pos_in + len == isize) |
1061 | blen = ALIGN(isize, bs) - pos_in; | 1072 | blen = ALIGN(isize, bs) - pos_in; |
@@ -1079,8 +1090,10 @@ xfs_file_share_range( | |||
1079 | if (ret) | 1090 | if (ret) |
1080 | goto out_unlock; | 1091 | goto out_unlock; |
1081 | 1092 | ||
1093 | if (is_dedupe) | ||
1094 | flags |= XFS_REFLINK_DEDUPE; | ||
1082 | ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out), | 1095 | ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out), |
1083 | pos_out, len); | 1096 | pos_out, len, flags); |
1084 | if (ret < 0) | 1097 | if (ret < 0) |
1085 | goto out_unlock; | 1098 | goto out_unlock; |
1086 | 1099 | ||
@@ -1100,7 +1113,7 @@ xfs_file_copy_range( | |||
1100 | int error; | 1113 | int error; |
1101 | 1114 | ||
1102 | error = xfs_file_share_range(file_in, pos_in, file_out, pos_out, | 1115 | error = xfs_file_share_range(file_in, pos_in, file_out, pos_out, |
1103 | len); | 1116 | len, false); |
1104 | if (error) | 1117 | if (error) |
1105 | return error; | 1118 | return error; |
1106 | return len; | 1119 | return len; |
@@ -1115,7 +1128,33 @@ xfs_file_clone_range( | |||
1115 | u64 len) | 1128 | u64 len) |
1116 | { | 1129 | { |
1117 | return xfs_file_share_range(file_in, pos_in, file_out, pos_out, | 1130 | return xfs_file_share_range(file_in, pos_in, file_out, pos_out, |
1118 | len); | 1131 | len, false); |
1132 | } | ||
1133 | |||
1134 | #define XFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) | ||
1135 | STATIC ssize_t | ||
1136 | xfs_file_dedupe_range( | ||
1137 | struct file *src_file, | ||
1138 | u64 loff, | ||
1139 | u64 len, | ||
1140 | struct file *dst_file, | ||
1141 | u64 dst_loff) | ||
1142 | { | ||
1143 | int error; | ||
1144 | |||
1145 | /* | ||
1146 | * Limit the total length we will dedupe for each operation. | ||
1147 | * This is intended to bound the total time spent in this | ||
1148 | * ioctl to something sane. | ||
1149 | */ | ||
1150 | if (len > XFS_MAX_DEDUPE_LEN) | ||
1151 | len = XFS_MAX_DEDUPE_LEN; | ||
1152 | |||
1153 | error = xfs_file_share_range(src_file, loff, dst_file, dst_loff, | ||
1154 | len, true); | ||
1155 | if (error) | ||
1156 | return error; | ||
1157 | return len; | ||
1119 | } | 1158 | } |
1120 | 1159 | ||
1121 | STATIC int | 1160 | STATIC int |
@@ -1779,6 +1818,7 @@ const struct file_operations xfs_file_operations = { | |||
1779 | .fallocate = xfs_file_fallocate, | 1818 | .fallocate = xfs_file_fallocate, |
1780 | .copy_file_range = xfs_file_copy_range, | 1819 | .copy_file_range = xfs_file_copy_range, |
1781 | .clone_file_range = xfs_file_clone_range, | 1820 | .clone_file_range = xfs_file_clone_range, |
1821 | .dedupe_file_range = xfs_file_dedupe_range, | ||
1782 | }; | 1822 | }; |
1783 | 1823 | ||
1784 | const struct file_operations xfs_dir_file_operations = { | 1824 | const struct file_operations xfs_dir_file_operations = { |
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index c1e98a43a937..6b22669421b2 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c | |||
@@ -1150,6 +1150,111 @@ err: | |||
1150 | } | 1150 | } |
1151 | 1151 | ||
1152 | /* | 1152 | /* |
1153 | * Read a page's worth of file data into the page cache. Return the page | ||
1154 | * locked. | ||
1155 | */ | ||
1156 | static struct page * | ||
1157 | xfs_get_page( | ||
1158 | struct inode *inode, | ||
1159 | xfs_off_t offset) | ||
1160 | { | ||
1161 | struct address_space *mapping; | ||
1162 | struct page *page; | ||
1163 | pgoff_t n; | ||
1164 | |||
1165 | n = offset >> PAGE_SHIFT; | ||
1166 | mapping = inode->i_mapping; | ||
1167 | page = read_mapping_page(mapping, n, NULL); | ||
1168 | if (IS_ERR(page)) | ||
1169 | return page; | ||
1170 | if (!PageUptodate(page)) { | ||
1171 | put_page(page); | ||
1172 | return ERR_PTR(-EIO); | ||
1173 | } | ||
1174 | lock_page(page); | ||
1175 | return page; | ||
1176 | } | ||
1177 | |||
1178 | /* | ||
1179 | * Compare extents of two files to see if they are the same. | ||
1180 | */ | ||
1181 | static int | ||
1182 | xfs_compare_extents( | ||
1183 | struct inode *src, | ||
1184 | xfs_off_t srcoff, | ||
1185 | struct inode *dest, | ||
1186 | xfs_off_t destoff, | ||
1187 | xfs_off_t len, | ||
1188 | bool *is_same) | ||
1189 | { | ||
1190 | xfs_off_t src_poff; | ||
1191 | xfs_off_t dest_poff; | ||
1192 | void *src_addr; | ||
1193 | void *dest_addr; | ||
1194 | struct page *src_page; | ||
1195 | struct page *dest_page; | ||
1196 | xfs_off_t cmp_len; | ||
1197 | bool same; | ||
1198 | int error; | ||
1199 | |||
1200 | error = -EINVAL; | ||
1201 | same = true; | ||
1202 | while (len) { | ||
1203 | src_poff = srcoff & (PAGE_SIZE - 1); | ||
1204 | dest_poff = destoff & (PAGE_SIZE - 1); | ||
1205 | cmp_len = min(PAGE_SIZE - src_poff, | ||
1206 | PAGE_SIZE - dest_poff); | ||
1207 | cmp_len = min(cmp_len, len); | ||
1208 | ASSERT(cmp_len > 0); | ||
1209 | |||
1210 | trace_xfs_reflink_compare_extents(XFS_I(src), srcoff, cmp_len, | ||
1211 | XFS_I(dest), destoff); | ||
1212 | |||
1213 | src_page = xfs_get_page(src, srcoff); | ||
1214 | if (IS_ERR(src_page)) { | ||
1215 | error = PTR_ERR(src_page); | ||
1216 | goto out_error; | ||
1217 | } | ||
1218 | dest_page = xfs_get_page(dest, destoff); | ||
1219 | if (IS_ERR(dest_page)) { | ||
1220 | error = PTR_ERR(dest_page); | ||
1221 | unlock_page(src_page); | ||
1222 | put_page(src_page); | ||
1223 | goto out_error; | ||
1224 | } | ||
1225 | src_addr = kmap_atomic(src_page); | ||
1226 | dest_addr = kmap_atomic(dest_page); | ||
1227 | |||
1228 | flush_dcache_page(src_page); | ||
1229 | flush_dcache_page(dest_page); | ||
1230 | |||
1231 | if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len)) | ||
1232 | same = false; | ||
1233 | |||
1234 | kunmap_atomic(dest_addr); | ||
1235 | kunmap_atomic(src_addr); | ||
1236 | unlock_page(dest_page); | ||
1237 | unlock_page(src_page); | ||
1238 | put_page(dest_page); | ||
1239 | put_page(src_page); | ||
1240 | |||
1241 | if (!same) | ||
1242 | break; | ||
1243 | |||
1244 | srcoff += cmp_len; | ||
1245 | destoff += cmp_len; | ||
1246 | len -= cmp_len; | ||
1247 | } | ||
1248 | |||
1249 | *is_same = same; | ||
1250 | return 0; | ||
1251 | |||
1252 | out_error: | ||
1253 | trace_xfs_reflink_compare_extents_error(XFS_I(dest), error, _RET_IP_); | ||
1254 | return error; | ||
1255 | } | ||
1256 | |||
1257 | /* | ||
1153 | * Link a range of blocks from one file to another. | 1258 | * Link a range of blocks from one file to another. |
1154 | */ | 1259 | */ |
1155 | int | 1260 | int |
@@ -1158,12 +1263,14 @@ xfs_reflink_remap_range( | |||
1158 | xfs_off_t srcoff, | 1263 | xfs_off_t srcoff, |
1159 | struct xfs_inode *dest, | 1264 | struct xfs_inode *dest, |
1160 | xfs_off_t destoff, | 1265 | xfs_off_t destoff, |
1161 | xfs_off_t len) | 1266 | xfs_off_t len, |
1267 | unsigned int flags) | ||
1162 | { | 1268 | { |
1163 | struct xfs_mount *mp = src->i_mount; | 1269 | struct xfs_mount *mp = src->i_mount; |
1164 | xfs_fileoff_t sfsbno, dfsbno; | 1270 | xfs_fileoff_t sfsbno, dfsbno; |
1165 | xfs_filblks_t fsblen; | 1271 | xfs_filblks_t fsblen; |
1166 | int error; | 1272 | int error; |
1273 | bool is_same; | ||
1167 | 1274 | ||
1168 | if (!xfs_sb_version_hasreflink(&mp->m_sb)) | 1275 | if (!xfs_sb_version_hasreflink(&mp->m_sb)) |
1169 | return -EOPNOTSUPP; | 1276 | return -EOPNOTSUPP; |
@@ -1175,6 +1282,9 @@ xfs_reflink_remap_range( | |||
1175 | if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) | 1282 | if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) |
1176 | return -EINVAL; | 1283 | return -EINVAL; |
1177 | 1284 | ||
1285 | if (flags & ~XFS_REFLINK_ALL) | ||
1286 | return -EINVAL; | ||
1287 | |||
1178 | trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff); | 1288 | trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff); |
1179 | 1289 | ||
1180 | /* Lock both files against IO */ | 1290 | /* Lock both files against IO */ |
@@ -1186,6 +1296,21 @@ xfs_reflink_remap_range( | |||
1186 | xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); | 1296 | xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); |
1187 | } | 1297 | } |
1188 | 1298 | ||
1299 | /* | ||
1300 | * Check that the extents are the same. | ||
1301 | */ | ||
1302 | if (flags & XFS_REFLINK_DEDUPE) { | ||
1303 | is_same = false; | ||
1304 | error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest), | ||
1305 | destoff, len, &is_same); | ||
1306 | if (error) | ||
1307 | goto out_error; | ||
1308 | if (!is_same) { | ||
1309 | error = -EBADE; | ||
1310 | goto out_error; | ||
1311 | } | ||
1312 | } | ||
1313 | |||
1189 | error = xfs_reflink_set_inode_flag(src, dest); | 1314 | error = xfs_reflink_set_inode_flag(src, dest); |
1190 | if (error) | 1315 | if (error) |
1191 | goto out_error; | 1316 | goto out_error; |
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index c35ce299281b..df82b2049187 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h | |||
@@ -43,7 +43,10 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, | |||
43 | extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, | 43 | extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, |
44 | xfs_off_t count); | 44 | xfs_off_t count); |
45 | extern int xfs_reflink_recover_cow(struct xfs_mount *mp); | 45 | extern int xfs_reflink_recover_cow(struct xfs_mount *mp); |
46 | #define XFS_REFLINK_DEDUPE 1 /* only reflink if contents match */ | ||
47 | #define XFS_REFLINK_ALL (XFS_REFLINK_DEDUPE) | ||
46 | extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff, | 48 | extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff, |
47 | struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len); | 49 | struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len, |
50 | unsigned int flags); | ||
48 | 51 | ||
49 | #endif /* __XFS_REFLINK_H */ | 52 | #endif /* __XFS_REFLINK_H */ |