diff options
author | David S. Miller <davem@davemloft.net> | 2018-10-13 00:38:46 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-10-13 00:38:46 -0400 |
commit | d864991b220b7c62e81d21209e1fd978fd67352c (patch) | |
tree | b570a1ad6fc1b959c5bcda6ceca0b321319c01e0 /fs/xfs/xfs_reflink.c | |
parent | a688c53a0277d8ea21d86a5c56884892e3442c5e (diff) | |
parent | bab5c80b211035739997ebd361a679fa85b39465 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Conflicts were easy to resolve using immediate context mostly,
except the cls_u32.c one where I simply took the entire HEAD
chunk.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'fs/xfs/xfs_reflink.c')
-rw-r--r-- | fs/xfs/xfs_reflink.c | 200 |
1 files changed, 165 insertions, 35 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 5289e22cb081..42ea7bab9144 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c | |||
@@ -1220,35 +1220,92 @@ retry: | |||
1220 | return 0; | 1220 | return 0; |
1221 | } | 1221 | } |
1222 | 1222 | ||
1223 | /* Unlock both inodes after they've been prepped for a range clone. */ | ||
1224 | STATIC void | ||
1225 | xfs_reflink_remap_unlock( | ||
1226 | struct file *file_in, | ||
1227 | struct file *file_out) | ||
1228 | { | ||
1229 | struct inode *inode_in = file_inode(file_in); | ||
1230 | struct xfs_inode *src = XFS_I(inode_in); | ||
1231 | struct inode *inode_out = file_inode(file_out); | ||
1232 | struct xfs_inode *dest = XFS_I(inode_out); | ||
1233 | bool same_inode = (inode_in == inode_out); | ||
1234 | |||
1235 | xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); | ||
1236 | if (!same_inode) | ||
1237 | xfs_iunlock(src, XFS_MMAPLOCK_SHARED); | ||
1238 | inode_unlock(inode_out); | ||
1239 | if (!same_inode) | ||
1240 | inode_unlock_shared(inode_in); | ||
1241 | } | ||
1242 | |||
1223 | /* | 1243 | /* |
1224 | * Link a range of blocks from one file to another. | 1244 | * If we're reflinking to a point past the destination file's EOF, we must |
1245 | * zero any speculative post-EOF preallocations that sit between the old EOF | ||
1246 | * and the destination file offset. | ||
1225 | */ | 1247 | */ |
1226 | int | 1248 | static int |
1227 | xfs_reflink_remap_range( | 1249 | xfs_reflink_zero_posteof( |
1250 | struct xfs_inode *ip, | ||
1251 | loff_t pos) | ||
1252 | { | ||
1253 | loff_t isize = i_size_read(VFS_I(ip)); | ||
1254 | |||
1255 | if (pos <= isize) | ||
1256 | return 0; | ||
1257 | |||
1258 | trace_xfs_zero_eof(ip, isize, pos - isize); | ||
1259 | return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL, | ||
1260 | &xfs_iomap_ops); | ||
1261 | } | ||
1262 | |||
1263 | /* | ||
1264 | * Prepare two files for range cloning. Upon a successful return both inodes | ||
1265 | * will have the iolock and mmaplock held, the page cache of the out file will | ||
1266 | * be truncated, and any leases on the out file will have been broken. This | ||
1267 | * function borrows heavily from xfs_file_aio_write_checks. | ||
1268 | * | ||
1269 | * The VFS allows partial EOF blocks to "match" for dedupe even though it hasn't | ||
1270 | * checked that the bytes beyond EOF physically match. Hence we cannot use the | ||
1271 | * EOF block in the source dedupe range because it's not a complete block match, | ||
1272 | * hence can introduce a corruption into the file that has its block replaced. | ||
1273 | * | ||
1274 | * In similar fashion, the VFS file cloning also allows partial EOF blocks to be | ||
1275 | * "block aligned" for the purposes of cloning entire files. However, if the | ||
1276 | * source file range includes the EOF block and it lands within the existing EOF | ||
1277 | * of the destination file, then we can expose stale data from beyond the source | ||
1278 | * file EOF in the destination file. | ||
1279 | * | ||
1280 | * XFS doesn't support partial block sharing, so in both cases we have to check | ||
1281 | * these cases ourselves. For dedupe, we can simply round the length to dedupe | ||
1282 | * down to the previous whole block and ignore the partial EOF block. While this | ||
1283 | * means we can't dedupe the last block of a file, this is an acceptable | ||
1284 | * tradeoff for simplicity of implementation. | ||
1285 | * | ||
1286 | * For cloning, we want to share the partial EOF block if it is also the new EOF | ||
1287 | * block of the destination file. If the partial EOF block lies inside the | ||
1288 | * existing destination EOF, then we have to abort the clone to avoid exposing | ||
1289 | * stale data in the destination file. Hence we reject these clone attempts with | ||
1290 | * -EINVAL in this case. | ||
1291 | */ | ||
1292 | STATIC int | ||
1293 | xfs_reflink_remap_prep( | ||
1228 | struct file *file_in, | 1294 | struct file *file_in, |
1229 | loff_t pos_in, | 1295 | loff_t pos_in, |
1230 | struct file *file_out, | 1296 | struct file *file_out, |
1231 | loff_t pos_out, | 1297 | loff_t pos_out, |
1232 | u64 len, | 1298 | u64 *len, |
1233 | bool is_dedupe) | 1299 | bool is_dedupe) |
1234 | { | 1300 | { |
1235 | struct inode *inode_in = file_inode(file_in); | 1301 | struct inode *inode_in = file_inode(file_in); |
1236 | struct xfs_inode *src = XFS_I(inode_in); | 1302 | struct xfs_inode *src = XFS_I(inode_in); |
1237 | struct inode *inode_out = file_inode(file_out); | 1303 | struct inode *inode_out = file_inode(file_out); |
1238 | struct xfs_inode *dest = XFS_I(inode_out); | 1304 | struct xfs_inode *dest = XFS_I(inode_out); |
1239 | struct xfs_mount *mp = src->i_mount; | ||
1240 | bool same_inode = (inode_in == inode_out); | 1305 | bool same_inode = (inode_in == inode_out); |
1241 | xfs_fileoff_t sfsbno, dfsbno; | 1306 | u64 blkmask = i_blocksize(inode_in) - 1; |
1242 | xfs_filblks_t fsblen; | ||
1243 | xfs_extlen_t cowextsize; | ||
1244 | ssize_t ret; | 1307 | ssize_t ret; |
1245 | 1308 | ||
1246 | if (!xfs_sb_version_hasreflink(&mp->m_sb)) | ||
1247 | return -EOPNOTSUPP; | ||
1248 | |||
1249 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1250 | return -EIO; | ||
1251 | |||
1252 | /* Lock both files against IO */ | 1309 | /* Lock both files against IO */ |
1253 | ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out); | 1310 | ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out); |
1254 | if (ret) | 1311 | if (ret) |
@@ -1270,33 +1327,115 @@ xfs_reflink_remap_range( | |||
1270 | goto out_unlock; | 1327 | goto out_unlock; |
1271 | 1328 | ||
1272 | ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, | 1329 | ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, |
1273 | &len, is_dedupe); | 1330 | len, is_dedupe); |
1274 | if (ret <= 0) | 1331 | if (ret <= 0) |
1275 | goto out_unlock; | 1332 | goto out_unlock; |
1276 | 1333 | ||
1334 | /* | ||
1335 | * If the dedupe data matches, chop off the partial EOF block | ||
1336 | * from the source file so we don't try to dedupe the partial | ||
1337 | * EOF block. | ||
1338 | */ | ||
1339 | if (is_dedupe) { | ||
1340 | *len &= ~blkmask; | ||
1341 | } else if (*len & blkmask) { | ||
1342 | /* | ||
1343 | * The user is attempting to share a partial EOF block, | ||
1344 | * if it's inside the destination EOF then reject it. | ||
1345 | */ | ||
1346 | if (pos_out + *len < i_size_read(inode_out)) { | ||
1347 | ret = -EINVAL; | ||
1348 | goto out_unlock; | ||
1349 | } | ||
1350 | } | ||
1351 | |||
1277 | /* Attach dquots to dest inode before changing block map */ | 1352 | /* Attach dquots to dest inode before changing block map */ |
1278 | ret = xfs_qm_dqattach(dest); | 1353 | ret = xfs_qm_dqattach(dest); |
1279 | if (ret) | 1354 | if (ret) |
1280 | goto out_unlock; | 1355 | goto out_unlock; |
1281 | 1356 | ||
1282 | trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); | ||
1283 | |||
1284 | /* | 1357 | /* |
1285 | * Clear out post-eof preallocations because we don't have page cache | 1358 | * Zero existing post-eof speculative preallocations in the destination |
1286 | * backing the delayed allocations and they'll never get freed on | 1359 | * file. |
1287 | * their own. | ||
1288 | */ | 1360 | */ |
1289 | if (xfs_can_free_eofblocks(dest, true)) { | 1361 | ret = xfs_reflink_zero_posteof(dest, pos_out); |
1290 | ret = xfs_free_eofblocks(dest); | 1362 | if (ret) |
1291 | if (ret) | 1363 | goto out_unlock; |
1292 | goto out_unlock; | ||
1293 | } | ||
1294 | 1364 | ||
1295 | /* Set flags and remap blocks. */ | 1365 | /* Set flags and remap blocks. */ |
1296 | ret = xfs_reflink_set_inode_flag(src, dest); | 1366 | ret = xfs_reflink_set_inode_flag(src, dest); |
1297 | if (ret) | 1367 | if (ret) |
1298 | goto out_unlock; | 1368 | goto out_unlock; |
1299 | 1369 | ||
1370 | /* Zap any page cache for the destination file's range. */ | ||
1371 | truncate_inode_pages_range(&inode_out->i_data, pos_out, | ||
1372 | PAGE_ALIGN(pos_out + *len) - 1); | ||
1373 | |||
1374 | /* If we're altering the file contents... */ | ||
1375 | if (!is_dedupe) { | ||
1376 | /* | ||
1377 | * ...update the timestamps (which will grab the ilock again | ||
1378 | * from xfs_fs_dirty_inode, so we have to call it before we | ||
1379 | * take the ilock). | ||
1380 | */ | ||
1381 | if (!(file_out->f_mode & FMODE_NOCMTIME)) { | ||
1382 | ret = file_update_time(file_out); | ||
1383 | if (ret) | ||
1384 | goto out_unlock; | ||
1385 | } | ||
1386 | |||
1387 | /* | ||
1388 | * ...clear the security bits if the process is not being run | ||
1389 | * by root. This keeps people from modifying setuid and setgid | ||
1390 | * binaries. | ||
1391 | */ | ||
1392 | ret = file_remove_privs(file_out); | ||
1393 | if (ret) | ||
1394 | goto out_unlock; | ||
1395 | } | ||
1396 | |||
1397 | return 1; | ||
1398 | out_unlock: | ||
1399 | xfs_reflink_remap_unlock(file_in, file_out); | ||
1400 | return ret; | ||
1401 | } | ||
1402 | |||
1403 | /* | ||
1404 | * Link a range of blocks from one file to another. | ||
1405 | */ | ||
1406 | int | ||
1407 | xfs_reflink_remap_range( | ||
1408 | struct file *file_in, | ||
1409 | loff_t pos_in, | ||
1410 | struct file *file_out, | ||
1411 | loff_t pos_out, | ||
1412 | u64 len, | ||
1413 | bool is_dedupe) | ||
1414 | { | ||
1415 | struct inode *inode_in = file_inode(file_in); | ||
1416 | struct xfs_inode *src = XFS_I(inode_in); | ||
1417 | struct inode *inode_out = file_inode(file_out); | ||
1418 | struct xfs_inode *dest = XFS_I(inode_out); | ||
1419 | struct xfs_mount *mp = src->i_mount; | ||
1420 | xfs_fileoff_t sfsbno, dfsbno; | ||
1421 | xfs_filblks_t fsblen; | ||
1422 | xfs_extlen_t cowextsize; | ||
1423 | ssize_t ret; | ||
1424 | |||
1425 | if (!xfs_sb_version_hasreflink(&mp->m_sb)) | ||
1426 | return -EOPNOTSUPP; | ||
1427 | |||
1428 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1429 | return -EIO; | ||
1430 | |||
1431 | /* Prepare and then clone file data. */ | ||
1432 | ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out, | ||
1433 | &len, is_dedupe); | ||
1434 | if (ret <= 0) | ||
1435 | return ret; | ||
1436 | |||
1437 | trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); | ||
1438 | |||
1300 | dfsbno = XFS_B_TO_FSBT(mp, pos_out); | 1439 | dfsbno = XFS_B_TO_FSBT(mp, pos_out); |
1301 | sfsbno = XFS_B_TO_FSBT(mp, pos_in); | 1440 | sfsbno = XFS_B_TO_FSBT(mp, pos_in); |
1302 | fsblen = XFS_B_TO_FSB(mp, len); | 1441 | fsblen = XFS_B_TO_FSB(mp, len); |
@@ -1305,10 +1444,6 @@ xfs_reflink_remap_range( | |||
1305 | if (ret) | 1444 | if (ret) |
1306 | goto out_unlock; | 1445 | goto out_unlock; |
1307 | 1446 | ||
1308 | /* Zap any page cache for the destination file's range. */ | ||
1309 | truncate_inode_pages_range(&inode_out->i_data, pos_out, | ||
1310 | PAGE_ALIGN(pos_out + len) - 1); | ||
1311 | |||
1312 | /* | 1447 | /* |
1313 | * Carry the cowextsize hint from src to dest if we're sharing the | 1448 | * Carry the cowextsize hint from src to dest if we're sharing the |
1314 | * entire source file to the entire destination file, the source file | 1449 | * entire source file to the entire destination file, the source file |
@@ -1325,12 +1460,7 @@ xfs_reflink_remap_range( | |||
1325 | is_dedupe); | 1460 | is_dedupe); |
1326 | 1461 | ||
1327 | out_unlock: | 1462 | out_unlock: |
1328 | xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); | 1463 | xfs_reflink_remap_unlock(file_in, file_out); |
1329 | if (!same_inode) | ||
1330 | xfs_iunlock(src, XFS_MMAPLOCK_SHARED); | ||
1331 | inode_unlock(inode_out); | ||
1332 | if (!same_inode) | ||
1333 | inode_unlock_shared(inode_in); | ||
1334 | if (ret) | 1464 | if (ret) |
1335 | trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); | 1465 | trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); |
1336 | return ret; | 1466 | return ret; |