aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_reflink.c
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2018-10-13 00:38:46 -0400
committer David S. Miller <davem@davemloft.net> 2018-10-13 00:38:46 -0400
commit d864991b220b7c62e81d21209e1fd978fd67352c (patch)
tree b570a1ad6fc1b959c5bcda6ceca0b321319c01e0 /fs/xfs/xfs_reflink.c
parent a688c53a0277d8ea21d86a5c56884892e3442c5e (diff)
parent bab5c80b211035739997ebd361a679fa85b39465 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Conflicts were easy to resolve using immediate context mostly, except the cls_u32.c one where I simply took the entire HEAD chunk. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'fs/xfs/xfs_reflink.c')
-rw-r--r-- fs/xfs/xfs_reflink.c 200
1 files changed, 165 insertions, 35 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 5289e22cb081..42ea7bab9144 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1220,35 +1220,92 @@ retry:
1220 return 0; 1220 return 0;
1221} 1221}
1222 1222
1223/* Unlock both inodes after they've been prepped for a range clone. */
1224STATIC void
1225xfs_reflink_remap_unlock(
1226 struct file *file_in,
1227 struct file *file_out)
1228{
1229 struct inode *inode_in = file_inode(file_in);
1230 struct xfs_inode *src = XFS_I(inode_in);
1231 struct inode *inode_out = file_inode(file_out);
1232 struct xfs_inode *dest = XFS_I(inode_out);
1233 bool same_inode = (inode_in == inode_out);
1234
1235 xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
1236 if (!same_inode)
1237 xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
1238 inode_unlock(inode_out);
1239 if (!same_inode)
1240 inode_unlock_shared(inode_in);
1241}
1242
1223/* 1243/*
1224 * Link a range of blocks from one file to another. 1244 * If we're reflinking to a point past the destination file's EOF, we must
1245 * zero any speculative post-EOF preallocations that sit between the old EOF
1246 * and the destination file offset.
1225 */ 1247 */
1226int 1248static int
1227xfs_reflink_remap_range( 1249xfs_reflink_zero_posteof(
1250 struct xfs_inode *ip,
1251 loff_t pos)
1252{
1253 loff_t isize = i_size_read(VFS_I(ip));
1254
1255 if (pos <= isize)
1256 return 0;
1257
1258 trace_xfs_zero_eof(ip, isize, pos - isize);
1259 return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL,
1260 &xfs_iomap_ops);
1261}
1262
1263/*
1264 * Prepare two files for range cloning. Upon a successful return both inodes
1265 * will have the iolock and mmaplock held, the page cache of the out file will
1266 * be truncated, and any leases on the out file will have been broken. This
1267 * function borrows heavily from xfs_file_aio_write_checks.
1268 *
1269 * The VFS allows partial EOF blocks to "match" for dedupe even though it hasn't
1270 * checked that the bytes beyond EOF physically match. Hence we cannot use the
1271 * EOF block in the source dedupe range because it's not a complete block match,
1272 * hence can introduce a corruption into the file that has its block replaced.
1273 *
1274 * In similar fashion, the VFS file cloning also allows partial EOF blocks to be
1275 * "block aligned" for the purposes of cloning entire files. However, if the
1276 * source file range includes the EOF block and it lands within the existing EOF
1277 * of the destination file, then we can expose stale data from beyond the source
1278 * file EOF in the destination file.
1279 *
1280 * XFS doesn't support partial block sharing, so in both cases we have to check
1281 * these cases ourselves. For dedupe, we can simply round the length to dedupe
1282 * down to the previous whole block and ignore the partial EOF block. While this
1283 * means we can't dedupe the last block of a file, this is an acceptable
1284 * tradeoff for simplicity of implementation.
1285 *
1286 * For cloning, we want to share the partial EOF block if it is also the new EOF
1287 * block of the destination file. If the partial EOF block lies inside the
1288 * existing destination EOF, then we have to abort the clone to avoid exposing
1289 * stale data in the destination file. Hence we reject these clone attempts with
1290 * -EINVAL in this case.
1291 */
1292STATIC int
1293xfs_reflink_remap_prep(
1228 struct file *file_in, 1294 struct file *file_in,
1229 loff_t pos_in, 1295 loff_t pos_in,
1230 struct file *file_out, 1296 struct file *file_out,
1231 loff_t pos_out, 1297 loff_t pos_out,
1232 u64 len, 1298 u64 *len,
1233 bool is_dedupe) 1299 bool is_dedupe)
1234{ 1300{
1235 struct inode *inode_in = file_inode(file_in); 1301 struct inode *inode_in = file_inode(file_in);
1236 struct xfs_inode *src = XFS_I(inode_in); 1302 struct xfs_inode *src = XFS_I(inode_in);
1237 struct inode *inode_out = file_inode(file_out); 1303 struct inode *inode_out = file_inode(file_out);
1238 struct xfs_inode *dest = XFS_I(inode_out); 1304 struct xfs_inode *dest = XFS_I(inode_out);
1239 struct xfs_mount *mp = src->i_mount;
1240 bool same_inode = (inode_in == inode_out); 1305 bool same_inode = (inode_in == inode_out);
1241 xfs_fileoff_t sfsbno, dfsbno; 1306 u64 blkmask = i_blocksize(inode_in) - 1;
1242 xfs_filblks_t fsblen;
1243 xfs_extlen_t cowextsize;
1244 ssize_t ret; 1307 ssize_t ret;
1245 1308
1246 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1247 return -EOPNOTSUPP;
1248
1249 if (XFS_FORCED_SHUTDOWN(mp))
1250 return -EIO;
1251
1252 /* Lock both files against IO */ 1309 /* Lock both files against IO */
1253 ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out); 1310 ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out);
1254 if (ret) 1311 if (ret)
@@ -1270,33 +1327,115 @@ xfs_reflink_remap_range(
1270 goto out_unlock; 1327 goto out_unlock;
1271 1328
1272 ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, 1329 ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
1273 &len, is_dedupe); 1330 len, is_dedupe);
1274 if (ret <= 0) 1331 if (ret <= 0)
1275 goto out_unlock; 1332 goto out_unlock;
1276 1333
1334 /*
1335 * If the dedupe data matches, chop off the partial EOF block
1336 * from the source file so we don't try to dedupe the partial
1337 * EOF block.
1338 */
1339 if (is_dedupe) {
1340 *len &= ~blkmask;
1341 } else if (*len & blkmask) {
1342 /*
1343 * The user is attempting to share a partial EOF block,
1344 * if it's inside the destination EOF then reject it.
1345 */
1346 if (pos_out + *len < i_size_read(inode_out)) {
1347 ret = -EINVAL;
1348 goto out_unlock;
1349 }
1350 }
1351
1277 /* Attach dquots to dest inode before changing block map */ 1352 /* Attach dquots to dest inode before changing block map */
1278 ret = xfs_qm_dqattach(dest); 1353 ret = xfs_qm_dqattach(dest);
1279 if (ret) 1354 if (ret)
1280 goto out_unlock; 1355 goto out_unlock;
1281 1356
1282 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
1283
1284 /* 1357 /*
1285 * Clear out post-eof preallocations because we don't have page cache 1358 * Zero existing post-eof speculative preallocations in the destination
1286 * backing the delayed allocations and they'll never get freed on 1359 * file.
1287 * their own.
1288 */ 1360 */
1289 if (xfs_can_free_eofblocks(dest, true)) { 1361 ret = xfs_reflink_zero_posteof(dest, pos_out);
1290 ret = xfs_free_eofblocks(dest); 1362 if (ret)
1291 if (ret) 1363 goto out_unlock;
1292 goto out_unlock;
1293 }
1294 1364
1295 /* Set flags and remap blocks. */ 1365 /* Set flags and remap blocks. */
1296 ret = xfs_reflink_set_inode_flag(src, dest); 1366 ret = xfs_reflink_set_inode_flag(src, dest);
1297 if (ret) 1367 if (ret)
1298 goto out_unlock; 1368 goto out_unlock;
1299 1369
1370 /* Zap any page cache for the destination file's range. */
1371 truncate_inode_pages_range(&inode_out->i_data, pos_out,
1372 PAGE_ALIGN(pos_out + *len) - 1);
1373
1374 /* If we're altering the file contents... */
1375 if (!is_dedupe) {
1376 /*
1377 * ...update the timestamps (which will grab the ilock again
1378 * from xfs_fs_dirty_inode, so we have to call it before we
1379 * take the ilock).
1380 */
1381 if (!(file_out->f_mode & FMODE_NOCMTIME)) {
1382 ret = file_update_time(file_out);
1383 if (ret)
1384 goto out_unlock;
1385 }
1386
1387 /*
1388 * ...clear the security bits if the process is not being run
1389 * by root. This keeps people from modifying setuid and setgid
1390 * binaries.
1391 */
1392 ret = file_remove_privs(file_out);
1393 if (ret)
1394 goto out_unlock;
1395 }
1396
1397 return 1;
1398out_unlock:
1399 xfs_reflink_remap_unlock(file_in, file_out);
1400 return ret;
1401}
1402
1403/*
1404 * Link a range of blocks from one file to another.
1405 */
1406int
1407xfs_reflink_remap_range(
1408 struct file *file_in,
1409 loff_t pos_in,
1410 struct file *file_out,
1411 loff_t pos_out,
1412 u64 len,
1413 bool is_dedupe)
1414{
1415 struct inode *inode_in = file_inode(file_in);
1416 struct xfs_inode *src = XFS_I(inode_in);
1417 struct inode *inode_out = file_inode(file_out);
1418 struct xfs_inode *dest = XFS_I(inode_out);
1419 struct xfs_mount *mp = src->i_mount;
1420 xfs_fileoff_t sfsbno, dfsbno;
1421 xfs_filblks_t fsblen;
1422 xfs_extlen_t cowextsize;
1423 ssize_t ret;
1424
1425 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1426 return -EOPNOTSUPP;
1427
1428 if (XFS_FORCED_SHUTDOWN(mp))
1429 return -EIO;
1430
1431 /* Prepare and then clone file data. */
1432 ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
1433 &len, is_dedupe);
1434 if (ret <= 0)
1435 return ret;
1436
1437 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
1438
1300 dfsbno = XFS_B_TO_FSBT(mp, pos_out); 1439 dfsbno = XFS_B_TO_FSBT(mp, pos_out);
1301 sfsbno = XFS_B_TO_FSBT(mp, pos_in); 1440 sfsbno = XFS_B_TO_FSBT(mp, pos_in);
1302 fsblen = XFS_B_TO_FSB(mp, len); 1441 fsblen = XFS_B_TO_FSB(mp, len);
@@ -1305,10 +1444,6 @@ xfs_reflink_remap_range(
1305 if (ret) 1444 if (ret)
1306 goto out_unlock; 1445 goto out_unlock;
1307 1446
1308 /* Zap any page cache for the destination file's range. */
1309 truncate_inode_pages_range(&inode_out->i_data, pos_out,
1310 PAGE_ALIGN(pos_out + len) - 1);
1311
1312 /* 1447 /*
1313 * Carry the cowextsize hint from src to dest if we're sharing the 1448 * Carry the cowextsize hint from src to dest if we're sharing the
1314 * entire source file to the entire destination file, the source file 1449 * entire source file to the entire destination file, the source file
@@ -1325,12 +1460,7 @@ xfs_reflink_remap_range(
1325 is_dedupe); 1460 is_dedupe);
1326 1461
1327out_unlock: 1462out_unlock:
1328 xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); 1463 xfs_reflink_remap_unlock(file_in, file_out);
1329 if (!same_inode)
1330 xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
1331 inode_unlock(inode_out);
1332 if (!same_inode)
1333 inode_unlock_shared(inode_in);
1334 if (ret) 1464 if (ret)
1335 trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); 1465 trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
1336 return ret; 1466 return ret;