diff options
author | Darrick J. Wong <darrick.wong@oracle.com> | 2016-10-03 12:11:43 -0400 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2016-10-05 19:26:26 -0400 |
commit | 98cc2db5b8b2c9f38aebf54a7b03657406b6de26 (patch) | |
tree | 2cac01aab4083ea3e9c3dae5ee7665a16e923190 | |
parent | f0bc4d134b46607967bda1205ce3226a1bd95f42 (diff) |
xfs: unshare a range of blocks via fallocate
Unshare all shared extents if the user calls fallocate with the new
unshare mode flag set, so that we can guarantee that a subsequent
write will not ENOSPC.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
[hch: pass inode instead of file to xfs_reflink_dirty_range,
use iomap infrastructure for copy up]
Signed-off-by: Christoph Hellwig <hch@lst.de>
-rw-r--r-- | fs/xfs/xfs_file.c | 10 | ||||
-rw-r--r-- | fs/xfs/xfs_reflink.c | 255 | ||||
-rw-r--r-- | fs/xfs/xfs_reflink.h | 4 |
3 files changed, 267 insertions, 2 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 39fde9f51303..07f951dd2685 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -848,7 +848,7 @@ buffered: | |||
848 | #define XFS_FALLOC_FL_SUPPORTED \ | 848 | #define XFS_FALLOC_FL_SUPPORTED \ |
849 | (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ | 849 | (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ |
850 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ | 850 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ |
851 | FALLOC_FL_INSERT_RANGE) | 851 | FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE) |
852 | 852 | ||
853 | STATIC long | 853 | STATIC long |
854 | xfs_file_fallocate( | 854 | xfs_file_fallocate( |
@@ -938,9 +938,15 @@ xfs_file_fallocate( | |||
938 | 938 | ||
939 | if (mode & FALLOC_FL_ZERO_RANGE) | 939 | if (mode & FALLOC_FL_ZERO_RANGE) |
940 | error = xfs_zero_file_space(ip, offset, len); | 940 | error = xfs_zero_file_space(ip, offset, len); |
941 | else | 941 | else { |
942 | if (mode & FALLOC_FL_UNSHARE_RANGE) { | ||
943 | error = xfs_reflink_unshare(ip, offset, len); | ||
944 | if (error) | ||
945 | goto out_unlock; | ||
946 | } | ||
942 | error = xfs_alloc_file_space(ip, offset, len, | 947 | error = xfs_alloc_file_space(ip, offset, len, |
943 | XFS_BMAPI_PREALLOC); | 948 | XFS_BMAPI_PREALLOC); |
949 | } | ||
944 | if (error) | 950 | if (error) |
945 | goto out_unlock; | 951 | goto out_unlock; |
946 | } | 952 | } |
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 6b22669421b2..d4707e627a74 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c | |||
@@ -1345,3 +1345,258 @@ out_error: | |||
1345 | trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_); | 1345 | trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_); |
1346 | return error; | 1346 | return error; |
1347 | } | 1347 | } |
1348 | |||
1349 | /* | ||
1350 | * The user wants to preemptively CoW all shared blocks in this file, | ||
1351 | * which enables us to turn off the reflink flag. Iterate all | ||
1352 | * extents which are not prealloc/delalloc to see which ranges are | ||
1353 | * mentioned in the refcount tree, then read those blocks into the | ||
1354 | * pagecache, dirty them, fsync them back out, and then we can update | ||
1355 | * the inode flag. What happens if we run out of memory? :) | ||
1356 | */ | ||
1357 | STATIC int | ||
1358 | xfs_reflink_dirty_extents( | ||
1359 | struct xfs_inode *ip, | ||
1360 | xfs_fileoff_t fbno, | ||
1361 | xfs_filblks_t end, | ||
1362 | xfs_off_t isize) | ||
1363 | { | ||
1364 | struct xfs_mount *mp = ip->i_mount; | ||
1365 | xfs_agnumber_t agno; | ||
1366 | xfs_agblock_t agbno; | ||
1367 | xfs_extlen_t aglen; | ||
1368 | xfs_agblock_t rbno; | ||
1369 | xfs_extlen_t rlen; | ||
1370 | xfs_off_t fpos; | ||
1371 | xfs_off_t flen; | ||
1372 | struct xfs_bmbt_irec map[2]; | ||
1373 | int nmaps; | ||
1374 | int error; | ||
1375 | |||
1376 | while (end - fbno > 0) { | ||
1377 | nmaps = 1; | ||
1378 | /* | ||
1379 | * Look for extents in the file. Skip holes, delalloc, or | ||
1380 | * unwritten extents; they can't be reflinked. | ||
1381 | */ | ||
1382 | error = xfs_bmapi_read(ip, fbno, end - fbno, map, &nmaps, 0); | ||
1383 | if (error) | ||
1384 | goto out; | ||
1385 | if (nmaps == 0) | ||
1386 | break; | ||
1387 | if (map[0].br_startblock == HOLESTARTBLOCK || | ||
1388 | map[0].br_startblock == DELAYSTARTBLOCK || | ||
1389 | ISUNWRITTEN(&map[0])) | ||
1390 | goto next; | ||
1391 | |||
1392 | map[1] = map[0]; | ||
1393 | while (map[1].br_blockcount) { | ||
1394 | agno = XFS_FSB_TO_AGNO(mp, map[1].br_startblock); | ||
1395 | agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock); | ||
1396 | aglen = map[1].br_blockcount; | ||
1397 | |||
1398 | error = xfs_reflink_find_shared(mp, agno, agbno, aglen, | ||
1399 | &rbno, &rlen, true); | ||
1400 | if (error) | ||
1401 | goto out; | ||
1402 | if (rbno == NULLAGBLOCK) | ||
1403 | break; | ||
1404 | |||
1405 | /* Dirty the pages */ | ||
1406 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1407 | fpos = XFS_FSB_TO_B(mp, map[1].br_startoff + | ||
1408 | (rbno - agbno)); | ||
1409 | flen = XFS_FSB_TO_B(mp, rlen); | ||
1410 | if (fpos + flen > isize) | ||
1411 | flen = isize - fpos; | ||
1412 | error = iomap_file_dirty(VFS_I(ip), fpos, flen, | ||
1413 | &xfs_iomap_ops); | ||
1414 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
1415 | if (error) | ||
1416 | goto out; | ||
1417 | |||
1418 | map[1].br_blockcount -= (rbno - agbno + rlen); | ||
1419 | map[1].br_startoff += (rbno - agbno + rlen); | ||
1420 | map[1].br_startblock += (rbno - agbno + rlen); | ||
1421 | } | ||
1422 | |||
1423 | next: | ||
1424 | fbno = map[0].br_startoff + map[0].br_blockcount; | ||
1425 | } | ||
1426 | out: | ||
1427 | return error; | ||
1428 | } | ||
1429 | |||
1430 | /* Clear the inode reflink flag if there are no shared extents. */ | ||
1431 | int | ||
1432 | xfs_reflink_clear_inode_flag( | ||
1433 | struct xfs_inode *ip, | ||
1434 | struct xfs_trans **tpp) | ||
1435 | { | ||
1436 | struct xfs_mount *mp = ip->i_mount; | ||
1437 | xfs_fileoff_t fbno; | ||
1438 | xfs_filblks_t end; | ||
1439 | xfs_agnumber_t agno; | ||
1440 | xfs_agblock_t agbno; | ||
1441 | xfs_extlen_t aglen; | ||
1442 | xfs_agblock_t rbno; | ||
1443 | xfs_extlen_t rlen; | ||
1444 | struct xfs_bmbt_irec map[2]; | ||
1445 | int nmaps; | ||
1446 | int error = 0; | ||
1447 | |||
1448 | if (!(ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK)) | ||
1449 | return 0; | ||
1450 | |||
1451 | fbno = 0; | ||
1452 | end = XFS_B_TO_FSB(mp, i_size_read(VFS_I(ip))); | ||
1453 | while (end - fbno > 0) { | ||
1454 | nmaps = 1; | ||
1455 | /* | ||
1456 | * Look for extents in the file. Skip holes, delalloc, or | ||
1457 | * unwritten extents; they can't be reflinked. | ||
1458 | */ | ||
1459 | error = xfs_bmapi_read(ip, fbno, end - fbno, map, &nmaps, 0); | ||
1460 | if (error) | ||
1461 | return error; | ||
1462 | if (nmaps == 0) | ||
1463 | break; | ||
1464 | if (map[0].br_startblock == HOLESTARTBLOCK || | ||
1465 | map[0].br_startblock == DELAYSTARTBLOCK || | ||
1466 | ISUNWRITTEN(&map[0])) | ||
1467 | goto next; | ||
1468 | |||
1469 | map[1] = map[0]; | ||
1470 | while (map[1].br_blockcount) { | ||
1471 | agno = XFS_FSB_TO_AGNO(mp, map[1].br_startblock); | ||
1472 | agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock); | ||
1473 | aglen = map[1].br_blockcount; | ||
1474 | |||
1475 | error = xfs_reflink_find_shared(mp, agno, agbno, aglen, | ||
1476 | &rbno, &rlen, false); | ||
1477 | if (error) | ||
1478 | return error; | ||
1479 | /* Is there still a shared block here? */ | ||
1480 | if (rbno != NULLAGBLOCK) | ||
1481 | return 0; | ||
1482 | |||
1483 | map[1].br_blockcount -= aglen; | ||
1484 | map[1].br_startoff += aglen; | ||
1485 | map[1].br_startblock += aglen; | ||
1486 | } | ||
1487 | |||
1488 | next: | ||
1489 | fbno = map[0].br_startoff + map[0].br_blockcount; | ||
1490 | } | ||
1491 | |||
1492 | /* | ||
1493 | * We didn't find any shared blocks so turn off the reflink flag. | ||
1494 | * First, get rid of any leftover CoW mappings. | ||
1495 | */ | ||
1496 | error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF); | ||
1497 | if (error) | ||
1498 | return error; | ||
1499 | |||
1500 | /* Clear the inode flag. */ | ||
1501 | trace_xfs_reflink_unset_inode_flag(ip); | ||
1502 | ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; | ||
1503 | xfs_trans_ijoin(*tpp, ip, 0); | ||
1504 | xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); | ||
1505 | |||
1506 | return error; | ||
1507 | } | ||
1508 | |||
1509 | /* | ||
1510 | * Clear the inode reflink flag if there are no shared extents and the size | ||
1511 | * hasn't changed. | ||
1512 | */ | ||
1513 | STATIC int | ||
1514 | xfs_reflink_try_clear_inode_flag( | ||
1515 | struct xfs_inode *ip, | ||
1516 | xfs_off_t old_isize) | ||
1517 | { | ||
1518 | struct xfs_mount *mp = ip->i_mount; | ||
1519 | struct xfs_trans *tp; | ||
1520 | int error = 0; | ||
1521 | |||
1522 | /* Start a rolling transaction to remove the mappings */ | ||
1523 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); | ||
1524 | if (error) | ||
1525 | return error; | ||
1526 | |||
1527 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
1528 | xfs_trans_ijoin(tp, ip, 0); | ||
1529 | |||
1530 | if (old_isize != i_size_read(VFS_I(ip))) | ||
1531 | goto cancel; | ||
1532 | |||
1533 | error = xfs_reflink_clear_inode_flag(ip, &tp); | ||
1534 | if (error) | ||
1535 | goto cancel; | ||
1536 | |||
1537 | error = xfs_trans_commit(tp); | ||
1538 | if (error) | ||
1539 | goto out; | ||
1540 | |||
1541 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1542 | return 0; | ||
1543 | cancel: | ||
1544 | xfs_trans_cancel(tp); | ||
1545 | out: | ||
1546 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1547 | return error; | ||
1548 | } | ||
1549 | |||
1550 | /* | ||
1551 | * Pre-COW all shared blocks within a given byte range of a file and turn off | ||
1552 | * the reflink flag if we unshare all of the file's blocks. | ||
1553 | */ | ||
1554 | int | ||
1555 | xfs_reflink_unshare( | ||
1556 | struct xfs_inode *ip, | ||
1557 | xfs_off_t offset, | ||
1558 | xfs_off_t len) | ||
1559 | { | ||
1560 | struct xfs_mount *mp = ip->i_mount; | ||
1561 | xfs_fileoff_t fbno; | ||
1562 | xfs_filblks_t end; | ||
1563 | xfs_off_t isize; | ||
1564 | int error; | ||
1565 | |||
1566 | if (!xfs_is_reflink_inode(ip)) | ||
1567 | return 0; | ||
1568 | |||
1569 | trace_xfs_reflink_unshare(ip, offset, len); | ||
1570 | |||
1571 | inode_dio_wait(VFS_I(ip)); | ||
1572 | |||
1573 | /* Try to CoW the selected ranges */ | ||
1574 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
1575 | fbno = XFS_B_TO_FSB(mp, offset); | ||
1576 | isize = i_size_read(VFS_I(ip)); | ||
1577 | end = XFS_B_TO_FSB(mp, offset + len); | ||
1578 | error = xfs_reflink_dirty_extents(ip, fbno, end, isize); | ||
1579 | if (error) | ||
1580 | goto out_unlock; | ||
1581 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1582 | |||
1583 | /* Wait for the IO to finish */ | ||
1584 | error = filemap_write_and_wait(VFS_I(ip)->i_mapping); | ||
1585 | if (error) | ||
1586 | goto out; | ||
1587 | |||
1588 | /* Turn off the reflink flag if we unshared the whole file */ | ||
1589 | if (offset == 0 && len == isize) { | ||
1590 | error = xfs_reflink_try_clear_inode_flag(ip, isize); | ||
1591 | if (error) | ||
1592 | goto out; | ||
1593 | } | ||
1594 | |||
1595 | return 0; | ||
1596 | |||
1597 | out_unlock: | ||
1598 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1599 | out: | ||
1600 | trace_xfs_reflink_unshare_error(ip, error, _RET_IP_); | ||
1601 | return error; | ||
1602 | } | ||
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index df82b2049187..bade5a61f3b0 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h | |||
@@ -48,5 +48,9 @@ extern int xfs_reflink_recover_cow(struct xfs_mount *mp); | |||
48 | extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff, | 48 | extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff, |
49 | struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len, | 49 | struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len, |
50 | unsigned int flags); | 50 | unsigned int flags); |
51 | extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, | ||
52 | struct xfs_trans **tpp); | ||
53 | extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, | ||
54 | xfs_off_t len); | ||
51 | 55 | ||
52 | #endif /* __XFS_REFLINK_H */ | 56 | #endif /* __XFS_REFLINK_H */ |