aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDarrick J. Wong <darrick.wong@oracle.com>2016-10-03 12:11:43 -0400
committerDarrick J. Wong <darrick.wong@oracle.com>2016-10-05 19:26:26 -0400
commit98cc2db5b8b2c9f38aebf54a7b03657406b6de26 (patch)
tree2cac01aab4083ea3e9c3dae5ee7665a16e923190
parentf0bc4d134b46607967bda1205ce3226a1bd95f42 (diff)
xfs: unshare a range of blocks via fallocate
Unshare all shared extents if the user calls fallocate with the new unshare mode flag set, so that we can guarantee that a subsequent write will not fail with ENOSPC.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
[hch: pass inode instead of file to xfs_reflink_dirty_range, use iomap infrastructure for copy up]
Signed-off-by: Christoph Hellwig <hch@lst.de>
-rw-r--r--fs/xfs/xfs_file.c10
-rw-r--r--fs/xfs/xfs_reflink.c255
-rw-r--r--fs/xfs/xfs_reflink.h4
3 files changed, 267 insertions, 2 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 39fde9f51303..07f951dd2685 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -848,7 +848,7 @@ buffered:
848#define XFS_FALLOC_FL_SUPPORTED \ 848#define XFS_FALLOC_FL_SUPPORTED \
849 (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ 849 (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
850 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ 850 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
851 FALLOC_FL_INSERT_RANGE) 851 FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE)
852 852
853STATIC long 853STATIC long
854xfs_file_fallocate( 854xfs_file_fallocate(
@@ -938,9 +938,15 @@ xfs_file_fallocate(
938 938
939 if (mode & FALLOC_FL_ZERO_RANGE) 939 if (mode & FALLOC_FL_ZERO_RANGE)
940 error = xfs_zero_file_space(ip, offset, len); 940 error = xfs_zero_file_space(ip, offset, len);
941 else 941 else {
942 if (mode & FALLOC_FL_UNSHARE_RANGE) {
943 error = xfs_reflink_unshare(ip, offset, len);
944 if (error)
945 goto out_unlock;
946 }
942 error = xfs_alloc_file_space(ip, offset, len, 947 error = xfs_alloc_file_space(ip, offset, len,
943 XFS_BMAPI_PREALLOC); 948 XFS_BMAPI_PREALLOC);
949 }
944 if (error) 950 if (error)
945 goto out_unlock; 951 goto out_unlock;
946 } 952 }
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 6b22669421b2..d4707e627a74 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1345,3 +1345,258 @@ out_error:
1345 trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_); 1345 trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_);
1346 return error; 1346 return error;
1347} 1347}
1348
1349/*
1350 * The user wants to preemptively CoW all shared blocks in this file,
1351 * which enables us to turn off the reflink flag. Iterate all
1352 * extents which are not prealloc/delalloc to see which ranges are
1353 * mentioned in the refcount tree, then read those blocks into the
1354 * pagecache, dirty them, fsync them back out, and then we can update
1355 * the inode flag. What happens if we run out of memory? :)
1356 */
1357STATIC int
1358xfs_reflink_dirty_extents(
1359 struct xfs_inode *ip,
1360 xfs_fileoff_t fbno,
1361 xfs_filblks_t end,
1362 xfs_off_t isize)
1363{
1364 struct xfs_mount *mp = ip->i_mount;
1365 xfs_agnumber_t agno;
1366 xfs_agblock_t agbno;
1367 xfs_extlen_t aglen;
1368 xfs_agblock_t rbno;
1369 xfs_extlen_t rlen;
1370 xfs_off_t fpos;
1371 xfs_off_t flen;
1372 struct xfs_bmbt_irec map[2];
1373 int nmaps;
1374 int error;
1375
1376 while (end - fbno > 0) {
1377 nmaps = 1;
1378 /*
1379 * Look for extents in the file. Skip holes, delalloc, or
1380 * unwritten extents; they can't be reflinked.
1381 */
1382 error = xfs_bmapi_read(ip, fbno, end - fbno, map, &nmaps, 0);
1383 if (error)
1384 goto out;
1385 if (nmaps == 0)
1386 break;
1387 if (map[0].br_startblock == HOLESTARTBLOCK ||
1388 map[0].br_startblock == DELAYSTARTBLOCK ||
1389 ISUNWRITTEN(&map[0]))
1390 goto next;
1391
1392 map[1] = map[0];
1393 while (map[1].br_blockcount) {
1394 agno = XFS_FSB_TO_AGNO(mp, map[1].br_startblock);
1395 agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock);
1396 aglen = map[1].br_blockcount;
1397
1398 error = xfs_reflink_find_shared(mp, agno, agbno, aglen,
1399 &rbno, &rlen, true);
1400 if (error)
1401 goto out;
1402 if (rbno == NULLAGBLOCK)
1403 break;
1404
1405 /* Dirty the pages */
1406 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1407 fpos = XFS_FSB_TO_B(mp, map[1].br_startoff +
1408 (rbno - agbno));
1409 flen = XFS_FSB_TO_B(mp, rlen);
1410 if (fpos + flen > isize)
1411 flen = isize - fpos;
1412 error = iomap_file_dirty(VFS_I(ip), fpos, flen,
1413 &xfs_iomap_ops);
1414 xfs_ilock(ip, XFS_ILOCK_EXCL);
1415 if (error)
1416 goto out;
1417
1418 map[1].br_blockcount -= (rbno - agbno + rlen);
1419 map[1].br_startoff += (rbno - agbno + rlen);
1420 map[1].br_startblock += (rbno - agbno + rlen);
1421 }
1422
1423next:
1424 fbno = map[0].br_startoff + map[0].br_blockcount;
1425 }
1426out:
1427 return error;
1428}
1429
1430/* Clear the inode reflink flag if there are no shared extents. */
1431int
1432xfs_reflink_clear_inode_flag(
1433 struct xfs_inode *ip,
1434 struct xfs_trans **tpp)
1435{
1436 struct xfs_mount *mp = ip->i_mount;
1437 xfs_fileoff_t fbno;
1438 xfs_filblks_t end;
1439 xfs_agnumber_t agno;
1440 xfs_agblock_t agbno;
1441 xfs_extlen_t aglen;
1442 xfs_agblock_t rbno;
1443 xfs_extlen_t rlen;
1444 struct xfs_bmbt_irec map[2];
1445 int nmaps;
1446 int error = 0;
1447
1448 if (!(ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK))
1449 return 0;
1450
1451 fbno = 0;
1452 end = XFS_B_TO_FSB(mp, i_size_read(VFS_I(ip)));
1453 while (end - fbno > 0) {
1454 nmaps = 1;
1455 /*
1456 * Look for extents in the file. Skip holes, delalloc, or
1457 * unwritten extents; they can't be reflinked.
1458 */
1459 error = xfs_bmapi_read(ip, fbno, end - fbno, map, &nmaps, 0);
1460 if (error)
1461 return error;
1462 if (nmaps == 0)
1463 break;
1464 if (map[0].br_startblock == HOLESTARTBLOCK ||
1465 map[0].br_startblock == DELAYSTARTBLOCK ||
1466 ISUNWRITTEN(&map[0]))
1467 goto next;
1468
1469 map[1] = map[0];
1470 while (map[1].br_blockcount) {
1471 agno = XFS_FSB_TO_AGNO(mp, map[1].br_startblock);
1472 agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock);
1473 aglen = map[1].br_blockcount;
1474
1475 error = xfs_reflink_find_shared(mp, agno, agbno, aglen,
1476 &rbno, &rlen, false);
1477 if (error)
1478 return error;
1479 /* Is there still a shared block here? */
1480 if (rbno != NULLAGBLOCK)
1481 return 0;
1482
1483 map[1].br_blockcount -= aglen;
1484 map[1].br_startoff += aglen;
1485 map[1].br_startblock += aglen;
1486 }
1487
1488next:
1489 fbno = map[0].br_startoff + map[0].br_blockcount;
1490 }
1491
1492 /*
1493 * We didn't find any shared blocks so turn off the reflink flag.
1494 * First, get rid of any leftover CoW mappings.
1495 */
1496 error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF);
1497 if (error)
1498 return error;
1499
1500 /* Clear the inode flag. */
1501 trace_xfs_reflink_unset_inode_flag(ip);
1502 ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
1503 xfs_trans_ijoin(*tpp, ip, 0);
1504 xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
1505
1506 return error;
1507}
1508
1509/*
1510 * Clear the inode reflink flag if there are no shared extents and the size
1511 * hasn't changed.
1512 */
1513STATIC int
1514xfs_reflink_try_clear_inode_flag(
1515 struct xfs_inode *ip,
1516 xfs_off_t old_isize)
1517{
1518 struct xfs_mount *mp = ip->i_mount;
1519 struct xfs_trans *tp;
1520 int error = 0;
1521
1522 /* Start a rolling transaction to remove the mappings */
1523 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
1524 if (error)
1525 return error;
1526
1527 xfs_ilock(ip, XFS_ILOCK_EXCL);
1528 xfs_trans_ijoin(tp, ip, 0);
1529
1530 if (old_isize != i_size_read(VFS_I(ip)))
1531 goto cancel;
1532
1533 error = xfs_reflink_clear_inode_flag(ip, &tp);
1534 if (error)
1535 goto cancel;
1536
1537 error = xfs_trans_commit(tp);
1538 if (error)
1539 goto out;
1540
1541 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1542 return 0;
1543cancel:
1544 xfs_trans_cancel(tp);
1545out:
1546 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1547 return error;
1548}
1549
1550/*
1551 * Pre-COW all shared blocks within a given byte range of a file and turn off
1552 * the reflink flag if we unshare all of the file's blocks.
1553 */
1554int
1555xfs_reflink_unshare(
1556 struct xfs_inode *ip,
1557 xfs_off_t offset,
1558 xfs_off_t len)
1559{
1560 struct xfs_mount *mp = ip->i_mount;
1561 xfs_fileoff_t fbno;
1562 xfs_filblks_t end;
1563 xfs_off_t isize;
1564 int error;
1565
1566 if (!xfs_is_reflink_inode(ip))
1567 return 0;
1568
1569 trace_xfs_reflink_unshare(ip, offset, len);
1570
1571 inode_dio_wait(VFS_I(ip));
1572
1573 /* Try to CoW the selected ranges */
1574 xfs_ilock(ip, XFS_ILOCK_EXCL);
1575 fbno = XFS_B_TO_FSB(mp, offset);
1576 isize = i_size_read(VFS_I(ip));
1577 end = XFS_B_TO_FSB(mp, offset + len);
1578 error = xfs_reflink_dirty_extents(ip, fbno, end, isize);
1579 if (error)
1580 goto out_unlock;
1581 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1582
1583 /* Wait for the IO to finish */
1584 error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
1585 if (error)
1586 goto out;
1587
1588 /* Turn off the reflink flag if we unshared the whole file */
1589 if (offset == 0 && len == isize) {
1590 error = xfs_reflink_try_clear_inode_flag(ip, isize);
1591 if (error)
1592 goto out;
1593 }
1594
1595 return 0;
1596
1597out_unlock:
1598 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1599out:
1600 trace_xfs_reflink_unshare_error(ip, error, _RET_IP_);
1601 return error;
1602}
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index df82b2049187..bade5a61f3b0 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -48,5 +48,9 @@ extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
48extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff, 48extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff,
49 struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len, 49 struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len,
50 unsigned int flags); 50 unsigned int flags);
51extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
52 struct xfs_trans **tpp);
53extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
54 xfs_off_t len);
51 55
52#endif /* __XFS_REFLINK_H */ 56#endif /* __XFS_REFLINK_H */