aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNamjae Jeon <namjae.jeon@samsung.com>2014-02-23 18:58:19 -0500
committerDave Chinner <david@fromorbit.com>2014-02-23 18:58:19 -0500
commite1d8fb88a64c1f8094b9f6c3b6d2d9e6719c970d (patch)
treeec95b7acd9f3182e683ea16fab40092d9f038ec6
parent00f5e61998dd17f5375d9dfc01331f104b83f841 (diff)
xfs: Add support FALLOC_FL_COLLAPSE_RANGE for fallocate
This patch implements fallocate's FALLOC_FL_COLLAPSE_RANGE for XFS. The semantics of this flag are following: 1) It collapses the range lying between offset and length by removing any data blocks which are present in this range and than updates all the logical offsets of extents beyond "offset + len" to nullify the hole created by removing blocks. In short, it does not leave a hole. 2) It should be used exclusively. No other fallocate flag in combination. 3) Offset and length supplied to fallocate should be fs block size aligned in case of xfs and ext4. 4) Collaspe range does not work beyond i_size. Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com> Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
-rw-r--r--fs/xfs/xfs_bmap.c193
-rw-r--r--fs/xfs/xfs_bmap.h15
-rw-r--r--fs/xfs/xfs_bmap_util.c97
-rw-r--r--fs/xfs/xfs_bmap_util.h2
-rw-r--r--fs/xfs/xfs_file.c19
-rw-r--r--fs/xfs/xfs_trace.h1
6 files changed, 324 insertions, 3 deletions
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 152543c4ca70..5b6092ef51ef 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5378,3 +5378,196 @@ error0:
5378 } 5378 }
5379 return error; 5379 return error;
5380} 5380}
5381
5382/*
5383 * Shift extent records to the left to cover a hole.
5384 *
5385 * The maximum number of extents to be shifted in a single operation
5386 * is @num_exts, and @current_ext keeps track of the current extent
5387 * index we have shifted. @offset_shift_fsb is the length by which each
5388 * extent is shifted. If there is no hole to shift the extents
5389 * into, this will be considered invalid operation and we abort immediately.
5390 */
5391int
5392xfs_bmap_shift_extents(
5393 struct xfs_trans *tp,
5394 struct xfs_inode *ip,
5395 int *done,
5396 xfs_fileoff_t start_fsb,
5397 xfs_fileoff_t offset_shift_fsb,
5398 xfs_extnum_t *current_ext,
5399 xfs_fsblock_t *firstblock,
5400 struct xfs_bmap_free *flist,
5401 int num_exts)
5402{
5403 struct xfs_btree_cur *cur;
5404 struct xfs_bmbt_rec_host *gotp;
5405 struct xfs_bmbt_irec got;
5406 struct xfs_bmbt_irec left;
5407 struct xfs_mount *mp = ip->i_mount;
5408 struct xfs_ifork *ifp;
5409 xfs_extnum_t nexts = 0;
5410 xfs_fileoff_t startoff;
5411 int error = 0;
5412 int i;
5413 int whichfork = XFS_DATA_FORK;
5414 int logflags;
5415 xfs_filblks_t blockcount = 0;
5416
5417 if (unlikely(XFS_TEST_ERROR(
5418 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5419 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5420 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
5421 XFS_ERROR_REPORT("xfs_bmap_shift_extents",
5422 XFS_ERRLEVEL_LOW, mp);
5423 return XFS_ERROR(EFSCORRUPTED);
5424 }
5425
5426 if (XFS_FORCED_SHUTDOWN(mp))
5427 return XFS_ERROR(EIO);
5428
5429 ASSERT(current_ext != NULL);
5430
5431 ifp = XFS_IFORK_PTR(ip, whichfork);
5432
5433 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5434 /* Read in all the extents */
5435 error = xfs_iread_extents(tp, ip, whichfork);
5436 if (error)
5437 return error;
5438 }
5439
5440 /*
5441 * If *current_ext is 0, we would need to lookup the extent
5442 * from where we would start shifting and store it in gotp.
5443 */
5444 if (!*current_ext) {
5445 gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
5446 /*
5447 * gotp can be null in 2 cases: 1) if there are no extents
5448 * or 2) start_fsb lies in a hole beyond which there are
5449 * no extents. Either way, we are done.
5450 */
5451 if (!gotp) {
5452 *done = 1;
5453 return 0;
5454 }
5455 }
5456
5457 /* We are going to change core inode */
5458 logflags = XFS_ILOG_CORE;
5459
5460 if (ifp->if_flags & XFS_IFBROOT) {
5461 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5462 cur->bc_private.b.firstblock = *firstblock;
5463 cur->bc_private.b.flist = flist;
5464 cur->bc_private.b.flags = 0;
5465 } else {
5466 cur = NULL;
5467 logflags |= XFS_ILOG_DEXT;
5468 }
5469
5470 while (nexts++ < num_exts &&
5471 *current_ext < XFS_IFORK_NEXTENTS(ip, whichfork)) {
5472
5473 gotp = xfs_iext_get_ext(ifp, *current_ext);
5474 xfs_bmbt_get_all(gotp, &got);
5475 startoff = got.br_startoff - offset_shift_fsb;
5476
5477 /*
5478 * Before shifting extent into hole, make sure that the hole
5479 * is large enough to accomodate the shift.
5480 */
5481 if (*current_ext) {
5482 xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
5483 *current_ext - 1), &left);
5484
5485 if (startoff < left.br_startoff + left.br_blockcount)
5486 error = XFS_ERROR(EINVAL);
5487 } else if (offset_shift_fsb > got.br_startoff) {
5488 /*
5489 * When first extent is shifted, offset_shift_fsb
5490 * should be less than the stating offset of
5491 * the first extent.
5492 */
5493 error = XFS_ERROR(EINVAL);
5494 }
5495
5496 if (error)
5497 goto del_cursor;
5498
5499 if (cur) {
5500 error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5501 got.br_startblock,
5502 got.br_blockcount,
5503 &i);
5504 if (error)
5505 goto del_cursor;
5506 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
5507 }
5508
5509 /* Check if we can merge 2 adjacent extents */
5510 if (*current_ext &&
5511 left.br_startoff + left.br_blockcount == startoff &&
5512 left.br_startblock + left.br_blockcount ==
5513 got.br_startblock &&
5514 left.br_state == got.br_state &&
5515 left.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
5516 blockcount = left.br_blockcount +
5517 got.br_blockcount;
5518 xfs_iext_remove(ip, *current_ext, 1, 0);
5519 if (cur) {
5520 error = xfs_btree_delete(cur, &i);
5521 if (error)
5522 goto del_cursor;
5523 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
5524 }
5525 XFS_IFORK_NEXT_SET(ip, whichfork,
5526 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
5527 gotp = xfs_iext_get_ext(ifp, --*current_ext);
5528 xfs_bmbt_get_all(gotp, &got);
5529
5530 /* Make cursor point to the extent we will update */
5531 if (cur) {
5532 error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5533 got.br_startblock,
5534 got.br_blockcount,
5535 &i);
5536 if (error)
5537 goto del_cursor;
5538 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
5539 }
5540
5541 xfs_bmbt_set_blockcount(gotp, blockcount);
5542 got.br_blockcount = blockcount;
5543 } else {
5544 /* We have to update the startoff */
5545 xfs_bmbt_set_startoff(gotp, startoff);
5546 got.br_startoff = startoff;
5547 }
5548
5549 if (cur) {
5550 error = xfs_bmbt_update(cur, got.br_startoff,
5551 got.br_startblock,
5552 got.br_blockcount,
5553 got.br_state);
5554 if (error)
5555 goto del_cursor;
5556 }
5557
5558 (*current_ext)++;
5559 }
5560
5561 /* Check if we are done */
5562 if (*current_ext == XFS_IFORK_NEXTENTS(ip, whichfork))
5563 *done = 1;
5564
5565del_cursor:
5566 if (cur)
5567 xfs_btree_del_cursor(cur,
5568 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5569
5570 xfs_trans_log_inode(tp, ip, logflags);
5571
5572 return error;
5573}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 33b41f351225..f84bd7af43be 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -127,6 +127,16 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
127 { BMAP_RIGHT_FILLING, "RF" }, \ 127 { BMAP_RIGHT_FILLING, "RF" }, \
128 { BMAP_ATTRFORK, "ATTR" } 128 { BMAP_ATTRFORK, "ATTR" }
129 129
130
131/*
132 * This macro is used to determine how many extents will be shifted
133 * in one write transaction. We could require two splits,
134 * an extent move on the first and an extent merge on the second,
135 * So it is proper that one extent is shifted inside write transaction
136 * at a time.
137 */
138#define XFS_BMAP_MAX_SHIFT_EXTENTS 1
139
130#ifdef DEBUG 140#ifdef DEBUG
131void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, 141void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
132 int whichfork, unsigned long caller_ip); 142 int whichfork, unsigned long caller_ip);
@@ -169,5 +179,10 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
169int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, 179int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
170 xfs_extnum_t num); 180 xfs_extnum_t num);
171uint xfs_default_attroffset(struct xfs_inode *ip); 181uint xfs_default_attroffset(struct xfs_inode *ip);
182int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
183 int *done, xfs_fileoff_t start_fsb,
184 xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
185 xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
186 int num_exts);
172 187
173#endif /* __XFS_BMAP_H__ */ 188#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index f264616080ca..01f6a646caa1 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1349,7 +1349,6 @@ xfs_free_file_space(
1349 * the freeing of the space succeeds at ENOSPC. 1349 * the freeing of the space succeeds at ENOSPC.
1350 */ 1350 */
1351 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 1351 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1352 tp->t_flags |= XFS_TRANS_RESERVE;
1353 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); 1352 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
1354 1353
1355 /* 1354 /*
@@ -1468,6 +1467,102 @@ out:
1468} 1467}
1469 1468
1470/* 1469/*
1470 * xfs_collapse_file_space()
1471 * This routine frees disk space and shift extent for the given file.
1472 * The first thing we do is to free data blocks in the specified range
1473 * by calling xfs_free_file_space(). It would also sync dirty data
1474 * and invalidate page cache over the region on which collapse range
1475 * is working. And Shift extent records to the left to cover a hole.
1476 * RETURNS:
1477 * 0 on success
1478 * errno on error
1479 *
1480 */
1481int
1482xfs_collapse_file_space(
1483 struct xfs_inode *ip,
1484 xfs_off_t offset,
1485 xfs_off_t len)
1486{
1487 int done = 0;
1488 struct xfs_mount *mp = ip->i_mount;
1489 struct xfs_trans *tp;
1490 int error;
1491 xfs_extnum_t current_ext = 0;
1492 struct xfs_bmap_free free_list;
1493 xfs_fsblock_t first_block;
1494 int committed;
1495 xfs_fileoff_t start_fsb;
1496 xfs_fileoff_t shift_fsb;
1497
1498 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1499
1500 trace_xfs_collapse_file_space(ip);
1501
1502 start_fsb = XFS_B_TO_FSB(mp, offset + len);
1503 shift_fsb = XFS_B_TO_FSB(mp, len);
1504
1505 error = xfs_free_file_space(ip, offset, len);
1506 if (error)
1507 return error;
1508
1509 while (!error && !done) {
1510 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1511 tp->t_flags |= XFS_TRANS_RESERVE;
1512 /*
1513 * We would need to reserve permanent block for transaction.
1514 * This will come into picture when after shifting extent into
1515 * hole we found that adjacent extents can be merged which
1516 * may lead to freeing of a block during record update.
1517 */
1518 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
1519 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
1520 if (error) {
1521 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
1522 xfs_trans_cancel(tp, 0);
1523 break;
1524 }
1525
1526 xfs_ilock(ip, XFS_ILOCK_EXCL);
1527 error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
1528 ip->i_gdquot, ip->i_pdquot,
1529 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
1530 XFS_QMOPT_RES_REGBLKS);
1531 if (error)
1532 goto out;
1533
1534 xfs_trans_ijoin(tp, ip, 0);
1535
1536 xfs_bmap_init(&free_list, &first_block);
1537
1538 /*
1539 * We are using the write transaction in which max 2 bmbt
1540 * updates are allowed
1541 */
1542 error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
1543 shift_fsb, &current_ext,
1544 &first_block, &free_list,
1545 XFS_BMAP_MAX_SHIFT_EXTENTS);
1546 if (error)
1547 goto out;
1548
1549 error = xfs_bmap_finish(&tp, &free_list, &committed);
1550 if (error)
1551 goto out;
1552
1553 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1554 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1555 }
1556
1557 return error;
1558
1559out:
1560 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
1561 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1562 return error;
1563}
1564
1565/*
1471 * We need to check that the format of the data fork in the temporary inode is 1566 * We need to check that the format of the data fork in the temporary inode is
1472 * valid for the target inode before doing the swap. This is not a problem with 1567 * valid for the target inode before doing the swap. This is not a problem with
1473 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized 1568 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 900747b25772..935ed2b24edf 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -99,6 +99,8 @@ int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
99 xfs_off_t len); 99 xfs_off_t len);
100int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset, 100int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
101 xfs_off_t len); 101 xfs_off_t len);
102int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
103 xfs_off_t len);
102 104
103/* EOF block manipulation functions */ 105/* EOF block manipulation functions */
104bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); 106bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 2e7989e3a2d6..52f96e16694c 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -823,7 +823,8 @@ xfs_file_fallocate(
823 823
824 if (!S_ISREG(inode->i_mode)) 824 if (!S_ISREG(inode->i_mode))
825 return -EINVAL; 825 return -EINVAL;
826 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 826 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
827 FALLOC_FL_COLLAPSE_RANGE))
827 return -EOPNOTSUPP; 828 return -EOPNOTSUPP;
828 829
829 xfs_ilock(ip, XFS_IOLOCK_EXCL); 830 xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -831,6 +832,20 @@ xfs_file_fallocate(
831 error = xfs_free_file_space(ip, offset, len); 832 error = xfs_free_file_space(ip, offset, len);
832 if (error) 833 if (error)
833 goto out_unlock; 834 goto out_unlock;
835 } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
836 unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
837
838 if (offset & blksize_mask || len & blksize_mask) {
839 error = -EINVAL;
840 goto out_unlock;
841 }
842
843 ASSERT(offset + len < i_size_read(inode));
844 new_size = i_size_read(inode) - len;
845
846 error = xfs_collapse_file_space(ip, offset, len);
847 if (error)
848 goto out_unlock;
834 } else { 849 } else {
835 if (!(mode & FALLOC_FL_KEEP_SIZE) && 850 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
836 offset + len > i_size_read(inode)) { 851 offset + len > i_size_read(inode)) {
@@ -859,7 +874,7 @@ xfs_file_fallocate(
859 if (ip->i_d.di_mode & S_IXGRP) 874 if (ip->i_d.di_mode & S_IXGRP)
860 ip->i_d.di_mode &= ~S_ISGID; 875 ip->i_d.di_mode &= ~S_ISGID;
861 876
862 if (!(mode & FALLOC_FL_PUNCH_HOLE)) 877 if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)))
863 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; 878 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
864 879
865 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 880 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 425dfa45b9a0..a4ae41c179a8 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -603,6 +603,7 @@ DEFINE_INODE_EVENT(xfs_readlink);
603DEFINE_INODE_EVENT(xfs_inactive_symlink); 603DEFINE_INODE_EVENT(xfs_inactive_symlink);
604DEFINE_INODE_EVENT(xfs_alloc_file_space); 604DEFINE_INODE_EVENT(xfs_alloc_file_space);
605DEFINE_INODE_EVENT(xfs_free_file_space); 605DEFINE_INODE_EVENT(xfs_free_file_space);
606DEFINE_INODE_EVENT(xfs_collapse_file_space);
606DEFINE_INODE_EVENT(xfs_readdir); 607DEFINE_INODE_EVENT(xfs_readdir);
607#ifdef CONFIG_XFS_POSIX_ACL 608#ifdef CONFIG_XFS_POSIX_ACL
608DEFINE_INODE_EVENT(xfs_get_acl); 609DEFINE_INODE_EVENT(xfs_get_acl);