aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorNamjae Jeon <namjae.jeon@samsung.com>2015-03-25 00:08:56 -0400
committerDave Chinner <david@fromorbit.com>2015-03-25 00:08:56 -0400
commita904b1ca5751faf5ece8600e18cd3b674afcca1b (patch)
tree2b757182d3b07c4df47b45571f579cb93af8bc9b /fs/xfs
parentdd46c787788d5bf5b974729d43e4c405814a4c7d (diff)
xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate
This patch implements fallocate's FALLOC_FL_INSERT_RANGE for XFS. 1) Make sure that both offset and len are block size aligned. 2) Update the i_size of inode by len bytes. 3) Compute the file's logical block number against offset. If the computed block number is not the starting block of the extent, split the extent such that the block number is the starting block of the extent. 4) Shift all the extents which are lying between [offset, last allocated extent] towards right by len bytes. This step will make a hole of len bytes at offset. Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com> Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c356
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h13
-rw-r--r--fs/xfs/xfs_bmap_util.c131
-rw-r--r--fs/xfs/xfs_bmap_util.h2
-rw-r--r--fs/xfs/xfs_file.c41
-rw-r--r--fs/xfs/xfs_trace.h1
6 files changed, 461 insertions, 83 deletions
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 60cfa90163b8..e8696f5a8041 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5526,52 +5526,92 @@ xfs_bmse_shift_one(
5526 int *current_ext, 5526 int *current_ext,
5527 struct xfs_bmbt_rec_host *gotp, 5527 struct xfs_bmbt_rec_host *gotp,
5528 struct xfs_btree_cur *cur, 5528 struct xfs_btree_cur *cur,
5529 int *logflags) 5529 int *logflags,
5530 enum shift_direction direction)
5530{ 5531{
5531 struct xfs_ifork *ifp; 5532 struct xfs_ifork *ifp;
5532 struct xfs_mount *mp; 5533 struct xfs_mount *mp;
5533 xfs_fileoff_t startoff; 5534 xfs_fileoff_t startoff;
5534 struct xfs_bmbt_rec_host *leftp; 5535 struct xfs_bmbt_rec_host *adj_irecp;
5535 struct xfs_bmbt_irec got; 5536 struct xfs_bmbt_irec got;
5536 struct xfs_bmbt_irec left; 5537 struct xfs_bmbt_irec adj_irec;
5537 int error; 5538 int error;
5538 int i; 5539 int i;
5540 int total_extents;
5539 5541
5540 mp = ip->i_mount; 5542 mp = ip->i_mount;
5541 ifp = XFS_IFORK_PTR(ip, whichfork); 5543 ifp = XFS_IFORK_PTR(ip, whichfork);
5544 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5542 5545
5543 xfs_bmbt_get_all(gotp, &got); 5546 xfs_bmbt_get_all(gotp, &got);
5544 startoff = got.br_startoff - offset_shift_fsb;
5545 5547
5546 /* delalloc extents should be prevented by caller */ 5548 /* delalloc extents should be prevented by caller */
5547 XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); 5549 XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
5548 5550
5549 /* 5551 if (direction == SHIFT_LEFT) {
5550 * Check for merge if we've got an extent to the left, otherwise make 5552 startoff = got.br_startoff - offset_shift_fsb;
5551 * sure there's enough room at the start of the file for the shift. 5553
5552 */ 5554 /*
5553 if (*current_ext) { 5555 * Check for merge if we've got an extent to the left,
5554 /* grab the left extent and check for a large enough hole */ 5556 * otherwise make sure there's enough room at the start
5555 leftp = xfs_iext_get_ext(ifp, *current_ext - 1); 5557 * of the file for the shift.
5556 xfs_bmbt_get_all(leftp, &left); 5558 */
5559 if (!*current_ext) {
5560 if (got.br_startoff < offset_shift_fsb)
5561 return -EINVAL;
5562 goto update_current_ext;
5563 }
5564 /*
5565 * grab the left extent and check for a large
5566 * enough hole.
5567 */
5568 adj_irecp = xfs_iext_get_ext(ifp, *current_ext - 1);
5569 xfs_bmbt_get_all(adj_irecp, &adj_irec);
5557 5570
5558 if (startoff < left.br_startoff + left.br_blockcount) 5571 if (startoff <
5572 adj_irec.br_startoff + adj_irec.br_blockcount)
5559 return -EINVAL; 5573 return -EINVAL;
5560 5574
5561 /* check whether to merge the extent or shift it down */ 5575 /* check whether to merge the extent or shift it down */
5562 if (xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) { 5576 if (xfs_bmse_can_merge(&adj_irec, &got,
5577 offset_shift_fsb)) {
5563 return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, 5578 return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
5564 *current_ext, gotp, leftp, cur, 5579 *current_ext, gotp, adj_irecp,
5565 logflags); 5580 cur, logflags);
5566 } 5581 }
5567 } else if (got.br_startoff < offset_shift_fsb) 5582 } else {
5568 return -EINVAL; 5583 startoff = got.br_startoff + offset_shift_fsb;
5569 5584 /* nothing to move if this is the last extent */
5585 if (*current_ext >= (total_extents - 1))
5586 goto update_current_ext;
5587 /*
5588 * If this is not the last extent in the file, make sure there
5589 * is enough room between current extent and next extent for
5590 * accommodating the shift.
5591 */
5592 adj_irecp = xfs_iext_get_ext(ifp, *current_ext + 1);
5593 xfs_bmbt_get_all(adj_irecp, &adj_irec);
5594 if (startoff + got.br_blockcount > adj_irec.br_startoff)
5595 return -EINVAL;
5596 /*
5597 * Unlike a left shift (which involves a hole punch),
5598 * a right shift does not modify extent neighbors
5599 * in any way. We should never find mergeable extents
5600 * in this scenario. Check anyways and warn if we
5601 * encounter two extents that could be one.
5602 */
5603 if (xfs_bmse_can_merge(&got, &adj_irec, offset_shift_fsb))
5604 WARN_ON_ONCE(1);
5605 }
5570 /* 5606 /*
5571 * Increment the extent index for the next iteration, update the start 5607 * Increment the extent index for the next iteration, update the start
5572 * offset of the in-core extent and update the btree if applicable. 5608 * offset of the in-core extent and update the btree if applicable.
5573 */ 5609 */
5574 (*current_ext)++; 5610update_current_ext:
5611 if (direction == SHIFT_LEFT)
5612 (*current_ext)++;
5613 else
5614 (*current_ext)--;
5575 xfs_bmbt_set_startoff(gotp, startoff); 5615 xfs_bmbt_set_startoff(gotp, startoff);
5576 *logflags |= XFS_ILOG_CORE; 5616 *logflags |= XFS_ILOG_CORE;
5577 if (!cur) { 5617 if (!cur) {
@@ -5587,14 +5627,14 @@ xfs_bmse_shift_one(
5587 5627
5588 got.br_startoff = startoff; 5628 got.br_startoff = startoff;
5589 return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, 5629 return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
5590 got.br_blockcount, got.br_state); 5630 got.br_blockcount, got.br_state);
5591} 5631}
5592 5632
5593/* 5633/*
5594 * Shift extent records to the left to cover a hole. 5634 * Shift extent records to the left/right to cover/create a hole.
5595 * 5635 *
5596 * The maximum number of extents to be shifted in a single operation is 5636 * The maximum number of extents to be shifted in a single operation is
5597 * @num_exts. @start_fsb specifies the file offset to start the shift and the 5637 * @num_exts. @stop_fsb specifies the file offset at which to stop shift and the
5598 * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb 5638 * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
5599 * is the length by which each extent is shifted. If there is no hole to shift 5639 * is the length by which each extent is shifted. If there is no hole to shift
5600 * the extents into, this will be considered invalid operation and we abort 5640 * the extents into, this will be considered invalid operation and we abort
@@ -5604,12 +5644,13 @@ int
5604xfs_bmap_shift_extents( 5644xfs_bmap_shift_extents(
5605 struct xfs_trans *tp, 5645 struct xfs_trans *tp,
5606 struct xfs_inode *ip, 5646 struct xfs_inode *ip,
5607 xfs_fileoff_t start_fsb, 5647 xfs_fileoff_t *next_fsb,
5608 xfs_fileoff_t offset_shift_fsb, 5648 xfs_fileoff_t offset_shift_fsb,
5609 int *done, 5649 int *done,
5610 xfs_fileoff_t *next_fsb, 5650 xfs_fileoff_t stop_fsb,
5611 xfs_fsblock_t *firstblock, 5651 xfs_fsblock_t *firstblock,
5612 struct xfs_bmap_free *flist, 5652 struct xfs_bmap_free *flist,
5653 enum shift_direction direction,
5613 int num_exts) 5654 int num_exts)
5614{ 5655{
5615 struct xfs_btree_cur *cur = NULL; 5656 struct xfs_btree_cur *cur = NULL;
@@ -5619,10 +5660,11 @@ xfs_bmap_shift_extents(
5619 struct xfs_ifork *ifp; 5660 struct xfs_ifork *ifp;
5620 xfs_extnum_t nexts = 0; 5661 xfs_extnum_t nexts = 0;
5621 xfs_extnum_t current_ext; 5662 xfs_extnum_t current_ext;
5663 xfs_extnum_t total_extents;
5664 xfs_extnum_t stop_extent;
5622 int error = 0; 5665 int error = 0;
5623 int whichfork = XFS_DATA_FORK; 5666 int whichfork = XFS_DATA_FORK;
5624 int logflags = 0; 5667 int logflags = 0;
5625 int total_extents;
5626 5668
5627 if (unlikely(XFS_TEST_ERROR( 5669 if (unlikely(XFS_TEST_ERROR(
5628 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 5670 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
@@ -5638,6 +5680,8 @@ xfs_bmap_shift_extents(
5638 5680
5639 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 5681 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5640 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 5682 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5683 ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
5684 ASSERT(*next_fsb != NULLFSBLOCK || direction == SHIFT_RIGHT);
5641 5685
5642 ifp = XFS_IFORK_PTR(ip, whichfork); 5686 ifp = XFS_IFORK_PTR(ip, whichfork);
5643 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 5687 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
@@ -5655,43 +5699,83 @@ xfs_bmap_shift_extents(
5655 } 5699 }
5656 5700
5657 /* 5701 /*
5702 * There may be delalloc extents in the data fork before the range we
5703 * are collapsing out, so we cannot use the count of real extents here.
5704 * Instead we have to calculate it from the incore fork.
5705 */
5706 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5707 if (total_extents == 0) {
5708 *done = 1;
5709 goto del_cursor;
5710 }
5711
5712 /*
5713 * In case of first right shift, we need to initialize next_fsb
5714 */
5715 if (*next_fsb == NULLFSBLOCK) {
5716 gotp = xfs_iext_get_ext(ifp, total_extents - 1);
5717 xfs_bmbt_get_all(gotp, &got);
5718 *next_fsb = got.br_startoff;
5719 if (stop_fsb > *next_fsb) {
5720 *done = 1;
5721 goto del_cursor;
5722 }
5723 }
5724
5725 /* Lookup the extent index at which we have to stop */
5726 if (direction == SHIFT_RIGHT) {
5727 gotp = xfs_iext_bno_to_ext(ifp, stop_fsb, &stop_extent);
5728 /* Make stop_extent exclusive of shift range */
5729 stop_extent--;
5730 } else
5731 stop_extent = total_extents;
5732
5733 /*
5658 * Look up the extent index for the fsb where we start shifting. We can 5734 * Look up the extent index for the fsb where we start shifting. We can
5659 * henceforth iterate with current_ext as extent list changes are locked 5735 * henceforth iterate with current_ext as extent list changes are locked
5660 * out via ilock. 5736 * out via ilock.
5661 * 5737 *
5662 * gotp can be null in 2 cases: 1) if there are no extents or 2) 5738 * gotp can be null in 2 cases: 1) if there are no extents or 2)
5663 * start_fsb lies in a hole beyond which there are no extents. Either 5739 * *next_fsb lies in a hole beyond which there are no extents. Either
5664 * way, we are done. 5740 * way, we are done.
5665 */ 5741 */
5666 gotp = xfs_iext_bno_to_ext(ifp, start_fsb, &current_ext); 5742 gotp = xfs_iext_bno_to_ext(ifp, *next_fsb, &current_ext);
5667 if (!gotp) { 5743 if (!gotp) {
5668 *done = 1; 5744 *done = 1;
5669 goto del_cursor; 5745 goto del_cursor;
5670 } 5746 }
5671 5747
5672 /* 5748 /* some sanity checking before we finally start shifting extents */
5673 * There may be delalloc extents in the data fork before the range we 5749 if ((direction == SHIFT_LEFT && current_ext >= stop_extent) ||
5674 * are collapsing out, so we cannot use the count of real extents here. 5750 (direction == SHIFT_RIGHT && current_ext <= stop_extent)) {
5675 * Instead we have to calculate it from the incore fork. 5751 error = -EIO;
5676 */ 5752 goto del_cursor;
5677 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 5753 }
5678 while (nexts++ < num_exts && current_ext < total_extents) { 5754
5755 while (nexts++ < num_exts) {
5679 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb, 5756 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
5680 &current_ext, gotp, cur, &logflags); 5757 &current_ext, gotp, cur, &logflags,
5758 direction);
5681 if (error) 5759 if (error)
5682 goto del_cursor; 5760 goto del_cursor;
5761 /*
5762 * If there was an extent merge during the shift, the extent
5763 * count can change. Update the total and grab the next record.
5764 */
5765 if (direction == SHIFT_LEFT) {
5766 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5767 stop_extent = total_extents;
5768 }
5683 5769
5684 /* update total extent count and grab the next record */ 5770 if (current_ext == stop_extent) {
5685 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 5771 *done = 1;
5686 if (current_ext >= total_extents) 5772 *next_fsb = NULLFSBLOCK;
5687 break; 5773 break;
5774 }
5688 gotp = xfs_iext_get_ext(ifp, current_ext); 5775 gotp = xfs_iext_get_ext(ifp, current_ext);
5689 } 5776 }
5690 5777
5691 /* Check if we are done */ 5778 if (!*done) {
5692 if (current_ext == total_extents) {
5693 *done = 1;
5694 } else if (next_fsb) {
5695 xfs_bmbt_get_all(gotp, &got); 5779 xfs_bmbt_get_all(gotp, &got);
5696 *next_fsb = got.br_startoff; 5780 *next_fsb = got.br_startoff;
5697 } 5781 }
@@ -5706,3 +5790,189 @@ del_cursor:
5706 5790
5707 return error; 5791 return error;
5708} 5792}
5793
5794/*
5795 * Splits an extent into two extents at split_fsb block such that it is
5796 * the first block of the current_ext. @current_ext is a target extent
5797 * to be split. @split_fsb is a block where the extent is split.
5798 * If split_fsb lies in a hole or the first block of extents, just return 0.
5799 */
5800STATIC int
5801xfs_bmap_split_extent_at(
5802 struct xfs_trans *tp,
5803 struct xfs_inode *ip,
5804 xfs_fileoff_t split_fsb,
5805 xfs_fsblock_t *firstfsb,
5806 struct xfs_bmap_free *free_list)
5807{
5808 int whichfork = XFS_DATA_FORK;
5809 struct xfs_btree_cur *cur = NULL;
5810 struct xfs_bmbt_rec_host *gotp;
5811 struct xfs_bmbt_irec got;
5812 struct xfs_bmbt_irec new; /* split extent */
5813 struct xfs_mount *mp = ip->i_mount;
5814 struct xfs_ifork *ifp;
5815 xfs_fsblock_t gotblkcnt; /* new block count for got */
5816 xfs_extnum_t current_ext;
5817 int error = 0;
5818 int logflags = 0;
5819 int i = 0;
5820
5821 if (unlikely(XFS_TEST_ERROR(
5822 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5823 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5824 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
5825 XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
5826 XFS_ERRLEVEL_LOW, mp);
5827 return -EFSCORRUPTED;
5828 }
5829
5830 if (XFS_FORCED_SHUTDOWN(mp))
5831 return -EIO;
5832
5833 ifp = XFS_IFORK_PTR(ip, whichfork);
5834 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5835 /* Read in all the extents */
5836 error = xfs_iread_extents(tp, ip, whichfork);
5837 if (error)
5838 return error;
5839 }
5840
5841 /*
5842 * gotp can be null in 2 cases: 1) if there are no extents
5843 * or 2) split_fsb lies in a hole beyond which there are
5844 * no extents. Either way, we are done.
5845 */
5846 gotp = xfs_iext_bno_to_ext(ifp, split_fsb, &current_ext);
5847 if (!gotp)
5848 return 0;
5849
5850 xfs_bmbt_get_all(gotp, &got);
5851
5852 /*
5853 * Check split_fsb lies in a hole or the start boundary offset
5854 * of the extent.
5855 */
5856 if (got.br_startoff >= split_fsb)
5857 return 0;
5858
5859 gotblkcnt = split_fsb - got.br_startoff;
5860 new.br_startoff = split_fsb;
5861 new.br_startblock = got.br_startblock + gotblkcnt;
5862 new.br_blockcount = got.br_blockcount - gotblkcnt;
5863 new.br_state = got.br_state;
5864
5865 if (ifp->if_flags & XFS_IFBROOT) {
5866 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5867 cur->bc_private.b.firstblock = *firstfsb;
5868 cur->bc_private.b.flist = free_list;
5869 cur->bc_private.b.flags = 0;
5870 error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5871 got.br_startblock,
5872 got.br_blockcount,
5873 &i);
5874 if (error)
5875 goto del_cursor;
5876 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
5877 }
5878
5879 xfs_bmbt_set_blockcount(gotp, gotblkcnt);
5880 got.br_blockcount = gotblkcnt;
5881
5882 logflags = XFS_ILOG_CORE;
5883 if (cur) {
5884 error = xfs_bmbt_update(cur, got.br_startoff,
5885 got.br_startblock,
5886 got.br_blockcount,
5887 got.br_state);
5888 if (error)
5889 goto del_cursor;
5890 } else
5891 logflags |= XFS_ILOG_DEXT;
5892
5893 /* Add new extent */
5894 current_ext++;
5895 xfs_iext_insert(ip, current_ext, 1, &new, 0);
5896 XFS_IFORK_NEXT_SET(ip, whichfork,
5897 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
5898
5899 if (cur) {
5900 error = xfs_bmbt_lookup_eq(cur, new.br_startoff,
5901 new.br_startblock, new.br_blockcount,
5902 &i);
5903 if (error)
5904 goto del_cursor;
5905 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
5906 cur->bc_rec.b.br_state = new.br_state;
5907
5908 error = xfs_btree_insert(cur, &i);
5909 if (error)
5910 goto del_cursor;
5911 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
5912 }
5913
5914 /*
5915 * Convert to a btree if necessary.
5916 */
5917 if (xfs_bmap_needs_btree(ip, whichfork)) {
5918 int tmp_logflags; /* partial log flag return val */
5919
5920 ASSERT(cur == NULL);
5921 error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list,
5922 &cur, 0, &tmp_logflags, whichfork);
5923 logflags |= tmp_logflags;
5924 }
5925
5926del_cursor:
5927 if (cur) {
5928 cur->bc_private.b.allocated = 0;
5929 xfs_btree_del_cursor(cur,
5930 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5931 }
5932
5933 if (logflags)
5934 xfs_trans_log_inode(tp, ip, logflags);
5935 return error;
5936}
5937
5938int
5939xfs_bmap_split_extent(
5940 struct xfs_inode *ip,
5941 xfs_fileoff_t split_fsb)
5942{
5943 struct xfs_mount *mp = ip->i_mount;
5944 struct xfs_trans *tp;
5945 struct xfs_bmap_free free_list;
5946 xfs_fsblock_t firstfsb;
5947 int committed;
5948 int error;
5949
5950 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
5951 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
5952 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
5953 if (error) {
5954 xfs_trans_cancel(tp, 0);
5955 return error;
5956 }
5957
5958 xfs_ilock(ip, XFS_ILOCK_EXCL);
5959 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
5960
5961 xfs_bmap_init(&free_list, &firstfsb);
5962
5963 error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
5964 &firstfsb, &free_list);
5965 if (error)
5966 goto out;
5967
5968 error = xfs_bmap_finish(&tp, &free_list, &committed);
5969 if (error)
5970 goto out;
5971
5972 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
5973
5974
5975out:
5976 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
5977 return error;
5978}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index b9d8a499d2c4..6aaa0c1c7200 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -166,6 +166,11 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
166 */ 166 */
167#define XFS_BMAP_MAX_SHIFT_EXTENTS 1 167#define XFS_BMAP_MAX_SHIFT_EXTENTS 1
168 168
169enum shift_direction {
170 SHIFT_LEFT = 0,
171 SHIFT_RIGHT,
172};
173
169#ifdef DEBUG 174#ifdef DEBUG
170void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, 175void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
171 int whichfork, unsigned long caller_ip); 176 int whichfork, unsigned long caller_ip);
@@ -211,8 +216,10 @@ int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
211 xfs_extnum_t num); 216 xfs_extnum_t num);
212uint xfs_default_attroffset(struct xfs_inode *ip); 217uint xfs_default_attroffset(struct xfs_inode *ip);
213int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, 218int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
214 xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb, 219 xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
215 int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock, 220 int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
216 struct xfs_bmap_free *flist, int num_exts); 221 struct xfs_bmap_free *flist, enum shift_direction direction,
222 int num_exts);
223int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
217 224
218#endif /* __XFS_BMAP_H__ */ 225#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 22a5dcb70b32..fe1f11b96d0d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1376,22 +1376,19 @@ out:
1376} 1376}
1377 1377
1378/* 1378/*
1379 * xfs_collapse_file_space() 1379 * @next_fsb will keep track of the extent currently undergoing shift.
1380 * This routine frees disk space and shift extent for the given file. 1380 * @stop_fsb will keep track of the extent at which we have to stop.
1381 * The first thing we do is to free data blocks in the specified range 1381 * If we are shifting left, we will start with block (offset + len) and
1382 * by calling xfs_free_file_space(). It would also sync dirty data 1382 * shift each extent till last extent.
1383 * and invalidate page cache over the region on which collapse range 1383 * If we are shifting right, we will start with last extent inside file space
1384 * is working. And Shift extent records to the left to cover a hole. 1384 * and continue until we reach the block corresponding to offset.
1385 * RETURNS:
1386 * 0 on success
1387 * errno on error
1388 *
1389 */ 1385 */
1390int 1386int
1391xfs_collapse_file_space( 1387xfs_shift_file_space(
1392 struct xfs_inode *ip, 1388 struct xfs_inode *ip,
1393 xfs_off_t offset, 1389 xfs_off_t offset,
1394 xfs_off_t len) 1390 xfs_off_t len,
1391 enum shift_direction direction)
1395{ 1392{
1396 int done = 0; 1393 int done = 0;
1397 struct xfs_mount *mp = ip->i_mount; 1394 struct xfs_mount *mp = ip->i_mount;
@@ -1400,21 +1397,26 @@ xfs_collapse_file_space(
1400 struct xfs_bmap_free free_list; 1397 struct xfs_bmap_free free_list;
1401 xfs_fsblock_t first_block; 1398 xfs_fsblock_t first_block;
1402 int committed; 1399 int committed;
1403 xfs_fileoff_t start_fsb; 1400 xfs_fileoff_t stop_fsb;
1404 xfs_fileoff_t next_fsb; 1401 xfs_fileoff_t next_fsb;
1405 xfs_fileoff_t shift_fsb; 1402 xfs_fileoff_t shift_fsb;
1406 1403
1407 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1404 ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
1408 1405
1409 trace_xfs_collapse_file_space(ip); 1406 if (direction == SHIFT_LEFT) {
1407 next_fsb = XFS_B_TO_FSB(mp, offset + len);
1408 stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
1409 } else {
1410 /*
1411 * If right shift, delegate the work of initialization of
1412 * next_fsb to xfs_bmap_shift_extents as it has ilock held.
1413 */
1414 next_fsb = NULLFSBLOCK;
1415 stop_fsb = XFS_B_TO_FSB(mp, offset);
1416 }
1410 1417
1411 next_fsb = XFS_B_TO_FSB(mp, offset + len);
1412 shift_fsb = XFS_B_TO_FSB(mp, len); 1418 shift_fsb = XFS_B_TO_FSB(mp, len);
1413 1419
1414 error = xfs_free_file_space(ip, offset, len);
1415 if (error)
1416 return error;
1417
1418 /* 1420 /*
1419 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation 1421 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
1420 * into the accessible region of the file. 1422 * into the accessible region of the file.
@@ -1427,20 +1429,28 @@ xfs_collapse_file_space(
1427 1429
1428 /* 1430 /*
1429 * Writeback and invalidate cache for the remainder of the file as we're 1431 * Writeback and invalidate cache for the remainder of the file as we're
1430 * about to shift down every extent from the collapse range to EOF. The 1432 * about to shift down every extent from offset to EOF.
1431 * free of the collapse range above might have already done some of
1432 * this, but we shouldn't rely on it to do anything outside of the range
1433 * that was freed.
1434 */ 1433 */
1435 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 1434 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
1436 offset + len, -1); 1435 offset, -1);
1437 if (error) 1436 if (error)
1438 return error; 1437 return error;
1439 error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, 1438 error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
1440 (offset + len) >> PAGE_CACHE_SHIFT, -1); 1439 offset >> PAGE_CACHE_SHIFT, -1);
1441 if (error) 1440 if (error)
1442 return error; 1441 return error;
1443 1442
1443 /*
1444 * The extent shifting code works on extent granularity. So, if
1445 * stop_fsb is not the starting block of extent, we need to split
1446 * the extent at stop_fsb.
1447 */
1448 if (direction == SHIFT_RIGHT) {
1449 error = xfs_bmap_split_extent(ip, stop_fsb);
1450 if (error)
1451 return error;
1452 }
1453
1444 while (!error && !done) { 1454 while (!error && !done) {
1445 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 1455 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1446 /* 1456 /*
@@ -1464,7 +1474,7 @@ xfs_collapse_file_space(
1464 if (error) 1474 if (error)
1465 goto out; 1475 goto out;
1466 1476
1467 xfs_trans_ijoin(tp, ip, 0); 1477 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1468 1478
1469 xfs_bmap_init(&free_list, &first_block); 1479 xfs_bmap_init(&free_list, &first_block);
1470 1480
@@ -1472,10 +1482,9 @@ xfs_collapse_file_space(
1472 * We are using the write transaction in which max 2 bmbt 1482 * We are using the write transaction in which max 2 bmbt
1473 * updates are allowed 1483 * updates are allowed
1474 */ 1484 */
1475 start_fsb = next_fsb; 1485 error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
1476 error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb, 1486 &done, stop_fsb, &first_block, &free_list,
1477 &done, &next_fsb, &first_block, &free_list, 1487 direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
1478 XFS_BMAP_MAX_SHIFT_EXTENTS);
1479 if (error) 1488 if (error)
1480 goto out; 1489 goto out;
1481 1490
@@ -1484,18 +1493,70 @@ xfs_collapse_file_space(
1484 goto out; 1493 goto out;
1485 1494
1486 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1495 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1487 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1488 } 1496 }
1489 1497
1490 return error; 1498 return error;
1491 1499
1492out: 1500out:
1493 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1501 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
1494 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1495 return error; 1502 return error;
1496} 1503}
1497 1504
1498/* 1505/*
1506 * xfs_collapse_file_space()
1507 * This routine frees disk space and shift extent for the given file.
1508 * The first thing we do is to free data blocks in the specified range
1509 * by calling xfs_free_file_space(). It would also sync dirty data
1510 * and invalidate page cache over the region on which collapse range
1511 * is working. And Shift extent records to the left to cover a hole.
1512 * RETURNS:
1513 * 0 on success
1514 * errno on error
1515 *
1516 */
1517int
1518xfs_collapse_file_space(
1519 struct xfs_inode *ip,
1520 xfs_off_t offset,
1521 xfs_off_t len)
1522{
1523 int error;
1524
1525 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1526 trace_xfs_collapse_file_space(ip);
1527
1528 error = xfs_free_file_space(ip, offset, len);
1529 if (error)
1530 return error;
1531
1532 return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT);
1533}
1534
1535/*
1536 * xfs_insert_file_space()
1537 * This routine creates hole space by shifting extents for the given file.
1538 * The first thing we do is to sync dirty data and invalidate page cache
1539 * over the region on which insert range is working. And split an extent
1540 * to two extents at given offset by calling xfs_bmap_split_extent.
1541 * And shift all extent records which are lying between [offset,
1542 * last allocated extent] to the right to reserve hole range.
1543 * RETURNS:
1544 * 0 on success
1545 * errno on error
1546 */
1547int
1548xfs_insert_file_space(
1549 struct xfs_inode *ip,
1550 loff_t offset,
1551 loff_t len)
1552{
1553 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1554 trace_xfs_insert_file_space(ip);
1555
1556 return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT);
1557}
1558
1559/*
1499 * We need to check that the format of the data fork in the temporary inode is 1560 * We need to check that the format of the data fork in the temporary inode is
1500 * valid for the target inode before doing the swap. This is not a problem with 1561 * valid for the target inode before doing the swap. This is not a problem with
1501 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized 1562 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 736429a72a12..af97d9a1dfb4 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -63,6 +63,8 @@ int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
63 xfs_off_t len); 63 xfs_off_t len);
64int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset, 64int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
65 xfs_off_t len); 65 xfs_off_t len);
66int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
67 xfs_off_t len);
66 68
67/* EOF block manipulation functions */ 69/* EOF block manipulation functions */
68bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); 70bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ce615d12fb44..edeaccc7961a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -816,6 +816,11 @@ xfs_file_write_iter(
816 return ret; 816 return ret;
817} 817}
818 818
819#define XFS_FALLOC_FL_SUPPORTED \
820 (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
821 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
822 FALLOC_FL_INSERT_RANGE)
823
819STATIC long 824STATIC long
820xfs_file_fallocate( 825xfs_file_fallocate(
821 struct file *file, 826 struct file *file,
@@ -829,11 +834,11 @@ xfs_file_fallocate(
829 enum xfs_prealloc_flags flags = 0; 834 enum xfs_prealloc_flags flags = 0;
830 uint iolock = XFS_IOLOCK_EXCL; 835 uint iolock = XFS_IOLOCK_EXCL;
831 loff_t new_size = 0; 836 loff_t new_size = 0;
837 bool do_file_insert = 0;
832 838
833 if (!S_ISREG(inode->i_mode)) 839 if (!S_ISREG(inode->i_mode))
834 return -EINVAL; 840 return -EINVAL;
835 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | 841 if (mode & ~XFS_FALLOC_FL_SUPPORTED)
836 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
837 return -EOPNOTSUPP; 842 return -EOPNOTSUPP;
838 843
839 xfs_ilock(ip, iolock); 844 xfs_ilock(ip, iolock);
@@ -867,6 +872,27 @@ xfs_file_fallocate(
867 error = xfs_collapse_file_space(ip, offset, len); 872 error = xfs_collapse_file_space(ip, offset, len);
868 if (error) 873 if (error)
869 goto out_unlock; 874 goto out_unlock;
875 } else if (mode & FALLOC_FL_INSERT_RANGE) {
876 unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
877
878 new_size = i_size_read(inode) + len;
879 if (offset & blksize_mask || len & blksize_mask) {
880 error = -EINVAL;
881 goto out_unlock;
882 }
883
884 /* check the new inode size does not wrap through zero */
885 if (new_size > inode->i_sb->s_maxbytes) {
886 error = -EFBIG;
887 goto out_unlock;
888 }
889
890 /* Offset should be less than i_size */
891 if (offset >= i_size_read(inode)) {
892 error = -EINVAL;
893 goto out_unlock;
894 }
895 do_file_insert = 1;
870 } else { 896 } else {
871 flags |= XFS_PREALLOC_SET; 897 flags |= XFS_PREALLOC_SET;
872 898
@@ -901,8 +927,19 @@ xfs_file_fallocate(
901 iattr.ia_valid = ATTR_SIZE; 927 iattr.ia_valid = ATTR_SIZE;
902 iattr.ia_size = new_size; 928 iattr.ia_size = new_size;
903 error = xfs_setattr_size(ip, &iattr); 929 error = xfs_setattr_size(ip, &iattr);
930 if (error)
931 goto out_unlock;
904 } 932 }
905 933
934 /*
935 * Perform hole insertion now that the file size has been
936 * updated so that if we crash during the operation we don't
937 * leave shifted extents past EOF and hence losing access to
938 * the data that is contained within them.
939 */
940 if (do_file_insert)
941 error = xfs_insert_file_space(ip, offset, len);
942
906out_unlock: 943out_unlock:
907 xfs_iunlock(ip, iolock); 944 xfs_iunlock(ip, iolock);
908 return error; 945 return error;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 51372e34d988..7e45fa155ea8 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -664,6 +664,7 @@ DEFINE_INODE_EVENT(xfs_alloc_file_space);
664DEFINE_INODE_EVENT(xfs_free_file_space); 664DEFINE_INODE_EVENT(xfs_free_file_space);
665DEFINE_INODE_EVENT(xfs_zero_file_space); 665DEFINE_INODE_EVENT(xfs_zero_file_space);
666DEFINE_INODE_EVENT(xfs_collapse_file_space); 666DEFINE_INODE_EVENT(xfs_collapse_file_space);
667DEFINE_INODE_EVENT(xfs_insert_file_space);
667DEFINE_INODE_EVENT(xfs_readdir); 668DEFINE_INODE_EVENT(xfs_readdir);
668#ifdef CONFIG_XFS_POSIX_ACL 669#ifdef CONFIG_XFS_POSIX_ACL
669DEFINE_INODE_EVENT(xfs_get_acl); 670DEFINE_INODE_EVENT(xfs_get_acl);