aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c356
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h13
-rw-r--r--fs/xfs/xfs_bmap_util.c131
-rw-r--r--fs/xfs/xfs_bmap_util.h2
-rw-r--r--fs/xfs/xfs_file.c41
-rw-r--r--fs/xfs/xfs_trace.h1
6 files changed, 461 insertions, 83 deletions
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 60cfa90163b8..e8696f5a8041 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5526,52 +5526,92 @@ xfs_bmse_shift_one(
5526 int *current_ext, 5526 int *current_ext,
5527 struct xfs_bmbt_rec_host *gotp, 5527 struct xfs_bmbt_rec_host *gotp,
5528 struct xfs_btree_cur *cur, 5528 struct xfs_btree_cur *cur,
5529 int *logflags) 5529 int *logflags,
5530 enum shift_direction direction)
5530{ 5531{
5531 struct xfs_ifork *ifp; 5532 struct xfs_ifork *ifp;
5532 struct xfs_mount *mp; 5533 struct xfs_mount *mp;
5533 xfs_fileoff_t startoff; 5534 xfs_fileoff_t startoff;
5534 struct xfs_bmbt_rec_host *leftp; 5535 struct xfs_bmbt_rec_host *adj_irecp;
5535 struct xfs_bmbt_irec got; 5536 struct xfs_bmbt_irec got;
5536 struct xfs_bmbt_irec left; 5537 struct xfs_bmbt_irec adj_irec;
5537 int error; 5538 int error;
5538 int i; 5539 int i;
5540 int total_extents;
5539 5541
5540 mp = ip->i_mount; 5542 mp = ip->i_mount;
5541 ifp = XFS_IFORK_PTR(ip, whichfork); 5543 ifp = XFS_IFORK_PTR(ip, whichfork);
5544 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5542 5545
5543 xfs_bmbt_get_all(gotp, &got); 5546 xfs_bmbt_get_all(gotp, &got);
5544 startoff = got.br_startoff - offset_shift_fsb;
5545 5547
5546 /* delalloc extents should be prevented by caller */ 5548 /* delalloc extents should be prevented by caller */
5547 XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); 5549 XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
5548 5550
5549 /* 5551 if (direction == SHIFT_LEFT) {
5550 * Check for merge if we've got an extent to the left, otherwise make 5552 startoff = got.br_startoff - offset_shift_fsb;
5551 * sure there's enough room at the start of the file for the shift. 5553
5552 */ 5554 /*
5553 if (*current_ext) { 5555 * Check for merge if we've got an extent to the left,
5554 /* grab the left extent and check for a large enough hole */ 5556 * otherwise make sure there's enough room at the start
5555 leftp = xfs_iext_get_ext(ifp, *current_ext - 1); 5557 * of the file for the shift.
5556 xfs_bmbt_get_all(leftp, &left); 5558 */
5559 if (!*current_ext) {
5560 if (got.br_startoff < offset_shift_fsb)
5561 return -EINVAL;
5562 goto update_current_ext;
5563 }
5564 /*
5565 * grab the left extent and check for a large
5566 * enough hole.
5567 */
5568 adj_irecp = xfs_iext_get_ext(ifp, *current_ext - 1);
5569 xfs_bmbt_get_all(adj_irecp, &adj_irec);
5557 5570
5558 if (startoff < left.br_startoff + left.br_blockcount) 5571 if (startoff <
5572 adj_irec.br_startoff + adj_irec.br_blockcount)
5559 return -EINVAL; 5573 return -EINVAL;
5560 5574
5561 /* check whether to merge the extent or shift it down */ 5575 /* check whether to merge the extent or shift it down */
5562 if (xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) { 5576 if (xfs_bmse_can_merge(&adj_irec, &got,
5577 offset_shift_fsb)) {
5563 return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, 5578 return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
5564 *current_ext, gotp, leftp, cur, 5579 *current_ext, gotp, adj_irecp,
5565 logflags); 5580 cur, logflags);
5566 } 5581 }
5567 } else if (got.br_startoff < offset_shift_fsb) 5582 } else {
5568 return -EINVAL; 5583 startoff = got.br_startoff + offset_shift_fsb;
5569 5584 /* nothing to move if this is the last extent */
5585 if (*current_ext >= (total_extents - 1))
5586 goto update_current_ext;
5587 /*
5588 * If this is not the last extent in the file, make sure there
5589 * is enough room between current extent and next extent for
5590 * accommodating the shift.
5591 */
5592 adj_irecp = xfs_iext_get_ext(ifp, *current_ext + 1);
5593 xfs_bmbt_get_all(adj_irecp, &adj_irec);
5594 if (startoff + got.br_blockcount > adj_irec.br_startoff)
5595 return -EINVAL;
5596 /*
5597 * Unlike a left shift (which involves a hole punch),
5598 * a right shift does not modify extent neighbors
5599 * in any way. We should never find mergeable extents
5600 * in this scenario. Check anyways and warn if we
5601 * encounter two extents that could be one.
5602 */
5603 if (xfs_bmse_can_merge(&got, &adj_irec, offset_shift_fsb))
5604 WARN_ON_ONCE(1);
5605 }
5570 /* 5606 /*
5571 * Increment the extent index for the next iteration, update the start 5607 * Increment the extent index for the next iteration, update the start
5572 * offset of the in-core extent and update the btree if applicable. 5608 * offset of the in-core extent and update the btree if applicable.
5573 */ 5609 */
5574 (*current_ext)++; 5610update_current_ext:
5611 if (direction == SHIFT_LEFT)
5612 (*current_ext)++;
5613 else
5614 (*current_ext)--;
5575 xfs_bmbt_set_startoff(gotp, startoff); 5615 xfs_bmbt_set_startoff(gotp, startoff);
5576 *logflags |= XFS_ILOG_CORE; 5616 *logflags |= XFS_ILOG_CORE;
5577 if (!cur) { 5617 if (!cur) {
@@ -5587,14 +5627,14 @@ xfs_bmse_shift_one(
5587 5627
5588 got.br_startoff = startoff; 5628 got.br_startoff = startoff;
5589 return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, 5629 return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
5590 got.br_blockcount, got.br_state); 5630 got.br_blockcount, got.br_state);
5591} 5631}
5592 5632
5593/* 5633/*
5594 * Shift extent records to the left to cover a hole. 5634 * Shift extent records to the left/right to cover/create a hole.
5595 * 5635 *
5596 * The maximum number of extents to be shifted in a single operation is 5636 * The maximum number of extents to be shifted in a single operation is
5597 * @num_exts. @start_fsb specifies the file offset to start the shift and the 5637 * @num_exts. @stop_fsb specifies the file offset at which to stop shift and the
5598 * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb 5638 * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
5599 * is the length by which each extent is shifted. If there is no hole to shift 5639 * is the length by which each extent is shifted. If there is no hole to shift
5600 * the extents into, this will be considered invalid operation and we abort 5640 * the extents into, this will be considered invalid operation and we abort
@@ -5604,12 +5644,13 @@ int
5604xfs_bmap_shift_extents( 5644xfs_bmap_shift_extents(
5605 struct xfs_trans *tp, 5645 struct xfs_trans *tp,
5606 struct xfs_inode *ip, 5646 struct xfs_inode *ip,
5607 xfs_fileoff_t start_fsb, 5647 xfs_fileoff_t *next_fsb,
5608 xfs_fileoff_t offset_shift_fsb, 5648 xfs_fileoff_t offset_shift_fsb,
5609 int *done, 5649 int *done,
5610 xfs_fileoff_t *next_fsb, 5650 xfs_fileoff_t stop_fsb,
5611 xfs_fsblock_t *firstblock, 5651 xfs_fsblock_t *firstblock,
5612 struct xfs_bmap_free *flist, 5652 struct xfs_bmap_free *flist,
5653 enum shift_direction direction,
5613 int num_exts) 5654 int num_exts)
5614{ 5655{
5615 struct xfs_btree_cur *cur = NULL; 5656 struct xfs_btree_cur *cur = NULL;
@@ -5619,10 +5660,11 @@ xfs_bmap_shift_extents(
5619 struct xfs_ifork *ifp; 5660 struct xfs_ifork *ifp;
5620 xfs_extnum_t nexts = 0; 5661 xfs_extnum_t nexts = 0;
5621 xfs_extnum_t current_ext; 5662 xfs_extnum_t current_ext;
5663 xfs_extnum_t total_extents;
5664 xfs_extnum_t stop_extent;
5622 int error = 0; 5665 int error = 0;
5623 int whichfork = XFS_DATA_FORK; 5666 int whichfork = XFS_DATA_FORK;
5624 int logflags = 0; 5667 int logflags = 0;
5625 int total_extents;
5626 5668
5627 if (unlikely(XFS_TEST_ERROR( 5669 if (unlikely(XFS_TEST_ERROR(
5628 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 5670 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
@@ -5638,6 +5680,8 @@ xfs_bmap_shift_extents(
5638 5680
5639 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 5681 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5640 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 5682 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5683 ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
5684 ASSERT(*next_fsb != NULLFSBLOCK || direction == SHIFT_RIGHT);
5641 5685
5642 ifp = XFS_IFORK_PTR(ip, whichfork); 5686 ifp = XFS_IFORK_PTR(ip, whichfork);
5643 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 5687 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
@@ -5655,43 +5699,83 @@ xfs_bmap_shift_extents(
5655 } 5699 }
5656 5700
5657 /* 5701 /*
5702 * There may be delalloc extents in the data fork before the range we
5703 * are collapsing out, so we cannot use the count of real extents here.
5704 * Instead we have to calculate it from the incore fork.
5705 */
5706 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5707 if (total_extents == 0) {
5708 *done = 1;
5709 goto del_cursor;
5710 }
5711
5712 /*
5713 * In case of first right shift, we need to initialize next_fsb
5714 */
5715 if (*next_fsb == NULLFSBLOCK) {
5716 gotp = xfs_iext_get_ext(ifp, total_extents - 1);
5717 xfs_bmbt_get_all(gotp, &got);
5718 *next_fsb = got.br_startoff;
5719 if (stop_fsb > *next_fsb) {
5720 *done = 1;
5721 goto del_cursor;
5722 }
5723 }
5724
5725 /* Lookup the extent index at which we have to stop */
5726 if (direction == SHIFT_RIGHT) {
5727 gotp = xfs_iext_bno_to_ext(ifp, stop_fsb, &stop_extent);
5728 /* Make stop_extent exclusive of shift range */
5729 stop_extent--;
5730 } else
5731 stop_extent = total_extents;
5732
5733 /*
5658 * Look up the extent index for the fsb where we start shifting. We can 5734 * Look up the extent index for the fsb where we start shifting. We can
5659 * henceforth iterate with current_ext as extent list changes are locked 5735 * henceforth iterate with current_ext as extent list changes are locked
5660 * out via ilock. 5736 * out via ilock.
5661 * 5737 *
5662 * gotp can be null in 2 cases: 1) if there are no extents or 2) 5738 * gotp can be null in 2 cases: 1) if there are no extents or 2)
5663 * start_fsb lies in a hole beyond which there are no extents. Either 5739 * *next_fsb lies in a hole beyond which there are no extents. Either
5664 * way, we are done. 5740 * way, we are done.
5665 */ 5741 */
5666 gotp = xfs_iext_bno_to_ext(ifp, start_fsb, &current_ext); 5742 gotp = xfs_iext_bno_to_ext(ifp, *next_fsb, &current_ext);
5667 if (!gotp) { 5743 if (!gotp) {
5668 *done = 1; 5744 *done = 1;
5669 goto del_cursor; 5745 goto del_cursor;
5670 } 5746 }
5671 5747
5672 /* 5748 /* some sanity checking before we finally start shifting extents */
5673 * There may be delalloc extents in the data fork before the range we 5749 if ((direction == SHIFT_LEFT && current_ext >= stop_extent) ||
5674 * are collapsing out, so we cannot use the count of real extents here. 5750 (direction == SHIFT_RIGHT && current_ext <= stop_extent)) {
5675 * Instead we have to calculate it from the incore fork. 5751 error = -EIO;
5676 */ 5752 goto del_cursor;
5677 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 5753 }
5678 while (nexts++ < num_exts && current_ext < total_extents) { 5754
5755 while (nexts++ < num_exts) {
5679 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb, 5756 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
5680 &current_ext, gotp, cur, &logflags); 5757 &current_ext, gotp, cur, &logflags,
5758 direction);
5681 if (error) 5759 if (error)
5682 goto del_cursor; 5760 goto del_cursor;
5761 /*
5762 * If there was an extent merge during the shift, the extent
5763 * count can change. Update the total and grab the next record.
5764 */
5765 if (direction == SHIFT_LEFT) {
5766 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5767 stop_extent = total_extents;
5768 }
5683 5769
5684 /* update total extent count and grab the next record */ 5770 if (current_ext == stop_extent) {
5685 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 5771 *done = 1;
5686 if (current_ext >= total_extents) 5772 *next_fsb = NULLFSBLOCK;
5687 break; 5773 break;
5774 }
5688 gotp = xfs_iext_get_ext(ifp, current_ext); 5775 gotp = xfs_iext_get_ext(ifp, current_ext);
5689 } 5776 }
5690 5777
5691 /* Check if we are done */ 5778 if (!*done) {
5692 if (current_ext == total_extents) {
5693 *done = 1;
5694 } else if (next_fsb) {
5695 xfs_bmbt_get_all(gotp, &got); 5779 xfs_bmbt_get_all(gotp, &got);
5696 *next_fsb = got.br_startoff; 5780 *next_fsb = got.br_startoff;
5697 } 5781 }
@@ -5706,3 +5790,189 @@ del_cursor:
5706 5790
5707 return error; 5791 return error;
5708} 5792}
5793
5794/*
5795 * Splits an extent into two extents at split_fsb block such that it is
5796 * the first block of the current_ext. @current_ext is a target extent
5797 * to be split. @split_fsb is a block where the extent is split.
5798 * If split_fsb lies in a hole or the first block of extents, just return 0.
5799 */
5800STATIC int
5801xfs_bmap_split_extent_at(
5802 struct xfs_trans *tp,
5803 struct xfs_inode *ip,
5804 xfs_fileoff_t split_fsb,
5805 xfs_fsblock_t *firstfsb,
5806 struct xfs_bmap_free *free_list)
5807{
5808 int whichfork = XFS_DATA_FORK;
5809 struct xfs_btree_cur *cur = NULL;
5810 struct xfs_bmbt_rec_host *gotp;
5811 struct xfs_bmbt_irec got;
5812 struct xfs_bmbt_irec new; /* split extent */
5813 struct xfs_mount *mp = ip->i_mount;
5814 struct xfs_ifork *ifp;
5815 xfs_fsblock_t gotblkcnt; /* new block count for got */
5816 xfs_extnum_t current_ext;
5817 int error = 0;
5818 int logflags = 0;
5819 int i = 0;
5820
5821 if (unlikely(XFS_TEST_ERROR(
5822 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5823 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5824 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
5825 XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
5826 XFS_ERRLEVEL_LOW, mp);
5827 return -EFSCORRUPTED;
5828 }
5829
5830 if (XFS_FORCED_SHUTDOWN(mp))
5831 return -EIO;
5832
5833 ifp = XFS_IFORK_PTR(ip, whichfork);
5834 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5835 /* Read in all the extents */
5836 error = xfs_iread_extents(tp, ip, whichfork);
5837 if (error)
5838 return error;
5839 }
5840
5841 /*
5842 * gotp can be null in 2 cases: 1) if there are no extents
5843 * or 2) split_fsb lies in a hole beyond which there are
5844 * no extents. Either way, we are done.
5845 */
5846 gotp = xfs_iext_bno_to_ext(ifp, split_fsb, &current_ext);
5847 if (!gotp)
5848 return 0;
5849
5850 xfs_bmbt_get_all(gotp, &got);
5851
5852 /*
5853 * Check split_fsb lies in a hole or the start boundary offset
5854 * of the extent.
5855 */
5856 if (got.br_startoff >= split_fsb)
5857 return 0;
5858
5859 gotblkcnt = split_fsb - got.br_startoff;
5860 new.br_startoff = split_fsb;
5861 new.br_startblock = got.br_startblock + gotblkcnt;
5862 new.br_blockcount = got.br_blockcount - gotblkcnt;
5863 new.br_state = got.br_state;
5864
5865 if (ifp->if_flags & XFS_IFBROOT) {
5866 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5867 cur->bc_private.b.firstblock = *firstfsb;
5868 cur->bc_private.b.flist = free_list;
5869 cur->bc_private.b.flags = 0;
5870 error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5871 got.br_startblock,
5872 got.br_blockcount,
5873 &i);
5874 if (error)
5875 goto del_cursor;
5876 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
5877 }
5878
5879 xfs_bmbt_set_blockcount(gotp, gotblkcnt);
5880 got.br_blockcount = gotblkcnt;
5881
5882 logflags = XFS_ILOG_CORE;
5883 if (cur) {
5884 error = xfs_bmbt_update(cur, got.br_startoff,
5885 got.br_startblock,
5886 got.br_blockcount,
5887 got.br_state);
5888 if (error)
5889 goto del_cursor;
5890 } else
5891 logflags |= XFS_ILOG_DEXT;
5892
5893 /* Add new extent */
5894 current_ext++;
5895 xfs_iext_insert(ip, current_ext, 1, &new, 0);
5896 XFS_IFORK_NEXT_SET(ip, whichfork,
5897 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
5898
5899 if (cur) {
5900 error = xfs_bmbt_lookup_eq(cur, new.br_startoff,
5901 new.br_startblock, new.br_blockcount,
5902 &i);
5903 if (error)
5904 goto del_cursor;
5905 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
5906 cur->bc_rec.b.br_state = new.br_state;
5907
5908 error = xfs_btree_insert(cur, &i);
5909 if (error)
5910 goto del_cursor;
5911 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
5912 }
5913
5914 /*
5915 * Convert to a btree if necessary.
5916 */
5917 if (xfs_bmap_needs_btree(ip, whichfork)) {
5918 int tmp_logflags; /* partial log flag return val */
5919
5920 ASSERT(cur == NULL);
5921 error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list,
5922 &cur, 0, &tmp_logflags, whichfork);
5923 logflags |= tmp_logflags;
5924 }
5925
5926del_cursor:
5927 if (cur) {
5928 cur->bc_private.b.allocated = 0;
5929 xfs_btree_del_cursor(cur,
5930 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5931 }
5932
5933 if (logflags)
5934 xfs_trans_log_inode(tp, ip, logflags);
5935 return error;
5936}
5937
5938int
5939xfs_bmap_split_extent(
5940 struct xfs_inode *ip,
5941 xfs_fileoff_t split_fsb)
5942{
5943 struct xfs_mount *mp = ip->i_mount;
5944 struct xfs_trans *tp;
5945 struct xfs_bmap_free free_list;
5946 xfs_fsblock_t firstfsb;
5947 int committed;
5948 int error;
5949
5950 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
5951 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
5952 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
5953 if (error) {
5954 xfs_trans_cancel(tp, 0);
5955 return error;
5956 }
5957
5958 xfs_ilock(ip, XFS_ILOCK_EXCL);
5959 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
5960
5961 xfs_bmap_init(&free_list, &firstfsb);
5962
5963 error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
5964 &firstfsb, &free_list);
5965 if (error)
5966 goto out;
5967
5968 error = xfs_bmap_finish(&tp, &free_list, &committed);
5969 if (error)
5970 goto out;
5971
5972 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
5973
5974
5975out:
5976 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
5977 return error;
5978}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index b9d8a499d2c4..6aaa0c1c7200 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -166,6 +166,11 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
166 */ 166 */
167#define XFS_BMAP_MAX_SHIFT_EXTENTS 1 167#define XFS_BMAP_MAX_SHIFT_EXTENTS 1
168 168
169enum shift_direction {
170 SHIFT_LEFT = 0,
171 SHIFT_RIGHT,
172};
173
169#ifdef DEBUG 174#ifdef DEBUG
170void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, 175void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
171 int whichfork, unsigned long caller_ip); 176 int whichfork, unsigned long caller_ip);
@@ -211,8 +216,10 @@ int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
211 xfs_extnum_t num); 216 xfs_extnum_t num);
212uint xfs_default_attroffset(struct xfs_inode *ip); 217uint xfs_default_attroffset(struct xfs_inode *ip);
213int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, 218int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
214 xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb, 219 xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
215 int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock, 220 int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
216 struct xfs_bmap_free *flist, int num_exts); 221 struct xfs_bmap_free *flist, enum shift_direction direction,
222 int num_exts);
223int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
217 224
218#endif /* __XFS_BMAP_H__ */ 225#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 22a5dcb70b32..fe1f11b96d0d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1376,22 +1376,19 @@ out:
1376} 1376}
1377 1377
1378/* 1378/*
1379 * xfs_collapse_file_space() 1379 * @next_fsb will keep track of the extent currently undergoing shift.
1380 * This routine frees disk space and shift extent for the given file. 1380 * @stop_fsb will keep track of the extent at which we have to stop.
1381 * The first thing we do is to free data blocks in the specified range 1381 * If we are shifting left, we will start with block (offset + len) and
1382 * by calling xfs_free_file_space(). It would also sync dirty data 1382 * shift each extent till last extent.
1383 * and invalidate page cache over the region on which collapse range 1383 * If we are shifting right, we will start with last extent inside file space
1384 * is working. And Shift extent records to the left to cover a hole. 1384 * and continue until we reach the block corresponding to offset.
1385 * RETURNS:
1386 * 0 on success
1387 * errno on error
1388 *
1389 */ 1385 */
1390int 1386int
1391xfs_collapse_file_space( 1387xfs_shift_file_space(
1392 struct xfs_inode *ip, 1388 struct xfs_inode *ip,
1393 xfs_off_t offset, 1389 xfs_off_t offset,
1394 xfs_off_t len) 1390 xfs_off_t len,
1391 enum shift_direction direction)
1395{ 1392{
1396 int done = 0; 1393 int done = 0;
1397 struct xfs_mount *mp = ip->i_mount; 1394 struct xfs_mount *mp = ip->i_mount;
@@ -1400,21 +1397,26 @@ xfs_collapse_file_space(
1400 struct xfs_bmap_free free_list; 1397 struct xfs_bmap_free free_list;
1401 xfs_fsblock_t first_block; 1398 xfs_fsblock_t first_block;
1402 int committed; 1399 int committed;
1403 xfs_fileoff_t start_fsb; 1400 xfs_fileoff_t stop_fsb;
1404 xfs_fileoff_t next_fsb; 1401 xfs_fileoff_t next_fsb;
1405 xfs_fileoff_t shift_fsb; 1402 xfs_fileoff_t shift_fsb;
1406 1403
1407 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1404 ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
1408 1405
1409 trace_xfs_collapse_file_space(ip); 1406 if (direction == SHIFT_LEFT) {
1407 next_fsb = XFS_B_TO_FSB(mp, offset + len);
1408 stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
1409 } else {
1410 /*
1411 * If right shift, delegate the work of initialization of
1412 * next_fsb to xfs_bmap_shift_extents as it has ilock held.
1413 */
1414 next_fsb = NULLFSBLOCK;
1415 stop_fsb = XFS_B_TO_FSB(mp, offset);
1416 }
1410 1417
1411 next_fsb = XFS_B_TO_FSB(mp, offset + len);
1412 shift_fsb = XFS_B_TO_FSB(mp, len); 1418 shift_fsb = XFS_B_TO_FSB(mp, len);
1413 1419
1414 error = xfs_free_file_space(ip, offset, len);
1415 if (error)
1416 return error;
1417
1418 /* 1420 /*
1419 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation 1421 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
1420 * into the accessible region of the file. 1422 * into the accessible region of the file.
@@ -1427,20 +1429,28 @@ xfs_collapse_file_space(
1427 1429
1428 /* 1430 /*
1429 * Writeback and invalidate cache for the remainder of the file as we're 1431 * Writeback and invalidate cache for the remainder of the file as we're
1430 * about to shift down every extent from the collapse range to EOF. The 1432 * about to shift down every extent from offset to EOF.
1431 * free of the collapse range above might have already done some of
1432 * this, but we shouldn't rely on it to do anything outside of the range
1433 * that was freed.
1434 */ 1433 */
1435 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 1434 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
1436 offset + len, -1); 1435 offset, -1);
1437 if (error) 1436 if (error)
1438 return error; 1437 return error;
1439 error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, 1438 error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
1440 (offset + len) >> PAGE_CACHE_SHIFT, -1); 1439 offset >> PAGE_CACHE_SHIFT, -1);
1441 if (error) 1440 if (error)
1442 return error; 1441 return error;
1443 1442
1443 /*
1444 * The extent shifting code works on extent granularity. So, if
1445 * stop_fsb is not the starting block of extent, we need to split
1446 * the extent at stop_fsb.
1447 */
1448 if (direction == SHIFT_RIGHT) {
1449 error = xfs_bmap_split_extent(ip, stop_fsb);
1450 if (error)
1451 return error;
1452 }
1453
1444 while (!error && !done) { 1454 while (!error && !done) {
1445 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 1455 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1446 /* 1456 /*
@@ -1464,7 +1474,7 @@ xfs_collapse_file_space(
1464 if (error) 1474 if (error)
1465 goto out; 1475 goto out;
1466 1476
1467 xfs_trans_ijoin(tp, ip, 0); 1477 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1468 1478
1469 xfs_bmap_init(&free_list, &first_block); 1479 xfs_bmap_init(&free_list, &first_block);
1470 1480
@@ -1472,10 +1482,9 @@ xfs_collapse_file_space(
1472 * We are using the write transaction in which max 2 bmbt 1482 * We are using the write transaction in which max 2 bmbt
1473 * updates are allowed 1483 * updates are allowed
1474 */ 1484 */
1475 start_fsb = next_fsb; 1485 error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
1476 error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb, 1486 &done, stop_fsb, &first_block, &free_list,
1477 &done, &next_fsb, &first_block, &free_list, 1487 direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
1478 XFS_BMAP_MAX_SHIFT_EXTENTS);
1479 if (error) 1488 if (error)
1480 goto out; 1489 goto out;
1481 1490
@@ -1484,18 +1493,70 @@ xfs_collapse_file_space(
1484 goto out; 1493 goto out;
1485 1494
1486 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1495 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1487 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1488 } 1496 }
1489 1497
1490 return error; 1498 return error;
1491 1499
1492out: 1500out:
1493 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1501 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
1494 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1495 return error; 1502 return error;
1496} 1503}
1497 1504
1498/* 1505/*
1506 * xfs_collapse_file_space()
1507 * This routine frees disk space and shift extent for the given file.
1508 * The first thing we do is to free data blocks in the specified range
1509 * by calling xfs_free_file_space(). It would also sync dirty data
1510 * and invalidate page cache over the region on which collapse range
1511 * is working. And Shift extent records to the left to cover a hole.
1512 * RETURNS:
1513 * 0 on success
1514 * errno on error
1515 *
1516 */
1517int
1518xfs_collapse_file_space(
1519 struct xfs_inode *ip,
1520 xfs_off_t offset,
1521 xfs_off_t len)
1522{
1523 int error;
1524
1525 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1526 trace_xfs_collapse_file_space(ip);
1527
1528 error = xfs_free_file_space(ip, offset, len);
1529 if (error)
1530 return error;
1531
1532 return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT);
1533}
1534
1535/*
1536 * xfs_insert_file_space()
1537 * This routine creates hole space by shifting extents for the given file.
1538 * The first thing we do is to sync dirty data and invalidate page cache
1539 * over the region on which insert range is working. And split an extent
1540 * to two extents at given offset by calling xfs_bmap_split_extent.
1541 * And shift all extent records which are lying between [offset,
1542 * last allocated extent] to the right to reserve hole range.
1543 * RETURNS:
1544 * 0 on success
1545 * errno on error
1546 */
1547int
1548xfs_insert_file_space(
1549 struct xfs_inode *ip,
1550 loff_t offset,
1551 loff_t len)
1552{
1553 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1554 trace_xfs_insert_file_space(ip);
1555
1556 return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT);
1557}
1558
1559/*
1499 * We need to check that the format of the data fork in the temporary inode is 1560 * We need to check that the format of the data fork in the temporary inode is
1500 * valid for the target inode before doing the swap. This is not a problem with 1561 * valid for the target inode before doing the swap. This is not a problem with
1501 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized 1562 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 736429a72a12..af97d9a1dfb4 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -63,6 +63,8 @@ int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
63 xfs_off_t len); 63 xfs_off_t len);
64int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset, 64int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
65 xfs_off_t len); 65 xfs_off_t len);
66int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
67 xfs_off_t len);
66 68
67/* EOF block manipulation functions */ 69/* EOF block manipulation functions */
68bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); 70bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ce615d12fb44..edeaccc7961a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -816,6 +816,11 @@ xfs_file_write_iter(
816 return ret; 816 return ret;
817} 817}
818 818
819#define XFS_FALLOC_FL_SUPPORTED \
820 (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
821 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
822 FALLOC_FL_INSERT_RANGE)
823
819STATIC long 824STATIC long
820xfs_file_fallocate( 825xfs_file_fallocate(
821 struct file *file, 826 struct file *file,
@@ -829,11 +834,11 @@ xfs_file_fallocate(
829 enum xfs_prealloc_flags flags = 0; 834 enum xfs_prealloc_flags flags = 0;
830 uint iolock = XFS_IOLOCK_EXCL; 835 uint iolock = XFS_IOLOCK_EXCL;
831 loff_t new_size = 0; 836 loff_t new_size = 0;
837 bool do_file_insert = 0;
832 838
833 if (!S_ISREG(inode->i_mode)) 839 if (!S_ISREG(inode->i_mode))
834 return -EINVAL; 840 return -EINVAL;
835 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | 841 if (mode & ~XFS_FALLOC_FL_SUPPORTED)
836 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
837 return -EOPNOTSUPP; 842 return -EOPNOTSUPP;
838 843
839 xfs_ilock(ip, iolock); 844 xfs_ilock(ip, iolock);
@@ -867,6 +872,27 @@ xfs_file_fallocate(
867 error = xfs_collapse_file_space(ip, offset, len); 872 error = xfs_collapse_file_space(ip, offset, len);
868 if (error) 873 if (error)
869 goto out_unlock; 874 goto out_unlock;
875 } else if (mode & FALLOC_FL_INSERT_RANGE) {
876 unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
877
878 new_size = i_size_read(inode) + len;
879 if (offset & blksize_mask || len & blksize_mask) {
880 error = -EINVAL;
881 goto out_unlock;
882 }
883
884 /* check the new inode size does not wrap through zero */
885 if (new_size > inode->i_sb->s_maxbytes) {
886 error = -EFBIG;
887 goto out_unlock;
888 }
889
890 /* Offset should be less than i_size */
891 if (offset >= i_size_read(inode)) {
892 error = -EINVAL;
893 goto out_unlock;
894 }
895 do_file_insert = 1;
870 } else { 896 } else {
871 flags |= XFS_PREALLOC_SET; 897 flags |= XFS_PREALLOC_SET;
872 898
@@ -901,8 +927,19 @@ xfs_file_fallocate(
901 iattr.ia_valid = ATTR_SIZE; 927 iattr.ia_valid = ATTR_SIZE;
902 iattr.ia_size = new_size; 928 iattr.ia_size = new_size;
903 error = xfs_setattr_size(ip, &iattr); 929 error = xfs_setattr_size(ip, &iattr);
930 if (error)
931 goto out_unlock;
904 } 932 }
905 933
934 /*
935 * Perform hole insertion now that the file size has been
936 * updated so that if we crash during the operation we don't
937 * leave shifted extents past EOF and hence losing access to
938 * the data that is contained within them.
939 */
940 if (do_file_insert)
941 error = xfs_insert_file_space(ip, offset, len);
942
906out_unlock: 943out_unlock:
907 xfs_iunlock(ip, iolock); 944 xfs_iunlock(ip, iolock);
908 return error; 945 return error;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 51372e34d988..7e45fa155ea8 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -664,6 +664,7 @@ DEFINE_INODE_EVENT(xfs_alloc_file_space);
664DEFINE_INODE_EVENT(xfs_free_file_space); 664DEFINE_INODE_EVENT(xfs_free_file_space);
665DEFINE_INODE_EVENT(xfs_zero_file_space); 665DEFINE_INODE_EVENT(xfs_zero_file_space);
666DEFINE_INODE_EVENT(xfs_collapse_file_space); 666DEFINE_INODE_EVENT(xfs_collapse_file_space);
667DEFINE_INODE_EVENT(xfs_insert_file_space);
667DEFINE_INODE_EVENT(xfs_readdir); 668DEFINE_INODE_EVENT(xfs_readdir);
668#ifdef CONFIG_XFS_POSIX_ACL 669#ifdef CONFIG_XFS_POSIX_ACL
669DEFINE_INODE_EVENT(xfs_get_acl); 670DEFINE_INODE_EVENT(xfs_get_acl);