author     Linus Torvalds <torvalds@linux-foundation.org>   2016-10-27 15:34:50 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-10-27 15:34:50 -0400
commit     e890038e6a0b1f1c5a5a0037025499704353a3eb (patch)
tree       6a51161df0b1c428bab9b6dcd8746d108d867942
parent     18c2152d526e7956457fcdcbdf6d77ae2c663a26 (diff)
parent     c17a8ef43d6b80ed3519b828c37d18645445949f (diff)
Merge tag 'xfs-fixes-for-linus-4.9-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs fixes from Dave Chinner:
"This update contains fixes for most of the outstanding regressions
introduced with the 4.9-rc1 XFS merge. There is also a fix for an
iomap bug, too.
This is a quite a bit larger than I'd prefer for a -rc3, but most of
the change comes from cleaning up the new reflink copy on write code;
it's much simpler and easier to understand now. These changes fixed
several bugs in the new code, and it wasn't clear that there was an
easier/simpler way to fix them. The rest of the fixes are the usual
size you'd expect at this stage.
I've left the commits to soak in linux-next for a some extra time
because of the size before asking you to pull, no new problems with
them have been reported so I think it's all OK.
Summary:
- iomap page offset masking fix for page faults
- add IOMAP_REPORT to distinguish between read and fiemap map
requests
- cleanups to new shared data extent code
- fix mount active status on failed log recovery
- fix broken dquots-in-a-buffer calculation
- fix locking order issues and merge xfs_reflink_remap_range and
xfs_file_share_range
- rework unmapping of CoW extents and remove now unused functions
- clean state when CoW is done"
* tag 'xfs-fixes-for-linus-4.9-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (25 commits)
xfs: clear cowblocks tag when cow fork is emptied
xfs: fix up inode cowblocks tracking tracepoints
fs: Do to trim high file position bits in iomap_page_mkwrite_actor
xfs: remove xfs_bunmapi_cow
xfs: optimize xfs_reflink_end_cow
xfs: optimize xfs_reflink_cancel_cow_blocks
xfs: refactor xfs_bunmapi_cow
xfs: optimize writes to reflink files
xfs: don't bother looking at the refcount tree for reads
xfs: handle "raw" delayed extents xfs_reflink_trim_around_shared
xfs: add xfs_trim_extent
iomap: add IOMAP_REPORT
xfs: merge xfs_reflink_remap_range and xfs_file_share_range
xfs: remove xfs_file_wait_for_io
xfs: move inode locking from xfs_reflink_remap_range to xfs_file_share_range
xfs: fix the same_inode check in xfs_file_share_range
xfs: remove the same fs check from xfs_file_share_range
libxfs: v3 inodes are only valid on crc-enabled filesystems
libxfs: clean up _calc_dquots_per_chunk
xfs: unset MS_ACTIVE if mount fails
...
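One of the changes above adds an IOMAP_REPORT flag so a filesystem's mapping callback can tell a pure mapping report (FIEMAP) apart from a read or write request; in the XFS hunks below it gates the shared-extent trimming and the copy-on-write reservation. The following is a minimal user-space sketch of that kind of flag dispatch, using hypothetical stand-in flag names and values rather than the kernel's iomap definitions:

```c
#include <stdio.h>

/* Stand-in request flags; the kernel defines IOMAP_WRITE, IOMAP_ZERO,
 * IOMAP_REPORT and friends in include/linux/iomap.h. */
#define MAP_WRITE  (1 << 0)
#define MAP_ZERO   (1 << 1)
#define MAP_REPORT (1 << 2)	/* mapping wanted only for reporting (FIEMAP) */

static void map_begin(unsigned flags, int is_reflink_inode)
{
	if (flags & MAP_REPORT) {
		/* FIEMAP: only describe the extent, e.g. mark shared ranges. */
		puts("report: trim to shared/unshared boundary, no allocation");
		return;
	}
	if ((flags & (MAP_WRITE | MAP_ZERO)) && is_reflink_inode) {
		/* Writes over shared blocks need copy-on-write reservations. */
		puts("write: reserve COW fork space before allowing the write");
		return;
	}
	puts("read: return the existing mapping unchanged");
}

int main(void)
{
	map_begin(MAP_REPORT, 1);
	map_begin(MAP_WRITE, 1);
	map_begin(0, 0);
	return 0;
}
```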
-rw-r--r--   fs/iomap.c                        5
-rw-r--r--   fs/xfs/libxfs/xfs_bmap.c        418
-rw-r--r--   fs/xfs/libxfs/xfs_bmap.h          8
-rw-r--r--   fs/xfs/libxfs/xfs_btree.c         2
-rw-r--r--   fs/xfs/libxfs/xfs_dquot_buf.c     3
-rw-r--r--   fs/xfs/libxfs/xfs_format.h        1
-rw-r--r--   fs/xfs/libxfs/xfs_inode_buf.c    13
-rw-r--r--   fs/xfs/libxfs/xfs_inode_buf.h     2
-rw-r--r--   fs/xfs/xfs_file.c               232
-rw-r--r--   fs/xfs/xfs_icache.c               8
-rw-r--r--   fs/xfs/xfs_iomap.c               57
-rw-r--r--   fs/xfs/xfs_mount.c                1
-rw-r--r--   fs/xfs/xfs_reflink.c            499
-rw-r--r--   fs/xfs/xfs_reflink.h             11
-rw-r--r--   fs/xfs/xfs_sysfs.c                4
-rw-r--r--   fs/xfs/xfs_trace.h                4
-rw-r--r--   include/linux/iomap.h            17
17 files changed, 640 insertions, 645 deletions
diff --git a/fs/iomap.c b/fs/iomap.c
index 013d1d36fbbf..a8ee8c33ca78 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -433,8 +433,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length, | |||
433 | struct page *page = data; | 433 | struct page *page = data; |
434 | int ret; | 434 | int ret; |
435 | 435 | ||
436 | ret = __block_write_begin_int(page, pos & ~PAGE_MASK, length, | 436 | ret = __block_write_begin_int(page, pos, length, NULL, iomap); |
437 | NULL, iomap); | ||
438 | if (ret) | 437 | if (ret) |
439 | return ret; | 438 | return ret; |
440 | 439 | ||
@@ -561,7 +560,7 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi, | |||
561 | } | 560 | } |
562 | 561 | ||
563 | while (len > 0) { | 562 | while (len > 0) { |
564 | ret = iomap_apply(inode, start, len, 0, ops, &ctx, | 563 | ret = iomap_apply(inode, start, len, IOMAP_REPORT, ops, &ctx, |
565 | iomap_fiemap_actor); | 564 | iomap_fiemap_actor); |
566 | /* inode with no (attribute) mapping will give ENOENT */ | 565 | /* inode with no (attribute) mapping will give ENOENT */ |
567 | if (ret == -ENOENT) | 566 | if (ret == -ENOENT) |
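The iomap_page_mkwrite_actor hunk above stops masking the position with `pos & ~PAGE_MASK`, which kept only the offset within the page and discarded the high file position bits that __block_write_begin_int needs to map the right blocks. A small stand-alone demonstration of what the old masking did, assuming a 4 KiB page size (user-space model, not the kernel code):

```c
#include <stdio.h>
#include <stdint.h>

/* Stand-ins for the kernel definitions, assuming 4 KiB pages. */
#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	uint64_t pos = 0x12345678;	/* a file position well past the first page */

	/* Old call site: only the offset within the page survives. */
	uint64_t masked = pos & ~PAGE_MASK;

	printf("full position:    0x%llx\n", (unsigned long long)pos);
	printf("pos & ~PAGE_MASK: 0x%llx (the file offset is lost)\n",
	       (unsigned long long)masked);
	return 0;
}
```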
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index c27344cf38e1..c6eb21940783 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3974,9 +3974,6 @@ xfs_bmap_remap_alloc( | |||
3974 | * allocating, so skip that check by pretending to be freeing. | 3974 | * allocating, so skip that check by pretending to be freeing. |
3975 | */ | 3975 | */ |
3976 | error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); | 3976 | error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); |
3977 | if (error) | ||
3978 | goto error0; | ||
3979 | error0: | ||
3980 | xfs_perag_put(args.pag); | 3977 | xfs_perag_put(args.pag); |
3981 | if (error) | 3978 | if (error) |
3982 | trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_); | 3979 | trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_); |
@@ -3999,6 +3996,39 @@ xfs_bmap_alloc( | |||
3999 | return xfs_bmap_btalloc(ap); | 3996 | return xfs_bmap_btalloc(ap); |
4000 | } | 3997 | } |
4001 | 3998 | ||
3999 | /* Trim extent to fit a logical block range. */ | ||
4000 | void | ||
4001 | xfs_trim_extent( | ||
4002 | struct xfs_bmbt_irec *irec, | ||
4003 | xfs_fileoff_t bno, | ||
4004 | xfs_filblks_t len) | ||
4005 | { | ||
4006 | xfs_fileoff_t distance; | ||
4007 | xfs_fileoff_t end = bno + len; | ||
4008 | |||
4009 | if (irec->br_startoff + irec->br_blockcount <= bno || | ||
4010 | irec->br_startoff >= end) { | ||
4011 | irec->br_blockcount = 0; | ||
4012 | return; | ||
4013 | } | ||
4014 | |||
4015 | if (irec->br_startoff < bno) { | ||
4016 | distance = bno - irec->br_startoff; | ||
4017 | if (isnullstartblock(irec->br_startblock)) | ||
4018 | irec->br_startblock = DELAYSTARTBLOCK; | ||
4019 | if (irec->br_startblock != DELAYSTARTBLOCK && | ||
4020 | irec->br_startblock != HOLESTARTBLOCK) | ||
4021 | irec->br_startblock += distance; | ||
4022 | irec->br_startoff += distance; | ||
4023 | irec->br_blockcount -= distance; | ||
4024 | } | ||
4025 | |||
4026 | if (end < irec->br_startoff + irec->br_blockcount) { | ||
4027 | distance = irec->br_startoff + irec->br_blockcount - end; | ||
4028 | irec->br_blockcount -= distance; | ||
4029 | } | ||
4030 | } | ||
4031 | |||
4002 | /* | 4032 | /* |
4003 | * Trim the returned map to the required bounds | 4033 | * Trim the returned map to the required bounds |
4004 | */ | 4034 | */ |
@@ -4829,6 +4859,219 @@ xfs_bmap_split_indlen( | |||
4829 | return stolen; | 4859 | return stolen; |
4830 | } | 4860 | } |
4831 | 4861 | ||
4862 | int | ||
4863 | xfs_bmap_del_extent_delay( | ||
4864 | struct xfs_inode *ip, | ||
4865 | int whichfork, | ||
4866 | xfs_extnum_t *idx, | ||
4867 | struct xfs_bmbt_irec *got, | ||
4868 | struct xfs_bmbt_irec *del) | ||
4869 | { | ||
4870 | struct xfs_mount *mp = ip->i_mount; | ||
4871 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); | ||
4872 | struct xfs_bmbt_irec new; | ||
4873 | int64_t da_old, da_new, da_diff = 0; | ||
4874 | xfs_fileoff_t del_endoff, got_endoff; | ||
4875 | xfs_filblks_t got_indlen, new_indlen, stolen; | ||
4876 | int error = 0, state = 0; | ||
4877 | bool isrt; | ||
4878 | |||
4879 | XFS_STATS_INC(mp, xs_del_exlist); | ||
4880 | |||
4881 | isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); | ||
4882 | del_endoff = del->br_startoff + del->br_blockcount; | ||
4883 | got_endoff = got->br_startoff + got->br_blockcount; | ||
4884 | da_old = startblockval(got->br_startblock); | ||
4885 | da_new = 0; | ||
4886 | |||
4887 | ASSERT(*idx >= 0); | ||
4888 | ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); | ||
4889 | ASSERT(del->br_blockcount > 0); | ||
4890 | ASSERT(got->br_startoff <= del->br_startoff); | ||
4891 | ASSERT(got_endoff >= del_endoff); | ||
4892 | |||
4893 | if (isrt) { | ||
4894 | int64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount); | ||
4895 | |||
4896 | do_div(rtexts, mp->m_sb.sb_rextsize); | ||
4897 | xfs_mod_frextents(mp, rtexts); | ||
4898 | } | ||
4899 | |||
4900 | /* | ||
4901 | * Update the inode delalloc counter now and wait to update the | ||
4902 | * sb counters as we might have to borrow some blocks for the | ||
4903 | * indirect block accounting. | ||
4904 | */ | ||
4905 | xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0, | ||
4906 | isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); | ||
4907 | ip->i_delayed_blks -= del->br_blockcount; | ||
4908 | |||
4909 | if (whichfork == XFS_COW_FORK) | ||
4910 | state |= BMAP_COWFORK; | ||
4911 | |||
4912 | if (got->br_startoff == del->br_startoff) | ||
4913 | state |= BMAP_LEFT_CONTIG; | ||
4914 | if (got_endoff == del_endoff) | ||
4915 | state |= BMAP_RIGHT_CONTIG; | ||
4916 | |||
4917 | switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { | ||
4918 | case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: | ||
4919 | /* | ||
4920 | * Matches the whole extent. Delete the entry. | ||
4921 | */ | ||
4922 | xfs_iext_remove(ip, *idx, 1, state); | ||
4923 | --*idx; | ||
4924 | break; | ||
4925 | case BMAP_LEFT_CONTIG: | ||
4926 | /* | ||
4927 | * Deleting the first part of the extent. | ||
4928 | */ | ||
4929 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
4930 | got->br_startoff = del_endoff; | ||
4931 | got->br_blockcount -= del->br_blockcount; | ||
4932 | da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, | ||
4933 | got->br_blockcount), da_old); | ||
4934 | got->br_startblock = nullstartblock((int)da_new); | ||
4935 | xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); | ||
4936 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
4937 | break; | ||
4938 | case BMAP_RIGHT_CONTIG: | ||
4939 | /* | ||
4940 | * Deleting the last part of the extent. | ||
4941 | */ | ||
4942 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
4943 | got->br_blockcount = got->br_blockcount - del->br_blockcount; | ||
4944 | da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, | ||
4945 | got->br_blockcount), da_old); | ||
4946 | got->br_startblock = nullstartblock((int)da_new); | ||
4947 | xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); | ||
4948 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
4949 | break; | ||
4950 | case 0: | ||
4951 | /* | ||
4952 | * Deleting the middle of the extent. | ||
4953 | * | ||
4954 | * Distribute the original indlen reservation across the two new | ||
4955 | * extents. Steal blocks from the deleted extent if necessary. | ||
4956 | * Stealing blocks simply fudges the fdblocks accounting below. | ||
4957 | * Warn if either of the new indlen reservations is zero as this | ||
4958 | * can lead to delalloc problems. | ||
4959 | */ | ||
4960 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
4961 | |||
4962 | got->br_blockcount = del->br_startoff - got->br_startoff; | ||
4963 | got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount); | ||
4964 | |||
4965 | new.br_blockcount = got_endoff - del_endoff; | ||
4966 | new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount); | ||
4967 | |||
4968 | WARN_ON_ONCE(!got_indlen || !new_indlen); | ||
4969 | stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen, | ||
4970 | del->br_blockcount); | ||
4971 | |||
4972 | got->br_startblock = nullstartblock((int)got_indlen); | ||
4973 | xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); | ||
4974 | trace_xfs_bmap_post_update(ip, *idx, 0, _THIS_IP_); | ||
4975 | |||
4976 | new.br_startoff = del_endoff; | ||
4977 | new.br_state = got->br_state; | ||
4978 | new.br_startblock = nullstartblock((int)new_indlen); | ||
4979 | |||
4980 | ++*idx; | ||
4981 | xfs_iext_insert(ip, *idx, 1, &new, state); | ||
4982 | |||
4983 | da_new = got_indlen + new_indlen - stolen; | ||
4984 | del->br_blockcount -= stolen; | ||
4985 | break; | ||
4986 | } | ||
4987 | |||
4988 | ASSERT(da_old >= da_new); | ||
4989 | da_diff = da_old - da_new; | ||
4990 | if (!isrt) | ||
4991 | da_diff += del->br_blockcount; | ||
4992 | if (da_diff) | ||
4993 | xfs_mod_fdblocks(mp, da_diff, false); | ||
4994 | return error; | ||
4995 | } | ||
4996 | |||
4997 | void | ||
4998 | xfs_bmap_del_extent_cow( | ||
4999 | struct xfs_inode *ip, | ||
5000 | xfs_extnum_t *idx, | ||
5001 | struct xfs_bmbt_irec *got, | ||
5002 | struct xfs_bmbt_irec *del) | ||
5003 | { | ||
5004 | struct xfs_mount *mp = ip->i_mount; | ||
5005 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); | ||
5006 | struct xfs_bmbt_irec new; | ||
5007 | xfs_fileoff_t del_endoff, got_endoff; | ||
5008 | int state = BMAP_COWFORK; | ||
5009 | |||
5010 | XFS_STATS_INC(mp, xs_del_exlist); | ||
5011 | |||
5012 | del_endoff = del->br_startoff + del->br_blockcount; | ||
5013 | got_endoff = got->br_startoff + got->br_blockcount; | ||
5014 | |||
5015 | ASSERT(*idx >= 0); | ||
5016 | ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); | ||
5017 | ASSERT(del->br_blockcount > 0); | ||
5018 | ASSERT(got->br_startoff <= del->br_startoff); | ||
5019 | ASSERT(got_endoff >= del_endoff); | ||
5020 | ASSERT(!isnullstartblock(got->br_startblock)); | ||
5021 | |||
5022 | if (got->br_startoff == del->br_startoff) | ||
5023 | state |= BMAP_LEFT_CONTIG; | ||
5024 | if (got_endoff == del_endoff) | ||
5025 | state |= BMAP_RIGHT_CONTIG; | ||
5026 | |||
5027 | switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { | ||
5028 | case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: | ||
5029 | /* | ||
5030 | * Matches the whole extent. Delete the entry. | ||
5031 | */ | ||
5032 | xfs_iext_remove(ip, *idx, 1, state); | ||
5033 | --*idx; | ||
5034 | break; | ||
5035 | case BMAP_LEFT_CONTIG: | ||
5036 | /* | ||
5037 | * Deleting the first part of the extent. | ||
5038 | */ | ||
5039 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
5040 | got->br_startoff = del_endoff; | ||
5041 | got->br_blockcount -= del->br_blockcount; | ||
5042 | got->br_startblock = del->br_startblock + del->br_blockcount; | ||
5043 | xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); | ||
5044 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
5045 | break; | ||
5046 | case BMAP_RIGHT_CONTIG: | ||
5047 | /* | ||
5048 | * Deleting the last part of the extent. | ||
5049 | */ | ||
5050 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
5051 | got->br_blockcount -= del->br_blockcount; | ||
5052 | xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); | ||
5053 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
5054 | break; | ||
5055 | case 0: | ||
5056 | /* | ||
5057 | * Deleting the middle of the extent. | ||
5058 | */ | ||
5059 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
5060 | got->br_blockcount = del->br_startoff - got->br_startoff; | ||
5061 | xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); | ||
5062 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); | ||
5063 | |||
5064 | new.br_startoff = del_endoff; | ||
5065 | new.br_blockcount = got_endoff - del_endoff; | ||
5066 | new.br_state = got->br_state; | ||
5067 | new.br_startblock = del->br_startblock + del->br_blockcount; | ||
5068 | |||
5069 | ++*idx; | ||
5070 | xfs_iext_insert(ip, *idx, 1, &new, state); | ||
5071 | break; | ||
5072 | } | ||
5073 | } | ||
5074 | |||
4832 | /* | 5075 | /* |
4833 | * Called by xfs_bmapi to update file extent records and the btree | 5076 | * Called by xfs_bmapi to update file extent records and the btree |
4834 | * after removing space (or undoing a delayed allocation). | 5077 | * after removing space (or undoing a delayed allocation). |
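Both new helpers above reduce the deletion to the same two-bit contiguity state: whether the range being removed starts at the start of the existing extent and/or ends at its end, which selects one of four cases (remove the whole record, trim the front, trim the tail, or split it in two). A short stand-alone sketch of that classification, using hypothetical flag names in place of the kernel's BMAP_LEFT_CONTIG/BMAP_RIGHT_CONTIG:

```c
#include <stdio.h>
#include <stdint.h>

#define LEFT_CONTIG  (1 << 0)	/* deletion starts at the extent's start */
#define RIGHT_CONTIG (1 << 1)	/* deletion ends at the extent's end */

struct range {
	uint64_t start;
	uint64_t count;
};

/* Classify how 'del' sits inside 'got'; 'del' is assumed to be contained. */
static int classify(const struct range *got, const struct range *del)
{
	int state = 0;

	if (got->start == del->start)
		state |= LEFT_CONTIG;
	if (got->start + got->count == del->start + del->count)
		state |= RIGHT_CONTIG;
	return state;
}

int main(void)
{
	struct range got = { .start = 100, .count = 50 };
	struct range del = { .start = 100, .count = 10 };

	switch (classify(&got, &del)) {
	case LEFT_CONTIG | RIGHT_CONTIG:
		puts("covers the whole extent: remove the record");
		break;
	case LEFT_CONTIG:
		puts("covers the front: move the start forward");
		break;
	case RIGHT_CONTIG:
		puts("covers the tail: shorten the extent");
		break;
	default:
		puts("covers the middle: split into two records");
		break;
	}
	return 0;
}
```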
@@ -5171,175 +5414,6 @@ done: | |||
5171 | return error; | 5414 | return error; |
5172 | } | 5415 | } |
5173 | 5416 | ||
5174 | /* Remove an extent from the CoW fork. Similar to xfs_bmap_del_extent. */ | ||
5175 | int | ||
5176 | xfs_bunmapi_cow( | ||
5177 | struct xfs_inode *ip, | ||
5178 | struct xfs_bmbt_irec *del) | ||
5179 | { | ||
5180 | xfs_filblks_t da_new; | ||
5181 | xfs_filblks_t da_old; | ||
5182 | xfs_fsblock_t del_endblock = 0; | ||
5183 | xfs_fileoff_t del_endoff; | ||
5184 | int delay; | ||
5185 | struct xfs_bmbt_rec_host *ep; | ||
5186 | int error; | ||
5187 | struct xfs_bmbt_irec got; | ||
5188 | xfs_fileoff_t got_endoff; | ||
5189 | struct xfs_ifork *ifp; | ||
5190 | struct xfs_mount *mp; | ||
5191 | xfs_filblks_t nblks; | ||
5192 | struct xfs_bmbt_irec new; | ||
5193 | /* REFERENCED */ | ||
5194 | uint qfield; | ||
5195 | xfs_filblks_t temp; | ||
5196 | xfs_filblks_t temp2; | ||
5197 | int state = BMAP_COWFORK; | ||
5198 | int eof; | ||
5199 | xfs_extnum_t eidx; | ||
5200 | |||
5201 | mp = ip->i_mount; | ||
5202 | XFS_STATS_INC(mp, xs_del_exlist); | ||
5203 | |||
5204 | ep = xfs_bmap_search_extents(ip, del->br_startoff, XFS_COW_FORK, &eof, | ||
5205 | &eidx, &got, &new); | ||
5206 | |||
5207 | ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); ifp = ifp; | ||
5208 | ASSERT((eidx >= 0) && (eidx < ifp->if_bytes / | ||
5209 | (uint)sizeof(xfs_bmbt_rec_t))); | ||
5210 | ASSERT(del->br_blockcount > 0); | ||
5211 | ASSERT(got.br_startoff <= del->br_startoff); | ||
5212 | del_endoff = del->br_startoff + del->br_blockcount; | ||
5213 | got_endoff = got.br_startoff + got.br_blockcount; | ||
5214 | ASSERT(got_endoff >= del_endoff); | ||
5215 | delay = isnullstartblock(got.br_startblock); | ||
5216 | ASSERT(isnullstartblock(del->br_startblock) == delay); | ||
5217 | qfield = 0; | ||
5218 | error = 0; | ||
5219 | /* | ||
5220 | * If deleting a real allocation, must free up the disk space. | ||
5221 | */ | ||
5222 | if (!delay) { | ||
5223 | nblks = del->br_blockcount; | ||
5224 | qfield = XFS_TRANS_DQ_BCOUNT; | ||
5225 | /* | ||
5226 | * Set up del_endblock and cur for later. | ||
5227 | */ | ||
5228 | del_endblock = del->br_startblock + del->br_blockcount; | ||
5229 | da_old = da_new = 0; | ||
5230 | } else { | ||
5231 | da_old = startblockval(got.br_startblock); | ||
5232 | da_new = 0; | ||
5233 | nblks = 0; | ||
5234 | } | ||
5235 | qfield = qfield; | ||
5236 | nblks = nblks; | ||
5237 | |||
5238 | /* | ||
5239 | * Set flag value to use in switch statement. | ||
5240 | * Left-contig is 2, right-contig is 1. | ||
5241 | */ | ||
5242 | switch (((got.br_startoff == del->br_startoff) << 1) | | ||
5243 | (got_endoff == del_endoff)) { | ||
5244 | case 3: | ||
5245 | /* | ||
5246 | * Matches the whole extent. Delete the entry. | ||
5247 | */ | ||
5248 | xfs_iext_remove(ip, eidx, 1, BMAP_COWFORK); | ||
5249 | --eidx; | ||
5250 | break; | ||
5251 | |||
5252 | case 2: | ||
5253 | /* | ||
5254 | * Deleting the first part of the extent. | ||
5255 | */ | ||
5256 | trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_); | ||
5257 | xfs_bmbt_set_startoff(ep, del_endoff); | ||
5258 | temp = got.br_blockcount - del->br_blockcount; | ||
5259 | xfs_bmbt_set_blockcount(ep, temp); | ||
5260 | if (delay) { | ||
5261 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | ||
5262 | da_old); | ||
5263 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | ||
5264 | trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); | ||
5265 | da_new = temp; | ||
5266 | break; | ||
5267 | } | ||
5268 | xfs_bmbt_set_startblock(ep, del_endblock); | ||
5269 | trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); | ||
5270 | break; | ||
5271 | |||
5272 | case 1: | ||
5273 | /* | ||
5274 | * Deleting the last part of the extent. | ||
5275 | */ | ||
5276 | temp = got.br_blockcount - del->br_blockcount; | ||
5277 | trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_); | ||
5278 | xfs_bmbt_set_blockcount(ep, temp); | ||
5279 | if (delay) { | ||
5280 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | ||
5281 | da_old); | ||
5282 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | ||
5283 | trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); | ||
5284 | da_new = temp; | ||
5285 | break; | ||
5286 | } | ||
5287 | trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); | ||
5288 | break; | ||
5289 | |||
5290 | case 0: | ||
5291 | /* | ||
5292 | * Deleting the middle of the extent. | ||
5293 | */ | ||
5294 | temp = del->br_startoff - got.br_startoff; | ||
5295 | trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_); | ||
5296 | xfs_bmbt_set_blockcount(ep, temp); | ||
5297 | new.br_startoff = del_endoff; | ||
5298 | temp2 = got_endoff - del_endoff; | ||
5299 | new.br_blockcount = temp2; | ||
5300 | new.br_state = got.br_state; | ||
5301 | if (!delay) { | ||
5302 | new.br_startblock = del_endblock; | ||
5303 | } else { | ||
5304 | temp = xfs_bmap_worst_indlen(ip, temp); | ||
5305 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | ||
5306 | temp2 = xfs_bmap_worst_indlen(ip, temp2); | ||
5307 | new.br_startblock = nullstartblock((int)temp2); | ||
5308 | da_new = temp + temp2; | ||
5309 | while (da_new > da_old) { | ||
5310 | if (temp) { | ||
5311 | temp--; | ||
5312 | da_new--; | ||
5313 | xfs_bmbt_set_startblock(ep, | ||
5314 | nullstartblock((int)temp)); | ||
5315 | } | ||
5316 | if (da_new == da_old) | ||
5317 | break; | ||
5318 | if (temp2) { | ||
5319 | temp2--; | ||
5320 | da_new--; | ||
5321 | new.br_startblock = | ||
5322 | nullstartblock((int)temp2); | ||
5323 | } | ||
5324 | } | ||
5325 | } | ||
5326 | trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); | ||
5327 | xfs_iext_insert(ip, eidx + 1, 1, &new, state); | ||
5328 | ++eidx; | ||
5329 | break; | ||
5330 | } | ||
5331 | |||
5332 | /* | ||
5333 | * Account for change in delayed indirect blocks. | ||
5334 | * Nothing to do for disk quota accounting here. | ||
5335 | */ | ||
5336 | ASSERT(da_old >= da_new); | ||
5337 | if (da_old > da_new) | ||
5338 | xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false); | ||
5339 | |||
5340 | return error; | ||
5341 | } | ||
5342 | |||
5343 | /* | 5417 | /* |
5344 | * Unmap (remove) blocks from a file. | 5418 | * Unmap (remove) blocks from a file. |
5345 | * If nexts is nonzero then the number of extents to remove is limited to | 5419 | * If nexts is nonzero then the number of extents to remove is limited to |
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index f97db7132564..7cae6ec27fa6 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -190,6 +190,8 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, | |||
190 | #define XFS_BMAP_TRACE_EXLIST(ip,c,w) | 190 | #define XFS_BMAP_TRACE_EXLIST(ip,c,w) |
191 | #endif | 191 | #endif |
192 | 192 | ||
193 | void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, | ||
194 | xfs_filblks_t len); | ||
193 | int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); | 195 | int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); |
194 | void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); | 196 | void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); |
195 | void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, | 197 | void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, |
@@ -221,7 +223,11 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, | |||
221 | xfs_fileoff_t bno, xfs_filblks_t len, int flags, | 223 | xfs_fileoff_t bno, xfs_filblks_t len, int flags, |
222 | xfs_extnum_t nexts, xfs_fsblock_t *firstblock, | 224 | xfs_extnum_t nexts, xfs_fsblock_t *firstblock, |
223 | struct xfs_defer_ops *dfops, int *done); | 225 | struct xfs_defer_ops *dfops, int *done); |
224 | int xfs_bunmapi_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *del); | 226 | int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork, |
227 | xfs_extnum_t *idx, struct xfs_bmbt_irec *got, | ||
228 | struct xfs_bmbt_irec *del); | ||
229 | void xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx, | ||
230 | struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del); | ||
225 | int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, | 231 | int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, |
226 | xfs_extnum_t num); | 232 | xfs_extnum_t num); |
227 | uint xfs_default_attroffset(struct xfs_inode *ip); | 233 | uint xfs_default_attroffset(struct xfs_inode *ip); |
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 5c8e6f2ce44f..0e80993c8a59 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4826,7 +4826,7 @@ xfs_btree_calc_size( | |||
4826 | return rval; | 4826 | return rval; |
4827 | } | 4827 | } |
4828 | 4828 | ||
4829 | int | 4829 | static int |
4830 | xfs_btree_count_blocks_helper( | 4830 | xfs_btree_count_blocks_helper( |
4831 | struct xfs_btree_cur *cur, | 4831 | struct xfs_btree_cur *cur, |
4832 | int level, | 4832 | int level, |
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 3cc3cf767474..ac9a003dd29a 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -191,8 +191,7 @@ xfs_dquot_buf_verify_crc( | |||
191 | if (mp->m_quotainfo) | 191 | if (mp->m_quotainfo) |
192 | ndquots = mp->m_quotainfo->qi_dqperchunk; | 192 | ndquots = mp->m_quotainfo->qi_dqperchunk; |
193 | else | 193 | else |
194 | ndquots = xfs_calc_dquots_per_chunk( | 194 | ndquots = xfs_calc_dquots_per_chunk(bp->b_length); |
195 | XFS_BB_TO_FSB(mp, bp->b_length)); | ||
196 | 195 | ||
197 | for (i = 0; i < ndquots; i++, d++) { | 196 | for (i = 0; i < ndquots; i++, d++) { |
198 | if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), | 197 | if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), |
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index f6547fc5e016..6b7579e7b60a 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -865,7 +865,6 @@ typedef struct xfs_timestamp { | |||
865 | * padding field for v3 inodes. | 865 | * padding field for v3 inodes. |
866 | */ | 866 | */ |
867 | #define XFS_DINODE_MAGIC 0x494e /* 'IN' */ | 867 | #define XFS_DINODE_MAGIC 0x494e /* 'IN' */ |
868 | #define XFS_DINODE_GOOD_VERSION(v) ((v) >= 1 && (v) <= 3) | ||
869 | typedef struct xfs_dinode { | 868 | typedef struct xfs_dinode { |
870 | __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ | 869 | __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ |
871 | __be16 di_mode; /* mode and type of file */ | 870 | __be16 di_mode; /* mode and type of file */ |
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 8de9a3a29589..134424fac434 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -57,6 +57,17 @@ xfs_inobp_check( | |||
57 | } | 57 | } |
58 | #endif | 58 | #endif |
59 | 59 | ||
60 | bool | ||
61 | xfs_dinode_good_version( | ||
62 | struct xfs_mount *mp, | ||
63 | __u8 version) | ||
64 | { | ||
65 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
66 | return version == 3; | ||
67 | |||
68 | return version == 1 || version == 2; | ||
69 | } | ||
70 | |||
60 | /* | 71 | /* |
61 | * If we are doing readahead on an inode buffer, we might be in log recovery | 72 | * If we are doing readahead on an inode buffer, we might be in log recovery |
62 | * reading an inode allocation buffer that hasn't yet been replayed, and hence | 73 | * reading an inode allocation buffer that hasn't yet been replayed, and hence |
@@ -91,7 +102,7 @@ xfs_inode_buf_verify( | |||
91 | 102 | ||
92 | dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); | 103 | dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); |
93 | di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && | 104 | di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && |
94 | XFS_DINODE_GOOD_VERSION(dip->di_version); | 105 | xfs_dinode_good_version(mp, dip->di_version); |
95 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, | 106 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, |
96 | XFS_ERRTAG_ITOBP_INOTOBP, | 107 | XFS_ERRTAG_ITOBP_INOTOBP, |
97 | XFS_RANDOM_ITOBP_INOTOBP))) { | 108 | XFS_RANDOM_ITOBP_INOTOBP))) { |
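The new xfs_dinode_good_version helper above ties inode version validation to the filesystem's feature bits: version 3 inodes are only accepted when the superblock has CRCs enabled, while the removed XFS_DINODE_GOOD_VERSION macro accepted versions 1-3 unconditionally. A tiny user-space comparison of the two checks (a model, not the kernel code):

```c
#include <stdbool.h>
#include <stdio.h>

/* Old check: XFS_DINODE_GOOD_VERSION accepted v1-v3 regardless of features. */
static bool old_good_version(unsigned version)
{
	return version >= 1 && version <= 3;
}

/* New check, modelled on xfs_dinode_good_version: v3 requires CRCs. */
static bool new_good_version(bool fs_has_crc, unsigned version)
{
	if (fs_has_crc)
		return version == 3;
	return version == 1 || version == 2;
}

int main(void)
{
	/* A v3 inode on a non-CRC filesystem: previously accepted, now rejected. */
	printf("old: %d, new: %d\n", old_good_version(3), new_good_version(false, 3));
	return 0;
}
```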
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 62d9d4681c8c..3cfe12a4f58a 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -74,6 +74,8 @@ void xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from); | |||
74 | void xfs_log_dinode_to_disk(struct xfs_log_dinode *from, | 74 | void xfs_log_dinode_to_disk(struct xfs_log_dinode *from, |
75 | struct xfs_dinode *to); | 75 | struct xfs_dinode *to); |
76 | 76 | ||
77 | bool xfs_dinode_good_version(struct xfs_mount *mp, __u8 version); | ||
78 | |||
77 | #if defined(DEBUG) | 79 | #if defined(DEBUG) |
78 | void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); | 80 | void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); |
79 | #else | 81 | #else |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a314fc7b56fa..6e4f7f900fea 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -249,6 +249,7 @@ xfs_file_dio_aio_read( | |||
249 | struct xfs_inode *ip = XFS_I(inode); | 249 | struct xfs_inode *ip = XFS_I(inode); |
250 | loff_t isize = i_size_read(inode); | 250 | loff_t isize = i_size_read(inode); |
251 | size_t count = iov_iter_count(to); | 251 | size_t count = iov_iter_count(to); |
252 | loff_t end = iocb->ki_pos + count - 1; | ||
252 | struct iov_iter data; | 253 | struct iov_iter data; |
253 | struct xfs_buftarg *target; | 254 | struct xfs_buftarg *target; |
254 | ssize_t ret = 0; | 255 | ssize_t ret = 0; |
@@ -272,49 +273,21 @@ xfs_file_dio_aio_read( | |||
272 | 273 | ||
273 | file_accessed(iocb->ki_filp); | 274 | file_accessed(iocb->ki_filp); |
274 | 275 | ||
275 | /* | ||
276 | * Locking is a bit tricky here. If we take an exclusive lock for direct | ||
277 | * IO, we effectively serialise all new concurrent read IO to this file | ||
278 | * and block it behind IO that is currently in progress because IO in | ||
279 | * progress holds the IO lock shared. We only need to hold the lock | ||
280 | * exclusive to blow away the page cache, so only take lock exclusively | ||
281 | * if the page cache needs invalidation. This allows the normal direct | ||
282 | * IO case of no page cache pages to proceeed concurrently without | ||
283 | * serialisation. | ||
284 | */ | ||
285 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); | 276 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); |
286 | if (mapping->nrpages) { | 277 | if (mapping->nrpages) { |
287 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); | 278 | ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); |
288 | xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); | 279 | if (ret) |
280 | goto out_unlock; | ||
289 | 281 | ||
290 | /* | 282 | /* |
291 | * The generic dio code only flushes the range of the particular | 283 | * Invalidate whole pages. This can return an error if we fail |
292 | * I/O. Because we take an exclusive lock here, this whole | 284 | * to invalidate a page, but this should never happen on XFS. |
293 | * sequence is considerably more expensive for us. This has a | 285 | * Warn if it does fail. |
294 | * noticeable performance impact for any file with cached pages, | ||
295 | * even when outside of the range of the particular I/O. | ||
296 | * | ||
297 | * Hence, amortize the cost of the lock against a full file | ||
298 | * flush and reduce the chances of repeated iolock cycles going | ||
299 | * forward. | ||
300 | */ | 286 | */ |
301 | if (mapping->nrpages) { | 287 | ret = invalidate_inode_pages2_range(mapping, |
302 | ret = filemap_write_and_wait(mapping); | 288 | iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); |
303 | if (ret) { | 289 | WARN_ON_ONCE(ret); |
304 | xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); | 290 | ret = 0; |
305 | return ret; | ||
306 | } | ||
307 | |||
308 | /* | ||
309 | * Invalidate whole pages. This can return an error if | ||
310 | * we fail to invalidate a page, but this should never | ||
311 | * happen on XFS. Warn if it does fail. | ||
312 | */ | ||
313 | ret = invalidate_inode_pages2(mapping); | ||
314 | WARN_ON_ONCE(ret); | ||
315 | ret = 0; | ||
316 | } | ||
317 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); | ||
318 | } | 291 | } |
319 | 292 | ||
320 | data = *to; | 293 | data = *to; |
@@ -324,8 +297,9 @@ xfs_file_dio_aio_read( | |||
324 | iocb->ki_pos += ret; | 297 | iocb->ki_pos += ret; |
325 | iov_iter_advance(to, ret); | 298 | iov_iter_advance(to, ret); |
326 | } | 299 | } |
327 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); | ||
328 | 300 | ||
301 | out_unlock: | ||
302 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); | ||
329 | return ret; | 303 | return ret; |
330 | } | 304 | } |
331 | 305 | ||
@@ -570,61 +544,49 @@ xfs_file_dio_aio_write( | |||
570 | if ((iocb->ki_pos | count) & target->bt_logical_sectormask) | 544 | if ((iocb->ki_pos | count) & target->bt_logical_sectormask) |
571 | return -EINVAL; | 545 | return -EINVAL; |
572 | 546 | ||
573 | /* "unaligned" here means not aligned to a filesystem block */ | ||
574 | if ((iocb->ki_pos & mp->m_blockmask) || | ||
575 | ((iocb->ki_pos + count) & mp->m_blockmask)) | ||
576 | unaligned_io = 1; | ||
577 | |||
578 | /* | 547 | /* |
579 | * We don't need to take an exclusive lock unless there page cache needs | 548 | * Don't take the exclusive iolock here unless the I/O is unaligned to |
580 | * to be invalidated or unaligned IO is being executed. We don't need to | 549 | * the file system block size. We don't need to consider the EOF |
581 | * consider the EOF extension case here because | 550 | * extension case here because xfs_file_aio_write_checks() will relock |
582 | * xfs_file_aio_write_checks() will relock the inode as necessary for | 551 | * the inode as necessary for EOF zeroing cases and fill out the new |
583 | * EOF zeroing cases and fill out the new inode size as appropriate. | 552 | * inode size as appropriate. |
584 | */ | 553 | */ |
585 | if (unaligned_io || mapping->nrpages) | 554 | if ((iocb->ki_pos & mp->m_blockmask) || |
555 | ((iocb->ki_pos + count) & mp->m_blockmask)) { | ||
556 | unaligned_io = 1; | ||
586 | iolock = XFS_IOLOCK_EXCL; | 557 | iolock = XFS_IOLOCK_EXCL; |
587 | else | 558 | } else { |
588 | iolock = XFS_IOLOCK_SHARED; | 559 | iolock = XFS_IOLOCK_SHARED; |
589 | xfs_rw_ilock(ip, iolock); | ||
590 | |||
591 | /* | ||
592 | * Recheck if there are cached pages that need invalidate after we got | ||
593 | * the iolock to protect against other threads adding new pages while | ||
594 | * we were waiting for the iolock. | ||
595 | */ | ||
596 | if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) { | ||
597 | xfs_rw_iunlock(ip, iolock); | ||
598 | iolock = XFS_IOLOCK_EXCL; | ||
599 | xfs_rw_ilock(ip, iolock); | ||
600 | } | 560 | } |
601 | 561 | ||
562 | xfs_rw_ilock(ip, iolock); | ||
563 | |||
602 | ret = xfs_file_aio_write_checks(iocb, from, &iolock); | 564 | ret = xfs_file_aio_write_checks(iocb, from, &iolock); |
603 | if (ret) | 565 | if (ret) |
604 | goto out; | 566 | goto out; |
605 | count = iov_iter_count(from); | 567 | count = iov_iter_count(from); |
606 | end = iocb->ki_pos + count - 1; | 568 | end = iocb->ki_pos + count - 1; |
607 | 569 | ||
608 | /* | ||
609 | * See xfs_file_dio_aio_read() for why we do a full-file flush here. | ||
610 | */ | ||
611 | if (mapping->nrpages) { | 570 | if (mapping->nrpages) { |
612 | ret = filemap_write_and_wait(VFS_I(ip)->i_mapping); | 571 | ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); |
613 | if (ret) | 572 | if (ret) |
614 | goto out; | 573 | goto out; |
574 | |||
615 | /* | 575 | /* |
616 | * Invalidate whole pages. This can return an error if we fail | 576 | * Invalidate whole pages. This can return an error if we fail |
617 | * to invalidate a page, but this should never happen on XFS. | 577 | * to invalidate a page, but this should never happen on XFS. |
618 | * Warn if it does fail. | 578 | * Warn if it does fail. |
619 | */ | 579 | */ |
620 | ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping); | 580 | ret = invalidate_inode_pages2_range(mapping, |
581 | iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); | ||
621 | WARN_ON_ONCE(ret); | 582 | WARN_ON_ONCE(ret); |
622 | ret = 0; | 583 | ret = 0; |
623 | } | 584 | } |
624 | 585 | ||
625 | /* | 586 | /* |
626 | * If we are doing unaligned IO, wait for all other IO to drain, | 587 | * If we are doing unaligned IO, wait for all other IO to drain, |
627 | * otherwise demote the lock if we had to flush cached pages | 588 | * otherwise demote the lock if we had to take the exclusive lock |
589 | * for other reasons in xfs_file_aio_write_checks. | ||
628 | */ | 590 | */ |
629 | if (unaligned_io) | 591 | if (unaligned_io) |
630 | inode_dio_wait(inode); | 592 | inode_dio_wait(inode); |
@@ -947,134 +909,6 @@ out_unlock: | |||
947 | return error; | 909 | return error; |
948 | } | 910 | } |
949 | 911 | ||
950 | /* | ||
951 | * Flush all file writes out to disk. | ||
952 | */ | ||
953 | static int | ||
954 | xfs_file_wait_for_io( | ||
955 | struct inode *inode, | ||
956 | loff_t offset, | ||
957 | size_t len) | ||
958 | { | ||
959 | loff_t rounding; | ||
960 | loff_t ioffset; | ||
961 | loff_t iendoffset; | ||
962 | loff_t bs; | ||
963 | int ret; | ||
964 | |||
965 | bs = inode->i_sb->s_blocksize; | ||
966 | inode_dio_wait(inode); | ||
967 | |||
968 | rounding = max_t(xfs_off_t, bs, PAGE_SIZE); | ||
969 | ioffset = round_down(offset, rounding); | ||
970 | iendoffset = round_up(offset + len, rounding) - 1; | ||
971 | ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, | ||
972 | iendoffset); | ||
973 | return ret; | ||
974 | } | ||
975 | |||
976 | /* Hook up to the VFS reflink function */ | ||
977 | STATIC int | ||
978 | xfs_file_share_range( | ||
979 | struct file *file_in, | ||
980 | loff_t pos_in, | ||
981 | struct file *file_out, | ||
982 | loff_t pos_out, | ||
983 | u64 len, | ||
984 | bool is_dedupe) | ||
985 | { | ||
986 | struct inode *inode_in; | ||
987 | struct inode *inode_out; | ||
988 | ssize_t ret; | ||
989 | loff_t bs; | ||
990 | loff_t isize; | ||
991 | int same_inode; | ||
992 | loff_t blen; | ||
993 | unsigned int flags = 0; | ||
994 | |||
995 | inode_in = file_inode(file_in); | ||
996 | inode_out = file_inode(file_out); | ||
997 | bs = inode_out->i_sb->s_blocksize; | ||
998 | |||
999 | /* Don't touch certain kinds of inodes */ | ||
1000 | if (IS_IMMUTABLE(inode_out)) | ||
1001 | return -EPERM; | ||
1002 | if (IS_SWAPFILE(inode_in) || | ||
1003 | IS_SWAPFILE(inode_out)) | ||
1004 | return -ETXTBSY; | ||
1005 | |||
1006 | /* Reflink only works within this filesystem. */ | ||
1007 | if (inode_in->i_sb != inode_out->i_sb) | ||
1008 | return -EXDEV; | ||
1009 | same_inode = (inode_in->i_ino == inode_out->i_ino); | ||
1010 | |||
1011 | /* Don't reflink dirs, pipes, sockets... */ | ||
1012 | if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) | ||
1013 | return -EISDIR; | ||
1014 | if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode)) | ||
1015 | return -EINVAL; | ||
1016 | if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) | ||
1017 | return -EINVAL; | ||
1018 | |||
1019 | /* Don't share DAX file data for now. */ | ||
1020 | if (IS_DAX(inode_in) || IS_DAX(inode_out)) | ||
1021 | return -EINVAL; | ||
1022 | |||
1023 | /* Are we going all the way to the end? */ | ||
1024 | isize = i_size_read(inode_in); | ||
1025 | if (isize == 0) | ||
1026 | return 0; | ||
1027 | if (len == 0) | ||
1028 | len = isize - pos_in; | ||
1029 | |||
1030 | /* Ensure offsets don't wrap and the input is inside i_size */ | ||
1031 | if (pos_in + len < pos_in || pos_out + len < pos_out || | ||
1032 | pos_in + len > isize) | ||
1033 | return -EINVAL; | ||
1034 | |||
1035 | /* Don't allow dedupe past EOF in the dest file */ | ||
1036 | if (is_dedupe) { | ||
1037 | loff_t disize; | ||
1038 | |||
1039 | disize = i_size_read(inode_out); | ||
1040 | if (pos_out >= disize || pos_out + len > disize) | ||
1041 | return -EINVAL; | ||
1042 | } | ||
1043 | |||
1044 | /* If we're linking to EOF, continue to the block boundary. */ | ||
1045 | if (pos_in + len == isize) | ||
1046 | blen = ALIGN(isize, bs) - pos_in; | ||
1047 | else | ||
1048 | blen = len; | ||
1049 | |||
1050 | /* Only reflink if we're aligned to block boundaries */ | ||
1051 | if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || | ||
1052 | !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) | ||
1053 | return -EINVAL; | ||
1054 | |||
1055 | /* Don't allow overlapped reflink within the same file */ | ||
1056 | if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen) | ||
1057 | return -EINVAL; | ||
1058 | |||
1059 | /* Wait for the completion of any pending IOs on srcfile */ | ||
1060 | ret = xfs_file_wait_for_io(inode_in, pos_in, len); | ||
1061 | if (ret) | ||
1062 | goto out; | ||
1063 | ret = xfs_file_wait_for_io(inode_out, pos_out, len); | ||
1064 | if (ret) | ||
1065 | goto out; | ||
1066 | |||
1067 | if (is_dedupe) | ||
1068 | flags |= XFS_REFLINK_DEDUPE; | ||
1069 | ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out), | ||
1070 | pos_out, len, flags); | ||
1071 | if (ret < 0) | ||
1072 | goto out; | ||
1073 | |||
1074 | out: | ||
1075 | return ret; | ||
1076 | } | ||
1077 | |||
1078 | STATIC ssize_t | 912 | STATIC ssize_t |
1079 | xfs_file_copy_range( | 913 | xfs_file_copy_range( |
1080 | struct file *file_in, | 914 | struct file *file_in, |
@@ -1086,7 +920,7 @@ xfs_file_copy_range( | |||
1086 | { | 920 | { |
1087 | int error; | 921 | int error; |
1088 | 922 | ||
1089 | error = xfs_file_share_range(file_in, pos_in, file_out, pos_out, | 923 | error = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, |
1090 | len, false); | 924 | len, false); |
1091 | if (error) | 925 | if (error) |
1092 | return error; | 926 | return error; |
@@ -1101,7 +935,7 @@ xfs_file_clone_range( | |||
1101 | loff_t pos_out, | 935 | loff_t pos_out, |
1102 | u64 len) | 936 | u64 len) |
1103 | { | 937 | { |
1104 | return xfs_file_share_range(file_in, pos_in, file_out, pos_out, | 938 | return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, |
1105 | len, false); | 939 | len, false); |
1106 | } | 940 | } |
1107 | 941 | ||
@@ -1124,7 +958,7 @@ xfs_file_dedupe_range( | |||
1124 | if (len > XFS_MAX_DEDUPE_LEN) | 958 | if (len > XFS_MAX_DEDUPE_LEN) |
1125 | len = XFS_MAX_DEDUPE_LEN; | 959 | len = XFS_MAX_DEDUPE_LEN; |
1126 | 960 | ||
1127 | error = xfs_file_share_range(src_file, loff, dst_file, dst_loff, | 961 | error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff, |
1128 | len, true); | 962 | len, true); |
1129 | if (error) | 963 | if (error) |
1130 | return error; | 964 | return error; |
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 14796b744e0a..f295049db681 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1656,9 +1656,9 @@ void | |||
1656 | xfs_inode_set_cowblocks_tag( | 1656 | xfs_inode_set_cowblocks_tag( |
1657 | xfs_inode_t *ip) | 1657 | xfs_inode_t *ip) |
1658 | { | 1658 | { |
1659 | trace_xfs_inode_set_eofblocks_tag(ip); | 1659 | trace_xfs_inode_set_cowblocks_tag(ip); |
1660 | return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks, | 1660 | return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks, |
1661 | trace_xfs_perag_set_eofblocks, | 1661 | trace_xfs_perag_set_cowblocks, |
1662 | XFS_ICI_COWBLOCKS_TAG); | 1662 | XFS_ICI_COWBLOCKS_TAG); |
1663 | } | 1663 | } |
1664 | 1664 | ||
@@ -1666,7 +1666,7 @@ void | |||
1666 | xfs_inode_clear_cowblocks_tag( | 1666 | xfs_inode_clear_cowblocks_tag( |
1667 | xfs_inode_t *ip) | 1667 | xfs_inode_t *ip) |
1668 | { | 1668 | { |
1669 | trace_xfs_inode_clear_eofblocks_tag(ip); | 1669 | trace_xfs_inode_clear_cowblocks_tag(ip); |
1670 | return __xfs_inode_clear_eofblocks_tag(ip, | 1670 | return __xfs_inode_clear_eofblocks_tag(ip, |
1671 | trace_xfs_perag_clear_eofblocks, XFS_ICI_COWBLOCKS_TAG); | 1671 | trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG); |
1672 | } | 1672 | } |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index d907eb9f8ef3..436e109bb01e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -566,6 +566,17 @@ xfs_file_iomap_begin_delay( | |||
566 | xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx, | 566 | xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx, |
567 | &got, &prev); | 567 | &got, &prev); |
568 | if (!eof && got.br_startoff <= offset_fsb) { | 568 | if (!eof && got.br_startoff <= offset_fsb) { |
569 | if (xfs_is_reflink_inode(ip)) { | ||
570 | bool shared; | ||
571 | |||
572 | end_fsb = min(XFS_B_TO_FSB(mp, offset + count), | ||
573 | maxbytes_fsb); | ||
574 | xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb); | ||
575 | error = xfs_reflink_reserve_cow(ip, &got, &shared); | ||
576 | if (error) | ||
577 | goto out_unlock; | ||
578 | } | ||
579 | |||
569 | trace_xfs_iomap_found(ip, offset, count, 0, &got); | 580 | trace_xfs_iomap_found(ip, offset, count, 0, &got); |
570 | goto done; | 581 | goto done; |
571 | } | 582 | } |
@@ -961,19 +972,13 @@ xfs_file_iomap_begin( | |||
961 | struct xfs_mount *mp = ip->i_mount; | 972 | struct xfs_mount *mp = ip->i_mount; |
962 | struct xfs_bmbt_irec imap; | 973 | struct xfs_bmbt_irec imap; |
963 | xfs_fileoff_t offset_fsb, end_fsb; | 974 | xfs_fileoff_t offset_fsb, end_fsb; |
964 | bool shared, trimmed; | ||
965 | int nimaps = 1, error = 0; | 975 | int nimaps = 1, error = 0; |
976 | bool shared = false, trimmed = false; | ||
966 | unsigned lockmode; | 977 | unsigned lockmode; |
967 | 978 | ||
968 | if (XFS_FORCED_SHUTDOWN(mp)) | 979 | if (XFS_FORCED_SHUTDOWN(mp)) |
969 | return -EIO; | 980 | return -EIO; |
970 | 981 | ||
971 | if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { | ||
972 | error = xfs_reflink_reserve_cow_range(ip, offset, length); | ||
973 | if (error < 0) | ||
974 | return error; | ||
975 | } | ||
976 | |||
977 | if ((flags & IOMAP_WRITE) && !IS_DAX(inode) && | 982 | if ((flags & IOMAP_WRITE) && !IS_DAX(inode) && |
978 | !xfs_get_extsz_hint(ip)) { | 983 | !xfs_get_extsz_hint(ip)) { |
979 | /* Reserve delalloc blocks for regular writeback. */ | 984 | /* Reserve delalloc blocks for regular writeback. */ |
@@ -981,7 +986,16 @@ xfs_file_iomap_begin( | |||
981 | iomap); | 986 | iomap); |
982 | } | 987 | } |
983 | 988 | ||
984 | lockmode = xfs_ilock_data_map_shared(ip); | 989 | /* |
990 | * COW writes will allocate delalloc space, so we need to make sure | ||
991 | * to take the lock exclusively here. | ||
992 | */ | ||
993 | if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { | ||
994 | lockmode = XFS_ILOCK_EXCL; | ||
995 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
996 | } else { | ||
997 | lockmode = xfs_ilock_data_map_shared(ip); | ||
998 | } | ||
985 | 999 | ||
986 | ASSERT(offset <= mp->m_super->s_maxbytes); | 1000 | ASSERT(offset <= mp->m_super->s_maxbytes); |
987 | if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) | 1001 | if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) |
@@ -991,16 +1005,24 @@ xfs_file_iomap_begin( | |||
991 | 1005 | ||
992 | error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, | 1006 | error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, |
993 | &nimaps, 0); | 1007 | &nimaps, 0); |
994 | if (error) { | 1008 | if (error) |
995 | xfs_iunlock(ip, lockmode); | 1009 | goto out_unlock; |
996 | return error; | 1010 | |
1011 | if (flags & IOMAP_REPORT) { | ||
1012 | /* Trim the mapping to the nearest shared extent boundary. */ | ||
1013 | error = xfs_reflink_trim_around_shared(ip, &imap, &shared, | ||
1014 | &trimmed); | ||
1015 | if (error) | ||
1016 | goto out_unlock; | ||
997 | } | 1017 | } |
998 | 1018 | ||
999 | /* Trim the mapping to the nearest shared extent boundary. */ | 1019 | if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { |
1000 | error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); | 1020 | error = xfs_reflink_reserve_cow(ip, &imap, &shared); |
1001 | if (error) { | 1021 | if (error) |
1002 | xfs_iunlock(ip, lockmode); | 1022 | goto out_unlock; |
1003 | return error; | 1023 | |
1024 | end_fsb = imap.br_startoff + imap.br_blockcount; | ||
1025 | length = XFS_FSB_TO_B(mp, end_fsb) - offset; | ||
1004 | } | 1026 | } |
1005 | 1027 | ||
1006 | if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) { | 1028 | if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) { |
@@ -1039,6 +1061,9 @@ xfs_file_iomap_begin( | |||
1039 | if (shared) | 1061 | if (shared) |
1040 | iomap->flags |= IOMAP_F_SHARED; | 1062 | iomap->flags |= IOMAP_F_SHARED; |
1041 | return 0; | 1063 | return 0; |
1064 | out_unlock: | ||
1065 | xfs_iunlock(ip, lockmode); | ||
1066 | return error; | ||
1042 | } | 1067 | } |
1043 | 1068 | ||
1044 | static int | 1069 | static int |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index fc7873942bea..b341f10cf481 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1009,6 +1009,7 @@ xfs_mountfs( | |||
1009 | out_quota: | 1009 | out_quota: |
1010 | xfs_qm_unmount_quotas(mp); | 1010 | xfs_qm_unmount_quotas(mp); |
1011 | out_rtunmount: | 1011 | out_rtunmount: |
1012 | mp->m_super->s_flags &= ~MS_ACTIVE; | ||
1012 | xfs_rtunmount_inodes(mp); | 1013 | xfs_rtunmount_inodes(mp); |
1013 | out_rele_rip: | 1014 | out_rele_rip: |
1014 | IRELE(rip); | 1015 | IRELE(rip); |
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 5965e9455d91..a279b4e7f5fe 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -182,7 +182,8 @@ xfs_reflink_trim_around_shared( | |||
182 | if (!xfs_is_reflink_inode(ip) || | 182 | if (!xfs_is_reflink_inode(ip) || |
183 | ISUNWRITTEN(irec) || | 183 | ISUNWRITTEN(irec) || |
184 | irec->br_startblock == HOLESTARTBLOCK || | 184 | irec->br_startblock == HOLESTARTBLOCK || |
185 | irec->br_startblock == DELAYSTARTBLOCK) { | 185 | irec->br_startblock == DELAYSTARTBLOCK || |
186 | isnullstartblock(irec->br_startblock)) { | ||
186 | *shared = false; | 187 | *shared = false; |
187 | return 0; | 188 | return 0; |
188 | } | 189 | } |
@@ -227,50 +228,54 @@ xfs_reflink_trim_around_shared( | |||
227 | } | 228 | } |
228 | } | 229 | } |
229 | 230 | ||
230 | /* Create a CoW reservation for a range of blocks within a file. */ | 231 | /* |
231 | static int | 232 | * Trim the passed in imap to the next shared/unshared extent boundary, and |
232 | __xfs_reflink_reserve_cow( | 233 | * if imap->br_startoff points to a shared extent reserve space for it in the |
234 | * COW fork. In this case *shared is set to true, else to false. | ||
235 | * | ||
236 | * Note that imap will always contain the block numbers for the existing blocks | ||
237 | * in the data fork, as the upper layers need them for read-modify-write | ||
238 | * operations. | ||
239 | */ | ||
240 | int | ||
241 | xfs_reflink_reserve_cow( | ||
233 | struct xfs_inode *ip, | 242 | struct xfs_inode *ip, |
234 | xfs_fileoff_t *offset_fsb, | 243 | struct xfs_bmbt_irec *imap, |
235 | xfs_fileoff_t end_fsb, | 244 | bool *shared) |
236 | bool *skipped) | ||
237 | { | 245 | { |
238 | struct xfs_bmbt_irec got, prev, imap; | 246 | struct xfs_bmbt_irec got, prev; |
239 | xfs_fileoff_t orig_end_fsb; | 247 | xfs_fileoff_t end_fsb, orig_end_fsb; |
240 | int nimaps, eof = 0, error = 0; | 248 | int eof = 0, error = 0; |
241 | bool shared = false, trimmed = false; | 249 | bool trimmed; |
242 | xfs_extnum_t idx; | 250 | xfs_extnum_t idx; |
243 | xfs_extlen_t align; | 251 | xfs_extlen_t align; |
244 | 252 | ||
245 | /* Already reserved? Skip the refcount btree access. */ | 253 | /* |
246 | xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx, | 254 | * Search the COW fork extent list first. This serves two purposes: |
255 | * first this implement the speculative preallocation using cowextisze, | ||
256 | * so that we also unshared block adjacent to shared blocks instead | ||
257 | * of just the shared blocks themselves. Second the lookup in the | ||
258 | * extent list is generally faster than going out to the shared extent | ||
259 | * tree. | ||
260 | */ | ||
261 | xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx, | ||
247 | &got, &prev); | 262 | &got, &prev); |
248 | if (!eof && got.br_startoff <= *offset_fsb) { | 263 | if (!eof && got.br_startoff <= imap->br_startoff) { |
249 | end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount; | 264 | trace_xfs_reflink_cow_found(ip, imap); |
250 | trace_xfs_reflink_cow_found(ip, &got); | 265 | xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); |
251 | goto done; | ||
252 | } | ||
253 | 266 | ||
254 | /* Read extent from the source file. */ | 267 | *shared = true; |
255 | nimaps = 1; | 268 | return 0; |
256 | error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, | 269 | } |
257 | &imap, &nimaps, 0); | ||
258 | if (error) | ||
259 | goto out_unlock; | ||
260 | ASSERT(nimaps == 1); | ||
261 | 270 | ||
262 | /* Trim the mapping to the nearest shared extent boundary. */ | 271 | /* Trim the mapping to the nearest shared extent boundary. */ |
263 | error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); | 272 | error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); |
264 | if (error) | 273 | if (error) |
265 | goto out_unlock; | 274 | return error; |
266 | |||
267 | end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount; | ||
268 | 275 | ||
269 | /* Not shared? Just report the (potentially capped) extent. */ | 276 | /* Not shared? Just report the (potentially capped) extent. */ |
270 | if (!shared) { | 277 | if (!*shared) |
271 | *skipped = true; | 278 | return 0; |
272 | goto done; | ||
273 | } | ||
274 | 279 | ||
275 | /* | 280 | /* |
276 | * Fork all the shared blocks from our write offset until the end of | 281 | * Fork all the shared blocks from our write offset until the end of |
@@ -278,72 +283,38 @@ __xfs_reflink_reserve_cow( | |||
278 | */ | 283 | */ |
279 | error = xfs_qm_dqattach_locked(ip, 0); | 284 | error = xfs_qm_dqattach_locked(ip, 0); |
280 | if (error) | 285 | if (error) |
281 | goto out_unlock; | 286 | return error; |
287 | |||
288 | end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount; | ||
282 | 289 | ||
283 | align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip)); | 290 | align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip)); |
284 | if (align) | 291 | if (align) |
285 | end_fsb = roundup_64(end_fsb, align); | 292 | end_fsb = roundup_64(end_fsb, align); |
286 | 293 | ||
287 | retry: | 294 | retry: |
288 | error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb, | 295 | error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, |
289 | end_fsb - *offset_fsb, &got, | 296 | end_fsb - imap->br_startoff, &got, &prev, &idx, eof); |
290 | &prev, &idx, eof); | ||
291 | switch (error) { | 297 | switch (error) { |
292 | case 0: | 298 | case 0: |
293 | break; | 299 | break; |
294 | case -ENOSPC: | 300 | case -ENOSPC: |
295 | case -EDQUOT: | 301 | case -EDQUOT: |
296 | /* retry without any preallocation */ | 302 | /* retry without any preallocation */ |
297 | trace_xfs_reflink_cow_enospc(ip, &imap); | 303 | trace_xfs_reflink_cow_enospc(ip, imap); |
298 | if (end_fsb != orig_end_fsb) { | 304 | if (end_fsb != orig_end_fsb) { |
299 | end_fsb = orig_end_fsb; | 305 | end_fsb = orig_end_fsb; |
300 | goto retry; | 306 | goto retry; |
301 | } | 307 | } |
302 | /*FALLTHRU*/ | 308 | /*FALLTHRU*/ |
303 | default: | 309 | default: |
304 | goto out_unlock; | 310 | return error; |
305 | } | 311 | } |
306 | 312 | ||
307 | if (end_fsb != orig_end_fsb) | 313 | if (end_fsb != orig_end_fsb) |
308 | xfs_inode_set_cowblocks_tag(ip); | 314 | xfs_inode_set_cowblocks_tag(ip); |
309 | 315 | ||
310 | trace_xfs_reflink_cow_alloc(ip, &got); | 316 | trace_xfs_reflink_cow_alloc(ip, &got); |
311 | done: | 317 | return 0; |
312 | *offset_fsb = end_fsb; | ||
313 | out_unlock: | ||
314 | return error; | ||
315 | } | ||
316 | |||
317 | /* Create a CoW reservation for part of a file. */ | ||
318 | int | ||
319 | xfs_reflink_reserve_cow_range( | ||
320 | struct xfs_inode *ip, | ||
321 | xfs_off_t offset, | ||
322 | xfs_off_t count) | ||
323 | { | ||
324 | struct xfs_mount *mp = ip->i_mount; | ||
325 | xfs_fileoff_t offset_fsb, end_fsb; | ||
326 | bool skipped = false; | ||
327 | int error; | ||
328 | |||
329 | trace_xfs_reflink_reserve_cow_range(ip, offset, count); | ||
330 | |||
331 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | ||
332 | end_fsb = XFS_B_TO_FSB(mp, offset + count); | ||
333 | |||
334 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
335 | while (offset_fsb < end_fsb) { | ||
336 | error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb, | ||
337 | &skipped); | ||
338 | if (error) { | ||
339 | trace_xfs_reflink_reserve_cow_range_error(ip, error, | ||
340 | _RET_IP_); | ||
341 | break; | ||
342 | } | ||
343 | } | ||
344 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
345 | |||
346 | return error; | ||
347 | } | 318 | } |
348 | 319 | ||
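For orientation, a minimal caller sketch for the reworked xfs_reflink_reserve_cow() interface, assuming the usual in-tree headers and that XFS_ILOCK_EXCL is already held; the real write-path user lives in fs/xfs/xfs_iomap.c (changed by this series but not shown in these hunks), and the function name below is illustrative.

static int
example_reserve_cow_for_write(
	struct xfs_inode	*ip,
	xfs_fileoff_t		offset_fsb,
	xfs_filblks_t		count_fsb)
{
	struct xfs_bmbt_irec	imap;
	bool			shared = false;
	int			nimaps = 1;
	int			error;

	/* Look up the existing data fork mapping for the write range. */
	error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &imap, &nimaps, 0);
	if (error)
		return error;

	/* Trim imap to the shared region and reserve CoW delalloc for it. */
	error = xfs_reflink_reserve_cow(ip, &imap, &shared);
	if (error)
		return error;

	if (!shared)
		return 0;	/* overwrite in place, no CoW needed */

	/* ... direct the write at the CoW fork reservation instead ... */
	return 0;
}

The same lookup-then-reserve pattern shows up in __xfs_reflink_allocate_cow() below.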
349 | /* Allocate all CoW reservations covering a range of blocks in a file. */ | 320 | /* Allocate all CoW reservations covering a range of blocks in a file. */ |
@@ -358,9 +329,8 @@ __xfs_reflink_allocate_cow( | |||
358 | struct xfs_defer_ops dfops; | 329 | struct xfs_defer_ops dfops; |
359 | struct xfs_trans *tp; | 330 | struct xfs_trans *tp; |
360 | xfs_fsblock_t first_block; | 331 | xfs_fsblock_t first_block; |
361 | xfs_fileoff_t next_fsb; | ||
362 | int nimaps = 1, error; | 332 | int nimaps = 1, error; |
363 | bool skipped = false; | 333 | bool shared; |
364 | 334 | ||
365 | xfs_defer_init(&dfops, &first_block); | 335 | xfs_defer_init(&dfops, &first_block); |
366 | 336 | ||
@@ -371,33 +341,38 @@ __xfs_reflink_allocate_cow( | |||
371 | 341 | ||
372 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 342 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
373 | 343 | ||
374 | next_fsb = *offset_fsb; | 344 | /* Read extent from the source file. */ |
375 | error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped); | 345 | nimaps = 1; |
346 | error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, | ||
347 | &imap, &nimaps, 0); | ||
348 | if (error) | ||
349 | goto out_unlock; | ||
350 | ASSERT(nimaps == 1); | ||
351 | |||
352 | error = xfs_reflink_reserve_cow(ip, &imap, &shared); | ||
376 | if (error) | 353 | if (error) |
377 | goto out_trans_cancel; | 354 | goto out_trans_cancel; |
378 | 355 | ||
379 | if (skipped) { | 356 | if (!shared) { |
380 | *offset_fsb = next_fsb; | 357 | *offset_fsb = imap.br_startoff + imap.br_blockcount; |
381 | goto out_trans_cancel; | 358 | goto out_trans_cancel; |
382 | } | 359 | } |
383 | 360 | ||
384 | xfs_trans_ijoin(tp, ip, 0); | 361 | xfs_trans_ijoin(tp, ip, 0); |
385 | error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb, | 362 | error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount, |
386 | XFS_BMAPI_COWFORK, &first_block, | 363 | XFS_BMAPI_COWFORK, &first_block, |
387 | XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), | 364 | XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), |
388 | &imap, &nimaps, &dfops); | 365 | &imap, &nimaps, &dfops); |
389 | if (error) | 366 | if (error) |
390 | goto out_trans_cancel; | 367 | goto out_trans_cancel; |
391 | 368 | ||
392 | /* We might not have been able to map the whole delalloc extent */ | ||
393 | *offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb); | ||
394 | |||
395 | error = xfs_defer_finish(&tp, &dfops, NULL); | 369 | error = xfs_defer_finish(&tp, &dfops, NULL); |
396 | if (error) | 370 | if (error) |
397 | goto out_trans_cancel; | 371 | goto out_trans_cancel; |
398 | 372 | ||
399 | error = xfs_trans_commit(tp); | 373 | error = xfs_trans_commit(tp); |
400 | 374 | ||
375 | *offset_fsb = imap.br_startoff + imap.br_blockcount; | ||
401 | out_unlock: | 376 | out_unlock: |
402 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 377 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
403 | return error; | 378 | return error; |
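A hedged sketch of the range driver, for orientation only: __xfs_reflink_allocate_cow() now advances *offset_fsb past every mapping it examined, shared or not, so the xfs_reflink_allocate_cow_range() caller (outside the hunks shown here) only has to keep stepping until the byte range is covered. The name and body below are illustrative.

static int
example_allocate_cow_range(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		count)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
	int			error = 0;

	while (offset_fsb < end_fsb) {
		error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb);
		if (error)
			break;
	}
	return error;
}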
@@ -536,58 +511,49 @@ xfs_reflink_cancel_cow_blocks( | |||
536 | xfs_fileoff_t offset_fsb, | 511 | xfs_fileoff_t offset_fsb, |
537 | xfs_fileoff_t end_fsb) | 512 | xfs_fileoff_t end_fsb) |
538 | { | 513 | { |
539 | struct xfs_bmbt_irec irec; | 514 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); |
540 | xfs_filblks_t count_fsb; | 515 | struct xfs_bmbt_irec got, prev, del; |
516 | xfs_extnum_t idx; | ||
541 | xfs_fsblock_t firstfsb; | 517 | xfs_fsblock_t firstfsb; |
542 | struct xfs_defer_ops dfops; | 518 | struct xfs_defer_ops dfops; |
543 | int error = 0; | 519 | int error = 0, eof = 0; |
544 | int nimaps; | ||
545 | 520 | ||
546 | if (!xfs_is_reflink_inode(ip)) | 521 | if (!xfs_is_reflink_inode(ip)) |
547 | return 0; | 522 | return 0; |
548 | 523 | ||
549 | /* Go find the old extent in the CoW fork. */ | 524 | xfs_bmap_search_extents(ip, offset_fsb, XFS_COW_FORK, &eof, &idx, |
550 | while (offset_fsb < end_fsb) { | 525 | &got, &prev); |
551 | nimaps = 1; | 526 | if (eof) |
552 | count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); | 527 | return 0; |
553 | error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec, | ||
554 | &nimaps, XFS_BMAPI_COWFORK); | ||
555 | if (error) | ||
556 | break; | ||
557 | ASSERT(nimaps == 1); | ||
558 | |||
559 | trace_xfs_reflink_cancel_cow(ip, &irec); | ||
560 | 528 | ||
561 | if (irec.br_startblock == DELAYSTARTBLOCK) { | 529 | while (got.br_startoff < end_fsb) { |
562 | /* Free a delayed allocation. */ | 530 | del = got; |
563 | xfs_mod_fdblocks(ip->i_mount, irec.br_blockcount, | 531 | xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb); |
564 | false); | 532 | trace_xfs_reflink_cancel_cow(ip, &del); |
565 | ip->i_delayed_blks -= irec.br_blockcount; | ||
566 | 533 | ||
567 | /* Remove the mapping from the CoW fork. */ | 534 | if (isnullstartblock(del.br_startblock)) { |
568 | error = xfs_bunmapi_cow(ip, &irec); | 535 | error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK, |
536 | &idx, &got, &del); | ||
569 | if (error) | 537 | if (error) |
570 | break; | 538 | break; |
571 | } else if (irec.br_startblock == HOLESTARTBLOCK) { | ||
572 | /* empty */ | ||
573 | } else { | 539 | } else { |
574 | xfs_trans_ijoin(*tpp, ip, 0); | 540 | xfs_trans_ijoin(*tpp, ip, 0); |
575 | xfs_defer_init(&dfops, &firstfsb); | 541 | xfs_defer_init(&dfops, &firstfsb); |
576 | 542 | ||
577 | /* Free the CoW orphan record. */ | 543 | /* Free the CoW orphan record. */ |
578 | error = xfs_refcount_free_cow_extent(ip->i_mount, | 544 | error = xfs_refcount_free_cow_extent(ip->i_mount, |
579 | &dfops, irec.br_startblock, | 545 | &dfops, del.br_startblock, |
580 | irec.br_blockcount); | 546 | del.br_blockcount); |
581 | if (error) | 547 | if (error) |
582 | break; | 548 | break; |
583 | 549 | ||
584 | xfs_bmap_add_free(ip->i_mount, &dfops, | 550 | xfs_bmap_add_free(ip->i_mount, &dfops, |
585 | irec.br_startblock, irec.br_blockcount, | 551 | del.br_startblock, del.br_blockcount, |
586 | NULL); | 552 | NULL); |
587 | 553 | ||
588 | /* Update quota accounting */ | 554 | /* Update quota accounting */ |
589 | xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT, | 555 | xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT, |
590 | -(long)irec.br_blockcount); | 556 | -(long)del.br_blockcount); |
591 | 557 | ||
592 | /* Roll the transaction */ | 558 | /* Roll the transaction */ |
593 | error = xfs_defer_finish(tpp, &dfops, ip); | 559 | error = xfs_defer_finish(tpp, &dfops, ip); |
@@ -597,15 +563,18 @@ xfs_reflink_cancel_cow_blocks( | |||
597 | } | 563 | } |
598 | 564 | ||
599 | /* Remove the mapping from the CoW fork. */ | 565 | /* Remove the mapping from the CoW fork. */ |
600 | error = xfs_bunmapi_cow(ip, &irec); | 566 | xfs_bmap_del_extent_cow(ip, &idx, &got, &del); |
601 | if (error) | ||
602 | break; | ||
603 | } | 567 | } |
604 | 568 | ||
605 | /* Roll on... */ | 569 | if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)) |
606 | offset_fsb = irec.br_startoff + irec.br_blockcount; | 570 | break; |
571 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got); | ||
607 | } | 572 | } |
608 | 573 | ||
574 | /* clear tag if cow fork is emptied */ | ||
575 | if (!ifp->if_bytes) | ||
576 | xfs_inode_clear_cowblocks_tag(ip); | ||
577 | |||
609 | return error; | 578 | return error; |
610 | } | 579 | } |
611 | 580 | ||
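Both xfs_reflink_cancel_cow_blocks() above and xfs_reflink_end_cow() below copy the incore record into del and trim it to the caller's range before acting on it. A simplified sketch of what that trim does; the real xfs_trim_extent(), added to libxfs/xfs_bmap.c earlier in this series, also shifts br_startblock when it cuts blocks off the front of a real extent, a detail omitted here.

static void
example_trim_extent(
	struct xfs_bmbt_irec	*irec,
	xfs_fileoff_t		bno,
	xfs_filblks_t		len)
{
	xfs_fileoff_t		end = bno + len;

	if (irec->br_startoff + irec->br_blockcount <= bno ||
	    irec->br_startoff >= end) {
		irec->br_blockcount = 0;	/* no overlap at all */
		return;
	}
	if (irec->br_startoff < bno) {
		/* cut off the part in front of the range */
		irec->br_blockcount -= bno - irec->br_startoff;
		irec->br_startoff = bno;
	}
	if (irec->br_startoff + irec->br_blockcount > end)
		irec->br_blockcount = end - irec->br_startoff;	/* cut the tail */
}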
@@ -668,25 +637,26 @@ xfs_reflink_end_cow( | |||
668 | xfs_off_t offset, | 637 | xfs_off_t offset, |
669 | xfs_off_t count) | 638 | xfs_off_t count) |
670 | { | 639 | { |
671 | struct xfs_bmbt_irec irec; | 640 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); |
672 | struct xfs_bmbt_irec uirec; | 641 | struct xfs_bmbt_irec got, prev, del; |
673 | struct xfs_trans *tp; | 642 | struct xfs_trans *tp; |
674 | xfs_fileoff_t offset_fsb; | 643 | xfs_fileoff_t offset_fsb; |
675 | xfs_fileoff_t end_fsb; | 644 | xfs_fileoff_t end_fsb; |
676 | xfs_filblks_t count_fsb; | ||
677 | xfs_fsblock_t firstfsb; | 645 | xfs_fsblock_t firstfsb; |
678 | struct xfs_defer_ops dfops; | 646 | struct xfs_defer_ops dfops; |
679 | int error; | 647 | int error, eof = 0; |
680 | unsigned int resblks; | 648 | unsigned int resblks; |
681 | xfs_filblks_t ilen; | ||
682 | xfs_filblks_t rlen; | 649 | xfs_filblks_t rlen; |
683 | int nimaps; | 650 | xfs_extnum_t idx; |
684 | 651 | ||
685 | trace_xfs_reflink_end_cow(ip, offset, count); | 652 | trace_xfs_reflink_end_cow(ip, offset, count); |
686 | 653 | ||
654 | /* No COW extents? That's easy! */ | ||
655 | if (ifp->if_bytes == 0) | ||
656 | return 0; | ||
657 | |||
687 | offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); | 658 | offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); |
688 | end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); | 659 | end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); |
689 | count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); | ||
690 | 660 | ||
691 | /* Start a rolling transaction to switch the mappings */ | 661 | /* Start a rolling transaction to switch the mappings */ |
692 | resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); | 662 | resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); |
@@ -698,72 +668,65 @@ xfs_reflink_end_cow( | |||
698 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 668 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
699 | xfs_trans_ijoin(tp, ip, 0); | 669 | xfs_trans_ijoin(tp, ip, 0); |
700 | 670 | ||
701 | /* Go find the old extent in the CoW fork. */ | 671 | xfs_bmap_search_extents(ip, end_fsb - 1, XFS_COW_FORK, &eof, &idx, |
702 | while (offset_fsb < end_fsb) { | 672 | &got, &prev); |
703 | /* Read extent from the source file */ | ||
704 | nimaps = 1; | ||
705 | count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); | ||
706 | error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec, | ||
707 | &nimaps, XFS_BMAPI_COWFORK); | ||
708 | if (error) | ||
709 | goto out_cancel; | ||
710 | ASSERT(nimaps == 1); | ||
711 | 673 | ||
712 | ASSERT(irec.br_startblock != DELAYSTARTBLOCK); | 674 | /* If there is a hole at end_fsb - 1 go to the previous extent */ |
713 | trace_xfs_reflink_cow_remap(ip, &irec); | 675 | if (eof || got.br_startoff > end_fsb) { |
676 | ASSERT(idx > 0); | ||
677 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got); | ||
678 | } | ||
714 | 679 | ||
715 | /* | 680 | /* Walk backwards until we're out of the I/O range... */ |
716 | * We can have a hole in the CoW fork if part of a directio | 681 | while (got.br_startoff + got.br_blockcount > offset_fsb) { |
717 | * write is CoW but part of it isn't. | 682 | del = got; |
718 | */ | 683 | xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb); |
719 | rlen = ilen = irec.br_blockcount; | 684 | |
720 | if (irec.br_startblock == HOLESTARTBLOCK) | 685 | /* Extent delete may have bumped idx forward */ |
686 | if (!del.br_blockcount) { | ||
687 | idx--; | ||
721 | goto next_extent; | 688 | goto next_extent; |
689 | } | ||
690 | |||
691 | ASSERT(!isnullstartblock(got.br_startblock)); | ||
722 | 692 | ||
723 | /* Unmap the old blocks in the data fork. */ | 693 | /* Unmap the old blocks in the data fork. */ |
724 | while (rlen) { | 694 | xfs_defer_init(&dfops, &firstfsb); |
725 | xfs_defer_init(&dfops, &firstfsb); | 695 | rlen = del.br_blockcount; |
726 | error = __xfs_bunmapi(tp, ip, irec.br_startoff, | 696 | error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1, |
727 | &rlen, 0, 1, &firstfsb, &dfops); | 697 | &firstfsb, &dfops); |
728 | if (error) | 698 | if (error) |
729 | goto out_defer; | 699 | goto out_defer; |
730 | |||
731 | /* | ||
732 | * Trim the extent to whatever got unmapped. | ||
733 | * Remember, bunmapi works backwards. | ||
734 | */ | ||
735 | uirec.br_startblock = irec.br_startblock + rlen; | ||
736 | uirec.br_startoff = irec.br_startoff + rlen; | ||
737 | uirec.br_blockcount = irec.br_blockcount - rlen; | ||
738 | irec.br_blockcount = rlen; | ||
739 | trace_xfs_reflink_cow_remap_piece(ip, &uirec); | ||
740 | 700 | ||
741 | /* Free the CoW orphan record. */ | 701 | /* Trim the extent to whatever got unmapped. */ |
742 | error = xfs_refcount_free_cow_extent(tp->t_mountp, | 702 | if (rlen) { |
743 | &dfops, uirec.br_startblock, | 703 | xfs_trim_extent(&del, del.br_startoff + rlen, |
744 | uirec.br_blockcount); | 704 | del.br_blockcount - rlen); |
745 | if (error) | 705 | } |
746 | goto out_defer; | 706 | trace_xfs_reflink_cow_remap(ip, &del); |
747 | 707 | ||
748 | /* Map the new blocks into the data fork. */ | 708 | /* Free the CoW orphan record. */ |
749 | error = xfs_bmap_map_extent(tp->t_mountp, &dfops, | 709 | error = xfs_refcount_free_cow_extent(tp->t_mountp, &dfops, |
750 | ip, &uirec); | 710 | del.br_startblock, del.br_blockcount); |
751 | if (error) | 711 | if (error) |
752 | goto out_defer; | 712 | goto out_defer; |
753 | 713 | ||
754 | /* Remove the mapping from the CoW fork. */ | 714 | /* Map the new blocks into the data fork. */ |
755 | error = xfs_bunmapi_cow(ip, &uirec); | 715 | error = xfs_bmap_map_extent(tp->t_mountp, &dfops, ip, &del); |
756 | if (error) | 716 | if (error) |
757 | goto out_defer; | 717 | goto out_defer; |
758 | 718 | ||
759 | error = xfs_defer_finish(&tp, &dfops, ip); | 719 | /* Remove the mapping from the CoW fork. */ |
760 | if (error) | 720 | xfs_bmap_del_extent_cow(ip, &idx, &got, &del); |
761 | goto out_defer; | 721 | |
762 | } | 722 | error = xfs_defer_finish(&tp, &dfops, ip); |
723 | if (error) | ||
724 | goto out_defer; | ||
763 | 725 | ||
764 | next_extent: | 726 | next_extent: |
765 | /* Roll on... */ | 727 | if (idx < 0) |
766 | offset_fsb = irec.br_startoff + ilen; | 728 | break; |
729 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got); | ||
767 | } | 730 | } |
768 | 731 | ||
769 | error = xfs_trans_commit(tp); | 732 | error = xfs_trans_commit(tp); |
@@ -774,7 +737,6 @@ next_extent: | |||
774 | 737 | ||
775 | out_defer: | 738 | out_defer: |
776 | xfs_defer_cancel(&dfops); | 739 | xfs_defer_cancel(&dfops); |
777 | out_cancel: | ||
778 | xfs_trans_cancel(tp); | 740 | xfs_trans_cancel(tp); |
779 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 741 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
780 | out: | 742 | out: |
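The partial-unmap handling in the loop above relies on __xfs_bunmapi() working backwards from the end of the requested range and returning the still-mapped residue through its length pointer. A worked example with illustrative numbers:

/*
 * del covers file blocks 100..109 (br_startoff = 100, br_blockcount = 10).
 * __xfs_bunmapi() runs out of transaction room after unmapping blocks
 * 104..109 and hands back rlen = 4, meaning blocks 100..103 stayed mapped.
 *
 *	xfs_trim_extent(&del, del.br_startoff + rlen,	// 100 + 4 = 104
 *			del.br_blockcount - rlen);	//  10 - 4 =   6
 *
 * del now describes exactly the unmapped tail, blocks 104..109, which is
 * what gets freed from the refcount btree and remapped into the data fork.
 */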
@@ -1312,19 +1274,26 @@ out_error: | |||
1312 | */ | 1274 | */ |
1313 | int | 1275 | int |
1314 | xfs_reflink_remap_range( | 1276 | xfs_reflink_remap_range( |
1315 | struct xfs_inode *src, | 1277 | struct file *file_in, |
1316 | xfs_off_t srcoff, | 1278 | loff_t pos_in, |
1317 | struct xfs_inode *dest, | 1279 | struct file *file_out, |
1318 | xfs_off_t destoff, | 1280 | loff_t pos_out, |
1319 | xfs_off_t len, | 1281 | u64 len, |
1320 | unsigned int flags) | 1282 | bool is_dedupe) |
1321 | { | 1283 | { |
1284 | struct inode *inode_in = file_inode(file_in); | ||
1285 | struct xfs_inode *src = XFS_I(inode_in); | ||
1286 | struct inode *inode_out = file_inode(file_out); | ||
1287 | struct xfs_inode *dest = XFS_I(inode_out); | ||
1322 | struct xfs_mount *mp = src->i_mount; | 1288 | struct xfs_mount *mp = src->i_mount; |
1289 | loff_t bs = inode_out->i_sb->s_blocksize; | ||
1290 | bool same_inode = (inode_in == inode_out); | ||
1323 | xfs_fileoff_t sfsbno, dfsbno; | 1291 | xfs_fileoff_t sfsbno, dfsbno; |
1324 | xfs_filblks_t fsblen; | 1292 | xfs_filblks_t fsblen; |
1325 | int error; | ||
1326 | xfs_extlen_t cowextsize; | 1293 | xfs_extlen_t cowextsize; |
1327 | bool is_same; | 1294 | loff_t isize; |
1295 | ssize_t ret; | ||
1296 | loff_t blen; | ||
1328 | 1297 | ||
1329 | if (!xfs_sb_version_hasreflink(&mp->m_sb)) | 1298 | if (!xfs_sb_version_hasreflink(&mp->m_sb)) |
1330 | return -EOPNOTSUPP; | 1299 | return -EOPNOTSUPP; |
@@ -1332,17 +1301,8 @@ xfs_reflink_remap_range( | |||
1332 | if (XFS_FORCED_SHUTDOWN(mp)) | 1301 | if (XFS_FORCED_SHUTDOWN(mp)) |
1333 | return -EIO; | 1302 | return -EIO; |
1334 | 1303 | ||
1335 | /* Don't reflink realtime inodes */ | ||
1336 | if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) | ||
1337 | return -EINVAL; | ||
1338 | |||
1339 | if (flags & ~XFS_REFLINK_ALL) | ||
1340 | return -EINVAL; | ||
1341 | |||
1342 | trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff); | ||
1343 | |||
1344 | /* Lock both files against IO */ | 1304 | /* Lock both files against IO */ |
1345 | if (src->i_ino == dest->i_ino) { | 1305 | if (same_inode) { |
1346 | xfs_ilock(src, XFS_IOLOCK_EXCL); | 1306 | xfs_ilock(src, XFS_IOLOCK_EXCL); |
1347 | xfs_ilock(src, XFS_MMAPLOCK_EXCL); | 1307 | xfs_ilock(src, XFS_MMAPLOCK_EXCL); |
1348 | } else { | 1308 | } else { |
@@ -1350,39 +1310,126 @@ xfs_reflink_remap_range( | |||
1350 | xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); | 1310 | xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); |
1351 | } | 1311 | } |
1352 | 1312 | ||
1313 | /* Don't touch certain kinds of inodes */ | ||
1314 | ret = -EPERM; | ||
1315 | if (IS_IMMUTABLE(inode_out)) | ||
1316 | goto out_unlock; | ||
1317 | |||
1318 | ret = -ETXTBSY; | ||
1319 | if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) | ||
1320 | goto out_unlock; | ||
1321 | |||
1322 | |||
1323 | /* Don't reflink dirs, pipes, sockets... */ | ||
1324 | ret = -EISDIR; | ||
1325 | if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) | ||
1326 | goto out_unlock; | ||
1327 | ret = -EINVAL; | ||
1328 | if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode)) | ||
1329 | goto out_unlock; | ||
1330 | if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) | ||
1331 | goto out_unlock; | ||
1332 | |||
1333 | /* Don't reflink realtime inodes */ | ||
1334 | if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) | ||
1335 | goto out_unlock; | ||
1336 | |||
1337 | /* Don't share DAX file data for now. */ | ||
1338 | if (IS_DAX(inode_in) || IS_DAX(inode_out)) | ||
1339 | goto out_unlock; | ||
1340 | |||
1341 | /* Are we going all the way to the end? */ | ||
1342 | isize = i_size_read(inode_in); | ||
1343 | if (isize == 0) { | ||
1344 | ret = 0; | ||
1345 | goto out_unlock; | ||
1346 | } | ||
1347 | |||
1348 | if (len == 0) | ||
1349 | len = isize - pos_in; | ||
1350 | |||
1351 | /* Ensure offsets don't wrap and the input is inside i_size */ | ||
1352 | if (pos_in + len < pos_in || pos_out + len < pos_out || | ||
1353 | pos_in + len > isize) | ||
1354 | goto out_unlock; | ||
1355 | |||
1356 | /* Don't allow dedupe past EOF in the dest file */ | ||
1357 | if (is_dedupe) { | ||
1358 | loff_t disize; | ||
1359 | |||
1360 | disize = i_size_read(inode_out); | ||
1361 | if (pos_out >= disize || pos_out + len > disize) | ||
1362 | goto out_unlock; | ||
1363 | } | ||
1364 | |||
1365 | /* If we're linking to EOF, continue to the block boundary. */ | ||
1366 | if (pos_in + len == isize) | ||
1367 | blen = ALIGN(isize, bs) - pos_in; | ||
1368 | else | ||
1369 | blen = len; | ||
1370 | |||
1371 | /* Only reflink if we're aligned to block boundaries */ | ||
1372 | if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || | ||
1373 | !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) | ||
1374 | goto out_unlock; | ||
1375 | |||
1376 | /* Don't allow overlapped reflink within the same file */ | ||
1377 | if (same_inode) { | ||
1378 | if (pos_out + blen > pos_in && pos_out < pos_in + blen) | ||
1379 | goto out_unlock; | ||
1380 | } | ||
1381 | |||
1382 | /* Wait for the completion of any pending IOs on both files */ | ||
1383 | inode_dio_wait(inode_in); | ||
1384 | if (!same_inode) | ||
1385 | inode_dio_wait(inode_out); | ||
1386 | |||
1387 | ret = filemap_write_and_wait_range(inode_in->i_mapping, | ||
1388 | pos_in, pos_in + len - 1); | ||
1389 | if (ret) | ||
1390 | goto out_unlock; | ||
1391 | |||
1392 | ret = filemap_write_and_wait_range(inode_out->i_mapping, | ||
1393 | pos_out, pos_out + len - 1); | ||
1394 | if (ret) | ||
1395 | goto out_unlock; | ||
1396 | |||
1397 | trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); | ||
1398 | |||
1353 | /* | 1399 | /* |
1354 | * Check that the extents are the same. | 1400 | * Check that the extents are the same. |
1355 | */ | 1401 | */ |
1356 | if (flags & XFS_REFLINK_DEDUPE) { | 1402 | if (is_dedupe) { |
1357 | is_same = false; | 1403 | bool is_same = false; |
1358 | error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest), | 1404 | |
1359 | destoff, len, &is_same); | 1405 | ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out, |
1360 | if (error) | 1406 | len, &is_same); |
1361 | goto out_error; | 1407 | if (ret) |
1408 | goto out_unlock; | ||
1362 | if (!is_same) { | 1409 | if (!is_same) { |
1363 | error = -EBADE; | 1410 | ret = -EBADE; |
1364 | goto out_error; | 1411 | goto out_unlock; |
1365 | } | 1412 | } |
1366 | } | 1413 | } |
1367 | 1414 | ||
1368 | error = xfs_reflink_set_inode_flag(src, dest); | 1415 | ret = xfs_reflink_set_inode_flag(src, dest); |
1369 | if (error) | 1416 | if (ret) |
1370 | goto out_error; | 1417 | goto out_unlock; |
1371 | 1418 | ||
1372 | /* | 1419 | /* |
1373 | * Invalidate the page cache so that we can clear any CoW mappings | 1420 | * Invalidate the page cache so that we can clear any CoW mappings |
1374 | * in the destination file. | 1421 | * in the destination file. |
1375 | */ | 1422 | */ |
1376 | truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff, | 1423 | truncate_inode_pages_range(&inode_out->i_data, pos_out, |
1377 | PAGE_ALIGN(destoff + len) - 1); | 1424 | PAGE_ALIGN(pos_out + len) - 1); |
1378 | 1425 | ||
1379 | dfsbno = XFS_B_TO_FSBT(mp, destoff); | 1426 | dfsbno = XFS_B_TO_FSBT(mp, pos_out); |
1380 | sfsbno = XFS_B_TO_FSBT(mp, srcoff); | 1427 | sfsbno = XFS_B_TO_FSBT(mp, pos_in); |
1381 | fsblen = XFS_B_TO_FSB(mp, len); | 1428 | fsblen = XFS_B_TO_FSB(mp, len); |
1382 | error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, | 1429 | ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, |
1383 | destoff + len); | 1430 | pos_out + len); |
1384 | if (error) | 1431 | if (ret) |
1385 | goto out_error; | 1432 | goto out_unlock; |
1386 | 1433 | ||
1387 | /* | 1434 | /* |
1388 | * Carry the cowextsize hint from src to dest if we're sharing the | 1435 | * Carry the cowextsize hint from src to dest if we're sharing the |
@@ -1390,26 +1437,24 @@ xfs_reflink_remap_range( | |||
1390 | * has a cowextsize hint, and the destination file does not. | 1437 | * has a cowextsize hint, and the destination file does not. |
1391 | */ | 1438 | */ |
1392 | cowextsize = 0; | 1439 | cowextsize = 0; |
1393 | if (srcoff == 0 && len == i_size_read(VFS_I(src)) && | 1440 | if (pos_in == 0 && len == i_size_read(inode_in) && |
1394 | (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && | 1441 | (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && |
1395 | destoff == 0 && len >= i_size_read(VFS_I(dest)) && | 1442 | pos_out == 0 && len >= i_size_read(inode_out) && |
1396 | !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) | 1443 | !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) |
1397 | cowextsize = src->i_d.di_cowextsize; | 1444 | cowextsize = src->i_d.di_cowextsize; |
1398 | 1445 | ||
1399 | error = xfs_reflink_update_dest(dest, destoff + len, cowextsize); | 1446 | ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize); |
1400 | if (error) | ||
1401 | goto out_error; | ||
1402 | 1447 | ||
1403 | out_error: | 1448 | out_unlock: |
1404 | xfs_iunlock(src, XFS_MMAPLOCK_EXCL); | 1449 | xfs_iunlock(src, XFS_MMAPLOCK_EXCL); |
1405 | xfs_iunlock(src, XFS_IOLOCK_EXCL); | 1450 | xfs_iunlock(src, XFS_IOLOCK_EXCL); |
1406 | if (src->i_ino != dest->i_ino) { | 1451 | if (src->i_ino != dest->i_ino) { |
1407 | xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); | 1452 | xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); |
1408 | xfs_iunlock(dest, XFS_IOLOCK_EXCL); | 1453 | xfs_iunlock(dest, XFS_IOLOCK_EXCL); |
1409 | } | 1454 | } |
1410 | if (error) | 1455 | if (ret) |
1411 | trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_); | 1456 | trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); |
1412 | return error; | 1457 | return ret; |
1413 | } | 1458 | } |
1414 | 1459 | ||
1415 | /* | 1460 | /* |
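The "continue to the block boundary" case above is easiest to see with concrete numbers (illustrative values, 4096-byte blocks):

	isize  = 10000, pos_in = 8192, len = 0 (caller asked to share to EOF)
	len    = isize - pos_in            = 1808
	blen   = ALIGN(10000, 4096) - 8192 = 4096

blen is a whole block, so sharing the final, partially written block is
allowed; with pos_in = 8000 instead, pos_in itself is unaligned and
blen = 4288 is not, so the IS_ALIGNED() checks reject the request.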
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 5dc3c8ac12aa..fad11607c9ad 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -26,8 +26,8 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno, | |||
26 | extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, | 26 | extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, |
27 | struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed); | 27 | struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed); |
28 | 28 | ||
29 | extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip, | 29 | extern int xfs_reflink_reserve_cow(struct xfs_inode *ip, |
30 | xfs_off_t offset, xfs_off_t count); | 30 | struct xfs_bmbt_irec *imap, bool *shared); |
31 | extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, | 31 | extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, |
32 | xfs_off_t offset, xfs_off_t count); | 32 | xfs_off_t offset, xfs_off_t count); |
33 | extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, | 33 | extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, |
@@ -43,11 +43,8 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, | |||
43 | extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, | 43 | extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, |
44 | xfs_off_t count); | 44 | xfs_off_t count); |
45 | extern int xfs_reflink_recover_cow(struct xfs_mount *mp); | 45 | extern int xfs_reflink_recover_cow(struct xfs_mount *mp); |
46 | #define XFS_REFLINK_DEDUPE 1 /* only reflink if contents match */ | 46 | extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, |
47 | #define XFS_REFLINK_ALL (XFS_REFLINK_DEDUPE) | 47 | struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe); |
48 | extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff, | ||
49 | struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len, | ||
50 | unsigned int flags); | ||
51 | extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, | 48 | extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, |
52 | struct xfs_trans **tpp); | 49 | struct xfs_trans **tpp); |
53 | extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, | 50 | extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, |
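The new prototype lines up with the VFS ->clone_file_range() and ->dedupe_file_range() calling conventions. A sketch of how the file-level hooks can forward to it; the real wrappers live in the fs/xfs/xfs_file.c part of this diff (not shown here), the names below are illustrative, and the in-tree dedupe wrapper additionally clamps overly large requests.

static int
example_clone_file_range(
	struct file	*file_in,
	loff_t		pos_in,
	struct file	*file_out,
	loff_t		pos_out,
	u64		len)
{
	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
				       len, false);
}

static ssize_t
example_dedupe_file_range(
	struct file	*src_file,
	u64		loff,
	u64		len,
	struct file	*dst_file,
	u64		dst_loff)
{
	int		error;

	error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
					len, true);
	if (error)
		return error;
	return len;
}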
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 5f8d55d29a11..276d3023d60f 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -512,13 +512,13 @@ static struct attribute *xfs_error_attrs[] = { | |||
512 | }; | 512 | }; |
513 | 513 | ||
514 | 514 | ||
515 | struct kobj_type xfs_error_cfg_ktype = { | 515 | static struct kobj_type xfs_error_cfg_ktype = { |
516 | .release = xfs_sysfs_release, | 516 | .release = xfs_sysfs_release, |
517 | .sysfs_ops = &xfs_sysfs_ops, | 517 | .sysfs_ops = &xfs_sysfs_ops, |
518 | .default_attrs = xfs_error_attrs, | 518 | .default_attrs = xfs_error_attrs, |
519 | }; | 519 | }; |
520 | 520 | ||
521 | struct kobj_type xfs_error_ktype = { | 521 | static struct kobj_type xfs_error_ktype = { |
522 | .release = xfs_sysfs_release, | 522 | .release = xfs_sysfs_release, |
523 | .sysfs_ops = &xfs_sysfs_ops, | 523 | .sysfs_ops = &xfs_sysfs_ops, |
524 | }; | 524 | }; |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ad188d3a83f3..0907752be62d 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3346,7 +3346,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc); | |||
3346 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); | 3346 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); |
3347 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); | 3347 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); |
3348 | 3348 | ||
3349 | DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range); | 3349 | DEFINE_RW_EVENT(xfs_reflink_reserve_cow); |
3350 | DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range); | 3350 | DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range); |
3351 | 3351 | ||
3352 | DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write); | 3352 | DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write); |
@@ -3356,9 +3356,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec); | |||
3356 | DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range); | 3356 | DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range); |
3357 | DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow); | 3357 | DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow); |
3358 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap); | 3358 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap); |
3359 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece); | ||
3360 | 3359 | ||
3361 | DEFINE_INODE_ERROR_EVENT(xfs_reflink_reserve_cow_range_error); | ||
3362 | DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error); | 3360 | DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error); |
3363 | DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); | 3361 | DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); |
3364 | DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error); | 3362 | DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error); |
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index e63e288dee83..7892f55a1866 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -19,11 +19,15 @@ struct vm_fault; | |||
19 | #define IOMAP_UNWRITTEN 0x04 /* blocks allocated @blkno in unwritten state */ | 19 | #define IOMAP_UNWRITTEN 0x04 /* blocks allocated @blkno in unwritten state */ |
20 | 20 | ||
21 | /* | 21 | /* |
22 | * Flags for iomap mappings: | 22 | * Flags for all iomap mappings: |
23 | */ | 23 | */ |
24 | #define IOMAP_F_MERGED 0x01 /* contains multiple blocks/extents */ | 24 | #define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ |
25 | #define IOMAP_F_SHARED 0x02 /* block shared with another file */ | 25 | |
26 | #define IOMAP_F_NEW 0x04 /* blocks have been newly allocated */ | 26 | /* |
27 | * Flags that only need to be reported for IOMAP_REPORT requests: | ||
28 | */ | ||
29 | #define IOMAP_F_MERGED 0x10 /* contains multiple blocks/extents */ | ||
30 | #define IOMAP_F_SHARED 0x20 /* block shared with another file */ | ||
27 | 31 | ||
28 | /* | 32 | /* |
29 | * Magic value for blkno: | 33 | * Magic value for blkno: |
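A sketch of how an ->iomap_begin() implementation can use this split, assuming the IOMAP_REPORT flag introduced in the following hunk; the names and the shared predicate are illustrative, and the actual XFS conversion lives in fs/xfs/xfs_iomap.c (not shown here).

static int
example_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap)
{
	bool			shared = false;	/* set by the fs's reflink lookup */

	/* ... fill in iomap->blkno, iomap->offset, iomap->length ... */

	if (flags & IOMAP_REPORT) {
		/* Only report requests pay for the shared-extent check. */
		if (shared)
			iomap->flags |= IOMAP_F_SHARED;
	}
	return 0;
}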
@@ -42,8 +46,9 @@ struct iomap { | |||
42 | /* | 46 | /* |
43 | * Flags for iomap_begin / iomap_end. No flag implies a read. | 47 | * Flags for iomap_begin / iomap_end. No flag implies a read. |
44 | */ | 48 | */ |
45 | #define IOMAP_WRITE (1 << 0) | 49 | #define IOMAP_WRITE (1 << 0) /* writing, must allocate blocks */ |
46 | #define IOMAP_ZERO (1 << 1) | 50 | #define IOMAP_ZERO (1 << 1) /* zeroing operation, may skip holes */ |
51 | #define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */ | ||
47 | 52 | ||
48 | struct iomap_ops { | 53 | struct iomap_ops { |
49 | /* | 54 | /* |