aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2016-10-27 15:34:50 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-10-27 15:34:50 -0400
commit e890038e6a0b1f1c5a5a0037025499704353a3eb (patch)
tree 6a51161df0b1c428bab9b6dcd8746d108d867942
parent 18c2152d526e7956457fcdcbdf6d77ae2c663a26 (diff)
parent c17a8ef43d6b80ed3519b828c37d18645445949f (diff)
Merge tag 'xfs-fixes-for-linus-4.9-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs fixes from Dave Chinner: "This update contains fixes for most of the outstanding regressions introduced with the 4.9-rc1 XFS merge. There is also a fix for an iomap bug. This is quite a bit larger than I'd prefer for a -rc3, but most of the change comes from cleaning up the new reflink copy on write code; it's much simpler and easier to understand now. These changes fixed several bugs in the new code, and it wasn't clear that there was an easier/simpler way to fix them. The rest of the fixes are the usual size you'd expect at this stage. I've left the commits to soak in linux-next for some extra time because of the size before asking you to pull; no new problems with them have been reported so I think it's all OK. Summary: - iomap page offset masking fix for page faults - add IOMAP_REPORT to distinguish between read and fiemap map requests - cleanups to new shared data extent code - fix mount active status on failed log recovery - fix broken dquots in a buffer calculation - fix locking order issues and merge xfs_reflink_remap_range and xfs_file_share_range - rework unmapping of CoW extents and remove now unused functions - clean state when CoW is done" * tag 'xfs-fixes-for-linus-4.9-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (25 commits) xfs: clear cowblocks tag when cow fork is emptied xfs: fix up inode cowblocks tracking tracepoints fs: Do to trim high file position bits in iomap_page_mkwrite_actor xfs: remove xfs_bunmapi_cow xfs: optimize xfs_reflink_end_cow xfs: optimize xfs_reflink_cancel_cow_blocks xfs: refactor xfs_bunmapi_cow xfs: optimize writes to reflink files xfs: don't bother looking at the refcount tree for reads xfs: handle "raw" delayed extents xfs_reflink_trim_around_shared xfs: add xfs_trim_extent iomap: add IOMAP_REPORT xfs: merge xfs_reflink_remap_range and xfs_file_share_range xfs: remove xfs_file_wait_for_io xfs: move inode locking from xfs_reflink_remap_range to xfs_file_share_range xfs: fix the 
same_inode check in xfs_file_share_range xfs: remove the same fs check from xfs_file_share_range libxfs: v3 inodes are only valid on crc-enabled filesystems libxfs: clean up _calc_dquots_per_chunk xfs: unset MS_ACTIVE if mount fails ...
-rw-r--r-- fs/iomap.c 5
-rw-r--r-- fs/xfs/libxfs/xfs_bmap.c 418
-rw-r--r-- fs/xfs/libxfs/xfs_bmap.h 8
-rw-r--r-- fs/xfs/libxfs/xfs_btree.c 2
-rw-r--r-- fs/xfs/libxfs/xfs_dquot_buf.c 3
-rw-r--r-- fs/xfs/libxfs/xfs_format.h 1
-rw-r--r-- fs/xfs/libxfs/xfs_inode_buf.c 13
-rw-r--r-- fs/xfs/libxfs/xfs_inode_buf.h 2
-rw-r--r-- fs/xfs/xfs_file.c 232
-rw-r--r-- fs/xfs/xfs_icache.c 8
-rw-r--r-- fs/xfs/xfs_iomap.c 57
-rw-r--r-- fs/xfs/xfs_mount.c 1
-rw-r--r-- fs/xfs/xfs_reflink.c 499
-rw-r--r-- fs/xfs/xfs_reflink.h 11
-rw-r--r-- fs/xfs/xfs_sysfs.c 4
-rw-r--r-- fs/xfs/xfs_trace.h 4
-rw-r--r-- include/linux/iomap.h 17
17 files changed, 640 insertions, 645 deletions
diff --git a/fs/iomap.c b/fs/iomap.c
index 013d1d36fbbf..a8ee8c33ca78 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -433,8 +433,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
433 struct page *page = data; 433 struct page *page = data;
434 int ret; 434 int ret;
435 435
436 ret = __block_write_begin_int(page, pos & ~PAGE_MASK, length, 436 ret = __block_write_begin_int(page, pos, length, NULL, iomap);
437 NULL, iomap);
438 if (ret) 437 if (ret)
439 return ret; 438 return ret;
440 439
@@ -561,7 +560,7 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
561 } 560 }
562 561
563 while (len > 0) { 562 while (len > 0) {
564 ret = iomap_apply(inode, start, len, 0, ops, &ctx, 563 ret = iomap_apply(inode, start, len, IOMAP_REPORT, ops, &ctx,
565 iomap_fiemap_actor); 564 iomap_fiemap_actor);
566 /* inode with no (attribute) mapping will give ENOENT */ 565 /* inode with no (attribute) mapping will give ENOENT */
567 if (ret == -ENOENT) 566 if (ret == -ENOENT)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index c27344cf38e1..c6eb21940783 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3974,9 +3974,6 @@ xfs_bmap_remap_alloc(
3974 * allocating, so skip that check by pretending to be freeing. 3974 * allocating, so skip that check by pretending to be freeing.
3975 */ 3975 */
3976 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); 3976 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
3977 if (error)
3978 goto error0;
3979error0:
3980 xfs_perag_put(args.pag); 3977 xfs_perag_put(args.pag);
3981 if (error) 3978 if (error)
3982 trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_); 3979 trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
@@ -3999,6 +3996,39 @@ xfs_bmap_alloc(
3999 return xfs_bmap_btalloc(ap); 3996 return xfs_bmap_btalloc(ap);
4000} 3997}
4001 3998
3999/* Trim extent to fit a logical block range. */
4000void
4001xfs_trim_extent(
4002 struct xfs_bmbt_irec *irec,
4003 xfs_fileoff_t bno,
4004 xfs_filblks_t len)
4005{
4006 xfs_fileoff_t distance;
4007 xfs_fileoff_t end = bno + len;
4008
4009 if (irec->br_startoff + irec->br_blockcount <= bno ||
4010 irec->br_startoff >= end) {
4011 irec->br_blockcount = 0;
4012 return;
4013 }
4014
4015 if (irec->br_startoff < bno) {
4016 distance = bno - irec->br_startoff;
4017 if (isnullstartblock(irec->br_startblock))
4018 irec->br_startblock = DELAYSTARTBLOCK;
4019 if (irec->br_startblock != DELAYSTARTBLOCK &&
4020 irec->br_startblock != HOLESTARTBLOCK)
4021 irec->br_startblock += distance;
4022 irec->br_startoff += distance;
4023 irec->br_blockcount -= distance;
4024 }
4025
4026 if (end < irec->br_startoff + irec->br_blockcount) {
4027 distance = irec->br_startoff + irec->br_blockcount - end;
4028 irec->br_blockcount -= distance;
4029 }
4030}
4031
4002/* 4032/*
4003 * Trim the returned map to the required bounds 4033 * Trim the returned map to the required bounds
4004 */ 4034 */
@@ -4829,6 +4859,219 @@ xfs_bmap_split_indlen(
4829 return stolen; 4859 return stolen;
4830} 4860}
4831 4861
4862int
4863xfs_bmap_del_extent_delay(
4864 struct xfs_inode *ip,
4865 int whichfork,
4866 xfs_extnum_t *idx,
4867 struct xfs_bmbt_irec *got,
4868 struct xfs_bmbt_irec *del)
4869{
4870 struct xfs_mount *mp = ip->i_mount;
4871 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
4872 struct xfs_bmbt_irec new;
4873 int64_t da_old, da_new, da_diff = 0;
4874 xfs_fileoff_t del_endoff, got_endoff;
4875 xfs_filblks_t got_indlen, new_indlen, stolen;
4876 int error = 0, state = 0;
4877 bool isrt;
4878
4879 XFS_STATS_INC(mp, xs_del_exlist);
4880
4881 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
4882 del_endoff = del->br_startoff + del->br_blockcount;
4883 got_endoff = got->br_startoff + got->br_blockcount;
4884 da_old = startblockval(got->br_startblock);
4885 da_new = 0;
4886
4887 ASSERT(*idx >= 0);
4888 ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
4889 ASSERT(del->br_blockcount > 0);
4890 ASSERT(got->br_startoff <= del->br_startoff);
4891 ASSERT(got_endoff >= del_endoff);
4892
4893 if (isrt) {
4894 int64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
4895
4896 do_div(rtexts, mp->m_sb.sb_rextsize);
4897 xfs_mod_frextents(mp, rtexts);
4898 }
4899
4900 /*
4901 * Update the inode delalloc counter now and wait to update the
4902 * sb counters as we might have to borrow some blocks for the
4903 * indirect block accounting.
4904 */
4905 xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0,
4906 isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4907 ip->i_delayed_blks -= del->br_blockcount;
4908
4909 if (whichfork == XFS_COW_FORK)
4910 state |= BMAP_COWFORK;
4911
4912 if (got->br_startoff == del->br_startoff)
4913 state |= BMAP_LEFT_CONTIG;
4914 if (got_endoff == del_endoff)
4915 state |= BMAP_RIGHT_CONTIG;
4916
4917 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
4918 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
4919 /*
4920 * Matches the whole extent. Delete the entry.
4921 */
4922 xfs_iext_remove(ip, *idx, 1, state);
4923 --*idx;
4924 break;
4925 case BMAP_LEFT_CONTIG:
4926 /*
4927 * Deleting the first part of the extent.
4928 */
4929 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4930 got->br_startoff = del_endoff;
4931 got->br_blockcount -= del->br_blockcount;
4932 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4933 got->br_blockcount), da_old);
4934 got->br_startblock = nullstartblock((int)da_new);
4935 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
4936 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4937 break;
4938 case BMAP_RIGHT_CONTIG:
4939 /*
4940 * Deleting the last part of the extent.
4941 */
4942 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4943 got->br_blockcount = got->br_blockcount - del->br_blockcount;
4944 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4945 got->br_blockcount), da_old);
4946 got->br_startblock = nullstartblock((int)da_new);
4947 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
4948 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4949 break;
4950 case 0:
4951 /*
4952 * Deleting the middle of the extent.
4953 *
4954 * Distribute the original indlen reservation across the two new
4955 * extents. Steal blocks from the deleted extent if necessary.
4956 * Stealing blocks simply fudges the fdblocks accounting below.
4957 * Warn if either of the new indlen reservations is zero as this
4958 * can lead to delalloc problems.
4959 */
4960 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4961
4962 got->br_blockcount = del->br_startoff - got->br_startoff;
4963 got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
4964
4965 new.br_blockcount = got_endoff - del_endoff;
4966 new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
4967
4968 WARN_ON_ONCE(!got_indlen || !new_indlen);
4969 stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
4970 del->br_blockcount);
4971
4972 got->br_startblock = nullstartblock((int)got_indlen);
4973 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
4974 trace_xfs_bmap_post_update(ip, *idx, 0, _THIS_IP_);
4975
4976 new.br_startoff = del_endoff;
4977 new.br_state = got->br_state;
4978 new.br_startblock = nullstartblock((int)new_indlen);
4979
4980 ++*idx;
4981 xfs_iext_insert(ip, *idx, 1, &new, state);
4982
4983 da_new = got_indlen + new_indlen - stolen;
4984 del->br_blockcount -= stolen;
4985 break;
4986 }
4987
4988 ASSERT(da_old >= da_new);
4989 da_diff = da_old - da_new;
4990 if (!isrt)
4991 da_diff += del->br_blockcount;
4992 if (da_diff)
4993 xfs_mod_fdblocks(mp, da_diff, false);
4994 return error;
4995}
4996
4997void
4998xfs_bmap_del_extent_cow(
4999 struct xfs_inode *ip,
5000 xfs_extnum_t *idx,
5001 struct xfs_bmbt_irec *got,
5002 struct xfs_bmbt_irec *del)
5003{
5004 struct xfs_mount *mp = ip->i_mount;
5005 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
5006 struct xfs_bmbt_irec new;
5007 xfs_fileoff_t del_endoff, got_endoff;
5008 int state = BMAP_COWFORK;
5009
5010 XFS_STATS_INC(mp, xs_del_exlist);
5011
5012 del_endoff = del->br_startoff + del->br_blockcount;
5013 got_endoff = got->br_startoff + got->br_blockcount;
5014
5015 ASSERT(*idx >= 0);
5016 ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
5017 ASSERT(del->br_blockcount > 0);
5018 ASSERT(got->br_startoff <= del->br_startoff);
5019 ASSERT(got_endoff >= del_endoff);
5020 ASSERT(!isnullstartblock(got->br_startblock));
5021
5022 if (got->br_startoff == del->br_startoff)
5023 state |= BMAP_LEFT_CONTIG;
5024 if (got_endoff == del_endoff)
5025 state |= BMAP_RIGHT_CONTIG;
5026
5027 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
5028 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
5029 /*
5030 * Matches the whole extent. Delete the entry.
5031 */
5032 xfs_iext_remove(ip, *idx, 1, state);
5033 --*idx;
5034 break;
5035 case BMAP_LEFT_CONTIG:
5036 /*
5037 * Deleting the first part of the extent.
5038 */
5039 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
5040 got->br_startoff = del_endoff;
5041 got->br_blockcount -= del->br_blockcount;
5042 got->br_startblock = del->br_startblock + del->br_blockcount;
5043 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
5044 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5045 break;
5046 case BMAP_RIGHT_CONTIG:
5047 /*
5048 * Deleting the last part of the extent.
5049 */
5050 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
5051 got->br_blockcount -= del->br_blockcount;
5052 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
5053 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5054 break;
5055 case 0:
5056 /*
5057 * Deleting the middle of the extent.
5058 */
5059 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
5060 got->br_blockcount = del->br_startoff - got->br_startoff;
5061 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
5062 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5063
5064 new.br_startoff = del_endoff;
5065 new.br_blockcount = got_endoff - del_endoff;
5066 new.br_state = got->br_state;
5067 new.br_startblock = del->br_startblock + del->br_blockcount;
5068
5069 ++*idx;
5070 xfs_iext_insert(ip, *idx, 1, &new, state);
5071 break;
5072 }
5073}
5074
4832/* 5075/*
4833 * Called by xfs_bmapi to update file extent records and the btree 5076 * Called by xfs_bmapi to update file extent records and the btree
4834 * after removing space (or undoing a delayed allocation). 5077 * after removing space (or undoing a delayed allocation).
@@ -5171,175 +5414,6 @@ done:
5171 return error; 5414 return error;
5172} 5415}
5173 5416
5174/* Remove an extent from the CoW fork. Similar to xfs_bmap_del_extent. */
5175int
5176xfs_bunmapi_cow(
5177 struct xfs_inode *ip,
5178 struct xfs_bmbt_irec *del)
5179{
5180 xfs_filblks_t da_new;
5181 xfs_filblks_t da_old;
5182 xfs_fsblock_t del_endblock = 0;
5183 xfs_fileoff_t del_endoff;
5184 int delay;
5185 struct xfs_bmbt_rec_host *ep;
5186 int error;
5187 struct xfs_bmbt_irec got;
5188 xfs_fileoff_t got_endoff;
5189 struct xfs_ifork *ifp;
5190 struct xfs_mount *mp;
5191 xfs_filblks_t nblks;
5192 struct xfs_bmbt_irec new;
5193 /* REFERENCED */
5194 uint qfield;
5195 xfs_filblks_t temp;
5196 xfs_filblks_t temp2;
5197 int state = BMAP_COWFORK;
5198 int eof;
5199 xfs_extnum_t eidx;
5200
5201 mp = ip->i_mount;
5202 XFS_STATS_INC(mp, xs_del_exlist);
5203
5204 ep = xfs_bmap_search_extents(ip, del->br_startoff, XFS_COW_FORK, &eof,
5205 &eidx, &got, &new);
5206
5207 ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); ifp = ifp;
5208 ASSERT((eidx >= 0) && (eidx < ifp->if_bytes /
5209 (uint)sizeof(xfs_bmbt_rec_t)));
5210 ASSERT(del->br_blockcount > 0);
5211 ASSERT(got.br_startoff <= del->br_startoff);
5212 del_endoff = del->br_startoff + del->br_blockcount;
5213 got_endoff = got.br_startoff + got.br_blockcount;
5214 ASSERT(got_endoff >= del_endoff);
5215 delay = isnullstartblock(got.br_startblock);
5216 ASSERT(isnullstartblock(del->br_startblock) == delay);
5217 qfield = 0;
5218 error = 0;
5219 /*
5220 * If deleting a real allocation, must free up the disk space.
5221 */
5222 if (!delay) {
5223 nblks = del->br_blockcount;
5224 qfield = XFS_TRANS_DQ_BCOUNT;
5225 /*
5226 * Set up del_endblock and cur for later.
5227 */
5228 del_endblock = del->br_startblock + del->br_blockcount;
5229 da_old = da_new = 0;
5230 } else {
5231 da_old = startblockval(got.br_startblock);
5232 da_new = 0;
5233 nblks = 0;
5234 }
5235 qfield = qfield;
5236 nblks = nblks;
5237
5238 /*
5239 * Set flag value to use in switch statement.
5240 * Left-contig is 2, right-contig is 1.
5241 */
5242 switch (((got.br_startoff == del->br_startoff) << 1) |
5243 (got_endoff == del_endoff)) {
5244 case 3:
5245 /*
5246 * Matches the whole extent. Delete the entry.
5247 */
5248 xfs_iext_remove(ip, eidx, 1, BMAP_COWFORK);
5249 --eidx;
5250 break;
5251
5252 case 2:
5253 /*
5254 * Deleting the first part of the extent.
5255 */
5256 trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
5257 xfs_bmbt_set_startoff(ep, del_endoff);
5258 temp = got.br_blockcount - del->br_blockcount;
5259 xfs_bmbt_set_blockcount(ep, temp);
5260 if (delay) {
5261 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
5262 da_old);
5263 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
5264 trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
5265 da_new = temp;
5266 break;
5267 }
5268 xfs_bmbt_set_startblock(ep, del_endblock);
5269 trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
5270 break;
5271
5272 case 1:
5273 /*
5274 * Deleting the last part of the extent.
5275 */
5276 temp = got.br_blockcount - del->br_blockcount;
5277 trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
5278 xfs_bmbt_set_blockcount(ep, temp);
5279 if (delay) {
5280 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
5281 da_old);
5282 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
5283 trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
5284 da_new = temp;
5285 break;
5286 }
5287 trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
5288 break;
5289
5290 case 0:
5291 /*
5292 * Deleting the middle of the extent.
5293 */
5294 temp = del->br_startoff - got.br_startoff;
5295 trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
5296 xfs_bmbt_set_blockcount(ep, temp);
5297 new.br_startoff = del_endoff;
5298 temp2 = got_endoff - del_endoff;
5299 new.br_blockcount = temp2;
5300 new.br_state = got.br_state;
5301 if (!delay) {
5302 new.br_startblock = del_endblock;
5303 } else {
5304 temp = xfs_bmap_worst_indlen(ip, temp);
5305 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
5306 temp2 = xfs_bmap_worst_indlen(ip, temp2);
5307 new.br_startblock = nullstartblock((int)temp2);
5308 da_new = temp + temp2;
5309 while (da_new > da_old) {
5310 if (temp) {
5311 temp--;
5312 da_new--;
5313 xfs_bmbt_set_startblock(ep,
5314 nullstartblock((int)temp));
5315 }
5316 if (da_new == da_old)
5317 break;
5318 if (temp2) {
5319 temp2--;
5320 da_new--;
5321 new.br_startblock =
5322 nullstartblock((int)temp2);
5323 }
5324 }
5325 }
5326 trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
5327 xfs_iext_insert(ip, eidx + 1, 1, &new, state);
5328 ++eidx;
5329 break;
5330 }
5331
5332 /*
5333 * Account for change in delayed indirect blocks.
5334 * Nothing to do for disk quota accounting here.
5335 */
5336 ASSERT(da_old >= da_new);
5337 if (da_old > da_new)
5338 xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
5339
5340 return error;
5341}
5342
5343/* 5417/*
5344 * Unmap (remove) blocks from a file. 5418 * Unmap (remove) blocks from a file.
5345 * If nexts is nonzero then the number of extents to remove is limited to 5419 * If nexts is nonzero then the number of extents to remove is limited to
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index f97db7132564..7cae6ec27fa6 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -190,6 +190,8 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
190#define XFS_BMAP_TRACE_EXLIST(ip,c,w) 190#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
191#endif 191#endif
192 192
193void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
194 xfs_filblks_t len);
193int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); 195int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
194void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); 196void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
195void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, 197void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
@@ -221,7 +223,11 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
221 xfs_fileoff_t bno, xfs_filblks_t len, int flags, 223 xfs_fileoff_t bno, xfs_filblks_t len, int flags,
222 xfs_extnum_t nexts, xfs_fsblock_t *firstblock, 224 xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
223 struct xfs_defer_ops *dfops, int *done); 225 struct xfs_defer_ops *dfops, int *done);
224int xfs_bunmapi_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *del); 226int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
227 xfs_extnum_t *idx, struct xfs_bmbt_irec *got,
228 struct xfs_bmbt_irec *del);
229void xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx,
230 struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del);
225int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, 231int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
226 xfs_extnum_t num); 232 xfs_extnum_t num);
227uint xfs_default_attroffset(struct xfs_inode *ip); 233uint xfs_default_attroffset(struct xfs_inode *ip);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 5c8e6f2ce44f..0e80993c8a59 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4826,7 +4826,7 @@ xfs_btree_calc_size(
4826 return rval; 4826 return rval;
4827} 4827}
4828 4828
4829int 4829static int
4830xfs_btree_count_blocks_helper( 4830xfs_btree_count_blocks_helper(
4831 struct xfs_btree_cur *cur, 4831 struct xfs_btree_cur *cur,
4832 int level, 4832 int level,
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 3cc3cf767474..ac9a003dd29a 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -191,8 +191,7 @@ xfs_dquot_buf_verify_crc(
191 if (mp->m_quotainfo) 191 if (mp->m_quotainfo)
192 ndquots = mp->m_quotainfo->qi_dqperchunk; 192 ndquots = mp->m_quotainfo->qi_dqperchunk;
193 else 193 else
194 ndquots = xfs_calc_dquots_per_chunk( 194 ndquots = xfs_calc_dquots_per_chunk(bp->b_length);
195 XFS_BB_TO_FSB(mp, bp->b_length));
196 195
197 for (i = 0; i < ndquots; i++, d++) { 196 for (i = 0; i < ndquots; i++, d++) {
198 if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), 197 if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index f6547fc5e016..6b7579e7b60a 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -865,7 +865,6 @@ typedef struct xfs_timestamp {
865 * padding field for v3 inodes. 865 * padding field for v3 inodes.
866 */ 866 */
867#define XFS_DINODE_MAGIC 0x494e /* 'IN' */ 867#define XFS_DINODE_MAGIC 0x494e /* 'IN' */
868#define XFS_DINODE_GOOD_VERSION(v) ((v) >= 1 && (v) <= 3)
869typedef struct xfs_dinode { 868typedef struct xfs_dinode {
870 __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ 869 __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */
871 __be16 di_mode; /* mode and type of file */ 870 __be16 di_mode; /* mode and type of file */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 8de9a3a29589..134424fac434 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -57,6 +57,17 @@ xfs_inobp_check(
57} 57}
58#endif 58#endif
59 59
60bool
61xfs_dinode_good_version(
62 struct xfs_mount *mp,
63 __u8 version)
64{
65 if (xfs_sb_version_hascrc(&mp->m_sb))
66 return version == 3;
67
68 return version == 1 || version == 2;
69}
70
60/* 71/*
61 * If we are doing readahead on an inode buffer, we might be in log recovery 72 * If we are doing readahead on an inode buffer, we might be in log recovery
62 * reading an inode allocation buffer that hasn't yet been replayed, and hence 73 * reading an inode allocation buffer that hasn't yet been replayed, and hence
@@ -91,7 +102,7 @@ xfs_inode_buf_verify(
91 102
92 dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); 103 dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
93 di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && 104 di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
94 XFS_DINODE_GOOD_VERSION(dip->di_version); 105 xfs_dinode_good_version(mp, dip->di_version);
95 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 106 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
96 XFS_ERRTAG_ITOBP_INOTOBP, 107 XFS_ERRTAG_ITOBP_INOTOBP,
97 XFS_RANDOM_ITOBP_INOTOBP))) { 108 XFS_RANDOM_ITOBP_INOTOBP))) {
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 62d9d4681c8c..3cfe12a4f58a 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -74,6 +74,8 @@ void xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
74void xfs_log_dinode_to_disk(struct xfs_log_dinode *from, 74void xfs_log_dinode_to_disk(struct xfs_log_dinode *from,
75 struct xfs_dinode *to); 75 struct xfs_dinode *to);
76 76
77bool xfs_dinode_good_version(struct xfs_mount *mp, __u8 version);
78
77#if defined(DEBUG) 79#if defined(DEBUG)
78void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); 80void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
79#else 81#else
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a314fc7b56fa..6e4f7f900fea 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -249,6 +249,7 @@ xfs_file_dio_aio_read(
249 struct xfs_inode *ip = XFS_I(inode); 249 struct xfs_inode *ip = XFS_I(inode);
250 loff_t isize = i_size_read(inode); 250 loff_t isize = i_size_read(inode);
251 size_t count = iov_iter_count(to); 251 size_t count = iov_iter_count(to);
252 loff_t end = iocb->ki_pos + count - 1;
252 struct iov_iter data; 253 struct iov_iter data;
253 struct xfs_buftarg *target; 254 struct xfs_buftarg *target;
254 ssize_t ret = 0; 255 ssize_t ret = 0;
@@ -272,49 +273,21 @@ xfs_file_dio_aio_read(
272 273
273 file_accessed(iocb->ki_filp); 274 file_accessed(iocb->ki_filp);
274 275
275 /*
276 * Locking is a bit tricky here. If we take an exclusive lock for direct
277 * IO, we effectively serialise all new concurrent read IO to this file
278 * and block it behind IO that is currently in progress because IO in
279 * progress holds the IO lock shared. We only need to hold the lock
280 * exclusive to blow away the page cache, so only take lock exclusively
281 * if the page cache needs invalidation. This allows the normal direct
282 * IO case of no page cache pages to proceeed concurrently without
283 * serialisation.
284 */
285 xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); 276 xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
286 if (mapping->nrpages) { 277 if (mapping->nrpages) {
287 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); 278 ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
288 xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); 279 if (ret)
280 goto out_unlock;
289 281
290 /* 282 /*
291 * The generic dio code only flushes the range of the particular 283 * Invalidate whole pages. This can return an error if we fail
292 * I/O. Because we take an exclusive lock here, this whole 284 * to invalidate a page, but this should never happen on XFS.
293 * sequence is considerably more expensive for us. This has a 285 * Warn if it does fail.
294 * noticeable performance impact for any file with cached pages,
295 * even when outside of the range of the particular I/O.
296 *
297 * Hence, amortize the cost of the lock against a full file
298 * flush and reduce the chances of repeated iolock cycles going
299 * forward.
300 */ 286 */
301 if (mapping->nrpages) { 287 ret = invalidate_inode_pages2_range(mapping,
302 ret = filemap_write_and_wait(mapping); 288 iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
303 if (ret) { 289 WARN_ON_ONCE(ret);
304 xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); 290 ret = 0;
305 return ret;
306 }
307
308 /*
309 * Invalidate whole pages. This can return an error if
310 * we fail to invalidate a page, but this should never
311 * happen on XFS. Warn if it does fail.
312 */
313 ret = invalidate_inode_pages2(mapping);
314 WARN_ON_ONCE(ret);
315 ret = 0;
316 }
317 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
318 } 291 }
319 292
320 data = *to; 293 data = *to;
@@ -324,8 +297,9 @@ xfs_file_dio_aio_read(
324 iocb->ki_pos += ret; 297 iocb->ki_pos += ret;
325 iov_iter_advance(to, ret); 298 iov_iter_advance(to, ret);
326 } 299 }
327 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
328 300
301out_unlock:
302 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
329 return ret; 303 return ret;
330} 304}
331 305
@@ -570,61 +544,49 @@ xfs_file_dio_aio_write(
570 if ((iocb->ki_pos | count) & target->bt_logical_sectormask) 544 if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
571 return -EINVAL; 545 return -EINVAL;
572 546
573 /* "unaligned" here means not aligned to a filesystem block */
574 if ((iocb->ki_pos & mp->m_blockmask) ||
575 ((iocb->ki_pos + count) & mp->m_blockmask))
576 unaligned_io = 1;
577
578 /* 547 /*
579 * We don't need to take an exclusive lock unless there page cache needs 548 * Don't take the exclusive iolock here unless the I/O is unaligned to
580 * to be invalidated or unaligned IO is being executed. We don't need to 549 * the file system block size. We don't need to consider the EOF
581 * consider the EOF extension case here because 550 * extension case here because xfs_file_aio_write_checks() will relock
582 * xfs_file_aio_write_checks() will relock the inode as necessary for 551 * the inode as necessary for EOF zeroing cases and fill out the new
583 * EOF zeroing cases and fill out the new inode size as appropriate. 552 * inode size as appropriate.
584 */ 553 */
585 if (unaligned_io || mapping->nrpages) 554 if ((iocb->ki_pos & mp->m_blockmask) ||
555 ((iocb->ki_pos + count) & mp->m_blockmask)) {
556 unaligned_io = 1;
586 iolock = XFS_IOLOCK_EXCL; 557 iolock = XFS_IOLOCK_EXCL;
587 else 558 } else {
588 iolock = XFS_IOLOCK_SHARED; 559 iolock = XFS_IOLOCK_SHARED;
589 xfs_rw_ilock(ip, iolock);
590
591 /*
592 * Recheck if there are cached pages that need invalidate after we got
593 * the iolock to protect against other threads adding new pages while
594 * we were waiting for the iolock.
595 */
596 if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
597 xfs_rw_iunlock(ip, iolock);
598 iolock = XFS_IOLOCK_EXCL;
599 xfs_rw_ilock(ip, iolock);
600 } 560 }
601 561
562 xfs_rw_ilock(ip, iolock);
563
602 ret = xfs_file_aio_write_checks(iocb, from, &iolock); 564 ret = xfs_file_aio_write_checks(iocb, from, &iolock);
603 if (ret) 565 if (ret)
604 goto out; 566 goto out;
605 count = iov_iter_count(from); 567 count = iov_iter_count(from);
606 end = iocb->ki_pos + count - 1; 568 end = iocb->ki_pos + count - 1;
607 569
608 /*
609 * See xfs_file_dio_aio_read() for why we do a full-file flush here.
610 */
611 if (mapping->nrpages) { 570 if (mapping->nrpages) {
612 ret = filemap_write_and_wait(VFS_I(ip)->i_mapping); 571 ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
613 if (ret) 572 if (ret)
614 goto out; 573 goto out;
574
615 /* 575 /*
616 * Invalidate whole pages. This can return an error if we fail 576 * Invalidate whole pages. This can return an error if we fail
617 * to invalidate a page, but this should never happen on XFS. 577 * to invalidate a page, but this should never happen on XFS.
618 * Warn if it does fail. 578 * Warn if it does fail.
619 */ 579 */
620 ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping); 580 ret = invalidate_inode_pages2_range(mapping,
581 iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
621 WARN_ON_ONCE(ret); 582 WARN_ON_ONCE(ret);
622 ret = 0; 583 ret = 0;
623 } 584 }
624 585
625 /* 586 /*
626 * If we are doing unaligned IO, wait for all other IO to drain, 587 * If we are doing unaligned IO, wait for all other IO to drain,
627 * otherwise demote the lock if we had to flush cached pages 588 * otherwise demote the lock if we had to take the exclusive lock
589 * for other reasons in xfs_file_aio_write_checks.
628 */ 590 */
629 if (unaligned_io) 591 if (unaligned_io)
630 inode_dio_wait(inode); 592 inode_dio_wait(inode);
@@ -947,134 +909,6 @@ out_unlock:
947 return error; 909 return error;
948} 910}
949 911
950/*
951 * Flush all file writes out to disk.
952 */
953static int
954xfs_file_wait_for_io(
955 struct inode *inode,
956 loff_t offset,
957 size_t len)
958{
959 loff_t rounding;
960 loff_t ioffset;
961 loff_t iendoffset;
962 loff_t bs;
963 int ret;
964
965 bs = inode->i_sb->s_blocksize;
966 inode_dio_wait(inode);
967
968 rounding = max_t(xfs_off_t, bs, PAGE_SIZE);
969 ioffset = round_down(offset, rounding);
970 iendoffset = round_up(offset + len, rounding) - 1;
971 ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
972 iendoffset);
973 return ret;
974}
975
976/* Hook up to the VFS reflink function */
977STATIC int
978xfs_file_share_range(
979 struct file *file_in,
980 loff_t pos_in,
981 struct file *file_out,
982 loff_t pos_out,
983 u64 len,
984 bool is_dedupe)
985{
986 struct inode *inode_in;
987 struct inode *inode_out;
988 ssize_t ret;
989 loff_t bs;
990 loff_t isize;
991 int same_inode;
992 loff_t blen;
993 unsigned int flags = 0;
994
995 inode_in = file_inode(file_in);
996 inode_out = file_inode(file_out);
997 bs = inode_out->i_sb->s_blocksize;
998
999 /* Don't touch certain kinds of inodes */
1000 if (IS_IMMUTABLE(inode_out))
1001 return -EPERM;
1002 if (IS_SWAPFILE(inode_in) ||
1003 IS_SWAPFILE(inode_out))
1004 return -ETXTBSY;
1005
1006 /* Reflink only works within this filesystem. */
1007 if (inode_in->i_sb != inode_out->i_sb)
1008 return -EXDEV;
1009 same_inode = (inode_in->i_ino == inode_out->i_ino);
1010
1011 /* Don't reflink dirs, pipes, sockets... */
1012 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1013 return -EISDIR;
1014 if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
1015 return -EINVAL;
1016 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1017 return -EINVAL;
1018
1019 /* Don't share DAX file data for now. */
1020 if (IS_DAX(inode_in) || IS_DAX(inode_out))
1021 return -EINVAL;
1022
1023 /* Are we going all the way to the end? */
1024 isize = i_size_read(inode_in);
1025 if (isize == 0)
1026 return 0;
1027 if (len == 0)
1028 len = isize - pos_in;
1029
1030 /* Ensure offsets don't wrap and the input is inside i_size */
1031 if (pos_in + len < pos_in || pos_out + len < pos_out ||
1032 pos_in + len > isize)
1033 return -EINVAL;
1034
1035 /* Don't allow dedupe past EOF in the dest file */
1036 if (is_dedupe) {
1037 loff_t disize;
1038
1039 disize = i_size_read(inode_out);
1040 if (pos_out >= disize || pos_out + len > disize)
1041 return -EINVAL;
1042 }
1043
1044 /* If we're linking to EOF, continue to the block boundary. */
1045 if (pos_in + len == isize)
1046 blen = ALIGN(isize, bs) - pos_in;
1047 else
1048 blen = len;
1049
1050 /* Only reflink if we're aligned to block boundaries */
1051 if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
1052 !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
1053 return -EINVAL;
1054
1055 /* Don't allow overlapped reflink within the same file */
1056 if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen)
1057 return -EINVAL;
1058
1059 /* Wait for the completion of any pending IOs on srcfile */
1060 ret = xfs_file_wait_for_io(inode_in, pos_in, len);
1061 if (ret)
1062 goto out;
1063 ret = xfs_file_wait_for_io(inode_out, pos_out, len);
1064 if (ret)
1065 goto out;
1066
1067 if (is_dedupe)
1068 flags |= XFS_REFLINK_DEDUPE;
1069 ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out),
1070 pos_out, len, flags);
1071 if (ret < 0)
1072 goto out;
1073
1074out:
1075 return ret;
1076}
1077
1078STATIC ssize_t 912STATIC ssize_t
1079xfs_file_copy_range( 913xfs_file_copy_range(
1080 struct file *file_in, 914 struct file *file_in,
@@ -1086,7 +920,7 @@ xfs_file_copy_range(
1086{ 920{
1087 int error; 921 int error;
1088 922
1089 error = xfs_file_share_range(file_in, pos_in, file_out, pos_out, 923 error = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
1090 len, false); 924 len, false);
1091 if (error) 925 if (error)
1092 return error; 926 return error;
@@ -1101,7 +935,7 @@ xfs_file_clone_range(
1101 loff_t pos_out, 935 loff_t pos_out,
1102 u64 len) 936 u64 len)
1103{ 937{
1104 return xfs_file_share_range(file_in, pos_in, file_out, pos_out, 938 return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
1105 len, false); 939 len, false);
1106} 940}
1107 941
@@ -1124,7 +958,7 @@ xfs_file_dedupe_range(
1124 if (len > XFS_MAX_DEDUPE_LEN) 958 if (len > XFS_MAX_DEDUPE_LEN)
1125 len = XFS_MAX_DEDUPE_LEN; 959 len = XFS_MAX_DEDUPE_LEN;
1126 960
1127 error = xfs_file_share_range(src_file, loff, dst_file, dst_loff, 961 error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
1128 len, true); 962 len, true);
1129 if (error) 963 if (error)
1130 return error; 964 return error;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 14796b744e0a..f295049db681 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1656,9 +1656,9 @@ void
1656xfs_inode_set_cowblocks_tag( 1656xfs_inode_set_cowblocks_tag(
1657 xfs_inode_t *ip) 1657 xfs_inode_t *ip)
1658{ 1658{
1659 trace_xfs_inode_set_eofblocks_tag(ip); 1659 trace_xfs_inode_set_cowblocks_tag(ip);
1660 return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks, 1660 return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
1661 trace_xfs_perag_set_eofblocks, 1661 trace_xfs_perag_set_cowblocks,
1662 XFS_ICI_COWBLOCKS_TAG); 1662 XFS_ICI_COWBLOCKS_TAG);
1663} 1663}
1664 1664
@@ -1666,7 +1666,7 @@ void
1666xfs_inode_clear_cowblocks_tag( 1666xfs_inode_clear_cowblocks_tag(
1667 xfs_inode_t *ip) 1667 xfs_inode_t *ip)
1668{ 1668{
1669 trace_xfs_inode_clear_eofblocks_tag(ip); 1669 trace_xfs_inode_clear_cowblocks_tag(ip);
1670 return __xfs_inode_clear_eofblocks_tag(ip, 1670 return __xfs_inode_clear_eofblocks_tag(ip,
1671 trace_xfs_perag_clear_eofblocks, XFS_ICI_COWBLOCKS_TAG); 1671 trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
1672} 1672}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index d907eb9f8ef3..436e109bb01e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -566,6 +566,17 @@ xfs_file_iomap_begin_delay(
566 xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx, 566 xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx,
567 &got, &prev); 567 &got, &prev);
568 if (!eof && got.br_startoff <= offset_fsb) { 568 if (!eof && got.br_startoff <= offset_fsb) {
569 if (xfs_is_reflink_inode(ip)) {
570 bool shared;
571
572 end_fsb = min(XFS_B_TO_FSB(mp, offset + count),
573 maxbytes_fsb);
574 xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
575 error = xfs_reflink_reserve_cow(ip, &got, &shared);
576 if (error)
577 goto out_unlock;
578 }
579
569 trace_xfs_iomap_found(ip, offset, count, 0, &got); 580 trace_xfs_iomap_found(ip, offset, count, 0, &got);
570 goto done; 581 goto done;
571 } 582 }
@@ -961,19 +972,13 @@ xfs_file_iomap_begin(
961 struct xfs_mount *mp = ip->i_mount; 972 struct xfs_mount *mp = ip->i_mount;
962 struct xfs_bmbt_irec imap; 973 struct xfs_bmbt_irec imap;
963 xfs_fileoff_t offset_fsb, end_fsb; 974 xfs_fileoff_t offset_fsb, end_fsb;
964 bool shared, trimmed;
965 int nimaps = 1, error = 0; 975 int nimaps = 1, error = 0;
976 bool shared = false, trimmed = false;
966 unsigned lockmode; 977 unsigned lockmode;
967 978
968 if (XFS_FORCED_SHUTDOWN(mp)) 979 if (XFS_FORCED_SHUTDOWN(mp))
969 return -EIO; 980 return -EIO;
970 981
971 if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
972 error = xfs_reflink_reserve_cow_range(ip, offset, length);
973 if (error < 0)
974 return error;
975 }
976
977 if ((flags & IOMAP_WRITE) && !IS_DAX(inode) && 982 if ((flags & IOMAP_WRITE) && !IS_DAX(inode) &&
978 !xfs_get_extsz_hint(ip)) { 983 !xfs_get_extsz_hint(ip)) {
979 /* Reserve delalloc blocks for regular writeback. */ 984 /* Reserve delalloc blocks for regular writeback. */
@@ -981,7 +986,16 @@ xfs_file_iomap_begin(
981 iomap); 986 iomap);
982 } 987 }
983 988
984 lockmode = xfs_ilock_data_map_shared(ip); 989 /*
990 * COW writes will allocate delalloc space, so we need to make sure
991 * to take the lock exclusively here.
992 */
993 if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
994 lockmode = XFS_ILOCK_EXCL;
995 xfs_ilock(ip, XFS_ILOCK_EXCL);
996 } else {
997 lockmode = xfs_ilock_data_map_shared(ip);
998 }
985 999
986 ASSERT(offset <= mp->m_super->s_maxbytes); 1000 ASSERT(offset <= mp->m_super->s_maxbytes);
987 if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) 1001 if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
@@ -991,16 +1005,24 @@ xfs_file_iomap_begin(
991 1005
992 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, 1006 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
993 &nimaps, 0); 1007 &nimaps, 0);
994 if (error) { 1008 if (error)
995 xfs_iunlock(ip, lockmode); 1009 goto out_unlock;
996 return error; 1010
1011 if (flags & IOMAP_REPORT) {
1012 /* Trim the mapping to the nearest shared extent boundary. */
1013 error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
1014 &trimmed);
1015 if (error)
1016 goto out_unlock;
997 } 1017 }
998 1018
999 /* Trim the mapping to the nearest shared extent boundary. */ 1019 if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
1000 error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); 1020 error = xfs_reflink_reserve_cow(ip, &imap, &shared);
1001 if (error) { 1021 if (error)
1002 xfs_iunlock(ip, lockmode); 1022 goto out_unlock;
1003 return error; 1023
1024 end_fsb = imap.br_startoff + imap.br_blockcount;
1025 length = XFS_FSB_TO_B(mp, end_fsb) - offset;
1004 } 1026 }
1005 1027
1006 if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) { 1028 if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
@@ -1039,6 +1061,9 @@ xfs_file_iomap_begin(
1039 if (shared) 1061 if (shared)
1040 iomap->flags |= IOMAP_F_SHARED; 1062 iomap->flags |= IOMAP_F_SHARED;
1041 return 0; 1063 return 0;
1064out_unlock:
1065 xfs_iunlock(ip, lockmode);
1066 return error;
1042} 1067}
1043 1068
1044static int 1069static int
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index fc7873942bea..b341f10cf481 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1009,6 +1009,7 @@ xfs_mountfs(
1009 out_quota: 1009 out_quota:
1010 xfs_qm_unmount_quotas(mp); 1010 xfs_qm_unmount_quotas(mp);
1011 out_rtunmount: 1011 out_rtunmount:
1012 mp->m_super->s_flags &= ~MS_ACTIVE;
1012 xfs_rtunmount_inodes(mp); 1013 xfs_rtunmount_inodes(mp);
1013 out_rele_rip: 1014 out_rele_rip:
1014 IRELE(rip); 1015 IRELE(rip);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 5965e9455d91..a279b4e7f5fe 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -182,7 +182,8 @@ xfs_reflink_trim_around_shared(
182 if (!xfs_is_reflink_inode(ip) || 182 if (!xfs_is_reflink_inode(ip) ||
183 ISUNWRITTEN(irec) || 183 ISUNWRITTEN(irec) ||
184 irec->br_startblock == HOLESTARTBLOCK || 184 irec->br_startblock == HOLESTARTBLOCK ||
185 irec->br_startblock == DELAYSTARTBLOCK) { 185 irec->br_startblock == DELAYSTARTBLOCK ||
186 isnullstartblock(irec->br_startblock)) {
186 *shared = false; 187 *shared = false;
187 return 0; 188 return 0;
188 } 189 }
@@ -227,50 +228,54 @@ xfs_reflink_trim_around_shared(
227 } 228 }
228} 229}
229 230
230/* Create a CoW reservation for a range of blocks within a file. */ 231/*
231static int 232 * Trim the passed in imap to the next shared/unshared extent boundary, and
232__xfs_reflink_reserve_cow( 233 * if imap->br_startoff points to a shared extent reserve space for it in the
234 * COW fork. In this case *shared is set to true, else to false.
235 *
236 * Note that imap will always contain the block numbers for the existing blocks
237 * in the data fork, as the upper layers need them for read-modify-write
238 * operations.
239 */
240int
241xfs_reflink_reserve_cow(
233 struct xfs_inode *ip, 242 struct xfs_inode *ip,
234 xfs_fileoff_t *offset_fsb, 243 struct xfs_bmbt_irec *imap,
235 xfs_fileoff_t end_fsb, 244 bool *shared)
236 bool *skipped)
237{ 245{
238 struct xfs_bmbt_irec got, prev, imap; 246 struct xfs_bmbt_irec got, prev;
239 xfs_fileoff_t orig_end_fsb; 247 xfs_fileoff_t end_fsb, orig_end_fsb;
240 int nimaps, eof = 0, error = 0; 248 int eof = 0, error = 0;
241 bool shared = false, trimmed = false; 249 bool trimmed;
242 xfs_extnum_t idx; 250 xfs_extnum_t idx;
243 xfs_extlen_t align; 251 xfs_extlen_t align;
244 252
245 /* Already reserved? Skip the refcount btree access. */ 253 /*
246 xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx, 254 * Search the COW fork extent list first. This serves two purposes:
255 * first, this implements the speculative preallocation using cowextsize,
256 * so that we also unshare blocks adjacent to shared blocks instead
257 * of just the shared blocks themselves. Second, the lookup in the
258 * extent list is generally faster than going out to the shared extent
259 * tree.
260 */
261 xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx,
247 &got, &prev); 262 &got, &prev);
248 if (!eof && got.br_startoff <= *offset_fsb) { 263 if (!eof && got.br_startoff <= imap->br_startoff) {
249 end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount; 264 trace_xfs_reflink_cow_found(ip, imap);
250 trace_xfs_reflink_cow_found(ip, &got); 265 xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
251 goto done;
252 }
253 266
254 /* Read extent from the source file. */ 267 *shared = true;
255 nimaps = 1; 268 return 0;
256 error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, 269 }
257 &imap, &nimaps, 0);
258 if (error)
259 goto out_unlock;
260 ASSERT(nimaps == 1);
261 270
262 /* Trim the mapping to the nearest shared extent boundary. */ 271 /* Trim the mapping to the nearest shared extent boundary. */
263 error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); 272 error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
264 if (error) 273 if (error)
265 goto out_unlock; 274 return error;
266
267 end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount;
268 275
269 /* Not shared? Just report the (potentially capped) extent. */ 276 /* Not shared? Just report the (potentially capped) extent. */
270 if (!shared) { 277 if (!*shared)
271 *skipped = true; 278 return 0;
272 goto done;
273 }
274 279
275 /* 280 /*
276 * Fork all the shared blocks from our write offset until the end of 281 * Fork all the shared blocks from our write offset until the end of
@@ -278,72 +283,38 @@ __xfs_reflink_reserve_cow(
278 */ 283 */
279 error = xfs_qm_dqattach_locked(ip, 0); 284 error = xfs_qm_dqattach_locked(ip, 0);
280 if (error) 285 if (error)
281 goto out_unlock; 286 return error;
287
288 end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount;
282 289
283 align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip)); 290 align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
284 if (align) 291 if (align)
285 end_fsb = roundup_64(end_fsb, align); 292 end_fsb = roundup_64(end_fsb, align);
286 293
287retry: 294retry:
288 error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb, 295 error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
289 end_fsb - *offset_fsb, &got, 296 end_fsb - imap->br_startoff, &got, &prev, &idx, eof);
290 &prev, &idx, eof);
291 switch (error) { 297 switch (error) {
292 case 0: 298 case 0:
293 break; 299 break;
294 case -ENOSPC: 300 case -ENOSPC:
295 case -EDQUOT: 301 case -EDQUOT:
296 /* retry without any preallocation */ 302 /* retry without any preallocation */
297 trace_xfs_reflink_cow_enospc(ip, &imap); 303 trace_xfs_reflink_cow_enospc(ip, imap);
298 if (end_fsb != orig_end_fsb) { 304 if (end_fsb != orig_end_fsb) {
299 end_fsb = orig_end_fsb; 305 end_fsb = orig_end_fsb;
300 goto retry; 306 goto retry;
301 } 307 }
302 /*FALLTHRU*/ 308 /*FALLTHRU*/
303 default: 309 default:
304 goto out_unlock; 310 return error;
305 } 311 }
306 312
307 if (end_fsb != orig_end_fsb) 313 if (end_fsb != orig_end_fsb)
308 xfs_inode_set_cowblocks_tag(ip); 314 xfs_inode_set_cowblocks_tag(ip);
309 315
310 trace_xfs_reflink_cow_alloc(ip, &got); 316 trace_xfs_reflink_cow_alloc(ip, &got);
311done: 317 return 0;
312 *offset_fsb = end_fsb;
313out_unlock:
314 return error;
315}
316
317/* Create a CoW reservation for part of a file. */
318int
319xfs_reflink_reserve_cow_range(
320 struct xfs_inode *ip,
321 xfs_off_t offset,
322 xfs_off_t count)
323{
324 struct xfs_mount *mp = ip->i_mount;
325 xfs_fileoff_t offset_fsb, end_fsb;
326 bool skipped = false;
327 int error;
328
329 trace_xfs_reflink_reserve_cow_range(ip, offset, count);
330
331 offset_fsb = XFS_B_TO_FSBT(mp, offset);
332 end_fsb = XFS_B_TO_FSB(mp, offset + count);
333
334 xfs_ilock(ip, XFS_ILOCK_EXCL);
335 while (offset_fsb < end_fsb) {
336 error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb,
337 &skipped);
338 if (error) {
339 trace_xfs_reflink_reserve_cow_range_error(ip, error,
340 _RET_IP_);
341 break;
342 }
343 }
344 xfs_iunlock(ip, XFS_ILOCK_EXCL);
345
346 return error;
347} 318}
348 319
349/* Allocate all CoW reservations covering a range of blocks in a file. */ 320/* Allocate all CoW reservations covering a range of blocks in a file. */
@@ -358,9 +329,8 @@ __xfs_reflink_allocate_cow(
358 struct xfs_defer_ops dfops; 329 struct xfs_defer_ops dfops;
359 struct xfs_trans *tp; 330 struct xfs_trans *tp;
360 xfs_fsblock_t first_block; 331 xfs_fsblock_t first_block;
361 xfs_fileoff_t next_fsb;
362 int nimaps = 1, error; 332 int nimaps = 1, error;
363 bool skipped = false; 333 bool shared;
364 334
365 xfs_defer_init(&dfops, &first_block); 335 xfs_defer_init(&dfops, &first_block);
366 336
@@ -371,33 +341,38 @@ __xfs_reflink_allocate_cow(
371 341
372 xfs_ilock(ip, XFS_ILOCK_EXCL); 342 xfs_ilock(ip, XFS_ILOCK_EXCL);
373 343
374 next_fsb = *offset_fsb; 344 /* Read extent from the source file. */
375 error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped); 345 nimaps = 1;
346 error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
347 &imap, &nimaps, 0);
348 if (error)
349 goto out_unlock;
350 ASSERT(nimaps == 1);
351
352 error = xfs_reflink_reserve_cow(ip, &imap, &shared);
376 if (error) 353 if (error)
377 goto out_trans_cancel; 354 goto out_trans_cancel;
378 355
379 if (skipped) { 356 if (!shared) {
380 *offset_fsb = next_fsb; 357 *offset_fsb = imap.br_startoff + imap.br_blockcount;
381 goto out_trans_cancel; 358 goto out_trans_cancel;
382 } 359 }
383 360
384 xfs_trans_ijoin(tp, ip, 0); 361 xfs_trans_ijoin(tp, ip, 0);
385 error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb, 362 error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
386 XFS_BMAPI_COWFORK, &first_block, 363 XFS_BMAPI_COWFORK, &first_block,
387 XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 364 XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
388 &imap, &nimaps, &dfops); 365 &imap, &nimaps, &dfops);
389 if (error) 366 if (error)
390 goto out_trans_cancel; 367 goto out_trans_cancel;
391 368
392 /* We might not have been able to map the whole delalloc extent */
393 *offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb);
394
395 error = xfs_defer_finish(&tp, &dfops, NULL); 369 error = xfs_defer_finish(&tp, &dfops, NULL);
396 if (error) 370 if (error)
397 goto out_trans_cancel; 371 goto out_trans_cancel;
398 372
399 error = xfs_trans_commit(tp); 373 error = xfs_trans_commit(tp);
400 374
375 *offset_fsb = imap.br_startoff + imap.br_blockcount;
401out_unlock: 376out_unlock:
402 xfs_iunlock(ip, XFS_ILOCK_EXCL); 377 xfs_iunlock(ip, XFS_ILOCK_EXCL);
403 return error; 378 return error;
@@ -536,58 +511,49 @@ xfs_reflink_cancel_cow_blocks(
536 xfs_fileoff_t offset_fsb, 511 xfs_fileoff_t offset_fsb,
537 xfs_fileoff_t end_fsb) 512 xfs_fileoff_t end_fsb)
538{ 513{
539 struct xfs_bmbt_irec irec; 514 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
540 xfs_filblks_t count_fsb; 515 struct xfs_bmbt_irec got, prev, del;
516 xfs_extnum_t idx;
541 xfs_fsblock_t firstfsb; 517 xfs_fsblock_t firstfsb;
542 struct xfs_defer_ops dfops; 518 struct xfs_defer_ops dfops;
543 int error = 0; 519 int error = 0, eof = 0;
544 int nimaps;
545 520
546 if (!xfs_is_reflink_inode(ip)) 521 if (!xfs_is_reflink_inode(ip))
547 return 0; 522 return 0;
548 523
549 /* Go find the old extent in the CoW fork. */ 524 xfs_bmap_search_extents(ip, offset_fsb, XFS_COW_FORK, &eof, &idx,
550 while (offset_fsb < end_fsb) { 525 &got, &prev);
551 nimaps = 1; 526 if (eof)
552 count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); 527 return 0;
553 error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
554 &nimaps, XFS_BMAPI_COWFORK);
555 if (error)
556 break;
557 ASSERT(nimaps == 1);
558
559 trace_xfs_reflink_cancel_cow(ip, &irec);
560 528
561 if (irec.br_startblock == DELAYSTARTBLOCK) { 529 while (got.br_startoff < end_fsb) {
562 /* Free a delayed allocation. */ 530 del = got;
563 xfs_mod_fdblocks(ip->i_mount, irec.br_blockcount, 531 xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
564 false); 532 trace_xfs_reflink_cancel_cow(ip, &del);
565 ip->i_delayed_blks -= irec.br_blockcount;
566 533
567 /* Remove the mapping from the CoW fork. */ 534 if (isnullstartblock(del.br_startblock)) {
568 error = xfs_bunmapi_cow(ip, &irec); 535 error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
536 &idx, &got, &del);
569 if (error) 537 if (error)
570 break; 538 break;
571 } else if (irec.br_startblock == HOLESTARTBLOCK) {
572 /* empty */
573 } else { 539 } else {
574 xfs_trans_ijoin(*tpp, ip, 0); 540 xfs_trans_ijoin(*tpp, ip, 0);
575 xfs_defer_init(&dfops, &firstfsb); 541 xfs_defer_init(&dfops, &firstfsb);
576 542
577 /* Free the CoW orphan record. */ 543 /* Free the CoW orphan record. */
578 error = xfs_refcount_free_cow_extent(ip->i_mount, 544 error = xfs_refcount_free_cow_extent(ip->i_mount,
579 &dfops, irec.br_startblock, 545 &dfops, del.br_startblock,
580 irec.br_blockcount); 546 del.br_blockcount);
581 if (error) 547 if (error)
582 break; 548 break;
583 549
584 xfs_bmap_add_free(ip->i_mount, &dfops, 550 xfs_bmap_add_free(ip->i_mount, &dfops,
585 irec.br_startblock, irec.br_blockcount, 551 del.br_startblock, del.br_blockcount,
586 NULL); 552 NULL);
587 553
588 /* Update quota accounting */ 554 /* Update quota accounting */
589 xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT, 555 xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT,
590 -(long)irec.br_blockcount); 556 -(long)del.br_blockcount);
591 557
592 /* Roll the transaction */ 558 /* Roll the transaction */
593 error = xfs_defer_finish(tpp, &dfops, ip); 559 error = xfs_defer_finish(tpp, &dfops, ip);
@@ -597,15 +563,18 @@ xfs_reflink_cancel_cow_blocks(
597 } 563 }
598 564
599 /* Remove the mapping from the CoW fork. */ 565 /* Remove the mapping from the CoW fork. */
600 error = xfs_bunmapi_cow(ip, &irec); 566 xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
601 if (error)
602 break;
603 } 567 }
604 568
605 /* Roll on... */ 569 if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec))
606 offset_fsb = irec.br_startoff + irec.br_blockcount; 570 break;
571 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
607 } 572 }
608 573
574 /* clear tag if cow fork is emptied */
575 if (!ifp->if_bytes)
576 xfs_inode_clear_cowblocks_tag(ip);
577
609 return error; 578 return error;
610} 579}
611 580
@@ -668,25 +637,26 @@ xfs_reflink_end_cow(
668 xfs_off_t offset, 637 xfs_off_t offset,
669 xfs_off_t count) 638 xfs_off_t count)
670{ 639{
671 struct xfs_bmbt_irec irec; 640 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
672 struct xfs_bmbt_irec uirec; 641 struct xfs_bmbt_irec got, prev, del;
673 struct xfs_trans *tp; 642 struct xfs_trans *tp;
674 xfs_fileoff_t offset_fsb; 643 xfs_fileoff_t offset_fsb;
675 xfs_fileoff_t end_fsb; 644 xfs_fileoff_t end_fsb;
676 xfs_filblks_t count_fsb;
677 xfs_fsblock_t firstfsb; 645 xfs_fsblock_t firstfsb;
678 struct xfs_defer_ops dfops; 646 struct xfs_defer_ops dfops;
679 int error; 647 int error, eof = 0;
680 unsigned int resblks; 648 unsigned int resblks;
681 xfs_filblks_t ilen;
682 xfs_filblks_t rlen; 649 xfs_filblks_t rlen;
683 int nimaps; 650 xfs_extnum_t idx;
684 651
685 trace_xfs_reflink_end_cow(ip, offset, count); 652 trace_xfs_reflink_end_cow(ip, offset, count);
686 653
654 /* No COW extents? That's easy! */
655 if (ifp->if_bytes == 0)
656 return 0;
657
687 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 658 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
688 end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); 659 end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
689 count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
690 660
691 /* Start a rolling transaction to switch the mappings */ 661 /* Start a rolling transaction to switch the mappings */
692 resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); 662 resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
@@ -698,72 +668,65 @@ xfs_reflink_end_cow(
698 xfs_ilock(ip, XFS_ILOCK_EXCL); 668 xfs_ilock(ip, XFS_ILOCK_EXCL);
699 xfs_trans_ijoin(tp, ip, 0); 669 xfs_trans_ijoin(tp, ip, 0);
700 670
701 /* Go find the old extent in the CoW fork. */ 671 xfs_bmap_search_extents(ip, end_fsb - 1, XFS_COW_FORK, &eof, &idx,
702 while (offset_fsb < end_fsb) { 672 &got, &prev);
703 /* Read extent from the source file */
704 nimaps = 1;
705 count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
706 error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
707 &nimaps, XFS_BMAPI_COWFORK);
708 if (error)
709 goto out_cancel;
710 ASSERT(nimaps == 1);
711 673
712 ASSERT(irec.br_startblock != DELAYSTARTBLOCK); 674 /* If there is a hole at end_fsb - 1 go to the previous extent */
713 trace_xfs_reflink_cow_remap(ip, &irec); 675 if (eof || got.br_startoff > end_fsb) {
676 ASSERT(idx > 0);
677 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got);
678 }
714 679
715 /* 680 /* Walk backwards until we're out of the I/O range... */
716 * We can have a hole in the CoW fork if part of a directio 681 while (got.br_startoff + got.br_blockcount > offset_fsb) {
717 * write is CoW but part of it isn't. 682 del = got;
718 */ 683 xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
719 rlen = ilen = irec.br_blockcount; 684
720 if (irec.br_startblock == HOLESTARTBLOCK) 685 /* Extent delete may have bumped idx forward */
686 if (!del.br_blockcount) {
687 idx--;
721 goto next_extent; 688 goto next_extent;
689 }
690
691 ASSERT(!isnullstartblock(got.br_startblock));
722 692
723 /* Unmap the old blocks in the data fork. */ 693 /* Unmap the old blocks in the data fork. */
724 while (rlen) { 694 xfs_defer_init(&dfops, &firstfsb);
725 xfs_defer_init(&dfops, &firstfsb); 695 rlen = del.br_blockcount;
726 error = __xfs_bunmapi(tp, ip, irec.br_startoff, 696 error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1,
727 &rlen, 0, 1, &firstfsb, &dfops); 697 &firstfsb, &dfops);
728 if (error) 698 if (error)
729 goto out_defer; 699 goto out_defer;
730
731 /*
732 * Trim the extent to whatever got unmapped.
733 * Remember, bunmapi works backwards.
734 */
735 uirec.br_startblock = irec.br_startblock + rlen;
736 uirec.br_startoff = irec.br_startoff + rlen;
737 uirec.br_blockcount = irec.br_blockcount - rlen;
738 irec.br_blockcount = rlen;
739 trace_xfs_reflink_cow_remap_piece(ip, &uirec);
740 700
741 /* Free the CoW orphan record. */ 701 /* Trim the extent to whatever got unmapped. */
742 error = xfs_refcount_free_cow_extent(tp->t_mountp, 702 if (rlen) {
743 &dfops, uirec.br_startblock, 703 xfs_trim_extent(&del, del.br_startoff + rlen,
744 uirec.br_blockcount); 704 del.br_blockcount - rlen);
745 if (error) 705 }
746 goto out_defer; 706 trace_xfs_reflink_cow_remap(ip, &del);
747 707
748 /* Map the new blocks into the data fork. */ 708 /* Free the CoW orphan record. */
749 error = xfs_bmap_map_extent(tp->t_mountp, &dfops, 709 error = xfs_refcount_free_cow_extent(tp->t_mountp, &dfops,
750 ip, &uirec); 710 del.br_startblock, del.br_blockcount);
751 if (error) 711 if (error)
752 goto out_defer; 712 goto out_defer;
753 713
754 /* Remove the mapping from the CoW fork. */ 714 /* Map the new blocks into the data fork. */
755 error = xfs_bunmapi_cow(ip, &uirec); 715 error = xfs_bmap_map_extent(tp->t_mountp, &dfops, ip, &del);
756 if (error) 716 if (error)
757 goto out_defer; 717 goto out_defer;
758 718
759 error = xfs_defer_finish(&tp, &dfops, ip); 719 /* Remove the mapping from the CoW fork. */
760 if (error) 720 xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
761 goto out_defer; 721
762 } 722 error = xfs_defer_finish(&tp, &dfops, ip);
723 if (error)
724 goto out_defer;
763 725
764next_extent: 726next_extent:
765 /* Roll on... */ 727 if (idx < 0)
766 offset_fsb = irec.br_startoff + ilen; 728 break;
729 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
767 } 730 }
768 731
769 error = xfs_trans_commit(tp); 732 error = xfs_trans_commit(tp);
@@ -774,7 +737,6 @@ next_extent:
774 737
775out_defer: 738out_defer:
776 xfs_defer_cancel(&dfops); 739 xfs_defer_cancel(&dfops);
777out_cancel:
778 xfs_trans_cancel(tp); 740 xfs_trans_cancel(tp);
779 xfs_iunlock(ip, XFS_ILOCK_EXCL); 741 xfs_iunlock(ip, XFS_ILOCK_EXCL);
780out: 742out:
@@ -1312,19 +1274,26 @@ out_error:
1312 */ 1274 */
1313int 1275int
1314xfs_reflink_remap_range( 1276xfs_reflink_remap_range(
1315 struct xfs_inode *src, 1277 struct file *file_in,
1316 xfs_off_t srcoff, 1278 loff_t pos_in,
1317 struct xfs_inode *dest, 1279 struct file *file_out,
1318 xfs_off_t destoff, 1280 loff_t pos_out,
1319 xfs_off_t len, 1281 u64 len,
1320 unsigned int flags) 1282 bool is_dedupe)
1321{ 1283{
1284 struct inode *inode_in = file_inode(file_in);
1285 struct xfs_inode *src = XFS_I(inode_in);
1286 struct inode *inode_out = file_inode(file_out);
1287 struct xfs_inode *dest = XFS_I(inode_out);
1322 struct xfs_mount *mp = src->i_mount; 1288 struct xfs_mount *mp = src->i_mount;
1289 loff_t bs = inode_out->i_sb->s_blocksize;
1290 bool same_inode = (inode_in == inode_out);
1323 xfs_fileoff_t sfsbno, dfsbno; 1291 xfs_fileoff_t sfsbno, dfsbno;
1324 xfs_filblks_t fsblen; 1292 xfs_filblks_t fsblen;
1325 int error;
1326 xfs_extlen_t cowextsize; 1293 xfs_extlen_t cowextsize;
1327 bool is_same; 1294 loff_t isize;
1295 ssize_t ret;
1296 loff_t blen;
1328 1297
1329 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1298 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1330 return -EOPNOTSUPP; 1299 return -EOPNOTSUPP;
@@ -1332,17 +1301,8 @@ xfs_reflink_remap_range(
1332 if (XFS_FORCED_SHUTDOWN(mp)) 1301 if (XFS_FORCED_SHUTDOWN(mp))
1333 return -EIO; 1302 return -EIO;
1334 1303
1335 /* Don't reflink realtime inodes */
1336 if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
1337 return -EINVAL;
1338
1339 if (flags & ~XFS_REFLINK_ALL)
1340 return -EINVAL;
1341
1342 trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff);
1343
1344 /* Lock both files against IO */ 1304 /* Lock both files against IO */
1345 if (src->i_ino == dest->i_ino) { 1305 if (same_inode) {
1346 xfs_ilock(src, XFS_IOLOCK_EXCL); 1306 xfs_ilock(src, XFS_IOLOCK_EXCL);
1347 xfs_ilock(src, XFS_MMAPLOCK_EXCL); 1307 xfs_ilock(src, XFS_MMAPLOCK_EXCL);
1348 } else { 1308 } else {
@@ -1350,39 +1310,126 @@ xfs_reflink_remap_range(
1350 xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); 1310 xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
1351 } 1311 }
1352 1312
1313 /* Don't touch certain kinds of inodes */
1314 ret = -EPERM;
1315 if (IS_IMMUTABLE(inode_out))
1316 goto out_unlock;
1317
1318 ret = -ETXTBSY;
1319 if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
1320 goto out_unlock;
1321
1322
1323 /* Don't reflink dirs, pipes, sockets... */
1324 ret = -EISDIR;
1325 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1326 goto out_unlock;
1327 ret = -EINVAL;
1328 if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
1329 goto out_unlock;
1330 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1331 goto out_unlock;
1332
1333 /* Don't reflink realtime inodes */
1334 if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
1335 goto out_unlock;
1336
1337 /* Don't share DAX file data for now. */
1338 if (IS_DAX(inode_in) || IS_DAX(inode_out))
1339 goto out_unlock;
1340
1341 /* Are we going all the way to the end? */
1342 isize = i_size_read(inode_in);
1343 if (isize == 0) {
1344 ret = 0;
1345 goto out_unlock;
1346 }
1347
1348 if (len == 0)
1349 len = isize - pos_in;
1350
1351 /* Ensure offsets don't wrap and the input is inside i_size */
1352 if (pos_in + len < pos_in || pos_out + len < pos_out ||
1353 pos_in + len > isize)
1354 goto out_unlock;
1355
1356 /* Don't allow dedupe past EOF in the dest file */
1357 if (is_dedupe) {
1358 loff_t disize;
1359
1360 disize = i_size_read(inode_out);
1361 if (pos_out >= disize || pos_out + len > disize)
1362 goto out_unlock;
1363 }
1364
1365 /* If we're linking to EOF, continue to the block boundary. */
1366 if (pos_in + len == isize)
1367 blen = ALIGN(isize, bs) - pos_in;
1368 else
1369 blen = len;
1370
1371 /* Only reflink if we're aligned to block boundaries */
1372 if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
1373 !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
1374 goto out_unlock;
1375
1376 /* Don't allow overlapped reflink within the same file */
1377 if (same_inode) {
1378 if (pos_out + blen > pos_in && pos_out < pos_in + blen)
1379 goto out_unlock;
1380 }
1381
1382 /* Wait for the completion of any pending IOs on both files */
1383 inode_dio_wait(inode_in);
1384 if (!same_inode)
1385 inode_dio_wait(inode_out);
1386
1387 ret = filemap_write_and_wait_range(inode_in->i_mapping,
1388 pos_in, pos_in + len - 1);
1389 if (ret)
1390 goto out_unlock;
1391
1392 ret = filemap_write_and_wait_range(inode_out->i_mapping,
1393 pos_out, pos_out + len - 1);
1394 if (ret)
1395 goto out_unlock;
1396
1397 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
1398
1353 /* 1399 /*
1354 * Check that the extents are the same. 1400 * Check that the extents are the same.
1355 */ 1401 */
1356 if (flags & XFS_REFLINK_DEDUPE) { 1402 if (is_dedupe) {
1357 is_same = false; 1403 bool is_same = false;
1358 error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest), 1404
1359 destoff, len, &is_same); 1405 ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out,
1360 if (error) 1406 len, &is_same);
1361 goto out_error; 1407 if (ret)
1408 goto out_unlock;
1362 if (!is_same) { 1409 if (!is_same) {
1363 error = -EBADE; 1410 ret = -EBADE;
1364 goto out_error; 1411 goto out_unlock;
1365 } 1412 }
1366 } 1413 }
1367 1414
1368 error = xfs_reflink_set_inode_flag(src, dest); 1415 ret = xfs_reflink_set_inode_flag(src, dest);
1369 if (error) 1416 if (ret)
1370 goto out_error; 1417 goto out_unlock;
1371 1418
1372 /* 1419 /*
1373 * Invalidate the page cache so that we can clear any CoW mappings 1420 * Invalidate the page cache so that we can clear any CoW mappings
1374 * in the destination file. 1421 * in the destination file.
1375 */ 1422 */
1376 truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff, 1423 truncate_inode_pages_range(&inode_out->i_data, pos_out,
1377 PAGE_ALIGN(destoff + len) - 1); 1424 PAGE_ALIGN(pos_out + len) - 1);
1378 1425
1379 dfsbno = XFS_B_TO_FSBT(mp, destoff); 1426 dfsbno = XFS_B_TO_FSBT(mp, pos_out);
1380 sfsbno = XFS_B_TO_FSBT(mp, srcoff); 1427 sfsbno = XFS_B_TO_FSBT(mp, pos_in);
1381 fsblen = XFS_B_TO_FSB(mp, len); 1428 fsblen = XFS_B_TO_FSB(mp, len);
1382 error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, 1429 ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
1383 destoff + len); 1430 pos_out + len);
1384 if (error) 1431 if (ret)
1385 goto out_error; 1432 goto out_unlock;
1386 1433
1387 /* 1434 /*
1388 * Carry the cowextsize hint from src to dest if we're sharing the 1435 * Carry the cowextsize hint from src to dest if we're sharing the
@@ -1390,26 +1437,24 @@ xfs_reflink_remap_range(
1390 * has a cowextsize hint, and the destination file does not. 1437 * has a cowextsize hint, and the destination file does not.
1391 */ 1438 */
1392 cowextsize = 0; 1439 cowextsize = 0;
1393 if (srcoff == 0 && len == i_size_read(VFS_I(src)) && 1440 if (pos_in == 0 && len == i_size_read(inode_in) &&
1394 (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && 1441 (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
1395 destoff == 0 && len >= i_size_read(VFS_I(dest)) && 1442 pos_out == 0 && len >= i_size_read(inode_out) &&
1396 !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) 1443 !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
1397 cowextsize = src->i_d.di_cowextsize; 1444 cowextsize = src->i_d.di_cowextsize;
1398 1445
1399 error = xfs_reflink_update_dest(dest, destoff + len, cowextsize); 1446 ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize);
1400 if (error)
1401 goto out_error;
1402 1447
1403out_error: 1448out_unlock:
1404 xfs_iunlock(src, XFS_MMAPLOCK_EXCL); 1449 xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
1405 xfs_iunlock(src, XFS_IOLOCK_EXCL); 1450 xfs_iunlock(src, XFS_IOLOCK_EXCL);
1406 if (src->i_ino != dest->i_ino) { 1451 if (src->i_ino != dest->i_ino) {
1407 xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); 1452 xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
1408 xfs_iunlock(dest, XFS_IOLOCK_EXCL); 1453 xfs_iunlock(dest, XFS_IOLOCK_EXCL);
1409 } 1454 }
1410 if (error) 1455 if (ret)
1411 trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_); 1456 trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
1412 return error; 1457 return ret;
1413} 1458}
1414 1459
1415/* 1460/*
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 5dc3c8ac12aa..fad11607c9ad 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -26,8 +26,8 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno,
26extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, 26extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
27 struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed); 27 struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed);
28 28
29extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip, 29extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
30 xfs_off_t offset, xfs_off_t count); 30 struct xfs_bmbt_irec *imap, bool *shared);
31extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, 31extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
32 xfs_off_t offset, xfs_off_t count); 32 xfs_off_t offset, xfs_off_t count);
33extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, 33extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
@@ -43,11 +43,8 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
43extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, 43extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
44 xfs_off_t count); 44 xfs_off_t count);
45extern int xfs_reflink_recover_cow(struct xfs_mount *mp); 45extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
46#define XFS_REFLINK_DEDUPE 1 /* only reflink if contents match */ 46extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
47#define XFS_REFLINK_ALL (XFS_REFLINK_DEDUPE) 47 struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe);
48extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff,
49 struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len,
50 unsigned int flags);
51extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, 48extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
52 struct xfs_trans **tpp); 49 struct xfs_trans **tpp);
53extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, 50extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 5f8d55d29a11..276d3023d60f 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -512,13 +512,13 @@ static struct attribute *xfs_error_attrs[] = {
512}; 512};
513 513
514 514
515struct kobj_type xfs_error_cfg_ktype = { 515static struct kobj_type xfs_error_cfg_ktype = {
516 .release = xfs_sysfs_release, 516 .release = xfs_sysfs_release,
517 .sysfs_ops = &xfs_sysfs_ops, 517 .sysfs_ops = &xfs_sysfs_ops,
518 .default_attrs = xfs_error_attrs, 518 .default_attrs = xfs_error_attrs,
519}; 519};
520 520
521struct kobj_type xfs_error_ktype = { 521static struct kobj_type xfs_error_ktype = {
522 .release = xfs_sysfs_release, 522 .release = xfs_sysfs_release,
523 .sysfs_ops = &xfs_sysfs_ops, 523 .sysfs_ops = &xfs_sysfs_ops,
524}; 524};
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ad188d3a83f3..0907752be62d 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3346,7 +3346,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
3346DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); 3346DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
3347DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); 3347DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
3348 3348
3349DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range); 3349DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
3350DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range); 3350DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
3351 3351
3352DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write); 3352DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
@@ -3356,9 +3356,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec);
3356DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range); 3356DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
3357DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow); 3357DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
3358DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap); 3358DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
3359DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece);
3360 3359
3361DEFINE_INODE_ERROR_EVENT(xfs_reflink_reserve_cow_range_error);
3362DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error); 3360DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
3363DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); 3361DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
3364DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error); 3362DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index e63e288dee83..7892f55a1866 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -19,11 +19,15 @@ struct vm_fault;
19#define IOMAP_UNWRITTEN 0x04 /* blocks allocated @blkno in unwritten state */ 19#define IOMAP_UNWRITTEN 0x04 /* blocks allocated @blkno in unwritten state */
20 20
21/* 21/*
22 * Flags for iomap mappings: 22 * Flags for all iomap mappings:
23 */ 23 */
24#define IOMAP_F_MERGED 0x01 /* contains multiple blocks/extents */ 24#define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */
25#define IOMAP_F_SHARED 0x02 /* block shared with another file */ 25
26#define IOMAP_F_NEW 0x04 /* blocks have been newly allocated */ 26/*
27 * Flags that only need to be reported for IOMAP_REPORT requests:
28 */
29#define IOMAP_F_MERGED 0x10 /* contains multiple blocks/extents */
30#define IOMAP_F_SHARED 0x20 /* block shared with another file */
27 31
28/* 32/*
29 * Magic value for blkno: 33 * Magic value for blkno:
@@ -42,8 +46,9 @@ struct iomap {
42/* 46/*
43 * Flags for iomap_begin / iomap_end. No flag implies a read. 47 * Flags for iomap_begin / iomap_end. No flag implies a read.
44 */ 48 */
45#define IOMAP_WRITE (1 << 0) 49#define IOMAP_WRITE (1 << 0) /* writing, must allocate blocks */
46#define IOMAP_ZERO (1 << 1) 50#define IOMAP_ZERO (1 << 1) /* zeroing operation, may skip holes */
51#define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */
47 52
48struct iomap_ops { 53struct iomap_ops {
49 /* 54 /*