aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-01-17 18:54:56 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-17 18:54:56 -0500
commit22b4eb5e3174efb49791c62823d0cccc35394c36 (patch)
tree7324d6b53ad3c18c244094ad5d51b500716d7304 /fs
parentd65773b22b749252b2805dcf96bdeb951a9481d8 (diff)
parentd060646436233912178e6b9e3a7f30a41214220f (diff)
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs: xfs: cleanup xfs_file_aio_write xfs: always return with the iolock held from xfs_file_aio_write_checks xfs: remove the i_new_size field in struct xfs_inode xfs: remove the i_size field in struct xfs_inode xfs: replace i_pin_wait with a bit waitqueue xfs: replace i_flock with a sleeping bitlock xfs: make i_flags an unsigned long xfs: remove the if_ext_max field in struct xfs_ifork xfs: remove the unused dm_attrs structure xfs: cleanup xfs_iomap_eof_align_last_fsb xfs: remove xfs_itruncate_data
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/xfs_aops.c29
-rw-r--r--fs/xfs/xfs_attr.c4
-rw-r--r--fs/xfs/xfs_attr_leaf.c9
-rw-r--r--fs/xfs/xfs_bmap.c116
-rw-r--r--fs/xfs/xfs_dfrag.c43
-rw-r--r--fs/xfs/xfs_file.c184
-rw-r--r--fs/xfs/xfs_fs_subr.c2
-rw-r--r--fs/xfs/xfs_iget.c24
-rw-r--r--fs/xfs/xfs_inode.c193
-rw-r--r--fs/xfs/xfs_inode.h114
-rw-r--r--fs/xfs/xfs_inode_item.c8
-rw-r--r--fs/xfs/xfs_iomap.c46
-rw-r--r--fs/xfs/xfs_iops.c46
-rw-r--r--fs/xfs/xfs_qm_syscalls.c8
-rw-r--r--fs/xfs/xfs_super.c8
-rw-r--r--fs/xfs/xfs_sync.c9
-rw-r--r--fs/xfs/xfs_trace.h29
-rw-r--r--fs/xfs/xfs_vnodeops.c44
18 files changed, 374 insertions, 542 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 574d4ee9b625..74b9baf36ac3 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -111,8 +111,7 @@ xfs_ioend_new_eof(
111 xfs_fsize_t bsize; 111 xfs_fsize_t bsize;
112 112
113 bsize = ioend->io_offset + ioend->io_size; 113 bsize = ioend->io_offset + ioend->io_size;
114 isize = MAX(ip->i_size, ip->i_new_size); 114 isize = MIN(i_size_read(VFS_I(ip)), bsize);
115 isize = MIN(isize, bsize);
116 return isize > ip->i_d.di_size ? isize : 0; 115 return isize > ip->i_d.di_size ? isize : 0;
117} 116}
118 117
@@ -126,11 +125,7 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
126} 125}
127 126
128/* 127/*
129 * Update on-disk file size now that data has been written to disk. The 128 * Update on-disk file size now that data has been written to disk.
130 * current in-memory file size is i_size. If a write is beyond eof i_new_size
131 * will be the intended file size until i_size is updated. If this write does
132 * not extend all the way to the valid file size then restrict this update to
133 * the end of the write.
134 * 129 *
135 * This function does not block as blocking on the inode lock in IO completion 130 * This function does not block as blocking on the inode lock in IO completion
136 * can lead to IO completion order dependency deadlocks.. If it can't get the 131 * can lead to IO completion order dependency deadlocks.. If it can't get the
@@ -1279,6 +1274,15 @@ xfs_end_io_direct_write(
1279 struct xfs_ioend *ioend = iocb->private; 1274 struct xfs_ioend *ioend = iocb->private;
1280 1275
1281 /* 1276 /*
1277 * While the generic direct I/O code updates the inode size, it does
1278 * so only after the end_io handler is called, which means our
1279 * end_io handler thinks the on-disk size is outside the in-core
1280 * size. To prevent this just update it a little bit earlier here.
1281 */
1282 if (offset + size > i_size_read(ioend->io_inode))
1283 i_size_write(ioend->io_inode, offset + size);
1284
1285 /*
1282 * blockdev_direct_IO can return an error even after the I/O 1286 * blockdev_direct_IO can return an error even after the I/O
1283 * completion handler was called. Thus we need to protect 1287 * completion handler was called. Thus we need to protect
1284 * against double-freeing. 1288 * against double-freeing.
@@ -1340,12 +1344,11 @@ xfs_vm_write_failed(
1340 1344
1341 if (to > inode->i_size) { 1345 if (to > inode->i_size) {
1342 /* 1346 /*
1343 * punch out the delalloc blocks we have already allocated. We 1347 * Punch out the delalloc blocks we have already allocated.
1344 * don't call xfs_setattr() to do this as we may be in the 1348 *
1345 * middle of a multi-iovec write and so the vfs inode->i_size 1349 * Don't bother with xfs_setattr given that nothing can have
1346 * will not match the xfs ip->i_size and so it will zero too 1350 * made it to disk yet as the page is still locked at this
1347 * much. Hence we jus truncate the page cache to zero what is 1351 * point.
1348 * necessary and punch the delalloc blocks directly.
1349 */ 1352 */
1350 struct xfs_inode *ip = XFS_I(inode); 1353 struct xfs_inode *ip = XFS_I(inode);
1351 xfs_fileoff_t start_fsb; 1354 xfs_fileoff_t start_fsb;
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 1e5d97f86ea8..08b9ac644c31 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -827,10 +827,6 @@ xfs_attr_inactive(xfs_inode_t *dp)
827 if (error) 827 if (error)
828 goto out; 828 goto out;
829 829
830 /*
831 * Commit the last in the sequence of transactions.
832 */
833 xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
834 error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); 830 error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
835 xfs_iunlock(dp, XFS_ILOCK_EXCL); 831 xfs_iunlock(dp, XFS_ILOCK_EXCL);
836 832
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index c1b55e596551..d25eafd4d28d 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -271,10 +271,6 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
271 dp = args->dp; 271 dp = args->dp;
272 mp = dp->i_mount; 272 mp = dp->i_mount;
273 dp->i_d.di_forkoff = forkoff; 273 dp->i_d.di_forkoff = forkoff;
274 dp->i_df.if_ext_max =
275 XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
276 dp->i_afp->if_ext_max =
277 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
278 274
279 ifp = dp->i_afp; 275 ifp = dp->i_afp;
280 ASSERT(ifp->if_flags & XFS_IFINLINE); 276 ASSERT(ifp->if_flags & XFS_IFINLINE);
@@ -326,7 +322,6 @@ xfs_attr_fork_reset(
326 ASSERT(ip->i_d.di_anextents == 0); 322 ASSERT(ip->i_d.di_anextents == 0);
327 ASSERT(ip->i_afp == NULL); 323 ASSERT(ip->i_afp == NULL);
328 324
329 ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t);
330 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 325 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
331} 326}
332 327
@@ -389,10 +384,6 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
389 (args->op_flags & XFS_DA_OP_ADDNAME) || 384 (args->op_flags & XFS_DA_OP_ADDNAME) ||
390 !(mp->m_flags & XFS_MOUNT_ATTR2) || 385 !(mp->m_flags & XFS_MOUNT_ATTR2) ||
391 dp->i_d.di_format == XFS_DINODE_FMT_BTREE); 386 dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
392 dp->i_afp->if_ext_max =
393 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
394 dp->i_df.if_ext_max =
395 XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
396 xfs_trans_log_inode(args->trans, dp, 387 xfs_trans_log_inode(args->trans, dp,
397 XFS_ILOG_CORE | XFS_ILOG_ADATA); 388 XFS_ILOG_CORE | XFS_ILOG_ADATA);
398 } 389 }
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index d0ab78837057..188ef2fbd628 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -249,7 +249,27 @@ xfs_bmbt_lookup_ge(
249} 249}
250 250
251/* 251/*
252* Update the record referred to by cur to the value given 252 * Check if the inode needs to be converted to btree format.
253 */
254static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
255{
256 return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
257 XFS_IFORK_NEXTENTS(ip, whichfork) >
258 XFS_IFORK_MAXEXT(ip, whichfork);
259}
260
261/*
262 * Check if the inode should be converted to extent format.
263 */
264static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
265{
266 return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
267 XFS_IFORK_NEXTENTS(ip, whichfork) <=
268 XFS_IFORK_MAXEXT(ip, whichfork);
269}
270
271/*
272 * Update the record referred to by cur to the value given
253 * by [off, bno, len, state]. 273 * by [off, bno, len, state].
254 * This either works (return 0) or gets an EFSCORRUPTED error. 274 * This either works (return 0) or gets an EFSCORRUPTED error.
255 */ 275 */
@@ -683,8 +703,8 @@ xfs_bmap_add_extent_delay_real(
683 goto done; 703 goto done;
684 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 704 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
685 } 705 }
686 if (bma->ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 706
687 bma->ip->i_d.di_nextents > bma->ip->i_df.if_ext_max) { 707 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
688 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 708 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
689 bma->firstblock, bma->flist, 709 bma->firstblock, bma->flist,
690 &bma->cur, 1, &tmp_rval, XFS_DATA_FORK); 710 &bma->cur, 1, &tmp_rval, XFS_DATA_FORK);
@@ -767,8 +787,8 @@ xfs_bmap_add_extent_delay_real(
767 goto done; 787 goto done;
768 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 788 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
769 } 789 }
770 if (bma->ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 790
771 bma->ip->i_d.di_nextents > bma->ip->i_df.if_ext_max) { 791 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
772 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 792 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
773 bma->firstblock, bma->flist, &bma->cur, 1, 793 bma->firstblock, bma->flist, &bma->cur, 1,
774 &tmp_rval, XFS_DATA_FORK); 794 &tmp_rval, XFS_DATA_FORK);
@@ -836,8 +856,8 @@ xfs_bmap_add_extent_delay_real(
836 goto done; 856 goto done;
837 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 857 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
838 } 858 }
839 if (bma->ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 859
840 bma->ip->i_d.di_nextents > bma->ip->i_df.if_ext_max) { 860 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
841 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 861 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
842 bma->firstblock, bma->flist, &bma->cur, 862 bma->firstblock, bma->flist, &bma->cur,
843 1, &tmp_rval, XFS_DATA_FORK); 863 1, &tmp_rval, XFS_DATA_FORK);
@@ -884,8 +904,7 @@ xfs_bmap_add_extent_delay_real(
884 } 904 }
885 905
886 /* convert to a btree if necessary */ 906 /* convert to a btree if necessary */
887 if (XFS_IFORK_FORMAT(bma->ip, XFS_DATA_FORK) == XFS_DINODE_FMT_EXTENTS && 907 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
888 XFS_IFORK_NEXTENTS(bma->ip, XFS_DATA_FORK) > ifp->if_ext_max) {
889 int tmp_logflags; /* partial log flag return val */ 908 int tmp_logflags; /* partial log flag return val */
890 909
891 ASSERT(bma->cur == NULL); 910 ASSERT(bma->cur == NULL);
@@ -1421,8 +1440,7 @@ xfs_bmap_add_extent_unwritten_real(
1421 } 1440 }
1422 1441
1423 /* convert to a btree if necessary */ 1442 /* convert to a btree if necessary */
1424 if (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) == XFS_DINODE_FMT_EXTENTS && 1443 if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
1425 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > ifp->if_ext_max) {
1426 int tmp_logflags; /* partial log flag return val */ 1444 int tmp_logflags; /* partial log flag return val */
1427 1445
1428 ASSERT(cur == NULL); 1446 ASSERT(cur == NULL);
@@ -1812,8 +1830,7 @@ xfs_bmap_add_extent_hole_real(
1812 } 1830 }
1813 1831
1814 /* convert to a btree if necessary */ 1832 /* convert to a btree if necessary */
1815 if (XFS_IFORK_FORMAT(bma->ip, whichfork) == XFS_DINODE_FMT_EXTENTS && 1833 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1816 XFS_IFORK_NEXTENTS(bma->ip, whichfork) > ifp->if_ext_max) {
1817 int tmp_logflags; /* partial log flag return val */ 1834 int tmp_logflags; /* partial log flag return val */
1818 1835
1819 ASSERT(bma->cur == NULL); 1836 ASSERT(bma->cur == NULL);
@@ -3037,8 +3054,7 @@ xfs_bmap_extents_to_btree(
3037 3054
3038 ifp = XFS_IFORK_PTR(ip, whichfork); 3055 ifp = XFS_IFORK_PTR(ip, whichfork);
3039 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS); 3056 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
3040 ASSERT(ifp->if_ext_max == 3057
3041 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
3042 /* 3058 /*
3043 * Make space in the inode incore. 3059 * Make space in the inode incore.
3044 */ 3060 */
@@ -3184,13 +3200,8 @@ xfs_bmap_forkoff_reset(
3184 ip->i_d.di_format != XFS_DINODE_FMT_BTREE) { 3200 ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
3185 uint dfl_forkoff = xfs_default_attroffset(ip) >> 3; 3201 uint dfl_forkoff = xfs_default_attroffset(ip) >> 3;
3186 3202
3187 if (dfl_forkoff > ip->i_d.di_forkoff) { 3203 if (dfl_forkoff > ip->i_d.di_forkoff)
3188 ip->i_d.di_forkoff = dfl_forkoff; 3204 ip->i_d.di_forkoff = dfl_forkoff;
3189 ip->i_df.if_ext_max =
3190 XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t);
3191 ip->i_afp->if_ext_max =
3192 XFS_IFORK_ASIZE(ip) / sizeof(xfs_bmbt_rec_t);
3193 }
3194 } 3205 }
3195} 3206}
3196 3207
@@ -3430,8 +3441,6 @@ xfs_bmap_add_attrfork(
3430 int error; /* error return value */ 3441 int error; /* error return value */
3431 3442
3432 ASSERT(XFS_IFORK_Q(ip) == 0); 3443 ASSERT(XFS_IFORK_Q(ip) == 0);
3433 ASSERT(ip->i_df.if_ext_max ==
3434 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
3435 3444
3436 mp = ip->i_mount; 3445 mp = ip->i_mount;
3437 ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); 3446 ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
@@ -3486,12 +3495,9 @@ xfs_bmap_add_attrfork(
3486 error = XFS_ERROR(EINVAL); 3495 error = XFS_ERROR(EINVAL);
3487 goto error1; 3496 goto error1;
3488 } 3497 }
3489 ip->i_df.if_ext_max = 3498
3490 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
3491 ASSERT(ip->i_afp == NULL); 3499 ASSERT(ip->i_afp == NULL);
3492 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); 3500 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
3493 ip->i_afp->if_ext_max =
3494 XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
3495 ip->i_afp->if_flags = XFS_IFEXTENTS; 3501 ip->i_afp->if_flags = XFS_IFEXTENTS;
3496 logflags = 0; 3502 logflags = 0;
3497 xfs_bmap_init(&flist, &firstblock); 3503 xfs_bmap_init(&flist, &firstblock);
@@ -3535,20 +3541,17 @@ xfs_bmap_add_attrfork(
3535 } else 3541 } else
3536 spin_unlock(&mp->m_sb_lock); 3542 spin_unlock(&mp->m_sb_lock);
3537 } 3543 }
3538 if ((error = xfs_bmap_finish(&tp, &flist, &committed))) 3544
3545 error = xfs_bmap_finish(&tp, &flist, &committed);
3546 if (error)
3539 goto error2; 3547 goto error2;
3540 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 3548 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
3541 ASSERT(ip->i_df.if_ext_max ==
3542 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
3543 return error;
3544error2: 3549error2:
3545 xfs_bmap_cancel(&flist); 3550 xfs_bmap_cancel(&flist);
3546error1: 3551error1:
3547 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3552 xfs_iunlock(ip, XFS_ILOCK_EXCL);
3548error0: 3553error0:
3549 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 3554 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
3550 ASSERT(ip->i_df.if_ext_max ==
3551 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
3552 return error; 3555 return error;
3553} 3556}
3554 3557
@@ -3994,11 +3997,8 @@ xfs_bmap_one_block(
3994 xfs_bmbt_irec_t s; /* internal version of extent */ 3997 xfs_bmbt_irec_t s; /* internal version of extent */
3995 3998
3996#ifndef DEBUG 3999#ifndef DEBUG
3997 if (whichfork == XFS_DATA_FORK) { 4000 if (whichfork == XFS_DATA_FORK)
3998 return S_ISREG(ip->i_d.di_mode) ? 4001 return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
3999 (ip->i_size == ip->i_mount->m_sb.sb_blocksize) :
4000 (ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
4001 }
4002#endif /* !DEBUG */ 4002#endif /* !DEBUG */
4003 if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1) 4003 if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
4004 return 0; 4004 return 0;
@@ -4010,7 +4010,7 @@ xfs_bmap_one_block(
4010 xfs_bmbt_get_all(ep, &s); 4010 xfs_bmbt_get_all(ep, &s);
4011 rval = s.br_startoff == 0 && s.br_blockcount == 1; 4011 rval = s.br_startoff == 0 && s.br_blockcount == 1;
4012 if (rval && whichfork == XFS_DATA_FORK) 4012 if (rval && whichfork == XFS_DATA_FORK)
4013 ASSERT(ip->i_size == ip->i_mount->m_sb.sb_blocksize); 4013 ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
4014 return rval; 4014 return rval;
4015} 4015}
4016 4016
@@ -4379,8 +4379,6 @@ xfs_bmapi_read(
4379 XFS_STATS_INC(xs_blk_mapr); 4379 XFS_STATS_INC(xs_blk_mapr);
4380 4380
4381 ifp = XFS_IFORK_PTR(ip, whichfork); 4381 ifp = XFS_IFORK_PTR(ip, whichfork);
4382 ASSERT(ifp->if_ext_max ==
4383 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
4384 4382
4385 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 4383 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4386 error = xfs_iread_extents(NULL, ip, whichfork); 4384 error = xfs_iread_extents(NULL, ip, whichfork);
@@ -4871,8 +4869,6 @@ xfs_bmapi_write(
4871 return XFS_ERROR(EIO); 4869 return XFS_ERROR(EIO);
4872 4870
4873 ifp = XFS_IFORK_PTR(ip, whichfork); 4871 ifp = XFS_IFORK_PTR(ip, whichfork);
4874 ASSERT(ifp->if_ext_max ==
4875 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
4876 4872
4877 XFS_STATS_INC(xs_blk_mapw); 4873 XFS_STATS_INC(xs_blk_mapw);
4878 4874
@@ -4981,8 +4977,7 @@ xfs_bmapi_write(
4981 /* 4977 /*
4982 * Transform from btree to extents, give it cur. 4978 * Transform from btree to extents, give it cur.
4983 */ 4979 */
4984 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE && 4980 if (xfs_bmap_wants_extents(ip, whichfork)) {
4985 XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
4986 int tmp_logflags = 0; 4981 int tmp_logflags = 0;
4987 4982
4988 ASSERT(bma.cur); 4983 ASSERT(bma.cur);
@@ -4992,10 +4987,10 @@ xfs_bmapi_write(
4992 if (error) 4987 if (error)
4993 goto error0; 4988 goto error0;
4994 } 4989 }
4995 ASSERT(ifp->if_ext_max == 4990
4996 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
4997 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || 4991 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
4998 XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max); 4992 XFS_IFORK_NEXTENTS(ip, whichfork) >
4993 XFS_IFORK_MAXEXT(ip, whichfork));
4999 error = 0; 4994 error = 0;
5000error0: 4995error0:
5001 /* 4996 /*
@@ -5095,8 +5090,7 @@ xfs_bunmapi(
5095 5090
5096 ASSERT(len > 0); 5091 ASSERT(len > 0);
5097 ASSERT(nexts >= 0); 5092 ASSERT(nexts >= 0);
5098 ASSERT(ifp->if_ext_max == 5093
5099 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
5100 if (!(ifp->if_flags & XFS_IFEXTENTS) && 5094 if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5101 (error = xfs_iread_extents(tp, ip, whichfork))) 5095 (error = xfs_iread_extents(tp, ip, whichfork)))
5102 return error; 5096 return error;
@@ -5322,7 +5316,8 @@ xfs_bunmapi(
5322 */ 5316 */
5323 if (!wasdel && xfs_trans_get_block_res(tp) == 0 && 5317 if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
5324 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && 5318 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
5325 XFS_IFORK_NEXTENTS(ip, whichfork) >= ifp->if_ext_max && 5319 XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
5320 XFS_IFORK_MAXEXT(ip, whichfork) &&
5326 del.br_startoff > got.br_startoff && 5321 del.br_startoff > got.br_startoff &&
5327 del.br_startoff + del.br_blockcount < 5322 del.br_startoff + del.br_blockcount <
5328 got.br_startoff + got.br_blockcount) { 5323 got.br_startoff + got.br_blockcount) {
@@ -5353,13 +5348,11 @@ nodelete:
5353 } 5348 }
5354 } 5349 }
5355 *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; 5350 *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;
5356 ASSERT(ifp->if_ext_max == 5351
5357 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
5358 /* 5352 /*
5359 * Convert to a btree if necessary. 5353 * Convert to a btree if necessary.
5360 */ 5354 */
5361 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && 5355 if (xfs_bmap_needs_btree(ip, whichfork)) {
5362 XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) {
5363 ASSERT(cur == NULL); 5356 ASSERT(cur == NULL);
5364 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, 5357 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist,
5365 &cur, 0, &tmp_logflags, whichfork); 5358 &cur, 0, &tmp_logflags, whichfork);
@@ -5370,8 +5363,7 @@ nodelete:
5370 /* 5363 /*
5371 * transform from btree to extents, give it cur 5364 * transform from btree to extents, give it cur
5372 */ 5365 */
5373 else if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE && 5366 else if (xfs_bmap_wants_extents(ip, whichfork)) {
5374 XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
5375 ASSERT(cur != NULL); 5367 ASSERT(cur != NULL);
5376 error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags, 5368 error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
5377 whichfork); 5369 whichfork);
@@ -5382,8 +5374,6 @@ nodelete:
5382 /* 5374 /*
5383 * transform from extents to local? 5375 * transform from extents to local?
5384 */ 5376 */
5385 ASSERT(ifp->if_ext_max ==
5386 XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
5387 error = 0; 5377 error = 0;
5388error0: 5378error0:
5389 /* 5379 /*
@@ -5434,7 +5424,7 @@ xfs_getbmapx_fix_eof_hole(
5434 if (startblock == HOLESTARTBLOCK) { 5424 if (startblock == HOLESTARTBLOCK) {
5435 mp = ip->i_mount; 5425 mp = ip->i_mount;
5436 out->bmv_block = -1; 5426 out->bmv_block = -1;
5437 fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, ip->i_size)); 5427 fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
5438 fixlen -= out->bmv_offset; 5428 fixlen -= out->bmv_offset;
5439 if (prealloced && out->bmv_offset + out->bmv_length == end) { 5429 if (prealloced && out->bmv_offset + out->bmv_length == end) {
5440 /* Came to hole at EOF. Trim it. */ 5430 /* Came to hole at EOF. Trim it. */
@@ -5522,7 +5512,7 @@ xfs_getbmap(
5522 fixlen = XFS_MAXIOFFSET(mp); 5512 fixlen = XFS_MAXIOFFSET(mp);
5523 } else { 5513 } else {
5524 prealloced = 0; 5514 prealloced = 0;
5525 fixlen = ip->i_size; 5515 fixlen = XFS_ISIZE(ip);
5526 } 5516 }
5527 } 5517 }
5528 5518
@@ -5551,7 +5541,7 @@ xfs_getbmap(
5551 5541
5552 xfs_ilock(ip, XFS_IOLOCK_SHARED); 5542 xfs_ilock(ip, XFS_IOLOCK_SHARED);
5553 if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { 5543 if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
5554 if (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size) { 5544 if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) {
5555 error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF); 5545 error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF);
5556 if (error) 5546 if (error)
5557 goto out_unlock_iolock; 5547 goto out_unlock_iolock;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 654dc6f05bac..dd974a55c77d 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -163,12 +163,14 @@ xfs_swap_extents_check_format(
163 163
164 /* Check temp in extent form to max in target */ 164 /* Check temp in extent form to max in target */
165 if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 165 if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
166 XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max) 166 XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
167 XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
167 return EINVAL; 168 return EINVAL;
168 169
169 /* Check target in extent form to max in temp */ 170 /* Check target in extent form to max in temp */
170 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 171 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
171 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max) 172 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
173 XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
172 return EINVAL; 174 return EINVAL;
173 175
174 /* 176 /*
@@ -180,18 +182,25 @@ xfs_swap_extents_check_format(
180 * (a common defrag case) which will occur when the temp inode is in 182 * (a common defrag case) which will occur when the temp inode is in
181 * extent format... 183 * extent format...
182 */ 184 */
183 if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE && 185 if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
184 ((XFS_IFORK_BOFF(ip) && 186 if (XFS_IFORK_BOFF(ip) &&
185 tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) || 187 tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
186 XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max)) 188 return EINVAL;
187 return EINVAL; 189 if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
190 XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
191 return EINVAL;
192 }
188 193
189 /* Reciprocal target->temp btree format checks */ 194 /* Reciprocal target->temp btree format checks */
190 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && 195 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
191 ((XFS_IFORK_BOFF(tip) && 196 if (XFS_IFORK_BOFF(tip) &&
192 ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) || 197 ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
193 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max)) 198 return EINVAL;
194 return EINVAL; 199
200 if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
201 XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
202 return EINVAL;
203 }
195 204
196 return 0; 205 return 0;
197} 206}
@@ -349,16 +358,6 @@ xfs_swap_extents(
349 *tifp = *tempifp; /* struct copy */ 358 *tifp = *tempifp; /* struct copy */
350 359
351 /* 360 /*
352 * Fix the in-memory data fork values that are dependent on the fork
353 * offset in the inode. We can't assume they remain the same as attr2
354 * has dynamic fork offsets.
355 */
356 ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) /
357 (uint)sizeof(xfs_bmbt_rec_t);
358 tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) /
359 (uint)sizeof(xfs_bmbt_rec_t);
360
361 /*
362 * Fix the on-disk inode values 361 * Fix the on-disk inode values
363 */ 362 */
364 tmp = (__uint64_t)ip->i_d.di_nblocks; 363 tmp = (__uint64_t)ip->i_d.di_nblocks;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f675f3d9d7b3..7e5bc872f2b4 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -327,7 +327,7 @@ xfs_file_aio_read(
327 mp->m_rtdev_targp : mp->m_ddev_targp; 327 mp->m_rtdev_targp : mp->m_ddev_targp;
328 if ((iocb->ki_pos & target->bt_smask) || 328 if ((iocb->ki_pos & target->bt_smask) ||
329 (size & target->bt_smask)) { 329 (size & target->bt_smask)) {
330 if (iocb->ki_pos == ip->i_size) 330 if (iocb->ki_pos == i_size_read(inode))
331 return 0; 331 return 0;
332 return -XFS_ERROR(EINVAL); 332 return -XFS_ERROR(EINVAL);
333 } 333 }
@@ -412,51 +412,6 @@ xfs_file_splice_read(
412 return ret; 412 return ret;
413} 413}
414 414
415STATIC void
416xfs_aio_write_isize_update(
417 struct inode *inode,
418 loff_t *ppos,
419 ssize_t bytes_written)
420{
421 struct xfs_inode *ip = XFS_I(inode);
422 xfs_fsize_t isize = i_size_read(inode);
423
424 if (bytes_written > 0)
425 XFS_STATS_ADD(xs_write_bytes, bytes_written);
426
427 if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
428 *ppos > isize))
429 *ppos = isize;
430
431 if (*ppos > ip->i_size) {
432 xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
433 if (*ppos > ip->i_size)
434 ip->i_size = *ppos;
435 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
436 }
437}
438
439/*
440 * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
441 * part of the I/O may have been written to disk before the error occurred. In
442 * this case the on-disk file size may have been adjusted beyond the in-memory
443 * file size and now needs to be truncated back.
444 */
445STATIC void
446xfs_aio_write_newsize_update(
447 struct xfs_inode *ip,
448 xfs_fsize_t new_size)
449{
450 if (new_size == ip->i_new_size) {
451 xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
452 if (new_size == ip->i_new_size)
453 ip->i_new_size = 0;
454 if (ip->i_d.di_size > ip->i_size)
455 ip->i_d.di_size = ip->i_size;
456 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
457 }
458}
459
460/* 415/*
461 * xfs_file_splice_write() does not use xfs_rw_ilock() because 416 * xfs_file_splice_write() does not use xfs_rw_ilock() because
462 * generic_file_splice_write() takes the i_mutex itself. This, in theory, 417 * generic_file_splice_write() takes the i_mutex itself. This, in theory,
@@ -475,7 +430,6 @@ xfs_file_splice_write(
475{ 430{
476 struct inode *inode = outfilp->f_mapping->host; 431 struct inode *inode = outfilp->f_mapping->host;
477 struct xfs_inode *ip = XFS_I(inode); 432 struct xfs_inode *ip = XFS_I(inode);
478 xfs_fsize_t new_size;
479 int ioflags = 0; 433 int ioflags = 0;
480 ssize_t ret; 434 ssize_t ret;
481 435
@@ -489,19 +443,12 @@ xfs_file_splice_write(
489 443
490 xfs_ilock(ip, XFS_IOLOCK_EXCL); 444 xfs_ilock(ip, XFS_IOLOCK_EXCL);
491 445
492 new_size = *ppos + count;
493
494 xfs_ilock(ip, XFS_ILOCK_EXCL);
495 if (new_size > ip->i_size)
496 ip->i_new_size = new_size;
497 xfs_iunlock(ip, XFS_ILOCK_EXCL);
498
499 trace_xfs_file_splice_write(ip, count, *ppos, ioflags); 446 trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
500 447
501 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); 448 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
449 if (ret > 0)
450 XFS_STATS_ADD(xs_write_bytes, ret);
502 451
503 xfs_aio_write_isize_update(inode, ppos, ret);
504 xfs_aio_write_newsize_update(ip, new_size);
505 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 452 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
506 return ret; 453 return ret;
507} 454}
@@ -689,28 +636,26 @@ out_lock:
689/* 636/*
690 * Common pre-write limit and setup checks. 637 * Common pre-write limit and setup checks.
691 * 638 *
692 * Returns with iolock held according to @iolock. 639 * Called with the iolocked held either shared and exclusive according to
640 * @iolock, and returns with it held. Might upgrade the iolock to exclusive
641 * if called for a direct write beyond i_size.
693 */ 642 */
694STATIC ssize_t 643STATIC ssize_t
695xfs_file_aio_write_checks( 644xfs_file_aio_write_checks(
696 struct file *file, 645 struct file *file,
697 loff_t *pos, 646 loff_t *pos,
698 size_t *count, 647 size_t *count,
699 xfs_fsize_t *new_sizep,
700 int *iolock) 648 int *iolock)
701{ 649{
702 struct inode *inode = file->f_mapping->host; 650 struct inode *inode = file->f_mapping->host;
703 struct xfs_inode *ip = XFS_I(inode); 651 struct xfs_inode *ip = XFS_I(inode);
704 xfs_fsize_t new_size;
705 int error = 0; 652 int error = 0;
706 653
707 xfs_rw_ilock(ip, XFS_ILOCK_EXCL); 654 xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
708 *new_sizep = 0;
709restart: 655restart:
710 error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); 656 error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
711 if (error) { 657 if (error) {
712 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); 658 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
713 *iolock = 0;
714 return error; 659 return error;
715 } 660 }
716 661
@@ -720,36 +665,21 @@ restart:
720 /* 665 /*
721 * If the offset is beyond the size of the file, we need to zero any 666 * If the offset is beyond the size of the file, we need to zero any
722 * blocks that fall between the existing EOF and the start of this 667 * blocks that fall between the existing EOF and the start of this
723 * write. There is no need to issue zeroing if another in-flght IO ends 668 * write. If zeroing is needed and we are currently holding the
724 * at or before this one If zeronig is needed and we are currently 669 * iolock shared, we need to update it to exclusive which involves
725 * holding the iolock shared, we need to update it to exclusive which 670 * dropping all locks and relocking to maintain correct locking order.
726 * involves dropping all locks and relocking to maintain correct locking 671 * If we do this, restart the function to ensure all checks and values
727 * order. If we do this, restart the function to ensure all checks and 672 * are still valid.
728 * values are still valid.
729 */ 673 */
730 if ((ip->i_new_size && *pos > ip->i_new_size) || 674 if (*pos > i_size_read(inode)) {
731 (!ip->i_new_size && *pos > ip->i_size)) {
732 if (*iolock == XFS_IOLOCK_SHARED) { 675 if (*iolock == XFS_IOLOCK_SHARED) {
733 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); 676 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
734 *iolock = XFS_IOLOCK_EXCL; 677 *iolock = XFS_IOLOCK_EXCL;
735 xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); 678 xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
736 goto restart; 679 goto restart;
737 } 680 }
738 error = -xfs_zero_eof(ip, *pos, ip->i_size); 681 error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
739 } 682 }
740
741 /*
742 * If this IO extends beyond EOF, we may need to update ip->i_new_size.
743 * We have already zeroed space beyond EOF (if necessary). Only update
744 * ip->i_new_size if this IO ends beyond any other in-flight writes.
745 */
746 new_size = *pos + *count;
747 if (new_size > ip->i_size) {
748 if (new_size > ip->i_new_size)
749 ip->i_new_size = new_size;
750 *new_sizep = new_size;
751 }
752
753 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); 683 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
754 if (error) 684 if (error)
755 return error; 685 return error;
@@ -794,9 +724,7 @@ xfs_file_dio_aio_write(
794 const struct iovec *iovp, 724 const struct iovec *iovp,
795 unsigned long nr_segs, 725 unsigned long nr_segs,
796 loff_t pos, 726 loff_t pos,
797 size_t ocount, 727 size_t ocount)
798 xfs_fsize_t *new_size,
799 int *iolock)
800{ 728{
801 struct file *file = iocb->ki_filp; 729 struct file *file = iocb->ki_filp;
802 struct address_space *mapping = file->f_mapping; 730 struct address_space *mapping = file->f_mapping;
@@ -806,10 +734,10 @@ xfs_file_dio_aio_write(
806 ssize_t ret = 0; 734 ssize_t ret = 0;
807 size_t count = ocount; 735 size_t count = ocount;
808 int unaligned_io = 0; 736 int unaligned_io = 0;
737 int iolock;
809 struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? 738 struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
810 mp->m_rtdev_targp : mp->m_ddev_targp; 739 mp->m_rtdev_targp : mp->m_ddev_targp;
811 740
812 *iolock = 0;
813 if ((pos & target->bt_smask) || (count & target->bt_smask)) 741 if ((pos & target->bt_smask) || (count & target->bt_smask))
814 return -XFS_ERROR(EINVAL); 742 return -XFS_ERROR(EINVAL);
815 743
@@ -824,31 +752,31 @@ xfs_file_dio_aio_write(
824 * EOF zeroing cases and fill out the new inode size as appropriate. 752 * EOF zeroing cases and fill out the new inode size as appropriate.
825 */ 753 */
826 if (unaligned_io || mapping->nrpages) 754 if (unaligned_io || mapping->nrpages)
827 *iolock = XFS_IOLOCK_EXCL; 755 iolock = XFS_IOLOCK_EXCL;
828 else 756 else
829 *iolock = XFS_IOLOCK_SHARED; 757 iolock = XFS_IOLOCK_SHARED;
830 xfs_rw_ilock(ip, *iolock); 758 xfs_rw_ilock(ip, iolock);
831 759
832 /* 760 /*
833 * Recheck if there are cached pages that need invalidate after we got 761 * Recheck if there are cached pages that need invalidate after we got
834 * the iolock to protect against other threads adding new pages while 762 * the iolock to protect against other threads adding new pages while
835 * we were waiting for the iolock. 763 * we were waiting for the iolock.
836 */ 764 */
837 if (mapping->nrpages && *iolock == XFS_IOLOCK_SHARED) { 765 if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
838 xfs_rw_iunlock(ip, *iolock); 766 xfs_rw_iunlock(ip, iolock);
839 *iolock = XFS_IOLOCK_EXCL; 767 iolock = XFS_IOLOCK_EXCL;
840 xfs_rw_ilock(ip, *iolock); 768 xfs_rw_ilock(ip, iolock);
841 } 769 }
842 770
843 ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock); 771 ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
844 if (ret) 772 if (ret)
845 return ret; 773 goto out;
846 774
847 if (mapping->nrpages) { 775 if (mapping->nrpages) {
848 ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, 776 ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
849 FI_REMAPF_LOCKED); 777 FI_REMAPF_LOCKED);
850 if (ret) 778 if (ret)
851 return ret; 779 goto out;
852 } 780 }
853 781
854 /* 782 /*
@@ -857,15 +785,18 @@ xfs_file_dio_aio_write(
857 */ 785 */
858 if (unaligned_io) 786 if (unaligned_io)
859 inode_dio_wait(inode); 787 inode_dio_wait(inode);
860 else if (*iolock == XFS_IOLOCK_EXCL) { 788 else if (iolock == XFS_IOLOCK_EXCL) {
861 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); 789 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
862 *iolock = XFS_IOLOCK_SHARED; 790 iolock = XFS_IOLOCK_SHARED;
863 } 791 }
864 792
865 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); 793 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
866 ret = generic_file_direct_write(iocb, iovp, 794 ret = generic_file_direct_write(iocb, iovp,
867 &nr_segs, pos, &iocb->ki_pos, count, ocount); 795 &nr_segs, pos, &iocb->ki_pos, count, ocount);
868 796
797out:
798 xfs_rw_iunlock(ip, iolock);
799
869 /* No fallback to buffered IO on errors for XFS. */ 800 /* No fallback to buffered IO on errors for XFS. */
870 ASSERT(ret < 0 || ret == count); 801 ASSERT(ret < 0 || ret == count);
871 return ret; 802 return ret;
@@ -877,9 +808,7 @@ xfs_file_buffered_aio_write(
877 const struct iovec *iovp, 808 const struct iovec *iovp,
878 unsigned long nr_segs, 809 unsigned long nr_segs,
879 loff_t pos, 810 loff_t pos,
880 size_t ocount, 811 size_t ocount)
881 xfs_fsize_t *new_size,
882 int *iolock)
883{ 812{
884 struct file *file = iocb->ki_filp; 813 struct file *file = iocb->ki_filp;
885 struct address_space *mapping = file->f_mapping; 814 struct address_space *mapping = file->f_mapping;
@@ -887,14 +816,14 @@ xfs_file_buffered_aio_write(
887 struct xfs_inode *ip = XFS_I(inode); 816 struct xfs_inode *ip = XFS_I(inode);
888 ssize_t ret; 817 ssize_t ret;
889 int enospc = 0; 818 int enospc = 0;
819 int iolock = XFS_IOLOCK_EXCL;
890 size_t count = ocount; 820 size_t count = ocount;
891 821
892 *iolock = XFS_IOLOCK_EXCL; 822 xfs_rw_ilock(ip, iolock);
893 xfs_rw_ilock(ip, *iolock);
894 823
895 ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock); 824 ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
896 if (ret) 825 if (ret)
897 return ret; 826 goto out;
898 827
899 /* We can write back this queue in page reclaim */ 828 /* We can write back this queue in page reclaim */
900 current->backing_dev_info = mapping->backing_dev_info; 829 current->backing_dev_info = mapping->backing_dev_info;
@@ -908,13 +837,15 @@ write_retry:
908 * page locks and retry *once* 837 * page locks and retry *once*
909 */ 838 */
910 if (ret == -ENOSPC && !enospc) { 839 if (ret == -ENOSPC && !enospc) {
911 ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
912 if (ret)
913 return ret;
914 enospc = 1; 840 enospc = 1;
915 goto write_retry; 841 ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
842 if (!ret)
843 goto write_retry;
916 } 844 }
845
917 current->backing_dev_info = NULL; 846 current->backing_dev_info = NULL;
847out:
848 xfs_rw_iunlock(ip, iolock);
918 return ret; 849 return ret;
919} 850}
920 851
@@ -930,9 +861,7 @@ xfs_file_aio_write(
930 struct inode *inode = mapping->host; 861 struct inode *inode = mapping->host;
931 struct xfs_inode *ip = XFS_I(inode); 862 struct xfs_inode *ip = XFS_I(inode);
932 ssize_t ret; 863 ssize_t ret;
933 int iolock;
934 size_t ocount = 0; 864 size_t ocount = 0;
935 xfs_fsize_t new_size = 0;
936 865
937 XFS_STATS_INC(xs_write_calls); 866 XFS_STATS_INC(xs_write_calls);
938 867
@@ -951,33 +880,22 @@ xfs_file_aio_write(
951 return -EIO; 880 return -EIO;
952 881
953 if (unlikely(file->f_flags & O_DIRECT)) 882 if (unlikely(file->f_flags & O_DIRECT))
954 ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, 883 ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount);
955 ocount, &new_size, &iolock);
956 else 884 else
957 ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, 885 ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
958 ocount, &new_size, &iolock); 886 ocount);
959
960 xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
961 887
962 if (ret <= 0) 888 if (ret > 0) {
963 goto out_unlock; 889 ssize_t err;
964 890
965 /* Handle various SYNC-type writes */ 891 XFS_STATS_ADD(xs_write_bytes, ret);
966 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
967 loff_t end = pos + ret - 1;
968 int error;
969 892
970 xfs_rw_iunlock(ip, iolock); 893 /* Handle various SYNC-type writes */
971 error = xfs_file_fsync(file, pos, end, 894 err = generic_write_sync(file, pos, ret);
972 (file->f_flags & __O_SYNC) ? 0 : 1); 895 if (err < 0)
973 xfs_rw_ilock(ip, iolock); 896 ret = err;
974 if (error)
975 ret = error;
976 } 897 }
977 898
978out_unlock:
979 xfs_aio_write_newsize_update(ip, new_size);
980 xfs_rw_iunlock(ip, iolock);
981 return ret; 899 return ret;
982} 900}
983 901
diff --git a/fs/xfs/xfs_fs_subr.c b/fs/xfs/xfs_fs_subr.c
index ed88ed16811c..652b875a9d4c 100644
--- a/fs/xfs/xfs_fs_subr.c
+++ b/fs/xfs/xfs_fs_subr.c
@@ -90,7 +90,7 @@ xfs_wait_on_pages(
90 90
91 if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { 91 if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
92 return -filemap_fdatawait_range(mapping, first, 92 return -filemap_fdatawait_range(mapping, first,
93 last == -1 ? ip->i_size - 1 : last); 93 last == -1 ? XFS_ISIZE(ip) - 1 : last);
94 } 94 }
95 return 0; 95 return 0;
96} 96}
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 3960a066d7ff..8c3e46394d48 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -77,7 +77,7 @@ xfs_inode_alloc(
77 77
78 ASSERT(atomic_read(&ip->i_pincount) == 0); 78 ASSERT(atomic_read(&ip->i_pincount) == 0);
79 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 79 ASSERT(!spin_is_locked(&ip->i_flags_lock));
80 ASSERT(completion_done(&ip->i_flush)); 80 ASSERT(!xfs_isiflocked(ip));
81 ASSERT(ip->i_ino == 0); 81 ASSERT(ip->i_ino == 0);
82 82
83 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 83 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
@@ -94,8 +94,6 @@ xfs_inode_alloc(
94 ip->i_update_core = 0; 94 ip->i_update_core = 0;
95 ip->i_delayed_blks = 0; 95 ip->i_delayed_blks = 0;
96 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); 96 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
97 ip->i_size = 0;
98 ip->i_new_size = 0;
99 97
100 return ip; 98 return ip;
101} 99}
@@ -150,7 +148,7 @@ xfs_inode_free(
150 /* asserts to verify all state is correct here */ 148 /* asserts to verify all state is correct here */
151 ASSERT(atomic_read(&ip->i_pincount) == 0); 149 ASSERT(atomic_read(&ip->i_pincount) == 0);
152 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 150 ASSERT(!spin_is_locked(&ip->i_flags_lock));
153 ASSERT(completion_done(&ip->i_flush)); 151 ASSERT(!xfs_isiflocked(ip));
154 152
155 /* 153 /*
156 * Because we use RCU freeing we need to ensure the inode always 154 * Because we use RCU freeing we need to ensure the inode always
@@ -450,8 +448,6 @@ again:
450 448
451 *ipp = ip; 449 *ipp = ip;
452 450
453 ASSERT(ip->i_df.if_ext_max ==
454 XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
455 /* 451 /*
456 * If we have a real type for an on-disk inode, we can set ops(&unlock) 452 * If we have a real type for an on-disk inode, we can set ops(&unlock)
457 * now. If it's a new inode being created, xfs_ialloc will handle it. 453 * now. If it's a new inode being created, xfs_ialloc will handle it.
@@ -715,3 +711,19 @@ xfs_isilocked(
715 return 0; 711 return 0;
716} 712}
717#endif 713#endif
714
715void
716__xfs_iflock(
717 struct xfs_inode *ip)
718{
719 wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
720 DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
721
722 do {
723 prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
724 if (xfs_isiflocked(ip))
725 io_schedule();
726 } while (!xfs_iflock_nowait(ip));
727
728 finish_wait(wq, &wait.wait);
729}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 9dda7cc32848..b21022499c2e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -299,11 +299,8 @@ xfs_iformat(
299{ 299{
300 xfs_attr_shortform_t *atp; 300 xfs_attr_shortform_t *atp;
301 int size; 301 int size;
302 int error; 302 int error = 0;
303 xfs_fsize_t di_size; 303 xfs_fsize_t di_size;
304 ip->i_df.if_ext_max =
305 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
306 error = 0;
307 304
308 if (unlikely(be32_to_cpu(dip->di_nextents) + 305 if (unlikely(be32_to_cpu(dip->di_nextents) +
309 be16_to_cpu(dip->di_anextents) > 306 be16_to_cpu(dip->di_anextents) >
@@ -350,7 +347,6 @@ xfs_iformat(
350 return XFS_ERROR(EFSCORRUPTED); 347 return XFS_ERROR(EFSCORRUPTED);
351 } 348 }
352 ip->i_d.di_size = 0; 349 ip->i_d.di_size = 0;
353 ip->i_size = 0;
354 ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); 350 ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
355 break; 351 break;
356 352
@@ -409,10 +405,10 @@ xfs_iformat(
409 } 405 }
410 if (!XFS_DFORK_Q(dip)) 406 if (!XFS_DFORK_Q(dip))
411 return 0; 407 return 0;
408
412 ASSERT(ip->i_afp == NULL); 409 ASSERT(ip->i_afp == NULL);
413 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS); 410 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
414 ip->i_afp->if_ext_max = 411
415 XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
416 switch (dip->di_aformat) { 412 switch (dip->di_aformat) {
417 case XFS_DINODE_FMT_LOCAL: 413 case XFS_DINODE_FMT_LOCAL:
418 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); 414 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
@@ -604,10 +600,11 @@ xfs_iformat_btree(
604 * or the number of extents is greater than the number of 600 * or the number of extents is greater than the number of
605 * blocks. 601 * blocks.
606 */ 602 */
607 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max 603 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
608 || XFS_BMDR_SPACE_CALC(nrecs) > 604 XFS_IFORK_MAXEXT(ip, whichfork) ||
609 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) 605 XFS_BMDR_SPACE_CALC(nrecs) >
610 || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { 606 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) ||
607 XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
611 xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).", 608 xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).",
612 (unsigned long long) ip->i_ino); 609 (unsigned long long) ip->i_ino);
613 XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, 610 XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
@@ -835,12 +832,6 @@ xfs_iread(
835 * with the uninitialized part of it. 832 * with the uninitialized part of it.
836 */ 833 */
837 ip->i_d.di_mode = 0; 834 ip->i_d.di_mode = 0;
838 /*
839 * Initialize the per-fork minima and maxima for a new
840 * inode here. xfs_iformat will do it for old inodes.
841 */
842 ip->i_df.if_ext_max =
843 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
844 } 835 }
845 836
846 /* 837 /*
@@ -861,7 +852,6 @@ xfs_iread(
861 } 852 }
862 853
863 ip->i_delayed_blks = 0; 854 ip->i_delayed_blks = 0;
864 ip->i_size = ip->i_d.di_size;
865 855
866 /* 856 /*
867 * Mark the buffer containing the inode as something to keep 857 * Mark the buffer containing the inode as something to keep
@@ -1051,7 +1041,6 @@ xfs_ialloc(
1051 } 1041 }
1052 1042
1053 ip->i_d.di_size = 0; 1043 ip->i_d.di_size = 0;
1054 ip->i_size = 0;
1055 ip->i_d.di_nextents = 0; 1044 ip->i_d.di_nextents = 0;
1056 ASSERT(ip->i_d.di_nblocks == 0); 1045 ASSERT(ip->i_d.di_nblocks == 0);
1057 1046
@@ -1166,52 +1155,6 @@ xfs_ialloc(
1166} 1155}
1167 1156
1168/* 1157/*
1169 * Check to make sure that there are no blocks allocated to the
1170 * file beyond the size of the file. We don't check this for
1171 * files with fixed size extents or real time extents, but we
1172 * at least do it for regular files.
1173 */
1174#ifdef DEBUG
1175STATIC void
1176xfs_isize_check(
1177 struct xfs_inode *ip,
1178 xfs_fsize_t isize)
1179{
1180 struct xfs_mount *mp = ip->i_mount;
1181 xfs_fileoff_t map_first;
1182 int nimaps;
1183 xfs_bmbt_irec_t imaps[2];
1184 int error;
1185
1186 if (!S_ISREG(ip->i_d.di_mode))
1187 return;
1188
1189 if (XFS_IS_REALTIME_INODE(ip))
1190 return;
1191
1192 if (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
1193 return;
1194
1195 nimaps = 2;
1196 map_first = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
1197 /*
1198 * The filesystem could be shutting down, so bmapi may return
1199 * an error.
1200 */
1201 error = xfs_bmapi_read(ip, map_first,
1202 (XFS_B_TO_FSB(mp,
1203 (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - map_first),
1204 imaps, &nimaps, XFS_BMAPI_ENTIRE);
1205 if (error)
1206 return;
1207 ASSERT(nimaps == 1);
1208 ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
1209}
1210#else /* DEBUG */
1211#define xfs_isize_check(ip, isize)
1212#endif /* DEBUG */
1213
1214/*
1215 * Free up the underlying blocks past new_size. The new size must be smaller 1158 * Free up the underlying blocks past new_size. The new size must be smaller
1216 * than the current size. This routine can be used both for the attribute and 1159 * than the current size. This routine can be used both for the attribute and
1217 * data fork, and does not modify the inode size, which is left to the caller. 1160 * data fork, and does not modify the inode size, which is left to the caller.
@@ -1252,12 +1195,14 @@ xfs_itruncate_extents(
1252 int done = 0; 1195 int done = 0;
1253 1196
1254 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); 1197 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1255 ASSERT(new_size <= ip->i_size); 1198 ASSERT(new_size <= XFS_ISIZE(ip));
1256 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 1199 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
1257 ASSERT(ip->i_itemp != NULL); 1200 ASSERT(ip->i_itemp != NULL);
1258 ASSERT(ip->i_itemp->ili_lock_flags == 0); 1201 ASSERT(ip->i_itemp->ili_lock_flags == 0);
1259 ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); 1202 ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1260 1203
1204 trace_xfs_itruncate_extents_start(ip, new_size);
1205
1261 /* 1206 /*
1262 * Since it is possible for space to become allocated beyond 1207 * Since it is possible for space to become allocated beyond
1263 * the end of the file (in a crash where the space is allocated 1208 * the end of the file (in a crash where the space is allocated
@@ -1325,6 +1270,14 @@ xfs_itruncate_extents(
1325 goto out; 1270 goto out;
1326 } 1271 }
1327 1272
1273 /*
1274 * Always re-log the inode so that our permanent transaction can keep
1275 * on rolling it forward in the log.
1276 */
1277 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1278
1279 trace_xfs_itruncate_extents_end(ip, new_size);
1280
1328out: 1281out:
1329 *tpp = tp; 1282 *tpp = tp;
1330 return error; 1283 return error;
@@ -1338,74 +1291,6 @@ out_bmap_cancel:
1338 goto out; 1291 goto out;
1339} 1292}
1340 1293
1341int
1342xfs_itruncate_data(
1343 struct xfs_trans **tpp,
1344 struct xfs_inode *ip,
1345 xfs_fsize_t new_size)
1346{
1347 int error;
1348
1349 trace_xfs_itruncate_data_start(ip, new_size);
1350
1351 /*
1352 * The first thing we do is set the size to new_size permanently on
1353 * disk. This way we don't have to worry about anyone ever being able
1354 * to look at the data being freed even in the face of a crash.
1355 * What we're getting around here is the case where we free a block, it
1356 * is allocated to another file, it is written to, and then we crash.
1357 * If the new data gets written to the file but the log buffers
1358 * containing the free and reallocation don't, then we'd end up with
1359 * garbage in the blocks being freed. As long as we make the new_size
1360 * permanent before actually freeing any blocks it doesn't matter if
1361 * they get written to.
1362 */
1363 if (ip->i_d.di_nextents > 0) {
1364 /*
1365 * If we are not changing the file size then do not update
1366 * the on-disk file size - we may be called from
1367 * xfs_inactive_free_eofblocks(). If we update the on-disk
1368 * file size and then the system crashes before the contents
1369 * of the file are flushed to disk then the files may be
1370 * full of holes (ie NULL files bug).
1371 */
1372 if (ip->i_size != new_size) {
1373 ip->i_d.di_size = new_size;
1374 ip->i_size = new_size;
1375 xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
1376 }
1377 }
1378
1379 error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size);
1380 if (error)
1381 return error;
1382
1383 /*
1384 * If we are not changing the file size then do not update the on-disk
1385 * file size - we may be called from xfs_inactive_free_eofblocks().
1386 * If we update the on-disk file size and then the system crashes
1387 * before the contents of the file are flushed to disk then the files
1388 * may be full of holes (ie NULL files bug).
1389 */
1390 xfs_isize_check(ip, new_size);
1391 if (ip->i_size != new_size) {
1392 ip->i_d.di_size = new_size;
1393 ip->i_size = new_size;
1394 }
1395
1396 ASSERT(new_size != 0 || ip->i_delayed_blks == 0);
1397 ASSERT(new_size != 0 || ip->i_d.di_nextents == 0);
1398
1399 /*
1400 * Always re-log the inode so that our permanent transaction can keep
1401 * on rolling it forward in the log.
1402 */
1403 xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
1404
1405 trace_xfs_itruncate_data_end(ip, new_size);
1406 return 0;
1407}
1408
1409/* 1294/*
1410 * This is called when the inode's link count goes to 0. 1295 * This is called when the inode's link count goes to 0.
1411 * We place the on-disk inode on a list in the AGI. It 1296 * We place the on-disk inode on a list in the AGI. It
@@ -1824,8 +1709,7 @@ xfs_ifree(
1824 ASSERT(ip->i_d.di_nlink == 0); 1709 ASSERT(ip->i_d.di_nlink == 0);
1825 ASSERT(ip->i_d.di_nextents == 0); 1710 ASSERT(ip->i_d.di_nextents == 0);
1826 ASSERT(ip->i_d.di_anextents == 0); 1711 ASSERT(ip->i_d.di_anextents == 0);
1827 ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) || 1712 ASSERT(ip->i_d.di_size == 0 || !S_ISREG(ip->i_d.di_mode));
1828 (!S_ISREG(ip->i_d.di_mode)));
1829 ASSERT(ip->i_d.di_nblocks == 0); 1713 ASSERT(ip->i_d.di_nblocks == 0);
1830 1714
1831 /* 1715 /*
@@ -1844,8 +1728,6 @@ xfs_ifree(
1844 ip->i_d.di_flags = 0; 1728 ip->i_d.di_flags = 0;
1845 ip->i_d.di_dmevmask = 0; 1729 ip->i_d.di_dmevmask = 0;
1846 ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ 1730 ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */
1847 ip->i_df.if_ext_max =
1848 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
1849 ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 1731 ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
1850 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 1732 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
1851 /* 1733 /*
@@ -2151,7 +2033,7 @@ xfs_idestroy_fork(
2151 * once someone is waiting for it to be unpinned. 2033 * once someone is waiting for it to be unpinned.
2152 */ 2034 */
2153static void 2035static void
2154xfs_iunpin_nowait( 2036xfs_iunpin(
2155 struct xfs_inode *ip) 2037 struct xfs_inode *ip)
2156{ 2038{
2157 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2039 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
@@ -2163,14 +2045,29 @@ xfs_iunpin_nowait(
2163 2045
2164} 2046}
2165 2047
2048static void
2049__xfs_iunpin_wait(
2050 struct xfs_inode *ip)
2051{
2052 wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT);
2053 DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT);
2054
2055 xfs_iunpin(ip);
2056
2057 do {
2058 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
2059 if (xfs_ipincount(ip))
2060 io_schedule();
2061 } while (xfs_ipincount(ip));
2062 finish_wait(wq, &wait.wait);
2063}
2064
2166void 2065void
2167xfs_iunpin_wait( 2066xfs_iunpin_wait(
2168 struct xfs_inode *ip) 2067 struct xfs_inode *ip)
2169{ 2068{
2170 if (xfs_ipincount(ip)) { 2069 if (xfs_ipincount(ip))
2171 xfs_iunpin_nowait(ip); 2070 __xfs_iunpin_wait(ip);
2172 wait_event(ip->i_ipin_wait, (xfs_ipincount(ip) == 0));
2173 }
2174} 2071}
2175 2072
2176/* 2073/*
@@ -2510,9 +2407,9 @@ xfs_iflush(
2510 XFS_STATS_INC(xs_iflush_count); 2407 XFS_STATS_INC(xs_iflush_count);
2511 2408
2512 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2409 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2513 ASSERT(!completion_done(&ip->i_flush)); 2410 ASSERT(xfs_isiflocked(ip));
2514 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 2411 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
2515 ip->i_d.di_nextents > ip->i_df.if_ext_max); 2412 ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
2516 2413
2517 iip = ip->i_itemp; 2414 iip = ip->i_itemp;
2518 mp = ip->i_mount; 2415 mp = ip->i_mount;
@@ -2529,7 +2426,7 @@ xfs_iflush(
2529 * out for us if they occur after the log force completes. 2426 * out for us if they occur after the log force completes.
2530 */ 2427 */
2531 if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) { 2428 if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
2532 xfs_iunpin_nowait(ip); 2429 xfs_iunpin(ip);
2533 xfs_ifunlock(ip); 2430 xfs_ifunlock(ip);
2534 return EAGAIN; 2431 return EAGAIN;
2535 } 2432 }
@@ -2626,9 +2523,9 @@ xfs_iflush_int(
2626#endif 2523#endif
2627 2524
2628 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2525 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2629 ASSERT(!completion_done(&ip->i_flush)); 2526 ASSERT(xfs_isiflocked(ip));
2630 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 2527 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
2631 ip->i_d.di_nextents > ip->i_df.if_ext_max); 2528 ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
2632 2529
2633 iip = ip->i_itemp; 2530 iip = ip->i_itemp;
2634 mp = ip->i_mount; 2531 mp = ip->i_mount;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f0e6b151ba37..2f27b7454085 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -66,7 +66,6 @@ typedef struct xfs_ifork {
66 struct xfs_btree_block *if_broot; /* file's incore btree root */ 66 struct xfs_btree_block *if_broot; /* file's incore btree root */
67 short if_broot_bytes; /* bytes allocated for root */ 67 short if_broot_bytes; /* bytes allocated for root */
68 unsigned char if_flags; /* per-fork flags */ 68 unsigned char if_flags; /* per-fork flags */
69 unsigned char if_ext_max; /* max # of extent records */
70 union { 69 union {
71 xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */ 70 xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */
72 xfs_ext_irec_t *if_ext_irec; /* irec map file exts */ 71 xfs_ext_irec_t *if_ext_irec; /* irec map file exts */
@@ -206,12 +205,12 @@ typedef struct xfs_icdinode {
206 ((w) == XFS_DATA_FORK ? \ 205 ((w) == XFS_DATA_FORK ? \
207 ((ip)->i_d.di_nextents = (n)) : \ 206 ((ip)->i_d.di_nextents = (n)) : \
208 ((ip)->i_d.di_anextents = (n))) 207 ((ip)->i_d.di_anextents = (n)))
209 208#define XFS_IFORK_MAXEXT(ip, w) \
209 (XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t))
210 210
211 211
212#ifdef __KERNEL__ 212#ifdef __KERNEL__
213 213
214struct bhv_desc;
215struct xfs_buf; 214struct xfs_buf;
216struct xfs_bmap_free; 215struct xfs_bmap_free;
217struct xfs_bmbt_irec; 216struct xfs_bmbt_irec;
@@ -220,12 +219,6 @@ struct xfs_mount;
220struct xfs_trans; 219struct xfs_trans;
221struct xfs_dquot; 220struct xfs_dquot;
222 221
223typedef struct dm_attrs_s {
224 __uint32_t da_dmevmask; /* DMIG event mask */
225 __uint16_t da_dmstate; /* DMIG state info */
226 __uint16_t da_pad; /* DMIG extra padding */
227} dm_attrs_t;
228
229typedef struct xfs_inode { 222typedef struct xfs_inode {
230 /* Inode linking and identification information. */ 223 /* Inode linking and identification information. */
231 struct xfs_mount *i_mount; /* fs mount struct ptr */ 224 struct xfs_mount *i_mount; /* fs mount struct ptr */
@@ -244,27 +237,19 @@ typedef struct xfs_inode {
244 struct xfs_inode_log_item *i_itemp; /* logging information */ 237 struct xfs_inode_log_item *i_itemp; /* logging information */
245 mrlock_t i_lock; /* inode lock */ 238 mrlock_t i_lock; /* inode lock */
246 mrlock_t i_iolock; /* inode IO lock */ 239 mrlock_t i_iolock; /* inode IO lock */
247 struct completion i_flush; /* inode flush completion q */
248 atomic_t i_pincount; /* inode pin count */ 240 atomic_t i_pincount; /* inode pin count */
249 wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
250 spinlock_t i_flags_lock; /* inode i_flags lock */ 241 spinlock_t i_flags_lock; /* inode i_flags lock */
251 /* Miscellaneous state. */ 242 /* Miscellaneous state. */
252 unsigned short i_flags; /* see defined flags below */ 243 unsigned long i_flags; /* see defined flags below */
253 unsigned char i_update_core; /* timestamps/size is dirty */ 244 unsigned char i_update_core; /* timestamps/size is dirty */
254 unsigned int i_delayed_blks; /* count of delay alloc blks */ 245 unsigned int i_delayed_blks; /* count of delay alloc blks */
255 246
256 xfs_icdinode_t i_d; /* most of ondisk inode */ 247 xfs_icdinode_t i_d; /* most of ondisk inode */
257 248
258 xfs_fsize_t i_size; /* in-memory size */
259 xfs_fsize_t i_new_size; /* size when write completes */
260
261 /* VFS inode */ 249 /* VFS inode */
262 struct inode i_vnode; /* embedded VFS inode */ 250 struct inode i_vnode; /* embedded VFS inode */
263} xfs_inode_t; 251} xfs_inode_t;
264 252
265#define XFS_ISIZE(ip) S_ISREG((ip)->i_d.di_mode) ? \
266 (ip)->i_size : (ip)->i_d.di_size;
267
268/* Convert from vfs inode to xfs inode */ 253/* Convert from vfs inode to xfs inode */
269static inline struct xfs_inode *XFS_I(struct inode *inode) 254static inline struct xfs_inode *XFS_I(struct inode *inode)
270{ 255{
@@ -278,6 +263,18 @@ static inline struct inode *VFS_I(struct xfs_inode *ip)
278} 263}
279 264
280/* 265/*
266 * For regular files we only update the on-disk filesize when actually
267 * writing data back to disk. Until then only the copy in the VFS inode
268 * is uptodate.
269 */
270static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip)
271{
272 if (S_ISREG(ip->i_d.di_mode))
273 return i_size_read(VFS_I(ip));
274 return ip->i_d.di_size;
275}
276
277/*
281 * i_flags helper functions 278 * i_flags helper functions
282 */ 279 */
283static inline void 280static inline void
@@ -331,6 +328,19 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
331 return ret; 328 return ret;
332} 329}
333 330
331static inline int
332xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags)
333{
334 int ret;
335
336 spin_lock(&ip->i_flags_lock);
337 ret = ip->i_flags & flags;
338 if (!ret)
339 ip->i_flags |= flags;
340 spin_unlock(&ip->i_flags_lock);
341 return ret;
342}
343
334/* 344/*
335 * Project quota id helpers (previously projid was 16bit only 345 * Project quota id helpers (previously projid was 16bit only
336 * and using two 16bit values to hold new 32bit projid was chosen 346 * and using two 16bit values to hold new 32bit projid was chosen
@@ -351,35 +361,19 @@ xfs_set_projid(struct xfs_inode *ip,
351} 361}
352 362
353/* 363/*
354 * Manage the i_flush queue embedded in the inode. This completion
355 * queue synchronizes processes attempting to flush the in-core
356 * inode back to disk.
357 */
358static inline void xfs_iflock(xfs_inode_t *ip)
359{
360 wait_for_completion(&ip->i_flush);
361}
362
363static inline int xfs_iflock_nowait(xfs_inode_t *ip)
364{
365 return try_wait_for_completion(&ip->i_flush);
366}
367
368static inline void xfs_ifunlock(xfs_inode_t *ip)
369{
370 complete(&ip->i_flush);
371}
372
373/*
374 * In-core inode flags. 364 * In-core inode flags.
375 */ 365 */
376#define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */ 366#define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */
377#define XFS_ISTALE 0x0002 /* inode has been staled */ 367#define XFS_ISTALE (1 << 1) /* inode has been staled */
378#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ 368#define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */
379#define XFS_INEW 0x0008 /* inode has just been allocated */ 369#define XFS_INEW (1 << 3) /* inode has just been allocated */
380#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ 370#define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */
381#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ 371#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
382#define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */ 372#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
373#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */
374#define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT)
375#define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */
376#define XFS_IPINNED (1 << __XFS_IPINNED_BIT)
383 377
384/* 378/*
385 * Per-lifetime flags need to be reset when re-using a reclaimable inode during 379 * Per-lifetime flags need to be reset when re-using a reclaimable inode during
@@ -392,6 +386,34 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
392 XFS_IFILESTREAM); 386 XFS_IFILESTREAM);
393 387
394/* 388/*
389 * Synchronize processes attempting to flush the in-core inode back to disk.
390 */
391
392extern void __xfs_iflock(struct xfs_inode *ip);
393
394static inline int xfs_iflock_nowait(struct xfs_inode *ip)
395{
396 return !xfs_iflags_test_and_set(ip, XFS_IFLOCK);
397}
398
399static inline void xfs_iflock(struct xfs_inode *ip)
400{
401 if (!xfs_iflock_nowait(ip))
402 __xfs_iflock(ip);
403}
404
405static inline void xfs_ifunlock(struct xfs_inode *ip)
406{
407 xfs_iflags_clear(ip, XFS_IFLOCK);
408 wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
409}
410
411static inline int xfs_isiflocked(struct xfs_inode *ip)
412{
413 return xfs_iflags_test(ip, XFS_IFLOCK);
414}
415
416/*
395 * Flags for inode locking. 417 * Flags for inode locking.
396 * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) 418 * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
397 * 1<<16 - 1<<32-1 -- lockdep annotation (integers) 419 * 1<<16 - 1<<32-1 -- lockdep annotation (integers)
@@ -491,8 +513,6 @@ int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
491 struct xfs_bmap_free *); 513 struct xfs_bmap_free *);
492int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *, 514int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
493 int, xfs_fsize_t); 515 int, xfs_fsize_t);
494int xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *,
495 xfs_fsize_t);
496int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); 516int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
497 517
498void xfs_iext_realloc(xfs_inode_t *, int, int); 518void xfs_iext_realloc(xfs_inode_t *, int, int);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index cfd6c7f8cc3c..91d71dcd4852 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -79,8 +79,6 @@ xfs_inode_item_size(
79 break; 79 break;
80 80
81 case XFS_DINODE_FMT_BTREE: 81 case XFS_DINODE_FMT_BTREE:
82 ASSERT(ip->i_df.if_ext_max ==
83 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
84 iip->ili_format.ilf_fields &= 82 iip->ili_format.ilf_fields &=
85 ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | 83 ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
86 XFS_ILOG_DEV | XFS_ILOG_UUID); 84 XFS_ILOG_DEV | XFS_ILOG_UUID);
@@ -557,7 +555,7 @@ xfs_inode_item_unpin(
557 trace_xfs_inode_unpin(ip, _RET_IP_); 555 trace_xfs_inode_unpin(ip, _RET_IP_);
558 ASSERT(atomic_read(&ip->i_pincount) > 0); 556 ASSERT(atomic_read(&ip->i_pincount) > 0);
559 if (atomic_dec_and_test(&ip->i_pincount)) 557 if (atomic_dec_and_test(&ip->i_pincount))
560 wake_up(&ip->i_ipin_wait); 558 wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
561} 559}
562 560
563/* 561/*
@@ -719,7 +717,7 @@ xfs_inode_item_pushbuf(
719 * If a flush is not in progress anymore, chances are that the 717 * If a flush is not in progress anymore, chances are that the
720 * inode was taken off the AIL. So, just get out. 718 * inode was taken off the AIL. So, just get out.
721 */ 719 */
722 if (completion_done(&ip->i_flush) || 720 if (!xfs_isiflocked(ip) ||
723 !(lip->li_flags & XFS_LI_IN_AIL)) { 721 !(lip->li_flags & XFS_LI_IN_AIL)) {
724 xfs_iunlock(ip, XFS_ILOCK_SHARED); 722 xfs_iunlock(ip, XFS_ILOCK_SHARED);
725 return true; 723 return true;
@@ -752,7 +750,7 @@ xfs_inode_item_push(
752 struct xfs_inode *ip = iip->ili_inode; 750 struct xfs_inode *ip = iip->ili_inode;
753 751
754 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 752 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
755 ASSERT(!completion_done(&ip->i_flush)); 753 ASSERT(xfs_isiflocked(ip));
756 754
757 /* 755 /*
758 * Since we were able to lock the inode's flush lock and 756 * Since we were able to lock the inode's flush lock and
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 9afa282aa937..246c7d57c6f9 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -57,26 +57,26 @@ xfs_iomap_eof_align_last_fsb(
57 xfs_fileoff_t *last_fsb) 57 xfs_fileoff_t *last_fsb)
58{ 58{
59 xfs_fileoff_t new_last_fsb = 0; 59 xfs_fileoff_t new_last_fsb = 0;
60 xfs_extlen_t align; 60 xfs_extlen_t align = 0;
61 int eof, error; 61 int eof, error;
62 62
63 if (XFS_IS_REALTIME_INODE(ip)) 63 if (!XFS_IS_REALTIME_INODE(ip)) {
64 ; 64 /*
65 /* 65 * Round up the allocation request to a stripe unit
66 * If mounted with the "-o swalloc" option, roundup the allocation 66 * (m_dalign) boundary if the file size is >= stripe unit
67 * request to a stripe width boundary if the file size is >= 67 * size, and we are allocating past the allocation eof.
68 * stripe width and we are allocating past the allocation eof. 68 *
69 */ 69 * If mounted with the "-o swalloc" option the alignment is
70 else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) && 70 * increased from the strip unit size to the stripe width.
71 (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_swidth))) 71 */
72 new_last_fsb = roundup_64(*last_fsb, mp->m_swidth); 72 if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
73 /* 73 align = mp->m_swidth;
74 * Roundup the allocation request to a stripe unit (m_dalign) boundary 74 else if (mp->m_dalign)
75 * if the file size is >= stripe unit size, and we are allocating past 75 align = mp->m_dalign;
76 * the allocation eof. 76
77 */ 77 if (align && XFS_ISIZE(ip) >= XFS_FSB_TO_B(mp, align))
78 else if (mp->m_dalign && (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_dalign))) 78 new_last_fsb = roundup_64(*last_fsb, align);
79 new_last_fsb = roundup_64(*last_fsb, mp->m_dalign); 79 }
80 80
81 /* 81 /*
82 * Always round up the allocation request to an extent boundary 82 * Always round up the allocation request to an extent boundary
@@ -154,7 +154,7 @@ xfs_iomap_write_direct(
154 154
155 offset_fsb = XFS_B_TO_FSBT(mp, offset); 155 offset_fsb = XFS_B_TO_FSBT(mp, offset);
156 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); 156 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
157 if ((offset + count) > ip->i_size) { 157 if ((offset + count) > XFS_ISIZE(ip)) {
158 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); 158 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
159 if (error) 159 if (error)
160 goto error_out; 160 goto error_out;
@@ -211,7 +211,7 @@ xfs_iomap_write_direct(
211 xfs_trans_ijoin(tp, ip, 0); 211 xfs_trans_ijoin(tp, ip, 0);
212 212
213 bmapi_flag = 0; 213 bmapi_flag = 0;
214 if (offset < ip->i_size || extsz) 214 if (offset < XFS_ISIZE(ip) || extsz)
215 bmapi_flag |= XFS_BMAPI_PREALLOC; 215 bmapi_flag |= XFS_BMAPI_PREALLOC;
216 216
217 /* 217 /*
@@ -286,7 +286,7 @@ xfs_iomap_eof_want_preallocate(
286 int found_delalloc = 0; 286 int found_delalloc = 0;
287 287
288 *prealloc = 0; 288 *prealloc = 0;
289 if ((offset + count) <= ip->i_size) 289 if (offset + count <= XFS_ISIZE(ip))
290 return 0; 290 return 0;
291 291
292 /* 292 /*
@@ -340,7 +340,7 @@ xfs_iomap_prealloc_size(
340 * if we pass in alloc_blocks = 0. Hence the "+ 1" to 340 * if we pass in alloc_blocks = 0. Hence the "+ 1" to
341 * ensure we always pass in a non-zero value. 341 * ensure we always pass in a non-zero value.
342 */ 342 */
343 alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size) + 1; 343 alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1;
344 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, 344 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
345 rounddown_pow_of_two(alloc_blocks)); 345 rounddown_pow_of_two(alloc_blocks));
346 346
@@ -564,7 +564,7 @@ xfs_iomap_write_allocate(
564 * back.... 564 * back....
565 */ 565 */
566 nimaps = 1; 566 nimaps = 1;
567 end_fsb = XFS_B_TO_FSB(mp, ip->i_size); 567 end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
568 error = xfs_bmap_last_offset(NULL, ip, &last_block, 568 error = xfs_bmap_last_offset(NULL, ip, &last_block,
569 XFS_DATA_FORK); 569 XFS_DATA_FORK);
570 if (error) 570 if (error)
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index f9babd179223..ab302539e5b9 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -750,6 +750,7 @@ xfs_setattr_size(
750 struct xfs_mount *mp = ip->i_mount; 750 struct xfs_mount *mp = ip->i_mount;
751 struct inode *inode = VFS_I(ip); 751 struct inode *inode = VFS_I(ip);
752 int mask = iattr->ia_valid; 752 int mask = iattr->ia_valid;
753 xfs_off_t oldsize, newsize;
753 struct xfs_trans *tp; 754 struct xfs_trans *tp;
754 int error; 755 int error;
755 uint lock_flags; 756 uint lock_flags;
@@ -777,11 +778,13 @@ xfs_setattr_size(
777 lock_flags |= XFS_IOLOCK_EXCL; 778 lock_flags |= XFS_IOLOCK_EXCL;
778 xfs_ilock(ip, lock_flags); 779 xfs_ilock(ip, lock_flags);
779 780
781 oldsize = inode->i_size;
782 newsize = iattr->ia_size;
783
780 /* 784 /*
781 * Short circuit the truncate case for zero length files. 785 * Short circuit the truncate case for zero length files.
782 */ 786 */
783 if (iattr->ia_size == 0 && 787 if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) {
784 ip->i_size == 0 && ip->i_d.di_nextents == 0) {
785 if (!(mask & (ATTR_CTIME|ATTR_MTIME))) 788 if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
786 goto out_unlock; 789 goto out_unlock;
787 790
@@ -807,14 +810,14 @@ xfs_setattr_size(
807 * the inode to the transaction, because the inode cannot be unlocked 810 * the inode to the transaction, because the inode cannot be unlocked
808 * once it is a part of the transaction. 811 * once it is a part of the transaction.
809 */ 812 */
810 if (iattr->ia_size > ip->i_size) { 813 if (newsize > oldsize) {
811 /* 814 /*
812 * Do the first part of growing a file: zero any data in the 815 * Do the first part of growing a file: zero any data in the
813 * last block that is beyond the old EOF. We need to do this 816 * last block that is beyond the old EOF. We need to do this
814 * before the inode is joined to the transaction to modify 817 * before the inode is joined to the transaction to modify
815 * i_size. 818 * i_size.
816 */ 819 */
817 error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); 820 error = xfs_zero_eof(ip, newsize, oldsize);
818 if (error) 821 if (error)
819 goto out_unlock; 822 goto out_unlock;
820 } 823 }
@@ -833,8 +836,8 @@ xfs_setattr_size(
833 * here and prevents waiting for other data not within the range we 836 * here and prevents waiting for other data not within the range we
834 * care about here. 837 * care about here.
835 */ 838 */
836 if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) { 839 if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
837 error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, 0, 840 error = xfs_flush_pages(ip, ip->i_d.di_size, newsize, 0,
838 FI_NONE); 841 FI_NONE);
839 if (error) 842 if (error)
840 goto out_unlock; 843 goto out_unlock;
@@ -845,8 +848,7 @@ xfs_setattr_size(
845 */ 848 */
846 inode_dio_wait(inode); 849 inode_dio_wait(inode);
847 850
848 error = -block_truncate_page(inode->i_mapping, iattr->ia_size, 851 error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
849 xfs_get_blocks);
850 if (error) 852 if (error)
851 goto out_unlock; 853 goto out_unlock;
852 854
@@ -857,7 +859,7 @@ xfs_setattr_size(
857 if (error) 859 if (error)
858 goto out_trans_cancel; 860 goto out_trans_cancel;
859 861
860 truncate_setsize(inode, iattr->ia_size); 862 truncate_setsize(inode, newsize);
861 863
862 commit_flags = XFS_TRANS_RELEASE_LOG_RES; 864 commit_flags = XFS_TRANS_RELEASE_LOG_RES;
863 lock_flags |= XFS_ILOCK_EXCL; 865 lock_flags |= XFS_ILOCK_EXCL;
@@ -876,19 +878,29 @@ xfs_setattr_size(
876 * these flags set. For all other operations the VFS set these flags 878 * these flags set. For all other operations the VFS set these flags
877 * explicitly if it wants a timestamp update. 879 * explicitly if it wants a timestamp update.
878 */ 880 */
879 if (iattr->ia_size != ip->i_size && 881 if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
880 (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
881 iattr->ia_ctime = iattr->ia_mtime = 882 iattr->ia_ctime = iattr->ia_mtime =
882 current_fs_time(inode->i_sb); 883 current_fs_time(inode->i_sb);
883 mask |= ATTR_CTIME | ATTR_MTIME; 884 mask |= ATTR_CTIME | ATTR_MTIME;
884 } 885 }
885 886
886 if (iattr->ia_size > ip->i_size) { 887 /*
887 ip->i_d.di_size = iattr->ia_size; 888 * The first thing we do is set the size to new_size permanently on
888 ip->i_size = iattr->ia_size; 889 * disk. This way we don't have to worry about anyone ever being able
889 } else if (iattr->ia_size <= ip->i_size || 890 * to look at the data being freed even in the face of a crash.
890 (iattr->ia_size == 0 && ip->i_d.di_nextents)) { 891 * What we're getting around here is the case where we free a block, it
891 error = xfs_itruncate_data(&tp, ip, iattr->ia_size); 892 * is allocated to another file, it is written to, and then we crash.
893 * If the new data gets written to the file but the log buffers
894 * containing the free and reallocation don't, then we'd end up with
895 * garbage in the blocks being freed. As long as we make the new size
896 * permanent before actually freeing any blocks it doesn't matter if
897 * they get written to.
898 */
899 ip->i_d.di_size = newsize;
900 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
901
902 if (newsize <= oldsize) {
903 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
892 if (error) 904 if (error)
893 goto out_trans_abort; 905 goto out_trans_abort;
894 906
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 5cc3dde1bc90..eafbcff81f3a 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -31,6 +31,7 @@
31#include "xfs_mount.h" 31#include "xfs_mount.h"
32#include "xfs_bmap_btree.h" 32#include "xfs_bmap_btree.h"
33#include "xfs_inode.h" 33#include "xfs_inode.h"
34#include "xfs_inode_item.h"
34#include "xfs_itable.h" 35#include "xfs_itable.h"
35#include "xfs_bmap.h" 36#include "xfs_bmap.h"
36#include "xfs_rtalloc.h" 37#include "xfs_rtalloc.h"
@@ -263,13 +264,18 @@ xfs_qm_scall_trunc_qfile(
263 xfs_ilock(ip, XFS_ILOCK_EXCL); 264 xfs_ilock(ip, XFS_ILOCK_EXCL);
264 xfs_trans_ijoin(tp, ip, 0); 265 xfs_trans_ijoin(tp, ip, 0);
265 266
266 error = xfs_itruncate_data(&tp, ip, 0); 267 ip->i_d.di_size = 0;
268 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
269
270 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
267 if (error) { 271 if (error) {
268 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | 272 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
269 XFS_TRANS_ABORT); 273 XFS_TRANS_ABORT);
270 goto out_unlock; 274 goto out_unlock;
271 } 275 }
272 276
277 ASSERT(ip->i_d.di_nextents == 0);
278
273 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 279 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
274 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 280 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
275 281
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 281961c1d81a..ee5b695c99a7 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -828,14 +828,6 @@ xfs_fs_inode_init_once(
828 /* xfs inode */ 828 /* xfs inode */
829 atomic_set(&ip->i_pincount, 0); 829 atomic_set(&ip->i_pincount, 0);
830 spin_lock_init(&ip->i_flags_lock); 830 spin_lock_init(&ip->i_flags_lock);
831 init_waitqueue_head(&ip->i_ipin_wait);
832 /*
833 * Because we want to use a counting completion, complete
834 * the flush completion once to allow a single access to
835 * the flush completion without blocking.
836 */
837 init_completion(&ip->i_flush);
838 complete(&ip->i_flush);
839 831
840 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, 832 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
841 "xfsino", ip->i_ino); 833 "xfsino", ip->i_ino);
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 72c01a1c16e7..40b75eecd2b4 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -707,14 +707,13 @@ xfs_reclaim_inode_grab(
707 return 1; 707 return 1;
708 708
709 /* 709 /*
710 * do some unlocked checks first to avoid unnecessary lock traffic. 710 * If we are asked for non-blocking operation, do unlocked checks to
711 * The first is a flush lock check, the second is a already in reclaim 711 * see if the inode already is being flushed or in reclaim to avoid
712 * check. Only do these checks if we are not going to block on locks. 712 * lock traffic.
713 */ 713 */
714 if ((flags & SYNC_TRYLOCK) && 714 if ((flags & SYNC_TRYLOCK) &&
715 (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) { 715 __xfs_iflags_test(ip, XFS_IFLOCK | XFS_IRECLAIM))
716 return 1; 716 return 1;
717 }
718 717
719 /* 718 /*
720 * The radix tree lock here protects a thread in xfs_iget from racing 719 * The radix tree lock here protects a thread in xfs_iget from racing
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a9d5b1e06efe..6b6df5802e95 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -891,7 +891,6 @@ DECLARE_EVENT_CLASS(xfs_file_class,
891 __field(dev_t, dev) 891 __field(dev_t, dev)
892 __field(xfs_ino_t, ino) 892 __field(xfs_ino_t, ino)
893 __field(xfs_fsize_t, size) 893 __field(xfs_fsize_t, size)
894 __field(xfs_fsize_t, new_size)
895 __field(loff_t, offset) 894 __field(loff_t, offset)
896 __field(size_t, count) 895 __field(size_t, count)
897 __field(int, flags) 896 __field(int, flags)
@@ -900,17 +899,15 @@ DECLARE_EVENT_CLASS(xfs_file_class,
900 __entry->dev = VFS_I(ip)->i_sb->s_dev; 899 __entry->dev = VFS_I(ip)->i_sb->s_dev;
901 __entry->ino = ip->i_ino; 900 __entry->ino = ip->i_ino;
902 __entry->size = ip->i_d.di_size; 901 __entry->size = ip->i_d.di_size;
903 __entry->new_size = ip->i_new_size;
904 __entry->offset = offset; 902 __entry->offset = offset;
905 __entry->count = count; 903 __entry->count = count;
906 __entry->flags = flags; 904 __entry->flags = flags;
907 ), 905 ),
908 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " 906 TP_printk("dev %d:%d ino 0x%llx size 0x%llx "
909 "offset 0x%llx count 0x%zx ioflags %s", 907 "offset 0x%llx count 0x%zx ioflags %s",
910 MAJOR(__entry->dev), MINOR(__entry->dev), 908 MAJOR(__entry->dev), MINOR(__entry->dev),
911 __entry->ino, 909 __entry->ino,
912 __entry->size, 910 __entry->size,
913 __entry->new_size,
914 __entry->offset, 911 __entry->offset,
915 __entry->count, 912 __entry->count,
916 __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) 913 __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
@@ -978,7 +975,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
978 __field(dev_t, dev) 975 __field(dev_t, dev)
979 __field(xfs_ino_t, ino) 976 __field(xfs_ino_t, ino)
980 __field(loff_t, size) 977 __field(loff_t, size)
981 __field(loff_t, new_size)
982 __field(loff_t, offset) 978 __field(loff_t, offset)
983 __field(size_t, count) 979 __field(size_t, count)
984 __field(int, type) 980 __field(int, type)
@@ -990,7 +986,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
990 __entry->dev = VFS_I(ip)->i_sb->s_dev; 986 __entry->dev = VFS_I(ip)->i_sb->s_dev;
991 __entry->ino = ip->i_ino; 987 __entry->ino = ip->i_ino;
992 __entry->size = ip->i_d.di_size; 988 __entry->size = ip->i_d.di_size;
993 __entry->new_size = ip->i_new_size;
994 __entry->offset = offset; 989 __entry->offset = offset;
995 __entry->count = count; 990 __entry->count = count;
996 __entry->type = type; 991 __entry->type = type;
@@ -998,13 +993,11 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
998 __entry->startblock = irec ? irec->br_startblock : 0; 993 __entry->startblock = irec ? irec->br_startblock : 0;
999 __entry->blockcount = irec ? irec->br_blockcount : 0; 994 __entry->blockcount = irec ? irec->br_blockcount : 0;
1000 ), 995 ),
1001 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " 996 TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count %zd "
1002 "offset 0x%llx count %zd type %s " 997 "type %s startoff 0x%llx startblock %lld blockcount 0x%llx",
1003 "startoff 0x%llx startblock %lld blockcount 0x%llx",
1004 MAJOR(__entry->dev), MINOR(__entry->dev), 998 MAJOR(__entry->dev), MINOR(__entry->dev),
1005 __entry->ino, 999 __entry->ino,
1006 __entry->size, 1000 __entry->size,
1007 __entry->new_size,
1008 __entry->offset, 1001 __entry->offset,
1009 __entry->count, 1002 __entry->count,
1010 __print_symbolic(__entry->type, XFS_IO_TYPES), 1003 __print_symbolic(__entry->type, XFS_IO_TYPES),
@@ -1031,26 +1024,23 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
1031 __field(xfs_ino_t, ino) 1024 __field(xfs_ino_t, ino)
1032 __field(loff_t, isize) 1025 __field(loff_t, isize)
1033 __field(loff_t, disize) 1026 __field(loff_t, disize)
1034 __field(loff_t, new_size)
1035 __field(loff_t, offset) 1027 __field(loff_t, offset)
1036 __field(size_t, count) 1028 __field(size_t, count)
1037 ), 1029 ),
1038 TP_fast_assign( 1030 TP_fast_assign(
1039 __entry->dev = VFS_I(ip)->i_sb->s_dev; 1031 __entry->dev = VFS_I(ip)->i_sb->s_dev;
1040 __entry->ino = ip->i_ino; 1032 __entry->ino = ip->i_ino;
1041 __entry->isize = ip->i_size; 1033 __entry->isize = VFS_I(ip)->i_size;
1042 __entry->disize = ip->i_d.di_size; 1034 __entry->disize = ip->i_d.di_size;
1043 __entry->new_size = ip->i_new_size;
1044 __entry->offset = offset; 1035 __entry->offset = offset;
1045 __entry->count = count; 1036 __entry->count = count;
1046 ), 1037 ),
1047 TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx " 1038 TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx "
1048 "offset 0x%llx count %zd", 1039 "offset 0x%llx count %zd",
1049 MAJOR(__entry->dev), MINOR(__entry->dev), 1040 MAJOR(__entry->dev), MINOR(__entry->dev),
1050 __entry->ino, 1041 __entry->ino,
1051 __entry->isize, 1042 __entry->isize,
1052 __entry->disize, 1043 __entry->disize,
1053 __entry->new_size,
1054 __entry->offset, 1044 __entry->offset,
1055 __entry->count) 1045 __entry->count)
1056); 1046);
@@ -1090,8 +1080,8 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class,
1090DEFINE_EVENT(xfs_itrunc_class, name, \ 1080DEFINE_EVENT(xfs_itrunc_class, name, \
1091 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ 1081 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
1092 TP_ARGS(ip, new_size)) 1082 TP_ARGS(ip, new_size))
1093DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start); 1083DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_start);
1094DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end); 1084DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_end);
1095 1085
1096TRACE_EVENT(xfs_pagecache_inval, 1086TRACE_EVENT(xfs_pagecache_inval,
1097 TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), 1087 TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
@@ -1568,7 +1558,6 @@ DECLARE_EVENT_CLASS(xfs_swap_extent_class,
1568 __field(xfs_ino_t, ino) 1558 __field(xfs_ino_t, ino)
1569 __field(int, format) 1559 __field(int, format)
1570 __field(int, nex) 1560 __field(int, nex)
1571 __field(int, max_nex)
1572 __field(int, broot_size) 1561 __field(int, broot_size)
1573 __field(int, fork_off) 1562 __field(int, fork_off)
1574 ), 1563 ),
@@ -1578,18 +1567,16 @@ DECLARE_EVENT_CLASS(xfs_swap_extent_class,
1578 __entry->ino = ip->i_ino; 1567 __entry->ino = ip->i_ino;
1579 __entry->format = ip->i_d.di_format; 1568 __entry->format = ip->i_d.di_format;
1580 __entry->nex = ip->i_d.di_nextents; 1569 __entry->nex = ip->i_d.di_nextents;
1581 __entry->max_nex = ip->i_df.if_ext_max;
1582 __entry->broot_size = ip->i_df.if_broot_bytes; 1570 __entry->broot_size = ip->i_df.if_broot_bytes;
1583 __entry->fork_off = XFS_IFORK_BOFF(ip); 1571 __entry->fork_off = XFS_IFORK_BOFF(ip);
1584 ), 1572 ),
1585 TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, " 1573 TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
1586 "Max in-fork extents %d, broot size %d, fork offset %d", 1574 "broot size %d, fork offset %d",
1587 MAJOR(__entry->dev), MINOR(__entry->dev), 1575 MAJOR(__entry->dev), MINOR(__entry->dev),
1588 __entry->ino, 1576 __entry->ino,
1589 __print_symbolic(__entry->which, XFS_SWAPEXT_INODES), 1577 __print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
1590 __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR), 1578 __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
1591 __entry->nex, 1579 __entry->nex,
1592 __entry->max_nex,
1593 __entry->broot_size, 1580 __entry->broot_size,
1594 __entry->fork_off) 1581 __entry->fork_off)
1595) 1582)
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index f2fea868d4db..0cf52da9d246 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -175,7 +175,7 @@ xfs_free_eofblocks(
175 * Figure out if there are any blocks beyond the end 175 * Figure out if there are any blocks beyond the end
176 * of the file. If not, then there is nothing to do. 176 * of the file. If not, then there is nothing to do.
177 */ 177 */
178 end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size)); 178 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
179 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 179 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
180 if (last_fsb <= end_fsb) 180 if (last_fsb <= end_fsb)
181 return 0; 181 return 0;
@@ -226,7 +226,14 @@ xfs_free_eofblocks(
226 xfs_ilock(ip, XFS_ILOCK_EXCL); 226 xfs_ilock(ip, XFS_ILOCK_EXCL);
227 xfs_trans_ijoin(tp, ip, 0); 227 xfs_trans_ijoin(tp, ip, 0);
228 228
229 error = xfs_itruncate_data(&tp, ip, ip->i_size); 229 /*
230 * Do not update the on-disk file size. If we update the
231 * on-disk file size and then the system crashes before the
232 * contents of the file are flushed to disk then the files
233 * may be full of holes (ie NULL files bug).
234 */
235 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
236 XFS_ISIZE(ip));
230 if (error) { 237 if (error) {
231 /* 238 /*
232 * If we get an error at this point we simply don't 239 * If we get an error at this point we simply don't
@@ -540,8 +547,8 @@ xfs_release(
540 return 0; 547 return 0;
541 548
542 if ((S_ISREG(ip->i_d.di_mode) && 549 if ((S_ISREG(ip->i_d.di_mode) &&
543 ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || 550 (VFS_I(ip)->i_size > 0 ||
544 ip->i_delayed_blks > 0)) && 551 (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) &&
545 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 552 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
546 (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { 553 (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
547 554
@@ -618,7 +625,7 @@ xfs_inactive(
618 * only one with a reference to the inode. 625 * only one with a reference to the inode.
619 */ 626 */
620 truncate = ((ip->i_d.di_nlink == 0) && 627 truncate = ((ip->i_d.di_nlink == 0) &&
621 ((ip->i_d.di_size != 0) || (ip->i_size != 0) || 628 ((ip->i_d.di_size != 0) || XFS_ISIZE(ip) != 0 ||
622 (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) && 629 (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
623 S_ISREG(ip->i_d.di_mode)); 630 S_ISREG(ip->i_d.di_mode));
624 631
@@ -632,12 +639,12 @@ xfs_inactive(
632 639
633 if (ip->i_d.di_nlink != 0) { 640 if (ip->i_d.di_nlink != 0) {
634 if ((S_ISREG(ip->i_d.di_mode) && 641 if ((S_ISREG(ip->i_d.di_mode) &&
635 ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || 642 (VFS_I(ip)->i_size > 0 ||
636 ip->i_delayed_blks > 0)) && 643 (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) &&
637 (ip->i_df.if_flags & XFS_IFEXTENTS) && 644 (ip->i_df.if_flags & XFS_IFEXTENTS) &&
638 (!(ip->i_d.di_flags & 645 (!(ip->i_d.di_flags &
639 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || 646 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
640 (ip->i_delayed_blks != 0)))) { 647 ip->i_delayed_blks != 0))) {
641 error = xfs_free_eofblocks(mp, ip, 0); 648 error = xfs_free_eofblocks(mp, ip, 0);
642 if (error) 649 if (error)
643 return VN_INACTIVE_CACHE; 650 return VN_INACTIVE_CACHE;
@@ -670,13 +677,18 @@ xfs_inactive(
670 xfs_ilock(ip, XFS_ILOCK_EXCL); 677 xfs_ilock(ip, XFS_ILOCK_EXCL);
671 xfs_trans_ijoin(tp, ip, 0); 678 xfs_trans_ijoin(tp, ip, 0);
672 679
673 error = xfs_itruncate_data(&tp, ip, 0); 680 ip->i_d.di_size = 0;
681 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
682
683 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
674 if (error) { 684 if (error) {
675 xfs_trans_cancel(tp, 685 xfs_trans_cancel(tp,
676 XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 686 XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
677 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 687 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
678 return VN_INACTIVE_CACHE; 688 return VN_INACTIVE_CACHE;
679 } 689 }
690
691 ASSERT(ip->i_d.di_nextents == 0);
680 } else if (S_ISLNK(ip->i_d.di_mode)) { 692 } else if (S_ISLNK(ip->i_d.di_mode)) {
681 693
682 /* 694 /*
@@ -1961,11 +1973,11 @@ xfs_zero_remaining_bytes(
1961 * since nothing can read beyond eof. The space will 1973 * since nothing can read beyond eof. The space will
1962 * be zeroed when the file is extended anyway. 1974 * be zeroed when the file is extended anyway.
1963 */ 1975 */
1964 if (startoff >= ip->i_size) 1976 if (startoff >= XFS_ISIZE(ip))
1965 return 0; 1977 return 0;
1966 1978
1967 if (endoff > ip->i_size) 1979 if (endoff > XFS_ISIZE(ip))
1968 endoff = ip->i_size; 1980 endoff = XFS_ISIZE(ip);
1969 1981
1970 bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? 1982 bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
1971 mp->m_rtdev_targp : mp->m_ddev_targp, 1983 mp->m_rtdev_targp : mp->m_ddev_targp,
@@ -2260,7 +2272,7 @@ xfs_change_file_space(
2260 bf->l_start += offset; 2272 bf->l_start += offset;
2261 break; 2273 break;
2262 case 2: /*SEEK_END*/ 2274 case 2: /*SEEK_END*/
2263 bf->l_start += ip->i_size; 2275 bf->l_start += XFS_ISIZE(ip);
2264 break; 2276 break;
2265 default: 2277 default:
2266 return XFS_ERROR(EINVAL); 2278 return XFS_ERROR(EINVAL);
@@ -2277,7 +2289,7 @@ xfs_change_file_space(
2277 bf->l_whence = 0; 2289 bf->l_whence = 0;
2278 2290
2279 startoffset = bf->l_start; 2291 startoffset = bf->l_start;
2280 fsize = ip->i_size; 2292 fsize = XFS_ISIZE(ip);
2281 2293
2282 /* 2294 /*
2283 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve 2295 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve