Merge branch 'mw-3.1-jul25' of git://oss.oracle.com/git/smushran/linux-2.6 into ocfs2-fixes

author: Joel Becker <jlbec@evilplan.org> 2011-08-22 00:02:57 -0400
committer: Joel Becker <jlbec@evilplan.org> 2011-08-22 00:02:57 -0400
commit: 99b1bb61b225c3eb4d3b196d4f1d041695b19a7e (patch)
tree: 06cabdc34538f3b38a39e3b802ecc1a2ab2aae00 /fs/xfs/xfs_inode.c
parent: c7e25e6e0b0486492c5faaf6312b37413642c48e (diff)
parent: 93862d5e1ab875664c6cc95254fc365028a48bb1 (diff)
1 file changed, 137 insertions, 400 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a098a20ca63e..3cc21ddf9f7e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -37,7 +37,6 @@
 #include "xfs_buf_item.h"
 #include "xfs_inode_item.h"
 #include "xfs_btree.h"
-#include "xfs_btree_trace.h"
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
 #include "xfs_bmap.h"
@@ -52,7 +51,7 @@ kmem_zone_t *xfs_ifork_zone;
 kmem_zone_t *xfs_inode_zone;
 /*
- * Used in xfs_itruncate().  This is the maximum number of extents
+ * Used in xfs_itruncate_extents().  This is the maximum number of extents
 * freed from a file in a single transaction.
 */
 #define XFS_ITRUNC_MAX_EXTENTS  2
@@ -167,7 +166,7 @@ xfs_imap_to_bp(
                dip = (xfs_dinode_t *)xfs_buf_offset(bp,
                                        (i << mp->m_sb.sb_inodelog));
-                di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC &&
+                di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
                            XFS_DINODE_GOOD_VERSION(dip->di_version);
                if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
                                                XFS_ERRTAG_ITOBP_INOTOBP,
@@ -802,7 +801,7 @@ xfs_iread(
         * If we got something that isn't an inode it means someone
         * (nfs or dmi) has a stale handle.
         */
-        if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) {
+        if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
 #ifdef DEBUG
                xfs_alert(mp,
                        "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
@@ -1179,15 +1178,15 @@ xfs_ialloc(
 * at least do it for regular files.
 */
 #ifdef DEBUG
-void
+STATIC void
 xfs_isize_check(
-        xfs_mount_t     *mp,
+        struct xfs_inode        *ip,
-        xfs_inode_t     *ip,
+        xfs_fsize_t             isize)
-        xfs_fsize_t     isize)
 {
-        xfs_fileoff_t   map_first;
+        struct xfs_mount        *mp = ip->i_mount;
-        int             nimaps;
+        xfs_fileoff_t           map_first;
-        xfs_bmbt_irec_t imaps[2];
+        int                     nimaps;
+        xfs_bmbt_irec_t         imaps[2];
        if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
                return;
@@ -1214,168 +1213,14 @@ xfs_isize_check(
        ASSERT(nimaps == 1);
        ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
 }
+#else   /* DEBUG */
+#define xfs_isize_check(ip, isize)
 #endif  /* DEBUG */
 /*
- * Calculate the last possible buffered byte in a file.  This must
+ * Free up the underlying blocks past new_size.  The new size must be smaller
- * include data that was buffered beyond the EOF by the write code.
+ * than the current size.  This routine can be used both for the attribute and
- * This also needs to deal with overflowing the xfs_fsize_t type
+ * data fork, and does not modify the inode size, which is left to the caller.
- * which can happen for sizes near the limit.
- *
- * We also need to take into account any blocks beyond the EOF.  It
- * may be the case that they were buffered by a write which failed.
- * In that case the pages will still be in memory, but the inode size
- * will never have been updated.
- */
-STATIC xfs_fsize_t
-xfs_file_last_byte(
-        xfs_inode_t     *ip)
-{
-        xfs_mount_t     *mp;
-        xfs_fsize_t     last_byte;
-        xfs_fileoff_t   last_block;
-        xfs_fileoff_t   size_last_block;
-        int             error;
-        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
-        mp = ip->i_mount;
-        /*
-         * Only check for blocks beyond the EOF if the extents have
-         * been read in.  This eliminates the need for the inode lock,
-         * and it also saves us from looking when it really isn't
-         * necessary.
-         */
-        if (ip->i_df.if_flags & XFS_IFEXTENTS) {
-                xfs_ilock(ip, XFS_ILOCK_SHARED);
-                error = xfs_bmap_last_offset(NULL, ip, &last_block,
-                        XFS_DATA_FORK);
-                xfs_iunlock(ip, XFS_ILOCK_SHARED);
-                if (error) {
-                        last_block = 0;
-                }
-        } else {
-                last_block = 0;
-        }
-        size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size);
-        last_block = XFS_FILEOFF_MAX(last_block, size_last_block);
-        last_byte = XFS_FSB_TO_B(mp, last_block);
-        if (last_byte < 0) {
-                return XFS_MAXIOFFSET(mp);
-        }
-        last_byte += (1 << mp->m_writeio_log);
-        if (last_byte < 0) {
-                return XFS_MAXIOFFSET(mp);
-        }
-        return last_byte;
-}
-/*
- * Start the truncation of the file to new_size.  The new size
- * must be smaller than the current size.  This routine will
- * clear the buffer and page caches of file data in the removed
- * range, and xfs_itruncate_finish() will remove the underlying
- * disk blocks.
- *
- * The inode must have its I/O lock locked EXCLUSIVELY, and it
- * must NOT have the inode lock held at all.  This is because we're
- * calling into the buffer/page cache code and we can't hold the
- * inode lock when we do so.
- *
- * We need to wait for any direct I/Os in flight to complete before we
- * proceed with the truncate. This is needed to prevent the extents
- * being read or written by the direct I/Os from being removed while the
- * I/O is in flight as there is no other method of synchronising
- * direct I/O with the truncate operation.  Also, because we hold
- * the IOLOCK in exclusive mode, we prevent new direct I/Os from being
- * started until the truncate completes and drops the lock. Essentially,
- * the xfs_ioend_wait() call forms an I/O barrier that provides strict
- * ordering between direct I/Os and the truncate operation.
- *
- * The flags parameter can have either the value XFS_ITRUNC_DEFINITE
- * or XFS_ITRUNC_MAYBE.  The XFS_ITRUNC_MAYBE value should be used
- * in the case that the caller is locking things out of order and
- * may not be able to call xfs_itruncate_finish() with the inode lock
- * held without dropping the I/O lock.  If the caller must drop the
- * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start()
- * must be called again with all the same restrictions as the initial
- * call.
- */
-int
-xfs_itruncate_start(
-        xfs_inode_t     *ip,
-        uint            flags,
-        xfs_fsize_t     new_size)
-{
-        xfs_fsize_t     last_byte;
-        xfs_off_t       toss_start;
-        xfs_mount_t     *mp;
-        int             error = 0;
-        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
-        ASSERT((new_size == 0) || (new_size <= ip->i_size));
-        ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
-               (flags == XFS_ITRUNC_MAYBE));
-        mp = ip->i_mount;
-        /* wait for the completion of any pending DIOs */
-        if (new_size == 0 || new_size < ip->i_size)
-                xfs_ioend_wait(ip);
-        /*
-         * Call toss_pages or flushinval_pages to get rid of pages
-         * overlapping the region being removed.  We have to use
-         * the less efficient flushinval_pages in the case that the
-         * caller may not be able to finish the truncate without
-         * dropping the inode's I/O lock.  Make sure
-         * to catch any pages brought in by buffers overlapping
-         * the EOF by searching out beyond the isize by our
-         * block size. We round new_size up to a block boundary
-         * so that we don't toss things on the same block as
-         * new_size but before it.
-         *
-         * Before calling toss_page or flushinval_pages, make sure to
-         * call remapf() over the same region if the file is mapped.
-         * This frees up mapped file references to the pages in the
-         * given range and for the flushinval_pages case it ensures
-         * that we get the latest mapped changes flushed out.
-         */
-        toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
-        toss_start = XFS_FSB_TO_B(mp, toss_start);
-        if (toss_start < 0) {
-                /*
-                 * The place to start tossing is beyond our maximum
-                 * file size, so there is no way that the data extended
-                 * out there.
-                 */
-                return 0;
-        }
-        last_byte = xfs_file_last_byte(ip);
-        trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte);
-        if (last_byte > toss_start) {
-                if (flags & XFS_ITRUNC_DEFINITE) {
-                        xfs_tosspages(ip, toss_start,
-                                        -1, FI_REMAPF_LOCKED);
-                } else {
-                        error = xfs_flushinval_pages(ip, toss_start,
-                                        -1, FI_REMAPF_LOCKED);
-                }
-        }
-#ifdef DEBUG
-        if (new_size == 0) {
-                ASSERT(VN_CACHED(VFS_I(ip)) == 0);
-        }
-#endif
-        return error;
-}
-/*
- * Shrink the file to the given new_size.  The new size must be smaller than
- * the current size.  This will free up the underlying blocks in the removed
- * range after a call to xfs_itruncate_start() or xfs_atruncate_start().
 *
 * The transaction passed to this routine must have made a permanent log
 * reservation of at least XFS_ITRUNCATE_LOG_RES.  This routine may commit the
@@ -1387,31 +1232,6 @@ xfs_itruncate_start(
 * will be "held" within the returned transaction.  This routine does NOT
 * require any disk space to be reserved for it within the transaction.
 *
- * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it
- * indicates the fork which is to be truncated.  For the attribute fork we only
- * support truncation to size 0.
- *
- * We use the sync parameter to indicate whether or not the first transaction
- * we perform might have to be synchronous.  For the attr fork, it needs to be
- * so if the unlink of the inode is not yet known to be permanent in the log.
- * This keeps us from freeing and reusing the blocks of the attribute fork
- * before the unlink of the inode becomes permanent.
- *
- * For the data fork, we normally have to run synchronously if we're being
- * called out of the inactive path or we're being called out of the create path
- * where we're truncating an existing file.  Either way, the truncate needs to
- * be sync so blocks don't reappear in the file with altered data in case of a
- * crash.  wsync filesystems can run the first case async because anything that
- * shrinks the inode has to run sync so by the time we're called here from
- * inactive, the inode size is permanently set to 0.
- *
- * Calls from the truncate path always need to be sync unless we're in a wsync
- * filesystem and the file has already been unlinked.
- *
- * The caller is responsible for correctly setting the sync parameter.  It gets
- * too hard for us to guess here which path we're being called out of just
- * based on inode state.
- *
 * If we get an error, we must return with the inode locked and linked into the
 * current transaction. This keeps things simple for the higher level code,
 * because it always knows that the inode is locked and held in the transaction
@@ -1419,124 +1239,30 @@ xfs_itruncate_start(
 * dirty on error so that transactions can be easily aborted if possible.
 */
 int
-xfs_itruncate_finish(
+xfs_itruncate_extents(
-        xfs_trans_t     **tp,
+        struct xfs_trans        **tpp,
-        xfs_inode_t     *ip,
+        struct xfs_inode        *ip,
-        xfs_fsize_t     new_size,
+        int                     whichfork,
-        int             fork,
+        xfs_fsize_t             new_size)
-        int             sync)
 {
-        xfs_fsblock_t   first_block;
+        struct xfs_mount        *mp = ip->i_mount;
-        xfs_fileoff_t   first_unmap_block;
+        struct xfs_trans        *tp = *tpp;
-        xfs_fileoff_t   last_block;
+        struct xfs_trans        *ntp;
-        xfs_filblks_t   unmap_len=0;
+        xfs_bmap_free_t         free_list;
-        xfs_mount_t     *mp;
+        xfs_fsblock_t           first_block;
-        xfs_trans_t     *ntp;
+        xfs_fileoff_t           first_unmap_block;
-        int             done;
+        xfs_fileoff_t           last_block;
-        int             committed;
+        xfs_filblks_t           unmap_len;
-        xfs_bmap_free_t free_list;
+        int                     committed;
-        int             error;
+        int                     error = 0;
+        int                     done = 0;
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-        ASSERT((new_size == 0) || (new_size <= ip->i_size));
+        ASSERT(new_size <= ip->i_size);
-        ASSERT(*tp != NULL);
+        ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
-        ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
-        ASSERT(ip->i_transp == *tp);
        ASSERT(ip->i_itemp != NULL);
        ASSERT(ip->i_itemp->ili_lock_flags == 0);
+        ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
-        ntp = *tp;
-        mp = (ntp)->t_mountp;
-        ASSERT(! XFS_NOT_DQATTACHED(mp, ip));
-        /*
-         * We only support truncating the entire attribute fork.
-         */
-        if (fork == XFS_ATTR_FORK) {
-                new_size = 0LL;
-        }
-        first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
-        trace_xfs_itruncate_finish_start(ip, new_size);
-        /*
-         * The first thing we do is set the size to new_size permanently
-         * on disk.  This way we don't have to worry about anyone ever
-         * being able to look at the data being freed even in the face
-         * of a crash.  What we're getting around here is the case where
-         * we free a block, it is allocated to another file, it is written
-         * to, and then we crash.  If the new data gets written to the
-         * file but the log buffers containing the free and reallocation
-         * don't, then we'd end up with garbage in the blocks being freed.
-         * As long as we make the new_size permanent before actually
-         * freeing any blocks it doesn't matter if they get written to.
-         *
-         * The callers must signal into us whether or not the size
-         * setting here must be synchronous.  There are a few cases
-         * where it doesn't have to be synchronous.  Those cases
-         * occur if the file is unlinked and we know the unlink is
-         * permanent or if the blocks being truncated are guaranteed
-         * to be beyond the inode eof (regardless of the link count)
-         * and the eof value is permanent.  Both of these cases occur
-         * only on wsync-mounted filesystems.  In those cases, we're
-         * guaranteed that no user will ever see the data in the blocks
-         * that are being truncated so the truncate can run async.
-         * In the free beyond eof case, the file may wind up with
-         * more blocks allocated to it than it needs if we crash
-         * and that won't get fixed until the next time the file
-         * is re-opened and closed but that's ok as that shouldn't
-         * be too many blocks.
-         *
-         * However, we can't just make all wsync xactions run async
-         * because there's one call out of the create path that needs
-         * to run sync where it's truncating an existing file to size
-         * 0 whose size is > 0.
-         *
-         * It's probably possible to come up with a test in this
-         * routine that would correctly distinguish all the above
-         * cases from the values of the function parameters and the
-         * inode state but for sanity's sake, I've decided to let the
-         * layers above just tell us.  It's simpler to correctly figure
-         * out in the layer above exactly under what conditions we
-         * can run async and I think it's easier for others read and
-         * follow the logic in case something has to be changed.
-         * cscope is your friend -- rcc.
-         *
-         * The attribute fork is much simpler.
-         *
-         * For the attribute fork we allow the caller to tell us whether
-         * the unlink of the inode that led to this call is yet permanent
-         * in the on disk log.  If it is not and we will be freeing extents
-         * in this inode then we make the first transaction synchronous
-         * to make sure that the unlink is permanent by the time we free
-         * the blocks.
-         */
-        if (fork == XFS_DATA_FORK) {
-                if (ip->i_d.di_nextents > 0) {
-                        /*
-                         * If we are not changing the file size then do
-                         * not update the on-disk file size - we may be
-                         * called from xfs_inactive_free_eofblocks().  If we
-                         * update the on-disk file size and then the system
-                         * crashes before the contents of the file are
-                         * flushed to disk then the files may be full of
-                         * holes (ie NULL files bug).
-                         */
-                        if (ip->i_size != new_size) {
-                                ip->i_d.di_size = new_size;
-                                ip->i_size = new_size;
-                                xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
-                        }
-                }
-        } else if (sync) {
-                ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC));
-                if (ip->i_d.di_anextents > 0)
-                        xfs_trans_set_sync(ntp);
-        }
-        ASSERT(fork == XFS_DATA_FORK ||
-                (fork == XFS_ATTR_FORK &&
-                        ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) ||
-                         (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC)))));
        /*
         * Since it is possible for space to become allocated beyond
@@ -1547,128 +1273,142 @@ xfs_itruncate_finish(
         * beyond the maximum file size (ie it is the same as last_block),
         * then there is nothing to do.
         */
+        first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
        last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
-        ASSERT(first_unmap_block <= last_block);
+        if (first_unmap_block == last_block)
-        done = 0;
+                return 0;
-        if (last_block == first_unmap_block) {
-                done = 1;
+        ASSERT(first_unmap_block < last_block);
-        } else {
+        unmap_len = last_block - first_unmap_block + 1;
-                unmap_len = last_block - first_unmap_block + 1;
-        }
        while (!done) {
-                /*
-                 * Free up up to XFS_ITRUNC_MAX_EXTENTS.  xfs_bunmapi()
-                 * will tell us whether it freed the entire range or
-                 * not.  If this is a synchronous mount (wsync),
-                 * then we can tell bunmapi to keep all the
-                 * transactions asynchronous since the unlink
-                 * transaction that made this inode inactive has
-                 * already hit the disk.  There's no danger of
-                 * the freed blocks being reused, there being a
-                 * crash, and the reused blocks suddenly reappearing
-                 * in this file with garbage in them once recovery
-                 * runs.
-                 */
                xfs_bmap_init(&free_list, &first_block);
-                error = xfs_bunmapi(ntp, ip,
+                error = xfs_bunmapi(tp, ip,
                                    first_unmap_block, unmap_len,
-                                    xfs_bmapi_aflag(fork),
+                                    xfs_bmapi_aflag(whichfork),
                                    XFS_ITRUNC_MAX_EXTENTS,
                                    &first_block, &free_list,
                                    &done);
-                if (error) {
+                if (error)
-                        /*
+                        goto out_bmap_cancel;
-                         * If the bunmapi call encounters an error,
-                         * return to the caller where the transaction
-                         * can be properly aborted.  We just need to
-                         * make sure we're not holding any resources
-                         * that we were not when we came in.
-                         */
-                        xfs_bmap_cancel(&free_list);
-                        return error;
-                }
                /*
                 * Duplicate the transaction that has the permanent
                 * reservation and commit the old transaction.
                 */
-                error = xfs_bmap_finish(tp, &free_list, &committed);
+                error = xfs_bmap_finish(&tp, &free_list, &committed);
-                ntp = *tp;
                if (committed)
-                        xfs_trans_ijoin(ntp, ip);
+                        xfs_trans_ijoin(tp, ip);
+                if (error)
-                if (error) {
+                        goto out_bmap_cancel;
-                        /*
-                         * If the bmap finish call encounters an error, return
-                         * to the caller where the transaction can be properly
-                         * aborted.  We just need to make sure we're not
-                         * holding any resources that we were not when we came
-                         * in.
-                         *
-                         * Aborting from this point might lose some blocks in
-                         * the file system, but oh well.
-                         */
-                        xfs_bmap_cancel(&free_list);
-                        return error;
-                }
                if (committed) {
                        /*
                         * Mark the inode dirty so it will be logged and
                         * moved forward in the log as part of every commit.
                         */
-                        xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
+                        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
                }
-                ntp = xfs_trans_dup(ntp);
+                ntp = xfs_trans_dup(tp);
-                error = xfs_trans_commit(*tp, 0);
+                error = xfs_trans_commit(tp, 0);
-                *tp = ntp;
+                tp = ntp;
-                xfs_trans_ijoin(ntp, ip);
+                xfs_trans_ijoin(tp, ip);
                if (error)
-                        return error;
+                        goto out;
                /*
-                 * transaction commit worked ok so we can drop the extra ticket
+                 * Transaction commit worked ok so we can drop the extra ticket
                 * reference that we gained in xfs_trans_dup()
                 */
-                xfs_log_ticket_put(ntp->t_ticket);
+                xfs_log_ticket_put(tp->t_ticket);
-                error = xfs_trans_reserve(ntp, 0,
+                error = xfs_trans_reserve(tp, 0,
                                        XFS_ITRUNCATE_LOG_RES(mp), 0,
                                        XFS_TRANS_PERM_LOG_RES,
                                        XFS_ITRUNCATE_LOG_COUNT);
                if (error)
-                        return error;
+                        goto out;
        }
+out:
+        *tpp = tp;
+        return error;
+out_bmap_cancel:
        /*
-         * Only update the size in the case of the data fork, but
+         * If xfs_bunmapi() or xfs_bmap_finish() fails, return to the caller so
-         * always re-log the inode so that our permanent transaction
+         * the transaction can be properly aborted.  We just need to make sure
-         * can keep on rolling it forward in the log.
+         * we're not holding any resources that we were not when we came in.
         */
-        if (fork == XFS_DATA_FORK) {
+        xfs_bmap_cancel(&free_list);
-                xfs_isize_check(mp, ip, new_size);
+        goto out;
+}
+int
+xfs_itruncate_data(
+        struct xfs_trans        **tpp,
+        struct xfs_inode        *ip,
+        xfs_fsize_t             new_size)
+{
+        int                     error;
+        trace_xfs_itruncate_data_start(ip, new_size);
+        /*
+         * The first thing we do is set the size to new_size permanently on
+         * disk.  This way we don't have to worry about anyone ever being able
+         * to look at the data being freed even in the face of a crash.
+         * What we're getting around here is the case where we free a block, it
+         * is allocated to another file, it is written to, and then we crash.
+         * If the new data gets written to the file but the log buffers
+         * containing the free and reallocation don't, then we'd end up with
+         * garbage in the blocks being freed.  As long as we make the new_size
+         * permanent before actually freeing any blocks it doesn't matter if
+         * they get written to.
+         */
+        if (ip->i_d.di_nextents > 0) {
                /*
-                 * If we are not changing the file size then do
+                 * If we are not changing the file size then do not update
-                 * not update the on-disk file size - we may be
+                 * the on-disk file size - we may be called from
-                 * called from xfs_inactive_free_eofblocks().  If we
+                 * xfs_inactive_free_eofblocks().  If we update the on-disk
-                 * update the on-disk file size and then the system
+                 * file size and then the system crashes before the contents
-                 * crashes before the contents of the file are
+                 * of the file are flushed to disk then the files may be
-                 * flushed to disk then the files may be full of
+                 * full of holes (ie NULL files bug).
-                 * holes (ie NULL files bug).
                 */
                if (ip->i_size != new_size) {
                        ip->i_d.di_size = new_size;
                        ip->i_size = new_size;
+                        xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
                }
        }
-        xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
-        ASSERT((new_size != 0) ||
+        error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size);
-               (fork == XFS_ATTR_FORK) ||
+        if (error)
-               (ip->i_delayed_blks == 0));
+                return error;
-        ASSERT((new_size != 0) ||
-               (fork == XFS_ATTR_FORK) ||
+        /*
-               (ip->i_d.di_nextents == 0));
+         * If we are not changing the file size then do not update the on-disk
-        trace_xfs_itruncate_finish_end(ip, new_size);
+         * file size - we may be called from xfs_inactive_free_eofblocks().
+         * If we update the on-disk file size and then the system crashes
+         * before the contents of the file are flushed to disk then the files
+         * may be full of holes (ie NULL files bug).
+         */
+        xfs_isize_check(ip, new_size);
+        if (ip->i_size != new_size) {
+                ip->i_d.di_size = new_size;
+                ip->i_size = new_size;
+        }
+        ASSERT(new_size != 0 || ip->i_delayed_blks == 0);
+        ASSERT(new_size != 0 || ip->i_d.di_nextents == 0);
+        /*
+         * Always re-log the inode so that our permanent transaction can keep
+         * on rolling it forward in the log.
+         */
+        xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
+        trace_xfs_itruncate_data_end(ip, new_size);
        return 0;
 }
@@ -1694,7 +1434,6 @@ xfs_iunlink(
        ASSERT(ip->i_d.di_nlink == 0);
        ASSERT(ip->i_d.di_mode != 0);
-        ASSERT(ip->i_transp == tp);
        mp = tp->t_mountp;
@@ -1717,7 +1456,7 @@ xfs_iunlink(
        ASSERT(agi->agi_unlinked[bucket_index]);
        ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
-        if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) {
+        if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
                /*
                 * There is already another inode in the bucket we need
                 * to add ourselves to.  Add us at the front of the list.
@@ -1728,8 +1467,7 @@ xfs_iunlink(
                if (error)
                        return error;
-                ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO);
+                ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
-                /* both on-disk, don't endian flip twice */
                dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
                offset = ip->i_imap.im_boffset +
                        offsetof(xfs_dinode_t, di_next_unlinked);
@@ -1794,7 +1532,7 @@ xfs_iunlink_remove(
        agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
        ASSERT(agino != 0);
        bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
-        ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO);
+        ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));
        ASSERT(agi->agi_unlinked[bucket_index]);
        if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
@@ -1959,7 +1697,7 @@ xfs_ifree_cluster(
                 * stale first, we will not attempt to lock them in the loop
                 * below as the XFS_ISTALE flag will be set.
                 */
-                lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+                lip = bp->b_fspriv;
                while (lip) {
                        if (lip->li_type == XFS_LI_INODE) {
                                iip = (xfs_inode_log_item_t *)lip;
@@ -2086,7 +1824,6 @@ xfs_ifree(
        xfs_buf_t               *ibp;
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-        ASSERT(ip->i_transp == tp);
        ASSERT(ip->i_d.di_nlink == 0);
        ASSERT(ip->i_d.di_nextents == 0);
        ASSERT(ip->i_d.di_anextents == 0);
@@ -2733,7 +2470,7 @@ cluster_corrupt_out:
                 * mark the buffer as an error and call them.  Otherwise
                 * mark it as stale and brelse.
                 */
-                if (XFS_BUF_IODONE_FUNC(bp)) {
+                if (bp->b_iodone) {
                        XFS_BUF_UNDONE(bp);
                        XFS_BUF_STALE(bp);
                        XFS_BUF_ERROR(bp,EIO);
@@ -2920,7 +2657,7 @@ xfs_iflush_int(
         */
        xfs_synchronize_times(ip);
-        if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC,
+        if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
                               mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
                xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
                        "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
@@ -3073,8 +2810,8 @@ xfs_iflush_int(
                 */
                xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
-                ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+                ASSERT(bp->b_fspriv != NULL);
-                ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL);
+                ASSERT(bp->b_iodone != NULL);
        } else {
                /*
                 * We're flushing an inode which is not in the AIL and has
author	Joel Becker <jlbec@evilplan.org>	2011-08-22 00:02:57 -0400
committer	Joel Becker <jlbec@evilplan.org>	2011-08-22 00:02:57 -0400
commit	99b1bb61b225c3eb4d3b196d4f1d041695b19a7e (patch)
tree	06cabdc34538f3b38a39e3b802ecc1a2ab2aae00 /fs/xfs/xfs_inode.c
parent	c7e25e6e0b0486492c5faaf6312b37413642c48e (diff)
parent	93862d5e1ab875664c6cc95254fc365028a48bb1 (diff)