Merge branch 'xfs-misc-fixes-for-4.3-2' into for-next

author: Dave Chinner <david@fromorbit.com> 2015-08-19 19:28:45 -0400
committer: Dave Chinner <david@fromorbit.com> 2015-08-19 19:28:45 -0400
commit: aa493382cb8c5768ba452d87f175fc2aff63911d (patch)
tree: 85896c37038d3f5eda1283d090eeb95ee6ec9c82
parent: 5be203ad115c1d8294e8685253e05fcea0202e04 (diff)
parent: 3403ccc0c9f069c40ea751a93ac6746f5ef2116a (diff)
15 files changed, 235 insertions, 106 deletions
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 3349c9a1e845..ff065578969f 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -139,6 +139,8 @@ xfs_attr_get(
        args.value = value;
        args.valuelen = *valuelenp;
+        /* Entirely possible to look up a name which doesn't exist */
+        args.op_flags = XFS_DA_OP_OKNOENT;
        lock_mode = xfs_ilock_attr_map_shared(ip);
        if (!xfs_inode_hasattr(ip))
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 3264d81488db..cd2201f5ab52 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -1822,6 +1822,7 @@ xfs_da3_path_shift(
        struct xfs_da_args      *args;
        struct xfs_da_node_entry *btree;
        struct xfs_da3_icnode_hdr nodehdr;
+        struct xfs_buf          *bp;
        xfs_dablk_t             blkno = 0;
        int                     level;
        int                     error;
@@ -1866,20 +1867,24 @@ xfs_da3_path_shift(
         */
        for (blk++, level++; level < path->active; blk++, level++) {
                /*
-                 * Release the old block.
+                 * Read the next child block into a local buffer.
-                 * (if it's dirty, trans won't actually let go)
                 */
-                if (release)
+                error = xfs_da3_node_read(args->trans, dp, blkno, -1, &bp,
-                        xfs_trans_brelse(args->trans, blk->bp);
+                                          args->whichfork);
+                if (error)
+                        return error;
                /*
-                 * Read the next child block.
+                 * Release the old block (if it's dirty, the trans doesn't
+                 * actually let go) and swap the local buffer into the path
+                 * structure. This ensures failure of the above read doesn't set
+                 * a NULL buffer in an active slot in the path.
                 */
+                if (release)
+                        xfs_trans_brelse(args->trans, blk->bp);
                blk->blkno = blkno;
-                error = xfs_da3_node_read(args->trans, dp, blkno, -1,
+                blk->bp = bp;
-                                        &blk->bp, args->whichfork);
-                if (error)
-                        return error;
                info = blk->bp->b_addr;
                ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
                       info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 74bcbabfa523..b14bbd6bb05f 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -680,8 +680,15 @@ typedef struct xfs_attr_leaf_name_remote {
 typedef struct xfs_attr_leafblock {
        xfs_attr_leaf_hdr_t     hdr;    /* constant-structure header block */
        xfs_attr_leaf_entry_t   entries[1];     /* sorted on key, not name */
-        xfs_attr_leaf_name_local_t namelist;    /* grows from bottom of buf */
+        /*
-        xfs_attr_leaf_name_remote_t valuelist;  /* grows from bottom of buf */
+         * The rest of the block contains the following structures after the
+         * leaf entries, growing from the bottom up. The variables are never
+         * referenced and defining them can actually make gcc optimize away
+         * accesses to the 'entries' array above index 0 so don't do that.
+         *
+         * xfs_attr_leaf_name_local_t namelist;
+         * xfs_attr_leaf_name_remote_t valuelist;
+         */
 } xfs_attr_leafblock_t;
 /*
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index e0ba97610f01..9de401d297e5 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -362,6 +362,7 @@ xfs_dir_lookup(
        struct xfs_da_args *args;
        int             rval;
        int             v;              /* type-checking value */
+        int             lock_mode;
        ASSERT(S_ISDIR(dp->i_d.di_mode));
        XFS_STATS_INC(xs_dir_lookup);
@@ -387,6 +388,7 @@ xfs_dir_lookup(
        if (ci_name)
                args->op_flags |= XFS_DA_OP_CILOOKUP;
+        lock_mode = xfs_ilock_data_map_shared(dp);
        if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
                rval = xfs_dir2_sf_lookup(args);
                goto out_check_rval;
@@ -419,6 +421,7 @@ out_check_rval:
                }
        }
 out_free:
+        xfs_iunlock(dp, lock_mode);
        kmem_free(args);
        return rval;
 }
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 6a57fdbc63ef..824131e71bc5 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -252,7 +252,8 @@ xfs_dir3_data_reada_verify(
                return;
        case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
        case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
-                xfs_dir3_data_verify(bp);
+                bp->b_ops = &xfs_dir3_data_buf_ops;
+                bp->b_ops->verify_read(bp);
                return;
        default:
                xfs_buf_ioerror(bp, -EFSCORRUPTED);
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 21009dbdc21d..47425140f343 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -186,9 +186,6 @@ xfs_mount_validate_sb(
        if (xfs_sb_version_hassparseinodes(sbp)) {
                uint32_t        align;
-                xfs_alert(mp,
-        "EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");
                align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize
                                >> sbp->sb_blocklog;
                if (sbp->sb_inoalignmt != align) {
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 098cd78fe708..a989a9c7edb7 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -171,6 +171,7 @@ xfs_dir2_block_getdents(
        int                     wantoff;        /* starting block offset */
        xfs_off_t               cook;
        struct xfs_da_geometry  *geo = args->geo;
+        int                     lock_mode;
        /*
         * If the block number in the offset is out of range, we're done.
@@ -178,7 +179,9 @@ xfs_dir2_block_getdents(
        if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)
                return 0;
+        lock_mode = xfs_ilock_data_map_shared(dp);
        error = xfs_dir3_block_read(NULL, dp, &bp);
+        xfs_iunlock(dp, lock_mode);
        if (error)
                return error;
@@ -529,9 +532,12 @@ xfs_dir2_leaf_getdents(
                 * current buffer, need to get another one.
                 */
                if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) {
+                        int     lock_mode;
+                        lock_mode = xfs_ilock_data_map_shared(dp);
                        error = xfs_dir2_leaf_readbuf(args, bufsize, map_info,
                                                      &curoff, &bp);
+                        xfs_iunlock(dp, lock_mode);
                        if (error || !map_info->map_valid)
                                break;
@@ -653,7 +659,6 @@ xfs_readdir(
        struct xfs_da_args      args = { NULL };
        int                     rval;
        int                     v;
-        uint                    lock_mode;
        trace_xfs_readdir(dp);
@@ -666,7 +671,7 @@ xfs_readdir(
        args.dp = dp;
        args.geo = dp->i_mount->m_dir_geo;
-        lock_mode = xfs_ilock_data_map_shared(dp);
+        xfs_ilock(dp, XFS_IOLOCK_SHARED);
        if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
                rval = xfs_dir2_sf_getdents(&args, ctx);
        else if ((rval = xfs_dir2_isblock(&args, &v)))
@@ -675,7 +680,7 @@ xfs_readdir(
                rval = xfs_dir2_block_getdents(&args, ctx);
        else
                rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize);
-        xfs_iunlock(dp, lock_mode);
+        xfs_iunlock(dp, XFS_IOLOCK_SHARED);
        return rval;
 }
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 6964d7ceba96..30cb3afb67f0 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -251,7 +251,7 @@ xfs_qm_init_dquot_blk(
                d->dd_diskdq.d_id = cpu_to_be32(curid);
                d->dd_diskdq.d_flags = type;
                if (xfs_sb_version_hascrc(&mp->m_sb)) {
-                        uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
+                        uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid);
                        xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
                                         XFS_DQUOT_CRC_OFF);
                }
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index db4acc1c3e73..de2c2376242b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -317,24 +317,33 @@ xfs_file_read_iter(
                return -EIO;
        /*
-         * Locking is a bit tricky here. If we take an exclusive lock
+         * Locking is a bit tricky here. If we take an exclusive lock for direct
-         * for direct IO, we effectively serialise all new concurrent
+         * IO, we effectively serialise all new concurrent read IO to this file
-         * read IO to this file and block it behind IO that is currently in
+         * and block it behind IO that is currently in progress because IO in
-         * progress because IO in progress holds the IO lock shared. We only
+         * progress holds the IO lock shared. We only need to hold the lock
-         * need to hold the lock exclusive to blow away the page cache, so
+         * exclusive to blow away the page cache, so only take lock exclusively
-         * only take lock exclusively if the page cache needs invalidation.
+         * if the page cache needs invalidation. This allows the normal direct
-         * This allows the normal direct IO case of no page cache pages to
+         * IO case of no page cache pages to proceed concurrently without
-         * proceeed concurrently without serialisation.
+         * serialisation.
         */
        xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
        if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
                xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
                xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
+                /*
+                 * The generic dio code only flushes the range of the particular
+                 * I/O. Because we take an exclusive lock here, this whole
+                 * sequence is considerably more expensive for us. This has a
+                 * noticeable performance impact for any file with cached pages,
+                 * even when outside of the range of the particular I/O.
+                 *
+                 * Hence, amortize the cost of the lock against a full file
+                 * flush and reduce the chances of repeated iolock cycles going
+                 * forward.
+                 */
                if (inode->i_mapping->nrpages) {
-                        ret = filemap_write_and_wait_range(
+                        ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
-                                                        VFS_I(ip)->i_mapping,
-                                                        pos, pos + size - 1);
                        if (ret) {
                                xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
                                return ret;
@@ -345,9 +354,7 @@ xfs_file_read_iter(
                         * we fail to invalidate a page, but this should never
                         * happen on XFS. Warn if it does fail.
                         */
-                        ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+                        ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
-                                        pos >> PAGE_CACHE_SHIFT,
-                                        (pos + size - 1) >> PAGE_CACHE_SHIFT);
                        WARN_ON_ONCE(ret);
                        ret = 0;
                }
@@ -733,19 +740,19 @@ xfs_file_dio_aio_write(
        pos = iocb->ki_pos;
        end = pos + count - 1;
+        /*
+         * See xfs_file_read_iter() for why we do a full-file flush here.
+         */
        if (mapping->nrpages) {
-                ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+                ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
-                                                   pos, end);
                if (ret)
                        goto out;
                /*
-                 * Invalidate whole pages. This can return an error if
+                 * Invalidate whole pages. This can return an error if we fail
-                 * we fail to invalidate a page, but this should never
+                 * to invalidate a page, but this should never happen on XFS.
-                 * happen on XFS. Warn if it does fail.
+                 * Warn if it does fail.
                 */
-                ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+                ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
-                                        pos >> PAGE_CACHE_SHIFT,
-                                        end >> PAGE_CACHE_SHIFT);
                WARN_ON_ONCE(ret);
                ret = 0;
        }
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 9b3438a7680f..ee3aaa0a5317 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -250,7 +250,7 @@ xfs_growfs_data_private(
                agf->agf_freeblks = cpu_to_be32(tmpsize);
                agf->agf_longest = cpu_to_be32(tmpsize);
                if (xfs_sb_version_hascrc(&mp->m_sb))
-                        uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_uuid);
+                        uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
                error = xfs_bwrite(bp);
                xfs_buf_relse(bp);
@@ -273,7 +273,7 @@ xfs_growfs_data_private(
                if (xfs_sb_version_hascrc(&mp->m_sb)) {
                        agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
                        agfl->agfl_seqno = cpu_to_be32(agno);
-                        uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid);
+                        uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
                }
                agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
@@ -309,7 +309,7 @@ xfs_growfs_data_private(
                agi->agi_newino = cpu_to_be32(NULLAGINO);
                agi->agi_dirino = cpu_to_be32(NULLAGINO);
                if (xfs_sb_version_hascrc(&mp->m_sb))
-                        uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid);
+                        uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
                if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
                        agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
                        agi->agi_free_level = cpu_to_be32(1);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d8230ba1b471..30555f8fd44b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -164,7 +164,7 @@ xfs_ilock(
               (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
        ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
-        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
        if (lock_flags & XFS_IOLOCK_EXCL)
                mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
@@ -212,7 +212,7 @@ xfs_ilock_nowait(
               (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
        ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
-        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
        if (lock_flags & XFS_IOLOCK_EXCL) {
                if (!mrtryupdate(&ip->i_iolock))
@@ -281,7 +281,7 @@ xfs_iunlock(
               (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
        ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
-        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
        ASSERT(lock_flags != 0);
        if (lock_flags & XFS_IOLOCK_EXCL)
@@ -362,32 +362,52 @@ int xfs_lots_retries;
 int xfs_lock_delays;
 #endif
+#ifdef CONFIG_LOCKDEP
+static bool
+xfs_lockdep_subclass_ok(
+        int subclass)
+{
+        return subclass < MAX_LOCKDEP_SUBCLASSES;
+}
+#else
+#define xfs_lockdep_subclass_ok(subclass)       (true)
+#endif
 /*
 * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
- * value. This shouldn't be called for page fault locking, but we also need to
+ * value. This can be called for any type of inode lock combination, including
- * ensure we don't overrun the number of lockdep subclasses for the iolock or
+ * parent locking. Care must be taken to ensure we don't overrun the subclass
- * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
+ * storage fields in the class mask we build.
 */
 static inline int
 xfs_lock_inumorder(int lock_mode, int subclass)
 {
+        int     class = 0;
+        ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP |
+                              XFS_ILOCK_RTSUM)));
+        ASSERT(xfs_lockdep_subclass_ok(subclass));
        if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
-                ASSERT(subclass + XFS_LOCK_INUMORDER <
+                ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
-                        (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
+                ASSERT(xfs_lockdep_subclass_ok(subclass +
-                lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
+                                                XFS_IOLOCK_PARENT_VAL));
+                class += subclass << XFS_IOLOCK_SHIFT;
+                if (lock_mode & XFS_IOLOCK_PARENT)
+                        class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT;
        }
        if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
-                ASSERT(subclass + XFS_LOCK_INUMORDER <
+                ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS);
-                        (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
+                class += subclass << XFS_MMAPLOCK_SHIFT;
-                lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
-                                                        XFS_MMAPLOCK_SHIFT;
        }
-        if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
+        if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) {
-                lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
+                ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS);
+                class += subclass << XFS_ILOCK_SHIFT;
+        }
-        return lock_mode;
+        return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class;
 }
 /*
@@ -399,6 +419,11 @@ xfs_lock_inumorder(int lock_mode, int subclass)
 * transaction (such as truncate). This can result in deadlock since the long
 * running trans might need to wait for the inode we just locked in order to
 * push the tail and free space in the log.
+ *
+ * xfs_lock_inodes() can only be used to lock one type of lock at a time -
+ * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
+ * lock more than one at a time, lockdep will report false positives saying we
+ * have violated locking orders.
 */
 void
 xfs_lock_inodes(
@@ -409,8 +434,29 @@ xfs_lock_inodes(
        int             attempts = 0, i, j, try_lock;
        xfs_log_item_t  *lp;
-        /* currently supports between 2 and 5 inodes */
+        /*
+         * Currently supports between 2 and 5 inodes with exclusive locking.  We
+         * support an arbitrary depth of locking here, but absolute limits on
+         * inodes depend on the type of locking and the limits placed by
+         * lockdep annotations in xfs_lock_inumorder.  These are all checked by
+         * the asserts.
+         */
        ASSERT(ips && inodes >= 2 && inodes <= 5);
+        ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL |
+                            XFS_ILOCK_EXCL));
+        ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
+                              XFS_ILOCK_SHARED)));
+        ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) ||
+                inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1);
+        ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
+                inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
+        ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
+                inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);
+        if (lock_mode & XFS_IOLOCK_EXCL) {
+                ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL)));
+        } else if (lock_mode & XFS_MMAPLOCK_EXCL)
+                ASSERT(!(lock_mode & XFS_ILOCK_EXCL));
        try_lock = 0;
        i = 0;
@@ -629,30 +675,29 @@ xfs_lookup(
 {
        xfs_ino_t               inum;
        int                     error;
-        uint                    lock_mode;
        trace_xfs_lookup(dp, name);
        if (XFS_FORCED_SHUTDOWN(dp->i_mount))
                return -EIO;
-        lock_mode = xfs_ilock_data_map_shared(dp);
+        xfs_ilock(dp, XFS_IOLOCK_SHARED);
        error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
-        xfs_iunlock(dp, lock_mode);
        if (error)
-                goto out;
+                goto out_unlock;
        error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
        if (error)
                goto out_free_name;
+        xfs_iunlock(dp, XFS_IOLOCK_SHARED);
        return 0;
 out_free_name:
        if (ci_name)
                kmem_free(ci_name->name);
-out:
+out_unlock:
+        xfs_iunlock(dp, XFS_IOLOCK_SHARED);
        *ipp = NULL;
        return error;
 }
@@ -787,7 +832,7 @@ xfs_ialloc(
        if (ip->i_d.di_version == 3) {
                ASSERT(ip->i_d.di_ino == ino);
-                ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid));
+                ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid));
                ip->i_d.di_crc = 0;
                ip->i_d.di_changecount = 1;
                ip->i_d.di_lsn = 0;
@@ -1149,7 +1194,8 @@ xfs_create(
                goto out_trans_cancel;
-        xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
+        xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
+                      XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
        unlock_dp_on_error = true;
        xfs_bmap_init(&free_list, &first_block);
@@ -1185,7 +1231,7 @@ xfs_create(
         * the transaction cancel unlocking dp so don't do it explicitly in the
         * error path.
         */
-        xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+        xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
        unlock_dp_on_error = false;
        error = xfs_dir_createname(tp, dp, name, ip->i_ino,
@@ -1258,7 +1304,7 @@ xfs_create(
        xfs_qm_dqrele(pdqp);
        if (unlock_dp_on_error)
-                xfs_iunlock(dp, XFS_ILOCK_EXCL);
+                xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
        return error;
 }
@@ -1403,10 +1449,11 @@ xfs_link(
        if (error)
                goto error_return;
+        xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
        xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
-        xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
+        xfs_trans_ijoin(tp, tdp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
        /*
         * If we are using project inheritance, we only allow hard link
@@ -2510,9 +2557,10 @@ xfs_remove(
                goto out_trans_cancel;
        }
+        xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
        xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
-        xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+        xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
        /*
@@ -2893,6 +2941,12 @@ xfs_rename(
         * whether the target directory is the same as the source
         * directory, we can lock from 2 to 4 inodes.
         */
+        if (!new_parent)
+                xfs_ilock(src_dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
+        else
+                xfs_lock_two_inodes(src_dp, target_dp,
+                                    XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
        xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
        /*
@@ -2900,9 +2954,9 @@ xfs_rename(
         * we can rely on either trans_commit or trans_cancel to unlock
         * them.
         */
-        xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
+        xfs_trans_ijoin(tp, src_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
        if (new_parent)
-                xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
+                xfs_trans_ijoin(tp, target_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
        if (target_ip)
                xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 8f22d20368d8..ca9e11989cbd 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -284,9 +284,9 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
 * Flags for lockdep annotations.
 *
 * XFS_LOCK_PARENT - for directory operations that require locking a
- * parent directory inode and a child entry inode.  The parent gets locked
+ * parent directory inode and a child entry inode. IOLOCK requires nesting,
- * with this flag so it gets a lockdep subclass of 1 and the child entry
+ * MMAPLOCK does not support this class, ILOCK requires a single subclass
- * lock will have a lockdep subclass of 0.
+ * to differentiate parent from child.
 *
 * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary
 * inodes do not participate in the normal lock order, and thus have their
@@ -295,30 +295,63 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
 * XFS_LOCK_INUMORDER - for locking several inodes at the some time
 * with xfs_lock_inodes().  This flag is used as the starting subclass
 * and each subsequent lock acquired will increment the subclass by one.
- * So the first lock acquired will have a lockdep subclass of 4, the
+ * However, MAX_LOCKDEP_SUBCLASSES == 8, which means we are greatly
- * second lock will have a lockdep subclass of 5, and so on. It is
+ * limited to the subclasses we can represent via nesting. We need at least
- * the responsibility of the class builder to shift this to the correct
+ * 5 inodes nest depth for the ILOCK through rename, and we also have to support
- * portion of the lock_mode lockdep mask.
+ * XFS_ILOCK_PARENT, which gives 6 subclasses. Then we have XFS_ILOCK_RTBITMAP
+ * and XFS_ILOCK_RTSUM, which are another 2 unique subclasses, so that's all
+ * 8 subclasses supported by lockdep.
+ *
+ * This also means we have to number the sub-classes in the lowest bits of
+ * the mask we keep, and we have to ensure we never exceed 3 bits of lockdep
+ * mask and we can't use bit-masking to build the subclasses. What a mess.
+ *
+ * Bit layout:
+ *
+ * Bit          Lock Region
+ * 16-19        XFS_IOLOCK_SHIFT dependencies
+ * 20-23        XFS_MMAPLOCK_SHIFT dependencies
+ * 24-31        XFS_ILOCK_SHIFT dependencies
+ *
+ * IOLOCK values
+ *
+ * 0-3          subclass value
+ * 4-7          PARENT subclass values
+ *
+ * MMAPLOCK values
+ *
+ * 0-3          subclass value
+ * 4-7          unused
+ *
+ * ILOCK values
+ * 0-4          subclass values
+ * 5            PARENT subclass (not nestable)
+ * 6            RTBITMAP subclass (not nestable)
+ * 7            RTSUM subclass (not nestable)
+ * 
 */
-#define XFS_LOCK_PARENT         1
+#define XFS_IOLOCK_SHIFT                16
-#define XFS_LOCK_RTBITMAP       2
+#define XFS_IOLOCK_PARENT_VAL           4
-#define XFS_LOCK_RTSUM          3
+#define XFS_IOLOCK_MAX_SUBCLASS         (XFS_IOLOCK_PARENT_VAL - 1)
-#define XFS_LOCK_INUMORDER      4
+#define XFS_IOLOCK_DEP_MASK             0x000f0000
+#define XFS_IOLOCK_PARENT               (XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT)
-#define XFS_IOLOCK_SHIFT        16
-#define XFS_IOLOCK_PARENT       (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
+#define XFS_MMAPLOCK_SHIFT              20
+#define XFS_MMAPLOCK_NUMORDER           0
-#define XFS_MMAPLOCK_SHIFT      20
+#define XFS_MMAPLOCK_MAX_SUBCLASS       3
+#define XFS_MMAPLOCK_DEP_MASK           0x00f00000
-#define XFS_ILOCK_SHIFT         24
-#define XFS_ILOCK_PARENT        (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
+#define XFS_ILOCK_SHIFT                 24
-#define XFS_ILOCK_RTBITMAP      (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
+#define XFS_ILOCK_PARENT_VAL            5
-#define XFS_ILOCK_RTSUM         (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
+#define XFS_ILOCK_MAX_SUBCLASS          (XFS_ILOCK_PARENT_VAL - 1)
+#define XFS_ILOCK_RTBITMAP_VAL          6
-#define XFS_IOLOCK_DEP_MASK     0x000f0000
+#define XFS_ILOCK_RTSUM_VAL             7
-#define XFS_MMAPLOCK_DEP_MASK   0x00f00000
+#define XFS_ILOCK_DEP_MASK              0xff000000
-#define XFS_ILOCK_DEP_MASK      0xff000000
+#define XFS_ILOCK_PARENT                (XFS_ILOCK_PARENT_VAL << XFS_ILOCK_SHIFT)
-#define XFS_LOCK_DEP_MASK       (XFS_IOLOCK_DEP_MASK | \
+#define XFS_ILOCK_RTBITMAP              (XFS_ILOCK_RTBITMAP_VAL << XFS_ILOCK_SHIFT)
+#define XFS_ILOCK_RTSUM                 (XFS_ILOCK_RTSUM_VAL << XFS_ILOCK_SHIFT)
+#define XFS_LOCK_SUBCLASS_MASK  (XFS_IOLOCK_DEP_MASK | \
                                 XFS_MMAPLOCK_DEP_MASK | \
                                 XFS_ILOCK_DEP_MASK)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 2e40f5e3cdf2..512a0945d52a 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1895,15 +1895,25 @@ xlog_recover_get_buf_lsn(
                 */
                goto recover_immediately;
        case XFS_SB_MAGIC:
+                /*
+                 * superblock uuids are magic. We may or may not have a
+                 * sb_meta_uuid on disk, but it will be set in the in-core
+                 * superblock. We set the uuid pointer for verification
+                 * according to the superblock feature mask to ensure we check
+                 * the relevant UUID in the superblock.
+                 */
                lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
-                uuid = &((struct xfs_dsb *)blk)->sb_uuid;
+                if (xfs_sb_version_hasmetauuid(&mp->m_sb))
+                        uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid;
+                else
+                        uuid = &((struct xfs_dsb *)blk)->sb_uuid;
                break;
        default:
                break;
        }
        if (lsn != (xfs_lsn_t)-1) {
-                if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
+                if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
                        goto recover_immediately;
                return lsn;
        }
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 1fb16562c159..f98ce83b7bc4 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1528,6 +1528,10 @@ xfs_fs_fill_super(
                }
        }
+        if (xfs_sb_version_hassparseinodes(&mp->m_sb))
+                xfs_alert(mp,
+        "EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");
        error = xfs_mountfs(mp);
        if (error)
                goto out_filestream_unmount;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 05c44bf51b5f..996481eeb491 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -240,7 +240,8 @@ xfs_symlink(
        if (error)
                goto out_trans_cancel;
-        xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
+        xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
+                      XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
        unlock_dp_on_error = true;
        /*
@@ -288,7 +289,7 @@ xfs_symlink(
         * the transaction cancel unlocking dp so don't do it explicitly in the
         * error path.
         */
-        xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+        xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
        unlock_dp_on_error = false;
        /*
@@ -421,7 +422,7 @@ out_release_inode:
        xfs_qm_dqrele(pdqp);
        if (unlock_dp_on_error)
-                xfs_iunlock(dp, XFS_ILOCK_EXCL);
+                xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
        return error;
 }
author	Dave Chinner <david@fromorbit.com>	2015-08-19 19:28:45 -0400
committer	Dave Chinner <david@fromorbit.com>	2015-08-19 19:28:45 -0400
commit	aa493382cb8c5768ba452d87f175fc2aff63911d (patch)
tree	85896c37038d3f5eda1283d090eeb95ee6ec9c82
parent	5be203ad115c1d8294e8685253e05fcea0202e04 (diff)
parent	3403ccc0c9f069c40ea751a93ac6746f5ef2116a (diff)