Diffstat (limited to 'fs')
-rw-r--r--  fs/afs/write.c                    |    1
-rw-r--r--  fs/binfmt_flat.c                  |    6
-rw-r--r--  fs/btrfs/ctree.c                  |  129
-rw-r--r--  fs/btrfs/extent_io.c              |    2
-rw-r--r--  fs/btrfs/ioctl.c                  |   20
-rw-r--r--  fs/cifs/cifsfs.c                  |   16
-rw-r--r--  fs/cifs/cifsproto.h               |    1
-rw-r--r--  fs/cifs/dir.c                     |   76
-rw-r--r--  fs/cifs/file.c                    |  100
-rw-r--r--  fs/cifs/inode.c                   |    4
-rw-r--r--  fs/cifs/sess.c                    |   10
-rw-r--r--  fs/dcache.c                       |    4
-rw-r--r--  fs/ext2/acl.c                     |    1
-rw-r--r--  fs/ext3/acl.c                     |    1
-rw-r--r--  fs/fcntl.c                        |    6
-rw-r--r--  fs/fs-writeback.c                 |  465
-rw-r--r--  fs/gfs2/bmap.c                    |    1
-rw-r--r--  fs/gfs2/dir.c                     |    2
-rw-r--r--  fs/gfs2/glock.c                   |   12
-rw-r--r--  fs/gfs2/inode.c                   |   12
-rw-r--r--  fs/gfs2/quota.c                   |   10
-rw-r--r--  fs/gfs2/quota.h                   |    2
-rw-r--r--  fs/inode.c                        |    2
-rw-r--r--  fs/jbd2/journal.c                 |   15
-rw-r--r--  fs/jbd2/transaction.c             |    9
-rw-r--r--  fs/mbcache.c                      |    5
-rw-r--r--  fs/nfs/client.c                   |  122
-rw-r--r--  fs/nfs/dir.c                      |    2
-rw-r--r--  fs/nfs/getroot.c                  |    2
-rw-r--r--  fs/nfs/internal.h                 |    3
-rw-r--r--  fs/nfs/nfs4xdr.c                  |    4
-rw-r--r--  fs/nfs/super.c                    |   22
-rw-r--r--  fs/ocfs2/aops.c                   |   94
-rw-r--r--  fs/ocfs2/dlm/dlmdomain.c          |    3
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c          |   22
-rw-r--r--  fs/ocfs2/dlm/dlmrecovery.c        |    2
-rw-r--r--  fs/ocfs2/file.c                   |  309
-rw-r--r--  fs/ocfs2/file.h                   |    6
-rw-r--r--  fs/ocfs2/journal.c                |   30
-rw-r--r--  fs/ocfs2/localalloc.c             |    7
-rw-r--r--  fs/ocfs2/quota_global.c           |    2
-rw-r--r--  fs/ocfs2/quota_local.c            |    4
-rw-r--r--  fs/ocfs2/refcounttree.c           |   12
-rw-r--r--  fs/ocfs2/reservations.c           |    1
-rw-r--r--  fs/ocfs2/suballoc.c               |    2
-rw-r--r--  fs/ocfs2/xattr.c                  |  200
-rw-r--r--  fs/partitions/ibm.c               |   14
-rw-r--r--  fs/proc/proc_devtree.c            |    3
-rw-r--r--  fs/proc/task_nommu.c              |   20
-rw-r--r--  fs/quota/dquot.c                  |    2
-rw-r--r--  fs/splice.c                       |    9
-rw-r--r--  fs/super.c                        |    6
-rw-r--r--  fs/sysv/ialloc.c                  |    6
-rw-r--r--  fs/ubifs/budget.c                 |    2
-rw-r--r--  fs/ubifs/shrinker.c               |    2
-rw-r--r--  fs/ubifs/ubifs.h                  |    2
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c        |    5
-rw-r--r--  fs/xfs/linux-2.6/xfs_export.c     |   11
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c      |    7
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.c    |   15
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c      |    2
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c       |  130
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h       |    2
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.h      |    3
-rw-r--r--  fs/xfs/quota/xfs_qm.c             |   25
-rw-r--r--  fs/xfs/quota/xfs_qm_syscalls.c    |   27
-rw-r--r--  fs/xfs/xfs_dfrag.c                |    5
-rw-r--r--  fs/xfs/xfs_ialloc.c               |  142
-rw-r--r--  fs/xfs/xfs_iget.c                 |   10
-rw-r--r--  fs/xfs/xfs_inode.c                |    5
-rw-r--r--  fs/xfs/xfs_inode.h                |    6
-rw-r--r--  fs/xfs/xfs_itable.c               |  285
-rw-r--r--  fs/xfs/xfs_itable.h               |   17
-rw-r--r--  fs/xfs/xfs_log_recover.c          |    2
-rw-r--r--  fs/xfs/xfs_mount.c                |    2
-rw-r--r--  fs/xfs/xfs_mount.h                |    2
-rw-r--r--  fs/xfs/xfs_rtalloc.c              |    4
-rw-r--r--  fs/xfs/xfs_trans_inode.c          |    2
-rw-r--r--  fs/xfs/xfs_vnodeops.c             |    2
79 files changed, 1304 insertions(+), 1234 deletions(-)
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3dab9e9948d0..722743b152d8 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode)
 {
	struct address_space *mapping = vnode->vfs_inode.i_mapping;
	struct writeback_control wbc = {
-		.bdi		= mapping->backing_dev_info,
		.sync_mode	= WB_SYNC_ALL,
		.nr_to_write	= LONG_MAX,
		.range_cyclic	= 1,
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index b6ab27ccf214..811384bec8de 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -68,11 +68,7 @@
  * Here we can be a bit looser than the data sections since this
  * needs to only meet arch ABI requirements.
  */
-#ifdef ARCH_SLAB_MINALIGN
-#define FLAT_STACK_ALIGN	(ARCH_SLAB_MINALIGN)
-#else
-#define FLAT_STACK_ALIGN	(sizeof(void *))
-#endif
+#define FLAT_STACK_ALIGN	max_t(unsigned long, sizeof(void *), ARCH_SLAB_MINALIGN)
 
 #define RELOC_FAILED	0xff00ff01	/* Relocation incorrect somewhere */
 #define UNLOADED_LIB	0x7ff000ff	/* Placeholder for unused library */
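A note on the FLAT_STACK_ALIGN change above: max_t() casts both arguments to the named type before comparing, so the single definition always picks the stricter of the two alignment requirements. This assumes ARCH_SLAB_MINALIGN is unconditionally defined by this point; the numbers below are illustrative, not taken from any particular arch:

/*
 * On a hypothetical 32-bit arch with ARCH_SLAB_MINALIGN == 8:
 *
 *	FLAT_STACK_ALIGN == max_t(unsigned long, sizeof(void *), 8)
 *			 == max_t(unsigned long, 4, 8)
 *			 == 8
 *
 * Where ARCH_SLAB_MINALIGN is smaller than a pointer, sizeof(void *)
 * wins instead, matching the old #else branch.
 */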
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0d1d966b0fe4..c3df14ce2cc2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2304,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
	return ret;
 }
 
+/*
+ * min slot controls the lowest index we're willing to push to the
+ * right.  We'll push up to and including min_slot, but no lower
+ */
 static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      int data_size, int empty,
				      struct extent_buffer *right,
-				      int free_space, u32 left_nritems)
+				      int free_space, u32 left_nritems,
+				      u32 min_slot)
 {
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *upper = path->nodes[1];
@@ -2327,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
	if (empty)
		nr = 0;
	else
-		nr = 1;
+		nr = max_t(u32, 1, min_slot);
 
	if (path->slots[0] >= left_nritems)
		push_space += data_size;
@@ -2469,10 +2474,14 @@ out_unlock:
  *
  * returns 1 if the push failed because the other node didn't have enough
  * room, 0 if everything worked out and < 0 if there were major errors.
+ *
+ * this will push starting from min_slot to the end of the leaf.  It won't
+ * push any slot lower than min_slot
  */
 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
-			   *root, struct btrfs_path *path, int data_size,
-			   int empty)
+			   *root, struct btrfs_path *path,
+			   int min_data_size, int data_size,
+			   int empty, u32 min_slot)
 {
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *right;
@@ -2514,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
	if (left_nritems == 0)
		goto out_unlock;
 
-	return __push_leaf_right(trans, root, path, data_size, empty,
-				right, free_space, left_nritems);
+	return __push_leaf_right(trans, root, path, min_data_size, empty,
+				right, free_space, left_nritems, min_slot);
 out_unlock:
	btrfs_tree_unlock(right);
	free_extent_buffer(right);
@@ -2525,12 +2534,17 @@ out_unlock:
 /*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items.  The
+ * item at 'max_slot' won't be touched.  Use (u32)-1 to make us do all the
+ * items
  */
 static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct btrfs_path *path, int data_size,
				     int empty, struct extent_buffer *left,
-				     int free_space, int right_nritems)
+				     int free_space, u32 right_nritems,
+				     u32 max_slot)
 {
	struct btrfs_disk_key disk_key;
	struct extent_buffer *right = path->nodes[0];
@@ -2549,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
	slot = path->slots[1];
 
	if (empty)
-		nr = right_nritems;
+		nr = min(right_nritems, max_slot);
	else
-		nr = right_nritems - 1;
+		nr = min(right_nritems - 1, max_slot);
 
	for (i = 0; i < nr; i++) {
		item = btrfs_item_nr(right, i);
@@ -2712,10 +2726,14 @@ out:
 /*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items.  The
+ * item at 'max_slot' won't be touched.  Use (u32)-1 to make us push all the
+ * items
  */
 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
-			  *root, struct btrfs_path *path, int data_size,
-			  int empty)
+			  *root, struct btrfs_path *path, int min_data_size,
+			  int data_size, int empty, u32 max_slot)
 {
	struct extent_buffer *right = path->nodes[0];
	struct extent_buffer *left;
@@ -2761,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
		goto out;
	}
 
-	return __push_leaf_left(trans, root, path, data_size,
-				empty, left, free_space, right_nritems);
+	return __push_leaf_left(trans, root, path, min_data_size,
+				empty, left, free_space, right_nritems,
+				max_slot);
 out:
	btrfs_tree_unlock(left);
	free_extent_buffer(left);
@@ -2855,6 +2874,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
 }
 
 /*
+ * double splits happen when we need to insert a big item in the middle
+ * of a leaf.  A double split can leave us with 3 mostly empty leaves:
+ * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
+ *          A                 B                 C
+ *
+ * We avoid this by trying to push the items on either side of our target
+ * into the adjacent leaves.  If all goes well we can avoid the double split
+ * completely.
+ */
+static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  int data_size)
+{
+	int ret;
+	int progress = 0;
+	int slot;
+	u32 nritems;
+
+	slot = path->slots[0];
+
+	/*
+	 * try to push all the items after our slot into the
+	 * right leaf
+	 */
+	ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+	if (ret < 0)
+		return ret;
+
+	if (ret == 0)
+		progress++;
+
+	nritems = btrfs_header_nritems(path->nodes[0]);
+	/*
+	 * our goal is to get our slot at the start or end of a leaf.  If
+	 * we've done so we're done
+	 */
+	if (path->slots[0] == 0 || path->slots[0] == nritems)
+		return 0;
+
+	if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+		return 0;
+
+	/* try to push all the items before our slot into the next leaf */
+	slot = path->slots[0];
+	ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+	if (ret < 0)
+		return ret;
+
+	if (ret == 0)
+		progress++;
+
+	if (progress)
+		return 0;
+	return 1;
+}
+
+/*
  * split the path's leaf in two, making sure there is at least data_size
  * available for the resulting leaf level of the path.
  *
@@ -2876,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
	int wret;
	int split;
	int num_doubles = 0;
+	int tried_avoid_double = 0;
 
	l = path->nodes[0];
	slot = path->slots[0];
@@ -2884,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
		return -EOVERFLOW;
 
	/* first try to make some room by pushing left and right */
-	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
-		wret = push_leaf_right(trans, root, path, data_size, 0);
+	if (data_size) {
+		wret = push_leaf_right(trans, root, path, data_size,
+				       data_size, 0, 0);
		if (wret < 0)
			return wret;
		if (wret) {
-			wret = push_leaf_left(trans, root, path, data_size, 0);
+			wret = push_leaf_left(trans, root, path, data_size,
+					      data_size, 0, (u32)-1);
			if (wret < 0)
				return wret;
		}
@@ -2923,6 +3003,8 @@ again:
		if (mid != nritems &&
		    leaf_space_used(l, mid, nritems - mid) +
		    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+			if (data_size && !tried_avoid_double)
+				goto push_for_double;
			split = 2;
		}
	}
@@ -2939,6 +3021,8 @@ again:
			if (mid != nritems &&
			    leaf_space_used(l, mid, nritems - mid) +
			    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+				if (data_size && !tried_avoid_double)
+					goto push_for_double;
				split = 2 ;
			}
		}
@@ -3019,6 +3103,13 @@ again:
	}
 
	return ret;
+
+push_for_double:
+	push_for_double_split(trans, root, path, data_size);
+	tried_avoid_double = 1;
+	if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+		return 0;
+	goto again;
 }
 
 static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
@@ -3915,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		extent_buffer_get(leaf);
 
		btrfs_set_path_blocking(path);
-		wret = push_leaf_left(trans, root, path, 1, 1);
+		wret = push_leaf_left(trans, root, path, 1, 1,
+				      1, (u32)-1);
		if (wret < 0 && wret != -ENOSPC)
			ret = wret;
 
		if (path->nodes[0] == leaf &&
		    btrfs_header_nritems(leaf)) {
-			wret = push_leaf_right(trans, root, path, 1, 1);
+			wret = push_leaf_right(trans, root, path, 1,
+					       1, 1, 0);
			if (wret < 0 && wret != -ENOSPC)
				ret = wret;
		}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a4080c21ec55..d74e6af9b53a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
	};
	struct writeback_control wbc_writepages = {
-		.bdi		= wbc->bdi,
		.sync_mode	= wbc->sync_mode,
		.older_than_this = NULL,
		.nr_to_write	= 64,
@@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
		.sync_io = mode == WB_SYNC_ALL,
	};
	struct writeback_control wbc_writepages = {
-		.bdi		= inode->i_mapping->backing_dev_info,
		.sync_mode	= mode,
		.older_than_this = NULL,
		.nr_to_write	= nr_pages * 2,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4dbaf89b1337..9254b3d58dbe 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1458,7 +1458,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
	 */
 
	/* the destination must be opened for writing */
-	if (!(file->f_mode & FMODE_WRITE))
+	if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
		return -EINVAL;
 
	ret = mnt_want_write(file->f_path.mnt);
@@ -1511,7 +1511,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 
	/* determine range to clone */
	ret = -EINVAL;
-	if (off >= src->i_size || off + len > src->i_size)
+	if (off + len > src->i_size || off + len < off)
		goto out_unlock;
	if (len == 0)
		olen = len = src->i_size - off;
@@ -1578,6 +1578,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
			u64 disko = 0, diskl = 0;
			u64 datao = 0, datal = 0;
			u8 comp;
+			u64 endoff;
 
			size = btrfs_item_size_nr(leaf, slot);
			read_extent_buffer(leaf, buf,
@@ -1712,9 +1713,18 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
			btrfs_release_path(root, path);
 
			inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-			if (new_key.offset + datal > inode->i_size)
-				btrfs_i_size_write(inode,
-					new_key.offset + datal);
+
+			/*
+			 * we round up to the block size at eof when
+			 * determining which extents to clone above,
+			 * but shouldn't round up the file size
+			 */
+			endoff = new_key.offset + datal;
+			if (endoff > off+olen)
+				endoff = off+olen;
+			if (endoff > inode->i_size)
+				btrfs_i_size_write(inode, endoff);
+
			BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
			ret = btrfs_update_inode(trans, root, inode);
			BUG_ON(ret);
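The endoff clamp above is easiest to see with concrete numbers; the values below are illustrative and assume a 4K block size:

/*
 * off = 0, olen = 6000: the caller asked to clone bytes [0, 6000).
 * Extents are cloned in whole blocks, so the final extent is rounded
 * up and datal = 8192.
 *
 *	endoff = new_key.offset + datal = 8192
 *	endoff > off + olen, so endoff is clamped to 6000
 *
 * i_size grows to 6000, what the caller asked for, instead of the
 * rounded-up 8192 that the old code would have written.
 */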
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 78c02eb4cb1f..484e52bb40bb 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -473,14 +473,24 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
	return 0;
 }
 
+void cifs_drop_inode(struct inode *inode)
+{
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
+		return generic_drop_inode(inode);
+
+	return generic_delete_inode(inode);
+}
+
 static const struct super_operations cifs_super_ops = {
	.put_super = cifs_put_super,
	.statfs = cifs_statfs,
	.alloc_inode = cifs_alloc_inode,
	.destroy_inode = cifs_destroy_inode,
-/*	.drop_inode	= generic_delete_inode,
-	.delete_inode	= cifs_delete_inode, */	/* Do not need above two
-	functions unless later we add lazy close of inodes or unless the
+	.drop_inode	= cifs_drop_inode,
+/*	.delete_inode	= cifs_delete_inode, */	/* Do not need above
+	function unless later we add lazy close of inodes or unless the
	kernel forgets to call us with the same number of releases (closes)
	as opens */
	.show_options = cifs_show_options,
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index fb1657e0fdb8..fb6318b81509 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -106,7 +106,6 @@ extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode,
			__u16 fileHandle, struct file *file,
			struct vfsmount *mnt, unsigned int oflags);
 extern int cifs_posix_open(char *full_path, struct inode **pinode,
-			struct vfsmount *mnt,
			struct super_block *sb,
			int mode, int oflags,
			__u32 *poplock, __u16 *pnetfid, int xid);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 391816b461ca..e7ae78b66fa1 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -25,6 +25,7 @@
 #include <linux/slab.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
+#include <linux/file.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
 #include "cifsglob.h"
@@ -184,12 +185,13 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
	}
	write_unlock(&GlobalSMBSeslock);
 
+	file->private_data = pCifsFile;
+
	return pCifsFile;
 }
 
 int cifs_posix_open(char *full_path, struct inode **pinode,
-		    struct vfsmount *mnt, struct super_block *sb,
-		    int mode, int oflags,
+		    struct super_block *sb, int mode, int oflags,
		    __u32 *poplock, __u16 *pnetfid, int xid)
 {
	int rc;
@@ -258,19 +260,6 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
		cifs_fattr_to_inode(*pinode, &fattr);
	}
 
-	/*
-	 * cifs_fill_filedata() takes care of setting cifsFileInfo pointer to
-	 * file->private_data.
-	 */
-	if (mnt) {
-		struct cifsFileInfo *pfile_info;
-
-		pfile_info = cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt,
-					       oflags);
-		if (pfile_info == NULL)
-			rc = -ENOMEM;
-	}
-
 posix_open_ret:
	kfree(presp_data);
	return rc;
@@ -298,7 +287,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
	int create_options = CREATE_NOT_DIR;
	__u32 oplock = 0;
	int oflags;
-	bool posix_create = false;
	/*
	 * BB below access is probably too much for mknod to request
	 * but we have to do query and setpathinfo so requesting
@@ -339,7 +327,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
			le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		rc = cifs_posix_open(full_path, &newinode,
-				nd ? nd->path.mnt : NULL,
				inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
		/* EIO could indicate that (posix open) operation is not
		   supported, despite what server claimed in capability
@@ -347,7 +334,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
		   handled in posix open */
 
		if (rc == 0) {
-			posix_create = true;
			if (newinode == NULL) /* query inode info */
				goto cifs_create_get_file_info;
			else /* success, no need to query */
@@ -478,21 +464,28 @@ cifs_create_set_dentry:
	else
		cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
 
-	/* nfsd case - nfs srv does not set nd */
-	if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) {
-		/* mknod case - do not leave file open */
-		CIFSSMBClose(xid, tcon, fileHandle);
-	} else if (!(posix_create) && (newinode)) {
+	if (newinode && nd && (nd->flags & LOOKUP_OPEN)) {
		struct cifsFileInfo *pfile_info;
-		/*
-		 * cifs_fill_filedata() takes care of setting cifsFileInfo
-		 * pointer to file->private_data.
-		 */
-		pfile_info = cifs_new_fileinfo(newinode, fileHandle, NULL,
+		struct file *filp;
+
+		filp = lookup_instantiate_filp(nd, direntry, generic_file_open);
+		if (IS_ERR(filp)) {
+			rc = PTR_ERR(filp);
+			CIFSSMBClose(xid, tcon, fileHandle);
+			goto cifs_create_out;
+		}
+
+		pfile_info = cifs_new_fileinfo(newinode, fileHandle, filp,
					       nd->path.mnt, oflags);
-		if (pfile_info == NULL)
+		if (pfile_info == NULL) {
+			fput(filp);
+			CIFSSMBClose(xid, tcon, fileHandle);
			rc = -ENOMEM;
+		}
+	} else {
+		CIFSSMBClose(xid, tcon, fileHandle);
	}
+
 cifs_create_out:
	kfree(buf);
	kfree(full_path);
@@ -636,6 +629,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
	bool posix_open = false;
	struct cifs_sb_info *cifs_sb;
	struct cifsTconInfo *pTcon;
+	struct cifsFileInfo *cfile;
	struct inode *newInode = NULL;
	char *full_path = NULL;
	struct file *filp;
@@ -703,7 +697,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
	if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
	    (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
	    (nd->intent.open.flags & O_CREAT)) {
-		rc = cifs_posix_open(full_path, &newInode, nd->path.mnt,
+		rc = cifs_posix_open(full_path, &newInode,
				     parent_dir_inode->i_sb,
				     nd->intent.open.create_mode,
				     nd->intent.open.flags, &oplock,
@@ -733,8 +727,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
		else
			direntry->d_op = &cifs_dentry_ops;
		d_add(direntry, newInode);
-		if (posix_open)
-			filp = lookup_instantiate_filp(nd, direntry, NULL);
+		if (posix_open) {
+			filp = lookup_instantiate_filp(nd, direntry,
+						       generic_file_open);
+			if (IS_ERR(filp)) {
+				rc = PTR_ERR(filp);
+				CIFSSMBClose(xid, pTcon, fileHandle);
+				goto lookup_out;
+			}
+
+			cfile = cifs_new_fileinfo(newInode, fileHandle, filp,
+						  nd->path.mnt,
+						  nd->intent.open.flags);
+			if (cfile == NULL) {
+				fput(filp);
+				CIFSSMBClose(xid, pTcon, fileHandle);
+				rc = -ENOMEM;
+				goto lookup_out;
+			}
+		}
		/* since paths are not looked up by component - the parent
		   directories are presumed to be good here */
		renew_parental_timestamps(direntry);
@@ -755,6 +766,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
		   is a common return code */
	}
 
+lookup_out:
	kfree(full_path);
	FreeXid(xid);
	return ERR_PTR(rc);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 75541af4b3db..409e4f523e61 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -162,44 +162,12 @@ psx_client_can_cache:
	return 0;
 }
 
-static struct cifsFileInfo *
-cifs_fill_filedata(struct file *file)
-{
-	struct list_head *tmp;
-	struct cifsFileInfo *pCifsFile = NULL;
-	struct cifsInodeInfo *pCifsInode = NULL;
-
-	/* search inode for this file and fill in file->private_data */
-	pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
-	read_lock(&GlobalSMBSeslock);
-	list_for_each(tmp, &pCifsInode->openFileList) {
-		pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
-		if ((pCifsFile->pfile == NULL) &&
-		    (pCifsFile->pid == current->tgid)) {
-			/* mode set in cifs_create */
-
-			/* needed for writepage */
-			pCifsFile->pfile = file;
-			file->private_data = pCifsFile;
-			break;
-		}
-	}
-	read_unlock(&GlobalSMBSeslock);
-
-	if (file->private_data != NULL) {
-		return pCifsFile;
-	} else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
-		cERROR(1, "could not find file instance for "
-			   "new file %p", file);
-	return NULL;
-}
-
 /* all arguments to this function must be checked for validity in caller */
-static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
-	struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
+static inline int cifs_open_inode_helper(struct inode *inode,
	struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
	char *full_path, int xid)
 {
+	struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
	struct timespec temp;
	int rc;
 
@@ -213,36 +181,35 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
	/* if not oplocked, invalidate inode pages if mtime or file
	   size changed */
	temp = cifs_NTtimeToUnix(buf->LastWriteTime);
-	if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
-	    (file->f_path.dentry->d_inode->i_size ==
+	if (timespec_equal(&inode->i_mtime, &temp) &&
+	    (inode->i_size ==
	     (loff_t)le64_to_cpu(buf->EndOfFile))) {
		cFYI(1, "inode unchanged on server");
	} else {
-		if (file->f_path.dentry->d_inode->i_mapping) {
+		if (inode->i_mapping) {
			/* BB no need to lock inode until after invalidate
			   since namei code should already have it locked? */
-			rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
+			rc = filemap_write_and_wait(inode->i_mapping);
			if (rc != 0)
-				CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
+				pCifsInode->write_behind_rc = rc;
		}
		cFYI(1, "invalidating remote inode since open detected it "
			 "changed");
-		invalidate_remote_inode(file->f_path.dentry->d_inode);
+		invalidate_remote_inode(inode);
	}
 
 client_can_cache:
	if (pTcon->unix_ext)
-		rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
-			full_path, inode->i_sb, xid);
+		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
+					      xid);
	else
-		rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
-			full_path, buf, inode->i_sb, xid, NULL);
+		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
+					 xid, NULL);
 
	if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
		pCifsInode->clientCanCacheAll = true;
		pCifsInode->clientCanCacheRead = true;
-		cFYI(1, "Exclusive Oplock granted on inode %p",
-			file->f_path.dentry->d_inode);
+		cFYI(1, "Exclusive Oplock granted on inode %p", inode);
	} else if ((*oplock & 0xF) == OPLOCK_READ)
		pCifsInode->clientCanCacheRead = true;
 
@@ -256,7 +223,7 @@ int cifs_open(struct inode *inode, struct file *file)
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifsTconInfo *tcon;
-	struct cifsFileInfo *pCifsFile;
+	struct cifsFileInfo *pCifsFile = NULL;
	struct cifsInodeInfo *pCifsInode;
	char *full_path = NULL;
	int desiredAccess;
@@ -270,12 +237,6 @@ int cifs_open(struct inode *inode, struct file *file)
	tcon = cifs_sb->tcon;
 
	pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
-	pCifsFile = cifs_fill_filedata(file);
-	if (pCifsFile) {
-		rc = 0;
-		FreeXid(xid);
-		return rc;
-	}
 
	full_path = build_path_from_dentry(file->f_path.dentry);
	if (full_path == NULL) {
@@ -299,8 +260,7 @@ int cifs_open(struct inode *inode, struct file *file)
		int oflags = (int) cifs_posix_convert_flags(file->f_flags);
		oflags |= SMB_O_CREAT;
		/* can not refresh inode info since size could be stale */
-		rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
-				     inode->i_sb,
+		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &netfid, xid);
		if (rc == 0) {
@@ -308,9 +268,20 @@ int cifs_open(struct inode *inode, struct file *file)
			/* no need for special case handling of setting mode
			   on read only files needed here */
 
-			pCifsFile = cifs_fill_filedata(file);
-			cifs_posix_open_inode_helper(inode, file, pCifsInode,
-						     oplock, netfid);
+			rc = cifs_posix_open_inode_helper(inode, file,
+					pCifsInode, oplock, netfid);
+			if (rc != 0) {
+				CIFSSMBClose(xid, tcon, netfid);
+				goto out;
+			}
+
+			pCifsFile = cifs_new_fileinfo(inode, netfid, file,
+							file->f_path.mnt,
+							oflags);
+			if (pCifsFile == NULL) {
+				CIFSSMBClose(xid, tcon, netfid);
+				rc = -ENOMEM;
+			}
			goto out;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
@@ -391,17 +362,17 @@ int cifs_open(struct inode *inode, struct file *file)
		goto out;
	}
 
+	rc = cifs_open_inode_helper(inode, tcon, &oplock, buf, full_path, xid);
+	if (rc != 0)
+		goto out;
+
	pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
					file->f_flags);
-	file->private_data = pCifsFile;
-	if (file->private_data == NULL) {
+	if (pCifsFile == NULL) {
		rc = -ENOMEM;
		goto out;
	}
 
-	rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon,
-				    &oplock, buf, full_path, xid);
-
	if (oplock & CIFS_CREATE_ACTION) {
		/* time to set mode which we can not set earlier due to
		   problems creating new read-only files */
@@ -513,8 +484,7 @@ reopen_error_exit:
			le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		int oflags = (int) cifs_posix_convert_flags(file->f_flags);
		/* can not refresh inode info since size could be stale */
-		rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
-				     inode->i_sb,
+		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &netfid, xid);
		if (rc == 0) {
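One pattern worth noting in the cifs_open() rework above: once the server-side open has succeeded, every subsequent local failure must close the remote handle, or the server leaks an open file. A minimal sketch of that shape; server_open(), refresh_metadata() and alloc_private() are hypothetical stand-ins, not the real cifs calls:

/* Sketch of the unwind order; the extern helpers are invented stand-ins. */
extern int server_open(struct file *file, __u16 *netfid);
extern void server_close(__u16 netfid);
extern int refresh_metadata(struct inode *inode);
extern void *alloc_private(struct inode *inode, __u16 netfid,
			   struct file *file);

static int open_sketch(struct inode *inode, struct file *file)
{
	__u16 netfid;
	void *priv;
	int rc;

	rc = server_open(file, &netfid);
	if (rc)
		return rc;			/* nothing to undo yet */

	rc = refresh_metadata(inode);
	if (rc) {
		server_close(netfid);		/* don't leak the handle */
		return rc;
	}

	priv = alloc_private(inode, netfid, file);
	if (priv == NULL) {
		server_close(netfid);
		return -ENOMEM;
	}

	return 0;	/* from here on, release() closes netfid */
}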
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 62b324f26a56..6f0683c68952 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1401,6 +1401,10 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath,
	if (rc == 0 || rc != -ETXTBSY)
		return rc;
 
+	/* open-file renames don't work across directories */
+	if (to_dentry->d_parent != from_dentry->d_parent)
+		return rc;
+
	/* open the file to be renamed -- we need DELETE perms */
	rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE,
			 CREATE_NOT_DIR, &srcfid, &oplock, NULL,
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7707389bdf2c..0a57cb7db5dd 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -730,15 +730,7 @@ ssetup_ntlmssp_authenticate:
 
		/* calculate session key */
		setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
-		if (first_time) /* should this be moved into common code
-				   with similar ntlmv2 path? */
-		/* cifs_calculate_ntlmv2_mac_key(ses->server->mac_signing_key,
-			response BB FIXME, v2_sess_key); */
-
-		/* copy session key */
-
-	/*	memcpy(bcc_ptr, (char *)ntlm_session_key,LM2_SESS_KEY_SIZE);
-		bcc_ptr += LM2_SESS_KEY_SIZE; */
+		/* FIXME: calculate MAC key */
		memcpy(bcc_ptr, (char *)v2_sess_key,
		       sizeof(struct ntlmv2_resp));
		bcc_ptr += sizeof(struct ntlmv2_resp);
diff --git a/fs/dcache.c b/fs/dcache.c
index d96047b4a633..86d4db15473e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -590,6 +590,8 @@ static void prune_dcache(int count)
			up_read(&sb->s_umount);
		}
		spin_lock(&sb_lock);
+		/* lock was dropped, must reset next */
+		list_safe_reset_next(sb, n, s_list);
		count -= pruned;
		__put_super(sb);
		/* more work left to do? */
@@ -894,7 +896,7 @@ EXPORT_SYMBOL(shrink_dcache_parent);
  *
  * In this case we return -1 to tell the caller that we baled.
  */
-static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
	if (nr) {
		if (!(gfp_mask & __GFP_FS))
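The list_safe_reset_next() call above addresses a general hazard: list_for_each_entry_safe() caches the next entry, and that cache goes stale whenever the lock protecting the list is dropped mid-walk. Below is a generic sketch of the pattern, not the dcache code itself; it assumes the current entry stays pinned across the unlock, the way prune_dcache() pins the superblock via s_count (needs_slow_work() and do_slow_work() are invented names):

#include <linux/list.h>
#include <linux/spinlock.h>

struct item {
	struct list_head link;
};

static LIST_HEAD(item_list);
static DEFINE_SPINLOCK(item_lock);

extern bool needs_slow_work(struct item *pos);
extern void do_slow_work(struct item *pos);

static void walk_items(void)
{
	struct item *pos, *n;

	spin_lock(&item_lock);
	list_for_each_entry_safe(pos, n, &item_list, link) {
		if (needs_slow_work(pos)) {
			/* 'pos' must be pinned before dropping the lock */
			spin_unlock(&item_lock);
			do_slow_work(pos);	/* list may change here */
			spin_lock(&item_lock);
			/* 'n' may have been freed meanwhile; recompute it */
			list_safe_reset_next(pos, n, link);
		}
	}
	spin_unlock(&item_lock);
}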
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index ca7e2a0ed98a..2bcc0431bada 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -200,6 +200,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
					return error;
				else {
					inode->i_mode = mode;
+					inode->i_ctime = CURRENT_TIME_SEC;
					mark_inode_dirty(inode);
					if (error == 0)
						acl = NULL;
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 01552abbca3c..8a11fe212183 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -205,6 +205,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
					return error;
				else {
					inode->i_mode = mode;
+					inode->i_ctime = CURRENT_TIME_SEC;
					ext3_mark_inode_dirty(handle, inode);
					if (error == 0)
						acl = NULL;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 51e11bf5708f..9d175d623aab 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -733,12 +733,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
 {
	while (fa) {
		struct fown_struct *fown;
+		unsigned long flags;
+
		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
-		spin_lock(&fa->fa_lock);
+		spin_lock_irqsave(&fa->fa_lock, flags);
		if (fa->fa_file) {
			fown = &fa->fa_file->f_owner;
			/* Don't send SIGURG to processes which have not set a
@@ -747,7 +749,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
			if (!(sig == SIGURG && fown->signum == 0))
				send_sigio(fown, fa->fa_fd, band);
		}
-		spin_unlock(&fa->fa_lock);
+		spin_unlock_irqrestore(&fa->fa_lock, flags);
		fa = rcu_dereference(fa->fa_next);
	}
 }
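Context for the irqsave conversion above: kill_fasync() is callable from interrupt context, since drivers commonly raise SIGIO toward async readers from their interrupt handlers; a plain spin_lock() on fa_lock could therefore deadlock against an IRQ taking the same lock. An illustrative driver fragment (not from the tree):

#include <linux/fs.h>
#include <linux/interrupt.h>

static struct fasync_struct *async_queue;	/* managed via fasync_helper() */

static irqreturn_t sample_irq_handler(int irq, void *dev_id)
{
	/* ... acknowledge the hardware, queue the new data ... */

	/* runs in hard-IRQ context, so fa_lock must be IRQ-safe */
	kill_fasync(&async_queue, SIGIO, POLL_IN);
	return IRQ_HANDLED;
}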
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1d1088f48bc2..d5be1693ac93 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -38,51 +38,18 @@ int nr_pdflush_threads;
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
-struct wb_writeback_args {
+struct wb_writeback_work {
	long nr_pages;
	struct super_block *sb;
	enum writeback_sync_modes sync_mode;
	unsigned int for_kupdate:1;
	unsigned int range_cyclic:1;
	unsigned int for_background:1;
-};
 
-/*
- * Work items for the bdi_writeback threads
- */
-struct bdi_work {
	struct list_head list;		/* pending work list */
-	struct rcu_head rcu_head;	/* for RCU free/clear of work */
-
-	unsigned long seen;		/* threads that have seen this work */
-	atomic_t pending;		/* number of threads still to do work */
-
-	struct wb_writeback_args args;	/* writeback arguments */
-
-	unsigned long state;		/* flag bits, see WS_* */
+	struct completion *done;	/* set if the caller waits */
 };
 
-enum {
-	WS_USED_B = 0,
-	WS_ONSTACK_B,
-};
-
-#define WS_USED (1 << WS_USED_B)
-#define WS_ONSTACK (1 << WS_ONSTACK_B)
-
-static inline bool bdi_work_on_stack(struct bdi_work *work)
-{
-	return test_bit(WS_ONSTACK_B, &work->state);
-}
-
-static inline void bdi_work_init(struct bdi_work *work,
-				 struct wb_writeback_args *args)
-{
-	INIT_RCU_HEAD(&work->rcu_head);
-	work->args = *args;
-	work->state = WS_USED;
-}
-
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
@@ -95,76 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi)
	return !list_empty(&bdi->work_list);
 }
 
-static void bdi_work_clear(struct bdi_work *work)
-{
-	clear_bit(WS_USED_B, &work->state);
-	smp_mb__after_clear_bit();
-	/*
-	 * work can have disappeared at this point. bit waitq functions
-	 * should be able to tolerate this, provided bdi_sched_wait does
-	 * not dereference it's pointer argument.
-	*/
-	wake_up_bit(&work->state, WS_USED_B);
-}
-
-static void bdi_work_free(struct rcu_head *head)
-{
-	struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
-
-	if (!bdi_work_on_stack(work))
-		kfree(work);
-	else
-		bdi_work_clear(work);
-}
-
-static void wb_work_complete(struct bdi_work *work)
-{
-	const enum writeback_sync_modes sync_mode = work->args.sync_mode;
-	int onstack = bdi_work_on_stack(work);
-
-	/*
-	 * For allocated work, we can clear the done/seen bit right here.
-	 * For on-stack work, we need to postpone both the clear and free
-	 * to after the RCU grace period, since the stack could be invalidated
-	 * as soon as bdi_work_clear() has done the wakeup.
-	 */
-	if (!onstack)
-		bdi_work_clear(work);
-	if (sync_mode == WB_SYNC_NONE || onstack)
-		call_rcu(&work->rcu_head, bdi_work_free);
-}
-
-static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
-{
-	/*
-	 * The caller has retrieved the work arguments from this work,
-	 * drop our reference. If this is the last ref, delete and free it
-	 */
-	if (atomic_dec_and_test(&work->pending)) {
-		struct backing_dev_info *bdi = wb->bdi;
-
-		spin_lock(&bdi->wb_lock);
-		list_del_rcu(&work->list);
-		spin_unlock(&bdi->wb_lock);
-
-		wb_work_complete(work);
-	}
-}
-
-static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
+static void bdi_queue_work(struct backing_dev_info *bdi,
+			   struct wb_writeback_work *work)
 {
-	work->seen = bdi->wb_mask;
-	BUG_ON(!work->seen);
-	atomic_set(&work->pending, bdi->wb_cnt);
-	BUG_ON(!bdi->wb_cnt);
-
-	/*
-	 * list_add_tail_rcu() contains the necessary barriers to
-	 * make sure the above stores are seen before the item is
-	 * noticed on the list
-	 */
	spin_lock(&bdi->wb_lock);
-	list_add_tail_rcu(&work->list, &bdi->work_list);
+	list_add_tail(&work->list, &bdi->work_list);
	spin_unlock(&bdi->wb_lock);
 
	/*
@@ -181,97 +83,59 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
	}
 }
 
-/*
- * Used for on-stack allocated work items. The caller needs to wait until
- * the wb threads have acked the work before it's safe to continue.
- */
-static void bdi_wait_on_work_clear(struct bdi_work *work)
-{
-	wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
-		    TASK_UNINTERRUPTIBLE);
-}
-
-static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
-				 struct wb_writeback_args *args)
+static void
+__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+		      bool range_cyclic, bool for_background)
 {
-	struct bdi_work *work;
+	struct wb_writeback_work *work;
 
	/*
	 * This is WB_SYNC_NONE writeback, so if allocation fails just
	 * wakeup the thread for old dirty data writeback
	 */
-	work = kmalloc(sizeof(*work), GFP_ATOMIC);
-	if (work) {
-		bdi_work_init(work, args);
-		bdi_queue_work(bdi, work);
-	} else {
-		struct bdi_writeback *wb = &bdi->wb;
-
-		if (wb->task)
-			wake_up_process(wb->task);
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work) {
+		if (bdi->wb.task)
+			wake_up_process(bdi->wb.task);
+		return;
	}
+
+	work->sync_mode = WB_SYNC_NONE;
+	work->nr_pages = nr_pages;
+	work->range_cyclic = range_cyclic;
+	work->for_background = for_background;
+
+	bdi_queue_work(bdi, work);
 }
 
 /**
- * bdi_sync_writeback - start and wait for writeback
+ * bdi_start_writeback - start writeback
  * @bdi: the backing device to write from
- * @sb: write inodes from this super_block
+ * @nr_pages: the number of pages to write
  *
  * Description:
- *   This does WB_SYNC_ALL data integrity writeback and waits for the
- *   IO to complete. Callers must hold the sb s_umount semaphore for
- *   reading, to avoid having the super disappear before we are done.
+ *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
+ *   started when this function returns, we make no guarentees on
+ *   completion. Caller need not hold sb s_umount semaphore.
+ *
  */
-static void bdi_sync_writeback(struct backing_dev_info *bdi,
-			       struct super_block *sb)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
 {
-	struct wb_writeback_args args = {
-		.sb		= sb,
-		.sync_mode	= WB_SYNC_ALL,
-		.nr_pages	= LONG_MAX,
-		.range_cyclic	= 0,
-	};
-	struct bdi_work work;
-
-	bdi_work_init(&work, &args);
-	work.state |= WS_ONSTACK;
-
-	bdi_queue_work(bdi, &work);
-	bdi_wait_on_work_clear(&work);
+	__bdi_start_writeback(bdi, nr_pages, true, false);
 }
 
 /**
- * bdi_start_writeback - start writeback
+ * bdi_start_background_writeback - start background writeback
  * @bdi: the backing device to write from
- * @sb: write inodes from this super_block
- * @nr_pages: the number of pages to write
  *
  * Description:
- *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
+ *   This does WB_SYNC_NONE background writeback. The IO is only
  *   started when this function returns, we make no guarentees on
  *   completion. Caller need not hold sb s_umount semaphore.
- *
  */
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
-			 long nr_pages)
+void bdi_start_background_writeback(struct backing_dev_info *bdi)
 {
-	struct wb_writeback_args args = {
-		.sb		= sb,
-		.sync_mode	= WB_SYNC_NONE,
-		.nr_pages	= nr_pages,
-		.range_cyclic	= 1,
-	};
-
-	/*
-	 * We treat @nr_pages=0 as the special case to do background writeback,
-	 * ie. to sync pages until the background dirty threshold is reached.
-	 */
-	if (!nr_pages) {
-		args.nr_pages = LONG_MAX;
-		args.for_background = 1;
-	}
-
-	bdi_alloc_queue_work(bdi, &args);
+	__bdi_start_writeback(bdi, LONG_MAX, true, true);
 }
 
 /*
@@ -561,75 +425,69 @@ select_queue:
	return ret;
 }
 
-static void unpin_sb_for_writeback(struct super_block *sb)
-{
-	up_read(&sb->s_umount);
-	put_super(sb);
-}
-
-enum sb_pin_state {
-	SB_PINNED,
-	SB_NOT_PINNED,
-	SB_PIN_FAILED
-};
-
 /*
- * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
+ * For background writeback the caller does not have the sb pinned
  * before calling writeback. So make sure that we do pin it, so it doesn't
  * go away while we are writing inodes from it.
  */
-static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
-					      struct super_block *sb)
+static bool pin_sb_for_writeback(struct super_block *sb)
 {
-	/*
-	 * Caller must already hold the ref for this
-	 */
-	if (wbc->sync_mode == WB_SYNC_ALL) {
-		WARN_ON(!rwsem_is_locked(&sb->s_umount));
-		return SB_NOT_PINNED;
-	}
	spin_lock(&sb_lock);
+	if (list_empty(&sb->s_instances)) {
+		spin_unlock(&sb_lock);
+		return false;
+	}
+
	sb->s_count++;
+	spin_unlock(&sb_lock);
+
	if (down_read_trylock(&sb->s_umount)) {
-		if (sb->s_root) {
-			spin_unlock(&sb_lock);
-			return SB_PINNED;
-		}
-		/*
-		 * umounted, drop rwsem again and fall through to failure
-		 */
+		if (sb->s_root)
+			return true;
		up_read(&sb->s_umount);
	}
-	sb->s_count--;
-	spin_unlock(&sb_lock);
-	return SB_PIN_FAILED;
+
+	put_super(sb);
+	return false;
 }
 
 /*
  * Write a portion of b_io inodes which belong to @sb.
- * If @wbc->sb != NULL, then find and write all such
+ *
+ * If @only_this_sb is true, then find and write all such
  * inodes. Otherwise write only ones which go sequentially
  * in reverse order.
+ *
  * Return 1, if the caller writeback routine should be
  * interrupted. Otherwise return 0.
  */
-static int writeback_sb_inodes(struct super_block *sb,
-			       struct bdi_writeback *wb,
-			       struct writeback_control *wbc)
+static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
		struct writeback_control *wbc, bool only_this_sb)
 {
	while (!list_empty(&wb->b_io)) {
		long pages_skipped;
		struct inode *inode = list_entry(wb->b_io.prev,
						 struct inode, i_list);
-		if (wbc->sb && sb != inode->i_sb) {
-			/* super block given and doesn't
-			   match, skip this inode */
-			redirty_tail(inode);
-			continue;
-		}
-		if (sb != inode->i_sb)
-			/* finish with this superblock */
+
+		if (inode->i_sb != sb) {
+			if (only_this_sb) {
+				/*
+				 * We only want to write back data for this
+				 * superblock, move all inodes not belonging
+				 * to it back onto the dirty list.
+				 */
+				redirty_tail(inode);
+				continue;
+			}
+
+			/*
+			 * The inode belongs to a different superblock.
+			 * Bounce back to the caller to unpin this and
+			 * pin the next superblock.
+			 */
			return 0;
+		}
+
		if (inode->i_state & (I_NEW | I_WILL_FREE)) {
			requeue_io(inode);
			continue;
@@ -667,8 +525,8 @@ static int writeback_sb_inodes(struct super_block *sb,
	return 1;
 }
 
-static void writeback_inodes_wb(struct bdi_writeback *wb,
-				struct writeback_control *wbc)
+void writeback_inodes_wb(struct bdi_writeback *wb,
			 struct writeback_control *wbc)
 {
	int ret = 0;
 
@@ -681,24 +539,14 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
		struct inode *inode = list_entry(wb->b_io.prev,
						 struct inode, i_list);
		struct super_block *sb = inode->i_sb;
-		enum sb_pin_state state;
 
-		if (wbc->sb && sb != wbc->sb) {
-			/* super block given and doesn't
-			   match, skip this inode */
-			redirty_tail(inode);
-			continue;
-		}
-		state = pin_sb_for_writeback(wbc, sb);
-
-		if (state == SB_PIN_FAILED) {
+		if (!pin_sb_for_writeback(sb)) {
			requeue_io(inode);
			continue;
		}
-		ret = writeback_sb_inodes(sb, wb, wbc);
+		ret = writeback_sb_inodes(sb, wb, wbc, false);
+		drop_super(sb);
 
-		if (state == SB_PINNED)
-			unpin_sb_for_writeback(sb);
		if (ret)
			break;
	}
@@ -706,11 +554,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
	/* Leave any unwritten inodes on b_io */
 }
 
-void writeback_inodes_wbc(struct writeback_control *wbc)
+static void __writeback_inodes_sb(struct super_block *sb,
		struct bdi_writeback *wb, struct writeback_control *wbc)
 {
-	struct backing_dev_info *bdi = wbc->bdi;
+	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	writeback_inodes_wb(&bdi->wb, wbc);
+	wbc->wb_start = jiffies; /* livelock avoidance */
+	spin_lock(&inode_lock);
+	if (!wbc->for_kupdate || list_empty(&wb->b_io))
+		queue_io(wb, wbc->older_than_this);
+	writeback_sb_inodes(sb, wb, wbc, true);
+	spin_unlock(&inode_lock);
 }
 
 /*
@@ -748,16 +602,14 @@ static inline bool over_bground_thresh(void)
  * all dirty pages if they are all attached to "old" mappings.
  */
 static long wb_writeback(struct bdi_writeback *wb,
-			 struct wb_writeback_args *args)
+			 struct wb_writeback_work *work)
 {
	struct writeback_control wbc = {
-		.bdi			= wb->bdi,
-		.sb			= args->sb,
-		.sync_mode		= args->sync_mode,
+		.sync_mode		= work->sync_mode,
		.older_than_this	= NULL,
-		.for_kupdate		= args->for_kupdate,
-		.for_background		= args->for_background,
-		.range_cyclic		= args->range_cyclic,
+		.for_kupdate		= work->for_kupdate,
+		.for_background		= work->for_background,
+		.range_cyclic		= work->range_cyclic,
	};
	unsigned long oldest_jif;
	long wrote = 0;
@@ -777,21 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb,
		/*
		 * Stop writeback when nr_pages has been consumed
		 */
-		if (args->nr_pages <= 0)
+		if (work->nr_pages <= 0)
			break;
 
		/*
		 * For background writeout, stop when we are below the
		 * background dirty threshold
		 */
-		if (args->for_background && !over_bground_thresh())
+		if (work->for_background && !over_bground_thresh())
			break;
 
		wbc.more_io = 0;
		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
		wbc.pages_skipped = 0;
-		writeback_inodes_wb(wb, &wbc);
-		args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+		if (work->sb)
+			__writeback_inodes_sb(work->sb, wb, &wbc);
+		else
+			writeback_inodes_wb(wb, &wbc);
+		work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
795 wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; 650 wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
796 651
797 /* 652 /*
@@ -827,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb,
827} 682}
828 683
829/* 684/*
830 * Return the next bdi_work struct that hasn't been processed by this 685 * Return the next wb_writeback_work struct that hasn't been processed yet.
831 * wb thread yet. ->seen is initially set for each thread that exists
832 * for this device, when a thread first notices a piece of work it
833 * clears its bit. Depending on writeback type, the thread will notify
834 * completion on either receiving the work (WB_SYNC_NONE) or after
835 * it is done (WB_SYNC_ALL).
836 */ 686 */
837static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, 687static struct wb_writeback_work *
838 struct bdi_writeback *wb) 688get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
839{ 689{
840 struct bdi_work *work, *ret = NULL; 690 struct wb_writeback_work *work = NULL;
841 691
842 rcu_read_lock(); 692 spin_lock(&bdi->wb_lock);
843 693 if (!list_empty(&bdi->work_list)) {
844 list_for_each_entry_rcu(work, &bdi->work_list, list) { 694 work = list_entry(bdi->work_list.next,
845 if (!test_bit(wb->nr, &work->seen)) 695 struct wb_writeback_work, list);
846 continue; 696 list_del_init(&work->list);
847 clear_bit(wb->nr, &work->seen);
848
849 ret = work;
850 break;
851 } 697 }
852 698 spin_unlock(&bdi->wb_lock);
853 rcu_read_unlock(); 699 return work;
854 return ret;
855} 700}
856 701
857static long wb_check_old_data_flush(struct bdi_writeback *wb) 702static long wb_check_old_data_flush(struct bdi_writeback *wb)
@@ -876,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
876 (inodes_stat.nr_inodes - inodes_stat.nr_unused); 721 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
877 722
878 if (nr_pages) { 723 if (nr_pages) {
879 struct wb_writeback_args args = { 724 struct wb_writeback_work work = {
880 .nr_pages = nr_pages, 725 .nr_pages = nr_pages,
881 .sync_mode = WB_SYNC_NONE, 726 .sync_mode = WB_SYNC_NONE,
882 .for_kupdate = 1, 727 .for_kupdate = 1,
883 .range_cyclic = 1, 728 .range_cyclic = 1,
884 }; 729 };
885 730
886 return wb_writeback(wb, &args); 731 return wb_writeback(wb, &work);
887 } 732 }
888 733
889 return 0; 734 return 0;
@@ -895,33 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
895long wb_do_writeback(struct bdi_writeback *wb, int force_wait) 740long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
896{ 741{
897 struct backing_dev_info *bdi = wb->bdi; 742 struct backing_dev_info *bdi = wb->bdi;
898 struct bdi_work *work; 743 struct wb_writeback_work *work;
899 long wrote = 0; 744 long wrote = 0;
900 745
901 while ((work = get_next_work_item(bdi, wb)) != NULL) { 746 while ((work = get_next_work_item(bdi, wb)) != NULL) {
902 struct wb_writeback_args args = work->args;
903
904 /* 747 /*
905 * Override sync mode, in case we must wait for completion 748 * Override sync mode, in case we must wait for completion
749 * because this thread is exiting now.
906 */ 750 */
907 if (force_wait) 751 if (force_wait)
908 work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; 752 work->sync_mode = WB_SYNC_ALL;
909
910 /*
911 * If this isn't a data integrity operation, just notify
912 * that we have seen this work and we are now starting it.
913 */
914 if (args.sync_mode == WB_SYNC_NONE)
915 wb_clear_pending(wb, work);
916 753
917 wrote += wb_writeback(wb, &args); 754 wrote += wb_writeback(wb, work);
918 755
919 /* 756 /*
920 * This is a data integrity writeback, so only do the 757 * Notify the caller of completion if this is a synchronous
921 * notification when we have completed the work. 758 * work item, otherwise just free it.
922 */ 759 */
923 if (args.sync_mode == WB_SYNC_ALL) 760 if (work->done)
924 wb_clear_pending(wb, work); 761 complete(work->done);
762 else
763 kfree(work);
925 } 764 }
926 765
927 /* 766 /*
@@ -978,42 +817,27 @@ int bdi_writeback_task(struct bdi_writeback *wb)
978} 817}
979 818
980/* 819/*
981 * Schedule writeback for all backing devices. This does WB_SYNC_NONE 820 * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
982 * writeback, for integrity writeback see bdi_sync_writeback(). 821 * the whole world.
983 */ 822 */
984static void bdi_writeback_all(struct super_block *sb, long nr_pages) 823void wakeup_flusher_threads(long nr_pages)
985{ 824{
986 struct wb_writeback_args args = {
987 .sb = sb,
988 .nr_pages = nr_pages,
989 .sync_mode = WB_SYNC_NONE,
990 };
991 struct backing_dev_info *bdi; 825 struct backing_dev_info *bdi;
992 826
993 rcu_read_lock(); 827 if (!nr_pages) {
828 nr_pages = global_page_state(NR_FILE_DIRTY) +
829 global_page_state(NR_UNSTABLE_NFS);
830 }
994 831
832 rcu_read_lock();
995 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { 833 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
996 if (!bdi_has_dirty_io(bdi)) 834 if (!bdi_has_dirty_io(bdi))
997 continue; 835 continue;
998 836 __bdi_start_writeback(bdi, nr_pages, false, false);
999 bdi_alloc_queue_work(bdi, &args);
1000 } 837 }
1001
1002 rcu_read_unlock(); 838 rcu_read_unlock();
1003} 839}
1004 840
1005/*
1006 * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
1007 * the whole world.
1008 */
1009void wakeup_flusher_threads(long nr_pages)
1010{
1011 if (nr_pages == 0)
1012 nr_pages = global_page_state(NR_FILE_DIRTY) +
1013 global_page_state(NR_UNSTABLE_NFS);
1014 bdi_writeback_all(NULL, nr_pages);
1015}
1016
1017static noinline void block_dump___mark_inode_dirty(struct inode *inode) 841static noinline void block_dump___mark_inode_dirty(struct inode *inode)
1018{ 842{
1019 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { 843 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1218,12 +1042,20 @@ void writeback_inodes_sb(struct super_block *sb)
1218{ 1042{
1219 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); 1043 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
1220 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); 1044 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1221 long nr_to_write; 1045 DECLARE_COMPLETION_ONSTACK(done);
1046 struct wb_writeback_work work = {
1047 .sb = sb,
1048 .sync_mode = WB_SYNC_NONE,
1049 .done = &done,
1050 };
1051
1052 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1222 1053
1223 nr_to_write = nr_dirty + nr_unstable + 1054 work.nr_pages = nr_dirty + nr_unstable +
1224 (inodes_stat.nr_inodes - inodes_stat.nr_unused); 1055 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1225 1056
1226 bdi_start_writeback(sb->s_bdi, sb, nr_to_write); 1057 bdi_queue_work(sb->s_bdi, &work);
1058 wait_for_completion(&done);
1227} 1059}
1228EXPORT_SYMBOL(writeback_inodes_sb); 1060EXPORT_SYMBOL(writeback_inodes_sb);
1229 1061
@@ -1237,7 +1069,9 @@ EXPORT_SYMBOL(writeback_inodes_sb);
1237int writeback_inodes_sb_if_idle(struct super_block *sb) 1069int writeback_inodes_sb_if_idle(struct super_block *sb)
1238{ 1070{
1239 if (!writeback_in_progress(sb->s_bdi)) { 1071 if (!writeback_in_progress(sb->s_bdi)) {
1072 down_read(&sb->s_umount);
1240 writeback_inodes_sb(sb); 1073 writeback_inodes_sb(sb);
1074 up_read(&sb->s_umount);
1241 return 1; 1075 return 1;
1242 } else 1076 } else
1243 return 0; 1077 return 0;
@@ -1253,7 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1253 */ 1087 */
1254void sync_inodes_sb(struct super_block *sb) 1088void sync_inodes_sb(struct super_block *sb)
1255{ 1089{
1256 bdi_sync_writeback(sb->s_bdi, sb); 1090 DECLARE_COMPLETION_ONSTACK(done);
1091 struct wb_writeback_work work = {
1092 .sb = sb,
1093 .sync_mode = WB_SYNC_ALL,
1094 .nr_pages = LONG_MAX,
1095 .range_cyclic = 0,
1096 .done = &done,
1097 };
1098
1099 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1100
1101 bdi_queue_work(sb->s_bdi, &work);
1102 wait_for_completion(&done);
1103
1257 wait_sb_inodes(sb); 1104 wait_sb_inodes(sb);
1258} 1105}
1259EXPORT_SYMBOL(sync_inodes_sb); 1106EXPORT_SYMBOL(sync_inodes_sb);
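
A note on the shape of the new scheme: every writeback request above is now a
plain wb_writeback_work, queued on bdi->work_list under wb_lock. Synchronous
callers stack-allocate the work and wait on an on-stack completion;
fire-and-forget callers must kmalloc() it, which is why wb_do_writeback()
kfree()s items that have no ->done. A minimal sketch of the producer side,
assuming the structure layout shown above (the example_* names are
illustrative, and the wake-up is an assumption about what the real
bdi_queue_work() helper does internally):

static void example_queue_work(struct backing_dev_info *bdi,
                               struct wb_writeback_work *work)
{
        spin_lock(&bdi->wb_lock);
        list_add_tail(&work->list, &bdi->work_list); /* consumed by get_next_work_item() */
        spin_unlock(&bdi->wb_lock);
        wake_up_process(bdi->wb.task);               /* kick the per-bdi flusher thread */
}

static void example_sync_caller(struct backing_dev_info *bdi)
{
        DECLARE_COMPLETION_ONSTACK(done);
        struct wb_writeback_work work = {
                .sync_mode = WB_SYNC_ALL,
                .nr_pages  = LONG_MAX,
                .done      = &done,
        };

        example_queue_work(bdi, &work);
        wait_for_completion(&done);     /* wb_do_writeback() signals ->done */
}

Stack allocation is only safe because the caller blocks until the flusher has
finished with the item, exactly as writeback_inodes_sb() does above.
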
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 4a48c0f4b402..84da64b551b2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1041,6 +1041,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
1041 1041
1042 if (gfs2_is_stuffed(ip)) { 1042 if (gfs2_is_stuffed(ip)) {
1043 u64 dsize = size + sizeof(struct gfs2_inode); 1043 u64 dsize = size + sizeof(struct gfs2_inode);
1044 ip->i_disksize = size;
1044 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1045 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1045 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1046 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1046 gfs2_dinode_out(ip, dibh->b_data); 1047 gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 8295c5b5d4a9..26ca3361a8bc 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -392,7 +392,7 @@ static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
392 unsigned totlen = be16_to_cpu(dent->de_rec_len); 392 unsigned totlen = be16_to_cpu(dent->de_rec_len);
393 393
394 if (gfs2_dirent_sentinel(dent)) 394 if (gfs2_dirent_sentinel(dent))
395 actual = GFS2_DIRENT_SIZE(0); 395 actual = 0;
396 if (totlen - actual >= required) 396 if (totlen - actual >= required)
397 return 1; 397 return 1;
398 return 0; 398 return 0;
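
The dir.c hunk is an accounting fix: a sentinel dirent carries no name, so
the whole record is reusable when searching for space, and subtracting
GFS2_DIRENT_SIZE(0) made such a slot look smaller than it really is. A worked
check with made-up sizes (the helper is illustrative, not gfs2 code):

/* Suppose de_rec_len == 48 and the new entry needs 48 bytes.  The old
 * code tested 48 - GFS2_DIRENT_SIZE(0) >= 48 and rejected a slot that
 * fits exactly; with actual = 0 the test becomes 48 >= 48 and passes. */
static int example_fits(unsigned totlen, unsigned actual, unsigned required)
{
        return totlen - actual >= required;
}
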
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ddcdbf493536..0898f3ec8212 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -706,8 +706,18 @@ static void glock_work_func(struct work_struct *work)
706{ 706{
707 unsigned long delay = 0; 707 unsigned long delay = 0;
708 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); 708 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
709 struct gfs2_holder *gh;
709 int drop_ref = 0; 710 int drop_ref = 0;
710 711
712 if (unlikely(test_bit(GLF_FROZEN, &gl->gl_flags))) {
713 spin_lock(&gl->gl_spin);
714 gh = find_first_waiter(gl);
715 if (gh && (gh->gh_flags & LM_FLAG_NOEXP) &&
716 test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
717 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
718 spin_unlock(&gl->gl_spin);
719 }
720
711 if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { 721 if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
712 finish_xmote(gl, gl->gl_reply); 722 finish_xmote(gl, gl->gl_reply);
713 drop_ref = 1; 723 drop_ref = 1;
@@ -1348,7 +1358,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1348} 1358}
1349 1359
1350 1360
1351static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) 1361static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
1352{ 1362{
1353 struct gfs2_glock *gl; 1363 struct gfs2_glock *gl;
1354 int may_demote; 1364 int may_demote;
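
This is the first of several hunks in the patch (gfs2/quota.c, fs/inode.c,
fs/mbcache.c and fs/nfs/dir.c below make the same change) adapting to the
shrinker callback gaining a struct shrinker * argument, which lets one
callback recover per-instance state via container_of(). A sketch of the
updated shape, with illustrative example_* names; registration still goes
through register_shrinker()/unregister_shrinker():

static int example_cache_count;

static int example_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
        if (nr_to_scan) {
                if (!(gfp_mask & __GFP_FS))
                        return -1;      /* refuse rather than recurse into the fs */
                /* ... free up to nr_to_scan cached objects ... */
        }
        return example_cache_count;     /* remaining reclaimable objects */
}

static struct shrinker example_shrinker = {
        .shrink = example_shrink,
        .seeks  = DEFAULT_SEEKS,
};
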
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b5612cbb62a5..f03afd9c44bc 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -169,7 +169,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
169{ 169{
170 struct inode *inode; 170 struct inode *inode;
171 struct gfs2_inode *ip; 171 struct gfs2_inode *ip;
172 struct gfs2_glock *io_gl; 172 struct gfs2_glock *io_gl = NULL;
173 int error; 173 int error;
174 174
175 inode = gfs2_iget(sb, no_addr); 175 inode = gfs2_iget(sb, no_addr);
@@ -198,6 +198,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
198 ip->i_iopen_gh.gh_gl->gl_object = ip; 198 ip->i_iopen_gh.gh_gl->gl_object = ip;
199 199
200 gfs2_glock_put(io_gl); 200 gfs2_glock_put(io_gl);
201 io_gl = NULL;
201 202
202 if ((type == DT_UNKNOWN) && (no_formal_ino == 0)) 203 if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
203 goto gfs2_nfsbypass; 204 goto gfs2_nfsbypass;
@@ -228,7 +229,8 @@ gfs2_nfsbypass:
228fail_glock: 229fail_glock:
229 gfs2_glock_dq(&ip->i_iopen_gh); 230 gfs2_glock_dq(&ip->i_iopen_gh);
230fail_iopen: 231fail_iopen:
231 gfs2_glock_put(io_gl); 232 if (io_gl)
233 gfs2_glock_put(io_gl);
232fail_put: 234fail_put:
233 if (inode->i_state & I_NEW) 235 if (inode->i_state & I_NEW)
234 ip->i_gl->gl_object = NULL; 236 ip->i_gl->gl_object = NULL;
@@ -256,7 +258,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
256{ 258{
257 struct gfs2_sbd *sdp; 259 struct gfs2_sbd *sdp;
258 struct gfs2_inode *ip; 260 struct gfs2_inode *ip;
259 struct gfs2_glock *io_gl; 261 struct gfs2_glock *io_gl = NULL;
260 int error; 262 int error;
261 struct gfs2_holder gh; 263 struct gfs2_holder gh;
262 struct inode *inode; 264 struct inode *inode;
@@ -293,6 +295,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
293 295
294 ip->i_iopen_gh.gh_gl->gl_object = ip; 296 ip->i_iopen_gh.gh_gl->gl_object = ip;
295 gfs2_glock_put(io_gl); 297 gfs2_glock_put(io_gl);
298 io_gl = NULL;
296 299
297 inode->i_mode = DT2IF(DT_UNKNOWN); 300 inode->i_mode = DT2IF(DT_UNKNOWN);
298 301
@@ -319,7 +322,8 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
319fail_glock: 322fail_glock:
320 gfs2_glock_dq(&ip->i_iopen_gh); 323 gfs2_glock_dq(&ip->i_iopen_gh);
321fail_iopen: 324fail_iopen:
322 gfs2_glock_put(io_gl); 325 if (io_gl)
326 gfs2_glock_put(io_gl);
323fail_put: 327fail_put:
324 ip->i_gl->gl_object = NULL; 328 ip->i_gl->gl_object = NULL;
325 gfs2_glock_put(ip->i_gl); 329 gfs2_glock_put(ip->i_gl);
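
Both inode.c hunks apply the same defensive idiom: once the iopen glock
reference has been handed over, drop it and NULL the local so the shared
fail_iopen: label cannot put it a second time. Reduced to its core (all
example_* helpers are hypothetical):

struct kref;
extern struct kref *example_get(void);
extern void example_put(struct kref *ref);
extern int example_step(void);

static int example_setup(void)
{
        struct kref *ref = example_get();

        if (example_step())     /* early failure: we still own ref */
                goto fail;

        example_put(ref);       /* done with our reference */
        ref = NULL;             /* so the shared error path skips the put */

        if (example_step())     /* late failure: ref already dropped */
                goto fail;
        return 0;
fail:
        if (ref)
                example_put(ref);
        return -EIO;
}
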
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 49667d68769e..8f02d3db8f42 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -77,7 +77,7 @@ static LIST_HEAD(qd_lru_list);
77static atomic_t qd_lru_count = ATOMIC_INIT(0); 77static atomic_t qd_lru_count = ATOMIC_INIT(0);
78static DEFINE_SPINLOCK(qd_lru_lock); 78static DEFINE_SPINLOCK(qd_lru_lock);
79 79
80int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask) 80int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
81{ 81{
82 struct gfs2_quota_data *qd; 82 struct gfs2_quota_data *qd;
83 struct gfs2_sbd *sdp; 83 struct gfs2_sbd *sdp;
@@ -694,10 +694,8 @@ get_a_page:
694 if (!buffer_mapped(bh)) 694 if (!buffer_mapped(bh))
695 goto unlock_out; 695 goto unlock_out;
696 /* If it's a newly allocated disk block for quota, zero it */ 696 /* If it's a newly allocated disk block for quota, zero it */
697 if (buffer_new(bh)) { 697 if (buffer_new(bh))
698 memset(bh->b_data, 0, bh->b_size); 698 zero_user(page, pos - blocksize, bh->b_size);
699 set_buffer_uptodate(bh);
700 }
701 } 699 }
702 700
703 if (PageUptodate(page)) 701 if (PageUptodate(page))
@@ -723,7 +721,7 @@ get_a_page:
723 721
724 /* If quota straddles page boundary, we need to update the rest of the 722 /* If quota straddles page boundary, we need to update the rest of the
725 * quota at the beginning of the next page */ 723 * quota at the beginning of the next page */
726 if (offset != 0) { /* first page, offset is closer to PAGE_CACHE_SIZE */ 724 if ((offset + sizeof(struct gfs2_quota)) > PAGE_CACHE_SIZE) {
727 ptr = ptr + nbytes; 725 ptr = ptr + nbytes;
728 nbytes = sizeof(struct gfs2_quota) - nbytes; 726 nbytes = sizeof(struct gfs2_quota) - nbytes;
729 offset = 0; 727 offset = 0;
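
Two separate fixes in the quota.c hunk: zero_user() replaces the open-coded
memset() (it kmaps the page before zeroing the byte range, so it is correct
for highmem pages too), and the page-straddle test now asks whether the
record actually spills over rather than firing whenever the record does not
start at a page boundary. Worked numbers for the new test, with an
illustrative record size:

/* With PAGE_CACHE_SIZE == 4096 and an 88-byte on-disk record:
 *   offset 4040: 4040 + 88 = 4128 > 4096 -> tail continues on the next page
 *   offset 4000: 4000 + 88 = 4088        -> fits, no second pass needed
 * The old "offset != 0" test took the second pass even in the second case. */
static int example_straddles(unsigned offset, size_t reclen)
{
        return offset + reclen > 4096;
}
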
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 195f60c8bd14..e7d236ca48bd 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -51,7 +51,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
51 return ret; 51 return ret;
52} 52}
53 53
54extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask); 54extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask);
55extern const struct quotactl_ops gfs2_quotactl_ops; 55extern const struct quotactl_ops gfs2_quotactl_ops;
56 56
57#endif /* __QUOTA_DOT_H__ */ 57#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/inode.c b/fs/inode.c
index 2bee20ae3d65..722860b323a9 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -512,7 +512,7 @@ static void prune_icache(int nr_to_scan)
512 * This function is passed the number of inodes to scan, and it returns the 512 * This function is passed the number of inodes to scan, and it returns the
513 * total number of remaining possibly-reclaimable inodes. 513 * total number of remaining possibly-reclaimable inodes.
514 */ 514 */
515static int shrink_icache_memory(int nr, gfp_t gfp_mask) 515static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
516{ 516{
517 if (nr) { 517 if (nr) {
518 /* 518 /*
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index bc2ff5932769..036880895bfc 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -297,7 +297,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
297 struct page *new_page; 297 struct page *new_page;
298 unsigned int new_offset; 298 unsigned int new_offset;
299 struct buffer_head *bh_in = jh2bh(jh_in); 299 struct buffer_head *bh_in = jh2bh(jh_in);
300 struct jbd2_buffer_trigger_type *triggers;
301 journal_t *journal = transaction->t_journal; 300 journal_t *journal = transaction->t_journal;
302 301
303 /* 302 /*
@@ -328,21 +327,21 @@ repeat:
328 done_copy_out = 1; 327 done_copy_out = 1;
329 new_page = virt_to_page(jh_in->b_frozen_data); 328 new_page = virt_to_page(jh_in->b_frozen_data);
330 new_offset = offset_in_page(jh_in->b_frozen_data); 329 new_offset = offset_in_page(jh_in->b_frozen_data);
331 triggers = jh_in->b_frozen_triggers;
332 } else { 330 } else {
333 new_page = jh2bh(jh_in)->b_page; 331 new_page = jh2bh(jh_in)->b_page;
334 new_offset = offset_in_page(jh2bh(jh_in)->b_data); 332 new_offset = offset_in_page(jh2bh(jh_in)->b_data);
335 triggers = jh_in->b_triggers;
336 } 333 }
337 334
338 mapped_data = kmap_atomic(new_page, KM_USER0); 335 mapped_data = kmap_atomic(new_page, KM_USER0);
339 /* 336 /*
340 * Fire any commit trigger. Do this before checking for escaping, 337 * Fire the data frozen trigger if the data wasn't already frozen. Do this
341 * as the trigger may modify the magic offset. If a copy-out 338 * before checking for escaping, as the trigger may modify the magic
342 * happens afterwards, it will have the correct data in the buffer. 339 * offset. If a copy-out happens afterwards, it will have the correct
340 * data in the buffer.
343 */ 341 */
344 jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset, 342 if (!done_copy_out)
345 triggers); 343 jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
344 jh_in->b_triggers);
346 345
347 /* 346 /*
348 * Check for escaping 347 * Check for escaping
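
The rename from t_commit to t_frozen reflects when the hook now runs: once,
at the moment the buffer's journalled image is frozen (in place here, or at
copy-out time in the transaction.c hunk below), instead of on every pass
through the commit path. A sketch of a client, loosely modelled on how ocfs2
uses the trigger to recompute block checksums; the ecc helper is
hypothetical:

extern void example_recompute_ecc(void *data, size_t size);

static void example_frozen(struct jbd2_buffer_trigger_type *type,
                           struct buffer_head *bh, void *mapped_data,
                           size_t size)
{
        /* mapped_data is the image the journal will actually write */
        example_recompute_ecc(mapped_data, size);
}

static struct jbd2_buffer_trigger_type example_triggers = {
        .t_frozen = example_frozen,
};

A buffer opts in with jbd2_journal_set_triggers(bh, &example_triggers).
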
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e214d68620ac..b8e0806681bb 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -725,6 +725,9 @@ done:
725 page = jh2bh(jh)->b_page; 725 page = jh2bh(jh)->b_page;
726 offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; 726 offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
727 source = kmap_atomic(page, KM_USER0); 727 source = kmap_atomic(page, KM_USER0);
728 /* Fire the data frozen trigger just before we copy the data */
729 jbd2_buffer_frozen_trigger(jh, source + offset,
730 jh->b_triggers);
728 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); 731 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
729 kunmap_atomic(source, KM_USER0); 732 kunmap_atomic(source, KM_USER0);
730 733
@@ -963,15 +966,15 @@ void jbd2_journal_set_triggers(struct buffer_head *bh,
963 jh->b_triggers = type; 966 jh->b_triggers = type;
964} 967}
965 968
966void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data, 969void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
967 struct jbd2_buffer_trigger_type *triggers) 970 struct jbd2_buffer_trigger_type *triggers)
968{ 971{
969 struct buffer_head *bh = jh2bh(jh); 972 struct buffer_head *bh = jh2bh(jh);
970 973
971 if (!triggers || !triggers->t_commit) 974 if (!triggers || !triggers->t_frozen)
972 return; 975 return;
973 976
974 triggers->t_commit(triggers, bh, mapped_data, bh->b_size); 977 triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
975} 978}
976 979
977void jbd2_buffer_abort_trigger(struct journal_head *jh, 980void jbd2_buffer_abort_trigger(struct journal_head *jh,
diff --git a/fs/mbcache.c b/fs/mbcache.c
index ec88ff3d04a9..e28f21b95344 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -115,7 +115,7 @@ mb_cache_indexes(struct mb_cache *cache)
115 * What the mbcache registers as to get shrunk dynamically. 115 * What the mbcache registers as to get shrunk dynamically.
116 */ 116 */
117 117
118static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask); 118static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
119 119
120static struct shrinker mb_cache_shrinker = { 120static struct shrinker mb_cache_shrinker = {
121 .shrink = mb_cache_shrink_fn, 121 .shrink = mb_cache_shrink_fn,
@@ -191,13 +191,14 @@ forget:
191 * This function is called by the kernel memory management when memory 191 * This function is called by the kernel memory management when memory
192 * gets low. 192 * gets low.
193 * 193 *
194 * @shrink: (ignored)
194 * @nr_to_scan: Number of objects to scan 195 * @nr_to_scan: Number of objects to scan
195 * @gfp_mask: (ignored) 196 * @gfp_mask: (ignored)
196 * 197 *
197 * Returns the number of objects which are present in the cache. 198 * Returns the number of objects which are present in the cache.
198 */ 199 */
199static int 200static int
200mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask) 201mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
201{ 202{
202 LIST_HEAD(free_list); 203 LIST_HEAD(free_list);
203 struct list_head *l, *ltmp; 204 struct list_head *l, *ltmp;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 7ec9b34a59f8..d25b5257b7a1 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1286,6 +1286,55 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
1286#endif /* CONFIG_NFS_V4_1 */ 1286#endif /* CONFIG_NFS_V4_1 */
1287} 1287}
1288 1288
1289static int nfs4_server_common_setup(struct nfs_server *server,
1290 struct nfs_fh *mntfh)
1291{
1292 struct nfs_fattr *fattr;
1293 int error;
1294
1295 BUG_ON(!server->nfs_client);
1296 BUG_ON(!server->nfs_client->rpc_ops);
1297 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1298
1299 fattr = nfs_alloc_fattr();
1300 if (fattr == NULL)
1301 return -ENOMEM;
1302
1303 /* We must ensure the session is initialised first */
1304 error = nfs4_init_session(server);
1305 if (error < 0)
1306 goto out;
1307
1308 /* Probe the root fh to retrieve its FSID and filehandle */
1309 error = nfs4_get_rootfh(server, mntfh);
1310 if (error < 0)
1311 goto out;
1312
1313 dprintk("Server FSID: %llx:%llx\n",
1314 (unsigned long long) server->fsid.major,
1315 (unsigned long long) server->fsid.minor);
1316 dprintk("Mount FH: %d\n", mntfh->size);
1317
1318 nfs4_session_set_rwsize(server);
1319
1320 error = nfs_probe_fsinfo(server, mntfh, fattr);
1321 if (error < 0)
1322 goto out;
1323
1324 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
1325 server->namelen = NFS4_MAXNAMLEN;
1326
1327 spin_lock(&nfs_client_lock);
1328 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1329 list_add_tail(&server->master_link, &nfs_volume_list);
1330 spin_unlock(&nfs_client_lock);
1331
1332 server->mount_time = jiffies;
1333out:
1334 nfs_free_fattr(fattr);
1335 return error;
1336}
1337
1289/* 1338/*
1290 * Create a version 4 volume record 1339 * Create a version 4 volume record
1291 */ 1340 */
@@ -1346,7 +1395,6 @@ error:
1346struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, 1395struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1347 struct nfs_fh *mntfh) 1396 struct nfs_fh *mntfh)
1348{ 1397{
1349 struct nfs_fattr *fattr;
1350 struct nfs_server *server; 1398 struct nfs_server *server;
1351 int error; 1399 int error;
1352 1400
@@ -1356,55 +1404,19 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1356 if (!server) 1404 if (!server)
1357 return ERR_PTR(-ENOMEM); 1405 return ERR_PTR(-ENOMEM);
1358 1406
1359 error = -ENOMEM;
1360 fattr = nfs_alloc_fattr();
1361 if (fattr == NULL)
1362 goto error;
1363
1364 /* set up the general RPC client */ 1407 /* set up the general RPC client */
1365 error = nfs4_init_server(server, data); 1408 error = nfs4_init_server(server, data);
1366 if (error < 0) 1409 if (error < 0)
1367 goto error; 1410 goto error;
1368 1411
1369 BUG_ON(!server->nfs_client); 1412 error = nfs4_server_common_setup(server, mntfh);
1370 BUG_ON(!server->nfs_client->rpc_ops);
1371 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1372
1373 error = nfs4_init_session(server);
1374 if (error < 0)
1375 goto error;
1376
1377 /* Probe the root fh to retrieve its FSID */
1378 error = nfs4_get_rootfh(server, mntfh);
1379 if (error < 0) 1413 if (error < 0)
1380 goto error; 1414 goto error;
1381 1415
1382 dprintk("Server FSID: %llx:%llx\n",
1383 (unsigned long long) server->fsid.major,
1384 (unsigned long long) server->fsid.minor);
1385 dprintk("Mount FH: %d\n", mntfh->size);
1386
1387 nfs4_session_set_rwsize(server);
1388
1389 error = nfs_probe_fsinfo(server, mntfh, fattr);
1390 if (error < 0)
1391 goto error;
1392
1393 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
1394 server->namelen = NFS4_MAXNAMLEN;
1395
1396 spin_lock(&nfs_client_lock);
1397 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1398 list_add_tail(&server->master_link, &nfs_volume_list);
1399 spin_unlock(&nfs_client_lock);
1400
1401 server->mount_time = jiffies;
1402 dprintk("<-- nfs4_create_server() = %p\n", server); 1416 dprintk("<-- nfs4_create_server() = %p\n", server);
1403 nfs_free_fattr(fattr);
1404 return server; 1417 return server;
1405 1418
1406error: 1419error:
1407 nfs_free_fattr(fattr);
1408 nfs_free_server(server); 1420 nfs_free_server(server);
1409 dprintk("<-- nfs4_create_server() = error %d\n", error); 1421 dprintk("<-- nfs4_create_server() = error %d\n", error);
1410 return ERR_PTR(error); 1422 return ERR_PTR(error);
@@ -1418,7 +1430,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1418{ 1430{
1419 struct nfs_client *parent_client; 1431 struct nfs_client *parent_client;
1420 struct nfs_server *server, *parent_server; 1432 struct nfs_server *server, *parent_server;
1421 struct nfs_fattr *fattr;
1422 int error; 1433 int error;
1423 1434
1424 dprintk("--> nfs4_create_referral_server()\n"); 1435 dprintk("--> nfs4_create_referral_server()\n");
@@ -1427,11 +1438,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1427 if (!server) 1438 if (!server)
1428 return ERR_PTR(-ENOMEM); 1439 return ERR_PTR(-ENOMEM);
1429 1440
1430 error = -ENOMEM;
1431 fattr = nfs_alloc_fattr();
1432 if (fattr == NULL)
1433 goto error;
1434
1435 parent_server = NFS_SB(data->sb); 1441 parent_server = NFS_SB(data->sb);
1436 parent_client = parent_server->nfs_client; 1442 parent_client = parent_server->nfs_client;
1437 1443
@@ -1456,40 +1462,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1456 if (error < 0) 1462 if (error < 0)
1457 goto error; 1463 goto error;
1458 1464
1459 BUG_ON(!server->nfs_client); 1465 error = nfs4_server_common_setup(server, mntfh);
1460 BUG_ON(!server->nfs_client->rpc_ops);
1461 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1462
1463 /* Probe the root fh to retrieve its FSID and filehandle */
1464 error = nfs4_get_rootfh(server, mntfh);
1465 if (error < 0)
1466 goto error;
1467
1468 /* probe the filesystem info for this server filesystem */
1469 error = nfs_probe_fsinfo(server, mntfh, fattr);
1470 if (error < 0) 1466 if (error < 0)
1471 goto error; 1467 goto error;
1472 1468
1473 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
1474 server->namelen = NFS4_MAXNAMLEN;
1475
1476 dprintk("Referral FSID: %llx:%llx\n",
1477 (unsigned long long) server->fsid.major,
1478 (unsigned long long) server->fsid.minor);
1479
1480 spin_lock(&nfs_client_lock);
1481 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1482 list_add_tail(&server->master_link, &nfs_volume_list);
1483 spin_unlock(&nfs_client_lock);
1484
1485 server->mount_time = jiffies;
1486
1487 nfs_free_fattr(fattr);
1488 dprintk("<-- nfs_create_referral_server() = %p\n", server); 1469 dprintk("<-- nfs_create_referral_server() = %p\n", server);
1489 return server; 1470 return server;
1490 1471
1491error: 1472error:
1492 nfs_free_fattr(fattr);
1493 nfs_free_server(server); 1473 nfs_free_server(server);
1494 dprintk("<-- nfs4_create_referral_server() = error %d\n", error); 1474 dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
1495 return ERR_PTR(error); 1475 return ERR_PTR(error);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 782b431ef91c..e60416d3f818 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1710,7 +1710,7 @@ static void nfs_access_free_list(struct list_head *head)
1710 } 1710 }
1711} 1711}
1712 1712
1713int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) 1713int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
1714{ 1714{
1715 LIST_HEAD(head); 1715 LIST_HEAD(head);
1716 struct nfs_inode *nfsi; 1716 struct nfs_inode *nfsi;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 7428f7d6273b..a70e446e1605 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -146,7 +146,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
146 goto out; 146 goto out;
147 } 147 }
148 148
149 if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_MODE) 149 if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE)
150 || !S_ISDIR(fsinfo.fattr->mode)) { 150 || !S_ISDIR(fsinfo.fattr->mode)) {
151 printk(KERN_ERR "nfs4_get_rootfh:" 151 printk(KERN_ERR "nfs4_get_rootfh:"
152 " getroot encountered non-directory\n"); 152 " getroot encountered non-directory\n");
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d8bd619e386c..e70f44b9b3f4 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -205,7 +205,8 @@ extern struct rpc_procinfo nfs4_procedures[];
205void nfs_close_context(struct nfs_open_context *ctx, int is_sync); 205void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
206 206
207/* dir.c */ 207/* dir.c */
208extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); 208extern int nfs_access_cache_shrinker(struct shrinker *shrink,
209 int nr_to_scan, gfp_t gfp_mask);
209 210
210/* inode.c */ 211/* inode.c */
211extern struct workqueue_struct *nfsiod_workqueue; 212extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6bdef28efa33..65c8dae4b267 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -862,8 +862,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
862 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; 862 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
863 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); 863 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
864 *p++ = cpu_to_be32(0); 864 *p++ = cpu_to_be32(0);
865 *p++ = cpu_to_be32(iap->ia_mtime.tv_sec); 865 *p++ = cpu_to_be32(iap->ia_atime.tv_sec);
866 *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); 866 *p++ = cpu_to_be32(iap->ia_atime.tv_nsec);
867 } 867 }
868 else if (iap->ia_valid & ATTR_ATIME) { 868 else if (iap->ia_valid & ATTR_ATIME) {
869 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; 869 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 04214fc5c304..f9df16de4a56 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -570,6 +570,22 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
570 nfs_show_mountd_netid(m, nfss, showdefaults); 570 nfs_show_mountd_netid(m, nfss, showdefaults);
571} 571}
572 572
573#ifdef CONFIG_NFS_V4
574static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
575 int showdefaults)
576{
577 struct nfs_client *clp = nfss->nfs_client;
578
579 seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
580 seq_printf(m, ",minorversion=%u", clp->cl_minorversion);
581}
582#else
583static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
584 int showdefaults)
585{
586}
587#endif
588
573/* 589/*
574 * Describe the mount options in force on this server representation 590 * Describe the mount options in force on this server representation
575 */ 591 */
@@ -631,11 +647,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
631 647
632 if (version != 4) 648 if (version != 4)
633 nfs_show_mountd_options(m, nfss, showdefaults); 649 nfs_show_mountd_options(m, nfss, showdefaults);
650 else
651 nfs_show_nfsv4_options(m, nfss, showdefaults);
634 652
635#ifdef CONFIG_NFS_V4
636 if (clp->rpc_ops->version == 4)
637 seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
638#endif
639 if (nfss->options & NFS_OPTION_FSCACHE) 653 if (nfss->options & NFS_OPTION_FSCACHE)
640 seq_printf(m, ",fsc"); 654 seq_printf(m, ",fsc");
641} 655}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3623ca20cc18..356e976772bf 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -196,15 +196,14 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
196 dump_stack(); 196 dump_stack();
197 goto bail; 197 goto bail;
198 } 198 }
199
200 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
201 mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
202 (unsigned long long)past_eof);
203
204 if (create && (iblock >= past_eof))
205 set_buffer_new(bh_result);
206 } 199 }
207 200
201 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
202 mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
203 (unsigned long long)past_eof);
204 if (create && (iblock >= past_eof))
205 set_buffer_new(bh_result);
206
208bail: 207bail:
209 if (err < 0) 208 if (err < 0)
210 err = -EIO; 209 err = -EIO;
@@ -459,36 +458,6 @@ int walk_page_buffers( handle_t *handle,
459 return ret; 458 return ret;
460} 459}
461 460
462handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
463 struct page *page,
464 unsigned from,
465 unsigned to)
466{
467 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
468 handle_t *handle;
469 int ret = 0;
470
471 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
472 if (IS_ERR(handle)) {
473 ret = -ENOMEM;
474 mlog_errno(ret);
475 goto out;
476 }
477
478 if (ocfs2_should_order_data(inode)) {
479 ret = ocfs2_jbd2_file_inode(handle, inode);
480 if (ret < 0)
481 mlog_errno(ret);
482 }
483out:
484 if (ret) {
485 if (!IS_ERR(handle))
486 ocfs2_commit_trans(osb, handle);
487 handle = ERR_PTR(ret);
488 }
489 return handle;
490}
491
492static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) 461static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
493{ 462{
494 sector_t status; 463 sector_t status;
@@ -1131,23 +1100,37 @@ out:
1131 */ 1100 */
1132static int ocfs2_grab_pages_for_write(struct address_space *mapping, 1101static int ocfs2_grab_pages_for_write(struct address_space *mapping,
1133 struct ocfs2_write_ctxt *wc, 1102 struct ocfs2_write_ctxt *wc,
1134 u32 cpos, loff_t user_pos, int new, 1103 u32 cpos, loff_t user_pos,
1104 unsigned user_len, int new,
1135 struct page *mmap_page) 1105 struct page *mmap_page)
1136{ 1106{
1137 int ret = 0, i; 1107 int ret = 0, i;
1138 unsigned long start, target_index, index; 1108 unsigned long start, target_index, end_index, index;
1139 struct inode *inode = mapping->host; 1109 struct inode *inode = mapping->host;
1110 loff_t last_byte;
1140 1111
1141 target_index = user_pos >> PAGE_CACHE_SHIFT; 1112 target_index = user_pos >> PAGE_CACHE_SHIFT;
1142 1113
1143 /* 1114 /*
1144 * Figure out how many pages we'll be manipulating here. For 1115 * Figure out how many pages we'll be manipulating here. For
1145 * non-allocating write, we just change the one 1116 * non-allocating write, we just change the one
1146 * page. Otherwise, we'll need a whole cluster's worth. 1117 * page. Otherwise, we'll need a whole cluster's worth. If we're
1118 * writing past i_size, we only need enough pages to cover the
1119 * last page of the write.
1147 */ 1120 */
1148 if (new) { 1121 if (new) {
1149 wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb); 1122 wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
1150 start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos); 1123 start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
1124 /*
1125 * We need the index *past* the last page we could possibly
1126 * touch. This is the page past the end of the write or
1127 * i_size, whichever is greater.
1128 */
1129 last_byte = max(user_pos + user_len, i_size_read(inode));
1130 BUG_ON(last_byte < 1);
1131 end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1;
1132 if ((start + wc->w_num_pages) > end_index)
1133 wc->w_num_pages = end_index - start;
1151 } else { 1134 } else {
1152 wc->w_num_pages = 1; 1135 wc->w_num_pages = 1;
1153 start = target_index; 1136 start = target_index;
@@ -1620,21 +1603,20 @@ out:
1620 * write path can treat it as a non-allocating write, which has no 1603 * write path can treat it as a non-allocating write, which has no
1621 * special case code for sparse/nonsparse files. 1604 * special case code for sparse/nonsparse files.
1622 */ 1605 */
1623static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos, 1606static int ocfs2_expand_nonsparse_inode(struct inode *inode,
1624 unsigned len, 1607 struct buffer_head *di_bh,
1608 loff_t pos, unsigned len,
1625 struct ocfs2_write_ctxt *wc) 1609 struct ocfs2_write_ctxt *wc)
1626{ 1610{
1627 int ret; 1611 int ret;
1628 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1629 loff_t newsize = pos + len; 1612 loff_t newsize = pos + len;
1630 1613
1631 if (ocfs2_sparse_alloc(osb)) 1614 BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
1632 return 0;
1633 1615
1634 if (newsize <= i_size_read(inode)) 1616 if (newsize <= i_size_read(inode))
1635 return 0; 1617 return 0;
1636 1618
1637 ret = ocfs2_extend_no_holes(inode, newsize, pos); 1619 ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos);
1638 if (ret) 1620 if (ret)
1639 mlog_errno(ret); 1621 mlog_errno(ret);
1640 1622
@@ -1644,6 +1626,18 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
1644 return ret; 1626 return ret;
1645} 1627}
1646 1628
1629static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
1630 loff_t pos)
1631{
1632 int ret = 0;
1633
1634 BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
1635 if (pos > i_size_read(inode))
1636 ret = ocfs2_zero_extend(inode, di_bh, pos);
1637
1638 return ret;
1639}
1640
1647int ocfs2_write_begin_nolock(struct address_space *mapping, 1641int ocfs2_write_begin_nolock(struct address_space *mapping,
1648 loff_t pos, unsigned len, unsigned flags, 1642 loff_t pos, unsigned len, unsigned flags,
1649 struct page **pagep, void **fsdata, 1643 struct page **pagep, void **fsdata,
@@ -1679,7 +1673,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1679 } 1673 }
1680 } 1674 }
1681 1675
1682 ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc); 1676 if (ocfs2_sparse_alloc(osb))
1677 ret = ocfs2_zero_tail(inode, di_bh, pos);
1678 else
1679 ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len,
1680 wc);
1683 if (ret) { 1681 if (ret) {
1684 mlog_errno(ret); 1682 mlog_errno(ret);
1685 goto out; 1683 goto out;
@@ -1789,7 +1787,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1789 * that we can zero and flush if we error after adding the 1787 * that we can zero and flush if we error after adding the
1790 * extent. 1788 * extent.
1791 */ 1789 */
1792 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, 1790 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
1793 cluster_of_pages, mmap_page); 1791 cluster_of_pages, mmap_page);
1794 if (ret) { 1792 if (ret) {
1795 mlog_errno(ret); 1793 mlog_errno(ret);
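
The heart of the aops.c change is the new page bound in
ocfs2_grab_pages_for_write(): rather than always taking a full cluster of
pages, stop at the page past max(end of write, i_size). A worked computation
with 4K pages (PAGE_CACHE_SHIFT == 12), 8 pages per cluster and illustrative
sizes:

/* A 100-byte append at offset 20480 with i_size == 20480, cluster
 * starting at page index 0:
 *   last_byte   = max(20480 + 100, 20480)  = 20580
 *   end_index   = ((20580 - 1) >> 12) + 1  = 6
 *   w_num_pages = min(8, 6 - 0)            = 6
 * Pages 6 and 7 lie wholly past both the write and i_size, so they are
 * no longer grabbed (or zeroed). */
static unsigned long example_num_pages(loff_t pos, unsigned len, loff_t i_size,
                                       unsigned long start,
                                       unsigned long pages_per_cluster)
{
        loff_t last_byte = max_t(loff_t, pos + len, i_size);
        unsigned long end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1;

        if (start + pages_per_cluster > end_index)
                return end_index - start;
        return pages_per_cluster;
}
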
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 6b5a492e1749..153abb5abef0 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1671,7 +1671,7 @@ struct dlm_ctxt * dlm_register_domain(const char *domain,
1671 struct dlm_ctxt *dlm = NULL; 1671 struct dlm_ctxt *dlm = NULL;
1672 struct dlm_ctxt *new_ctxt = NULL; 1672 struct dlm_ctxt *new_ctxt = NULL;
1673 1673
1674 if (strlen(domain) > O2NM_MAX_NAME_LEN) { 1674 if (strlen(domain) >= O2NM_MAX_NAME_LEN) {
1675 ret = -ENAMETOOLONG; 1675 ret = -ENAMETOOLONG;
1676 mlog(ML_ERROR, "domain name length too long\n"); 1676 mlog(ML_ERROR, "domain name length too long\n");
1677 goto leave; 1677 goto leave;
@@ -1709,6 +1709,7 @@ retry:
1709 } 1709 }
1710 1710
1711 if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) { 1711 if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) {
1712 spin_unlock(&dlm_domain_lock);
1712 mlog(ML_ERROR, 1713 mlog(ML_ERROR,
1713 "Requested locking protocol version is not " 1714 "Requested locking protocol version is not "
1714 "compatible with already registered domain " 1715 "compatible with already registered domain "
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 4a7506a4e314..94b97fc6a88e 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2808,14 +2808,8 @@ again:
2808 mlog(0, "trying again...\n"); 2808 mlog(0, "trying again...\n");
2809 goto again; 2809 goto again;
2810 } 2810 }
2811 /* now that we are sure the MIGRATING state is there, drop
2812 * the unneeded state which blocked threads trying to DIRTY */
2813 spin_lock(&res->spinlock);
2814 BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
2815 BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
2816 res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
2817 spin_unlock(&res->spinlock);
2818 2811
2812 ret = 0;
2819 /* did the target go down or die? */ 2813 /* did the target go down or die? */
2820 spin_lock(&dlm->spinlock); 2814 spin_lock(&dlm->spinlock);
2821 if (!test_bit(target, dlm->domain_map)) { 2815 if (!test_bit(target, dlm->domain_map)) {
@@ -2826,9 +2820,21 @@ again:
2826 spin_unlock(&dlm->spinlock); 2820 spin_unlock(&dlm->spinlock);
2827 2821
2828 /* 2822 /*
2823 * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for
2824 * another try; otherwise, we are sure the MIGRATING state is there,
2825 * drop the unneeded state which blocked threads trying to DIRTY
2826 */
2827 spin_lock(&res->spinlock);
2828 BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
2829 res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
2830 if (!ret)
2831 BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
2832 spin_unlock(&res->spinlock);
2833
2834 /*
2829 * at this point: 2835 * at this point:
2830 * 2836 *
2831 * o the DLM_LOCK_RES_MIGRATING flag is set 2837 * o the DLM_LOCK_RES_MIGRATING flag is set if target not down
2832 * o there are no pending asts on this lockres 2838 * o there are no pending asts on this lockres
2833 * o all processes trying to reserve an ast on this 2839 * o all processes trying to reserve an ast on this
2834 * lockres must wait for the MIGRATING flag to clear 2840 * lockres must wait for the MIGRATING flag to clear
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index f8b75ce4be70..9dfaac73b36d 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -463,7 +463,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
463 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { 463 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
464 int bit; 464 int bit;
465 465
466 bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0); 466 bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0);
467 if (bit >= O2NM_MAX_NODES || bit < 0) 467 if (bit >= O2NM_MAX_NODES || bit < 0)
468 dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); 468 dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
469 else 469 else
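
The dlmrecovery.c fix comes down to find_next_bit()'s contract: the size
argument is the number of valid bits, i.e. the scan covers indices
[offset, size), so a bitmap sized for O2NM_MAX_NODES nodes must be scanned
with exactly that size; O2NM_MAX_NODES + 1 let the search read one bit past
the map. In miniature (illustrative helper):

static int example_first_dead(const unsigned long *map, unsigned nbits)
{
        int bit = find_next_bit(map, nbits, 0);

        /* find_next_bit() returns nbits when no bit in [0, nbits) is set */
        return bit >= nbits ? -1 : bit;
}
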
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6a13ea64c447..2b10b36d1577 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -724,28 +724,55 @@ leave:
724 return status; 724 return status;
725} 725}
726 726
727/*
728 * While a write will already be ordering the data, a truncate will not.
729 * Thus, we need to explicitly order the zeroed pages.
730 */
731static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
732{
733 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
734 handle_t *handle = NULL;
735 int ret = 0;
736
737 if (!ocfs2_should_order_data(inode))
738 goto out;
739
740 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
741 if (IS_ERR(handle)) {
742 ret = -ENOMEM;
743 mlog_errno(ret);
744 goto out;
745 }
746
747 ret = ocfs2_jbd2_file_inode(handle, inode);
748 if (ret < 0)
749 mlog_errno(ret);
750
751out:
752 if (ret) {
753 if (!IS_ERR(handle))
754 ocfs2_commit_trans(osb, handle);
755 handle = ERR_PTR(ret);
756 }
757 return handle;
758}
759
727/* Some parts of this were taken from generic_cont_expand, which turned out 760/* Some parts of this were taken from generic_cont_expand, which turned out
728 * to be too fragile to do exactly what we need without us having to 761 * to be too fragile to do exactly what we need without us having to
729 * worry about recursive locking in ->write_begin() and ->write_end(). */ 762 * worry about recursive locking in ->write_begin() and ->write_end(). */
730static int ocfs2_write_zero_page(struct inode *inode, 763static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
731 u64 size) 764 u64 abs_to)
732{ 765{
733 struct address_space *mapping = inode->i_mapping; 766 struct address_space *mapping = inode->i_mapping;
734 struct page *page; 767 struct page *page;
735 unsigned long index; 768 unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
736 unsigned int offset;
737 handle_t *handle = NULL; 769 handle_t *handle = NULL;
738 int ret; 770 int ret = 0;
771 unsigned zero_from, zero_to, block_start, block_end;
739 772
740 offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ 773 BUG_ON(abs_from >= abs_to);
741 /* ugh. in prepare/commit_write, if from==to==start of block, we 774 BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
742 ** skip the prepare. make sure we never send an offset for the start 775 BUG_ON(abs_from & (inode->i_blkbits - 1));
743 ** of a block
744 */
745 if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
746 offset++;
747 }
748 index = size >> PAGE_CACHE_SHIFT;
749 776
750 page = grab_cache_page(mapping, index); 777 page = grab_cache_page(mapping, index);
751 if (!page) { 778 if (!page) {
@@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode,
754 goto out; 781 goto out;
755 } 782 }
756 783
757 ret = ocfs2_prepare_write_nolock(inode, page, offset, offset); 784 /* Get the offsets within the page that we want to zero */
758 if (ret < 0) { 785 zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
759 mlog_errno(ret); 786 zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
760 goto out_unlock; 787 if (!zero_to)
761 } 788 zero_to = PAGE_CACHE_SIZE;
762 789
763 if (ocfs2_should_order_data(inode)) { 790 mlog(0,
764 handle = ocfs2_start_walk_page_trans(inode, page, offset, 791 "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
765 offset); 792 (unsigned long long)abs_from, (unsigned long long)abs_to,
766 if (IS_ERR(handle)) { 793 index, zero_from, zero_to);
767 ret = PTR_ERR(handle); 794
768 handle = NULL; 795 /* We know that zero_from is block aligned */
796 for (block_start = zero_from; block_start < zero_to;
797 block_start = block_end) {
798 block_end = block_start + (1 << inode->i_blkbits);
799
800 /*
801 * block_start is block-aligned. Bump it by one to
802 * force ocfs2_{prepare,commit}_write() to zero the
803 * whole block.
804 */
805 ret = ocfs2_prepare_write_nolock(inode, page,
806 block_start + 1,
807 block_start + 1);
808 if (ret < 0) {
809 mlog_errno(ret);
769 goto out_unlock; 810 goto out_unlock;
770 } 811 }
771 }
772 812
773 /* must not update i_size! */ 813 if (!handle) {
774 ret = block_commit_write(page, offset, offset); 814 handle = ocfs2_zero_start_ordered_transaction(inode);
775 if (ret < 0) 815 if (IS_ERR(handle)) {
776 mlog_errno(ret); 816 ret = PTR_ERR(handle);
777 else 817 handle = NULL;
778 ret = 0; 818 break;
819 }
820 }
821
822 /* must not update i_size! */
823 ret = block_commit_write(page, block_start + 1,
824 block_start + 1);
825 if (ret < 0)
826 mlog_errno(ret);
827 else
828 ret = 0;
829 }
779 830
780 if (handle) 831 if (handle)
781 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 832 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
833
782out_unlock: 834out_unlock:
783 unlock_page(page); 835 unlock_page(page);
784 page_cache_release(page); 836 page_cache_release(page);
@@ -786,22 +838,114 @@ out:
786 return ret; 838 return ret;
787} 839}
788 840
789static int ocfs2_zero_extend(struct inode *inode, 841/*
790 u64 zero_to_size) 842 * Find the next range to zero. We do this in terms of bytes because
843 * that's what ocfs2_zero_extend() wants, and it is dealing with the
844 * pagecache. The returned range may span multiple extents.
845 *
846 * zero_start and zero_end are ocfs2_zero_extend()'s current idea of what
847 * needs to be zeroed. range_start and range_end return the next zeroing
848 * range. A subsequent call should pass the previous range_end as its
849 * zero_start. If range_end is 0, there's nothing to do.
850 *
851 * Unwritten extents are skipped over. Refcounted extents are CoW'd.
852 */
853static int ocfs2_zero_extend_get_range(struct inode *inode,
854 struct buffer_head *di_bh,
855 u64 zero_start, u64 zero_end,
856 u64 *range_start, u64 *range_end)
791{ 857{
792 int ret = 0; 858 int rc = 0, needs_cow = 0;
793 u64 start_off; 859 u32 p_cpos, zero_clusters = 0;
794 struct super_block *sb = inode->i_sb; 860 u32 zero_cpos =
861 zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
862 u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
863 unsigned int num_clusters = 0;
864 unsigned int ext_flags = 0;
795 865
796 start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); 866 while (zero_cpos < last_cpos) {
797 while (start_off < zero_to_size) { 867 rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
798 ret = ocfs2_write_zero_page(inode, start_off); 868 &num_clusters, &ext_flags);
799 if (ret < 0) { 869 if (rc) {
800 mlog_errno(ret); 870 mlog_errno(rc);
871 goto out;
872 }
873
874 if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
875 zero_clusters = num_clusters;
876 if (ext_flags & OCFS2_EXT_REFCOUNTED)
877 needs_cow = 1;
878 break;
879 }
880
881 zero_cpos += num_clusters;
882 }
883 if (!zero_clusters) {
884 *range_end = 0;
885 goto out;
886 }
887
888 while ((zero_cpos + zero_clusters) < last_cpos) {
889 rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
890 &p_cpos, &num_clusters,
891 &ext_flags);
892 if (rc) {
893 mlog_errno(rc);
801 goto out; 894 goto out;
802 } 895 }
803 896
804 start_off += sb->s_blocksize; 897 if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
898 break;
899 if (ext_flags & OCFS2_EXT_REFCOUNTED)
900 needs_cow = 1;
901 zero_clusters += num_clusters;
902 }
903 if ((zero_cpos + zero_clusters) > last_cpos)
904 zero_clusters = last_cpos - zero_cpos;
905
906 if (needs_cow) {
907 rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
908 UINT_MAX);
909 if (rc) {
910 mlog_errno(rc);
911 goto out;
912 }
913 }
914
915 *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
916 *range_end = ocfs2_clusters_to_bytes(inode->i_sb,
917 zero_cpos + zero_clusters);
918
919out:
920 return rc;
921}
922
923/*
924 * Zero one range returned from ocfs2_zero_extend_get_range(). The caller
925 * has made sure that the entire range needs zeroing.
926 */
927static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
928 u64 range_end)
929{
930 int rc = 0;
931 u64 next_pos;
932 u64 zero_pos = range_start;
933
934 mlog(0, "range_start = %llu, range_end = %llu\n",
935 (unsigned long long)range_start,
936 (unsigned long long)range_end);
937 BUG_ON(range_start >= range_end);
938
939 while (zero_pos < range_end) {
940 next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
941 if (next_pos > range_end)
942 next_pos = range_end;
943 rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
944 if (rc < 0) {
945 mlog_errno(rc);
946 break;
947 }
948 zero_pos = next_pos;
805 949
806 /* 950 /*
807 * Very large extends have the potential to lock up 951 * Very large extends have the potential to lock up
@@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode,
810 cond_resched(); 954 cond_resched();
811 } 955 }
812 956
813out: 957 return rc;
958}
959
960int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
961 loff_t zero_to_size)
962{
963 int ret = 0;
964 u64 zero_start, range_start = 0, range_end = 0;
965 struct super_block *sb = inode->i_sb;
966
967 zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
968 mlog(0, "zero_start %llu for i_size %llu\n",
969 (unsigned long long)zero_start,
970 (unsigned long long)i_size_read(inode));
971 while (zero_start < zero_to_size) {
972 ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
973 zero_to_size,
974 &range_start,
975 &range_end);
976 if (ret) {
977 mlog_errno(ret);
978 break;
979 }
980 if (!range_end)
981 break;
982 /* Trim the ends */
983 if (range_start < zero_start)
984 range_start = zero_start;
985 if (range_end > zero_to_size)
986 range_end = zero_to_size;
987
988 ret = ocfs2_zero_extend_range(inode, range_start,
989 range_end);
990 if (ret) {
991 mlog_errno(ret);
992 break;
993 }
994 zero_start = range_end;
995 }
996
814 return ret; 997 return ret;
815} 998}
816 999
817int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to) 1000int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
1001 u64 new_i_size, u64 zero_to)
818{ 1002{
819 int ret; 1003 int ret;
820 u32 clusters_to_add; 1004 u32 clusters_to_add;
821 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1005 struct ocfs2_inode_info *oi = OCFS2_I(inode);
822 1006
1007 /*
1008 * Only quota files call this without a bh, and they can't be
1009 * refcounted.
1010 */
1011 BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
1012 BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
1013
823 clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size); 1014 clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
824 if (clusters_to_add < oi->ip_clusters) 1015 if (clusters_to_add < oi->ip_clusters)
825 clusters_to_add = 0; 1016 clusters_to_add = 0;
@@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
840 * still need to zero the area between the old i_size and the 1031 * still need to zero the area between the old i_size and the
841 * new i_size. 1032 * new i_size.
842 */ 1033 */
843 ret = ocfs2_zero_extend(inode, zero_to); 1034 ret = ocfs2_zero_extend(inode, di_bh, zero_to);
844 if (ret < 0) 1035 if (ret < 0)
845 mlog_errno(ret); 1036 mlog_errno(ret);
846 1037
@@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode,
862 goto out; 1053 goto out;
863 1054
864 if (i_size_read(inode) == new_i_size) 1055 if (i_size_read(inode) == new_i_size)
865 goto out; 1056 goto out;
866 BUG_ON(new_i_size < i_size_read(inode)); 1057 BUG_ON(new_i_size < i_size_read(inode));
867 1058
868 /* 1059 /*
869 * Fall through for converting inline data, even if the fs
870 * supports sparse files.
871 *
872 * The check for inline data here is legal - nobody can add
873 * the feature since we have i_mutex. We must check it again
874 * after acquiring ip_alloc_sem though, as paths like mmap
875 * might have raced us to converting the inode to extents.
876 */
877 if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
878 && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
879 goto out_update_size;
880
881 /*
882 * The alloc sem blocks people in read/write from reading our 1060 * The alloc sem blocks people in read/write from reading our
883 * allocation until we're done changing it. We depend on 1061 * allocation until we're done changing it. We depend on
884 * i_mutex to block other extend/truncate calls while we're 1062 * i_mutex to block other extend/truncate calls while we're
885 * here. 1063 * here. We even have to hold it for sparse files because there
1064 * might be some tail zeroing.
886 */ 1065 */
887 down_write(&oi->ip_alloc_sem); 1066 down_write(&oi->ip_alloc_sem);
888 1067
@@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode,
899 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh); 1078 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
900 if (ret) { 1079 if (ret) {
901 up_write(&oi->ip_alloc_sem); 1080 up_write(&oi->ip_alloc_sem);
902
903 mlog_errno(ret); 1081 mlog_errno(ret);
904 goto out; 1082 goto out;
905 } 1083 }
906 } 1084 }
907 1085
908 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 1086 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
909 ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size); 1087 ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
1088 else
1089 ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
1090 new_i_size);
910 1091
911 up_write(&oi->ip_alloc_sem); 1092 up_write(&oi->ip_alloc_sem);
912 1093
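
[Editor's note] The file.c hunks above restructure tail zeroing: ocfs2_zero_extend() now asks ocfs2_zero_extend_get_range() for the next allocated, non-unwritten range (CoWing refcounted extents first), trims it to [old i_size, zero_to), and hands it to ocfs2_zero_extend_range(), which zeroes page by page. Holes and unwritten extents already read back as zeroes, so only written allocations past i_size need touching. A minimal user-space sketch of that control flow, with a toy extent table standing in for ocfs2_get_clusters() and invented sizes:

/* Illustrative sketch only; the extent table and sizes are invented, and
 * just the control flow (range lookup, end trimming, page-sized zeroing)
 * mirrors the patch. */
#include <stdio.h>

#define PAGE_SIZE 4096ULL
#define PAGE_MASK (~(PAGE_SIZE - 1))

struct range { unsigned long long start, end; };   /* written allocations */

static const struct range extents[] = { { 8192, 20480 }, { 32768, 40960 } };

static void zero_extend_range(unsigned long long start, unsigned long long end)
{
        unsigned long long pos = start, next;

        while (pos < end) {
                next = (pos & PAGE_MASK) + PAGE_SIZE;   /* one page at a time */
                if (next > end)
                        next = end;
                printf("zeroing [%llu, %llu)\n", pos, next);
                pos = next;
        }
}

int main(void)
{
        unsigned long long old_size = 10000, zero_to = 36000;
        unsigned int i;

        for (i = 0; i < sizeof(extents) / sizeof(extents[0]); i++) {
                unsigned long long s = extents[i].start, e = extents[i].end;

                if (e <= old_size || s >= zero_to)
                        continue;
                if (s < old_size)       /* "Trim the ends", as in the patch */
                        s = old_size;
                if (e > zero_to)
                        e = zero_to;
                zero_extend_range(s, e);
        }
        return 0;
}
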
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index d66cf4f7c70e..97bf761c9e7c 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -54,8 +54,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
54int ocfs2_simple_size_update(struct inode *inode, 54int ocfs2_simple_size_update(struct inode *inode,
55 struct buffer_head *di_bh, 55 struct buffer_head *di_bh,
56 u64 new_i_size); 56 u64 new_i_size);
57int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, 57int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
58 u64 zero_to); 58 u64 new_i_size, u64 zero_to);
59int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
60 loff_t zero_to);
59int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 61int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
60int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, 62int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
61 struct kstat *stat); 63 struct kstat *stat);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 47878cf16418..625de9d7088c 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -472,7 +472,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger
472 return container_of(triggers, struct ocfs2_triggers, ot_triggers); 472 return container_of(triggers, struct ocfs2_triggers, ot_triggers);
473} 473}
474 474
475static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers, 475static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
476 struct buffer_head *bh, 476 struct buffer_head *bh,
477 void *data, size_t size) 477 void *data, size_t size)
478{ 478{
@@ -491,7 +491,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
491 * Quota blocks have their own trigger because the struct ocfs2_block_check 491 * Quota blocks have their own trigger because the struct ocfs2_block_check
492 * offset depends on the blocksize. 492 * offset depends on the blocksize.
493 */ 493 */
494static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, 494static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
495 struct buffer_head *bh, 495 struct buffer_head *bh,
496 void *data, size_t size) 496 void *data, size_t size)
497{ 497{
@@ -511,7 +511,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
511 * Directory blocks also have their own trigger because the 511 * Directory blocks also have their own trigger because the
512 * struct ocfs2_block_check offset depends on the blocksize. 512 * struct ocfs2_block_check offset depends on the blocksize.
513 */ 513 */
514static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers, 514static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
515 struct buffer_head *bh, 515 struct buffer_head *bh,
516 void *data, size_t size) 516 void *data, size_t size)
517{ 517{
@@ -544,7 +544,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
544 544
545static struct ocfs2_triggers di_triggers = { 545static struct ocfs2_triggers di_triggers = {
546 .ot_triggers = { 546 .ot_triggers = {
547 .t_commit = ocfs2_commit_trigger, 547 .t_frozen = ocfs2_frozen_trigger,
548 .t_abort = ocfs2_abort_trigger, 548 .t_abort = ocfs2_abort_trigger,
549 }, 549 },
550 .ot_offset = offsetof(struct ocfs2_dinode, i_check), 550 .ot_offset = offsetof(struct ocfs2_dinode, i_check),
@@ -552,7 +552,7 @@ static struct ocfs2_triggers di_triggers = {
552 552
553static struct ocfs2_triggers eb_triggers = { 553static struct ocfs2_triggers eb_triggers = {
554 .ot_triggers = { 554 .ot_triggers = {
555 .t_commit = ocfs2_commit_trigger, 555 .t_frozen = ocfs2_frozen_trigger,
556 .t_abort = ocfs2_abort_trigger, 556 .t_abort = ocfs2_abort_trigger,
557 }, 557 },
558 .ot_offset = offsetof(struct ocfs2_extent_block, h_check), 558 .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
@@ -560,7 +560,7 @@ static struct ocfs2_triggers eb_triggers = {
560 560
561static struct ocfs2_triggers rb_triggers = { 561static struct ocfs2_triggers rb_triggers = {
562 .ot_triggers = { 562 .ot_triggers = {
563 .t_commit = ocfs2_commit_trigger, 563 .t_frozen = ocfs2_frozen_trigger,
564 .t_abort = ocfs2_abort_trigger, 564 .t_abort = ocfs2_abort_trigger,
565 }, 565 },
566 .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check), 566 .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
@@ -568,7 +568,7 @@ static struct ocfs2_triggers rb_triggers = {
568 568
569static struct ocfs2_triggers gd_triggers = { 569static struct ocfs2_triggers gd_triggers = {
570 .ot_triggers = { 570 .ot_triggers = {
571 .t_commit = ocfs2_commit_trigger, 571 .t_frozen = ocfs2_frozen_trigger,
572 .t_abort = ocfs2_abort_trigger, 572 .t_abort = ocfs2_abort_trigger,
573 }, 573 },
574 .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), 574 .ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
@@ -576,14 +576,14 @@ static struct ocfs2_triggers gd_triggers = {
576 576
577static struct ocfs2_triggers db_triggers = { 577static struct ocfs2_triggers db_triggers = {
578 .ot_triggers = { 578 .ot_triggers = {
579 .t_commit = ocfs2_db_commit_trigger, 579 .t_frozen = ocfs2_db_frozen_trigger,
580 .t_abort = ocfs2_abort_trigger, 580 .t_abort = ocfs2_abort_trigger,
581 }, 581 },
582}; 582};
583 583
584static struct ocfs2_triggers xb_triggers = { 584static struct ocfs2_triggers xb_triggers = {
585 .ot_triggers = { 585 .ot_triggers = {
586 .t_commit = ocfs2_commit_trigger, 586 .t_frozen = ocfs2_frozen_trigger,
587 .t_abort = ocfs2_abort_trigger, 587 .t_abort = ocfs2_abort_trigger,
588 }, 588 },
589 .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check), 589 .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
@@ -591,14 +591,14 @@ static struct ocfs2_triggers xb_triggers = {
591 591
592static struct ocfs2_triggers dq_triggers = { 592static struct ocfs2_triggers dq_triggers = {
593 .ot_triggers = { 593 .ot_triggers = {
594 .t_commit = ocfs2_dq_commit_trigger, 594 .t_frozen = ocfs2_dq_frozen_trigger,
595 .t_abort = ocfs2_abort_trigger, 595 .t_abort = ocfs2_abort_trigger,
596 }, 596 },
597}; 597};
598 598
599static struct ocfs2_triggers dr_triggers = { 599static struct ocfs2_triggers dr_triggers = {
600 .ot_triggers = { 600 .ot_triggers = {
601 .t_commit = ocfs2_commit_trigger, 601 .t_frozen = ocfs2_frozen_trigger,
602 .t_abort = ocfs2_abort_trigger, 602 .t_abort = ocfs2_abort_trigger,
603 }, 603 },
604 .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check), 604 .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
@@ -606,7 +606,7 @@ static struct ocfs2_triggers dr_triggers = {
606 606
607static struct ocfs2_triggers dl_triggers = { 607static struct ocfs2_triggers dl_triggers = {
608 .ot_triggers = { 608 .ot_triggers = {
609 .t_commit = ocfs2_commit_trigger, 609 .t_frozen = ocfs2_frozen_trigger,
610 .t_abort = ocfs2_abort_trigger, 610 .t_abort = ocfs2_abort_trigger,
611 }, 611 },
612 .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check), 612 .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
@@ -1936,7 +1936,7 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
1936 mutex_lock(&os->os_lock); 1936 mutex_lock(&os->os_lock);
1937 ocfs2_queue_orphan_scan(osb); 1937 ocfs2_queue_orphan_scan(osb);
1938 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) 1938 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
1939 schedule_delayed_work(&os->os_orphan_scan_work, 1939 queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
1940 ocfs2_orphan_scan_timeout()); 1940 ocfs2_orphan_scan_timeout());
1941 mutex_unlock(&os->os_lock); 1941 mutex_unlock(&os->os_lock);
1942} 1942}
@@ -1976,8 +1976,8 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
1976 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); 1976 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1977 else { 1977 else {
1978 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE); 1978 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
1979 schedule_delayed_work(&os->os_orphan_scan_work, 1979 queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
1980 ocfs2_orphan_scan_timeout()); 1980 ocfs2_orphan_scan_timeout());
1981 } 1981 }
1982} 1982}
1983 1983
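
[Editor's note] The journal.c hunk renames the ocfs2 trigger callbacks from t_commit to t_frozen, matching the jbd2 semantics: the hook runs when the buffer's frozen copy is produced for journal writeout, which is when the block checksum must be recomputed. It also queues the orphan scan on ocfs2_wq rather than the system workqueue so it can be flushed with the rest of ocfs2's work. A toy model of the trigger-table shape; the check-word offset and the checksum are invented stand-ins for ocfs2_block_check:

/* Toy model: each block type carries a trigger table, and the frozen
 * callback recomputes a checksum over the frozen buffer.  The offset
 * (64) and toy_sum() are stand-ins, not ocfs2's layout. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct trigger_type {
        void (*t_frozen)(struct trigger_type *t, void *data, size_t size);
};

struct my_triggers {
        struct trigger_type ot_triggers;
        size_t ot_offset;               /* where the check word lives */
};

static uint32_t toy_sum(const uint8_t *p, size_t n)
{
        uint32_t c = 5381;
        while (n--)
                c = c * 33 + *p++;      /* djb2-style stand-in */
        return c;
}

static void frozen_trigger(struct trigger_type *t, void *data, size_t size)
{
        struct my_triggers *mt = (struct my_triggers *)
                ((char *)t - offsetof(struct my_triggers, ot_triggers));
        uint32_t *check = (uint32_t *)((char *)data + mt->ot_offset);

        *check = 0;                     /* zero the field, then sum the block */
        *check = toy_sum(data, size);
}

static struct my_triggers di_triggers = {
        .ot_triggers = { .t_frozen = frozen_trigger },
        .ot_offset   = 64,
};

int main(void)
{
        uint8_t block[512] = { 1, 2, 3 };

        di_triggers.ot_triggers.t_frozen(&di_triggers.ot_triggers,
                                         block, sizeof(block));
        printf("check word: %u\n", (unsigned)*(uint32_t *)(block + 64));
        return 0;
}
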
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 3d7419682dc0..ec6adbf8f551 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -118,6 +118,7 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
118{ 118{
119 unsigned int la_mb; 119 unsigned int la_mb;
120 unsigned int gd_mb; 120 unsigned int gd_mb;
121 unsigned int la_max_mb;
121 unsigned int megs_per_slot; 122 unsigned int megs_per_slot;
122 struct super_block *sb = osb->sb; 123 struct super_block *sb = osb->sb;
123 124
@@ -182,6 +183,12 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
182 if (megs_per_slot < la_mb) 183 if (megs_per_slot < la_mb)
183 la_mb = megs_per_slot; 184 la_mb = megs_per_slot;
184 185
186 /* We can't store more bits than fit in a block. */
187 la_max_mb = ocfs2_clusters_to_megabytes(osb->sb,
188 ocfs2_local_alloc_size(sb) * 8);
189 if (la_mb > la_max_mb)
190 la_mb = la_max_mb;
191
185 return la_mb; 192 return la_mb;
186} 193}
187 194
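
[Editor's note] The localalloc.c hunk caps the default local alloc window: the inline bitmap can track at most ocfs2_local_alloc_size(sb) * 8 clusters, so any larger default is clamped to that many clusters expressed in megabytes. A back-of-the-envelope check with assumed numbers (the real byte count depends on block size and dinode layout):

/* Assumed numbers only: ~3900 usable bitmap bytes and 4K clusters. */
#include <stdio.h>

int main(void)
{
        unsigned long long bitmap_bytes = 3900;         /* assumption */
        unsigned long long cluster_size = 4096;         /* 4K clusters */
        unsigned long long max_bits = bitmap_bytes * 8; /* trackable clusters */
        unsigned long long la_max_mb =
                max_bits * cluster_size / (1024 * 1024);

        printf("bitmap covers %llu clusters -> cap at %llu MB\n",
               max_bits, la_max_mb);
        return 0;
}
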
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 2bb35fe00511..4607923eb24c 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -775,7 +775,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
775 * locking allocators ranks above a transaction start 775 * locking allocators ranks above a transaction start
776 */ 776 */
777 WARN_ON(journal_current_handle()); 777 WARN_ON(journal_current_handle());
778 status = ocfs2_extend_no_holes(gqinode, 778 status = ocfs2_extend_no_holes(gqinode, NULL,
779 gqinode->i_size + (need_alloc << sb->s_blocksize_bits), 779 gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
780 gqinode->i_size); 780 gqinode->i_size);
781 if (status < 0) 781 if (status < 0)
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 8bd70d4d184d..dc78764ccc4c 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -971,7 +971,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
971 u64 p_blkno; 971 u64 p_blkno;
972 972
973 /* We are protected by dqio_sem so no locking needed */ 973 /* We are protected by dqio_sem so no locking needed */
974 status = ocfs2_extend_no_holes(lqinode, 974 status = ocfs2_extend_no_holes(lqinode, NULL,
975 lqinode->i_size + 2 * sb->s_blocksize, 975 lqinode->i_size + 2 * sb->s_blocksize,
976 lqinode->i_size); 976 lqinode->i_size);
977 if (status < 0) { 977 if (status < 0) {
@@ -1114,7 +1114,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1114 return ocfs2_local_quota_add_chunk(sb, type, offset); 1114 return ocfs2_local_quota_add_chunk(sb, type, offset);
1115 1115
1116 /* We are protected by dqio_sem so no locking needed */ 1116 /* We are protected by dqio_sem so no locking needed */
1117 status = ocfs2_extend_no_holes(lqinode, 1117 status = ocfs2_extend_no_holes(lqinode, NULL,
1118 lqinode->i_size + sb->s_blocksize, 1118 lqinode->i_size + sb->s_blocksize,
1119 lqinode->i_size); 1119 lqinode->i_size);
1120 if (status < 0) { 1120 if (status < 0) {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 4793f36f6518..3ac5aa733e9c 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2931,6 +2931,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2931 2931
2932 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; 2932 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
2933 end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); 2933 end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
2934 /*
2935 * We only duplicate pages until we reach the page that contains i_size - 1.
2936 * So trim 'end' to i_size.
2937 */
2938 if (end > i_size_read(context->inode))
2939 end = i_size_read(context->inode);
2934 2940
2935 while (offset < end) { 2941 while (offset < end) {
2936 page_index = offset >> PAGE_CACHE_SHIFT; 2942 page_index = offset >> PAGE_CACHE_SHIFT;
@@ -4166,6 +4172,12 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
4166 struct inode *inode = old_dentry->d_inode; 4172 struct inode *inode = old_dentry->d_inode;
4167 struct buffer_head *new_bh = NULL; 4173 struct buffer_head *new_bh = NULL;
4168 4174
4175 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
4176 ret = -EINVAL;
4177 mlog_errno(ret);
4178 goto out;
4179 }
4180
4169 ret = filemap_fdatawrite(inode->i_mapping); 4181 ret = filemap_fdatawrite(inode->i_mapping);
4170 if (ret) { 4182 if (ret) {
4171 mlog_errno(ret); 4183 mlog_errno(ret);
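
[Editor's note] Two refcounttree.c fixes above: page-level CoW stops at the page holding i_size - 1, so the byte range computed from whole clusters is clamped to i_size before the copy loop, and __ocfs2_reflink() now rejects system files outright. The clamp in isolation, with illustrative constants:

/* Illustrative constants; only the clamp itself comes from the patch. */
#include <stdio.h>

int main(void)
{
        unsigned long long cluster_bytes = 32768;       /* 32K clusters */
        unsigned long long cpos = 2, new_len = 3;       /* clusters to copy */
        unsigned long long i_size = 90000;

        unsigned long long offset = cpos * cluster_bytes;
        unsigned long long end = offset + new_len * cluster_bytes;

        if (end > i_size)       /* the new check: don't copy past EOF pages */
                end = i_size;

        printf("duplicate pages in [%llu, %llu)\n", offset, end);
        return 0;
}
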
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index 40650021fc24..d8b6e4259b80 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -26,7 +26,6 @@
26 26
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/slab.h>
30#include <linux/highmem.h> 29#include <linux/highmem.h>
31#include <linux/bitops.h> 30#include <linux/bitops.h>
32#include <linux/list.h> 31#include <linux/list.h>
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index f4c2a9eb8c4d..a8e6a95a353f 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -741,7 +741,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
741 le16_to_cpu(bg->bg_free_bits_count)); 741 le16_to_cpu(bg->bg_free_bits_count));
742 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, 742 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
743 le16_to_cpu(bg->bg_bits)); 743 le16_to_cpu(bg->bg_bits));
744 cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg->bg_blkno); 744 cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno;
745 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) 745 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
746 le16_add_cpu(&cl->cl_next_free_rec, 1); 746 le16_add_cpu(&cl->cl_next_free_rec, 1);
747 747
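
[Editor's note] The suballoc.c one-liner is an endianness fix: bg->bg_blkno is already a little-endian on-disk value, so wrapping it in cpu_to_le64() swapped it a second time on big-endian hosts. A stand-alone demo of the double-conversion effect, assuming GCC/Clang's __builtin_bswap64 and faking a big-endian cpu_to_le64():

/* Fakes a big-endian cpu_to_le64() so the effect is visible on any host;
 * the block number is arbitrary. */
#include <stdint.h>
#include <stdio.h>

#define fake_cpu_to_le64(x) __builtin_bswap64(x)    /* as if big-endian */

int main(void)
{
        uint64_t blkno = 4096;
        uint64_t disk_le = fake_cpu_to_le64(blkno);     /* correct on disk */
        uint64_t doubled = fake_cpu_to_le64(disk_le);   /* the old bug */

        printf("on-disk %016llx, double-converted %016llx\n",
               (unsigned long long)disk_le, (unsigned long long)doubled);
        return 0;
}
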
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e97b34842cfe..d03469f61801 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -709,7 +709,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
709 struct ocfs2_xattr_value_buf *vb, 709 struct ocfs2_xattr_value_buf *vb,
710 struct ocfs2_xattr_set_ctxt *ctxt) 710 struct ocfs2_xattr_set_ctxt *ctxt)
711{ 711{
712 int status = 0; 712 int status = 0, credits;
713 handle_t *handle = ctxt->handle; 713 handle_t *handle = ctxt->handle;
714 enum ocfs2_alloc_restarted why; 714 enum ocfs2_alloc_restarted why;
715 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 715 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
@@ -719,38 +719,54 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
719 719
720 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 720 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
721 721
722 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 722 while (clusters_to_add) {
723 OCFS2_JOURNAL_ACCESS_WRITE); 723 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
724 if (status < 0) { 724 OCFS2_JOURNAL_ACCESS_WRITE);
725 mlog_errno(status); 725 if (status < 0) {
726 goto leave; 726 mlog_errno(status);
727 } 727 break;
728 }
728 729
729 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 730 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
730 status = ocfs2_add_clusters_in_btree(handle, 731 status = ocfs2_add_clusters_in_btree(handle,
731 &et, 732 &et,
732 &logical_start, 733 &logical_start,
733 clusters_to_add, 734 clusters_to_add,
734 0, 735 0,
735 ctxt->data_ac, 736 ctxt->data_ac,
736 ctxt->meta_ac, 737 ctxt->meta_ac,
737 &why); 738 &why);
738 if (status < 0) { 739 if ((status < 0) && (status != -EAGAIN)) {
739 mlog_errno(status); 740 if (status != -ENOSPC)
740 goto leave; 741 mlog_errno(status);
741 } 742 break;
743 }
742 744
743 ocfs2_journal_dirty(handle, vb->vb_bh); 745 ocfs2_journal_dirty(handle, vb->vb_bh);
744 746
745 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters; 747 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
748 prev_clusters;
746 749
747 /* 750 if (why != RESTART_NONE && clusters_to_add) {
748 * We should have already allocated enough space before the transaction, 751 /*
749 * so no need to restart. 752 * We can only fail in case the alloc file doesn't give
750 */ 753 * up enough clusters.
751 BUG_ON(why != RESTART_NONE || clusters_to_add); 754 */
752 755 BUG_ON(why == RESTART_META);
753leave: 756
757 mlog(0, "restarting xattr value extension for %u"
758 " clusters,.\n", clusters_to_add);
759 credits = ocfs2_calc_extend_credits(inode->i_sb,
760 &vb->vb_xv->xr_list,
761 clusters_to_add);
762 status = ocfs2_extend_trans(handle, credits);
763 if (status < 0) {
764 status = -ENOMEM;
765 mlog_errno(status);
766 break;
767 }
768 }
769 }
754 770
755 return status; 771 return status;
756} 772}
@@ -6788,16 +6804,15 @@ out:
6788 return ret; 6804 return ret;
6789} 6805}
6790 6806
6791static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6807static int ocfs2_reflink_xattr_bucket(handle_t *handle,
6792 u64 blkno, u64 new_blkno, u32 clusters, 6808 u64 blkno, u64 new_blkno, u32 clusters,
6809 u32 *cpos, int num_buckets,
6793 struct ocfs2_alloc_context *meta_ac, 6810 struct ocfs2_alloc_context *meta_ac,
6794 struct ocfs2_alloc_context *data_ac, 6811 struct ocfs2_alloc_context *data_ac,
6795 struct ocfs2_reflink_xattr_tree_args *args) 6812 struct ocfs2_reflink_xattr_tree_args *args)
6796{ 6813{
6797 int i, j, ret = 0; 6814 int i, j, ret = 0;
6798 struct super_block *sb = args->reflink->old_inode->i_sb; 6815 struct super_block *sb = args->reflink->old_inode->i_sb;
6799 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
6800 u32 num_buckets = clusters * bpc;
6801 int bpb = args->old_bucket->bu_blocks; 6816 int bpb = args->old_bucket->bu_blocks;
6802 struct ocfs2_xattr_value_buf vb = { 6817 struct ocfs2_xattr_value_buf vb = {
6803 .vb_access = ocfs2_journal_access, 6818 .vb_access = ocfs2_journal_access,
@@ -6816,14 +6831,6 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6816 break; 6831 break;
6817 } 6832 }
6818 6833
6819 /*
6820 * The real bucket num in this series of blocks is stored
6821 * in the 1st bucket.
6822 */
6823 if (i == 0)
6824 num_buckets = le16_to_cpu(
6825 bucket_xh(args->old_bucket)->xh_num_buckets);
6826
6827 ret = ocfs2_xattr_bucket_journal_access(handle, 6834 ret = ocfs2_xattr_bucket_journal_access(handle,
6828 args->new_bucket, 6835 args->new_bucket,
6829 OCFS2_JOURNAL_ACCESS_CREATE); 6836 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -6837,6 +6844,18 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6837 bucket_block(args->old_bucket, j), 6844 bucket_block(args->old_bucket, j),
6838 sb->s_blocksize); 6845 sb->s_blocksize);
6839 6846
6847 /*
6848 * Record the start cpos so that we can use it to initialize
6849 * our xattr tree we also set the xh_num_bucket for the new
6850 * bucket.
6851 */
6852 if (i == 0) {
6853 *cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
6854 xh_entries[0].xe_name_hash);
6855 bucket_xh(args->new_bucket)->xh_num_buckets =
6856 cpu_to_le16(num_buckets);
6857 }
6858
6840 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6859 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6841 6860
6842 ret = ocfs2_reflink_xattr_header(handle, args->reflink, 6861 ret = ocfs2_reflink_xattr_header(handle, args->reflink,
@@ -6866,6 +6885,7 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6866 } 6885 }
6867 6886
6868 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6887 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6888
6869 ocfs2_xattr_bucket_relse(args->old_bucket); 6889 ocfs2_xattr_bucket_relse(args->old_bucket);
6870 ocfs2_xattr_bucket_relse(args->new_bucket); 6890 ocfs2_xattr_bucket_relse(args->new_bucket);
6871 } 6891 }
@@ -6874,6 +6894,75 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6874 ocfs2_xattr_bucket_relse(args->new_bucket); 6894 ocfs2_xattr_bucket_relse(args->new_bucket);
6875 return ret; 6895 return ret;
6876} 6896}
6897
6898static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6899 struct inode *inode,
6900 struct ocfs2_reflink_xattr_tree_args *args,
6901 struct ocfs2_extent_tree *et,
6902 struct ocfs2_alloc_context *meta_ac,
6903 struct ocfs2_alloc_context *data_ac,
6904 u64 blkno, u32 cpos, u32 len)
6905{
6906 int ret, first_inserted = 0;
6907 u32 p_cluster, num_clusters, reflink_cpos = 0;
6908 u64 new_blkno;
6909 unsigned int num_buckets, reflink_buckets;
6910 unsigned int bpc =
6911 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
6912
6913 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6914 if (ret) {
6915 mlog_errno(ret);
6916 goto out;
6917 }
6918 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
6919 ocfs2_xattr_bucket_relse(args->old_bucket);
6920
6921 while (len && num_buckets) {
6922 ret = ocfs2_claim_clusters(handle, data_ac,
6923 1, &p_cluster, &num_clusters);
6924 if (ret) {
6925 mlog_errno(ret);
6926 goto out;
6927 }
6928
6929 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
6930 reflink_buckets = min(num_buckets, bpc * num_clusters);
6931
6932 ret = ocfs2_reflink_xattr_bucket(handle, blkno,
6933 new_blkno, num_clusters,
6934 &reflink_cpos, reflink_buckets,
6935 meta_ac, data_ac, args);
6936 if (ret) {
6937 mlog_errno(ret);
6938 goto out;
6939 }
6940
6941 /*
6942 * For the 1st allocated cluster, we make it use the same cpos
6943 * so that the xattr tree looks the same as the original one
6944 * in the common case.
6945 */
6946 if (!first_inserted) {
6947 reflink_cpos = cpos;
6948 first_inserted = 1;
6949 }
6950 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
6951 num_clusters, 0, meta_ac);
6952 if (ret)
6953 mlog_errno(ret);
6954
6955 mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6956 (unsigned long long)new_blkno, num_clusters, reflink_cpos);
6957
6958 len -= num_clusters;
6959 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
6960 num_buckets -= reflink_buckets;
6961 }
6962out:
6963 return ret;
6964}
6965
6877/* 6966/*
6878 * Create the same xattr extent record in the new inode's xattr tree. 6967 * Create the same xattr extent record in the new inode's xattr tree.
6879 */ 6968 */
@@ -6885,8 +6974,6 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6885 void *para) 6974 void *para)
6886{ 6975{
6887 int ret, credits = 0; 6976 int ret, credits = 0;
6888 u32 p_cluster, num_clusters;
6889 u64 new_blkno;
6890 handle_t *handle; 6977 handle_t *handle;
6891 struct ocfs2_reflink_xattr_tree_args *args = 6978 struct ocfs2_reflink_xattr_tree_args *args =
6892 (struct ocfs2_reflink_xattr_tree_args *)para; 6979 (struct ocfs2_reflink_xattr_tree_args *)para;
@@ -6895,6 +6982,9 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6895 struct ocfs2_alloc_context *data_ac = NULL; 6982 struct ocfs2_alloc_context *data_ac = NULL;
6896 struct ocfs2_extent_tree et; 6983 struct ocfs2_extent_tree et;
6897 6984
6985 mlog(0, "reflink xattr buckets %llu len %u\n",
6986 (unsigned long long)blkno, len);
6987
6898 ocfs2_init_xattr_tree_extent_tree(&et, 6988 ocfs2_init_xattr_tree_extent_tree(&et,
6899 INODE_CACHE(args->reflink->new_inode), 6989 INODE_CACHE(args->reflink->new_inode),
6900 args->new_blk_bh); 6990 args->new_blk_bh);
@@ -6914,32 +7004,12 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6914 goto out; 7004 goto out;
6915 } 7005 }
6916 7006
6917 ret = ocfs2_claim_clusters(handle, data_ac, 7007 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
6918 len, &p_cluster, &num_clusters); 7008 meta_ac, data_ac,
6919 if (ret) { 7009 blkno, cpos, len);
6920 mlog_errno(ret);
6921 goto out_commit;
6922 }
6923
6924 new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
6925
6926 mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
6927 (unsigned long long)blkno, (unsigned long long)new_blkno, len);
6928 ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
6929 meta_ac, data_ac, args);
6930 if (ret) {
6931 mlog_errno(ret);
6932 goto out_commit;
6933 }
6934
6935 mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6936 (unsigned long long)new_blkno, len, cpos);
6937 ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
6938 len, 0, meta_ac);
6939 if (ret) 7010 if (ret)
6940 mlog_errno(ret); 7011 mlog_errno(ret);
6941 7012
6942out_commit:
6943 ocfs2_commit_trans(osb, handle); 7013 ocfs2_commit_trans(osb, handle);
6944 7014
6945out: 7015out:
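
[Editor's note] The first xattr.c hunk turns ocfs2_xattr_extend_allocation() into a retry loop: when ocfs2_add_clusters_in_btree() signals a restart instead of allocating everything, the transaction is extended by freshly computed credits and the loop goes around again (RESTART_META would mean a metadata shortfall and is a BUG). The loop shape, with all ocfs2 callees stubbed out:

/* All callees are stubs; only the restart structure is the point. */
#include <stdio.h>

enum restarted { RESTART_NONE, RESTART_TRANS, RESTART_META };

static int add_clusters(unsigned int want, unsigned int *got,
                        enum restarted *why)
{
        *got = want > 2 ? 2 : want;     /* pretend we never get more than 2 */
        *why = *got < want ? RESTART_TRANS : RESTART_NONE;
        return 0;
}

static int extend_trans(unsigned int credits)
{
        printf("extending transaction by %u credits\n", credits);
        return 0;
}

int main(void)
{
        unsigned int clusters_to_add = 5, got;
        enum restarted why;

        while (clusters_to_add) {
                if (add_clusters(clusters_to_add, &got, &why))
                        break;
                clusters_to_add -= got;
                if (why != RESTART_NONE && clusters_to_add &&
                    extend_trans(clusters_to_add))
                        break;
        }
        return 0;
}
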
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 3e73de5967ff..fc8497643fd0 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -74,6 +74,7 @@ int ibm_partition(struct parsed_partitions *state)
74 } *label; 74 } *label;
75 unsigned char *data; 75 unsigned char *data;
76 Sector sect; 76 Sector sect;
77 sector_t labelsect;
77 78
78 res = 0; 79 res = 0;
79 blocksize = bdev_logical_block_size(bdev); 80 blocksize = bdev_logical_block_size(bdev);
@@ -98,10 +99,19 @@ int ibm_partition(struct parsed_partitions *state)
98 goto out_freeall; 99 goto out_freeall;
99 100
100 /* 101 /*
102 * Special case for FBA disks: label sector does not depend on
103 * blocksize.
104 */
105 if ((info->cu_type == 0x6310 && info->dev_type == 0x9336) ||
106 (info->cu_type == 0x3880 && info->dev_type == 0x3370))
107 labelsect = info->label_block;
108 else
109 labelsect = info->label_block * (blocksize >> 9);
110
111 /*
101 * Get volume label, extract name and type. 112 * Get volume label, extract name and type.
102 */ 113 */
103 data = read_part_sector(state, info->label_block*(blocksize/512), 114 data = read_part_sector(state, labelsect, &sect);
104 &sect);
105 if (data == NULL) 115 if (data == NULL)
106 goto out_readerr; 116 goto out_readerr;
107 117
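
[Editor's note] The ibm.c change reduces to one expression: FBA devices (the two cu/dev type pairs named in the hunk) address the volume label by logical block directly, while the others scale label_block into 512-byte sectors. As a stand-alone helper:

/* The cu/dev type pairs are the ones named in the hunk. */
#include <stdio.h>

static unsigned long long label_sector(unsigned int cu, unsigned int dev,
                                       unsigned long long label_block,
                                       unsigned int blocksize)
{
        if ((cu == 0x6310 && dev == 0x9336) ||
            (cu == 0x3880 && dev == 0x3370))
                return label_block;                     /* FBA: no scaling */
        return label_block * (blocksize >> 9);          /* 512-byte units */
}

int main(void)
{
        printf("FBA: %llu, ECKD: %llu\n",
               label_sector(0x6310, 0x9336, 2, 2048),
               label_sector(0x3990, 0x3390, 2, 2048));
        return 0;
}
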
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index ce94801f48ca..d9396a4fc7ff 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -209,6 +209,9 @@ void proc_device_tree_add_node(struct device_node *np,
209 for (pp = np->properties; pp != NULL; pp = pp->next) { 209 for (pp = np->properties; pp != NULL; pp = pp->next) {
210 p = pp->name; 210 p = pp->name;
211 211
212 if (strchr(p, '/'))
213 continue;
214
212 if (duplicate_name(de, p)) 215 if (duplicate_name(de, p))
213 p = fixup_name(np, de, p); 216 p = fixup_name(np, de, p);
214 217
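
[Editor's note] The proc_devtree.c check skips device-tree property names containing '/', which cannot be created as a single procfs entry. In miniature:

/* Property names are examples; the '/' rule is the one from the hunk. */
#include <stdio.h>
#include <string.h>

int main(void)
{
        const char *props[] = { "compatible", "linux,phandle", "foo/bar" };
        unsigned int i;

        for (i = 0; i < sizeof(props) / sizeof(props[0]); i++) {
                if (strchr(props[i], '/'))
                        continue;       /* cannot be a single proc entry */
                printf("create /proc/device-tree/.../%s\n", props[i]);
        }
        return 0;
}
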
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 46d4b5d72bd3..cb6306e63843 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -122,11 +122,20 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
122 return size; 122 return size;
123} 123}
124 124
125static void pad_len_spaces(struct seq_file *m, int len)
126{
127 len = 25 + sizeof(void*) * 6 - len;
128 if (len < 1)
129 len = 1;
130 seq_printf(m, "%*c", len, ' ');
131}
132
125/* 133/*
126 * display a single VMA to a sequenced file 134 * display a single VMA to a sequenced file
127 */ 135 */
128static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) 136static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
129{ 137{
138 struct mm_struct *mm = vma->vm_mm;
130 unsigned long ino = 0; 139 unsigned long ino = 0;
131 struct file *file; 140 struct file *file;
132 dev_t dev = 0; 141 dev_t dev = 0;
@@ -155,11 +164,14 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
155 MAJOR(dev), MINOR(dev), ino, &len); 164 MAJOR(dev), MINOR(dev), ino, &len);
156 165
157 if (file) { 166 if (file) {
158 len = 25 + sizeof(void *) * 6 - len; 167 pad_len_spaces(m, len);
159 if (len < 1)
160 len = 1;
161 seq_printf(m, "%*c", len, ' ');
162 seq_path(m, &file->f_path, ""); 168 seq_path(m, &file->f_path, "");
169 } else if (mm) {
170 if (vma->vm_start <= mm->start_stack &&
171 vma->vm_end >= mm->start_stack) {
172 pad_len_spaces(m, len);
173 seq_puts(m, "[stack]");
174 }
163 } 175 }
164 176
165 seq_putc(m, '\n'); 177 seq_putc(m, '\n');
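
[Editor's note] task_nommu.c factors the column padding into pad_len_spaces() and reuses it to tag the stack VMA: if a mapping has no backing file but covers mm->start_stack, the line gets a [stack] annotation, as in the MMU /proc/pid/maps. A user-space rendering with made-up addresses:

/* Addresses are made up; the padding formula matches the kernel's. */
#include <stdio.h>

static void pad_len_spaces(int len)
{
        len = 25 + (int)sizeof(void *) * 6 - len;
        if (len < 1)
                len = 1;
        printf("%*c", len, ' ');
}

int main(void)
{
        unsigned long start = 0x7f000000UL, end = 0x7f004000UL;
        unsigned long start_stack = 0x7f001000UL;       /* as if from mm */
        int len;

        len = printf("%08lx-%08lx rw-p", start, end);
        if (start <= start_stack && end >= start_stack) {
                pad_len_spaces(len);
                printf("[stack]");
        }
        printf("\n");
        return 0;
}
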
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 12c233da1b6b..437d2ca2de97 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -676,7 +676,7 @@ static void prune_dqcache(int count)
676 * This is called from kswapd when we think we need some 676 * This is called from kswapd when we think we need some
677 * more memory 677 * more memory
678 */ 678 */
679static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) 679static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
680{ 680{
681 if (nr) { 681 if (nr) {
682 spin_lock(&dq_list_lock); 682 spin_lock(&dq_list_lock);
diff --git a/fs/splice.c b/fs/splice.c
index 740e6b9faf7a..efdbfece9932 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1282,7 +1282,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
1282{ 1282{
1283 struct file *file = sd->u.file; 1283 struct file *file = sd->u.file;
1284 1284
1285 return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); 1285 return do_splice_from(pipe, file, &file->f_pos, sd->total_len,
1286 sd->flags);
1286} 1287}
1287 1288
1288/** 1289/**
@@ -1371,8 +1372,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1371 if (off_in) 1372 if (off_in)
1372 return -ESPIPE; 1373 return -ESPIPE;
1373 if (off_out) { 1374 if (off_out) {
1374 if (!out->f_op || !out->f_op->llseek || 1375 if (!(out->f_mode & FMODE_PWRITE))
1375 out->f_op->llseek == no_llseek)
1376 return -EINVAL; 1376 return -EINVAL;
1377 if (copy_from_user(&offset, off_out, sizeof(loff_t))) 1377 if (copy_from_user(&offset, off_out, sizeof(loff_t)))
1378 return -EFAULT; 1378 return -EFAULT;
@@ -1392,8 +1392,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1392 if (off_out) 1392 if (off_out)
1393 return -ESPIPE; 1393 return -ESPIPE;
1394 if (off_in) { 1394 if (off_in) {
1395 if (!in->f_op || !in->f_op->llseek || 1395 if (!(in->f_mode & FMODE_PREAD))
1396 in->f_op->llseek == no_llseek)
1397 return -EINVAL; 1396 return -EINVAL;
1398 if (copy_from_user(&offset, off_in, sizeof(loff_t))) 1397 if (copy_from_user(&offset, off_in, sizeof(loff_t)))
1399 return -EFAULT; 1398 return -EFAULT;
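
[Editor's note] Besides pinning direct_splice_actor() to file->f_pos, do_splice() now gates offset splicing on FMODE_PREAD/FMODE_PWRITE rather than on a usable llseek method; what matters is positional I/O capability, not seekability. The check in isolation, with local stand-ins for the kernel flags:

/* FMODE_* values and the helper are local stand-ins, not the kernel's. */
#include <stdio.h>

#define FMODE_PREAD  0x1u
#define FMODE_PWRITE 0x2u

static int can_splice_at_offset(unsigned int f_mode, int writing)
{
        return writing ? !!(f_mode & FMODE_PWRITE)
                       : !!(f_mode & FMODE_PREAD);
}

int main(void)
{
        printf("no pread (would be -EINVAL): %d\n",
               can_splice_at_offset(0, 0));
        printf("regular file, writing: %d\n",
               can_splice_at_offset(FMODE_PREAD | FMODE_PWRITE, 1));
        return 0;
}
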
diff --git a/fs/super.c b/fs/super.c
index 5c35bc7a499e..938119ab8dcb 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -374,6 +374,8 @@ void sync_supers(void)
374 up_read(&sb->s_umount); 374 up_read(&sb->s_umount);
375 375
376 spin_lock(&sb_lock); 376 spin_lock(&sb_lock);
377 /* lock was dropped, must reset next */
378 list_safe_reset_next(sb, n, s_list);
377 __put_super(sb); 379 __put_super(sb);
378 } 380 }
379 } 381 }
@@ -405,6 +407,8 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
405 up_read(&sb->s_umount); 407 up_read(&sb->s_umount);
406 408
407 spin_lock(&sb_lock); 409 spin_lock(&sb_lock);
410 /* lock was dropped, must reset next */
411 list_safe_reset_next(sb, n, s_list);
408 __put_super(sb); 412 __put_super(sb);
409 } 413 }
410 spin_unlock(&sb_lock); 414 spin_unlock(&sb_lock);
@@ -585,6 +589,8 @@ static void do_emergency_remount(struct work_struct *work)
585 } 589 }
586 up_write(&sb->s_umount); 590 up_write(&sb->s_umount);
587 spin_lock(&sb_lock); 591 spin_lock(&sb_lock);
592 /* lock was dropped, must reset next */
593 list_safe_reset_next(sb, n, s_list);
588 __put_super(sb); 594 __put_super(sb);
589 } 595 }
590 spin_unlock(&sb_lock); 596 spin_unlock(&sb_lock);
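
[Editor's note] All three super.c hunks add list_safe_reset_next() after re-acquiring sb_lock: a _safe iterator caches the next element, and once the lock is dropped that cached pointer may refer to a deleted superblock, so it must be re-read before continuing. The hazard and fix on a toy list:

/* Toy singly linked list; the kernel's list.h is doubly linked, but the
 * stale-next hazard is the same. */
#include <stdio.h>

struct node { int v; struct node *next; };

int main(void)
{
        struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        struct node *pos = &a, *n;

        while (pos) {
                n = pos->next;  /* what a _safe iterator caches up front */
                /* ... lock dropped here, the list may be modified ... */
                n = pos->next;  /* list_safe_reset_next() analogue */
                printf("visit %d\n", pos->v);
                pos = n;
        }
        return 0;
}
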
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index bbd69bdb0fa8..fcc498ec9b33 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -25,6 +25,7 @@
25#include <linux/stat.h> 25#include <linux/stat.h>
26#include <linux/string.h> 26#include <linux/string.h>
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/writeback.h>
28#include "sysv.h" 29#include "sysv.h"
29 30
30/* We don't trust the value of 31/* We don't trust the value of
@@ -139,6 +140,9 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
139 struct inode *inode; 140 struct inode *inode;
140 sysv_ino_t ino; 141 sysv_ino_t ino;
141 unsigned count; 142 unsigned count;
143 struct writeback_control wbc = {
144 .sync_mode = WB_SYNC_NONE
145 };
142 146
143 inode = new_inode(sb); 147 inode = new_inode(sb);
144 if (!inode) 148 if (!inode)
@@ -168,7 +172,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
168 insert_inode_hash(inode); 172 insert_inode_hash(inode);
169 mark_inode_dirty(inode); 173 mark_inode_dirty(inode);
170 174
171 sysv_write_inode(inode, 0); /* ensure inode not allocated again */ 175 sysv_write_inode(inode, &wbc); /* ensure inode not allocated again */
172 mark_inode_dirty(inode); /* cleared by sysv_write_inode() */ 176 mark_inode_dirty(inode); /* cleared by sysv_write_inode() */
173 /* That's it. */ 177 /* That's it. */
174 unlock_super(sb); 178 unlock_super(sb);
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 076ca50e9933..c8ff0d1ae5d3 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -62,7 +62,9 @@
62 */ 62 */
63static void shrink_liability(struct ubifs_info *c, int nr_to_write) 63static void shrink_liability(struct ubifs_info *c, int nr_to_write)
64{ 64{
65 down_read(&c->vfs_sb->s_umount);
65 writeback_inodes_sb(c->vfs_sb); 66 writeback_inodes_sb(c->vfs_sb);
67 up_read(&c->vfs_sb->s_umount);
66} 68}
67 69
68/** 70/**
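
[Editor's note] The ubifs fix wraps writeback_inodes_sb() in down_read(&c->vfs_sb->s_umount)/up_read(): flushing a superblock's dirty inodes is only safe while unmount is excluded. The locking rule, with a pthread rwlock standing in for the kernel rw_semaphore:

/* pthread rwlock stands in for the rw_semaphore; the callee is a stub. */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t s_umount = PTHREAD_RWLOCK_INITIALIZER;

static void writeback_inodes_sb(void)
{
        printf("writing back dirty inodes\n");
}

static void shrink_liability(void)
{
        pthread_rwlock_rdlock(&s_umount);       /* down_read(&sb->s_umount) */
        writeback_inodes_sb();
        pthread_rwlock_unlock(&s_umount);       /* up_read(&sb->s_umount) */
}

int main(void)
{
        shrink_liability();
        return 0;
}
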
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 02feb59cefca..0b201114a5ad 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -277,7 +277,7 @@ static int kick_a_thread(void)
277 return 0; 277 return 0;
278} 278}
279 279
280int ubifs_shrinker(int nr, gfp_t gfp_mask) 280int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask)
281{ 281{
282 int freed, contention = 0; 282 int freed, contention = 0;
283 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 283 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 2eef553d50c8..04310878f449 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1575,7 +1575,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
1575int ubifs_tnc_end_commit(struct ubifs_info *c); 1575int ubifs_tnc_end_commit(struct ubifs_info *c);
1576 1576
1577/* shrinker.c */ 1577/* shrinker.c */
1578int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); 1578int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
1579 1579
1580/* commit.c */ 1580/* commit.c */
1581int ubifs_bg_thread(void *info); 1581int ubifs_bg_thread(void *info);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 649ade8ef598..2ee3f7a60163 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -45,7 +45,7 @@
45 45
46static kmem_zone_t *xfs_buf_zone; 46static kmem_zone_t *xfs_buf_zone;
47STATIC int xfsbufd(void *); 47STATIC int xfsbufd(void *);
48STATIC int xfsbufd_wakeup(int, gfp_t); 48STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
49STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); 49STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
50static struct shrinker xfs_buf_shake = { 50static struct shrinker xfs_buf_shake = {
51 .shrink = xfsbufd_wakeup, 51 .shrink = xfsbufd_wakeup,
@@ -340,7 +340,7 @@ _xfs_buf_lookup_pages(
340 __func__, gfp_mask); 340 __func__, gfp_mask);
341 341
342 XFS_STATS_INC(xb_page_retries); 342 XFS_STATS_INC(xb_page_retries);
343 xfsbufd_wakeup(0, gfp_mask); 343 xfsbufd_wakeup(NULL, 0, gfp_mask);
344 congestion_wait(BLK_RW_ASYNC, HZ/50); 344 congestion_wait(BLK_RW_ASYNC, HZ/50);
345 goto retry; 345 goto retry;
346 } 346 }
@@ -1762,6 +1762,7 @@ xfs_buf_runall_queues(
1762 1762
1763STATIC int 1763STATIC int
1764xfsbufd_wakeup( 1764xfsbufd_wakeup(
1765 struct shrinker *shrink,
1765 int priority, 1766 int priority,
1766 gfp_t mask) 1767 gfp_t mask)
1767{ 1768{
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 846b75aeb2ab..e7839ee49e43 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -128,13 +128,12 @@ xfs_nfs_get_inode(
128 return ERR_PTR(-ESTALE); 128 return ERR_PTR(-ESTALE);
129 129
130 /* 130 /*
131 * The XFS_IGET_BULKSTAT means that an invalid inode number is just 131 * The XFS_IGET_UNTRUSTED means that an invalid inode number is just
132 * fine and not an indication of a corrupted filesystem. Because 132 * fine and not an indication of a corrupted filesystem as clients can
133 * clients can send any kind of invalid file handle, e.g. after 133 * send invalid file handles and we have to handle it gracefully.
134 * a restore on the server we have to deal with this case gracefully.
135 */ 134 */
136 error = xfs_iget(mp, NULL, ino, XFS_IGET_BULKSTAT, 135 error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED,
137 XFS_ILOCK_SHARED, &ip, 0); 136 XFS_ILOCK_SHARED, &ip);
138 if (error) { 137 if (error) {
139 /* 138 /*
140 * EINVAL means the inode cluster doesn't exist anymore. 139 * EINVAL means the inode cluster doesn't exist anymore.
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 699b60cbab9c..e59a81062830 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -679,10 +679,9 @@ xfs_ioc_bulkstat(
679 error = xfs_bulkstat_single(mp, &inlast, 679 error = xfs_bulkstat_single(mp, &inlast,
680 bulkreq.ubuffer, &done); 680 bulkreq.ubuffer, &done);
681 else /* XFS_IOC_FSBULKSTAT */ 681 else /* XFS_IOC_FSBULKSTAT */
682 error = xfs_bulkstat(mp, &inlast, &count, 682 error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
683 (bulkstat_one_pf)xfs_bulkstat_one, NULL, 683 sizeof(xfs_bstat_t), bulkreq.ubuffer,
684 sizeof(xfs_bstat_t), bulkreq.ubuffer, 684 &done);
685 BULKSTAT_FG_QUICK, &done);
686 685
687 if (error) 686 if (error)
688 return -error; 687 return -error;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 9287135e9bfc..52ed49e6465c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -237,15 +237,12 @@ xfs_bulkstat_one_compat(
237 xfs_ino_t ino, /* inode number to get data for */ 237 xfs_ino_t ino, /* inode number to get data for */
238 void __user *buffer, /* buffer to place output in */ 238 void __user *buffer, /* buffer to place output in */
239 int ubsize, /* size of buffer */ 239 int ubsize, /* size of buffer */
240 void *private_data, /* my private data */
241 xfs_daddr_t bno, /* starting bno of inode cluster */
242 int *ubused, /* bytes used by me */ 240 int *ubused, /* bytes used by me */
243 void *dibuff, /* on-disk inode buffer */
244 int *stat) /* BULKSTAT_RV_... */ 241 int *stat) /* BULKSTAT_RV_... */
245{ 242{
246 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, 243 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
247 xfs_bulkstat_one_fmt_compat, bno, 244 xfs_bulkstat_one_fmt_compat,
248 ubused, dibuff, stat); 245 ubused, stat);
249} 246}
250 247
251/* copied from xfs_ioctl.c */ 248/* copied from xfs_ioctl.c */
@@ -298,13 +295,11 @@ xfs_compat_ioc_bulkstat(
298 int res; 295 int res;
299 296
300 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, 297 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
301 sizeof(compat_xfs_bstat_t), 298 sizeof(compat_xfs_bstat_t), 0, &res);
302 NULL, 0, NULL, NULL, &res);
303 } else if (cmd == XFS_IOC_FSBULKSTAT_32) { 299 } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
304 error = xfs_bulkstat(mp, &inlast, &count, 300 error = xfs_bulkstat(mp, &inlast, &count,
305 xfs_bulkstat_one_compat, NULL, 301 xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
306 sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, 302 bulkreq.ubuffer, &done);
307 BULKSTAT_FG_QUICK, &done);
308 } else 303 } else
309 error = XFS_ERROR(EINVAL); 304 error = XFS_ERROR(EINVAL);
310 if (error) 305 if (error)
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index f2d1718c9165..80938c736c27 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1883,7 +1883,6 @@ init_xfs_fs(void)
1883 goto out_cleanup_procfs; 1883 goto out_cleanup_procfs;
1884 1884
1885 vfs_initquota(); 1885 vfs_initquota();
1886 xfs_inode_shrinker_init();
1887 1886
1888 error = register_filesystem(&xfs_fs_type); 1887 error = register_filesystem(&xfs_fs_type);
1889 if (error) 1888 if (error)
@@ -1911,7 +1910,6 @@ exit_xfs_fs(void)
1911{ 1910{
1912 vfs_exitquota(); 1911 vfs_exitquota();
1913 unregister_filesystem(&xfs_fs_type); 1912 unregister_filesystem(&xfs_fs_type);
1914 xfs_inode_shrinker_destroy();
1915 xfs_sysctl_unregister(); 1913 xfs_sysctl_unregister();
1916 xfs_cleanup_procfs(); 1914 xfs_cleanup_procfs();
1917 xfs_buf_terminate(); 1915 xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index ef7f0218bccb..a51a07c3a70c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -144,6 +144,41 @@ restart:
144 return last_error; 144 return last_error;
145} 145}
146 146
147/*
148 * Select the next per-ag structure to iterate during the walk. The reclaim
149 * walk is optimised only to walk AGs with reclaimable inodes in them.
150 */
151static struct xfs_perag *
152xfs_inode_ag_iter_next_pag(
153 struct xfs_mount *mp,
154 xfs_agnumber_t *first,
155 int tag)
156{
157 struct xfs_perag *pag = NULL;
158
159 if (tag == XFS_ICI_RECLAIM_TAG) {
160 int found;
161 int ref;
162
163 spin_lock(&mp->m_perag_lock);
164 found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
165 (void **)&pag, *first, 1, tag);
166 if (found <= 0) {
167 spin_unlock(&mp->m_perag_lock);
168 return NULL;
169 }
170 *first = pag->pag_agno + 1;
171 /* open coded pag reference increment */
172 ref = atomic_inc_return(&pag->pag_ref);
173 spin_unlock(&mp->m_perag_lock);
174 trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
175 } else {
176 pag = xfs_perag_get(mp, *first);
177 (*first)++;
178 }
179 return pag;
180}
181
147int 182int
148xfs_inode_ag_iterator( 183xfs_inode_ag_iterator(
149 struct xfs_mount *mp, 184 struct xfs_mount *mp,
@@ -154,16 +189,15 @@ xfs_inode_ag_iterator(
154 int exclusive, 189 int exclusive,
155 int *nr_to_scan) 190 int *nr_to_scan)
156{ 191{
192 struct xfs_perag *pag;
157 int error = 0; 193 int error = 0;
158 int last_error = 0; 194 int last_error = 0;
159 xfs_agnumber_t ag; 195 xfs_agnumber_t ag;
160 int nr; 196 int nr;
161 197
162 nr = nr_to_scan ? *nr_to_scan : INT_MAX; 198 nr = nr_to_scan ? *nr_to_scan : INT_MAX;
163 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 199 ag = 0;
164 struct xfs_perag *pag; 200 while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
165
166 pag = xfs_perag_get(mp, ag);
167 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, 201 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
168 exclusive, &nr); 202 exclusive, &nr);
169 xfs_perag_put(pag); 203 xfs_perag_put(pag);
@@ -640,6 +674,17 @@ __xfs_inode_set_reclaim_tag(
640 radix_tree_tag_set(&pag->pag_ici_root, 674 radix_tree_tag_set(&pag->pag_ici_root,
641 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), 675 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
642 XFS_ICI_RECLAIM_TAG); 676 XFS_ICI_RECLAIM_TAG);
677
678 if (!pag->pag_ici_reclaimable) {
679 /* propagate the reclaim tag up into the perag radix tree */
680 spin_lock(&ip->i_mount->m_perag_lock);
681 radix_tree_tag_set(&ip->i_mount->m_perag_tree,
682 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
683 XFS_ICI_RECLAIM_TAG);
684 spin_unlock(&ip->i_mount->m_perag_lock);
685 trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
686 -1, _RET_IP_);
687 }
643 pag->pag_ici_reclaimable++; 688 pag->pag_ici_reclaimable++;
644} 689}
645 690
@@ -674,6 +719,16 @@ __xfs_inode_clear_reclaim_tag(
674 radix_tree_tag_clear(&pag->pag_ici_root, 719 radix_tree_tag_clear(&pag->pag_ici_root,
675 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); 720 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
676 pag->pag_ici_reclaimable--; 721 pag->pag_ici_reclaimable--;
722 if (!pag->pag_ici_reclaimable) {
723 /* clear the reclaim tag from the perag radix tree */
724 spin_lock(&ip->i_mount->m_perag_lock);
725 radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
726 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
727 XFS_ICI_RECLAIM_TAG);
728 spin_unlock(&ip->i_mount->m_perag_lock);
729 trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
730 -1, _RET_IP_);
731 }
677} 732}
678 733
679/* 734/*
@@ -828,83 +883,52 @@ xfs_reclaim_inodes(
828 883
829/* 884/*
830 * Shrinker infrastructure. 885 * Shrinker infrastructure.
831 *
832 * This is all far more complex than it needs to be. It adds a global list of
833 * mounts because the shrinkers can only call a global context. We need to make
834 * the shrinkers pass a context to avoid the need for global state.
835 */ 886 */
836static LIST_HEAD(xfs_mount_list);
837static struct rw_semaphore xfs_mount_list_lock;
838
839static int 887static int
840xfs_reclaim_inode_shrink( 888xfs_reclaim_inode_shrink(
889 struct shrinker *shrink,
841 int nr_to_scan, 890 int nr_to_scan,
842 gfp_t gfp_mask) 891 gfp_t gfp_mask)
843{ 892{
844 struct xfs_mount *mp; 893 struct xfs_mount *mp;
845 struct xfs_perag *pag; 894 struct xfs_perag *pag;
846 xfs_agnumber_t ag; 895 xfs_agnumber_t ag;
847 int reclaimable = 0; 896 int reclaimable;
848 897
898 mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
849 if (nr_to_scan) { 899 if (nr_to_scan) {
850 if (!(gfp_mask & __GFP_FS)) 900 if (!(gfp_mask & __GFP_FS))
851 return -1; 901 return -1;
852 902
853 down_read(&xfs_mount_list_lock); 903 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
854 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
855 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
856 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); 904 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
857 if (nr_to_scan <= 0) 905 /* if we don't exhaust the scan, don't bother coming back */
858 break; 906 if (nr_to_scan > 0)
859 } 907 return -1;
860 up_read(&xfs_mount_list_lock); 908 }
861 }
862 909
863 down_read(&xfs_mount_list_lock); 910 reclaimable = 0;
864 list_for_each_entry(mp, &xfs_mount_list, m_mplist) { 911 ag = 0;
865 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 912 while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
866 pag = xfs_perag_get(mp, ag); 913 XFS_ICI_RECLAIM_TAG))) {
867 reclaimable += pag->pag_ici_reclaimable; 914 reclaimable += pag->pag_ici_reclaimable;
868 xfs_perag_put(pag); 915 xfs_perag_put(pag);
869 }
870 } 916 }
871 up_read(&xfs_mount_list_lock);
872 return reclaimable; 917 return reclaimable;
873} 918}
874 919
875static struct shrinker xfs_inode_shrinker = {
876 .shrink = xfs_reclaim_inode_shrink,
877 .seeks = DEFAULT_SEEKS,
878};
879
880void __init
881xfs_inode_shrinker_init(void)
882{
883 init_rwsem(&xfs_mount_list_lock);
884 register_shrinker(&xfs_inode_shrinker);
885}
886
887void
888xfs_inode_shrinker_destroy(void)
889{
890 ASSERT(list_empty(&xfs_mount_list));
891 unregister_shrinker(&xfs_inode_shrinker);
892}
893
894void 920void
895xfs_inode_shrinker_register( 921xfs_inode_shrinker_register(
896 struct xfs_mount *mp) 922 struct xfs_mount *mp)
897{ 923{
898 down_write(&xfs_mount_list_lock); 924 mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink;
899 list_add_tail(&mp->m_mplist, &xfs_mount_list); 925 mp->m_inode_shrink.seeks = DEFAULT_SEEKS;
900 up_write(&xfs_mount_list_lock); 926 register_shrinker(&mp->m_inode_shrink);
901} 927}
902 928
903void 929void
904xfs_inode_shrinker_unregister( 930xfs_inode_shrinker_unregister(
905 struct xfs_mount *mp) 931 struct xfs_mount *mp)
906{ 932{
907 down_write(&xfs_mount_list_lock); 933 unregister_shrinker(&mp->m_inode_shrink);
908 list_del(&mp->m_mplist);
909 up_write(&xfs_mount_list_lock);
910} 934}
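
[Editor's note] The xfs_sync.c rewrite drops the global xfs_mount_list: each mount embeds its own struct shrinker (m_inode_shrink), and because the callback now receives the struct shrinker pointer, container_of() recovers the owning xfs_mount with no global state. The new perag radix-tree reclaim tag additionally lets the walk skip AGs with nothing reclaimable. The same pattern in user-space C:

/* Struct names echo the patch, but everything here is a stand-in. */
#include <stddef.h>
#include <stdio.h>

struct shrinker {
        int (*shrink)(struct shrinker *s, int nr_to_scan, unsigned int gfp);
};

struct mount {
        const char *name;
        int reclaimable;
        struct shrinker m_inode_shrink;         /* embedded, one per mount */
};

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static int reclaim_inode_shrink(struct shrinker *s, int nr_to_scan,
                                unsigned int gfp)
{
        struct mount *mp = container_of(s, struct mount, m_inode_shrink);

        (void)nr_to_scan; (void)gfp;
        printf("shrinking %s: %d reclaimable\n", mp->name, mp->reclaimable);
        return mp->reclaimable;
}

int main(void)
{
        struct mount m = { "xfs0", 42, { reclaim_inode_shrink } };

        m.m_inode_shrink.shrink(&m.m_inode_shrink, 128, 0);
        return 0;
}
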
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index cdcbaaca9880..e28139aaa4aa 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -55,8 +55,6 @@ int xfs_inode_ag_iterator(struct xfs_mount *mp,
55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
56 int flags, int tag, int write_lock, int *nr_to_scan); 56 int flags, int tag, int write_lock, int *nr_to_scan);
57 57
58void xfs_inode_shrinker_init(void);
59void xfs_inode_shrinker_destroy(void);
60void xfs_inode_shrinker_register(struct xfs_mount *mp); 58void xfs_inode_shrinker_register(struct xfs_mount *mp);
61void xfs_inode_shrinker_unregister(struct xfs_mount *mp); 59void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
62 60
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 73d5aa117384..302820690904 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -124,7 +124,10 @@ DEFINE_EVENT(xfs_perag_class, name, \
124 unsigned long caller_ip), \ 124 unsigned long caller_ip), \
125 TP_ARGS(mp, agno, refcount, caller_ip)) 125 TP_ARGS(mp, agno, refcount, caller_ip))
126DEFINE_PERAG_REF_EVENT(xfs_perag_get); 126DEFINE_PERAG_REF_EVENT(xfs_perag_get);
127DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
127DEFINE_PERAG_REF_EVENT(xfs_perag_put); 128DEFINE_PERAG_REF_EVENT(xfs_perag_put);
129DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
130DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
128 131
129TRACE_EVENT(xfs_attr_list_node_descend, 132TRACE_EVENT(xfs_attr_list_node_descend,
130 TP_PROTO(struct xfs_attr_list_context *ctx, 133 TP_PROTO(struct xfs_attr_list_context *ctx,
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 2d8b7bc792c9..67c018392d62 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -69,7 +69,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
69 69
70STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 70STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
71STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 71STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
72STATIC int xfs_qm_shake(int, gfp_t); 72STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t);
73 73
74static struct shrinker xfs_qm_shaker = { 74static struct shrinker xfs_qm_shaker = {
75 .shrink = xfs_qm_shake, 75 .shrink = xfs_qm_shake,
@@ -1632,10 +1632,7 @@ xfs_qm_dqusage_adjust(
1632 xfs_ino_t ino, /* inode number to get data for */ 1632 xfs_ino_t ino, /* inode number to get data for */
1633 void __user *buffer, /* not used */ 1633 void __user *buffer, /* not used */
1634 int ubsize, /* not used */ 1634 int ubsize, /* not used */
1635 void *private_data, /* not used */
1636 xfs_daddr_t bno, /* starting block of inode cluster */
1637 int *ubused, /* not used */ 1635 int *ubused, /* not used */
1638 void *dip, /* on-disk inode pointer (not used) */
1639 int *res) /* result code value */ 1636 int *res) /* result code value */
1640{ 1637{
1641 xfs_inode_t *ip; 1638 xfs_inode_t *ip;
@@ -1660,7 +1657,7 @@ xfs_qm_dqusage_adjust(
1660 * the case in all other instances. It's OK that we do this because 1657 * the case in all other instances. It's OK that we do this because
1661 * quotacheck is done only at mount time. 1658 * quotacheck is done only at mount time.
1662 */ 1659 */
1663 if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) { 1660 if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) {
1664 *res = BULKSTAT_RV_NOTHING; 1661 *res = BULKSTAT_RV_NOTHING;
1665 return error; 1662 return error;
1666 } 1663 }
@@ -1796,12 +1793,13 @@ xfs_qm_quotacheck(
1796 * Iterate thru all the inodes in the file system, 1793 * Iterate thru all the inodes in the file system,
1797 * adjusting the corresponding dquot counters in core. 1794 * adjusting the corresponding dquot counters in core.
1798 */ 1795 */
1799 if ((error = xfs_bulkstat(mp, &lastino, &count, 1796 error = xfs_bulkstat(mp, &lastino, &count,
1800 xfs_qm_dqusage_adjust, NULL, 1797 xfs_qm_dqusage_adjust,
1801 structsz, NULL, BULKSTAT_FG_IGET, &done))) 1798 structsz, NULL, &done);
1799 if (error)
1802 break; 1800 break;
1803 1801
1804 } while (! done); 1802 } while (!done);
1805 1803
1806 /* 1804 /*
1807 * We've made all the changes that we need to make incore. 1805 * We've made all the changes that we need to make incore.
@@ -1889,14 +1887,14 @@ xfs_qm_init_quotainos(
1889 mp->m_sb.sb_uquotino != NULLFSINO) { 1887 mp->m_sb.sb_uquotino != NULLFSINO) {
1890 ASSERT(mp->m_sb.sb_uquotino > 0); 1888 ASSERT(mp->m_sb.sb_uquotino > 0);
1891 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 1889 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
1892 0, 0, &uip, 0))) 1890 0, 0, &uip)))
1893 return XFS_ERROR(error); 1891 return XFS_ERROR(error);
1894 } 1892 }
1895 if (XFS_IS_OQUOTA_ON(mp) && 1893 if (XFS_IS_OQUOTA_ON(mp) &&
1896 mp->m_sb.sb_gquotino != NULLFSINO) { 1894 mp->m_sb.sb_gquotino != NULLFSINO) {
1897 ASSERT(mp->m_sb.sb_gquotino > 0); 1895 ASSERT(mp->m_sb.sb_gquotino > 0);
1898 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 1896 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
1899 0, 0, &gip, 0))) { 1897 0, 0, &gip))) {
1900 if (uip) 1898 if (uip)
1901 IRELE(uip); 1899 IRELE(uip);
1902 return XFS_ERROR(error); 1900 return XFS_ERROR(error);
@@ -2119,7 +2117,10 @@ xfs_qm_shake_freelist(
2119 */ 2117 */
2120/* ARGSUSED */ 2118/* ARGSUSED */
2121STATIC int 2119STATIC int
2122xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask) 2120xfs_qm_shake(
2121 struct shrinker *shrink,
2122 int nr_to_scan,
2123 gfp_t gfp_mask)
2123{ 2124{
2124 int ndqused, nfree, n; 2125 int ndqused, nfree, n;
2125 2126
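The xfs_qm.c hunks track a generic shrinker API change: the ->shrink callback now receives the struct shrinker itself as its first argument, which is what the xfs_qm_shake() signature update reflects. A minimal sketch of a statically registered shrinker under the new signature; every example_* name is illustrative, not part of this patch:

#include <linux/mm.h>	/* struct shrinker, register_shrinker(), DEFAULT_SEEKS */

static int
example_shake(
	struct shrinker	*shrink,	/* newly added argument */
	int		nr_to_scan,
	gfp_t		gfp_mask)
{
	/*
	 * nr_to_scan == 0 is a query: report how many objects could be
	 * reclaimed. Otherwise free up to nr_to_scan objects and return
	 * the number still cached.
	 */
	return 0;
}

static struct shrinker example_shaker = {
	.shrink	= example_shake,
	.seeks	= DEFAULT_SEEKS,
};

/* at init time: register_shrinker(&example_shaker); */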
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 92b002f1805f..b4487764e923 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -262,7 +262,7 @@ xfs_qm_scall_trunc_qfiles(
262 } 262 }
263 263
264 if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { 264 if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) {
265 error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0); 265 error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip);
266 if (!error) { 266 if (!error) {
267 error = xfs_truncate_file(mp, qip); 267 error = xfs_truncate_file(mp, qip);
268 IRELE(qip); 268 IRELE(qip);
@@ -271,7 +271,7 @@ xfs_qm_scall_trunc_qfiles(
271 271
272 if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && 272 if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
273 mp->m_sb.sb_gquotino != NULLFSINO) { 273 mp->m_sb.sb_gquotino != NULLFSINO) {
274 error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); 274 error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip);
275 if (!error2) { 275 if (!error2) {
276 error2 = xfs_truncate_file(mp, qip); 276 error2 = xfs_truncate_file(mp, qip);
277 IRELE(qip); 277 IRELE(qip);
@@ -417,12 +417,12 @@ xfs_qm_scall_getqstat(
417 } 417 }
418 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { 418 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
419 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 419 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
420 0, 0, &uip, 0) == 0) 420 0, 0, &uip) == 0)
421 tempuqip = B_TRUE; 421 tempuqip = B_TRUE;
422 } 422 }
423 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { 423 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
424 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 424 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
425 0, 0, &gip, 0) == 0) 425 0, 0, &gip) == 0)
426 tempgqip = B_TRUE; 426 tempgqip = B_TRUE;
427 } 427 }
428 if (uip) { 428 if (uip) {
@@ -1109,10 +1109,7 @@ xfs_qm_internalqcheck_adjust(
1109 xfs_ino_t ino, /* inode number to get data for */ 1109 xfs_ino_t ino, /* inode number to get data for */
1110 void __user *buffer, /* not used */ 1110 void __user *buffer, /* not used */
1111 int ubsize, /* not used */ 1111 int ubsize, /* not used */
1112 void *private_data, /* not used */
1113 xfs_daddr_t bno, /* starting block of inode cluster */
1114 int *ubused, /* not used */ 1112 int *ubused, /* not used */
1115 void *dip, /* not used */
1116 int *res) /* bulkstat result code */ 1113 int *res) /* bulkstat result code */
1117{ 1114{
1118 xfs_inode_t *ip; 1115 xfs_inode_t *ip;
@@ -1134,7 +1131,7 @@ xfs_qm_internalqcheck_adjust(
1134 ipreleased = B_FALSE; 1131 ipreleased = B_FALSE;
1135 again: 1132 again:
1136 lock_flags = XFS_ILOCK_SHARED; 1133 lock_flags = XFS_ILOCK_SHARED;
1137 if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip, bno))) { 1134 if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) {
1138 *res = BULKSTAT_RV_NOTHING; 1135 *res = BULKSTAT_RV_NOTHING;
1139 return (error); 1136 return (error);
1140 } 1137 }
@@ -1205,15 +1202,15 @@ xfs_qm_internalqcheck(
1205 * Iterate through all the inodes in the file system, 1202 * Iterate through all the inodes in the file system,
1206 * adjusting the corresponding dquot counters 1203 * adjusting the corresponding dquot counters
1207 */ 1204 */
1208 if ((error = xfs_bulkstat(mp, &lastino, &count, 1205 error = xfs_bulkstat(mp, &lastino, &count,
1209 xfs_qm_internalqcheck_adjust, NULL, 1206 xfs_qm_internalqcheck_adjust,
1210 0, NULL, BULKSTAT_FG_IGET, &done))) { 1207 0, NULL, &done);
1208 if (error) {
1209 cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
1211 break; 1210 break;
1212 } 1211 }
1213 } while (! done); 1212 } while (!done);
1214 if (error) { 1213
1215 cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
1216 }
1217 cmn_err(CE_DEBUG, "Checking results against system dquots"); 1214 cmn_err(CE_DEBUG, "Checking results against system dquots");
1218 for (i = 0; i < qmtest_hashmask; i++) { 1215 for (i = 0; i < qmtest_hashmask; i++) {
1219 xfs_dqtest_t *d, *n; 1216 xfs_dqtest_t *d, *n;
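With the private_data and flags arguments removed, both quota-check walks in this file reduce to the same loop shape. A hedged sketch of the caller pattern under the post-patch xfs_bulkstat() prototype; the batch size and callback name are illustrative:

	xfs_ino_t	lastino = 0;	/* resume cookie, advanced by xfs_bulkstat() */
	int		count;
	int		done;
	int		error;

	do {
		count = 64;	/* illustrative batch size, reset each pass */
		error = xfs_bulkstat(mp, &lastino, &count,
				     example_adjust,	/* a bulkstat_one_pf */
				     sizeof(xfs_bstat_t), NULL, &done);
		if (error)
			break;
	} while (!done);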
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 5bba29a07812..7f159d2a429a 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -69,7 +69,9 @@ xfs_swapext(
69 goto out; 69 goto out;
70 } 70 }
71 71
72 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) { 72 if (!(file->f_mode & FMODE_WRITE) ||
73 !(file->f_mode & FMODE_READ) ||
74 (file->f_flags & O_APPEND)) {
73 error = XFS_ERROR(EBADF); 75 error = XFS_ERROR(EBADF);
74 goto out_put_file; 76 goto out_put_file;
75 } 77 }
@@ -81,6 +83,7 @@ xfs_swapext(
81 } 83 }
82 84
83 if (!(tmp_file->f_mode & FMODE_WRITE) || 85 if (!(tmp_file->f_mode & FMODE_WRITE) ||
86 !(tmp_file->f_mode & FMODE_READ) ||
84 (tmp_file->f_flags & O_APPEND)) { 87 (tmp_file->f_flags & O_APPEND)) {
85 error = XFS_ERROR(EBADF); 88 error = XFS_ERROR(EBADF);
86 goto out_put_tmp_file; 89 goto out_put_tmp_file;
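Both xfs_dfrag.c hunks tighten the same descriptor check: the target and the tmp file must now be open for reading as well as writing, and must not be append-only, before an extent swap is allowed. The guard, written once as a helper with a hypothetical name (the patch itself open-codes it at both call sites):

/*
 * Return 0 if this file may take part in an extent swap: readable,
 * writable, and not opened O_APPEND.
 */
static int
example_swapext_file_ok(struct file *file)
{
	if (!(file->f_mode & FMODE_WRITE) ||
	    !(file->f_mode & FMODE_READ) ||
	    (file->f_flags & O_APPEND))
		return XFS_ERROR(EBADF);
	return 0;
}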
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 9d884c127bb9..c7142a064c48 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1203,6 +1203,63 @@ error0:
1203 return error; 1203 return error;
1204} 1204}
1205 1205
1206STATIC int
1207xfs_imap_lookup(
1208 struct xfs_mount *mp,
1209 struct xfs_trans *tp,
1210 xfs_agnumber_t agno,
1211 xfs_agino_t agino,
1212 xfs_agblock_t agbno,
1213 xfs_agblock_t *chunk_agbno,
1214 xfs_agblock_t *offset_agbno,
1215 int flags)
1216{
1217 struct xfs_inobt_rec_incore rec;
1218 struct xfs_btree_cur *cur;
1219 struct xfs_buf *agbp;
1220 xfs_agino_t startino;
1221 int error;
1222 int i;
1223
1224 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1225 if (error) {
1226 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1227 "xfs_ialloc_read_agi() returned "
1228 "error %d, agno %d",
1229 error, agno);
1230 return error;
1231 }
1232
1233 /*
1234 * Derive and look up the exact inode record for the given agino. If the
1235 * record cannot be found, then it's an invalid inode number and we
1236 * should abort.
1237 */
1238 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1239 startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
1240 error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
1241 if (!error) {
1242 if (i)
1243 error = xfs_inobt_get_rec(cur, &rec, &i);
1244 if (!error && i == 0)
1245 error = EINVAL;
1246 }
1247
1248 xfs_trans_brelse(tp, agbp);
1249 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1250 if (error)
1251 return error;
1252
1253 /* for untrusted inodes, check that the inode is allocated first */
1254 if ((flags & XFS_IGET_UNTRUSTED) &&
1255 (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
1256 return EINVAL;
1257
1258 *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
1259 *offset_agbno = agbno - *chunk_agbno;
1260 return 0;
1261}
1262
1206/* 1263/*
1207 * Return the location of the inode in imap, for mapping it into a buffer. 1264 * Return the location of the inode in imap, for mapping it into a buffer.
1208 */ 1265 */
@@ -1235,8 +1292,11 @@ xfs_imap(
1235 if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || 1292 if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
1236 ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1293 ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1237#ifdef DEBUG 1294#ifdef DEBUG
1238 /* no diagnostics for bulkstat, ino comes from userspace */ 1295 /*
1239 if (flags & XFS_IGET_BULKSTAT) 1296 * Don't output diagnostic information for untrusted inodes
1297 * as they can be invalid without implying corruption.
1298 */
1299 if (flags & XFS_IGET_UNTRUSTED)
1240 return XFS_ERROR(EINVAL); 1300 return XFS_ERROR(EINVAL);
1241 if (agno >= mp->m_sb.sb_agcount) { 1301 if (agno >= mp->m_sb.sb_agcount) {
1242 xfs_fs_cmn_err(CE_ALERT, mp, 1302 xfs_fs_cmn_err(CE_ALERT, mp,
@@ -1263,6 +1323,23 @@ xfs_imap(
1263 return XFS_ERROR(EINVAL); 1323 return XFS_ERROR(EINVAL);
1264 } 1324 }
1265 1325
1326 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
1327
1328 /*
1329 * For bulkstat and handle lookups, we have an untrusted inode number
1330 * that we have to verify is valid. We cannot do this just by reading
1331 * the inode buffer as it may have been unlinked and removed, leaving
1332 * inodes in a stale state on disk. Hence we have to do a btree lookup
1333 * in all cases where an untrusted inode number is passed.
1334 */
1335 if (flags & XFS_IGET_UNTRUSTED) {
1336 error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
1337 &chunk_agbno, &offset_agbno, flags);
1338 if (error)
1339 return error;
1340 goto out_map;
1341 }
1342
1266 /* 1343 /*
1267 * If the inode cluster size is the same as the blocksize or 1344 * If the inode cluster size is the same as the blocksize or
1268 * smaller we get to the buffer by simple arithmetic. 1345 * smaller we get to the buffer by simple arithmetic.
@@ -1277,24 +1354,6 @@ xfs_imap(
1277 return 0; 1354 return 0;
1278 } 1355 }
1279 1356
1280 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
1281
1282 /*
1283 * If we get a block number passed from bulkstat we can use it to
1284 * find the buffer easily.
1285 */
1286 if (imap->im_blkno) {
1287 offset = XFS_INO_TO_OFFSET(mp, ino);
1288 ASSERT(offset < mp->m_sb.sb_inopblock);
1289
1290 cluster_agbno = xfs_daddr_to_agbno(mp, imap->im_blkno);
1291 offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock;
1292
1293 imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
1294 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
1295 return 0;
1296 }
1297
1298 /* 1357 /*
1299 * If the inode chunks are aligned then use simple maths to 1358 * If the inode chunks are aligned then use simple maths to
1300 * find the location. Otherwise we have to do a btree 1359 * find the location. Otherwise we have to do a btree
@@ -1304,50 +1363,13 @@ xfs_imap(
1304 offset_agbno = agbno & mp->m_inoalign_mask; 1363 offset_agbno = agbno & mp->m_inoalign_mask;
1305 chunk_agbno = agbno - offset_agbno; 1364 chunk_agbno = agbno - offset_agbno;
1306 } else { 1365 } else {
1307 xfs_btree_cur_t *cur; /* inode btree cursor */ 1366 error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
1308 xfs_inobt_rec_incore_t chunk_rec; 1367 &chunk_agbno, &offset_agbno, flags);
1309 xfs_buf_t *agbp; /* agi buffer */
1310 int i; /* temp state */
1311
1312 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1313 if (error) {
1314 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1315 "xfs_ialloc_read_agi() returned "
1316 "error %d, agno %d",
1317 error, agno);
1318 return error;
1319 }
1320
1321 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1322 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
1323 if (error) {
1324 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1325 "xfs_inobt_lookup() failed");
1326 goto error0;
1327 }
1328
1329 error = xfs_inobt_get_rec(cur, &chunk_rec, &i);
1330 if (error) {
1331 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1332 "xfs_inobt_get_rec() failed");
1333 goto error0;
1334 }
1335 if (i == 0) {
1336#ifdef DEBUG
1337 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1338 "xfs_inobt_get_rec() failed");
1339#endif /* DEBUG */
1340 error = XFS_ERROR(EINVAL);
1341 }
1342 error0:
1343 xfs_trans_brelse(tp, agbp);
1344 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1345 if (error) 1368 if (error)
1346 return error; 1369 return error;
1347 chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino);
1348 offset_agbno = agbno - chunk_agbno;
1349 } 1370 }
1350 1371
1372out_map:
1351 ASSERT(agbno >= chunk_agbno); 1373 ASSERT(agbno >= chunk_agbno);
1352 cluster_agbno = chunk_agbno + 1374 cluster_agbno = chunk_agbno +
1353 ((offset_agbno / blks_per_cluster) * blks_per_cluster); 1375 ((offset_agbno / blks_per_cluster) * blks_per_cluster);
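The new xfs_imap_lookup() is what makes untrusted lookups safe: rather than trusting a caller-supplied cluster block number, it locates the chunk record in the inode btree. Two details from the added code are worth restating. Inode chunks hold XFS_IALLOC_INODES(mp) inodes and start on chunk-aligned inode numbers, so masking recovers the btree key; and for XFS_IGET_UNTRUSTED callers a set bit in ir_free means the slot is free on disk, so the number is rejected. Condensed from the hunks above:

	/* chunk-aligned first inode of the record that should contain agino */
	startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);

	/* after the XFS_LOOKUP_EQ lookup of startino succeeds... */
	if ((flags & XFS_IGET_UNTRUSTED) &&
	    (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
		return EINVAL;	/* free slot: bogus inode number */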
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 75df75f43d48..8f8b91be2c99 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -259,7 +259,6 @@ xfs_iget_cache_miss(
259 xfs_trans_t *tp, 259 xfs_trans_t *tp,
260 xfs_ino_t ino, 260 xfs_ino_t ino,
261 struct xfs_inode **ipp, 261 struct xfs_inode **ipp,
262 xfs_daddr_t bno,
263 int flags, 262 int flags,
264 int lock_flags) 263 int lock_flags)
265{ 264{
@@ -272,7 +271,7 @@ xfs_iget_cache_miss(
272 if (!ip) 271 if (!ip)
273 return ENOMEM; 272 return ENOMEM;
274 273
275 error = xfs_iread(mp, tp, ip, bno, flags); 274 error = xfs_iread(mp, tp, ip, flags);
276 if (error) 275 if (error)
277 goto out_destroy; 276 goto out_destroy;
278 277
@@ -358,8 +357,6 @@ out_destroy:
358 * within the file system for the inode being requested. 357 * within the file system for the inode being requested.
359 * lock_flags -- flags indicating how to lock the inode. See the comment 358 * lock_flags -- flags indicating how to lock the inode. See the comment
360 * for xfs_ilock() for a list of valid values. 359 * for xfs_ilock() for a list of valid values.
361 * bno -- the block number starting the buffer containing the inode,
362 * if known (as by bulkstat), else 0.
363 */ 360 */
364int 361int
365xfs_iget( 362xfs_iget(
@@ -368,8 +365,7 @@ xfs_iget(
368 xfs_ino_t ino, 365 xfs_ino_t ino,
369 uint flags, 366 uint flags,
370 uint lock_flags, 367 uint lock_flags,
371 xfs_inode_t **ipp, 368 xfs_inode_t **ipp)
372 xfs_daddr_t bno)
373{ 369{
374 xfs_inode_t *ip; 370 xfs_inode_t *ip;
375 int error; 371 int error;
@@ -397,7 +393,7 @@ again:
397 read_unlock(&pag->pag_ici_lock); 393 read_unlock(&pag->pag_ici_lock);
398 XFS_STATS_INC(xs_ig_missed); 394 XFS_STATS_INC(xs_ig_missed);
399 395
400 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno, 396 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
401 flags, lock_flags); 397 flags, lock_flags);
402 if (error) 398 if (error)
403 goto out_error_or_again; 399 goto out_error_or_again;
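Every xfs_iget() call site in this patch collapses to the same six-argument shape. A representative lookup under the new prototype, as a bulkstat- or handle-style caller would issue it; the lock choice is illustrative:

	struct xfs_inode	*ip;
	int			error;

	error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED,
			 XFS_ILOCK_SHARED, &ip);
	if (error)
		return error;	/* EINVAL if the untrusted number is bogus */

	/* ... read what is needed from ip ... */
	xfs_iput(ip, XFS_ILOCK_SHARED);	/* drops lock and reference */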
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d53c39de7d05..b76a829d7e20 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -177,7 +177,7 @@ xfs_imap_to_bp(
177 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 177 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
178 XFS_ERRTAG_ITOBP_INOTOBP, 178 XFS_ERRTAG_ITOBP_INOTOBP,
179 XFS_RANDOM_ITOBP_INOTOBP))) { 179 XFS_RANDOM_ITOBP_INOTOBP))) {
180 if (iget_flags & XFS_IGET_BULKSTAT) { 180 if (iget_flags & XFS_IGET_UNTRUSTED) {
181 xfs_trans_brelse(tp, bp); 181 xfs_trans_brelse(tp, bp);
182 return XFS_ERROR(EINVAL); 182 return XFS_ERROR(EINVAL);
183 } 183 }
@@ -787,7 +787,6 @@ xfs_iread(
787 xfs_mount_t *mp, 787 xfs_mount_t *mp,
788 xfs_trans_t *tp, 788 xfs_trans_t *tp,
789 xfs_inode_t *ip, 789 xfs_inode_t *ip,
790 xfs_daddr_t bno,
791 uint iget_flags) 790 uint iget_flags)
792{ 791{
793 xfs_buf_t *bp; 792 xfs_buf_t *bp;
@@ -797,11 +796,9 @@ xfs_iread(
797 /* 796 /*
798 * Fill in the location information in the in-core inode. 797 * Fill in the location information in the in-core inode.
799 */ 798 */
800 ip->i_imap.im_blkno = bno;
801 error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); 799 error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
802 if (error) 800 if (error)
803 return error; 801 return error;
804 ASSERT(bno == 0 || bno == ip->i_imap.im_blkno);
805 802
806 /* 803 /*
807 * Get pointers to the on-disk inode and the buffer containing it. 804 * Get pointers to the on-disk inode and the buffer containing it.
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 9965e40a4615..78550df13cd6 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -442,7 +442,7 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
442 * xfs_iget.c prototypes. 442 * xfs_iget.c prototypes.
443 */ 443 */
444int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 444int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
445 uint, uint, xfs_inode_t **, xfs_daddr_t); 445 uint, uint, xfs_inode_t **);
446void xfs_iput(xfs_inode_t *, uint); 446void xfs_iput(xfs_inode_t *, uint);
447void xfs_iput_new(xfs_inode_t *, uint); 447void xfs_iput_new(xfs_inode_t *, uint);
448void xfs_ilock(xfs_inode_t *, uint); 448void xfs_ilock(xfs_inode_t *, uint);
@@ -500,7 +500,7 @@ do { \
500 * Flags for xfs_iget() 500 * Flags for xfs_iget()
501 */ 501 */
502#define XFS_IGET_CREATE 0x1 502#define XFS_IGET_CREATE 0x1
503#define XFS_IGET_BULKSTAT 0x2 503#define XFS_IGET_UNTRUSTED 0x2
504 504
505int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, 505int xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
506 xfs_ino_t, struct xfs_dinode **, 506 xfs_ino_t, struct xfs_dinode **,
@@ -509,7 +509,7 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
509 struct xfs_inode *, struct xfs_dinode **, 509 struct xfs_inode *, struct xfs_dinode **,
510 struct xfs_buf **, uint); 510 struct xfs_buf **, uint);
511int xfs_iread(struct xfs_mount *, struct xfs_trans *, 511int xfs_iread(struct xfs_mount *, struct xfs_trans *,
512 struct xfs_inode *, xfs_daddr_t, uint); 512 struct xfs_inode *, uint);
513void xfs_dinode_to_disk(struct xfs_dinode *, 513void xfs_dinode_to_disk(struct xfs_dinode *,
514 struct xfs_icdinode *); 514 struct xfs_icdinode *);
515void xfs_idestroy_fork(struct xfs_inode *, int); 515void xfs_idestroy_fork(struct xfs_inode *, int);
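The flag rename is the semantic heart of the series: XFS_IGET_BULKSTAT named one caller, while XFS_IGET_UNTRUSTED covers any inode number that originates in userspace, bulkstat and handle lookups alike. Call sites use it as a quiet-failure gate, as in the xfs_imap() and xfs_imap_to_bp() hunks above:

	if (iget_flags & XFS_IGET_UNTRUSTED)
		return XFS_ERROR(EINVAL);	/* bad input, not corruption */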
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index b1b801e4a28e..2b86f8610512 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -49,24 +49,40 @@ xfs_internal_inum(
49 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); 49 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
50} 50}
51 51
52STATIC int 52/*
53xfs_bulkstat_one_iget( 53 * Return stat information for one inode.
54 xfs_mount_t *mp, /* mount point for filesystem */ 54 * Return 0 if ok, else errno.
55 xfs_ino_t ino, /* inode number to get data for */ 55 */
56 xfs_daddr_t bno, /* starting bno of inode cluster */ 56int
57 xfs_bstat_t *buf, /* return buffer */ 57xfs_bulkstat_one_int(
58 int *stat) /* BULKSTAT_RV_... */ 58 struct xfs_mount *mp, /* mount point for filesystem */
59 xfs_ino_t ino, /* inode to get data for */
60 void __user *buffer, /* buffer to place output in */
61 int ubsize, /* size of buffer */
62 bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
63 int *ubused, /* bytes used by me */
64 int *stat) /* BULKSTAT_RV_... */
59{ 65{
60 xfs_icdinode_t *dic; /* dinode core info pointer */ 66 struct xfs_icdinode *dic; /* dinode core info pointer */
61 xfs_inode_t *ip; /* incore inode pointer */ 67 struct xfs_inode *ip; /* incore inode pointer */
62 struct inode *inode; 68 struct inode *inode;
63 int error; 69 struct xfs_bstat *buf; /* return buffer */
70 int error = 0; /* error value */
71
72 *stat = BULKSTAT_RV_NOTHING;
73
74 if (!buffer || xfs_internal_inum(mp, ino))
75 return XFS_ERROR(EINVAL);
76
77 buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
78 if (!buf)
79 return XFS_ERROR(ENOMEM);
64 80
65 error = xfs_iget(mp, NULL, ino, 81 error = xfs_iget(mp, NULL, ino,
66 XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno); 82 XFS_IGET_UNTRUSTED, XFS_ILOCK_SHARED, &ip);
67 if (error) { 83 if (error) {
68 *stat = BULKSTAT_RV_NOTHING; 84 *stat = BULKSTAT_RV_NOTHING;
69 return error; 85 goto out_free;
70 } 86 }
71 87
72 ASSERT(ip != NULL); 88 ASSERT(ip != NULL);
@@ -127,77 +143,16 @@ xfs_bulkstat_one_iget(
127 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks; 143 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks;
128 break; 144 break;
129 } 145 }
130
131 xfs_iput(ip, XFS_ILOCK_SHARED); 146 xfs_iput(ip, XFS_ILOCK_SHARED);
132 return error;
133}
134 147
135STATIC void 148 error = formatter(buffer, ubsize, ubused, buf);
136xfs_bulkstat_one_dinode(
137 xfs_mount_t *mp, /* mount point for filesystem */
138 xfs_ino_t ino, /* inode number to get data for */
139 xfs_dinode_t *dic, /* dinode inode pointer */
140 xfs_bstat_t *buf) /* return buffer */
141{
142 /*
143 * The inode format changed when we moved the link count and
144 * made it 32 bits long. If this is an old format inode,
145 * convert it in memory to look like a new one. If it gets
146 * flushed to disk we will convert back before flushing or
147 * logging it. We zero out the new projid field and the old link
148 * count field. We'll handle clearing the pad field (the remains
149 * of the old uuid field) when we actually convert the inode to
150 * the new format. We don't change the version number so that we
151 * can distinguish this from a real new format inode.
152 */
153 if (dic->di_version == 1) {
154 buf->bs_nlink = be16_to_cpu(dic->di_onlink);
155 buf->bs_projid = 0;
156 } else {
157 buf->bs_nlink = be32_to_cpu(dic->di_nlink);
158 buf->bs_projid = be16_to_cpu(dic->di_projid);
159 }
160 149
161 buf->bs_ino = ino; 150 if (!error)
162 buf->bs_mode = be16_to_cpu(dic->di_mode); 151 *stat = BULKSTAT_RV_DIDONE;
163 buf->bs_uid = be32_to_cpu(dic->di_uid);
164 buf->bs_gid = be32_to_cpu(dic->di_gid);
165 buf->bs_size = be64_to_cpu(dic->di_size);
166 buf->bs_atime.tv_sec = be32_to_cpu(dic->di_atime.t_sec);
167 buf->bs_atime.tv_nsec = be32_to_cpu(dic->di_atime.t_nsec);
168 buf->bs_mtime.tv_sec = be32_to_cpu(dic->di_mtime.t_sec);
169 buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec);
170 buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec);
171 buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec);
172 buf->bs_xflags = xfs_dic2xflags(dic);
173 buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog;
174 buf->bs_extents = be32_to_cpu(dic->di_nextents);
175 buf->bs_gen = be32_to_cpu(dic->di_gen);
176 memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
177 buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask);
178 buf->bs_dmstate = be16_to_cpu(dic->di_dmstate);
179 buf->bs_aextents = be16_to_cpu(dic->di_anextents);
180 buf->bs_forkoff = XFS_DFORK_BOFF(dic);
181 152
182 switch (dic->di_format) { 153 out_free:
183 case XFS_DINODE_FMT_DEV: 154 kmem_free(buf);
184 buf->bs_rdev = xfs_dinode_get_rdev(dic); 155 return error;
185 buf->bs_blksize = BLKDEV_IOSIZE;
186 buf->bs_blocks = 0;
187 break;
188 case XFS_DINODE_FMT_LOCAL:
189 case XFS_DINODE_FMT_UUID:
190 buf->bs_rdev = 0;
191 buf->bs_blksize = mp->m_sb.sb_blocksize;
192 buf->bs_blocks = 0;
193 break;
194 case XFS_DINODE_FMT_EXTENTS:
195 case XFS_DINODE_FMT_BTREE:
196 buf->bs_rdev = 0;
197 buf->bs_blksize = mp->m_sb.sb_blocksize;
198 buf->bs_blocks = be64_to_cpu(dic->di_nblocks);
199 break;
200 }
201} 156}
202 157
203/* Return 0 on success or positive error */ 158/* Return 0 on success or positive error */
@@ -217,118 +172,17 @@ xfs_bulkstat_one_fmt(
217 return 0; 172 return 0;
218} 173}
219 174
220/*
221 * Return stat information for one inode.
222 * Return 0 if ok, else errno.
223 */
224int /* error status */
225xfs_bulkstat_one_int(
226 xfs_mount_t *mp, /* mount point for filesystem */
227 xfs_ino_t ino, /* inode number to get data for */
228 void __user *buffer, /* buffer to place output in */
229 int ubsize, /* size of buffer */
230 bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
231 xfs_daddr_t bno, /* starting bno of inode cluster */
232 int *ubused, /* bytes used by me */
233 void *dibuff, /* on-disk inode buffer */
234 int *stat) /* BULKSTAT_RV_... */
235{
236 xfs_bstat_t *buf; /* return buffer */
237 int error = 0; /* error value */
238 xfs_dinode_t *dip; /* dinode inode pointer */
239
240 dip = (xfs_dinode_t *)dibuff;
241 *stat = BULKSTAT_RV_NOTHING;
242
243 if (!buffer || xfs_internal_inum(mp, ino))
244 return XFS_ERROR(EINVAL);
245
246 buf = kmem_alloc(sizeof(*buf), KM_SLEEP);
247
248 if (dip == NULL) {
249 /* We're not being passed a pointer to a dinode. This happens
250 * if BULKSTAT_FG_IGET is selected. Do the iget.
251 */
252 error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat);
253 if (error)
254 goto out_free;
255 } else {
256 xfs_bulkstat_one_dinode(mp, ino, dip, buf);
257 }
258
259 error = formatter(buffer, ubsize, ubused, buf);
260 if (error)
261 goto out_free;
262
263 *stat = BULKSTAT_RV_DIDONE;
264
265 out_free:
266 kmem_free(buf);
267 return error;
268}
269
270int 175int
271xfs_bulkstat_one( 176xfs_bulkstat_one(
272 xfs_mount_t *mp, /* mount point for filesystem */ 177 xfs_mount_t *mp, /* mount point for filesystem */
273 xfs_ino_t ino, /* inode number to get data for */ 178 xfs_ino_t ino, /* inode number to get data for */
274 void __user *buffer, /* buffer to place output in */ 179 void __user *buffer, /* buffer to place output in */
275 int ubsize, /* size of buffer */ 180 int ubsize, /* size of buffer */
276 void *private_data, /* my private data */
277 xfs_daddr_t bno, /* starting bno of inode cluster */
278 int *ubused, /* bytes used by me */ 181 int *ubused, /* bytes used by me */
279 void *dibuff, /* on-disk inode buffer */
280 int *stat) /* BULKSTAT_RV_... */ 182 int *stat) /* BULKSTAT_RV_... */
281{ 183{
282 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, 184 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
283 xfs_bulkstat_one_fmt, bno, 185 xfs_bulkstat_one_fmt, ubused, stat);
284 ubused, dibuff, stat);
285}
286
287/*
288 * Test to see whether we can use the ondisk inode directly, based
289 * on the given bulkstat flags, filling in dipp accordingly.
290 * Returns zero if the inode is dodgey.
291 */
292STATIC int
293xfs_bulkstat_use_dinode(
294 xfs_mount_t *mp,
295 int flags,
296 xfs_buf_t *bp,
297 int clustidx,
298 xfs_dinode_t **dipp)
299{
300 xfs_dinode_t *dip;
301 unsigned int aformat;
302
303 *dipp = NULL;
304 if (!bp || (flags & BULKSTAT_FG_IGET))
305 return 1;
306 dip = (xfs_dinode_t *)
307 xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog);
308 /*
309 * Check the buffer containing the on-disk inode for di_mode == 0.
310 * This is to prevent xfs_bulkstat from picking up just reclaimed
311 * inodes that have their in-core state initialized but not flushed
312 * to disk yet. This is a temporary hack that would require a proper
313 * fix in the future.
314 */
315 if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
316 !XFS_DINODE_GOOD_VERSION(dip->di_version) ||
317 !dip->di_mode)
318 return 0;
319 if (flags & BULKSTAT_FG_QUICK) {
320 *dipp = dip;
321 return 1;
322 }
323 /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
324 aformat = dip->di_aformat;
325 if ((XFS_DFORK_Q(dip) == 0) ||
326 (aformat == XFS_DINODE_FMT_LOCAL) ||
327 (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) {
328 *dipp = dip;
329 return 1;
330 }
331 return 1;
332} 186}
333 187
334#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) 188#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
@@ -342,10 +196,8 @@ xfs_bulkstat(
342 xfs_ino_t *lastinop, /* last inode returned */ 196 xfs_ino_t *lastinop, /* last inode returned */
343 int *ubcountp, /* size of buffer/count returned */ 197 int *ubcountp, /* size of buffer/count returned */
344 bulkstat_one_pf formatter, /* func that'd fill a single buf */ 198 bulkstat_one_pf formatter, /* func that'd fill a single buf */
345 void *private_data,/* private data for formatter */
346 size_t statstruct_size, /* sizeof struct filling */ 199 size_t statstruct_size, /* sizeof struct filling */
347 char __user *ubuffer, /* buffer with inode stats */ 200 char __user *ubuffer, /* buffer with inode stats */
348 int flags, /* defined in xfs_itable.h */
349 int *done) /* 1 if there are more stats to get */ 201 int *done) /* 1 if there are more stats to get */
350{ 202{
351 xfs_agblock_t agbno=0;/* allocation group block number */ 203 xfs_agblock_t agbno=0;/* allocation group block number */
@@ -380,14 +232,12 @@ xfs_bulkstat(
380 int ubelem; /* spaces used in user's buffer */ 232 int ubelem; /* spaces used in user's buffer */
381 int ubused; /* bytes used by formatter */ 233 int ubused; /* bytes used by formatter */
382 xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */ 234 xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */
383 xfs_dinode_t *dip; /* ptr into bp for specific inode */
384 235
385 /* 236 /*
386 * Get the last inode value, see if there's nothing to do. 237 * Get the last inode value, see if there's nothing to do.
387 */ 238 */
388 ino = (xfs_ino_t)*lastinop; 239 ino = (xfs_ino_t)*lastinop;
389 lastino = ino; 240 lastino = ino;
390 dip = NULL;
391 agno = XFS_INO_TO_AGNO(mp, ino); 241 agno = XFS_INO_TO_AGNO(mp, ino);
392 agino = XFS_INO_TO_AGINO(mp, ino); 242 agino = XFS_INO_TO_AGINO(mp, ino);
393 if (agno >= mp->m_sb.sb_agcount || 243 if (agno >= mp->m_sb.sb_agcount ||
@@ -612,37 +462,6 @@ xfs_bulkstat(
612 irbp->ir_startino) + 462 irbp->ir_startino) +
613 ((chunkidx & nimask) >> 463 ((chunkidx & nimask) >>
614 mp->m_sb.sb_inopblog); 464 mp->m_sb.sb_inopblog);
615
616 if (flags & (BULKSTAT_FG_QUICK |
617 BULKSTAT_FG_INLINE)) {
618 int offset;
619
620 ino = XFS_AGINO_TO_INO(mp, agno,
621 agino);
622 bno = XFS_AGB_TO_DADDR(mp, agno,
623 agbno);
624
625 /*
626 * Get the inode cluster buffer
627 */
628 if (bp)
629 xfs_buf_relse(bp);
630
631 error = xfs_inotobp(mp, NULL, ino, &dip,
632 &bp, &offset,
633 XFS_IGET_BULKSTAT);
634
635 if (!error)
636 clustidx = offset / mp->m_sb.sb_inodesize;
637 if (XFS_TEST_ERROR(error != 0,
638 mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
639 XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
640 bp = NULL;
641 ubleft = 0;
642 rval = error;
643 break;
644 }
645 }
646 } 465 }
647 ino = XFS_AGINO_TO_INO(mp, agno, agino); 466 ino = XFS_AGINO_TO_INO(mp, agno, agino);
648 bno = XFS_AGB_TO_DADDR(mp, agno, agbno); 467 bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
@@ -658,35 +477,13 @@ xfs_bulkstat(
658 * when the chunk is used up. 477 * when the chunk is used up.
659 */ 478 */
660 irbp->ir_freecount++; 479 irbp->ir_freecount++;
661 if (!xfs_bulkstat_use_dinode(mp, flags, bp,
662 clustidx, &dip)) {
663 lastino = ino;
664 continue;
665 }
666 /*
667 * If we need to do an iget, cannot hold bp.
668 * Drop it, until starting the next cluster.
669 */
670 if ((flags & BULKSTAT_FG_INLINE) && !dip) {
671 if (bp)
672 xfs_buf_relse(bp);
673 bp = NULL;
674 }
675 480
676 /* 481 /*
677 * Get the inode and fill in a single buffer. 482 * Get the inode and fill in a single buffer.
678 * BULKSTAT_FG_QUICK uses dip to fill it in.
679 * BULKSTAT_FG_IGET uses igets.
680 * BULKSTAT_FG_INLINE uses dip if we have an
681 * inline attr fork, else igets.
682 * See: xfs_bulkstat_one & xfs_dm_bulkstat_one.
683 * This is also used to count inodes/blks, etc
684 * in xfs_qm_quotacheck.
685 */ 483 */
686 ubused = statstruct_size; 484 ubused = statstruct_size;
687 error = formatter(mp, ino, ubufp, 485 error = formatter(mp, ino, ubufp, ubleft,
688 ubleft, private_data, 486 &ubused, &fmterror);
689 bno, &ubused, dip, &fmterror);
690 if (fmterror == BULKSTAT_RV_NOTHING) { 487 if (fmterror == BULKSTAT_RV_NOTHING) {
691 if (error && error != ENOENT && 488 if (error && error != ENOENT &&
692 error != EINVAL) { 489 error != EINVAL) {
@@ -778,8 +575,7 @@ xfs_bulkstat_single(
778 */ 575 */
779 576
780 ino = (xfs_ino_t)*lastinop; 577 ino = (xfs_ino_t)*lastinop;
781 error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 578 error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 0, &res);
782 NULL, 0, NULL, NULL, &res);
783 if (error) { 579 if (error) {
784 /* 580 /*
785 * Special case way failed, do it the "long" way 581 * Special case way failed, do it the "long" way
@@ -788,8 +584,7 @@ xfs_bulkstat_single(
788 (*lastinop)--; 584 (*lastinop)--;
789 count = 1; 585 count = 1;
790 if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one, 586 if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one,
791 NULL, sizeof(xfs_bstat_t), buffer, 587 sizeof(xfs_bstat_t), buffer, done))
792 BULKSTAT_FG_IGET, done))
793 return error; 588 return error;
794 if (count == 0 || (xfs_ino_t)*lastinop != ino) 589 if (count == 0 || (xfs_ino_t)*lastinop != ino)
795 return error == EFSCORRUPTED ? 590 return error == EFSCORRUPTED ?
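After this rewrite a bulkstat_one_pf formatter receives neither private data, nor a cluster daddr, nor an on-disk inode pointer; every path goes through xfs_iget(). A skeletal formatter against the reduced signature, mirroring xfs_bulkstat_one() above; example_fmt stands in for some bulkstat_one_fmt_pf and is hypothetical:

STATIC int
example_bulkstat_one(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	void __user		*buffer,
	int			ubsize,
	int			*ubused,
	int			*stat)
{
	/* reuse the common helper, supplying a custom copy-out step */
	return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
				    example_fmt, ubused, stat);
}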
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 20792bf45946..97295d91d170 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -27,10 +27,7 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
27 xfs_ino_t ino, 27 xfs_ino_t ino,
28 void __user *buffer, 28 void __user *buffer,
29 int ubsize, 29 int ubsize,
30 void *private_data,
31 xfs_daddr_t bno,
32 int *ubused, 30 int *ubused,
33 void *dip,
34 int *stat); 31 int *stat);
35 32
36/* 33/*
@@ -41,13 +38,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
41#define BULKSTAT_RV_GIVEUP 2 38#define BULKSTAT_RV_GIVEUP 2
42 39
43/* 40/*
44 * Values for bulkstat flag argument.
45 */
46#define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */
47#define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */
48#define BULKSTAT_FG_INLINE 0x4 /* No iget if inline attrs */
49
50/*
51 * Return stat information in bulk (by-inode) for the filesystem. 41 * Return stat information in bulk (by-inode) for the filesystem.
52 */ 42 */
53int /* error status */ 43int /* error status */
@@ -56,10 +46,8 @@ xfs_bulkstat(
56 xfs_ino_t *lastino, /* last inode returned */ 46 xfs_ino_t *lastino, /* last inode returned */
57 int *count, /* size of buffer/count returned */ 47 int *count, /* size of buffer/count returned */
58 bulkstat_one_pf formatter, /* func that'd fill a single buf */ 48 bulkstat_one_pf formatter, /* func that'd fill a single buf */
59 void *private_data, /* private data for formatter */
60 size_t statstruct_size,/* sizeof struct that we're filling */ 49 size_t statstruct_size,/* sizeof struct that we're filling */
61 char __user *ubuffer,/* buffer with inode stats */ 50 char __user *ubuffer,/* buffer with inode stats */
62 int flags, /* flag to control access method */
63 int *done); /* 1 if there are more stats to get */ 51 int *done); /* 1 if there are more stats to get */
64 52
65int 53int
@@ -82,9 +70,7 @@ xfs_bulkstat_one_int(
82 void __user *buffer, 70 void __user *buffer,
83 int ubsize, 71 int ubsize,
84 bulkstat_one_fmt_pf formatter, 72 bulkstat_one_fmt_pf formatter,
85 xfs_daddr_t bno,
86 int *ubused, 73 int *ubused,
87 void *dibuff,
88 int *stat); 74 int *stat);
89 75
90int 76int
@@ -93,10 +79,7 @@ xfs_bulkstat_one(
93 xfs_ino_t ino, 79 xfs_ino_t ino,
94 void __user *buffer, 80 void __user *buffer,
95 int ubsize, 81 int ubsize,
96 void *private_data,
97 xfs_daddr_t bno,
98 int *ubused, 82 int *ubused,
99 void *dibuff,
100 int *stat); 83 int *stat);
101 84
102typedef int (*inumbers_fmt_pf)( 85typedef int (*inumbers_fmt_pf)(
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index ed0684cc50ee..9ac5cfab27b9 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3198,7 +3198,7 @@ xlog_recover_process_one_iunlink(
3198 int error; 3198 int error;
3199 3199
3200 ino = XFS_AGINO_TO_INO(mp, agno, agino); 3200 ino = XFS_AGINO_TO_INO(mp, agno, agino);
3201 error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); 3201 error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
3202 if (error) 3202 if (error)
3203 goto fail; 3203 goto fail;
3204 3204
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d59f4e8bedcf..69f62d8b2816 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1300,7 +1300,7 @@ xfs_mountfs(
1300 * Get and sanity-check the root inode. 1300 * Get and sanity-check the root inode.
1301 * Save the pointer to it in the mount structure. 1301 * Save the pointer to it in the mount structure.
1302 */ 1302 */
1303 error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0); 1303 error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip);
1304 if (error) { 1304 if (error) {
1305 cmn_err(CE_WARN, "XFS: failed to read root inode"); 1305 cmn_err(CE_WARN, "XFS: failed to read root inode");
1306 goto out_log_dealloc; 1306 goto out_log_dealloc;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1d2c7eed4eda..5761087ee8ea 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -259,7 +259,7 @@ typedef struct xfs_mount {
259 wait_queue_head_t m_wait_single_sync_task; 259 wait_queue_head_t m_wait_single_sync_task;
260 __int64_t m_update_flags; /* sb flags we need to update 260 __int64_t m_update_flags; /* sb flags we need to update
261 on the next remount,rw */ 261 on the next remount,rw */
262 struct list_head m_mplist; /* inode shrinker mount list */ 262 struct shrinker m_inode_shrink; /* inode reclaim shrinker */
263} xfs_mount_t; 263} xfs_mount_t;
264 264
265/* 265/*
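Replacing m_mplist with an embedded struct shrinker is the per-mount payoff of the callback change earlier in this series: the shrinker can recover its xfs_mount with container_of() instead of walking a global mount list. A sketch of the recovery idiom; the callback name is illustrative and its registration sits outside the hunks shown here:

static int
example_inode_shrink(
	struct shrinker	*shrink,
	int		nr_to_scan,
	gfp_t		gfp_mask)
{
	struct xfs_mount *mp = container_of(shrink, struct xfs_mount,
					    m_inode_shrink);

	/* ... reclaim at most nr_to_scan of mp's inodes ... */
	return 0;
}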
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 16445518506d..a2d32ce335aa 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2277,12 +2277,12 @@ xfs_rtmount_inodes(
2277 sbp = &mp->m_sb; 2277 sbp = &mp->m_sb;
2278 if (sbp->sb_rbmino == NULLFSINO) 2278 if (sbp->sb_rbmino == NULLFSINO)
2279 return 0; 2279 return 0;
2280 error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip, 0); 2280 error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip);
2281 if (error) 2281 if (error)
2282 return error; 2282 return error;
2283 ASSERT(mp->m_rbmip != NULL); 2283 ASSERT(mp->m_rbmip != NULL);
2284 ASSERT(sbp->sb_rsumino != NULLFSINO); 2284 ASSERT(sbp->sb_rsumino != NULLFSINO);
2285 error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0); 2285 error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
2286 if (error) { 2286 if (error) {
2287 IRELE(mp->m_rbmip); 2287 IRELE(mp->m_rbmip);
2288 return error; 2288 return error;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 785ff101da0a..2559dfec946b 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -62,7 +62,7 @@ xfs_trans_iget(
62{ 62{
63 int error; 63 int error;
64 64
65 error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp, 0); 65 error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp);
66 if (!error && tp) 66 if (!error && tp)
67 xfs_trans_ijoin(tp, *ipp, lock_flags); 67 xfs_trans_ijoin(tp, *ipp, lock_flags);
68 return error; 68 return error;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index a06bd62504fc..c1646838898f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1269,7 +1269,7 @@ xfs_lookup(
1269 if (error) 1269 if (error)
1270 goto out; 1270 goto out;
1271 1271
1272 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0); 1272 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
1273 if (error) 1273 if (error)
1274 goto out_free_name; 1274 goto out_free_name;
1275 1275