247 files changed, 8024 insertions, 6537 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 814ac4e213a8..0a93dc1cb4ac 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -1,6 +1,6 @@
 config 9P_FS
-        tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
+        tristate "Plan 9 Resource Sharing Support (9P2000)"
-        depends on INET && NET_9P && EXPERIMENTAL
+        depends on INET && NET_9P
        help
          If you say Y here, you will get experimental support for
          Plan 9 resource sharing via the 9P2000 protocol.
@@ -10,7 +10,6 @@ config 9P_FS
          If unsure, say N.
 if 9P_FS
 config 9P_FSCACHE
        bool "Enable 9P client caching support (EXPERIMENTAL)"
        depends on EXPERIMENTAL
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 82a7c38ddad0..691c78f58bef 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -259,7 +259,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
                if (IS_ERR(inode_fid)) {
                        err = PTR_ERR(inode_fid);
                        mutex_unlock(&v9inode->v_mutex);
-                        goto error;
+                        goto err_clunk_old_fid;
                }
                v9inode->writeback_fid = (void *) inode_fid;
        }
@@ -267,8 +267,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
        /* Since we are opening a file, assign the open fid to the file */
        filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
        if (IS_ERR(filp)) {
-                p9_client_clunk(ofid);
+                err = PTR_ERR(filp);
-                return PTR_ERR(filp);
+                goto err_clunk_old_fid;
        }
        filp->private_data = ofid;
 #ifdef CONFIG_9P_FSCACHE
@@ -278,10 +278,11 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
        return 0;
 error:
-        if (ofid)
-                p9_client_clunk(ofid);
        if (fid)
                p9_client_clunk(fid);
+err_clunk_old_fid:
+        if (ofid)
+                p9_client_clunk(ofid);
        return err;
 }
diff --git a/fs/Kconfig b/fs/Kconfig
index efb7d4ec6fcf..19891aab9c6e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -124,6 +124,7 @@ config TMPFS
 config TMPFS_POSIX_ACL
        bool "Tmpfs POSIX Access Control Lists"
        depends on TMPFS
+        select TMPFS_XATTR
        select GENERIC_ACL
        help
          POSIX Access Control Lists (ACLs) support permissions for users and
@@ -134,6 +135,22 @@ config TMPFS_POSIX_ACL
          If you don't know what Access Control Lists are, say N.
+config TMPFS_XATTR
+        bool "Tmpfs extended attributes"
+        depends on TMPFS
+        default n
+        help
+          Extended attributes are name:value pairs associated with inodes by
+          the kernel or by users (see the attr(5) manual page, or visit
+          <http://acl.bestbits.at/> for details).
+          Currently this enables support for the trusted.* and
+          security.* namespaces.
+          You need this for POSIX ACL support on tmpfs.
+          If unsure, say N.
 config HUGETLBFS
        bool "HugeTLB file system support"
        depends on X86 || IA64 || SPARC64 || (S390 && 64BIT) || \
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 397d3057d336..1bffbe0ed778 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -820,6 +820,8 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
        int res;
        char buf[16];
+        memset(&bprm, 0, sizeof(bprm));
        /* Create the file name */
        sprintf(buf, "/lib/lib%d.so", id);
@@ -835,6 +837,12 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
        if (!bprm.cred)
                goto out;
+        /* We don't really care about recalculating credentials at this point
+         * as we're past the point of no return and are dealing with shared
+         * libraries.
+         */
+        bprm.cred_prepared = 1;
        res = prepare_binprm(&bprm);
        if (!IS_ERR_VALUE(res))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 5147bdd3b8e1..1f2b19978333 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1102,6 +1102,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                        if (!bdev->bd_part)
                                goto out_clear;
+                        ret = 0;
                        if (disk->fops->open) {
                                ret = disk->fops->open(bdev, mode);
                                if (ret == -ERESTARTSYS) {
@@ -1118,18 +1119,26 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                                        put_disk(disk);
                                        goto restart;
                                }
-                                if (ret)
-                                        goto out_clear;
                        }
-                        if (!bdev->bd_openers) {
+                        if (!ret && !bdev->bd_openers) {
                                bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
                                bdi = blk_get_backing_dev_info(bdev);
                                if (bdi == NULL)
                                        bdi = &default_backing_dev_info;
                                bdev_inode_switch_bdi(bdev->bd_inode, bdi);
                        }
-                        if (bdev->bd_invalidated)
+                        /*
+                         * If the device is invalidated, rescan partitions
+                         * if open succeeded or failed with -ENOMEDIUM.
+                         * The latter is necessary to prevent ghost
+                         * partitions on a removed medium.
+                         */
+                        if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
                                rescan_partitions(disk, bdev);
+                        if (ret)
+                                goto out_clear;
                } else {
                        struct block_device *whole;
                        whole = bdget_disk(disk, 0);
@@ -1153,13 +1162,14 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                }
        } else {
                if (bdev->bd_contains == bdev) {
-                        if (bdev->bd_disk->fops->open) {
+                        ret = 0;
+                        if (bdev->bd_disk->fops->open)
                                ret = bdev->bd_disk->fops->open(bdev, mode);
-                                if (ret)
+                        /* the same as first opener case, read comment there */
-                                        goto out_unlock_bdev;
+                        if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
-                        }
-                        if (bdev->bd_invalidated)
                                rescan_partitions(bdev->bd_disk, bdev);
+                        if (ret)
+                                goto out_unlock_bdev;
                }
                /* only one opener holds refs to the module and disk */
                module_put(disk->fops->owner);
@@ -1228,6 +1238,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
        res = __blkdev_get(bdev, mode, 0);
        if (whole) {
+                struct gendisk *disk = whole->bd_disk;
                /* finish claiming */
                mutex_lock(&bdev->bd_mutex);
                spin_lock(&bdev_lock);
@@ -1254,15 +1266,16 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
                spin_unlock(&bdev_lock);
                /*
-                 * Block event polling for write claims.  Any write
+                 * Block event polling for write claims if requested.  Any
-                 * holder makes the write_holder state stick until all
+                 * write holder makes the write_holder state stick until
-                 * are released.  This is good enough and tracking
+                 * all are released.  This is good enough and tracking
-                 * individual writeable reference is too fragile given
+                 * individual writeable reference is too fragile given the
-                 * the way @mode is used in blkdev_get/put().
+                 * way @mode is used in blkdev_get/put().
                 */
-                if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
+                if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) &&
+                    !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
                        bdev->bd_write_holder = true;
-                        disk_block_events(bdev->bd_disk);
+                        disk_block_events(disk);
                }
                mutex_unlock(&bdev->bd_mutex);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 5d505aaa72fb..44ea5b92e1ba 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -178,12 +178,13 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
        if (value) {
                acl = posix_acl_from_xattr(value, size);
+                if (IS_ERR(acl))
+                        return PTR_ERR(acl);
                if (acl) {
                        ret = posix_acl_valid(acl);
                        if (ret)
                                goto out;
-                } else if (IS_ERR(acl)) {
-                        return PTR_ERR(acl);
                }
        }
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cd52f7f556ef..9ee6bd55e16c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8856,23 +8856,38 @@ out:
 int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
 {
        struct btrfs_space_info *space_info;
+        struct btrfs_super_block *disk_super;
+        u64 features;
+        u64 flags;
+        int mixed = 0;
        int ret;
-        ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM, 0, 0,
+        disk_super = &fs_info->super_copy;
-                                                                 &space_info);
+        if (!btrfs_super_root(disk_super))
-        if (ret)
+                return 1;
-                return ret;
-        ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA, 0, 0,
+        features = btrfs_super_incompat_flags(disk_super);
-                                                                 &space_info);
+        if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
-        if (ret)
+                mixed = 1;
-                return ret;
-        ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA, 0, 0,
+        flags = BTRFS_BLOCK_GROUP_SYSTEM;
-                                                                 &space_info);
+        ret = update_space_info(fs_info, flags, 0, 0, &space_info);
        if (ret)
-                return ret;
+                goto out;
+        if (mixed) {
+                flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
+                ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+        } else {
+                flags = BTRFS_BLOCK_GROUP_METADATA;
+                ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+                if (ret)
+                        goto out;
+                flags = BTRFS_BLOCK_GROUP_DATA;
+                ret = update_space_info(fs_info, flags, 0, 0, &space_info);
+        }
+out:
        return ret;
 }
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ba41da59e31b..96fcfa522dab 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -10,6 +10,7 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
+#include <linux/prefetch.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "compat.h"
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ffb48d6c5433..2616f7ed4799 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -81,6 +81,13 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
                iflags |= FS_NOATIME_FL;
        if (flags & BTRFS_INODE_DIRSYNC)
                iflags |= FS_DIRSYNC_FL;
+        if (flags & BTRFS_INODE_NODATACOW)
+                iflags |= FS_NOCOW_FL;
+        if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS))
+                iflags |= FS_COMPR_FL;
+        else if (flags & BTRFS_INODE_NOCOMPRESS)
+                iflags |= FS_NOCOMP_FL;
        return iflags;
 }
@@ -144,16 +151,13 @@ static int check_flags(unsigned int flags)
        if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
                      FS_NOATIME_FL | FS_NODUMP_FL | \
                      FS_SYNC_FL | FS_DIRSYNC_FL | \
-                      FS_NOCOMP_FL | FS_COMPR_FL | \
+                      FS_NOCOMP_FL | FS_COMPR_FL |
-                      FS_NOCOW_FL | FS_COW_FL))
+                      FS_NOCOW_FL))
                return -EOPNOTSUPP;
        if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
                return -EINVAL;
-        if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL))
-                return -EINVAL;
        return 0;
 }
@@ -218,6 +222,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
                ip->flags |= BTRFS_INODE_DIRSYNC;
        else
                ip->flags &= ~BTRFS_INODE_DIRSYNC;
+        if (flags & FS_NOCOW_FL)
+                ip->flags |= BTRFS_INODE_NODATACOW;
+        else
+                ip->flags &= ~BTRFS_INODE_NODATACOW;
        /*
         * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
@@ -230,11 +238,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        } else if (flags & FS_COMPR_FL) {
                ip->flags |= BTRFS_INODE_COMPRESS;
                ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
+        } else {
+                ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
        }
-        if (flags & FS_NOCOW_FL)
-                ip->flags |= BTRFS_INODE_NODATACOW;
-        else if (flags & FS_COW_FL)
-                ip->flags &= ~BTRFS_INODE_NODATACOW;
        trans = btrfs_join_transaction(root, 1);
        BUG_ON(IS_ERR(trans));
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 199a80134312..f340f7c99d09 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -709,7 +709,7 @@ again:
        WARN_ON(cur->checked);
        if (!list_empty(&cur->upper)) {
                /*
-                 * the backref was added previously when processsing
+                 * the backref was added previously when processing
                 * backref of type BTRFS_TREE_BLOCK_REF_KEY
                 */
                BUG_ON(!list_is_singular(&cur->upper));
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index e159c529fd2b..33da49dc3cc6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -775,6 +775,13 @@ get_more_pages:
                                            ci->i_truncate_seq,
                                            ci->i_truncate_size,
                                            &inode->i_mtime, true, 1, 0);
+                                if (!req) {
+                                        rc = -ENOMEM;
+                                        unlock_page(page);
+                                        break;
+                                }
                                max_pages = req->r_num_pages;
                                alloc_page_vec(fsc, req);
@@ -841,7 +848,8 @@ get_more_pages:
                op->payload_len = cpu_to_le32(len);
                req->r_request->hdr.data_len = cpu_to_le32(len);
-                ceph_osdc_start_request(&fsc->client->osdc, req, true);
+                rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
+                BUG_ON(rc);
                req = NULL;
                /* continue? */
@@ -873,8 +881,6 @@ release_pvec_pages:
 out:
        if (req)
                ceph_osdc_put_request(req);
-        if (rc > 0)
-                rc = 0;  /* vfs expects us to return 0 */
        ceph_put_snap_context(snapc);
        dout("writepages done, rc = %d\n", rc);
        return rc;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 5323c330bbf3..1f72b00447c4 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -569,7 +569,8 @@ retry:
                list_add_tail(&cap->session_caps, &session->s_caps);
                session->s_nr_caps++;
                spin_unlock(&session->s_cap_lock);
-        }
+        } else if (new_cap)
+                ceph_put_cap(mdsc, new_cap);
        if (!ci->i_snap_realm) {
                /*
@@ -819,7 +820,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
                used |= CEPH_CAP_FILE_CACHE;
        if (ci->i_wr_ref)
                used |= CEPH_CAP_FILE_WR;
-        if (ci->i_wrbuffer_ref)
+        if (ci->i_wb_ref || ci->i_wrbuffer_ref)
                used |= CEPH_CAP_FILE_BUFFER;
        return used;
 }
@@ -1331,10 +1332,11 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
 }
 /*
- * Mark caps dirty.  If inode is newly dirty, add to the global dirty
+ * Mark caps dirty.  If inode is newly dirty, return the dirty flags.
- * list.
+ * Caller is then responsible for calling __mark_inode_dirty with the
+ * returned flags value.
 */
-void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
+int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
 {
        struct ceph_mds_client *mdsc =
                ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
@@ -1357,7 +1359,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
                list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
                spin_unlock(&mdsc->cap_dirty_lock);
                if (ci->i_flushing_caps == 0) {
-                        igrab(inode);
+                        ihold(inode);
                        dirty |= I_DIRTY_SYNC;
                }
        }
@@ -1365,9 +1367,8 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
        if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
            (mask & CEPH_CAP_FILE_BUFFER))
                dirty |= I_DIRTY_DATASYNC;
-        if (dirty)
-                __mark_inode_dirty(inode, dirty);
        __cap_delay_requeue(mdsc, ci);
+        return dirty;
 }
 /*
@@ -1990,11 +1991,11 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
        if (got & CEPH_CAP_FILE_WR)
                ci->i_wr_ref++;
        if (got & CEPH_CAP_FILE_BUFFER) {
-                if (ci->i_wrbuffer_ref == 0)
+                if (ci->i_wb_ref == 0)
-                        igrab(&ci->vfs_inode);
+                        ihold(&ci->vfs_inode);
-                ci->i_wrbuffer_ref++;
+                ci->i_wb_ref++;
-                dout("__take_cap_refs %p wrbuffer %d -> %d (?)\n",
+                dout("__take_cap_refs %p wb %d -> %d (?)\n",
-                     &ci->vfs_inode, ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref);
+                     &ci->vfs_inode, ci->i_wb_ref-1, ci->i_wb_ref);
        }
 }
@@ -2169,12 +2170,12 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
                if (--ci->i_rdcache_ref == 0)
                        last++;
        if (had & CEPH_CAP_FILE_BUFFER) {
-                if (--ci->i_wrbuffer_ref == 0) {
+                if (--ci->i_wb_ref == 0) {
                        last++;
                        put++;
                }
-                dout("put_cap_refs %p wrbuffer %d -> %d (?)\n",
+                dout("put_cap_refs %p wb %d -> %d (?)\n",
-                     inode, ci->i_wrbuffer_ref+1, ci->i_wrbuffer_ref);
+                     inode, ci->i_wb_ref+1, ci->i_wb_ref);
        }
        if (had & CEPH_CAP_FILE_WR)
                if (--ci->i_wr_ref == 0) {
@@ -2634,6 +2635,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
                              struct ceph_mds_session *session,
                              int *open_target_sessions)
 {
+        struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
        struct ceph_inode_info *ci = ceph_inode(inode);
        int mds = session->s_mds;
        unsigned mseq = le32_to_cpu(ex->migrate_seq);
@@ -2670,6 +2672,19 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
                         * export targets, so that we get the matching IMPORT
                         */
                        *open_target_sessions = 1;
+                        /*
+                         * we can't flush dirty caps for which we've seen
+                         * the EXPORT but no IMPORT
+                         */
+                        spin_lock(&mdsc->cap_dirty_lock);
+                        if (!list_empty(&ci->i_dirty_item)) {
+                                dout(" moving %p to cap_dirty_migrating\n",
+                                     inode);
+                                list_move(&ci->i_dirty_item,
+                                          &mdsc->cap_dirty_migrating);
+                        }
+                        spin_unlock(&mdsc->cap_dirty_lock);
                }
                __ceph_remove_cap(cap);
        }
@@ -2707,6 +2722,13 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
                ci->i_cap_exporting_issued = 0;
                ci->i_cap_exporting_mseq = 0;
                ci->i_cap_exporting_mds = -1;
+                spin_lock(&mdsc->cap_dirty_lock);
+                if (!list_empty(&ci->i_dirty_item)) {
+                        dout(" moving %p back to cap_dirty\n", inode);
+                        list_move(&ci->i_dirty_item, &mdsc->cap_dirty);
+                }
+                spin_unlock(&mdsc->cap_dirty_lock);
        } else {
                dout("handle_cap_import inode %p ci %p mds%d mseq %d\n",
                     inode, ci, mds, mseq);
@@ -2910,38 +2932,16 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
 */
 void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
 {
-        struct ceph_inode_info *ci, *nci = NULL;
+        struct ceph_inode_info *ci;
-        struct inode *inode, *ninode = NULL;
+        struct inode *inode;
-        struct list_head *p, *n;
        dout("flush_dirty_caps\n");
        spin_lock(&mdsc->cap_dirty_lock);
-        list_for_each_safe(p, n, &mdsc->cap_dirty) {
+        while (!list_empty(&mdsc->cap_dirty)) {
-                if (nci) {
+                ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
-                        ci = nci;
+                                      i_dirty_item);
-                        inode = ninode;
+                inode = igrab(&ci->vfs_inode);
-                        ci->i_ceph_flags &= ~CEPH_I_NOFLUSH;
+                dout("flush_dirty_caps %p\n", inode);
-                        dout("flush_dirty_caps inode %p (was next inode)\n",
-                             inode);
-                } else {
-                        ci = list_entry(p, struct ceph_inode_info,
-                                        i_dirty_item);
-                        inode = igrab(&ci->vfs_inode);
-                        BUG_ON(!inode);
-                        dout("flush_dirty_caps inode %p\n", inode);
-                }
-                if (n != &mdsc->cap_dirty) {
-                        nci = list_entry(n, struct ceph_inode_info,
-                                         i_dirty_item);
-                        ninode = igrab(&nci->vfs_inode);
-                        BUG_ON(!ninode);
-                        nci->i_ceph_flags |= CEPH_I_NOFLUSH;
-                        dout("flush_dirty_caps next inode %p, noflush\n",
-                             ninode);
-                } else {
-                        nci = NULL;
-                        ninode = NULL;
-                }
                spin_unlock(&mdsc->cap_dirty_lock);
                if (inode) {
                        ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH,
@@ -2951,6 +2951,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
                spin_lock(&mdsc->cap_dirty_lock);
        }
        spin_unlock(&mdsc->cap_dirty_lock);
+        dout("flush_dirty_caps done\n");
 }
 /*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 1a867a3601ae..33729e822bb9 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -360,7 +360,7 @@ more:
        rinfo = &fi->last_readdir->r_reply_info;
        dout("readdir frag %x num %d off %d chunkoff %d\n", frag,
             rinfo->dir_nr, off, fi->offset);
-        while (off - fi->offset >= 0 && off - fi->offset < rinfo->dir_nr) {
+        while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) {
                u64 pos = ceph_make_fpos(frag, off);
                struct ceph_mds_reply_inode *in =
                        rinfo->dir_in[off - fi->offset].in;
@@ -1066,16 +1066,17 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
        struct inode *inode = file->f_dentry->d_inode;
        struct ceph_inode_info *ci = ceph_inode(inode);
        int left;
+        const int bufsize = 1024;
        if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
                return -EISDIR;
        if (!cf->dir_info) {
-                cf->dir_info = kmalloc(1024, GFP_NOFS);
+                cf->dir_info = kmalloc(bufsize, GFP_NOFS);
                if (!cf->dir_info)
                        return -ENOMEM;
                cf->dir_info_len =
-                        sprintf(cf->dir_info,
+                        snprintf(cf->dir_info, bufsize,
                                "entries:   %20lld\n"
                                " files:    %20lld\n"
                                " subdirs:  %20lld\n"
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index e41056174bf8..a610d3d67488 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -86,6 +86,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
 static struct dentry *__fh_to_dentry(struct super_block *sb,
                                     struct ceph_nfs_fh *fh)
 {
+        struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
        struct inode *inode;
        struct dentry *dentry;
        struct ceph_vino vino;
@@ -95,8 +96,24 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
        vino.ino = fh->ino;
        vino.snap = CEPH_NOSNAP;
        inode = ceph_find_inode(sb, vino);
-        if (!inode)
+        if (!inode) {
-                return ERR_PTR(-ESTALE);
+                struct ceph_mds_request *req;
+                req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
+                                               USE_ANY_MDS);
+                if (IS_ERR(req))
+                        return ERR_CAST(req);
+                req->r_ino1 = vino;
+                req->r_num_caps = 1;
+                err = ceph_mdsc_do_request(mdsc, NULL, req);
+                inode = req->r_target_inode;
+                if (inode)
+                        igrab(inode);
+                ceph_mdsc_put_request(req);
+                if (!inode)
+                        return ERR_PTR(-ESTALE);
+        }
        dentry = d_obtain_alias(inode);
        if (IS_ERR(dentry)) {
@@ -148,8 +165,10 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
                snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash);
                req->r_num_caps = 1;
                err = ceph_mdsc_do_request(mdsc, NULL, req);
+                inode = req->r_target_inode;
+                if (inode)
+                        igrab(inode);
                ceph_mdsc_put_request(req);
-                inode = ceph_find_inode(sb, vino);
                if (!inode)
                        return ERR_PTR(err ? err : -ESTALE);
        }
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 159b512d5a27..203252d88d9f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -734,9 +734,12 @@ retry_snap:
                }
        }
        if (ret >= 0) {
+                int dirty;
                spin_lock(&inode->i_lock);
-                __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+                dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
                spin_unlock(&inode->i_lock);
+                if (dirty)
+                        __mark_inode_dirty(inode, dirty);
        }
 out:
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index b54c97da1c43..70b6a4839c38 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -355,6 +355,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        ci->i_rd_ref = 0;
        ci->i_rdcache_ref = 0;
        ci->i_wr_ref = 0;
+        ci->i_wb_ref = 0;
        ci->i_wrbuffer_ref = 0;
        ci->i_wrbuffer_ref_head = 0;
        ci->i_shared_gen = 0;
@@ -1567,6 +1568,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
        int release = 0, dirtied = 0;
        int mask = 0;
        int err = 0;
+        int inode_dirty_flags = 0;
        if (ceph_snap(inode) != CEPH_NOSNAP)
                return -EROFS;
@@ -1725,13 +1727,16 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
                dout("setattr %p ATTR_FILE ... hrm!\n", inode);
        if (dirtied) {
-                __ceph_mark_dirty_caps(ci, dirtied);
+                inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied);
                inode->i_ctime = CURRENT_TIME;
        }
        release &= issued;
        spin_unlock(&inode->i_lock);
+        if (inode_dirty_flags)
+                __mark_inode_dirty(inode, inode_dirty_flags);
        if (mask) {
                req->r_inode = igrab(inode);
                req->r_inode_drop = release;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f60b07b0feb0..79743d146be6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -578,6 +578,7 @@ static void __register_request(struct ceph_mds_client *mdsc,
        if (dir) {
                struct ceph_inode_info *ci = ceph_inode(dir);
+                ihold(dir);
                spin_lock(&ci->i_unsafe_lock);
                req->r_unsafe_dir = dir;
                list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops);
@@ -598,6 +599,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
                spin_lock(&ci->i_unsafe_lock);
                list_del_init(&req->r_unsafe_dir_item);
                spin_unlock(&ci->i_unsafe_lock);
+                iput(req->r_unsafe_dir);
+                req->r_unsafe_dir = NULL;
        }
        ceph_mdsc_put_request(req);
@@ -2691,7 +2695,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
 {
        struct super_block *sb = mdsc->fsc->sb;
        struct inode *inode;
-        struct ceph_inode_info *ci;
        struct dentry *parent, *dentry;
        struct ceph_dentry_info *di;
        int mds = session->s_mds;
@@ -2728,7 +2731,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
                dout("handle_lease no inode %llx\n", vino.ino);
                goto release;
        }
-        ci = ceph_inode(inode);
        /* dentry */
        parent = d_find_alias(inode);
@@ -3002,6 +3004,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
        spin_lock_init(&mdsc->snap_flush_lock);
        mdsc->cap_flush_seq = 0;
        INIT_LIST_HEAD(&mdsc->cap_dirty);
+        INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
        mdsc->num_cap_flushing = 0;
        spin_lock_init(&mdsc->cap_dirty_lock);
        init_waitqueue_head(&mdsc->cap_flushing_wq);
@@ -3304,8 +3307,8 @@ static void con_put(struct ceph_connection *con)
 {
        struct ceph_mds_session *s = con->private;
+        dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref) - 1);
        ceph_put_mds_session(s);
-        dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref));
 }
 /*
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 4e3a9cc0bba6..7d8a0d662d56 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -278,6 +278,7 @@ struct ceph_mds_client {
        u64               cap_flush_seq;
        struct list_head  cap_dirty;        /* inodes with dirty caps */
+        struct list_head  cap_dirty_migrating; /* ...that are migrating... */
        int               num_cap_flushing; /* # caps we are flushing */
        spinlock_t        cap_dirty_lock;   /* protects above items */
        wait_queue_head_t cap_flushing_wq;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e86ec1155f8f..24067d68a554 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -206,7 +206,7 @@ void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
                up_write(&mdsc->snap_rwsem);
        } else {
                spin_lock(&mdsc->snap_empty_lock);
-                list_add(&mdsc->snap_empty, &realm->empty_item);
+                list_add(&realm->empty_item, &mdsc->snap_empty);
                spin_unlock(&mdsc->snap_empty_lock);
        }
 }
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 619fe719968f..f5cabefa98dc 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -293,7 +293,7 @@ struct ceph_inode_info {
        /* held references to caps */
        int i_pin_ref;
-        int i_rd_ref, i_rdcache_ref, i_wr_ref;
+        int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
        int i_wrbuffer_ref, i_wrbuffer_ref_head;
        u32 i_shared_gen;       /* increment each time we get FILE_SHARED */
        u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
@@ -506,7 +506,7 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci)
 {
        return ci->i_dirty_caps | ci->i_flushing_caps;
 }
-extern void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
+extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
 extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
 extern int __ceph_caps_used(struct ceph_inode_info *ci);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 8c9eba6ef9df..f2b628696180 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -703,6 +703,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
        struct ceph_inode_xattr *xattr = NULL;
        int issued;
        int required_blob_size;
+        int dirty;
        if (ceph_snap(inode) != CEPH_NOSNAP)
                return -EROFS;
@@ -763,11 +764,12 @@ retry:
        dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
        err = __set_xattr(ci, newname, name_len, newval,
                          val_len, 1, 1, 1, &xattr);
-        __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
+        dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
        ci->i_xattrs.dirty = true;
        inode->i_ctime = CURRENT_TIME;
        spin_unlock(&inode->i_lock);
+        if (dirty)
+                __mark_inode_dirty(inode, dirty);
        return err;
 do_sync:
@@ -810,6 +812,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
        struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
        int issued;
        int err;
+        int dirty;
        if (ceph_snap(inode) != CEPH_NOSNAP)
                return -EROFS;
@@ -833,12 +836,13 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
                goto do_sync;
        err = __remove_xattr_by_name(ceph_inode(inode), name);
-        __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
+        dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
        ci->i_xattrs.dirty = true;
        inode->i_ctime = CURRENT_TIME;
        spin_unlock(&inode->i_lock);
+        if (dirty)
+                __mark_inode_dirty(inode, dirty);
        return err;
 do_sync:
        spin_unlock(&inode->i_lock);
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 7cb0f7f847e4..75c47cd8d086 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -7,6 +7,7 @@ config CIFS
        select CRYPTO_MD5
        select CRYPTO_HMAC
        select CRYPTO_ARC4
+        select CRYPTO_DES
        help
          This is the client VFS module for the Common Internet File System
          (CIFS) protocol which is the successor to the Server Message Block
@@ -152,16 +153,28 @@ config CIFS_ACL
            Allows to fetch CIFS/NTFS ACL from the server.  The DACL blob
            is handed over to the application/caller.
-config CIFS_EXPERIMENTAL
+config CIFS_SMB2
-          bool "CIFS Experimental Features (EXPERIMENTAL)"
+        bool "SMB2 network file system support (EXPERIMENTAL)"
+        depends on EXPERIMENTAL && INET && BROKEN
+        select NLS
+        select KEYS
+        select FSCACHE
+        select DNS_RESOLVER
+        help
+          This enables experimental support for the SMB2 (Server Message Block
+          version 2) protocol. The SMB2 protocol is the successor to the
+          popular CIFS and SMB network file sharing protocols. SMB2 is the
+          native file sharing mechanism for recent versions of Windows
+          operating systems (since Vista).  SMB2 enablement will eventually
+          allow users better performance, security and features than would be
+          possible with cifs. Note that smb2 mount options are also simpler
+          (compared to cifs) due to protocol improvements.
+          Unless you are a developer or tester, say N.
+config CIFS_NFSD_EXPORT
+          bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)"
          depends on CIFS && EXPERIMENTAL
          help
-            Enables cifs features under testing. These features are
+           Allows NFS server to export a CIFS mounted share (nfsd over cifs)
-            experimental and currently include DFS support and directory
-            change notification ie fcntl(F_DNOTIFY), as well as the upcall
-            mechanism which will be used for Kerberos session negotiation
-            and uid remapping.  Some of these features also may depend on
-            setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
-            (which is disabled by default). See the file fs/cifs/README
-            for more details.  If unsure, say N.
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index d87558448e3d..005d524c3a4a 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -4,7 +4,7 @@
 obj-$(CONFIG_CIFS) += cifs.o
 cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
-          link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
+          link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
          cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
          readdir.o ioctl.o sess.o export.o
diff --git a/fs/cifs/README b/fs/cifs/README
index 74ab165fc646..4a3ca0e5ca24 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -704,18 +704,6 @@ the start of smb requests and responses can be enabled via:
        echo 1 > /proc/fs/cifs/traceSMB
-Two other experimental features are under development. To test these
-requires enabling CONFIG_CIFS_EXPERIMENTAL
-        cifsacl support needed to retrieve approximated mode bits based on
-                the contents on the CIFS ACL.
-        lease support: cifs will check the oplock state before calling into
-        the vfs to see if we can grant a lease on a file.
-        DNOTIFY fcntl: needed for support of directory change 
-                            notification and perhaps later for file leases)
 Per share (per client mount) statistics are available in /proc/fs/cifs/Stats
 if the kernel was configured with cifs statistics enabled.  The statistics
 represent the number of successful (ie non-zero return code from the server) 
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 30d01bc90855..18f4272d9047 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -63,7 +63,7 @@ void cifs_dump_detail(struct smb_hdr *smb)
        cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
                  smb->Command, smb->Status.CifsError,
                  smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
-        cERROR(1, "smb buf %p len %d", smb, smbCalcSize_LE(smb));
+        cERROR(1, "smb buf %p len %d", smb, smbCalcSize(smb));
 }
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index ac51cd2d33ae..a9d5692e0c20 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -58,9 +58,7 @@ struct cifs_sb_info {
        unsigned int mnt_cifs_flags;
        int     prepathlen;
        char   *prepath; /* relative path under the share to mount to */
-#ifdef CONFIG_CIFS_DFS_UPCALL
+        char   *mountdata; /* options received at mount time or via DFS refs */
-        char   *mountdata; /* mount options received at mount time */
-#endif
        struct backing_dev_info bdi;
        struct delayed_work prune_tlinks;
 };
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 23d43cde4306..1b2e180b018d 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -277,6 +277,7 @@ cifsConvertToUCS(__le16 *target, const char *source, int srclen,
        for (i = 0, j = 0; i < srclen; j++) {
                src_char = source[i];
+                charlen = 1;
                switch (src_char) {
                case 0:
                        put_unaligned(0, &target[j]);
@@ -316,16 +317,13 @@ cifsConvertToUCS(__le16 *target, const char *source, int srclen,
                                dst_char = cpu_to_le16(0x003f);
                                charlen = 1;
                        }
-                        /*
-                         * character may take more than one byte in the source
-                         * string, but will take exactly two bytes in the
-                         * target string
-                         */
-                        i += charlen;
-                        continue;
                }
+                /*
+                 * character may take more than one byte in the source string,
+                 * but will take exactly two bytes in the target string
+                 */
+                i += charlen;
                put_unaligned(dst_char, &target[j]);
-                i++; /* move to next char in source string */
        }
 ctoUCS_out:
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 644dd882a560..6d02fd560566 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -82,6 +82,9 @@ int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *);
 char *cifs_strndup_from_ucs(const char *src, const int maxlen,
                            const bool is_unicode,
                            const struct nls_table *codepage);
+extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
+                        const struct nls_table *cp, int mapChars);
 #endif
 /*
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index beeebf194234..f3c6fb9942ac 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -23,24 +23,16 @@
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/keyctl.h>
+#include <linux/key-type.h>
+#include <keys/user-type.h>
 #include "cifspdu.h"
 #include "cifsglob.h"
 #include "cifsacl.h"
 #include "cifsproto.h"
 #include "cifs_debug.h"
-static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
-        {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
-        {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
-        {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"},
-        {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(18), 0, 0, 0, 0} }, "sys"},
-        {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(544), 0, 0, 0} }, "root"},
-        {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(545), 0, 0, 0} }, "users"},
-        {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(546), 0, 0, 0} }, "guest"} }
-;
 /* security id for everyone/world system group */
 static const struct cifs_sid sid_everyone = {
        1, 1, {0, 0, 0, 0, 0, 1}, {0} };
@@ -50,50 +42,385 @@ static const struct cifs_sid sid_authusers = {
 /* group users */
 static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
+const struct cred *root_cred;
-int match_sid(struct cifs_sid *ctsid)
+static void
+shrink_idmap_tree(struct rb_root *root, int nr_to_scan, int *nr_rem,
+                        int *nr_del)
 {
-        int i, j;
+        struct rb_node *node;
-        int num_subauth, num_sat, num_saw;
+        struct rb_node *tmp;
-        struct cifs_sid *cwsid;
+        struct cifs_sid_id *psidid;
+        node = rb_first(root);
+        while (node) {
+                tmp = node;
+                node = rb_next(tmp);
+                psidid = rb_entry(tmp, struct cifs_sid_id, rbnode);
+                if (nr_to_scan == 0 || *nr_del == nr_to_scan)
+                        ++(*nr_rem);
+                else {
+                        if (time_after(jiffies, psidid->time + SID_MAP_EXPIRE)
+                                                && psidid->refcount == 0) {
+                                rb_erase(tmp, root);
+                                ++(*nr_del);
+                        } else
+                                ++(*nr_rem);
+                }
+        }
+}
+/*
+ * Run idmap cache shrinker.
+ */
+static int
+cifs_idmap_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+{
+        int nr_del = 0;
+        int nr_rem = 0;
+        struct rb_root *root;
+        root = &uidtree;
+        spin_lock(&siduidlock);
+        shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del);
+        spin_unlock(&siduidlock);
+        root = &gidtree;
+        spin_lock(&sidgidlock);
+        shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del);
+        spin_unlock(&sidgidlock);
+        return nr_rem;
+}
+static struct shrinker cifs_shrinker = {
+        .shrink = cifs_idmap_shrinker,
+        .seeks = DEFAULT_SEEKS,
+};
+static int
+cifs_idmap_key_instantiate(struct key *key, const void *data, size_t datalen)
+{
+        char *payload;
+        payload = kmalloc(datalen, GFP_KERNEL);
+        if (!payload)
+                return -ENOMEM;
+        memcpy(payload, data, datalen);
+        key->payload.data = payload;
+        return 0;
+}
+static inline void
+cifs_idmap_key_destroy(struct key *key)
+{
+        kfree(key->payload.data);
+}
-        if (!ctsid)
+struct key_type cifs_idmap_key_type = {
-                return -1;
+        .name        = "cifs.idmap",
+        .instantiate = cifs_idmap_key_instantiate,
+        .destroy     = cifs_idmap_key_destroy,
+        .describe    = user_describe,
+        .match       = user_match,
+};
+static void
+sid_to_str(struct cifs_sid *sidptr, char *sidstr)
+{
+        int i;
+        unsigned long saval;
+        char *strptr;
-        for (i = 0; i < NUM_WK_SIDS; ++i) {
+        strptr = sidstr;
-                cwsid = &(wksidarr[i].cifssid);
-                /* compare the revision */
+        sprintf(strptr, "%s", "S");
-                if (ctsid->revision != cwsid->revision)
+        strptr = sidstr + strlen(sidstr);
-                        continue;
-                /* compare all of the six auth values */
+        sprintf(strptr, "-%d", sidptr->revision);
-                for (j = 0; j < 6; ++j) {
+        strptr = sidstr + strlen(sidstr);
-                        if (ctsid->authority[j] != cwsid->authority[j])
-                                break;
+        for (i = 0; i < 6; ++i) {
+                if (sidptr->authority[i]) {
+                        sprintf(strptr, "-%d", sidptr->authority[i]);
+                        strptr = sidstr + strlen(sidstr);
                }
-                if (j < 6)
+        }
-                        continue; /* all of the auth values did not match */
+        for (i = 0; i < sidptr->num_subauth; ++i) {
-                /* compare all of the subauth values if any */
+                saval = le32_to_cpu(sidptr->sub_auth[i]);
-                num_sat = ctsid->num_subauth;
+                sprintf(strptr, "-%ld", saval);
-                num_saw = cwsid->num_subauth;
+                strptr = sidstr + strlen(sidstr);
-                num_subauth = num_sat < num_saw ? num_sat : num_saw;
+        }
-                if (num_subauth) {
+}
-                        for (j = 0; j < num_subauth; ++j) {
-                                if (ctsid->sub_auth[j] != cwsid->sub_auth[j])
+static void
-                                        break;
+id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
-                        }
+                struct cifs_sid_id **psidid, char *typestr)
-                        if (j < num_subauth)
+{
-                                continue; /* all sub_auth values do not match */
+        int rc;
+        char *strptr;
+        struct rb_node *node = root->rb_node;
+        struct rb_node *parent = NULL;
+        struct rb_node **linkto = &(root->rb_node);
+        struct cifs_sid_id *lsidid;
+        while (node) {
+                lsidid = rb_entry(node, struct cifs_sid_id, rbnode);
+                parent = node;
+                rc = compare_sids(sidptr, &((lsidid)->sid));
+                if (rc > 0) {
+                        linkto = &(node->rb_left);
+                        node = node->rb_left;
+                } else if (rc < 0) {
+                        linkto = &(node->rb_right);
+                        node = node->rb_right;
+                }
+        }
+        memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
+        (*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
+        (*psidid)->refcount = 0;
+        sprintf((*psidid)->sidstr, "%s", typestr);
+        strptr = (*psidid)->sidstr + strlen((*psidid)->sidstr);
+        sid_to_str(&(*psidid)->sid, strptr);
+        clear_bit(SID_ID_PENDING, &(*psidid)->state);
+        clear_bit(SID_ID_MAPPED, &(*psidid)->state);
+        rb_link_node(&(*psidid)->rbnode, parent, linkto);
+        rb_insert_color(&(*psidid)->rbnode, root);
+}
+static struct cifs_sid_id *
+id_rb_search(struct rb_root *root, struct cifs_sid *sidptr)
+{
+        int rc;
+        struct rb_node *node = root->rb_node;
+        struct cifs_sid_id *lsidid;
+        while (node) {
+                lsidid = rb_entry(node, struct cifs_sid_id, rbnode);
+                rc = compare_sids(sidptr, &((lsidid)->sid));
+                if (rc > 0) {
+                        node = node->rb_left;
+                } else if (rc < 0) {
+                        node = node->rb_right;
+                } else /* node found */
+                        return lsidid;
+        }
+        return NULL;
+}
+static int
+sidid_pending_wait(void *unused)
+{
+        schedule();
+        return signal_pending(current) ? -ERESTARTSYS : 0;
+}
+static int
+sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
+                struct cifs_fattr *fattr, uint sidtype)
+{
+        int rc;
+        unsigned long cid;
+        struct key *idkey;
+        const struct cred *saved_cred;
+        struct cifs_sid_id *psidid, *npsidid;
+        struct rb_root *cidtree;
+        spinlock_t *cidlock;
+        if (sidtype == SIDOWNER) {
+                cid = cifs_sb->mnt_uid; /* default uid, in case upcall fails */
+                cidlock = &siduidlock;
+                cidtree = &uidtree;
+        } else if (sidtype == SIDGROUP) {
+                cid = cifs_sb->mnt_gid; /* default gid, in case upcall fails */
+                cidlock = &sidgidlock;
+                cidtree = &gidtree;
+        } else
+                return -ENOENT;
+        spin_lock(cidlock);
+        psidid = id_rb_search(cidtree, psid);
+        if (!psidid) { /* node does not exist, allocate one & attempt adding */
+                spin_unlock(cidlock);
+                npsidid = kzalloc(sizeof(struct cifs_sid_id), GFP_KERNEL);
+                if (!npsidid)
+                        return -ENOMEM;
+                npsidid->sidstr = kmalloc(SIDLEN, GFP_KERNEL);
+                if (!npsidid->sidstr) {
+                        kfree(npsidid);
+                        return -ENOMEM;
+                }
+                spin_lock(cidlock);
+                psidid = id_rb_search(cidtree, psid);
+                if (psidid) { /* node happened to get inserted meanwhile */
+                        ++psidid->refcount;
+                        spin_unlock(cidlock);
+                        kfree(npsidid->sidstr);
+                        kfree(npsidid);
+                } else {
+                        psidid = npsidid;
+                        id_rb_insert(cidtree, psid, &psidid,
+                                        sidtype == SIDOWNER ? "os:" : "gs:");
+                        ++psidid->refcount;
+                        spin_unlock(cidlock);
                }
+        } else {
+                ++psidid->refcount;
+                spin_unlock(cidlock);
+        }
+        /*
+         * If we are here, it is safe to access psidid and its fields
+         * since a reference was taken earlier while holding the spinlock.
+         * A reference on the node is put without holding the spinlock
+         * and it is OK to do so in this case: the shrinker will not erase
+         * this node until all references are put, and we do not access
+         * any fields of the node after a reference is put.
+         */
+        if (test_bit(SID_ID_MAPPED, &psidid->state)) {
+                cid = psidid->id;
+                psidid->time = jiffies; /* update ts for accessing */
+                goto sid_to_id_out;
+        }
-                cFYI(1, "matching sid: %s\n", wksidarr[i].sidname);
+        if (time_after(psidid->time + SID_MAP_RETRY, jiffies))
-                return 0; /* sids compare/match */
+                goto sid_to_id_out;
+        if (!test_and_set_bit(SID_ID_PENDING, &psidid->state)) {
+                saved_cred = override_creds(root_cred);
+                idkey = request_key(&cifs_idmap_key_type, psidid->sidstr, "");
+                if (IS_ERR(idkey))
+                        cFYI(1, "%s: Can't map SID to an id", __func__);
+                else {
+                        cid = *(unsigned long *)idkey->payload.value;
+                        psidid->id = cid;
+                        set_bit(SID_ID_MAPPED, &psidid->state);
+                        key_put(idkey);
+                        kfree(psidid->sidstr);
+                }
+                revert_creds(saved_cred);
+                psidid->time = jiffies; /* update ts for accessing */
+                clear_bit(SID_ID_PENDING, &psidid->state);
+                wake_up_bit(&psidid->state, SID_ID_PENDING);
+        } else {
+                rc = wait_on_bit(&psidid->state, SID_ID_PENDING,
+                                sidid_pending_wait, TASK_INTERRUPTIBLE);
+                if (rc) {
+                        cFYI(1, "%s: sidid_pending_wait interrupted %d",
+                                        __func__, rc);
+                        --psidid->refcount; /* decremented without spinlock */
+                        return rc;
+                }
+                if (test_bit(SID_ID_MAPPED, &psidid->state))
+                        cid = psidid->id;
        }
-        cFYI(1, "No matching sid");
+sid_to_id_out:
-        return -1;
+        --psidid->refcount; /* decremented without spinlock */
+        if (sidtype == SIDOWNER)
+                fattr->cf_uid = cid;
+        else
+                fattr->cf_gid = cid;
+        return 0;
+}
+int
+init_cifs_idmap(void)
+{
+        struct cred *cred;
+        struct key *keyring;
+        int ret;
+        cFYI(1, "Registering the %s key type\n", cifs_idmap_key_type.name);
+        /* create an override credential set with a special thread keyring in
+         * which requests are cached
+         *
+         * this is used to prevent malicious redirections from being installed
+         * with add_key().
+         */
+        cred = prepare_kernel_cred(NULL);
+        if (!cred)
+                return -ENOMEM;
+        keyring = key_alloc(&key_type_keyring, ".cifs_idmap", 0, 0, cred,
+                            (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                            KEY_USR_VIEW | KEY_USR_READ,
+                            KEY_ALLOC_NOT_IN_QUOTA);
+        if (IS_ERR(keyring)) {
+                ret = PTR_ERR(keyring);
+                goto failed_put_cred;
+        }
+        ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
+        if (ret < 0)
+                goto failed_put_key;
+        ret = register_key_type(&cifs_idmap_key_type);
+        if (ret < 0)
+                goto failed_put_key;
+        /* instruct request_key() to use this special keyring as a cache for
+         * the results it looks up */
+        cred->thread_keyring = keyring;
+        cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
+        root_cred = cred;
+        spin_lock_init(&siduidlock);
+        uidtree = RB_ROOT;
+        spin_lock_init(&sidgidlock);
+        gidtree = RB_ROOT;
+        register_shrinker(&cifs_shrinker);
+        cFYI(1, "cifs idmap keyring: %d\n", key_serial(keyring));
+        return 0;
+failed_put_key:
+        key_put(keyring);
+failed_put_cred:
+        put_cred(cred);
+        return ret;
+}
+void
+exit_cifs_idmap(void)
+{
+        key_revoke(root_cred->thread_keyring);
+        unregister_key_type(&cifs_idmap_key_type);
+        put_cred(root_cred);
+        unregister_shrinker(&cifs_shrinker);
+        cFYI(1, "Unregistered %s key type\n", cifs_idmap_key_type.name);
+}
+void
+cifs_destroy_idmaptrees(void)
+{
+        struct rb_root *root;
+        struct rb_node *node;
+        root = &uidtree;
+        spin_lock(&siduidlock);
+        while ((node = rb_first(root)))
+                rb_erase(node, root);
+        spin_unlock(&siduidlock);
+        root = &gidtree;
+        spin_lock(&sidgidlock);
+        while ((node = rb_first(root)))
+                rb_erase(node, root);
+        spin_unlock(&sidgidlock);
 }
 /* if the two SIDs (roughly equivalent to a UUID for a user or group) are
@@ -104,16 +431,24 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
        int num_subauth, num_sat, num_saw;
        if ((!ctsid) || (!cwsid))
-                return 0;
+                return 1;
        /* compare the revision */
-        if (ctsid->revision != cwsid->revision)
+        if (ctsid->revision != cwsid->revision) {
-                return 0;
+                if (ctsid->revision > cwsid->revision)
+                        return 1;
+                else
+                        return -1;
+        }
        /* compare all of the six auth values */
        for (i = 0; i < 6; ++i) {
-                if (ctsid->authority[i] != cwsid->authority[i])
+                if (ctsid->authority[i] != cwsid->authority[i]) {
-                        return 0;
+                        if (ctsid->authority[i] > cwsid->authority[i])
+                                return 1;
+                        else
+                                return -1;
+                }
        }
        /* compare all of the subauth values if any */
@@ -122,12 +457,16 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
        num_subauth = num_sat < num_saw ? num_sat : num_saw;
        if (num_subauth) {
                for (i = 0; i < num_subauth; ++i) {
-                        if (ctsid->sub_auth[i] != cwsid->sub_auth[i])
+                        if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) {
-                                return 0;
+                                if (ctsid->sub_auth[i] > cwsid->sub_auth[i])
+                                        return 1;
+                                else
+                                        return -1;
+                        }
                }
        }
-        return 1; /* sids compare/match */
+        return 0; /* sids compare/match */
 }
@@ -382,22 +721,22 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
 #ifdef CONFIG_CIFS_DEBUG2
                        dump_ace(ppace[i], end_of_acl);
 #endif
-                        if (compare_sids(&(ppace[i]->sid), pownersid))
+                        if (compare_sids(&(ppace[i]->sid), pownersid) == 0)
                                access_flags_to_mode(ppace[i]->access_req,
                                                     ppace[i]->type,
                                                     &fattr->cf_mode,
                                                     &user_mask);
-                        if (compare_sids(&(ppace[i]->sid), pgrpsid))
+                        if (compare_sids(&(ppace[i]->sid), pgrpsid) == 0)
                                access_flags_to_mode(ppace[i]->access_req,
                                                     ppace[i]->type,
                                                     &fattr->cf_mode,
                                                     &group_mask);
-                        if (compare_sids(&(ppace[i]->sid), &sid_everyone))
+                        if (compare_sids(&(ppace[i]->sid), &sid_everyone) == 0)
                                access_flags_to_mode(ppace[i]->access_req,
                                                     ppace[i]->type,
                                                     &fattr->cf_mode,
                                                     &other_mask);
-                        if (compare_sids(&(ppace[i]->sid), &sid_authusers))
+                        if (compare_sids(&(ppace[i]->sid), &sid_authusers) == 0)
                                access_flags_to_mode(ppace[i]->access_req,
                                                     ppace[i]->type,
                                                     &fattr->cf_mode,
@@ -475,10 +814,10 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
 /* Convert CIFS ACL to POSIX form */
-static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
+static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
-                          struct cifs_fattr *fattr)
+                struct cifs_ntsd *pntsd, int acl_len, struct cifs_fattr *fattr)
 {
-        int rc;
+        int rc = 0;
        struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
        struct cifs_acl *dacl_ptr; /* no need for SACL ptr */
        char *end_of_acl = ((char *)pntsd) + acl_len;
@@ -500,12 +839,26 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
                 le32_to_cpu(pntsd->sacloffset), dacloffset);
 /*      cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */
        rc = parse_sid(owner_sid_ptr, end_of_acl);
-        if (rc)
+        if (rc) {
+                cFYI(1, "%s: Error %d parsing Owner SID", __func__, rc);
+                return rc;
+        }
+        rc = sid_to_id(cifs_sb, owner_sid_ptr, fattr, SIDOWNER);
+        if (rc) {
+                cFYI(1, "%s: Error %d mapping Owner SID to uid", __func__, rc);
                return rc;
+        }
        rc = parse_sid(group_sid_ptr, end_of_acl);
-        if (rc)
+        if (rc) {
+                cFYI(1, "%s: Error %d parsing Group SID", __func__, rc);
                return rc;
+        }
+        rc = sid_to_id(cifs_sb, group_sid_ptr, fattr, SIDGROUP);
+        if (rc) {
+                cFYI(1, "%s: Error %d mapping Group SID to gid", __func__, rc);
+                return rc;
+        }
        if (dacloffset)
                parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr,
@@ -520,7 +873,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
        memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr,
                        sizeof(struct cifs_sid)); */
-        return 0;
+        return rc;
 }
@@ -688,7 +1041,7 @@ out:
 }
 /* Set an ACL on the server */
-static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
+int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
                                struct inode *inode, const char *path)
 {
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -727,7 +1080,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
                rc = PTR_ERR(pntsd);
                cERROR(1, "%s: error %d getting sec desc", __func__, rc);
        } else {
-                rc = parse_sec_desc(pntsd, acllen, fattr);
+                rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr);
                kfree(pntsd);
                if (rc)
                        cERROR(1, "parse sec desc failed rc = %d", rc);
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index c4ae7d036563..5c902c7ce524 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -39,6 +39,15 @@
 #define ACCESS_ALLOWED  0
 #define ACCESS_DENIED   1
+#define SIDOWNER 1
+#define SIDGROUP 2
+#define SIDLEN 150 /* S- 1 revision- 6 authorities- max 5 sub authorities */
+#define SID_ID_MAPPED 0
+#define SID_ID_PENDING 1
+#define SID_MAP_EXPIRE (3600 * HZ) /* map entry expires after one hour */
+#define SID_MAP_RETRY (300 * HZ)   /* wait 5 minutes for next attempt to map */
 struct cifs_ntsd {
        __le16 revision; /* revision level */
        __le16 type;
@@ -74,7 +83,21 @@ struct cifs_wksid {
        char sidname[SIDNAMELENGTH];
 } __attribute__((packed));
-extern int match_sid(struct cifs_sid *);
+struct cifs_sid_id {
+        unsigned int refcount; /* increment with spinlock, decrement without */
+        unsigned long id;
+        unsigned long time;
+        unsigned long state;
+        char *sidstr;
+        struct rb_node rbnode;
+        struct cifs_sid sid;
+};
+#ifdef __KERNEL__
+extern struct key_type cifs_idmap_key_type;
+extern const struct cred *root_cred;
+#endif /* __KERNEL__ */
 extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *);
 #endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index d1a016be73ba..45c3f78c8f81 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -60,7 +60,7 @@ static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
                server->session_key.response, server->session_key.len);
        crypto_shash_update(&server->secmech.sdescmd5->shash,
-                cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
+                cifs_pdu->Protocol, be32_to_cpu(cifs_pdu->smb_buf_length));
        rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
@@ -268,10 +268,11 @@ int setup_ntlm_response(struct cifsSesInfo *ses)
 }
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
-void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
+int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
                        char *lnm_session_key)
 {
        int i;
+        int rc;
        char password_with_pad[CIFS_ENCPWD_SIZE];
        memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
@@ -282,7 +283,7 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
                memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
                memcpy(lnm_session_key, password_with_pad,
                        CIFS_ENCPWD_SIZE);
-                return;
+                return 0;
        }
        /* calculate old style session key */
@@ -299,10 +300,11 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
        for (i = 0; i < CIFS_ENCPWD_SIZE; i++)
                password_with_pad[i] = toupper(password_with_pad[i]);
-        SMBencrypt(password_with_pad, cryptkey, lnm_session_key);
+        rc = SMBencrypt(password_with_pad, cryptkey, lnm_session_key);
        /* clear password before we return/free memory */
        memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
+        return rc;
 }
 #endif /* CIFS_WEAK_PW_HASH */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5c412b33cd7c..493b74ca5648 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -128,29 +128,22 @@ cifs_read_super(struct super_block *sb, void *data,
        }
        cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
-#ifdef CONFIG_CIFS_DFS_UPCALL
+        /*
-        /* copy mount params to sb for use in submounts */
+         * Copy mount params to sb for use in submounts. Better to do
-        /* BB: should we move this after the mount so we
+         * the copy here and deal with the error before cleanup gets
-         * do not have to do the copy on failed mounts?
+         * complicated post-mount.
-         * BB: May be it is better to do simple copy before
+         */
-         * complex operation (mount), and in case of fail
-         * just exit instead of doing mount and attempting
-         * undo it if this copy fails?*/
        if (data) {
-                int len = strlen(data);
+                cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
-                cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
                if (cifs_sb->mountdata == NULL) {
                        bdi_destroy(&cifs_sb->bdi);
                        kfree(sb->s_fs_info);
                        sb->s_fs_info = NULL;
                        return -ENOMEM;
                }
-                strncpy(cifs_sb->mountdata, data, len + 1);
-                cifs_sb->mountdata[len] = '\0';
        }
-#endif
-        rc = cifs_mount(sb, cifs_sb, data, devname);
+        rc = cifs_mount(sb, cifs_sb, devname);
        if (rc) {
                if (!silent)
@@ -163,7 +156,7 @@ cifs_read_super(struct super_block *sb, void *data,
        sb->s_bdi = &cifs_sb->bdi;
        sb->s_blocksize = CIFS_MAX_MSGSIZE;
        sb->s_blocksize_bits = 14;      /* default 2**14 = CIFS_MAX_MSGSIZE */
-        inode = cifs_root_iget(sb, ROOT_I);
+        inode = cifs_root_iget(sb);
        if (IS_ERR(inode)) {
                rc = PTR_ERR(inode);
@@ -184,12 +177,12 @@ cifs_read_super(struct super_block *sb, void *data,
        else
                sb->s_d_op = &cifs_dentry_ops;
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CIFS_NFSD_EXPORT
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
                cFYI(1, "export ops supported");
                sb->s_export_op = &cifs_export_ops;
        }
-#endif /* EXPERIMENTAL */
+#endif /* CIFS_NFSD_EXPORT */
        return 0;
@@ -202,12 +195,10 @@ out_no_root:
 out_mount_failed:
        if (cifs_sb) {
-#ifdef CONFIG_CIFS_DFS_UPCALL
                if (cifs_sb->mountdata) {
                        kfree(cifs_sb->mountdata);
                        cifs_sb->mountdata = NULL;
                }
-#endif
                unload_nls(cifs_sb->local_nls);
                bdi_destroy(&cifs_sb->bdi);
                kfree(cifs_sb);
@@ -231,12 +222,10 @@ cifs_put_super(struct super_block *sb)
        rc = cifs_umount(sb, cifs_sb);
        if (rc)
                cERROR(1, "cifs_umount failed with return code %d", rc);
-#ifdef CONFIG_CIFS_DFS_UPCALL
        if (cifs_sb->mountdata) {
                kfree(cifs_sb->mountdata);
                cifs_sb->mountdata = NULL;
        }
-#endif
        unload_nls(cifs_sb->local_nls);
        bdi_destroy(&cifs_sb->bdi);
@@ -618,16 +607,31 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
 {
        /* origin == SEEK_END => we must revalidate the cached file length */
        if (origin == SEEK_END) {
-                int retval;
+                int rc;
+                struct inode *inode = file->f_path.dentry->d_inode;
-                /* some applications poll for the file length in this strange
-                   way so we must seek to end on non-oplocked files by
+                /*
-                   setting the revalidate time to zero */
+                 * We need to be sure that all dirty pages are written and the
-                CIFS_I(file->f_path.dentry->d_inode)->time = 0;
+                 * server has the newest file length.
+                 */
-                retval = cifs_revalidate_file(file);
+                if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
-                if (retval < 0)
+                    inode->i_mapping->nrpages != 0) {
-                        return (loff_t)retval;
+                        rc = filemap_fdatawait(inode->i_mapping);
+                        if (rc) {
+                                mapping_set_error(inode->i_mapping, rc);
+                                return rc;
+                        }
+                }
+                /*
+                 * Some applications poll for the file length in this strange
+                 * way so we must seek to end on non-oplocked files by
+                 * setting the revalidate time to zero.
+                 */
+                CIFS_I(inode)->time = 0;
+                rc = cifs_revalidate_file_attr(file);
+                if (rc < 0)
+                        return (loff_t)rc;
        }
        return generic_file_llseek_unlocked(file, offset, origin);
 }
@@ -760,10 +764,11 @@ const struct file_operations cifs_file_strict_ops = {
 };
 const struct file_operations cifs_file_direct_ops = {
-        /* no aio, no readv -
+        /* BB reevaluate whether they can be done with directio, no cache */
-           BB reevaluate whether they can be done with directio, no cache */
+        .read = do_sync_read,
-        .read = cifs_user_read,
+        .write = do_sync_write,
-        .write = cifs_user_write,
+        .aio_read = cifs_user_readv,
+        .aio_write = cifs_user_writev,
        .open = cifs_open,
        .release = cifs_close,
        .lock = cifs_lock,
@@ -815,10 +820,11 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
 };
 const struct file_operations cifs_file_direct_nobrl_ops = {
-        /* no mmap, no aio, no readv -
+        /* BB reevaluate whether they can be done with directio, no cache */
-           BB reevaluate whether they can be done with directio, no cache */
+        .read = do_sync_read,
-        .read = cifs_user_read,
+        .write = do_sync_write,
-        .write = cifs_user_write,
+        .aio_read = cifs_user_readv,
+        .aio_write = cifs_user_writev,
        .open = cifs_open,
        .release = cifs_close,
        .fsync = cifs_fsync,
@@ -981,10 +987,10 @@ init_cifs(void)
        int rc = 0;
        cifs_proc_init();
        INIT_LIST_HEAD(&cifs_tcp_ses_list);
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
        INIT_LIST_HEAD(&GlobalDnotifyReqList);
        INIT_LIST_HEAD(&GlobalDnotifyRsp_Q);
-#endif
+#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
 /*
 *  Initialize Global counters
 */
@@ -1033,22 +1039,33 @@ init_cifs(void)
        if (rc)
                goto out_destroy_mids;
-        rc = register_filesystem(&cifs_fs_type);
-        if (rc)
-                goto out_destroy_request_bufs;
 #ifdef CONFIG_CIFS_UPCALL
        rc = register_key_type(&cifs_spnego_key_type);
        if (rc)
-                goto out_unregister_filesystem;
+                goto out_destroy_request_bufs;
-#endif
+#endif /* CONFIG_CIFS_UPCALL */
+#ifdef CONFIG_CIFS_ACL
+        rc = init_cifs_idmap();
+        if (rc)
+                goto out_register_key_type;
+#endif /* CONFIG_CIFS_ACL */
+        rc = register_filesystem(&cifs_fs_type);
+        if (rc)
+                goto out_init_cifs_idmap;
        return 0;
-#ifdef CONFIG_CIFS_UPCALL
+out_init_cifs_idmap:
-out_unregister_filesystem:
+#ifdef CONFIG_CIFS_ACL
-        unregister_filesystem(&cifs_fs_type);
+        exit_cifs_idmap();
+out_register_key_type:
 #endif
+#ifdef CONFIG_CIFS_UPCALL
+        unregister_key_type(&cifs_spnego_key_type);
 out_destroy_request_bufs:
+#endif
        cifs_destroy_request_bufs();
 out_destroy_mids:
        cifs_destroy_mids();
@@ -1070,6 +1087,10 @@ exit_cifs(void)
 #ifdef CONFIG_CIFS_DFS_UPCALL
        cifs_dfs_release_automount_timer();
 #endif
+#ifdef CONFIG_CIFS_ACL
+        cifs_destroy_idmaptrees();
+        exit_cifs_idmap();
+#endif
 #ifdef CONFIG_CIFS_UPCALL
        unregister_key_type(&cifs_spnego_key_type);
 #endif
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index a9371b6578c0..64313f778ebf 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -47,7 +47,7 @@ extern void cifs_sb_deactive(struct super_block *sb);
 /* Functions related to inodes */
 extern const struct inode_operations cifs_dir_inode_ops;
-extern struct inode *cifs_root_iget(struct super_block *, unsigned long);
+extern struct inode *cifs_root_iget(struct super_block *);
 extern int cifs_create(struct inode *, struct dentry *, int,
                       struct nameidata *);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
@@ -59,9 +59,11 @@ extern int cifs_mkdir(struct inode *, struct dentry *, int);
 extern int cifs_rmdir(struct inode *, struct dentry *);
 extern int cifs_rename(struct inode *, struct dentry *, struct inode *,
                       struct dentry *);
+extern int cifs_revalidate_file_attr(struct file *filp);
+extern int cifs_revalidate_dentry_attr(struct dentry *);
 extern int cifs_revalidate_file(struct file *filp);
 extern int cifs_revalidate_dentry(struct dentry *);
-extern void cifs_invalidate_mapping(struct inode *inode);
+extern int cifs_invalidate_mapping(struct inode *inode);
 extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern int cifs_setattr(struct dentry *, struct iattr *);
@@ -80,12 +82,12 @@ extern const struct file_operations cifs_file_strict_nobrl_ops;
 extern int cifs_open(struct inode *inode, struct file *file);
 extern int cifs_close(struct inode *inode, struct file *file);
 extern int cifs_closedir(struct inode *inode, struct file *file);
-extern ssize_t cifs_user_read(struct file *file, char __user *read_data,
+extern ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
-                              size_t read_size, loff_t *poffset);
+                               unsigned long nr_segs, loff_t pos);
 extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
                                 unsigned long nr_segs, loff_t pos);
-extern ssize_t cifs_user_write(struct file *file, const char __user *write_data,
+extern ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
-                               size_t write_size, loff_t *poffset);
+                                unsigned long nr_segs, loff_t pos);
 extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
                                  unsigned long nr_segs, loff_t pos);
 extern int cifs_lock(struct file *, int, struct file_lock *);
@@ -123,9 +125,9 @@ extern ssize_t	cifs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t  cifs_listxattr(struct dentry *, char *, size_t);
 extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CIFS_NFSD_EXPORT
 extern const struct export_operations cifs_export_ops;
-#endif /* EXPERIMENTAL */
+#endif /* CIFS_NFSD_EXPORT */
-#define CIFS_VERSION   "1.71"
+#define CIFS_VERSION   "1.72"
 #endif                          /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a5d1106fcbde..76b4517e74b0 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -274,7 +274,8 @@ struct cifsSesInfo {
        int capabilities;
        char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
                                TCP names - will ipv6 and sctp addresses fit? */
-        char *user_name;
+        char *user_name;        /* must not be NULL, except during session
+                                   init; filled in after mount option parsing */
        char *domainName;
        char *password;
        struct session_key auth_key;
@@ -780,10 +781,12 @@ GLOBAL_EXTERN spinlock_t		cifs_tcp_ses_lock;
 */
 GLOBAL_EXTERN spinlock_t        cifs_file_list_lock;
+#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
 /* Outstanding dir notify requests */
 GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
 /* DirNotify response queue */
 GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q;
+#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
 /*
 * Global transaction id (XID) information
@@ -830,6 +833,11 @@ GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
 /* reconnect after this many failed echo attempts */
 GLOBAL_EXTERN unsigned short echo_retries;
+GLOBAL_EXTERN struct rb_root uidtree;
+GLOBAL_EXTERN struct rb_root gidtree;
+GLOBAL_EXTERN spinlock_t siduidlock;
+GLOBAL_EXTERN spinlock_t sidgidlock;
 void cifs_oplock_break(struct work_struct *work);
 void cifs_oplock_break_get(struct cifsFileInfo *cfile);
 void cifs_oplock_break_put(struct cifsFileInfo *cfile);
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b5c8cc5d7a7f..de3aa285de03 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -397,9 +397,9 @@
 #define GETU32(var)  (*((__u32 *)var))  /* BB check for endian issues */
 struct smb_hdr {
-        __u32 smb_buf_length;   /* big endian on wire *//* BB length is only two
+        __be32 smb_buf_length;  /* BB length is only two (rarely three) bytes,
-                or three bytes - with one or two byte type preceding it that are
+                with one or two byte "type" preceding it that will be
-                zero - we could mask the type byte off just in case BB */
+                zero - we could mask the type byte off */
        __u8 Protocol[4];
        __u8 Command;
        union {
@@ -428,43 +428,28 @@ struct smb_hdr {
        __u8 WordCount;
 } __attribute__((packed));
-/* given a pointer to an smb_hdr retrieve a char pointer to the byte count */
+/* given a pointer to an smb_hdr, retrieve a void pointer to the ByteCount */
-#define BCC(smb_var) ((unsigned char *)(smb_var) + sizeof(struct smb_hdr) + \
+static inline void *
-                         (2 * (smb_var)->WordCount))
+BCC(struct smb_hdr *smb)
+{
+        return (void *)smb + sizeof(*smb) + 2 * smb->WordCount;
+}
 /* given a pointer to an smb_hdr retrieve the pointer to the byte area */
 #define pByteArea(smb_var) (BCC(smb_var) + 2)
-/* get the converted ByteCount for a SMB packet and return it */
-static inline __u16
-get_bcc(struct smb_hdr *hdr)
-{
-        __u16 *bc_ptr = (__u16 *)BCC(hdr);
-        return get_unaligned(bc_ptr);
-}
 /* get the unconverted ByteCount for a SMB packet and return it */
 static inline __u16
-get_bcc_le(struct smb_hdr *hdr)
+get_bcc(struct smb_hdr *hdr)
 {
        __le16 *bc_ptr = (__le16 *)BCC(hdr);
        return get_unaligned_le16(bc_ptr);
 }
-/* set the ByteCount for a SMB packet in host-byte order */
-static inline void
-put_bcc(__u16 count, struct smb_hdr *hdr)
-{
-        __u16 *bc_ptr = (__u16 *)BCC(hdr);
-        put_unaligned(count, bc_ptr);
-}
 /* set the ByteCount for a SMB packet in little-endian */
 static inline void
-put_bcc_le(__u16 count, struct smb_hdr *hdr)
+put_bcc(__u16 count, struct smb_hdr *hdr)
 {
        __le16 *bc_ptr = (__le16 *)BCC(hdr);
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 8096f27ad9a8..6e69e06a30b3 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -53,6 +53,9 @@ do {								\
        cFYI(1, "CIFS VFS: leaving %s (xid = %d) rc = %d",      \
             __func__, curr_xid, (int)rc);                      \
 } while (0)
+extern int init_cifs_idmap(void);
+extern void exit_cifs_idmap(void);
+extern void cifs_destroy_idmaptrees(void);
 extern char *build_path_from_dentry(struct dentry *);
 extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
                                        struct cifsTconInfo *tcon);
@@ -90,7 +93,6 @@ extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
 extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
 extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
 extern unsigned int smbCalcSize(struct smb_hdr *ptr);
-extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
 extern int decode_negTokenInit(unsigned char *security_blob, int length,
                        struct TCP_Server_Info *server);
 extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
@@ -143,8 +145,10 @@ extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
 extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64);
 extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
                                        const char *, u32 *);
+extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *,
+                                const char *);
-extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
+extern int cifs_mount(struct super_block *, struct cifs_sb_info *,
                        const char *);
 extern int cifs_umount(struct super_block *, struct cifs_sb_info *);
 extern void cifs_dfs_release_automount_timer(void);
@@ -304,12 +308,13 @@ extern int CIFSSMBUnixQuerySymLink(const int xid,
                        struct cifsTconInfo *tcon,
                        const unsigned char *searchName, char **syminfo,
                        const struct nls_table *nls_codepage);
+#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
 extern int CIFSSMBQueryReparseLinkInfo(const int xid,
                        struct cifsTconInfo *tcon,
                        const unsigned char *searchName,
                        char *symlinkinfo, const int buflen, __u16 fid,
                        const struct nls_table *nls_codepage);
+#endif /* temporarily unused until cifs_symlink fixed */
 extern int CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon,
                        const char *fileName, const int disposition,
                        const int access_flags, const int omode,
@@ -348,8 +353,6 @@ extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
                        const unsigned char *searchName, __u64 *inode_number,
                        const struct nls_table *nls_codepage,
                        int remap_special_chars);
-extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
-                        const struct nls_table *cp, int mapChars);
 extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
                        const __u16 netfid, const __u64 len,
@@ -383,9 +386,15 @@ extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
 extern int calc_seckey(struct cifsSesInfo *);
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
-extern void calc_lanman_hash(const char *password, const char *cryptkey,
+extern int calc_lanman_hash(const char *password, const char *cryptkey,
                                bool encrypt, char *lnm_session_key);
 #endif /* CIFS_WEAK_PW_HASH */
+#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
+extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
+                        const int notify_subdirs, const __u16 netfid,
+                        __u32 filter, struct file *file, int multishot,
+                        const struct nls_table *nls_codepage);
+#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
 extern int CIFSSMBCopy(int xid,
                        struct cifsTconInfo *source_tcon,
                        const char *fromName,
@@ -393,10 +402,6 @@ extern int CIFSSMBCopy(int xid,
                        const char *toName, const int flags,
                        const struct nls_table *nls_codepage,
                        int remap_special_chars);
-extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
-                        const int notify_subdirs, const __u16 netfid,
-                        __u32 filter, struct file *file, int multishot,
-                        const struct nls_table *nls_codepage);
 extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon,
                        const unsigned char *searchName,
                        const unsigned char *ea_name, char *EAData,
@@ -427,9 +432,6 @@ extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
                struct cifs_sb_info *cifs_sb, int xid);
 extern int mdfour(unsigned char *, unsigned char *, int);
 extern int E_md4hash(const unsigned char *passwd, unsigned char *p16);
-extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
+extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
-                        unsigned char *p24);
-extern void E_P16(unsigned char *p14, unsigned char *p16);
-extern void E_P24(unsigned char *p21, const unsigned char *c8,
                        unsigned char *p24);
 #endif                  /* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index df959bae6728..83df937b814e 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -339,12 +339,13 @@ static int validate_t2(struct smb_t2_rsp *pSMB)
            get_unaligned_le16(&pSMB->t2_rsp.DataOffset) > 1024)
                goto vt2_err;
-        /* check that bcc is at least as big as parms + data */
-        /* check that bcc is less than negotiated smb buffer */
        total_size = get_unaligned_le16(&pSMB->t2_rsp.ParameterCount);
        if (total_size >= 512)
                goto vt2_err;
+        /* check that bcc is at least as big as parms + data, and that it is
+         * less than negotiated smb buffer
+         */
        total_size += get_unaligned_le16(&pSMB->t2_rsp.DataCount);
        if (total_size > get_bcc(&pSMB->hdr) ||
            total_size >= CIFSMaxBufSize + MAX_CIFS_HDR_SIZE)
@@ -357,6 +358,13 @@ vt2_err:
        return -EINVAL;
 }
+static inline void inc_rfc1001_len(void *pSMB, int count)
+{
+        struct smb_hdr *hdr = (struct smb_hdr *)pSMB;
+        be32_add_cpu(&hdr->smb_buf_length, count);
+}
 int
 CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 {
@@ -409,7 +417,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
                count += strlen(protocols[i].name) + 1;
                /* null at end of source and target buffers anyway */
        }
-        pSMB->hdr.smb_buf_length += count;
+        inc_rfc1001_len(pSMB, count);
        pSMB->ByteCount = cpu_to_le16(count);
        rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
@@ -541,10 +549,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
                server->secType = RawNTLMSSP;
        else if (secFlags & CIFSSEC_MAY_LANMAN)
                server->secType = LANMAN;
-/* #ifdef CONFIG_CIFS_EXPERIMENTAL
-        else if (secFlags & CIFSSEC_MAY_PLNTXT)
-                server->secType = ??
-#endif */
        else {
                rc = -EOPNOTSUPP;
                cERROR(1, "Invalid security type");
@@ -578,7 +582,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
        if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) &&
                (server->capabilities & CAP_EXTENDED_SECURITY)) {
-                count = pSMBr->ByteCount;
+                count = get_bcc(&pSMBr->hdr);
                if (count < 16) {
                        rc = -EIO;
                        goto neg_err_exit;
@@ -732,9 +736,9 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
        smb->hdr.Tid = 0xffff;
        smb->hdr.WordCount = 1;
        put_unaligned_le16(1, &smb->EchoCount);
-        put_bcc_le(1, &smb->hdr);
+        put_bcc(1, &smb->hdr);
        smb->Data[0] = 'a';
-        smb->hdr.smb_buf_length += 3;
+        inc_rfc1001_len(smb, 3);
        rc = cifs_call_async(server, (struct smb_hdr *)smb,
                                cifs_echo_callback, server);
@@ -852,7 +856,7 @@ PsxDelete:
        pSMB->TotalParameterCount = pSMB->ParameterCount;
        pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_UNLINK);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -898,7 +902,7 @@ DelFileRetry:
        pSMB->SearchAttributes =
            cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM);
        pSMB->BufferFormat = 0x04;
-        pSMB->hdr.smb_buf_length += name_len + 1;
+        inc_rfc1001_len(pSMB, name_len + 1);
        pSMB->ByteCount = cpu_to_le16(name_len + 1);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -942,7 +946,7 @@ RmDirRetry:
        }
        pSMB->BufferFormat = 0x04;
-        pSMB->hdr.smb_buf_length += name_len + 1;
+        inc_rfc1001_len(pSMB, name_len + 1);
        pSMB->ByteCount = cpu_to_le16(name_len + 1);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -985,7 +989,7 @@ MkDirRetry:
        }
        pSMB->BufferFormat = 0x04;
-        pSMB->hdr.smb_buf_length += name_len + 1;
+        inc_rfc1001_len(pSMB, name_len + 1);
        pSMB->ByteCount = cpu_to_le16(name_len + 1);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -1063,7 +1067,7 @@ PsxCreat:
        pSMB->TotalParameterCount = pSMB->ParameterCount;
        pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_OPEN);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -1075,7 +1079,7 @@ PsxCreat:
        cFYI(1, "copying inode info");
        rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-        if (rc || (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP))) {
+        if (rc || get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)) {
                rc = -EIO;      /* bad smb */
                goto psx_create_err;
        }
@@ -1096,7 +1100,7 @@ PsxCreat:
                pRetData->Type = cpu_to_le32(-1); /* unknown */
                cFYI(DBG2, "unknown type");
        } else {
-                if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP)
+                if (get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)
                                        + sizeof(FILE_UNIX_BASIC_INFO)) {
                        cERROR(1, "Open response data too small");
                        pRetData->Type = cpu_to_le32(-1);
@@ -1228,7 +1232,7 @@ OldOpenRetry:
        pSMB->Sattr = cpu_to_le16(ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY);
        pSMB->OpenFunction = cpu_to_le16(convert_disposition(openDisposition));
        count += name_len;
-        pSMB->hdr.smb_buf_length += count;
+        inc_rfc1001_len(pSMB, count);
        pSMB->ByteCount = cpu_to_le16(count);
        /* long_op set to 1 to allow for oplock break timeouts */
@@ -1341,7 +1345,7 @@ openRetry:
            SECURITY_CONTEXT_TRACKING | SECURITY_EFFECTIVE_ONLY;
        count += name_len;
-        pSMB->hdr.smb_buf_length += count;
+        inc_rfc1001_len(pSMB, count);
        pSMB->ByteCount = cpu_to_le16(count);
        /* long_op set to 1 to allow for oplock break timeouts */
@@ -1426,7 +1430,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
        }
        iov[0].iov_base = (char *)pSMB;
-        iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
+        iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
        rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
                         &resp_buf_type, CIFS_LOG_ERROR);
        cifs_stats_inc(&tcon->num_reads);
@@ -1560,7 +1564,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
        pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF);
        pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16);
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        if (wct == 14)
                pSMB->ByteCount = cpu_to_le16(byte_count);
@@ -1644,11 +1648,12 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
        pSMB->DataLengthLow = cpu_to_le16(count & 0xFFFF);
        pSMB->DataLengthHigh = cpu_to_le16(count >> 16);
-        smb_hdr_len = pSMB->hdr.smb_buf_length + 1; /* hdr + 1 byte pad */
+        /* header + 1 byte pad */
+        smb_hdr_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 1;
        if (wct == 14)
-                pSMB->hdr.smb_buf_length += count+1;
+                inc_rfc1001_len(pSMB, count + 1);
        else /* wct == 12 */
-                pSMB->hdr.smb_buf_length += count+5; /* smb data starts later */
+                inc_rfc1001_len(pSMB, count + 5); /* smb data starts later */
        if (wct == 14)
                pSMB->ByteCount = cpu_to_le16(count + 1);
        else /* wct == 12 */ /* bigger pad, smaller smb hdr, keep offset ok */ {
@@ -1748,7 +1753,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
                /* oplock break */
                count = 0;
        }
-        pSMB->hdr.smb_buf_length += count;
+        inc_rfc1001_len(pSMB, count);
        pSMB->ByteCount = cpu_to_le16(count);
        if (waitFlag) {
@@ -1839,14 +1844,14 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
        pSMB->Fid = smb_file_id;
        pSMB->InformationLevel = cpu_to_le16(SMB_SET_POSIX_LOCK);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        if (waitFlag) {
                rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB,
                        (struct smb_hdr *) pSMBr, &bytes_returned);
        } else {
                iov[0].iov_base = (char *)pSMB;
-                iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
+                iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
                rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
                                &resp_buf_type, timeout);
                pSMB = NULL; /* request buf already freed by SendReceive2. Do
@@ -1862,7 +1867,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
                __u16 data_count;
                rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-                if (rc || (pSMBr->ByteCount < sizeof(struct cifs_posix_lock))) {
+                if (rc || get_bcc(&pSMBr->hdr) < sizeof(*parm_data)) {
                        rc = -EIO;      /* bad smb */
                        goto plk_err_exit;
                }
@@ -2012,7 +2017,7 @@ renameRetry:
        }
        count = 1 /* 1st signature byte */  + name_len + name_len2;
-        pSMB->hdr.smb_buf_length += count;
+        inc_rfc1001_len(pSMB, count);
        pSMB->ByteCount = cpu_to_le16(count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2092,7 +2097,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
        pSMB->InformationLevel =
                cpu_to_le16(SMB_SET_FILE_RENAME_INFORMATION);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB,
                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2159,7 +2164,7 @@ copyRetry:
        }
        count = 1 /* 1st signature byte */  + name_len + name_len2;
-        pSMB->hdr.smb_buf_length += count;
+        inc_rfc1001_len(pSMB, count);
        pSMB->ByteCount = cpu_to_le16(count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2249,7 +2254,7 @@ createSymLinkRetry:
        pSMB->DataOffset = cpu_to_le16(offset);
        pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_LINK);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2335,7 +2340,7 @@ createHardLinkRetry:
        pSMB->DataOffset = cpu_to_le16(offset);
        pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_HLINK);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2406,7 +2411,7 @@ winCreateHardLinkRetry:
        }
        count = 1 /* string type byte */  + name_len + name_len2;
-        pSMB->hdr.smb_buf_length += count;
+        inc_rfc1001_len(pSMB, count);
        pSMB->ByteCount = cpu_to_le16(count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2477,7 +2482,7 @@ querySymLinkRetry:
        pSMB->ParameterCount = pSMB->TotalParameterCount;
        pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_LINK);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2489,7 +2494,7 @@ querySymLinkRetry:
                rc = validate_t2((struct smb_t2_rsp *)pSMBr);
                /* BB also check enough total bytes returned */
-                if (rc || (pSMBr->ByteCount < 2))
+                if (rc || get_bcc(&pSMBr->hdr) < 2)
                        rc = -EIO;
                else {
                        bool is_unicode;
@@ -2516,7 +2521,17 @@ querySymLinkRetry:
        return rc;
 }
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
+/*
+ *      Recent Windows versions now create symlinks more frequently
+ *      and they use the "reparse point" mechanism below.  We can of course
+ *      do symlinks nicely to Samba and other servers which support the
+ *      CIFS Unix Extensions and we can also do SFU symlinks and "client only"
+ *      "MF" symlinks optionally, but for recent Windows we really need to
+ *      reenable the code below and fix the cifs_symlink callers to handle this.
+ *      In the interim this code was moved to its own config option so it is not
+ *      compiled in by default until callers are fixed and it is better tested.
+ */
 int
 CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
                        const unsigned char *searchName,
@@ -2561,14 +2576,14 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
        } else {                /* decode response */
                __u32 data_offset = le32_to_cpu(pSMBr->DataOffset);
                __u32 data_count = le32_to_cpu(pSMBr->DataCount);
-                if ((pSMBr->ByteCount < 2) || (data_offset > 512)) {
+                if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) {
-                /* BB also check enough total bytes returned */
+                        /* BB also check enough total bytes returned */
                        rc = -EIO;      /* bad smb */
                        goto qreparse_out;
                }
                if (data_count && (data_count < 2048)) {
                        char *end_of_smb = 2 /* sizeof byte count */ +
-                                pSMBr->ByteCount + (char *)&pSMBr->ByteCount;
+                               get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
                        struct reparse_data *reparse_buf =
                                                (struct reparse_data *)
@@ -2618,7 +2633,7 @@ qreparse_out:
        return rc;
 }
-#endif /* CIFS_EXPERIMENTAL */
+#endif /* CIFS_SYMLINK_EXPERIMENTAL */ /* BB temporarily unused */
 #ifdef CONFIG_CIFS_POSIX
@@ -2814,7 +2829,7 @@ queryAclRetry:
        pSMB->ParameterCount = pSMB->TotalParameterCount;
        pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_ACL);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2826,8 +2841,8 @@ queryAclRetry:
                /* decode response */
                rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-                if (rc || (pSMBr->ByteCount < 2))
                /* BB also check enough total bytes returned */
+                if (rc || get_bcc(&pSMBr->hdr) < 2)
                        rc = -EIO;      /* bad smb */
                else {
                        __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -2908,7 +2923,7 @@ setAclRetry:
        pSMB->ParameterCount = cpu_to_le16(params);
        pSMB->TotalParameterCount = pSMB->ParameterCount;
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2966,7 +2981,7 @@ GetExtAttrRetry:
        pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_ATTR_FLAGS);
        pSMB->Pad = 0;
        pSMB->Fid = netfid;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->t2.ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2976,8 +2991,8 @@ GetExtAttrRetry:
        } else {
                /* decode response */
                rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-                if (rc || (pSMBr->ByteCount < 2))
                /* BB also check enough total bytes returned */
+                if (rc || get_bcc(&pSMBr->hdr) < 2)
                        /* If rc should we check for EOPNOSUPP and
                           disable the srvino flag? or in caller? */
                        rc = -EIO;      /* bad smb */
@@ -3052,6 +3067,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
        char *end_of_smb;
        __u32 data_count, data_offset, parm_count, parm_offset;
        struct smb_com_ntransact_rsp *pSMBr;
+        u16 bcc;
        *pdatalen = 0;
        *pparmlen = 0;
@@ -3061,8 +3077,8 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
        pSMBr = (struct smb_com_ntransact_rsp *)buf;
-        /* ByteCount was converted from little endian in SendReceive */
+        bcc = get_bcc(&pSMBr->hdr);
-        end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount +
+        end_of_smb = 2 /* sizeof byte count */ + bcc +
                        (char *)&pSMBr->ByteCount;
        data_offset = le32_to_cpu(pSMBr->DataOffset);
@@ -3088,7 +3104,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
                        *ppdata, data_count, (data_count + *ppdata),
                        end_of_smb, pSMBr);
                return -EINVAL;
-        } else if (parm_count + data_count > pSMBr->ByteCount) {
+        } else if (parm_count + data_count > bcc) {
                cFYI(1, "parm count and data count larger than SMB");
                return -EINVAL;
        }
@@ -3124,9 +3140,9 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
        pSMB->AclFlags = cpu_to_le32(CIFS_ACL_OWNER | CIFS_ACL_GROUP |
                                     CIFS_ACL_DACL);
        pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */
-        pSMB->hdr.smb_buf_length += 11;
+        inc_rfc1001_len(pSMB, 11);
        iov[0].iov_base = (char *)pSMB;
-        iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
+        iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
        rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type,
                         0);
@@ -3235,10 +3251,9 @@ setCifsAclRetry:
                memcpy((char *) &pSMBr->hdr.Protocol + data_offset,
                        (char *) pntsd,
                        acllen);
-                pSMB->hdr.smb_buf_length += (byte_count + data_count);
+                inc_rfc1001_len(pSMB, byte_count + data_count);
        } else
-                pSMB->hdr.smb_buf_length += byte_count;
+                inc_rfc1001_len(pSMB, byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
                (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3289,7 +3304,7 @@ QInfRetry:
        }
        pSMB->BufferFormat = 0x04;
        name_len++; /* account for buffer type byte */
-        pSMB->hdr.smb_buf_length += (__u16) name_len;
+        inc_rfc1001_len(pSMB, (__u16)name_len);
        pSMB->ByteCount = cpu_to_le16(name_len);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3364,7 +3379,7 @@ QFileInfoRetry:
        pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
        pSMB->Pad = 0;
        pSMB->Fid = netfid;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3375,7 +3390,7 @@ QFileInfoRetry:
                if (rc) /* BB add auto retry on EOPNOTSUPP? */
                        rc = -EIO;
-                else if (pSMBr->ByteCount < 40)
+                else if (get_bcc(&pSMBr->hdr) < 40)
                        rc = -EIO;      /* bad smb */
                else if (pFindData) {
                        __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3451,7 +3466,7 @@ QPathInfoRetry:
        else
                pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3463,9 +3478,9 @@ QPathInfoRetry:
                if (rc) /* BB add auto retry on EOPNOTSUPP? */
                        rc = -EIO;
-                else if (!legacy && (pSMBr->ByteCount < 40))
+                else if (!legacy && get_bcc(&pSMBr->hdr) < 40)
                        rc = -EIO;      /* bad smb */
-                else if (legacy && (pSMBr->ByteCount < 24))
+                else if (legacy && get_bcc(&pSMBr->hdr) < 24)
                        rc = -EIO;  /* 24 or 26 expected but we do not read
                                        last field */
                else if (pFindData) {
@@ -3532,7 +3547,7 @@ UnixQFileInfoRetry:
        pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
        pSMB->Pad = 0;
        pSMB->Fid = netfid;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3541,7 +3556,7 @@ UnixQFileInfoRetry:
        } else {                /* decode response */
                rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-                if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) {
+                if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
                        cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
                                   "Unix Extensions can be disabled on mount "
                                   "by specifying the nosfu mount option.");
@@ -3617,7 +3632,7 @@ UnixQPathInfoRetry:
        pSMB->ParameterCount = pSMB->TotalParameterCount;
        pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3627,7 +3642,7 @@ UnixQPathInfoRetry:
        } else {                /* decode response */
                rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-                if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) {
+                if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
                        cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
                                   "Unix Extensions can be disabled on mount "
                                   "by specifying the nosfu mount option.");
@@ -3731,7 +3746,7 @@ findFirstRetry:
        /* BB what should we set StorageType to? Does it matter? BB */
        pSMB->SearchStorageType = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3860,7 +3875,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
        byte_count = params + 1 /* pad */ ;
        pSMB->TotalParameterCount = cpu_to_le16(params);
        pSMB->ParameterCount = pSMB->TotalParameterCount;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4022,7 +4037,7 @@ GetInodeNumberRetry:
        pSMB->ParameterCount = pSMB->TotalParameterCount;
        pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_INTERNAL_INFO);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4032,8 +4047,8 @@ GetInodeNumberRetry:
        } else {
                /* decode response */
                rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-                if (rc || (pSMBr->ByteCount < 2))
                /* BB also check enough total bytes returned */
+                if (rc || get_bcc(&pSMBr->hdr) < 2)
                        /* If rc should we check for EOPNOSUPP and
                        disable the srvino flag? or in caller? */
                        rc = -EIO;      /* bad smb */
@@ -4246,7 +4261,7 @@ getDFSRetry:
        pSMB->ParameterCount = cpu_to_le16(params);
        pSMB->TotalParameterCount = pSMB->ParameterCount;
        pSMB->MaxReferralLevel = cpu_to_le16(3);
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
@@ -4258,13 +4273,13 @@ getDFSRetry:
        rc = validate_t2((struct smb_t2_rsp *)pSMBr);
        /* BB Also check if enough total bytes returned? */
-        if (rc || (pSMBr->ByteCount < 17)) {
+        if (rc || get_bcc(&pSMBr->hdr) < 17) {
                rc = -EIO;      /* bad smb */
                goto GetDFSRefExit;
        }
        cFYI(1, "Decoding GetDFSRefer response BCC: %d  Offset %d",
-                                pSMBr->ByteCount,
+                                get_bcc(&pSMBr->hdr),
                                le16_to_cpu(pSMBr->t2.DataOffset));
        /* parse returned result into more usable form */
@@ -4320,7 +4335,7 @@ oldQFSInfoRetry:
        pSMB->Reserved3 = 0;
        pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
        pSMB->InformationLevel = cpu_to_le16(SMB_INFO_ALLOCATION);
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4330,12 +4345,12 @@ oldQFSInfoRetry:
        } else {                /* decode response */
                rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-                if (rc || (pSMBr->ByteCount < 18))
+                if (rc || get_bcc(&pSMBr->hdr) < 18)
                        rc = -EIO;      /* bad smb */
                else {
                        __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
                        cFYI(1, "qfsinf resp BCC: %d  Offset %d",
-                                 pSMBr->ByteCount, data_offset);
+                                 get_bcc(&pSMBr->hdr), data_offset);
                        response_data = (FILE_SYSTEM_ALLOC_INFO *)
                                (((char *) &pSMBr->hdr.Protocol) + data_offset);
@@ -4399,7 +4414,7 @@ QFSInfoRetry:
        pSMB->Reserved3 = 0;
        pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
        pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_SIZE_INFO);
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4409,7 +4424,7 @@ QFSInfoRetry:
        } else {                /* decode response */
                rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-                if (rc || (pSMBr->ByteCount < 24))
+                if (rc || get_bcc(&pSMBr->hdr) < 24)
                        rc = -EIO;      /* bad smb */
                else {
                        __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4479,7 +4494,7 @@ QFSAttributeRetry:
        pSMB->Reserved3 = 0;
        pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
        pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_ATTRIBUTE_INFO);
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4489,7 +4504,7 @@ QFSAttributeRetry:
        } else {                /* decode response */
                rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-                if (rc || (pSMBr->ByteCount < 13)) {
+                if (rc || get_bcc(&pSMBr->hdr) < 13) {
                        /* BB also check if enough bytes returned */
                        rc = -EIO;      /* bad smb */
                } else {
@@ -4550,7 +4565,7 @@ QFSDeviceRetry:
        pSMB->Reserved3 = 0;
        pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
        pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_DEVICE_INFO);
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4560,7 +4575,8 @@ QFSDeviceRetry:
        } else {                /* decode response */
                rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-                if (rc || (pSMBr->ByteCount < sizeof(FILE_SYSTEM_DEVICE_INFO)))
+                if (rc || get_bcc(&pSMBr->hdr) <
+                          sizeof(FILE_SYSTEM_DEVICE_INFO))
                        rc = -EIO;      /* bad smb */
                else {
                        __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4619,7 +4635,7 @@ QFSUnixRetry:
        pSMB->Reserved3 = 0;
        pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
        pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_CIFS_UNIX_INFO);
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4629,7 +4645,7 @@ QFSUnixRetry:
        } else {                /* decode response */
                rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-                if (rc || (pSMBr->ByteCount < 13)) {
+                if (rc || get_bcc(&pSMBr->hdr) < 13) {
                        rc = -EIO;      /* bad smb */
                } else {
                        __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4702,7 +4718,7 @@ SETFSUnixRetry:
        pSMB->ClientUnixMinor = cpu_to_le16(CIFS_UNIX_MINOR_VERSION);
        pSMB->ClientUnixCap = cpu_to_le64(cap);
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4764,7 +4780,7 @@ QFSPosixRetry:
        pSMB->Reserved3 = 0;
        pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
        pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_FS_INFO);
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4774,7 +4790,7 @@ QFSPosixRetry:
        } else {                /* decode response */
                rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-                if (rc || (pSMBr->ByteCount < 13)) {
+                if (rc || get_bcc(&pSMBr->hdr) < 13) {
                        rc = -EIO;      /* bad smb */
                } else {
                        __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4890,7 +4906,7 @@ SetEOFRetry:
        pSMB->ParameterCount = cpu_to_le16(params);
        pSMB->TotalParameterCount = pSMB->ParameterCount;
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        parm_data->FileSize = cpu_to_le64(size);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4969,7 +4985,7 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
                                cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO);
        }
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
        if (rc) {
@@ -5037,7 +5053,7 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
        else
                pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
        rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
@@ -5096,7 +5112,7 @@ CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon,
        pSMB->Fid = fid;
        pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        *data_offset = delete_file ? 1 : 0;
        rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
@@ -5169,7 +5185,7 @@ SetTimesRetry:
        else
                pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -5221,7 +5237,7 @@ SetAttrLgcyRetry:
        }
        pSMB->attr = cpu_to_le16(dos_attrs);
        pSMB->BufferFormat = 0x04;
-        pSMB->hdr.smb_buf_length += name_len + 1;
+        inc_rfc1001_len(pSMB, name_len + 1);
        pSMB->ByteCount = cpu_to_le16(name_len + 1);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5326,7 +5342,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
        pSMB->Fid = fid;
        pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        cifs_fill_unix_set_info(data_offset, args);
@@ -5402,7 +5418,7 @@ setPermsRetry:
        pSMB->TotalDataCount = pSMB->DataCount;
        pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        cifs_fill_unix_set_info(data_offset, args);
@@ -5418,79 +5434,6 @@ setPermsRetry:
        return rc;
 }
-int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
-                  const int notify_subdirs, const __u16 netfid,
-                  __u32 filter, struct file *pfile, int multishot,
-                  const struct nls_table *nls_codepage)
-{
-        int rc = 0;
-        struct smb_com_transaction_change_notify_req *pSMB = NULL;
-        struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL;
-        struct dir_notify_req *dnotify_req;
-        int bytes_returned;
-        cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
-        rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
-                      (void **) &pSMBr);
-        if (rc)
-                return rc;
-        pSMB->TotalParameterCount = 0 ;
-        pSMB->TotalDataCount = 0;
-        pSMB->MaxParameterCount = cpu_to_le32(2);
-        /* BB find exact data count max from sess structure BB */
-        pSMB->MaxDataCount = 0; /* same in little endian or be */
-/* BB VERIFY verify which is correct for above BB */
-        pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
-                                             MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
-        pSMB->MaxSetupCount = 4;
-        pSMB->Reserved = 0;
-        pSMB->ParameterOffset = 0;
-        pSMB->DataCount = 0;
-        pSMB->DataOffset = 0;
-        pSMB->SetupCount = 4; /* single byte does not need le conversion */
-        pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE);
-        pSMB->ParameterCount = pSMB->TotalParameterCount;
-        if (notify_subdirs)
-                pSMB->WatchTree = 1; /* one byte - no le conversion needed */
-        pSMB->Reserved2 = 0;
-        pSMB->CompletionFilter = cpu_to_le32(filter);
-        pSMB->Fid = netfid; /* file handle always le */
-        pSMB->ByteCount = 0;
-        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
-                         (struct smb_hdr *)pSMBr, &bytes_returned,
-                         CIFS_ASYNC_OP);
-        if (rc) {
-                cFYI(1, "Error in Notify = %d", rc);
-        } else {
-                /* Add file to outstanding requests */
-                /* BB change to kmem cache alloc */
-                dnotify_req = kmalloc(
-                                                sizeof(struct dir_notify_req),
-                                                 GFP_KERNEL);
-                if (dnotify_req) {
-                        dnotify_req->Pid = pSMB->hdr.Pid;
-                        dnotify_req->PidHigh = pSMB->hdr.PidHigh;
-                        dnotify_req->Mid = pSMB->hdr.Mid;
-                        dnotify_req->Tid = pSMB->hdr.Tid;
-                        dnotify_req->Uid = pSMB->hdr.Uid;
-                        dnotify_req->netfid = netfid;
-                        dnotify_req->pfile = pfile;
-                        dnotify_req->filter = filter;
-                        dnotify_req->multishot = multishot;
-                        spin_lock(&GlobalMid_Lock);
-                        list_add_tail(&dnotify_req->lhead,
-                                        &GlobalDnotifyReqList);
-                        spin_unlock(&GlobalMid_Lock);
-                } else
-                        rc = -ENOMEM;
-        }
-        cifs_buf_release(pSMB);
-        return rc;
-}
 #ifdef CONFIG_CIFS_XATTR
 /*
 * Do a path-based QUERY_ALL_EAS call and parse the result. This is a common
@@ -5560,7 +5503,7 @@ QAllEAsRetry:
        pSMB->ParameterCount = pSMB->TotalParameterCount;
        pSMB->InformationLevel = cpu_to_le16(SMB_INFO_QUERY_ALL_EAS);
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -5576,7 +5519,7 @@ QAllEAsRetry:
        of these trans2 responses */
        rc = validate_t2((struct smb_t2_rsp *)pSMBr);
-        if (rc || (pSMBr->ByteCount < 4)) {
+        if (rc || get_bcc(&pSMBr->hdr) < 4) {
                rc = -EIO;      /* bad smb */
                goto QAllEAsOut;
        }
@@ -5773,7 +5716,7 @@ SetEARetry:
        pSMB->ParameterCount = cpu_to_le16(params);
        pSMB->TotalParameterCount = pSMB->ParameterCount;
        pSMB->Reserved4 = 0;
-        pSMB->hdr.smb_buf_length += byte_count;
+        inc_rfc1001_len(pSMB, byte_count);
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5787,5 +5730,99 @@ SetEARetry:
        return rc;
 }
 #endif
+#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* BB unused temporarily */
+/*
+ * Send an NT_TRANSACT_NOTIFY_CHANGE request for netfid; on success queue a
+ * dir_notify_req on GlobalDnotifyReqList (-ENOMEM if its allocation fails).
+ *
+ * Years ago the kernel added a "dnotify" function for Samba server, to allow
+ * network clients (such as Windows) to display updated lists of files in
+ * directory listings automatically when files are added by one user when
+ * another user has the same directory open on their desktop.  The Linux cifs
+ * kernel client hooked into the kernel side of this interface for the same
+ * reason, but ironically when the VFS moved from "dnotify" to "inotify" it
+ * became harder to plug in Linux network file system clients (the most obvious
+ * use case for notify interfaces is when multiple users can update the
+ * contents of the same directory - exactly what network file systems can do)
+ * although the server (Samba) could still use it.  For the short term we leave
+ * the worker function ifdeffed out (below) until inotify is fixed in the VFS
+ * to make it easier to plug in network file system clients.  If inotify turns
+ * out to be permanently incompatible for network fs clients, we could instead
+ * simply expose this config flag by adding a future cifs (and smb2) notify
+ * ioctl.
+ */
+int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
+                  const int notify_subdirs, const __u16 netfid,
+                  __u32 filter, struct file *pfile, int multishot,
+                  const struct nls_table *nls_codepage)
+{
+        int rc = 0;
+        struct smb_com_transaction_change_notify_req *pSMB = NULL;
+        struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL;
+        struct dir_notify_req *dnotify_req;
+        int bytes_returned;
+        cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
+        rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
+                      (void **) &pSMBr);
+        if (rc)
+                return rc;
+        pSMB->TotalParameterCount = 0 ;
+        pSMB->TotalDataCount = 0;
+        pSMB->MaxParameterCount = cpu_to_le32(2);
+        /* BB find exact max data count from the session structure BB */
+        pSMB->MaxDataCount = 0; /* 0 is the same in le or be; overwritten below */
+/* BB VERIFY which of the two MaxDataCount assignments is correct BB */
+        pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
+                                             MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
+        pSMB->MaxSetupCount = 4;
+        pSMB->Reserved = 0;
+        pSMB->ParameterOffset = 0;
+        pSMB->DataCount = 0;
+        pSMB->DataOffset = 0;
+        pSMB->SetupCount = 4; /* single byte does not need le conversion */
+        pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE);
+        pSMB->ParameterCount = pSMB->TotalParameterCount;
+        if (notify_subdirs)
+                pSMB->WatchTree = 1; /* one byte - no le conversion needed */
+        pSMB->Reserved2 = 0;
+        pSMB->CompletionFilter = cpu_to_le32(filter);
+        pSMB->Fid = netfid; /* file handle always le */
+        pSMB->ByteCount = 0;
+        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
+                         (struct smb_hdr *)pSMBr, &bytes_returned,
+                         CIFS_ASYNC_OP);
+        if (rc) {
+                cFYI(1, "Error in Notify = %d", rc);
+        } else {
+                /* Request was sent: add it to the outstanding notify requests */
+                /* BB change to kmem cache alloc */
+                dnotify_req = kmalloc(
+                                                sizeof(struct dir_notify_req),
+                                                 GFP_KERNEL);
+                if (dnotify_req) {
+                        dnotify_req->Pid = pSMB->hdr.Pid; /* ids from pSMB->hdr */
+                        dnotify_req->PidHigh = pSMB->hdr.PidHigh;
+                        dnotify_req->Mid = pSMB->hdr.Mid;
+                        dnotify_req->Tid = pSMB->hdr.Tid;
+                        dnotify_req->Uid = pSMB->hdr.Uid;
+                        dnotify_req->netfid = netfid;
+                        dnotify_req->pfile = pfile;
+                        dnotify_req->filter = filter;
+                        dnotify_req->multishot = multishot;
+                        spin_lock(&GlobalMid_Lock); /* held for list insert */
+                        list_add_tail(&dnotify_req->lhead,
+                                        &GlobalDnotifyReqList);
+                        spin_unlock(&GlobalMid_Lock);
+                } else
+                        rc = -ENOMEM;
+        }
+        cifs_buf_release(pSMB); /* reached on both success and error paths */
+        return rc;
+}
+#endif /* needed for dnotify; will be needed for inotify once VFS is fixed */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4bc862a80efa..da284e3cb653 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -102,6 +102,7 @@ struct smb_vol {
        bool fsc:1;     /* enable fscache */
        bool mfsymlinks:1; /* use Minshall+French Symlinks */
        bool multiuser:1;
+        bool use_smb2:1; /* force smb2 use on mount instead of cifs */
        unsigned int rsize;
        unsigned int wsize;
        bool sockopt_tcp_nodelay:1;
@@ -274,7 +275,8 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
        char *data_area_of_target;
        char *data_area_of_buf2;
        int remaining;
-        __u16 byte_count, total_data_size, total_in_buf, total_in_buf2;
+        unsigned int byte_count, total_in_buf;
+        __u16 total_data_size, total_in_buf2;
        total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount);
@@ -287,7 +289,7 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
        remaining = total_data_size - total_in_buf;
        if (remaining < 0)
-                return -EINVAL;
+                return -EPROTO;
        if (remaining == 0) /* nothing to do, ignore */
                return 0;
@@ -308,19 +310,28 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
        data_area_of_target += total_in_buf;
        /* copy second buffer into end of first buffer */
-        memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2);
        total_in_buf += total_in_buf2;
+        /* is the result too big for the field? */
+        if (total_in_buf > USHRT_MAX)
+                return -EPROTO;
        put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount);
-        byte_count = get_bcc_le(pTargetSMB);
-        byte_count += total_in_buf2;
-        put_bcc_le(byte_count, pTargetSMB);
-        byte_count = pTargetSMB->smb_buf_length;
+        /* fix up the BCC */
+        byte_count = get_bcc(pTargetSMB);
        byte_count += total_in_buf2;
+        /* is the result too big for the field? */
+        if (byte_count > USHRT_MAX)
+                return -EPROTO;
+        put_bcc(byte_count, pTargetSMB);
-        /* BB also add check that we are not beyond maximum buffer size */
+        byte_count = be32_to_cpu(pTargetSMB->smb_buf_length);
+        byte_count += total_in_buf2;
+        /* don't allow buffer to overflow */
+        if (byte_count > CIFSMaxBufSize)
+                return -ENOBUFS;
+        pTargetSMB->smb_buf_length = cpu_to_be32(byte_count);
-        pTargetSMB->smb_buf_length = byte_count;
+        memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2);
        if (remaining == total_in_buf2) {
                cFYI(1, "found the last secondary response");
@@ -485,8 +496,7 @@ incomplete_rcv:
                /* Note that FC 1001 length is big endian on the wire,
                but we convert it here so it is always manipulated
                as host byte order */
-                pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length);
+                pdu_length = be32_to_cpu(smb_buffer->smb_buf_length);
-                smb_buffer->smb_buf_length = pdu_length;
                cFYI(1, "rfc1002 length 0x%x", pdu_length+4);
@@ -607,59 +617,63 @@ incomplete_rcv:
                list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
                        mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-                        if ((mid_entry->mid == smb_buffer->Mid) &&
+                        if (mid_entry->mid != smb_buffer->Mid ||
-                            (mid_entry->midState == MID_REQUEST_SUBMITTED) &&
+                            mid_entry->midState != MID_REQUEST_SUBMITTED ||
-                            (mid_entry->command == smb_buffer->Command)) {
+                            mid_entry->command != smb_buffer->Command) {
-                                if (length == 0 &&
+                                mid_entry = NULL;
-                                   check2ndT2(smb_buffer, server->maxBuf) > 0) {
+                                continue;
-                                        /* We have a multipart transact2 resp */
+                        }
-                                        isMultiRsp = true;
-                                        if (mid_entry->resp_buf) {
+                        if (length == 0 &&
-                                                /* merge response - fix up 1st*/
+                            check2ndT2(smb_buffer, server->maxBuf) > 0) {
-                                                if (coalesce_t2(smb_buffer,
+                                /* We have a multipart transact2 resp */
-                                                        mid_entry->resp_buf)) {
+                                isMultiRsp = true;
-                                                        mid_entry->multiRsp =
+                                if (mid_entry->resp_buf) {
-                                                                 true;
+                                        /* merge response - fix up 1st*/
-                                                        break;
+                                        length = coalesce_t2(smb_buffer,
-                                                } else {
+                                                        mid_entry->resp_buf);
-                                                        /* all parts received */
+                                        if (length > 0) {
-                                                        mid_entry->multiEnd =
+                                                length = 0;
-                                                                 true;
+                                                mid_entry->multiRsp = true;
-                                                        goto multi_t2_fnd;
+                                                break;
-                                                }
                                        } else {
-                                                if (!isLargeBuf) {
+                                                /* all parts received or
-                                                        cERROR(1, "1st trans2 resp needs bigbuf");
+                                                 * packet is malformed
-                                        /* BB maybe we can fix this up,  switch
+                                                 */
-                                           to already allocated large buffer? */
+                                                mid_entry->multiEnd = true;
-                                                } else {
+                                                goto multi_t2_fnd;
-                                                        /* Have first buffer */
+                                        }
-                                                        mid_entry->resp_buf =
+                                } else {
-                                                                 smb_buffer;
+                                        if (!isLargeBuf) {
-                                                        mid_entry->largeBuf =
+                                                /*
-                                                                 true;
+                                                 * FIXME: switch to already
-                                                        bigbuf = NULL;
+                                                 *        allocated largebuf?
-                                                }
+                                                 */
+                                                cERROR(1, "1st trans2 resp "
+                                                          "needs bigbuf");
+                                        } else {
+                                                /* Have first buffer */
+                                                mid_entry->resp_buf =
+                                                         smb_buffer;
+                                                mid_entry->largeBuf = true;
+                                                bigbuf = NULL;
                                        }
-                                        break;
                                }
-                                mid_entry->resp_buf = smb_buffer;
+                                break;
-                                mid_entry->largeBuf = isLargeBuf;
+                        }
+                        mid_entry->resp_buf = smb_buffer;
+                        mid_entry->largeBuf = isLargeBuf;
 multi_t2_fnd:
-                                if (length == 0)
+                        if (length == 0)
-                                        mid_entry->midState =
+                                mid_entry->midState = MID_RESPONSE_RECEIVED;
-                                                        MID_RESPONSE_RECEIVED;
+                        else
-                                else
+                                mid_entry->midState = MID_RESPONSE_MALFORMED;
-                                        mid_entry->midState =
-                                                        MID_RESPONSE_MALFORMED;
 #ifdef CONFIG_CIFS_STATS2
-                                mid_entry->when_received = jiffies;
+                        mid_entry->when_received = jiffies;
 #endif
-                                list_del_init(&mid_entry->qhead);
+                        list_del_init(&mid_entry->qhead);
-                                mid_entry->callback(mid_entry);
+                        mid_entry->callback(mid_entry);
-                                break;
+                        break;
-                        }
-                        mid_entry = NULL;
                }
                spin_unlock(&GlobalMid_Lock);
@@ -721,7 +735,7 @@ multi_t2_fnd:
                sock_release(csocket);
                server->ssocket = NULL;
        }
-        /* buffer usuallly freed in free_mid - need to free it here on exit */
+        /* buffer usually freed in free_mid - need to free it here on exit */
        cifs_buf_release(bigbuf);
        if (smallbuf) /* no sense logging a debug message if NULL */
                cifs_small_buf_release(smallbuf);
@@ -804,10 +818,11 @@ extract_hostname(const char *unc)
 }
 static int
-cifs_parse_mount_options(char *options, const char *devname,
+cifs_parse_mount_options(const char *mountdata, const char *devname,
                         struct smb_vol *vol)
 {
        char *value, *data, *end;
+        char *mountdata_copy, *options;
        unsigned int  temp_len, i, j;
        char separator[2];
        short int override_uid = -1;
@@ -847,9 +862,14 @@ cifs_parse_mount_options(char *options, const char *devname,
        vol->actimeo = CIFS_DEF_ACTIMEO;
-        if (!options)
+        if (!mountdata)
-                return 1;
+                goto cifs_parse_mount_err;
+        mountdata_copy = kstrndup(mountdata, PAGE_SIZE, GFP_KERNEL);
+        if (!mountdata_copy)
+                goto cifs_parse_mount_err;
+        options = mountdata_copy;
        end = options + strlen(options);
        if (strncmp(options, "sep=", 4) == 0) {
                if (options[4] != 0) {
@@ -875,17 +895,22 @@ cifs_parse_mount_options(char *options, const char *devname,
                        if (!value) {
                                printk(KERN_WARNING
                                       "CIFS: invalid or missing username\n");
-                                return 1;       /* needs_arg; */
+                                goto cifs_parse_mount_err;
                        } else if (!*value) {
                                /* null user, ie anonymous, authentication */
                                vol->nullauth = 1;
                        }
                        if (strnlen(value, MAX_USERNAME_SIZE) <
                                                MAX_USERNAME_SIZE) {
-                                vol->username = value;
+                                vol->username = kstrdup(value, GFP_KERNEL);
+                                if (!vol->username) {
+                                        printk(KERN_WARNING "CIFS: no memory "
+                                                            "for username\n");
+                                        goto cifs_parse_mount_err;
+                                }
                        } else {
                                printk(KERN_WARNING "CIFS: username too long\n");
-                                return 1;
+                                goto cifs_parse_mount_err;
                        }
                } else if (strnicmp(data, "pass", 4) == 0) {
                        if (!value) {
@@ -949,7 +974,7 @@ cifs_parse_mount_options(char *options, const char *devname,
                                if (vol->password == NULL) {
                                        printk(KERN_WARNING "CIFS: no memory "
                                                            "for password\n");
-                                        return 1;
+                                        goto cifs_parse_mount_err;
                                }
                                for (i = 0, j = 0; i < temp_len; i++, j++) {
                                        vol->password[j] = value[i];
@@ -965,7 +990,7 @@ cifs_parse_mount_options(char *options, const char *devname,
                                if (vol->password == NULL) {
                                        printk(KERN_WARNING "CIFS: no memory "
                                                            "for password\n");
-                                        return 1;
+                                        goto cifs_parse_mount_err;
                                }
                                strcpy(vol->password, value);
                        }
@@ -975,11 +1000,16 @@ cifs_parse_mount_options(char *options, const char *devname,
                                vol->UNCip = NULL;
                        } else if (strnlen(value, INET6_ADDRSTRLEN) <
                                                        INET6_ADDRSTRLEN) {
-                                vol->UNCip = value;
+                                vol->UNCip = kstrdup(value, GFP_KERNEL);
+                                if (!vol->UNCip) {
+                                        printk(KERN_WARNING "CIFS: no memory "
+                                                            "for UNC IP\n");
+                                        goto cifs_parse_mount_err;
+                                }
                        } else {
                                printk(KERN_WARNING "CIFS: ip address "
                                                    "too long\n");
-                                return 1;
+                                goto cifs_parse_mount_err;
                        }
                } else if (strnicmp(data, "sec", 3) == 0) {
                        if (!value || !*value) {
@@ -992,7 +1022,7 @@ cifs_parse_mount_options(char *options, const char *devname,
                                /* vol->secFlg |= CIFSSEC_MUST_SEAL |
                                        CIFSSEC_MAY_KRB5; */
                                cERROR(1, "Krb5 cifs privacy not supported");
-                                return 1;
+                                goto cifs_parse_mount_err;
                        } else if (strnicmp(value, "krb5", 4) == 0) {
                                vol->secFlg |= CIFSSEC_MAY_KRB5;
                        } else if (strnicmp(value, "ntlmsspi", 8) == 0) {
@@ -1022,7 +1052,23 @@ cifs_parse_mount_options(char *options, const char *devname,
                                vol->nullauth = 1;
                        } else {
                                cERROR(1, "bad security option: %s", value);
-                                return 1;
+                                goto cifs_parse_mount_err;
+                        }
+                } else if (strnicmp(data, "vers", 3) == 0) {
+                        if (!value || !*value) {
+                                cERROR(1, "no protocol version specified"
+                                          " after vers= mount option");
+                        } else if ((strnicmp(value, "cifs", 4) == 0) ||
+                                   (strnicmp(value, "1", 1) == 0)) {
+                                /* this is the default */
+                                continue;
+                        } else if ((strnicmp(value, "smb2", 4) == 0) ||
+                                   (strnicmp(value, "2", 1) == 0)) {
+#ifdef CONFIG_CIFS_SMB2
+                                vol->use_smb2 = true;
+#else
+                                cERROR(1, "smb2 support not enabled");
+#endif /* CONFIG_CIFS_SMB2 */
                        }
                } else if ((strnicmp(data, "unc", 3) == 0)
                           || (strnicmp(data, "target", 6) == 0)
@@ -1030,12 +1076,12 @@ cifs_parse_mount_options(char *options, const char *devname,
                        if (!value || !*value) {
                                printk(KERN_WARNING "CIFS: invalid path to "
                                                    "network resource\n");
-                                return 1;       /* needs_arg; */
+                                goto cifs_parse_mount_err;
                        }
                        if ((temp_len = strnlen(value, 300)) < 300) {
                                vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
                                if (vol->UNC == NULL)
-                                        return 1;
+                                        goto cifs_parse_mount_err;
                                strcpy(vol->UNC, value);
                                if (strncmp(vol->UNC, "//", 2) == 0) {
                                        vol->UNC[0] = '\\';
@@ -1044,27 +1090,32 @@ cifs_parse_mount_options(char *options, const char *devname,
                                        printk(KERN_WARNING
                                               "CIFS: UNC Path does not begin "
                                               "with // or \\\\ \n");
-                                        return 1;
+                                        goto cifs_parse_mount_err;
                                }
                        } else {
                                printk(KERN_WARNING "CIFS: UNC name too long\n");
-                                return 1;
+                                goto cifs_parse_mount_err;
                        }
                } else if ((strnicmp(data, "domain", 3) == 0)
                           || (strnicmp(data, "workgroup", 5) == 0)) {
                        if (!value || !*value) {
                                printk(KERN_WARNING "CIFS: invalid domain name\n");
-                                return 1;       /* needs_arg; */
+                                goto cifs_parse_mount_err;
                        }
                        /* BB are there cases in which a comma can be valid in
                        a domain name and need special handling? */
                        if (strnlen(value, 256) < 256) {
-                                vol->domainname = value;
+                                vol->domainname = kstrdup(value, GFP_KERNEL);
+                                if (!vol->domainname) {
+                                        printk(KERN_WARNING "CIFS: no memory "
+                                                            "for domainname\n");
+                                        goto cifs_parse_mount_err;
+                                }
                                cFYI(1, "Domain name set");
                        } else {
                                printk(KERN_WARNING "CIFS: domain name too "
                                                    "long\n");
-                                return 1;
+                                goto cifs_parse_mount_err;
                        }
                } else if (strnicmp(data, "srcaddr", 7) == 0) {
                        vol->srcaddr.ss_family = AF_UNSPEC;
@@ -1072,7 +1123,7 @@ cifs_parse_mount_options(char *options, const char *devname,
                        if (!value || !*value) {
                                printk(KERN_WARNING "CIFS: srcaddr value"
                                       " not specified.\n");
-                                return 1;       /* needs_arg; */
+                                goto cifs_parse_mount_err;
                        }
                        i = cifs_convert_address((struct sockaddr *)&vol->srcaddr,
                                                 value, strlen(value));
@@ -1080,20 +1131,20 @@ cifs_parse_mount_options(char *options, const char *devname,
                                printk(KERN_WARNING "CIFS:  Could not parse"
                                       " srcaddr: %s\n",
                                       value);
-                                return 1;
+                                goto cifs_parse_mount_err;
                        }
                } else if (strnicmp(data, "prefixpath", 10) == 0) {
                        if (!value || !*value) {
                                printk(KERN_WARNING
                                        "CIFS: invalid path prefix\n");
-                                return 1;       /* needs_argument */
+                                goto cifs_parse_mount_err;
                        }
                        if ((temp_len = strnlen(value, 1024)) < 1024) {
                                if (value[0] != '/')
                                        temp_len++;  /* missing leading slash */
                                vol->prepath = kmalloc(temp_len+1, GFP_KERNEL);
                                if (vol->prepath == NULL)
-                                        return 1;
+                                        goto cifs_parse_mount_err;
                                if (value[0] != '/') {
                                        vol->prepath[0] = '/';
                                        strcpy(vol->prepath+1, value);
@@ -1102,24 +1153,33 @@ cifs_parse_mount_options(char *options, const char *devname,
                                cFYI(1, "prefix path %s", vol->prepath);
                        } else {
                                printk(KERN_WARNING "CIFS: prefix too long\n");
-                                return 1;
+                                goto cifs_parse_mount_err;
                        }
                } else if (strnicmp(data, "iocharset", 9) == 0) {
                        if (!value || !*value) {
                                printk(KERN_WARNING "CIFS: invalid iocharset "
                                                    "specified\n");
-                                return 1;       /* needs_arg; */
+                                goto cifs_parse_mount_err;
                        }
                        if (strnlen(value, 65) < 65) {
-                                if (strnicmp(value, "default", 7))
+                                if (strnicmp(value, "default", 7)) {
-                                        vol->iocharset = value;
+                                        vol->iocharset = kstrdup(value,
+                                                                 GFP_KERNEL);
+                                        if (!vol->iocharset) {
+                                                printk(KERN_WARNING "CIFS: no "
+                                                                   "memory for"
+                                                                   "charset\n");
+                                                goto cifs_parse_mount_err;
+                                        }
+                                }
                                /* if iocharset not set then load_nls_default
                                   is used by caller */
                                cFYI(1, "iocharset set to %s", value);
                        } else {
                                printk(KERN_WARNING "CIFS: iocharset name "
                                                    "too long.\n");
-                                return 1;
+                                goto cifs_parse_mount_err;
                        }
                } else if (!strnicmp(data, "uid", 3) && value && *value) {
                        vol->linux_uid = simple_strtoul(value, &value, 0);
@@ -1232,7 +1292,7 @@ cifs_parse_mount_options(char *options, const char *devname,
                                if (vol->actimeo > CIFS_MAX_ACTIMEO) {
                                        cERROR(1, "CIFS: attribute cache"
                                                        "timeout too large");
-                                        return 1;
+                                        goto cifs_parse_mount_err;
                                }
                        }
                } else if (strnicmp(data, "credentials", 4) == 0) {
@@ -1376,7 +1436,7 @@ cifs_parse_mount_options(char *options, const char *devname,
 #ifndef CONFIG_CIFS_FSCACHE
                        cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE"
                                  "kernel config option set");
-                        return 1;
+                        goto cifs_parse_mount_err;
 #endif
                        vol->fsc = true;
                } else if (strnicmp(data, "mfsymlinks", 10) == 0) {
@@ -1391,12 +1451,12 @@ cifs_parse_mount_options(char *options, const char *devname,
                if (devname == NULL) {
                        printk(KERN_WARNING "CIFS: Missing UNC name for mount "
                                                "target\n");
-                        return 1;
+                        goto cifs_parse_mount_err;
                }
                if ((temp_len = strnlen(devname, 300)) < 300) {
                        vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
                        if (vol->UNC == NULL)
-                                return 1;
+                                goto cifs_parse_mount_err;
                        strcpy(vol->UNC, devname);
                        if (strncmp(vol->UNC, "//", 2) == 0) {
                                vol->UNC[0] = '\\';
@@ -1404,21 +1464,21 @@ cifs_parse_mount_options(char *options, const char *devname,
                        } else if (strncmp(vol->UNC, "\\\\", 2) != 0) {
                                printk(KERN_WARNING "CIFS: UNC Path does not "
                                                    "begin with // or \\\\ \n");
-                                return 1;
+                                goto cifs_parse_mount_err;
                        }
                        value = strpbrk(vol->UNC+2, "/\\");
                        if (value)
                                *value = '\\';
                } else {
                        printk(KERN_WARNING "CIFS: UNC name too long\n");
-                        return 1;
+                        goto cifs_parse_mount_err;
                }
        }
        if (vol->multiuser && !(vol->secFlg & CIFSSEC_MAY_KRB5)) {
                cERROR(1, "Multiuser mounts currently require krb5 "
                          "authentication!");
-                return 1;
+                goto cifs_parse_mount_err;
        }
        if (vol->UNCip == NULL)
@@ -1436,7 +1496,12 @@ cifs_parse_mount_options(char *options, const char *devname,
                printk(KERN_NOTICE "CIFS: ignoring forcegid mount option "
                                   "specified with no gid= option.\n");
+        kfree(mountdata_copy);
        return 0;
+cifs_parse_mount_err:
+        kfree(mountdata_copy);
+        return 1;
 }
 /** Returns true if srcaddr isn't specified and rhs isn't
@@ -2266,7 +2331,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
                smb_buf = (struct smb_hdr *)ses_init_buf;
                /* sizeof RFC1002_SESSION_REQUEST with no scope */
-                smb_buf->smb_buf_length = 0x81000044;
+                smb_buf->smb_buf_length = cpu_to_be32(0x81000044);
                rc = smb_send(server, smb_buf, 0x44);
                kfree(ses_init_buf);
                /*
@@ -2659,6 +2724,11 @@ is_path_accessible(int xid, struct cifsTconInfo *tcon,
                              0 /* not legacy */, cifs_sb->local_nls,
                              cifs_sb->mnt_cifs_flags &
                                CIFS_MOUNT_MAP_SPECIAL_CHR);
+        if (rc == -EOPNOTSUPP || rc == -EINVAL)
+                rc = SMBQueryInformation(xid, tcon, full_path, pfile_info,
+                                cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+                                  CIFS_MOUNT_MAP_SPECIAL_CHR);
        kfree(pfile_info);
        return rc;
 }
@@ -2672,8 +2742,12 @@ cleanup_volume_info(struct smb_vol **pvolume_info)
                return;
        volume_info = *pvolume_info;
+        kfree(volume_info->username);
        kzfree(volume_info->password);
        kfree(volume_info->UNC);
+        kfree(volume_info->UNCip);
+        kfree(volume_info->domainname);
+        kfree(volume_info->iocharset);
        kfree(volume_info->prepath);
        kfree(volume_info);
        *pvolume_info = NULL;
@@ -2710,11 +2784,65 @@ build_unc_path_to_root(const struct smb_vol *volume_info,
        full_path[unc_len + cifs_sb->prepathlen] = 0; /* add trailing null */
        return full_path;
 }
+/*
+ * Perform a dfs referral query for a share and (optionally) prefix
+ *
+ * If a referral is found, cifs_sb->mountdata is freed and replaced by a
+ * newly composed option string for the submount (or by NULL if composing
+ * it fails).  Otherwise it is left untouched.
+ *
+ * Returns the rc from get_dfs_path, or the PTR_ERR() of a failed
+ * build_unc_path_to_root or cifs_compose_mount_options call.
+ */
+static int
+expand_dfs_referral(int xid, struct cifsSesInfo *pSesInfo,
+                    struct smb_vol *volume_info, struct cifs_sb_info *cifs_sb,
+                    int check_prefix)   /* 0: query the bare UNC only */
+{
+        int rc;
+        unsigned int num_referrals = 0;
+        struct dfs_info3_param *referrals = NULL;
+        char *full_path = NULL, *ref_path = NULL, *mdata = NULL;
+        full_path = build_unc_path_to_root(volume_info, cifs_sb);
+        if (IS_ERR(full_path))
+                return PTR_ERR(full_path);
+        /* For DFS paths, skip the first '\' of the UNC */
+        ref_path = check_prefix ? full_path + 1 : volume_info->UNC + 1;
+        rc = get_dfs_path(xid, pSesInfo , ref_path, cifs_sb->local_nls,
+                          &num_referrals, &referrals,
+                          cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+        if (!rc && num_referrals > 0) {
+                char *fake_devname = NULL;
+                mdata = cifs_compose_mount_options(cifs_sb->mountdata,
+                                                   full_path + 1, referrals,
+                                                   &fake_devname);
+                free_dfs_info_array(referrals, num_referrals);
+                kfree(fake_devname);    /* devname result is unused here */
+                if (cifs_sb->mountdata != NULL)
+                        kfree(cifs_sb->mountdata); /* superseded by mdata */
+                if (IS_ERR(mdata)) {
+                        rc = PTR_ERR(mdata);
+                        mdata = NULL;   /* never store an ERR_PTR */
+                }
+                cifs_sb->mountdata = mdata;
+        }
+        kfree(full_path);
+        return rc;
+}
 #endif
 int
 cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
-                char *mount_data_global, const char *devname)
+                const char *devname)
 {
        int rc;
        int xid;
@@ -2723,13 +2851,20 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
        struct cifsTconInfo *tcon;
        struct TCP_Server_Info *srvTcp;
        char   *full_path;
-        char *mount_data = mount_data_global;
        struct tcon_link *tlink;
 #ifdef CONFIG_CIFS_DFS_UPCALL
-        struct dfs_info3_param *referrals = NULL;
-        unsigned int num_referrals = 0;
        int referral_walks_count = 0;
 try_mount_again:
+        /* cleanup activities if we're chasing a referral */
+        if (referral_walks_count) {
+                if (tcon)
+                        cifs_put_tcon(tcon);
+                else if (pSesInfo)
+                        cifs_put_smb_ses(pSesInfo);
+                cleanup_volume_info(&volume_info);
+                FreeXid(xid);
+        }
 #endif
        rc = 0;
        tcon = NULL;
@@ -2746,7 +2881,8 @@ try_mount_again:
                goto out;
        }
-        if (cifs_parse_mount_options(mount_data, devname, volume_info)) {
+        if (cifs_parse_mount_options(cifs_sb->mountdata, devname,
+                                     volume_info)) {
                rc = -EINVAL;
                goto out;
        }
@@ -2842,6 +2978,24 @@ try_mount_again:
                               (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
 remote_path_check:
+#ifdef CONFIG_CIFS_DFS_UPCALL
+        /*
+         * Perform an unconditional check for whether there are DFS
+         * referrals for this path without prefix, to provide support
+         * for DFS referrals from w2k8 servers which don't seem to respond
+         * with PATH_NOT_COVERED to requests that include the prefix.
+         * Chase the referral if found, otherwise continue normally.
+         */
+        if (referral_walks_count == 0) {
+                int refrc = expand_dfs_referral(xid, pSesInfo, volume_info,
+                                                cifs_sb, false);
+                if (!refrc) {
+                        referral_walks_count++;
+                        goto try_mount_again;
+                }
+        }
+#endif
        /* check if a whole path (including prepath) is not remote */
        if (!rc && tcon) {
                /* build_path_to_root works only when we have a valid tcon */
@@ -2875,46 +3029,15 @@ remote_path_check:
                if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0)
                        convert_delimiter(cifs_sb->prepath,
                                        CIFS_DIR_SEP(cifs_sb));
-                full_path = build_unc_path_to_root(volume_info, cifs_sb);
-                if (IS_ERR(full_path)) {
-                        rc = PTR_ERR(full_path);
-                        goto mount_fail_check;
-                }
-                cFYI(1, "Getting referral for: %s", full_path);
-                rc = get_dfs_path(xid, pSesInfo , full_path + 1,
-                        cifs_sb->local_nls, &num_referrals, &referrals,
-                        cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
-                if (!rc && num_referrals > 0) {
-                        char *fake_devname = NULL;
-                        if (mount_data != mount_data_global)
-                                kfree(mount_data);
-                        mount_data = cifs_compose_mount_options(
+                rc = expand_dfs_referral(xid, pSesInfo, volume_info, cifs_sb,
-                                        cifs_sb->mountdata, full_path + 1,
+                                         true);
-                                        referrals, &fake_devname);
-                        free_dfs_info_array(referrals, num_referrals);
+                if (!rc) {
-                        kfree(fake_devname);
-                        kfree(full_path);
-                        if (IS_ERR(mount_data)) {
-                                rc = PTR_ERR(mount_data);
-                                mount_data = NULL;
-                                goto mount_fail_check;
-                        }
-                        if (tcon)
-                                cifs_put_tcon(tcon);
-                        else if (pSesInfo)
-                                cifs_put_smb_ses(pSesInfo);
-                        cleanup_volume_info(&volume_info);
                        referral_walks_count++;
-                        FreeXid(xid);
                        goto try_mount_again;
                }
+                goto mount_fail_check;
 #else /* No DFS support, return error on mount */
                rc = -EOPNOTSUPP;
 #endif
@@ -2947,8 +3070,6 @@ remote_path_check:
 mount_fail_check:
        /* on error free sesinfo and tcon struct if needed */
        if (rc) {
-                if (mount_data != mount_data_global)
-                        kfree(mount_data);
                /* If find_unc succeeded then rc == 0 so we can not end */
                /* up accidentally freeing someone elses tcon struct */
                if (tcon)
@@ -3064,7 +3185,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
        bcc_ptr += strlen("?????");
        bcc_ptr += 1;
        count = bcc_ptr - &pSMB->Password[0];
-        pSMB->hdr.smb_buf_length += count;
+        pSMB->hdr.smb_buf_length = cpu_to_be32(be32_to_cpu(
+                                        pSMB->hdr.smb_buf_length) + count);
        pSMB->ByteCount = cpu_to_le16(count);
        rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length,
@@ -3239,7 +3361,9 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid)
        struct cifsSesInfo *ses;
        struct cifsTconInfo *tcon = NULL;
        struct smb_vol *vol_info;
-        char username[MAX_USERNAME_SIZE + 1];
+        char username[28]; /* big enough for "krb50x" + hex of ULONG_MAX 6+16 */
+                           /* We used to have this as MAX_USERNAME which is   */
+                           /* way too big now (256 instead of 32) */
        vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL);
        if (vol_info == NULL) {
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 993f82045bf6..55d87ac52000 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -45,7 +45,7 @@
 #include "cifs_debug.h"
 #include "cifsfs.h"
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CIFS_NFSD_EXPORT
 static struct dentry *cifs_get_parent(struct dentry *dentry)
 {
        /* BB need to add code here eventually to enable export via NFSD */
@@ -63,5 +63,5 @@ const struct export_operations cifs_export_ops = {
        .encode_fs =  */
 };
-#endif /* EXPERIMENTAL */
+#endif /* CIFS_NFSD_EXPORT */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index faf59529e847..c672afef0c09 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -857,95 +857,6 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
                cifsi->server_eof = end_of_write;
 }
-ssize_t cifs_user_write(struct file *file, const char __user *write_data,
-        size_t write_size, loff_t *poffset)
-{
-        struct inode *inode = file->f_path.dentry->d_inode;
-        int rc = 0;
-        unsigned int bytes_written = 0;
-        unsigned int total_written;
-        struct cifs_sb_info *cifs_sb;
-        struct cifsTconInfo *pTcon;
-        int xid;
-        struct cifsFileInfo *open_file;
-        struct cifsInodeInfo *cifsi = CIFS_I(inode);
-        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-        /* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
-           *poffset, file->f_path.dentry->d_name.name); */
-        if (file->private_data == NULL)
-                return -EBADF;
-        open_file = file->private_data;
-        pTcon = tlink_tcon(open_file->tlink);
-        rc = generic_write_checks(file, poffset, &write_size, 0);
-        if (rc)
-                return rc;
-        xid = GetXid();
-        for (total_written = 0; write_size > total_written;
-             total_written += bytes_written) {
-                rc = -EAGAIN;
-                while (rc == -EAGAIN) {
-                        if (file->private_data == NULL) {
-                                /* file has been closed on us */
-                                FreeXid(xid);
-                        /* if we have gotten here we have written some data
-                           and blocked, and the file has been freed on us while
-                           we blocked so return what we managed to write */
-                                return total_written;
-                        }
-                        if (open_file->invalidHandle) {
-                                /* we could deadlock if we called
-                                   filemap_fdatawait from here so tell
-                                   reopen_file not to flush data to server
-                                   now */
-                                rc = cifs_reopen_file(open_file, false);
-                                if (rc != 0)
-                                        break;
-                        }
-                        rc = CIFSSMBWrite(xid, pTcon,
-                                open_file->netfid,
-                                min_t(const int, cifs_sb->wsize,
-                                      write_size - total_written),
-                                *poffset, &bytes_written,
-                                NULL, write_data + total_written, 0);
-                }
-                if (rc || (bytes_written == 0)) {
-                        if (total_written)
-                                break;
-                        else {
-                                FreeXid(xid);
-                                return rc;
-                        }
-                } else {
-                        cifs_update_eof(cifsi, *poffset, bytes_written);
-                        *poffset += bytes_written;
-                }
-        }
-        cifs_stats_bytes_written(pTcon, total_written);
-/* Do not update local mtime - server will set its actual value on write
- *      inode->i_ctime = inode->i_mtime =
- *              current_fs_time(inode->i_sb);*/
-        if (total_written > 0) {
-                spin_lock(&inode->i_lock);
-                if (*poffset > inode->i_size)
-                        i_size_write(inode, *poffset);
-                spin_unlock(&inode->i_lock);
-        }
-        mark_inode_dirty_sync(inode);
-        FreeXid(xid);
-        return total_written;
-}
 static ssize_t cifs_write(struct cifsFileInfo *open_file,
                          const char *write_data, size_t write_size,
                          loff_t *poffset)
@@ -1420,9 +1331,10 @@ retry_write:
        return rc;
 }
-static int cifs_writepage(struct page *page, struct writeback_control *wbc)
+static int
+cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
 {
-        int rc = -EFAULT;
+        int rc;
        int xid;
        xid = GetXid();
@@ -1442,15 +1354,29 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc)
         * to fail to update with the state of the page correctly.
         */
        set_page_writeback(page);
+retry_write:
        rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
-        SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
+        if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
-        unlock_page(page);
+                goto retry_write;
+        else if (rc == -EAGAIN)
+                redirty_page_for_writepage(wbc, page);
+        else if (rc != 0)
+                SetPageError(page);
+        else
+                SetPageUptodate(page);
        end_page_writeback(page);
        page_cache_release(page);
        FreeXid(xid);
        return rc;
 }
+static int cifs_writepage(struct page *page, struct writeback_control *wbc)
+{       /* ->writepage entry point: write the page out, then unlock it */
+        int rc = cifs_writepage_locked(page, wbc);
+        unlock_page(page);      /* _locked variant leaves unlocking to us */
+        return rc;
+}
 static int cifs_write_end(struct file *file, struct address_space *mapping,
                        loff_t pos, unsigned len, unsigned copied,
                        struct page *page, void *fsdata)
@@ -1519,8 +1445,13 @@ int cifs_strict_fsync(struct file *file, int datasync)
        cFYI(1, "Sync file - name: %s datasync: 0x%x",
                file->f_path.dentry->d_name.name, datasync);
-        if (!CIFS_I(inode)->clientCanCacheRead)
+        if (!CIFS_I(inode)->clientCanCacheRead) {
-                cifs_invalidate_mapping(inode);
+                rc = cifs_invalidate_mapping(inode);
+                if (rc) {
+                        cFYI(1, "rc: %d during invalidate phase", rc);
+                        rc = 0; /* don't care about it in fsync */
+                }
+        }
        tcon = tlink_tcon(smbfile->tlink);
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
@@ -1726,7 +1657,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
        return total_written;
 }
-static ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
+ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
                                unsigned long nr_segs, loff_t pos)
 {
        ssize_t written;
@@ -1849,17 +1780,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
        return total_read;
 }
-ssize_t cifs_user_read(struct file *file, char __user *read_data,
+ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
-                       size_t read_size, loff_t *poffset)
-{
-        struct iovec iov;
-        iov.iov_base = read_data;
-        iov.iov_len = read_size;
-        return cifs_iovec_read(file, &iov, 1, poffset);
-}
-static ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
                               unsigned long nr_segs, loff_t pos)
 {
        ssize_t read;
@@ -1987,8 +1908,11 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
        xid = GetXid();
-        if (!CIFS_I(inode)->clientCanCacheRead)
+        if (!CIFS_I(inode)->clientCanCacheRead) {
-                cifs_invalidate_mapping(inode);
+                rc = cifs_invalidate_mapping(inode);
+                if (rc)
+                        return rc;
+        }
        rc = generic_file_mmap(file, vma);
        if (rc == 0)
@@ -2415,6 +2339,27 @@ static void cifs_invalidate_page(struct page *page, unsigned long offset)
                cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
 }
+static int cifs_launder_page(struct page *page)
+{       /* ->launder_page: write a dirty page back; page is not unlocked here */
+        int rc = 0;
+        loff_t range_start = page_offset(page);
+        loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
+        struct writeback_control wbc = {
+                .sync_mode = WB_SYNC_ALL,       /* helper retries on -EAGAIN */
+                .nr_to_write = 0,
+                .range_start = range_start,     /* range spans this page only */
+                .range_end = range_end,
+        };
+        cFYI(1, "Launder page: %p", page);
+        if (clear_page_dirty_for_io(page))      /* skip write if not dirty */
+                rc = cifs_writepage_locked(page, &wbc);
+        cifs_fscache_invalidate_page(page, page->mapping->host);
+        return rc;      /* 0, or the error from cifs_writepage_locked */
+}
 void cifs_oplock_break(struct work_struct *work)
 {
        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
@@ -2486,7 +2431,7 @@ const struct address_space_operations cifs_addr_ops = {
        .set_page_dirty = __set_page_dirty_nobuffers,
        .releasepage = cifs_release_page,
        .invalidatepage = cifs_invalidate_page,
-        /* .direct_IO = */
+        .launder_page = cifs_launder_page,
 };
 /*
@@ -2503,5 +2448,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
        .set_page_dirty = __set_page_dirty_nobuffers,
        .releasepage = cifs_release_page,
        .invalidatepage = cifs_invalidate_page,
-        /* .direct_IO = */
+        .launder_page = cifs_launder_page,
 };
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 8852470b4fbb..de02ed5e25c2 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -878,7 +878,7 @@ retry_iget5_locked:
 }
 /* gets root inode */
-struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
+struct inode *cifs_root_iget(struct super_block *sb)
 {
        int xid;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -1683,71 +1683,70 @@ cifs_inode_needs_reval(struct inode *inode)
 /*
 * Zap the cache. Called when invalid_mapping flag is set.
 */
-void
+int
 cifs_invalidate_mapping(struct inode *inode)
 {
-        int rc;
+        int rc = 0;
        struct cifsInodeInfo *cifs_i = CIFS_I(inode);
        cifs_i->invalid_mapping = false;
-        /* write back any cached data */
        if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
-                rc = filemap_write_and_wait(inode->i_mapping);
+                rc = invalidate_inode_pages2(inode->i_mapping);
-                mapping_set_error(inode->i_mapping, rc);
+                if (rc) {
+                        cERROR(1, "%s: could not invalidate inode %p", __func__,
+                               inode);
+                        cifs_i->invalid_mapping = true;
+                }
        }
-        invalidate_remote_inode(inode);
        cifs_fscache_reset_inode_cookie(inode);
+        return rc;
 }
-int cifs_revalidate_file(struct file *filp)
+int cifs_revalidate_file_attr(struct file *filp)
 {
        int rc = 0;
        struct inode *inode = filp->f_path.dentry->d_inode;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
        if (!cifs_inode_needs_reval(inode))
-                goto check_inval;
+                return rc;
        if (tlink_tcon(cfile->tlink)->unix_ext)
                rc = cifs_get_file_info_unix(filp);
        else
                rc = cifs_get_file_info(filp);
-check_inval:
-        if (CIFS_I(inode)->invalid_mapping)
-                cifs_invalidate_mapping(inode);
        return rc;
 }
-/* revalidate a dentry's inode attributes */
+int cifs_revalidate_dentry_attr(struct dentry *dentry)
-int cifs_revalidate_dentry(struct dentry *dentry)
 {
        int xid;
        int rc = 0;
-        char *full_path = NULL;
        struct inode *inode = dentry->d_inode;
        struct super_block *sb = dentry->d_sb;
+        char *full_path = NULL;
        if (inode == NULL)
                return -ENOENT;
-        xid = GetXid();
        if (!cifs_inode_needs_reval(inode))
-                goto check_inval;
+                return rc;
+        xid = GetXid();
        /* can not safely grab the rename sem here if rename calls revalidate
           since that would deadlock */
        full_path = build_path_from_dentry(dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
-                goto check_inval;
+                goto out;
        }
-        cFYI(1, "Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld "
+        cFYI(1, "Update attributes: %s inode 0x%p count %d dentry: 0x%p d_time "
-                 "jiffies %ld", full_path, inode, inode->i_count.counter,
+                 "%ld jiffies %ld", full_path, inode, inode->i_count.counter,
                 dentry, dentry->d_time, jiffies);
        if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
@@ -1756,41 +1755,83 @@ int cifs_revalidate_dentry(struct dentry *dentry)
                rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
                                         xid, NULL);
-check_inval:
+out:
-        if (CIFS_I(inode)->invalid_mapping)
-                cifs_invalidate_mapping(inode);
        kfree(full_path);
        FreeXid(xid);
        return rc;
 }
+int cifs_revalidate_file(struct file *filp)
+{       /* refresh attributes via filp, then zap the cache if flagged */
+        int rc;
+        struct inode *inode = filp->f_path.dentry->d_inode;
+        rc = cifs_revalidate_file_attr(filp);
+        if (rc)
+                return rc;      /* attribute refresh failed: stop here */
+        if (CIFS_I(inode)->invalid_mapping)     /* flag set: zap the cache */
+                rc = cifs_invalidate_mapping(inode);
+        return rc;
+}
+/* revalidate a dentry's inode attributes, then zap the cache if flagged */
+int cifs_revalidate_dentry(struct dentry *dentry)
+{
+        int rc;
+        struct inode *inode = dentry->d_inode;
+        rc = cifs_revalidate_dentry_attr(dentry);
+        if (rc)
+                return rc;      /* attribute refresh failed: stop here */
+        if (CIFS_I(inode)->invalid_mapping)     /* flag set: zap the cache */
+                rc = cifs_invalidate_mapping(inode);
+        return rc;
+}
 int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
                 struct kstat *stat)
 {
        struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
        struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
-        int err = cifs_revalidate_dentry(dentry);
+        struct inode *inode = dentry->d_inode;
+        int rc;
-        if (!err) {
-                generic_fillattr(dentry->d_inode, stat);
-                stat->blksize = CIFS_MAX_MSGSIZE;
-                stat->ino = CIFS_I(dentry->d_inode)->uniqueid;
-                /*
+        /*
-                 * If on a multiuser mount without unix extensions, and the
+         * We need to be sure that all dirty pages are written and the server
-                 * admin hasn't overridden them, set the ownership to the
+         * has actual ctime, mtime and file length.
-                 * fsuid/fsgid of the current process.
+         */
-                 */
+        if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
-                if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) &&
+            inode->i_mapping->nrpages != 0) {
-                    !tcon->unix_ext) {
+                rc = filemap_fdatawait(inode->i_mapping);
-                        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID))
+                if (rc) {
-                                stat->uid = current_fsuid();
+                        mapping_set_error(inode->i_mapping, rc);
-                        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID))
+                        return rc;
-                                stat->gid = current_fsgid();
                }
        }
-        return err;
+        rc = cifs_revalidate_dentry_attr(dentry);
+        if (rc)
+                return rc;
+        generic_fillattr(inode, stat);
+        stat->blksize = CIFS_MAX_MSGSIZE;
+        stat->ino = CIFS_I(inode)->uniqueid;
+        /*
+         * If on a multiuser mount without unix extensions, and the admin hasn't
+         * overridden them, set the ownership to the fsuid/fsgid of the current
+         * process.
+         */
+        if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) &&
+            !tcon->unix_ext) {
+                if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID))
+                        stat->uid = current_fsuid();
+                if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID))
+                        stat->gid = current_fsgid();
+        }
+        return rc;
 }
 static int cifs_truncate_page(struct address_space *mapping, loff_t from)
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 0c684ae4c071..907531ac5888 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -304,12 +304,10 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
        memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */
-        buffer->smb_buf_length =
+        buffer->smb_buf_length = cpu_to_be32(
            (2 * word_count) + sizeof(struct smb_hdr) -
            4 /*  RFC 1001 length field does not count */  +
-            2 /* for bcc field itself */ ;
+            2 /* for bcc field itself */) ;
-        /* Note that this is the only network field that has to be converted
-           to big endian and it is done just before we send it */
        buffer->Protocol[0] = 0xFF;
        buffer->Protocol[1] = 'S';
@@ -424,7 +422,7 @@ check_smb_hdr(struct smb_hdr *smb, __u16 mid)
 int
 checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
 {
-        __u32 len = smb->smb_buf_length;
+        __u32 len = be32_to_cpu(smb->smb_buf_length);
        __u32 clc_len;  /* calculated length */
        cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len);
@@ -464,7 +462,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
        if (check_smb_hdr(smb, mid))
                return 1;
-        clc_len = smbCalcSize_LE(smb);
+        clc_len = smbCalcSize(smb);
        if (4 + len != length) {
                cERROR(1, "Length read does not match RFC1001 length %d",
@@ -521,7 +519,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
                        (struct smb_com_transaction_change_notify_rsp *)buf;
                struct file_notify_information *pnotify;
                __u32 data_offset = 0;
-                if (get_bcc_le(buf) > sizeof(struct file_notify_information)) {
+                if (get_bcc(buf) > sizeof(struct file_notify_information)) {
                        data_offset = le32_to_cpu(pSMBr->DataOffset);
                        pnotify = (struct file_notify_information *)
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 79f641eeda30..79b71c2c7c9d 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -919,13 +919,6 @@ smbCalcSize(struct smb_hdr *ptr)
                2 /* size of the bcc field */ + get_bcc(ptr));
 }
-unsigned int
-smbCalcSize_LE(struct smb_hdr *ptr)
-{
-        return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) +
-                2 /* size of the bcc field */ + get_bcc_le(ptr));
-}
 /* The following are taken from fs/ntfs/util.c */
 #define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000)
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index f6728eb6f4b9..7dd462100378 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -276,7 +276,7 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
 }
 static void
-decode_unicode_ssetup(char **pbcc_area, __u16 bleft, struct cifsSesInfo *ses,
+decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
                      const struct nls_table *nls_cp)
 {
        int len;
@@ -284,19 +284,6 @@ decode_unicode_ssetup(char **pbcc_area, __u16 bleft, struct cifsSesInfo *ses,
        cFYI(1, "bleft %d", bleft);
-        /*
-         * Windows servers do not always double null terminate their final
-         * Unicode string. Check to see if there are an uneven number of bytes
-         * left. If so, then add an extra NULL pad byte to the end of the
-         * response.
-         *
-         * See section 2.7.2 in "Implementing CIFS" for details
-         */
-        if (bleft % 2) {
-                data[bleft] = 0;
-                ++bleft;
-        }
        kfree(ses->serverOS);
        ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
        cFYI(1, "serverOS=%s", ses->serverOS);
@@ -634,7 +621,7 @@ ssetup_ntlmssp_authenticate:
        and rest of bcc area. This allows us to avoid
        a large buffer 17K allocation */
        iov[0].iov_base = (char *)pSMB;
-        iov[0].iov_len = smb_buf->smb_buf_length + 4;
+        iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4;
        /* setting this here allows the code at the end of the function
           to free the request buffer if there's an error */
@@ -669,7 +656,7 @@ ssetup_ntlmssp_authenticate:
                 * to use challenge/response method (i.e. Password bit is 1).
                 */
-                calc_lanman_hash(ses->password, ses->server->cryptkey,
+                rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
                                 ses->server->secMode & SECMODE_PW_ENCRYPT ?
                                        true : false, lnm_session_key);
@@ -872,9 +859,10 @@ ssetup_ntlmssp_authenticate:
        iov[2].iov_len = (long) bcc_ptr - (long) str_area;
        count = iov[1].iov_len + iov[2].iov_len;
-        smb_buf->smb_buf_length += count;
+        smb_buf->smb_buf_length =
+                cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count);
-        put_bcc_le(count, smb_buf);
+        put_bcc(count, smb_buf);
        rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type,
                          CIFS_LOG_ERROR);
@@ -929,7 +917,9 @@ ssetup_ntlmssp_authenticate:
        }
        /* BB check if Unicode and decode strings */
-        if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
+        if (bytes_remaining == 0) {
+                /* no string area to decode, do nothing */
+        } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
                /* unicode string area must be word-aligned */
                if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
                        ++bcc_ptr;
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
deleted file mode 100644
index 04721485925d..000000000000
--- a/fs/cifs/smbdes.c
+++ /dev/null
@@ -1,418 +0,0 @@
-/*
-   Unix SMB/Netbios implementation.
-   Version 1.9.
-   a partial implementation of DES designed for use in the
-   SMB authentication protocol
-   Copyright (C) Andrew Tridgell 1998
-   Modified by Steve French (sfrench@us.ibm.com) 2002,2004
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2 of the License, or
-   (at your option) any later version.
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-/* NOTES:
-   This code makes no attempt to be fast! In fact, it is a very
-   slow implementation
-   This code is NOT a complete DES implementation. It implements only
-   the minimum necessary for SMB authentication, as used by all SMB
-   products (including every copy of Microsoft Windows95 ever sold)
-   In particular, it can only do a unchained forward DES pass. This
-   means it is not possible to use this code for encryption/decryption
-   of data, instead it is only useful as a "hash" algorithm.
-   There is no entry point into this code that allows normal DES operation.
-   I believe this means that this code does not come under ITAR
-   regulations but this is NOT a legal opinion. If you are concerned
-   about the applicability of ITAR regulations to this code then you
-   should confirm it for yourself (and maybe let me know if you come
-   up with a different answer to the one above)
-*/
-#include <linux/slab.h>
-#define uchar unsigned char
-static uchar perm1[56] = { 57, 49, 41, 33, 25, 17, 9,
-        1, 58, 50, 42, 34, 26, 18,
-        10, 2, 59, 51, 43, 35, 27,
-        19, 11, 3, 60, 52, 44, 36,
-        63, 55, 47, 39, 31, 23, 15,
-        7, 62, 54, 46, 38, 30, 22,
-        14, 6, 61, 53, 45, 37, 29,
-        21, 13, 5, 28, 20, 12, 4
-};
-static uchar perm2[48] = { 14, 17, 11, 24, 1, 5,
-        3, 28, 15, 6, 21, 10,
-        23, 19, 12, 4, 26, 8,
-        16, 7, 27, 20, 13, 2,
-        41, 52, 31, 37, 47, 55,
-        30, 40, 51, 45, 33, 48,
-        44, 49, 39, 56, 34, 53,
-        46, 42, 50, 36, 29, 32
-};
-static uchar perm3[64] = { 58, 50, 42, 34, 26, 18, 10, 2,
-        60, 52, 44, 36, 28, 20, 12, 4,
-        62, 54, 46, 38, 30, 22, 14, 6,
-        64, 56, 48, 40, 32, 24, 16, 8,
-        57, 49, 41, 33, 25, 17, 9, 1,
-        59, 51, 43, 35, 27, 19, 11, 3,
-        61, 53, 45, 37, 29, 21, 13, 5,
-        63, 55, 47, 39, 31, 23, 15, 7
-};
-static uchar perm4[48] = { 32, 1, 2, 3, 4, 5,
-        4, 5, 6, 7, 8, 9,
-        8, 9, 10, 11, 12, 13,
-        12, 13, 14, 15, 16, 17,
-        16, 17, 18, 19, 20, 21,
-        20, 21, 22, 23, 24, 25,
-        24, 25, 26, 27, 28, 29,
-        28, 29, 30, 31, 32, 1
-};
-static uchar perm5[32] = { 16, 7, 20, 21,
-        29, 12, 28, 17,
-        1, 15, 23, 26,
-        5, 18, 31, 10,
-        2, 8, 24, 14,
-        32, 27, 3, 9,
-        19, 13, 30, 6,
-        22, 11, 4, 25
-};
-static uchar perm6[64] = { 40, 8, 48, 16, 56, 24, 64, 32,
-        39, 7, 47, 15, 55, 23, 63, 31,
-        38, 6, 46, 14, 54, 22, 62, 30,
-        37, 5, 45, 13, 53, 21, 61, 29,
-        36, 4, 44, 12, 52, 20, 60, 28,
-        35, 3, 43, 11, 51, 19, 59, 27,
-        34, 2, 42, 10, 50, 18, 58, 26,
-        33, 1, 41, 9, 49, 17, 57, 25
-};
-static uchar sc[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
-static uchar sbox[8][4][16] = {
-        {{14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7},
-         {0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8},
-         {4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0},
-         {15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13} },
-        {{15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10},
-         {3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5},
-         {0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15},
-         {13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9} },
-        {{10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8},
-         {13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1},
-         {13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7},
-         {1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12} },
-        {{7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15},
-         {13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9},
-         {10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4},
-         {3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14} },
-        {{2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9},
-         {14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6},
-         {4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14},
-         {11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3} },
-        {{12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11},
-         {10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8},
-         {9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6},
-         {4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13} },
-        {{4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1},
-         {13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6},
-         {1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2},
-         {6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12} },
-        {{13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7},
-         {1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2},
-         {7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8},
-         {2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11} }
-};
-static void
-permute(char *out, char *in, uchar *p, int n)
-{
-        int i;
-        for (i = 0; i < n; i++)
-                out[i] = in[p[i] - 1];
-}
-static void
-lshift(char *d, int count, int n)
-{
-        char out[64];
-        int i;
-        for (i = 0; i < n; i++)
-                out[i] = d[(i + count) % n];
-        for (i = 0; i < n; i++)
-                d[i] = out[i];
-}
-static void
-concat(char *out, char *in1, char *in2, int l1, int l2)
-{
-        while (l1--)
-                *out++ = *in1++;
-        while (l2--)
-                *out++ = *in2++;
-}
-static void
-xor(char *out, char *in1, char *in2, int n)
-{
-        int i;
-        for (i = 0; i < n; i++)
-                out[i] = in1[i] ^ in2[i];
-}
-static void
-dohash(char *out, char *in, char *key, int forw)
-{
-        int i, j, k;
-        char *pk1;
-        char c[28];
-        char d[28];
-        char *cd;
-        char (*ki)[48];
-        char *pd1;
-        char l[32], r[32];
-        char *rl;
-        /* Have to reduce stack usage */
-        pk1 = kmalloc(56+56+64+64, GFP_KERNEL);
-        if (pk1 == NULL)
-                return;
-        ki = kmalloc(16*48, GFP_KERNEL);
-        if (ki == NULL) {
-                kfree(pk1);
-                return;
-        }
-        cd = pk1 + 56;
-        pd1 = cd  + 56;
-        rl = pd1 + 64;
-        permute(pk1, key, perm1, 56);
-        for (i = 0; i < 28; i++)
-                c[i] = pk1[i];
-        for (i = 0; i < 28; i++)
-                d[i] = pk1[i + 28];
-        for (i = 0; i < 16; i++) {
-                lshift(c, sc[i], 28);
-                lshift(d, sc[i], 28);
-                concat(cd, c, d, 28, 28);
-                permute(ki[i], cd, perm2, 48);
-        }
-        permute(pd1, in, perm3, 64);
-        for (j = 0; j < 32; j++) {
-                l[j] = pd1[j];
-                r[j] = pd1[j + 32];
-        }
-        for (i = 0; i < 16; i++) {
-                char *er;  /* er[48]  */
-                char *erk; /* erk[48] */
-                char b[8][6];
-                char *cb;  /* cb[32]  */
-                char *pcb; /* pcb[32] */
-                char *r2;  /* r2[32]  */
-                er = kmalloc(48+48+32+32+32, GFP_KERNEL);
-                if (er == NULL) {
-                        kfree(pk1);
-                        kfree(ki);
-                        return;
-                }
-                erk = er+48;
-                cb  = erk+48;
-                pcb = cb+32;
-                r2  = pcb+32;
-                permute(er, r, perm4, 48);
-                xor(erk, er, ki[forw ? i : 15 - i], 48);
-                for (j = 0; j < 8; j++)
-                        for (k = 0; k < 6; k++)
-                                b[j][k] = erk[j * 6 + k];
-                for (j = 0; j < 8; j++) {
-                        int m, n;
-                        m = (b[j][0] << 1) | b[j][5];
-                        n = (b[j][1] << 3) | (b[j][2] << 2) | (b[j][3] <<
-                                                               1) | b[j][4];
-                        for (k = 0; k < 4; k++)
-                                b[j][k] =
-                                    (sbox[j][m][n] & (1 << (3 - k))) ? 1 : 0;
-                }
-                for (j = 0; j < 8; j++)
-                        for (k = 0; k < 4; k++)
-                                cb[j * 4 + k] = b[j][k];
-                permute(pcb, cb, perm5, 32);
-                xor(r2, l, pcb, 32);
-                for (j = 0; j < 32; j++)
-                        l[j] = r[j];
-                for (j = 0; j < 32; j++)
-                        r[j] = r2[j];
-                kfree(er);
-        }
-        concat(rl, r, l, 32, 32);
-        permute(out, rl, perm6, 64);
-        kfree(pk1);
-        kfree(ki);
-}
-static void
-str_to_key(unsigned char *str, unsigned char *key)
-{
-        int i;
-        key[0] = str[0] >> 1;
-        key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
-        key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
-        key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
-        key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
-        key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
-        key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
-        key[7] = str[6] & 0x7F;
-        for (i = 0; i < 8; i++)
-                key[i] = (key[i] << 1);
-}
-static void
-smbhash(unsigned char *out, const unsigned char *in, unsigned char *key,
-        int forw)
-{
-        int i;
-        char *outb; /* outb[64] */
-        char *inb;  /* inb[64]  */
-        char *keyb; /* keyb[64] */
-        unsigned char key2[8];
-        outb = kmalloc(64 * 3, GFP_KERNEL);
-        if (outb == NULL)
-                return;
-        inb  = outb + 64;
-        keyb = inb +  64;
-        str_to_key(key, key2);
-        for (i = 0; i < 64; i++) {
-                inb[i] = (in[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
-                keyb[i] = (key2[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
-                outb[i] = 0;
-        }
-        dohash(outb, inb, keyb, forw);
-        for (i = 0; i < 8; i++)
-                out[i] = 0;
-        for (i = 0; i < 64; i++) {
-                if (outb[i])
-                        out[i / 8] |= (1 << (7 - (i % 8)));
-        }
-        kfree(outb);
-}
-void
-E_P16(unsigned char *p14, unsigned char *p16)
-{
-        unsigned char sp8[8] =
-            { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
-        smbhash(p16, sp8, p14, 1);
-        smbhash(p16 + 8, sp8, p14 + 7, 1);
-}
-void
-E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
-{
-        smbhash(p24, c8, p21, 1);
-        smbhash(p24 + 8, c8, p21 + 7, 1);
-        smbhash(p24 + 16, c8, p21 + 14, 1);
-}
-#if 0 /* currently unused */
-static void
-D_P16(unsigned char *p14, unsigned char *in, unsigned char *out)
-{
-        smbhash(out, in, p14, 0);
-        smbhash(out + 8, in + 8, p14 + 7, 0);
-}
-static void
-E_old_pw_hash(unsigned char *p14, unsigned char *in, unsigned char *out)
-{
-        smbhash(out, in, p14, 1);
-        smbhash(out + 8, in + 8, p14 + 7, 1);
-}
-/* these routines are currently unneeded, but may be
-        needed later */
-void
-cred_hash1(unsigned char *out, unsigned char *in, unsigned char *key)
-{
-        unsigned char buf[8];
-        smbhash(buf, in, key, 1);
-        smbhash(out, buf, key + 9, 1);
-}
-void
-cred_hash2(unsigned char *out, unsigned char *in, unsigned char *key)
-{
-        unsigned char buf[8];
-        static unsigned char key2[8];
-        smbhash(buf, in, key, 1);
-        key2[0] = key[7];
-        smbhash(out, buf, key2, 1);
-}
-void
-cred_hash3(unsigned char *out, unsigned char *in, unsigned char *key, int forw)
-{
-        static unsigned char key2[8];
-        smbhash(out, in, key, forw);
-        key2[0] = key[7];
-        smbhash(out + 8, in + 8, key2, forw);
-}
-#endif /* unneeded routines */
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index b5041c849981..1525d5e662b6 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -47,6 +47,88 @@
 #define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8)
 #define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val)))
+/*
+ * Spread the 56 bits held in str[0..6] over the eight bytes key[0..7],
+ * seven bits per byte.  Each output byte ends up carrying its seven bits
+ * in positions 7..1, with bit 0 left clear.
+ */
+static void
+str_to_key(unsigned char *str, unsigned char *key)
+{
+        int n;
+        /* first byte: the top seven bits of str[0] */
+        key[0] = str[0] >> 1;
+        /* bytes 1..6 each straddle two neighbouring input bytes */
+        for (n = 1; n < 7; n++)
+                key[n] = ((str[n - 1] & ((1 << n) - 1)) << (7 - n)) |
+                         (str[n] >> (n + 1));
+        /* last byte: the low seven bits of str[6] */
+        key[7] = str[6] & 0x7F;
+        /* move every 7-bit group up by one position, vacating bit 0 */
+        for (n = 0; n < 8; n++)
+                key[n] <<= 1;
+}
+/*
+ * Encrypt one 8-byte block with the kernel's "ecb(des)" cipher.
+ *
+ * @out: receives the 8 bytes of ciphertext
+ * @in:  8 bytes of input
+ * @key: 7 bytes of key material; expanded to an 8-byte key by str_to_key()
+ *
+ * Returns 0 on success, otherwise the error from allocating the cipher
+ * or from the encrypt operation.
+ */
+static int
+smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
+{
+        int rc;
+        unsigned char key2[8];
+        struct crypto_blkcipher *tfm_des;
+        struct scatterlist sgin, sgout;
+        struct blkcipher_desc desc;
+        str_to_key(key, key2);
+        tfm_des = crypto_alloc_blkcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC);
+        if (IS_ERR(tfm_des)) {
+                rc = PTR_ERR(tfm_des);
+                cERROR(1, "could not allocate des crypto API\n");
+                goto smbhash_err;
+        }
+        desc.tfm = tfm_des;
+        /* desc lives on the stack; do not hand random request flags down */
+        desc.flags = 0;
+        crypto_blkcipher_setkey(tfm_des, key2, 8);
+        sg_init_one(&sgin, in, 8);
+        sg_init_one(&sgout, out, 8);
+        rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8);
+        if (rc)
+                cERROR(1, "could not encrypt crypt key rc: %d\n", rc);
+        /* the handle is per call: release it on success as well as failure */
+        crypto_free_blkcipher(tfm_des);
+smbhash_err:
+        return rc;
+}
+/*
+ * Fill the 16 bytes at p16 with two smbhash() outputs.  Both halves
+ * encrypt the same fixed 8-byte block (the ASCII bytes "KGS!@#$%"); the
+ * first is keyed from p14[0..6], the second from p14[7..13].
+ *
+ * Returns 0 on success or the first non-zero smbhash() result; on a
+ * first-half failure the second half is not attempted.
+ */
+static int
+E_P16(unsigned char *p14, unsigned char *p16)
+{
+        unsigned char sp8[8] =
+            { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
+        int rc;
+        rc = smbhash(p16, sp8, p14);
+        if (!rc)
+                rc = smbhash(p16 + 8, sp8, p14 + 7);
+        return rc;
+}
+/*
+ * Fill the 24 bytes at p24 with three smbhash() outputs.  Each 8-byte
+ * slice encrypts the block at c8, keyed from successive 7-byte slices
+ * of p21 (offsets 0, 7 and 14).
+ *
+ * Returns 0 on success or the first non-zero smbhash() result; later
+ * slices are not attempted once one fails.
+ */
+static int
+E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
+{
+        int blk;
+        int rc = 0;
+        for (blk = 0; blk < 3 && !rc; blk++)
+                rc = smbhash(p24 + 8 * blk, c8, p21 + 7 * blk);
+        return rc;
+}
 /* produce a md4 message digest from data of length n bytes */
 int
 mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
@@ -87,40 +169,30 @@ mdfour_err:
        return rc;
 }
-/* Does the des encryption from the NT or LM MD4 hash. */
-static void
-SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
-              unsigned char p24[24])
-{
-        unsigned char p21[21];
-        memset(p21, '\0', 21);
-        memcpy(p21, passwd, 16);
-        E_P24(p21, c8, p24);
-}
 /*
   This implements the X/Open SMB password encryption
   It takes a password, a 8 byte "crypt key" and puts 24 bytes of
   encrypted password into p24 */
 /* Note that password must be uppercased and null terminated */
-void
+int
 SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24)
 {
-        unsigned char p14[15], p21[21];
+        int rc;
+        unsigned char p14[14], p16[16], p21[21];
-        memset(p21, '\0', 21);
        memset(p14, '\0', 14);
-        strncpy((char *) p14, (char *) passwd, 14);
+        memset(p16, '\0', 16);
+        memset(p21, '\0', 21);
-/*      strupper((char *)p14); *//* BB at least uppercase the easy range */
+        memcpy(p14, passwd, 14);
-        E_P16(p14, p21);
+        rc = E_P16(p14, p16);
+        if (rc)
+                return rc;
-        SMBOWFencrypt(p21, c8, p24);
+        memcpy(p21, p16, 16);
+        rc = E_P24(p21, c8, p24);
-        memset(p14, 0, 15);
+        return rc;
-        memset(p21, 0, 21);
 }
 /* Routines for Windows NT MD4 Hash functions. */
@@ -279,16 +351,18 @@ int
 SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24)
 {
        int rc;
-        unsigned char p21[21];
+        unsigned char p16[16], p21[21];
+        memset(p16, '\0', 16);
        memset(p21, '\0', 21);
-        rc = E_md4hash(passwd, p21);
+        rc = E_md4hash(passwd, p16);
        if (rc) {
                cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
                return rc;
        }
-        SMBOWFencrypt(p21, c8, p24);
+        memcpy(p21, p16, 16);
+        rc = E_P24(p21, c8, p24);
        return rc;
 }
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 46d8756f2b24..f2513fb8c391 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -129,7 +129,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
        unsigned int len = iov[0].iov_len;
        unsigned int total_len;
        int first_vec = 0;
-        unsigned int smb_buf_length = smb_buffer->smb_buf_length;
+        unsigned int smb_buf_length = be32_to_cpu(smb_buffer->smb_buf_length);
        struct socket *ssocket = server->ssocket;
        if (ssocket == NULL)
@@ -144,17 +144,10 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
        else
                smb_msg.msg_flags = MSG_NOSIGNAL;
-        /* smb header is converted in header_assemble. bcc and rest of SMB word
-           area, and byte area if necessary, is converted to littleendian in
-           cifssmb.c and RFC1001 len is converted to bigendian in smb_send
-           Flags2 is converted in SendReceive */
        total_len = 0;
        for (i = 0; i < n_vec; i++)
                total_len += iov[i].iov_len;
-        smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length);
        cFYI(1, "Sending smb:  total_len %d", total_len);
        dump_smb(smb_buffer, len);
@@ -243,7 +236,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
        /* Don't want to modify the buffer as a
           side effect of this call. */
-        smb_buffer->smb_buf_length = smb_buf_length;
+        smb_buffer->smb_buf_length = cpu_to_be32(smb_buf_length);
        return rc;
 }
@@ -387,7 +380,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
 #ifdef CONFIG_CIFS_STATS2
        atomic_inc(&server->inSend);
 #endif
-        rc = smb_send(server, in_buf, in_buf->smb_buf_length);
+        rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
 #ifdef CONFIG_CIFS_STATS2
        atomic_dec(&server->inSend);
        mid->when_sent = jiffies;
@@ -422,7 +415,7 @@ SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
        int resp_buf_type;
        iov[0].iov_base = (char *)in_buf;
-        iov[0].iov_len = in_buf->smb_buf_length + 4;
+        iov[0].iov_len = be32_to_cpu(in_buf->smb_buf_length) + 4;
        flags |= CIFS_NO_RESP;
        rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags);
        cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc);
@@ -488,10 +481,10 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
        int rc = 0;
        /* -4 for RFC1001 length and +2 for BCC field */
-        in_buf->smb_buf_length = sizeof(struct smb_hdr) - 4  + 2;
+        in_buf->smb_buf_length = cpu_to_be32(sizeof(struct smb_hdr) - 4  + 2);
        in_buf->Command = SMB_COM_NT_CANCEL;
        in_buf->WordCount = 0;
-        put_bcc_le(0, in_buf);
+        put_bcc(0, in_buf);
        mutex_lock(&server->srv_mutex);
        rc = cifs_sign_smb(in_buf, server, &mid->sequence_number);
@@ -499,7 +492,7 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
                mutex_unlock(&server->srv_mutex);
                return rc;
        }
-        rc = smb_send(server, in_buf, in_buf->smb_buf_length);
+        rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
        mutex_unlock(&server->srv_mutex);
        cFYI(1, "issued NT_CANCEL for mid %u, rc = %d",
@@ -612,7 +605,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
                return rc;
        }
-        receive_len = midQ->resp_buf->smb_buf_length;
+        receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
        if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
                cERROR(1, "Frame too large received.  Length: %d  Xid: %d",
@@ -651,11 +644,6 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
                rc = map_smb_to_linux_error(midQ->resp_buf,
                                            flags & CIFS_LOG_ERROR);
-                /* convert ByteCount if necessary */
-                if (receive_len >= sizeof(struct smb_hdr) - 4
-                    /* do not count RFC1001 header */  +
-                    (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
-                        put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
                if ((flags & CIFS_NO_RESP) == 0)
                        midQ->resp_buf = NULL;  /* mark it so buf will
                                                   not be freed by
@@ -698,9 +686,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
           to the same server. We may make this configurable later or
           use ses->maxReq */
-        if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
+        if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
+                        MAX_CIFS_HDR_SIZE - 4) {
                cERROR(1, "Illegal length, greater than maximum frame, %d",
-                           in_buf->smb_buf_length);
+                           be32_to_cpu(in_buf->smb_buf_length));
                return -EIO;
        }
@@ -733,7 +722,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 #ifdef CONFIG_CIFS_STATS2
        atomic_inc(&ses->server->inSend);
 #endif
-        rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length);
+        rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
 #ifdef CONFIG_CIFS_STATS2
        atomic_dec(&ses->server->inSend);
        midQ->when_sent = jiffies;
@@ -768,7 +757,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
                return rc;
        }
-        receive_len = midQ->resp_buf->smb_buf_length;
+        receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
        if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
                cERROR(1, "Frame too large received.  Length: %d  Xid: %d",
@@ -781,7 +770,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
        if (midQ->resp_buf && out_buf
            && (midQ->midState == MID_RESPONSE_RECEIVED)) {
-                out_buf->smb_buf_length = receive_len;
+                out_buf->smb_buf_length = cpu_to_be32(receive_len);
                memcpy((char *)out_buf + 4,
                       (char *)midQ->resp_buf + 4,
                       receive_len);
@@ -800,16 +789,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
                        }
                }
-                *pbytes_returned = out_buf->smb_buf_length;
+                *pbytes_returned = be32_to_cpu(out_buf->smb_buf_length);
                /* BB special case reconnect tid and uid here? */
                rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
-                /* convert ByteCount if necessary */
-                if (receive_len >= sizeof(struct smb_hdr) - 4
-                    /* do not count RFC1001 header */  +
-                    (2 * out_buf->WordCount) + 2 /* bcc */ )
-                        put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
        } else {
                rc = -EIO;
                cERROR(1, "Bad MID state?");
@@ -877,9 +860,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
           to the same server. We may make this configurable later or
           use ses->maxReq */
-        if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
+        if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
+                        MAX_CIFS_HDR_SIZE - 4) {
                cERROR(1, "Illegal length, greater than maximum frame, %d",
-                           in_buf->smb_buf_length);
+                           be32_to_cpu(in_buf->smb_buf_length));
                return -EIO;
        }
@@ -910,7 +894,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
 #ifdef CONFIG_CIFS_STATS2
        atomic_inc(&ses->server->inSend);
 #endif
-        rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length);
+        rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
 #ifdef CONFIG_CIFS_STATS2
        atomic_dec(&ses->server->inSend);
        midQ->when_sent = jiffies;
@@ -977,7 +961,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
        if (rc != 0)
                return rc;
-        receive_len = midQ->resp_buf->smb_buf_length;
+        receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
        if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
                cERROR(1, "Frame too large received.  Length: %d  Xid: %d",
                        receive_len, xid);
@@ -993,7 +977,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
                goto out;
        }
-        out_buf->smb_buf_length = receive_len;
+        out_buf->smb_buf_length = cpu_to_be32(receive_len);
        memcpy((char *)out_buf + 4,
               (char *)midQ->resp_buf + 4,
               receive_len);
@@ -1012,17 +996,11 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
                }
        }
-        *pbytes_returned = out_buf->smb_buf_length;
+        *pbytes_returned = be32_to_cpu(out_buf->smb_buf_length);
        /* BB special case reconnect tid and uid here? */
        rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
-        /* convert ByteCount if necessary */
-        if (receive_len >= sizeof(struct smb_hdr) - 4
-            /* do not count RFC1001 header */  +
-            (2 * out_buf->WordCount) + 2 /* bcc */ )
-                put_bcc(get_bcc_le(out_buf), out_buf);
 out:
        delete_mid(midQ);
        if (rstart && rc == -EACCES)
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index eae2a1491608..912995e013ec 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -112,6 +112,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
        struct cifsTconInfo *pTcon;
        struct super_block *sb;
        char *full_path;
+        struct cifs_ntsd *pacl;
        if (direntry == NULL)
                return -EIO;
@@ -166,6 +167,25 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
                rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
                        (__u16)value_size, cifs_sb->local_nls,
                        cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+        } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
+                        strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
+                /* set_cifs_acl() is handed a private copy of the value */
+                pacl = kmalloc(value_size, GFP_KERNEL);
+                if (!pacl) {
+                        cFYI(1, "%s: Can't allocate memory for ACL",
+                                        __func__);
+                        rc = -ENOMEM;
+                } else {
+#ifdef CONFIG_CIFS_ACL
+                        memcpy(pacl, ea_value, value_size);
+                        rc = set_cifs_acl(pacl, value_size,
+                                direntry->d_inode, full_path);
+                        if (rc == 0) /* force revalidate of the inode */
+                                CIFS_I(direntry->d_inode)->time = 0;
+#else
+                        cFYI(1, "Set CIFS ACL not supported yet");
+#endif /* CONFIG_CIFS_ACL */
+                        /*
+                         * Release the buffer in both configurations; with
+                         * CONFIG_CIFS_ACL unset it would otherwise leak.
+                         */
+                        kfree(pacl);
+                }
        } else {
                int temp;
                temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
diff --git a/fs/compat.c b/fs/compat.c
index 72fe6cda9108..0ea00832de23 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1306,241 +1306,6 @@ compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int
        return do_sys_open(dfd, filename, flags, mode);
 }
-/*
- * compat_count() counts the number of arguments/envelopes. It is basically
- * a copy of count() from fs/exec.c, except that it works with 32 bit argv
- * and envp pointers.
- */
-static int compat_count(compat_uptr_t __user *argv, int max)
-{
-        int i = 0;
-        if (argv != NULL) {
-                for (;;) {
-                        compat_uptr_t p;
-                        if (get_user(p, argv))
-                                return -EFAULT;
-                        if (!p)
-                                break;
-                        argv++;
-                        if (i++ >= max)
-                                return -E2BIG;
-                        if (fatal_signal_pending(current))
-                                return -ERESTARTNOHAND;
-                        cond_resched();
-                }
-        }
-        return i;
-}
-/*
- * compat_copy_strings() is basically a copy of copy_strings() from fs/exec.c
- * except that it works with 32 bit argv and envp pointers.
- */
-static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
-                                struct linux_binprm *bprm)
-{
-        struct page *kmapped_page = NULL;
-        char *kaddr = NULL;
-        unsigned long kpos = 0;
-        int ret;
-        while (argc-- > 0) {
-                compat_uptr_t str;
-                int len;
-                unsigned long pos;
-                if (get_user(str, argv+argc) ||
-                    !(len = strnlen_user(compat_ptr(str), MAX_ARG_STRLEN))) {
-                        ret = -EFAULT;
-                        goto out;
-                }
-                if (len > MAX_ARG_STRLEN) {
-                        ret = -E2BIG;
-                        goto out;
-                }
-                /* We're going to work our way backwords. */
-                pos = bprm->p;
-                str += len;
-                bprm->p -= len;
-                while (len > 0) {
-                        int offset, bytes_to_copy;
-                        if (fatal_signal_pending(current)) {
-                                ret = -ERESTARTNOHAND;
-                                goto out;
-                        }
-                        cond_resched();
-                        offset = pos % PAGE_SIZE;
-                        if (offset == 0)
-                                offset = PAGE_SIZE;
-                        bytes_to_copy = offset;
-                        if (bytes_to_copy > len)
-                                bytes_to_copy = len;
-                        offset -= bytes_to_copy;
-                        pos -= bytes_to_copy;
-                        str -= bytes_to_copy;
-                        len -= bytes_to_copy;
-                        if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
-                                struct page *page;
-                                page = get_arg_page(bprm, pos, 1);
-                                if (!page) {
-                                        ret = -E2BIG;
-                                        goto out;
-                                }
-                                if (kmapped_page) {
-                                        flush_kernel_dcache_page(kmapped_page);
-                                        kunmap(kmapped_page);
-                                        put_page(kmapped_page);
-                                }
-                                kmapped_page = page;
-                                kaddr = kmap(kmapped_page);
-                                kpos = pos & PAGE_MASK;
-                                flush_cache_page(bprm->vma, kpos,
-                                                 page_to_pfn(kmapped_page));
-                        }
-                        if (copy_from_user(kaddr+offset, compat_ptr(str),
-                                                bytes_to_copy)) {
-                                ret = -EFAULT;
-                                goto out;
-                        }
-                }
-        }
-        ret = 0;
-out:
-        if (kmapped_page) {
-                flush_kernel_dcache_page(kmapped_page);
-                kunmap(kmapped_page);
-                put_page(kmapped_page);
-        }
-        return ret;
-}
-/*
- * compat_do_execve() is mostly a copy of do_execve(), with the exception
- * that it processes 32 bit argv and envp pointers.
- */
-int compat_do_execve(char * filename,
-        compat_uptr_t __user *argv,
-        compat_uptr_t __user *envp,
-        struct pt_regs * regs)
-{
-        struct linux_binprm *bprm;
-        struct file *file;
-        struct files_struct *displaced;
-        bool clear_in_exec;
-        int retval;
-        retval = unshare_files(&displaced);
-        if (retval)
-                goto out_ret;
-        retval = -ENOMEM;
-        bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
-        if (!bprm)
-                goto out_files;
-        retval = prepare_bprm_creds(bprm);
-        if (retval)
-                goto out_free;
-        retval = check_unsafe_exec(bprm);
-        if (retval < 0)
-                goto out_free;
-        clear_in_exec = retval;
-        current->in_execve = 1;
-        file = open_exec(filename);
-        retval = PTR_ERR(file);
-        if (IS_ERR(file))
-                goto out_unmark;
-        sched_exec();
-        bprm->file = file;
-        bprm->filename = filename;
-        bprm->interp = filename;
-        retval = bprm_mm_init(bprm);
-        if (retval)
-                goto out_file;
-        bprm->argc = compat_count(argv, MAX_ARG_STRINGS);
-        if ((retval = bprm->argc) < 0)
-                goto out;
-        bprm->envc = compat_count(envp, MAX_ARG_STRINGS);
-        if ((retval = bprm->envc) < 0)
-                goto out;
-        retval = prepare_binprm(bprm);
-        if (retval < 0)
-                goto out;
-        retval = copy_strings_kernel(1, &bprm->filename, bprm);
-        if (retval < 0)
-                goto out;
-        bprm->exec = bprm->p;
-        retval = compat_copy_strings(bprm->envc, envp, bprm);
-        if (retval < 0)
-                goto out;
-        retval = compat_copy_strings(bprm->argc, argv, bprm);
-        if (retval < 0)
-                goto out;
-        retval = search_binary_handler(bprm, regs);
-        if (retval < 0)
-                goto out;
-        /* execve succeeded */
-        current->fs->in_exec = 0;
-        current->in_execve = 0;
-        acct_update_integrals(current);
-        free_bprm(bprm);
-        if (displaced)
-                put_files_struct(displaced);
-        return retval;
-out:
-        if (bprm->mm) {
-                acct_arg_size(bprm, 0);
-                mmput(bprm->mm);
-        }
-out_file:
-        if (bprm->file) {
-                allow_write_access(bprm->file);
-                fput(bprm->file);
-        }
-out_unmark:
-        if (clear_in_exec)
-                current->fs->in_exec = 0;
-        current->in_execve = 0;
-out_free:
-        free_bprm(bprm);
-out_files:
-        if (displaced)
-                reset_files_struct(displaced);
-out_ret:
-        return retval;
-}
 #define __COMPAT_NFDBITS       (8 * sizeof(compat_ulong_t))
 static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 9908c20bb1a5..9d17d350abc5 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -53,11 +53,14 @@ DEFINE_SPINLOCK(configfs_dirent_lock);
 static void configfs_d_iput(struct dentry * dentry,
                            struct inode * inode)
 {
-        struct configfs_dirent * sd = dentry->d_fsdata;
+        struct configfs_dirent *sd = dentry->d_fsdata;
        if (sd) {
                BUG_ON(sd->s_dentry != dentry);
+                /* Coordinate with configfs_readdir */
+                spin_lock(&configfs_dirent_lock);
                sd->s_dentry = NULL;
+                spin_unlock(&configfs_dirent_lock);
                configfs_put(sd);
        }
        iput(inode);
@@ -689,7 +692,8 @@ static int create_default_group(struct config_group *parent_group,
                        sd = child->d_fsdata;
                        sd->s_type |= CONFIGFS_USET_DEFAULT;
                } else {
-                        d_delete(child);
+                        BUG_ON(child->d_inode);
+                        d_drop(child);
                        dput(child);
                }
        }
@@ -1547,7 +1551,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
        struct configfs_dirent * parent_sd = dentry->d_fsdata;
        struct configfs_dirent *cursor = filp->private_data;
        struct list_head *p, *q = &cursor->s_sibling;
-        ino_t ino;
+        ino_t ino = 0;
        int i = filp->f_pos;
        switch (i) {
@@ -1575,6 +1579,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
                                struct configfs_dirent *next;
                                const char * name;
                                int len;
+                                struct inode *inode = NULL;
                                next = list_entry(p, struct configfs_dirent,
                                                   s_sibling);
@@ -1583,9 +1588,28 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
                                name = configfs_get_name(next);
                                len = strlen(name);
-                                if (next->s_dentry)
-                                        ino = next->s_dentry->d_inode->i_ino;
+                                /*
-                                else
+                                 * We'll have a dentry and an inode for
+                                 * PINNED items and for open attribute
+                                 * files.  We lock here to prevent a race
+                                 * with configfs_d_iput() clearing
+                                 * s_dentry before calling iput().
+                                 *
+                                 * Why do we go to the trouble?  If
+                                 * someone has an attribute file open,
+                                 * the inode number should match until
+                                 * they close it.  Beyond that, we don't
+                                 * care.
+                                 */
+                                spin_lock(&configfs_dirent_lock);
+                                dentry = next->s_dentry;
+                                if (dentry)
+                                        inode = dentry->d_inode;
+                                if (inode)
+                                        ino = inode->i_ino;
+                                spin_unlock(&configfs_dirent_lock);
+                                if (!inode)
                                        ino = iunique(configfs_sb, 2);
                                if (filldir(dirent, name, len, filp->f_pos, ino,
@@ -1685,7 +1709,8 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
                err = configfs_attach_group(sd->s_element, &group->cg_item,
                                            dentry);
                if (err) {
-                        d_delete(dentry);
+                        BUG_ON(dentry->d_inode);
+                        d_drop(dentry);
                        dput(dentry);
                } else {
                        spin_lock(&configfs_dirent_lock);
diff --git a/fs/dcache.c b/fs/dcache.c
index 22a0ef41bad1..37f72ee5bf7c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -35,6 +35,7 @@
 #include <linux/hardirq.h>
 #include <linux/bit_spinlock.h>
 #include <linux/rculist_bl.h>
+#include <linux/prefetch.h>
 #include "internal.h"
 /*
@@ -1219,7 +1220,7 @@ void shrink_dcache_parent(struct dentry * parent)
 EXPORT_SYMBOL(shrink_dcache_parent);
 /*
- * Scan `nr' dentries and return the number which remain.
+ * Scan `sc->nr_to_scan' dentries and return the number which remain.
 *
 * We need to avoid reentering the filesystem if the caller is performing a
 * GFP_NOFS allocation attempt.  One example deadlock is:
@@ -1230,8 +1231,12 @@ EXPORT_SYMBOL(shrink_dcache_parent);
 *
 * In this case we return -1 to tell the caller that we baled.
 */
-static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink,
+                                struct shrink_control *sc)
 {
+        int nr = sc->nr_to_scan;
+        gfp_t gfp_mask = sc->gfp_mask;
        if (nr) {
                if (!(gfp_mask & __GFP_FS))
                        return -1;
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 89d394d8fe24..90f76575c056 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -428,26 +428,17 @@ static ssize_t write_file_bool(struct file *file, const char __user *user_buf,
                               size_t count, loff_t *ppos)
 {
        char buf[32];
-        int buf_size;
+        size_t buf_size;
+        bool bv;
        u32 *val = file->private_data;
        buf_size = min(count, (sizeof(buf)-1));
        if (copy_from_user(buf, user_buf, buf_size))
                return -EFAULT;
-        switch (buf[0]) {
+        /* terminate so a zero-length write never parses stale stack data */
+        buf[buf_size] = '\0';
+        if (strtobool(buf, &bv) == 0)
-        case 'y':
+                *val = bv;
-        case 'Y':
-        case '1':
-                *val = 1;
-                break;
-        case 'n':
-        case 'N':
-        case '0':
-                *val = 0;
-                break;
-        }
-        
        return count;
 }
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 0d329ff8ed4c..9b026ea8baa9 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -100,6 +100,7 @@ struct dlm_cluster {
        unsigned int cl_log_debug;
        unsigned int cl_protocol;
        unsigned int cl_timewarn_cs;
+        unsigned int cl_waitwarn_us;
 };
 enum {
@@ -114,6 +115,7 @@ enum {
        CLUSTER_ATTR_LOG_DEBUG,
        CLUSTER_ATTR_PROTOCOL,
        CLUSTER_ATTR_TIMEWARN_CS,
+        CLUSTER_ATTR_WAITWARN_US,
 };
 struct cluster_attribute {
@@ -166,6 +168,7 @@ CLUSTER_ATTR(scan_secs, 1);
 CLUSTER_ATTR(log_debug, 0);
 CLUSTER_ATTR(protocol, 0);
 CLUSTER_ATTR(timewarn_cs, 1);
+CLUSTER_ATTR(waitwarn_us, 0);
 static struct configfs_attribute *cluster_attrs[] = {
        [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -179,6 +182,7 @@ static struct configfs_attribute *cluster_attrs[] = {
        [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
        [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
        [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
+        [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
        NULL,
 };
@@ -439,6 +443,7 @@ static struct config_group *make_cluster(struct config_group *g,
        cl->cl_log_debug = dlm_config.ci_log_debug;
        cl->cl_protocol = dlm_config.ci_protocol;
        cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
+        cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
        space_list = &sps->ss_group;
        comm_list = &cms->cs_group;
@@ -986,6 +991,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 #define DEFAULT_LOG_DEBUG          0
 #define DEFAULT_PROTOCOL           0
 #define DEFAULT_TIMEWARN_CS      500 /* 5 sec = 500 centiseconds */
+#define DEFAULT_WAITWARN_US        0
 struct dlm_config_info dlm_config = {
        .ci_tcp_port = DEFAULT_TCP_PORT,
@@ -998,6 +1004,7 @@ struct dlm_config_info dlm_config = {
        .ci_scan_secs = DEFAULT_SCAN_SECS,
        .ci_log_debug = DEFAULT_LOG_DEBUG,
        .ci_protocol = DEFAULT_PROTOCOL,
-        .ci_timewarn_cs = DEFAULT_TIMEWARN_CS
+        .ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
+        .ci_waitwarn_us = DEFAULT_WAITWARN_US
 };
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 4f1d6fce58c5..dd0ce24d5a80 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -28,6 +28,7 @@ struct dlm_config_info {
        int ci_log_debug;
        int ci_protocol;
        int ci_timewarn_cs;
+        int ci_waitwarn_us;
 };
 extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index b94204913011..0262451eb9c6 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -209,6 +209,7 @@ struct dlm_args {
 #define DLM_IFL_WATCH_TIMEWARN  0x00400000
 #define DLM_IFL_TIMEOUT_CANCEL  0x00800000
 #define DLM_IFL_DEADLOCK_CANCEL 0x01000000
+#define DLM_IFL_STUB_MS         0x02000000 /* magic number for m_flags */
 #define DLM_IFL_USER            0x00000001
 #define DLM_IFL_ORPHAN          0x00000002
@@ -245,6 +246,7 @@ struct dlm_lkb {
        int8_t                  lkb_wait_type;  /* type of reply waiting for */
        int8_t                  lkb_wait_count;
+        int                     lkb_wait_nodeid; /* for debugging */
        struct list_head        lkb_idtbl_list; /* lockspace lkbtbl */
        struct list_head        lkb_statequeue; /* rsb g/c/w list */
@@ -254,6 +256,7 @@ struct dlm_lkb {
        struct list_head        lkb_ownqueue;   /* list of locks for a process */
        struct list_head        lkb_time_list;
        ktime_t                 lkb_timestamp;
+        ktime_t                 lkb_wait_time;
        unsigned long           lkb_timeout_cs;
        struct dlm_callback     lkb_callbacks[DLM_CALLBACKS_SIZE];
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 56d6bfcc1e48..f71d0b5abd95 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -799,10 +799,84 @@ static int msg_reply_type(int mstype)
        return -1;
 }
+/*
+ * Look up @nodeid in the @warned table of @num_nodes slots, where a zero
+ * entry marks the first unused slot.  Returns 1 if @nodeid is already in
+ * the table.  Otherwise returns 0, storing @nodeid in the first unused
+ * slot when one exists (a full table is left untouched).
+ */
+static int nodeid_warned(int nodeid, int num_nodes, int *warned)
+{
+        int idx = 0;
+        while (idx < num_nodes) {
+                int *slot = &warned[idx++];
+                if (*slot == 0) {
+                        /* reached the unused tail: remember this node */
+                        *slot = nodeid;
+                        return 0;
+                }
+                if (*slot == nodeid)
+                        return 1;
+        }
+        return 0;
+}
+/*
+ * Warn about lkbs on ls->ls_waiters that have been waiting for a reply for
+ * at least dlm_config.ci_waitwarn_us microseconds.  Does nothing when that
+ * setting is zero.  An expired lkb has its lkb_wait_time reset to zero so
+ * later scans skip it.  The warned[] table suppresses repeated "waitwarn"
+ * errors for a nodeid already reported during this scan.
+ */
+void dlm_scan_waiters(struct dlm_ls *ls)
+{
+        struct dlm_lkb *lkb;
+        ktime_t zero = ktime_set(0, 0);
+        s64 us;
+        s64 debug_maxus = 0;
+        u32 debug_scanned = 0;
+        u32 debug_expired = 0;
+        int num_nodes = 0;
+        int *warned = NULL;
+        if (!dlm_config.ci_waitwarn_us)
+                return;
+        mutex_lock(&ls->ls_waiters_mutex);
+        list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
+                /* a zero wait time means this lkb was already reported */
+                if (ktime_equal(lkb->lkb_wait_time, zero))
+                        continue;
+                debug_scanned++;
+                us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));
+                if (us < dlm_config.ci_waitwarn_us)
+                        continue;
+                lkb->lkb_wait_time = zero;
+                debug_expired++;
+                if (us > debug_maxus)
+                        debug_maxus = us;
+                if (!num_nodes) {
+                        /*
+                         * Allocate the table lazily, on the first expired
+                         * waiter.  kcalloc() takes (count, size, flags) and
+                         * returns zeroed memory, which nodeid_warned() reads
+                         * as "no node recorded yet".
+                         */
+                        num_nodes = ls->ls_num_nodes;
+                        warned = kcalloc(num_nodes, sizeof(int), GFP_KERNEL);
+                }
+                /* without the table we still count, but cannot dedup logs */
+                if (!warned)
+                        continue;
+                if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned))
+                        continue;
+                log_error(ls, "waitwarn %x %lld %d us check connection to "
+                          "node %d", lkb->lkb_id, (long long)us,
+                          dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
+        }
+        mutex_unlock(&ls->ls_waiters_mutex);
+        /* kfree(NULL) is a no-op, so no guard is needed */
+        kfree(warned);
+        if (debug_expired)
+                log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
+                          debug_scanned, debug_expired,
+                          dlm_config.ci_waitwarn_us, (long long)debug_maxus);
+}
 /* add/remove lkb from global waiters list of lkb's waiting for
   a reply from a remote node */
-static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
+static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
 {
        struct dlm_ls *ls = lkb->lkb_resource->res_ls;
        int error = 0;
@@ -842,6 +916,8 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
        lkb->lkb_wait_count++;
        lkb->lkb_wait_type = mstype;
+        lkb->lkb_wait_time = ktime_get();
+        lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
        hold_lkb(lkb);
        list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
 out:
@@ -961,10 +1037,10 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
        struct dlm_ls *ls = lkb->lkb_resource->res_ls;
        int error;
-        if (ms != &ls->ls_stub_ms)
+        if (ms->m_flags != DLM_IFL_STUB_MS)
                mutex_lock(&ls->ls_waiters_mutex);
        error = _remove_from_waiters(lkb, ms->m_type, ms);
-        if (ms != &ls->ls_stub_ms)
+        if (ms->m_flags != DLM_IFL_STUB_MS)
                mutex_unlock(&ls->ls_waiters_mutex);
        return error;
 }
@@ -1157,6 +1233,16 @@ void dlm_adjust_timeouts(struct dlm_ls *ls)
        list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
                lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
        mutex_unlock(&ls->ls_timeout_mutex);
+        if (!dlm_config.ci_waitwarn_us)
+                return;
+        mutex_lock(&ls->ls_waiters_mutex);
+        list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
+                if (ktime_to_us(lkb->lkb_wait_time))
+                        lkb->lkb_wait_time = ktime_get();
+        }
+        mutex_unlock(&ls->ls_waiters_mutex);
 }
 /* lkb is master or local copy */
@@ -1376,14 +1462,8 @@ static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb)
   ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become
   compatible with other granted locks */
-static void munge_demoted(struct dlm_lkb *lkb, struct dlm_message *ms)
+static void munge_demoted(struct dlm_lkb *lkb)
 {
-        if (ms->m_type != DLM_MSG_CONVERT_REPLY) {
-                log_print("munge_demoted %x invalid reply type %d",
-                          lkb->lkb_id, ms->m_type);
-                return;
-        }
        if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) {
                log_print("munge_demoted %x invalid modes gr %d rq %d",
                          lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode);
@@ -2844,12 +2924,12 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
        struct dlm_mhandle *mh;
        int to_nodeid, error;
-        error = add_to_waiters(lkb, mstype);
+        to_nodeid = r->res_nodeid;
+        error = add_to_waiters(lkb, mstype, to_nodeid);
        if (error)
                return error;
-        to_nodeid = r->res_nodeid;
        error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
        if (error)
                goto fail;
@@ -2880,9 +2960,9 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
        /* down conversions go without a reply from the master */
        if (!error && down_conversion(lkb)) {
                remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY);
+                r->res_ls->ls_stub_ms.m_flags = DLM_IFL_STUB_MS;
                r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
                r->res_ls->ls_stub_ms.m_result = 0;
-                r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags;
                __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
        }
@@ -2951,12 +3031,12 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
        struct dlm_mhandle *mh;
        int to_nodeid, error;
-        error = add_to_waiters(lkb, DLM_MSG_LOOKUP);
+        to_nodeid = dlm_dir_nodeid(r);
+        error = add_to_waiters(lkb, DLM_MSG_LOOKUP, to_nodeid);
        if (error)
                return error;
-        to_nodeid = dlm_dir_nodeid(r);
        error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh);
        if (error)
                goto fail;
@@ -3070,6 +3150,9 @@ static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
 static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
 {
+        if (ms->m_flags == DLM_IFL_STUB_MS)
+                return;
        lkb->lkb_sbflags = ms->m_sbflags;
        lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
                         (ms->m_flags & 0x0000FFFF);
@@ -3612,7 +3695,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
                /* convert was queued on remote master */
                receive_flags_reply(lkb, ms);
                if (is_demoted(lkb))
-                        munge_demoted(lkb, ms);
+                        munge_demoted(lkb);
                del_lkb(r, lkb);
                add_lkb(r, lkb, DLM_LKSTS_CONVERT);
                add_timeout(lkb);
@@ -3622,7 +3705,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
                /* convert was granted on remote master */
                receive_flags_reply(lkb, ms);
                if (is_demoted(lkb))
-                        munge_demoted(lkb, ms);
+                        munge_demoted(lkb);
                grant_lock_pc(r, lkb, ms);
                queue_cast(r, lkb, 0);
                break;
@@ -3996,15 +4079,17 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid)
        dlm_put_lockspace(ls);
 }
-static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
+static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb,
+                                   struct dlm_message *ms_stub)
 {
        if (middle_conversion(lkb)) {
                hold_lkb(lkb);
-                ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
+                memset(ms_stub, 0, sizeof(struct dlm_message));
-                ls->ls_stub_ms.m_result = -EINPROGRESS;
+                ms_stub->m_flags = DLM_IFL_STUB_MS;
-                ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+                ms_stub->m_type = DLM_MSG_CONVERT_REPLY;
-                ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
+                ms_stub->m_result = -EINPROGRESS;
-                _receive_convert_reply(lkb, &ls->ls_stub_ms);
+                ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
+                _receive_convert_reply(lkb, ms_stub);
                /* Same special case as in receive_rcom_lock_args() */
                lkb->lkb_grmode = DLM_LOCK_IV;
@@ -4045,13 +4130,27 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb)
 void dlm_recover_waiters_pre(struct dlm_ls *ls)
 {
        struct dlm_lkb *lkb, *safe;
+        struct dlm_message *ms_stub;
        int wait_type, stub_unlock_result, stub_cancel_result;
+        /* kmalloc() takes (size, flags); scratch message for stub replies */
+        ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL);
+        if (!ms_stub) {
+                log_error(ls, "dlm_recover_waiters_pre no mem");
+                return;
+        }
        mutex_lock(&ls->ls_waiters_mutex);
        list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) {
-                log_debug(ls, "pre recover waiter lkid %x type %d flags %x",
-                          lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags);
+                /* exclude debug messages about unlocks because there can be so
+                   many and they aren't very interesting */
+                if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) {
+                        log_debug(ls, "recover_waiter %x nodeid %d "
+                                  "msg %d to %d", lkb->lkb_id, lkb->lkb_nodeid,
+                                  lkb->lkb_wait_type, lkb->lkb_wait_nodeid);
+                }
                /* all outstanding lookups, regardless of destination  will be
                   resent after recovery is done */
@@ -4097,26 +4196,28 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
                        break;
                case DLM_MSG_CONVERT:
-                        recover_convert_waiter(ls, lkb);
+                        recover_convert_waiter(ls, lkb, ms_stub);
                        break;
                case DLM_MSG_UNLOCK:
                        hold_lkb(lkb);
-                        ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY;
+                        memset(ms_stub, 0, sizeof(struct dlm_message));
-                        ls->ls_stub_ms.m_result = stub_unlock_result;
+                        ms_stub->m_flags = DLM_IFL_STUB_MS;
-                        ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+                        ms_stub->m_type = DLM_MSG_UNLOCK_REPLY;
-                        ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
+                        ms_stub->m_result = stub_unlock_result;
-                        _receive_unlock_reply(lkb, &ls->ls_stub_ms);
+                        ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
+                        _receive_unlock_reply(lkb, ms_stub);
                        dlm_put_lkb(lkb);
                        break;
                case DLM_MSG_CANCEL:
                        hold_lkb(lkb);
-                        ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY;
+                        memset(ms_stub, 0, sizeof(struct dlm_message));
-                        ls->ls_stub_ms.m_result = stub_cancel_result;
+                        ms_stub->m_flags = DLM_IFL_STUB_MS;
-                        ls->ls_stub_ms.m_flags = lkb->lkb_flags;
+                        ms_stub->m_type = DLM_MSG_CANCEL_REPLY;
-                        ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
+                        ms_stub->m_result = stub_cancel_result;
-                        _receive_cancel_reply(lkb, &ls->ls_stub_ms);
+                        ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
+                        _receive_cancel_reply(lkb, ms_stub);
                        dlm_put_lkb(lkb);
                        break;
@@ -4127,6 +4228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
                schedule();
        }
        mutex_unlock(&ls->ls_waiters_mutex);
+        kfree(ms_stub);
 }
 static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
@@ -4191,8 +4293,8 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
                ou = is_overlap_unlock(lkb);
                err = 0;
-                log_debug(ls, "recover_waiters_post %x type %d flags %x %s",
+                log_debug(ls, "recover_waiter %x nodeid %d msg %d r_nodeid %d",
-                          lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name);
+                          lkb->lkb_id, lkb->lkb_nodeid, mstype, r->res_nodeid);
                /* At this point we assume that we won't get a reply to any
                   previous op or overlap op on this lock.  First, do a big
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 88e93c80cc22..265017a7c3e7 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -24,6 +24,7 @@ int dlm_put_lkb(struct dlm_lkb *lkb);
 void dlm_scan_rsbs(struct dlm_ls *ls);
 int dlm_lock_recovery_try(struct dlm_ls *ls);
 void dlm_unlock_recovery(struct dlm_ls *ls);
+void dlm_scan_waiters(struct dlm_ls *ls);
 void dlm_scan_timeout(struct dlm_ls *ls);
 void dlm_adjust_timeouts(struct dlm_ls *ls);
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index f994a7dfda85..14cbf4099753 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -243,7 +243,6 @@ static struct dlm_ls *find_ls_to_scan(void)
 static int dlm_scand(void *data)
 {
        struct dlm_ls *ls;
-        int timeout_jiffies = dlm_config.ci_scan_secs * HZ;
        while (!kthread_should_stop()) {
                ls = find_ls_to_scan();
@@ -252,13 +251,14 @@ static int dlm_scand(void *data)
                                ls->ls_scan_time = jiffies;
                                dlm_scan_rsbs(ls);
                                dlm_scan_timeout(ls);
+                                dlm_scan_waiters(ls);
                                dlm_unlock_recovery(ls);
                        } else {
                                ls->ls_scan_time += HZ;
                        }
-                } else {
+                        continue;
-                        schedule_timeout_interruptible(timeout_jiffies);
                }
+                schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
        }
        return 0;
 }
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 30d8b85febbf..e2b878004364 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -71,6 +71,36 @@ static void send_op(struct plock_op *op)
        wake_up(&send_wq);
 }
+/* If a process was killed while waiting for the only plock on a file,
+   locks_remove_posix will not see any lock on the file so it won't
+   send an unlock-close to us to pass on to userspace to clean up the
+   abandoned waiter.  So, we have to insert the unlock-close when the
+   lock call is interrupted. */
+static void do_unlock_close(struct dlm_ls *ls, u64 number,
+                            struct file *file, struct file_lock *fl)
+{
+        struct plock_op *op;
+        op = kzalloc(sizeof(*op), GFP_NOFS);
+        if (!op)
+                return;
+        op->info.optype         = DLM_PLOCK_OP_UNLOCK;
+        op->info.pid            = fl->fl_pid;
+        op->info.fsid           = ls->ls_global_id;
+        op->info.number         = number;
+        op->info.start          = 0;
+        op->info.end            = OFFSET_MAX;
+        if (fl->fl_lmops && fl->fl_lmops->fl_grant)
+                op->info.owner  = (__u64) fl->fl_pid;
+        else
+                op->info.owner  = (__u64)(long) fl->fl_owner;
+        op->info.flags |= DLM_PLOCK_FL_CLOSE;
+        send_op(op);
+}
 int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
                   int cmd, struct file_lock *fl)
 {
@@ -114,9 +144,19 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
        send_op(op);
-        if (xop->callback == NULL)
+        if (xop->callback == NULL) {
-                wait_event(recv_wq, (op->done != 0));
+                rv = wait_event_killable(recv_wq, (op->done != 0));
-        else {
+                if (rv == -ERESTARTSYS) {
+                        log_debug(ls, "dlm_posix_lock: wait killed %llx",
+                                  (unsigned long long)number);
+                        spin_lock(&ops_lock);
+                        list_del(&op->list);
+                        spin_unlock(&ops_lock);
+                        kfree(xop);
+                        do_unlock_close(ls, number, file, fl);
+                        goto out;
+                }
+        } else {
                rv = FILE_LOCK_DEFERRED;
                goto out;
        }
@@ -233,6 +273,13 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
        else
                op->info.owner  = (__u64)(long) fl->fl_owner;
+        if (fl->fl_flags & FL_CLOSE) {
+                op->info.flags |= DLM_PLOCK_FL_CLOSE;
+                send_op(op);
+                rv = 0;
+                goto out;
+        }
        send_op(op);
        wait_event(recv_wq, (op->done != 0));
@@ -334,7 +381,10 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
        spin_lock(&ops_lock);
        if (!list_empty(&send_list)) {
                op = list_entry(send_list.next, struct plock_op, list);
-                list_move(&op->list, &recv_list);
+                if (op->info.flags & DLM_PLOCK_FL_CLOSE)
+                        list_del(&op->list);
+                else
+                        list_move(&op->list, &recv_list);
                memcpy(&info, &op->info, sizeof(info));
        }
        spin_unlock(&ops_lock);
@@ -342,6 +392,13 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
        if (!op)
                return -EAGAIN;
+        /* there is no need to get a reply from userspace for unlocks
+           that were generated by the vfs cleaning up for a close
+           (the process did not make an unlock call). */
+        if (op->info.flags & DLM_PLOCK_FL_CLOSE)
+                kfree(op);
        if (copy_to_user(u, &info, sizeof(info)))
                return -EFAULT;
        return sizeof(info);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index d5ab3fe7c198..e96bf3e9be88 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -611,7 +611,6 @@ static ssize_t device_write(struct file *file, const char __user *buf,
 out_sig:
        sigprocmask(SIG_SETMASK, &tmpsig, NULL);
-        recalc_sigpending();
 out_free:
        kfree(kbuf);
        return error;
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 98b77c89494c..c00e055b6282 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -40,9 +40,12 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 static void drop_slab(void)
 {
        int nr_objects;
+        struct shrink_control shrink = {
+                .gfp_mask = GFP_KERNEL,
+        };
        do {
-                nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
+                nr_objects = shrink_slab(&shrink, 1000, 1000);
        } while (nr_objects > 10);
 }
diff --git a/fs/exec.c b/fs/exec.c
index 5e62d26a4fec..936f5776655c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
 #include <linux/fs_struct.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/oom.h>
+#include <linux/compat.h>
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -166,8 +167,13 @@ out:
 }
 #ifdef CONFIG_MMU
+/*
-void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+ * The nascent bprm->mm is not visible until exec_mmap() but it can
+ * use a lot of memory, account these pages in current->mm temporarily
+ * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
+ * change the counter back via acct_arg_size(0).
+ */
+static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
 {
        struct mm_struct *mm = current->mm;
        long diff = (long)(pages - bprm->vma_pages);
@@ -186,7 +192,7 @@ void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
 #endif
 }
-struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
                int write)
 {
        struct page *page;
@@ -194,7 +200,7 @@ struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 #ifdef CONFIG_STACK_GROWSUP
        if (write) {
-                ret = expand_stack_downwards(bprm->vma, pos);
+                ret = expand_downwards(bprm->vma, pos);
                if (ret < 0)
                        return NULL;
        }
@@ -305,11 +311,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
 #else
-void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
 {
 }
-struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
                int write)
 {
        struct page *page;
@@ -398,22 +404,56 @@ err:
        return err;
 }
+struct user_arg_ptr {
+#ifdef CONFIG_COMPAT
+        bool is_compat;
+#endif
+        union {
+                const char __user *const __user *native;
+#ifdef CONFIG_COMPAT
+                compat_uptr_t __user *compat;
+#endif
+        } ptr;
+};
+static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
+{
+        const char __user *native;
+#ifdef CONFIG_COMPAT
+        if (unlikely(argv.is_compat)) {
+                compat_uptr_t compat;
+                if (get_user(compat, argv.ptr.compat + nr))
+                        return ERR_PTR(-EFAULT);
+                return compat_ptr(compat);
+        }
+#endif
+        if (get_user(native, argv.ptr.native + nr))
+                return ERR_PTR(-EFAULT);
+        return native;
+}
 /*
 * count() counts the number of strings in array ARGV.
 */
-static int count(const char __user * const __user * argv, int max)
+static int count(struct user_arg_ptr argv, int max)
 {
        int i = 0;
-        if (argv != NULL) {
+        if (argv.ptr.native != NULL) {
                for (;;) {
-                        const char __user * p;
+                        const char __user *p = get_user_arg_ptr(argv, i);
-                        if (get_user(p, argv))
-                                return -EFAULT;
                        if (!p)
                                break;
-                        argv++;
+                        if (IS_ERR(p))
+                                return -EFAULT;
                        if (i++ >= max)
                                return -E2BIG;
@@ -430,7 +470,7 @@ static int count(const char __user * const __user * argv, int max)
 * processes's memory to the new process's stack.  The call to get_user_pages()
 * ensures the destination page is created and not swapped out.
 */
-static int copy_strings(int argc, const char __user *const __user *argv,
+static int copy_strings(int argc, struct user_arg_ptr argv,
                        struct linux_binprm *bprm)
 {
        struct page *kmapped_page = NULL;
@@ -443,16 +483,18 @@ static int copy_strings(int argc, const char __user *const __user *argv,
                int len;
                unsigned long pos;
-                if (get_user(str, argv+argc) ||
+                ret = -EFAULT;
-                                !(len = strnlen_user(str, MAX_ARG_STRLEN))) {
+                str = get_user_arg_ptr(argv, argc);
-                        ret = -EFAULT;
+                if (IS_ERR(str))
                        goto out;
-                }
-                if (!valid_arg_len(bprm, len)) {
+                len = strnlen_user(str, MAX_ARG_STRLEN);
-                        ret = -E2BIG;
+                if (!len)
+                        goto out;
+                ret = -E2BIG;
+                if (!valid_arg_len(bprm, len))
                        goto out;
-                }
                /* We're going to work our way backwords. */
                pos = bprm->p;
@@ -519,14 +561,19 @@ out:
 /*
 * Like copy_strings, but get argv and its values from kernel memory.
 */
-int copy_strings_kernel(int argc, const char *const *argv,
+int copy_strings_kernel(int argc, const char *const *__argv,
                        struct linux_binprm *bprm)
 {
        int r;
        mm_segment_t oldfs = get_fs();
+        struct user_arg_ptr argv = {
+                .ptr.native = (const char __user *const  __user *)__argv,
+        };
        set_fs(KERNEL_DS);
-        r = copy_strings(argc, (const char __user *const  __user *)argv, bprm);
+        r = copy_strings(argc, argv, bprm);
        set_fs(oldfs);
        return r;
 }
 EXPORT_SYMBOL(copy_strings_kernel);
@@ -553,7 +600,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
        unsigned long length = old_end - old_start;
        unsigned long new_start = old_start - shift;
        unsigned long new_end = old_end - shift;
-        struct mmu_gather *tlb;
+        struct mmu_gather tlb;
        BUG_ON(new_start > new_end);
@@ -579,12 +626,12 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
                return -ENOMEM;
        lru_add_drain();
-        tlb = tlb_gather_mmu(mm, 0);
+        tlb_gather_mmu(&tlb, mm, 0);
        if (new_end > old_start) {
                /*
                 * when the old and new regions overlap clear from new_end.
                 */
-                free_pgd_range(tlb, new_end, old_end, new_end,
+                free_pgd_range(&tlb, new_end, old_end, new_end,
                        vma->vm_next ? vma->vm_next->vm_start : 0);
        } else {
                /*
@@ -593,10 +640,10 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
                 * have constraints on va-space that make this illegal (IA64) -
                 * for the others its just a little faster.
                 */
-                free_pgd_range(tlb, old_start, old_end, new_end,
+                free_pgd_range(&tlb, old_start, old_end, new_end,
                        vma->vm_next ? vma->vm_next->vm_start : 0);
        }
-        tlb_finish_mmu(tlb, new_end, old_end);
+        tlb_finish_mmu(&tlb, new_end, old_end);
        /*
         * Shrink the vma to just the new range.  Always succeeds.
@@ -1004,6 +1051,7 @@ char *get_task_comm(char *buf, struct task_struct *tsk)
        task_unlock(tsk);
        return buf;
 }
+EXPORT_SYMBOL_GPL(get_task_comm);
 void set_task_comm(struct task_struct *tsk, char *buf)
 {
@@ -1379,10 +1427,10 @@ EXPORT_SYMBOL(search_binary_handler);
 /*
 * sys_execve() executes a new program.
 */
-int do_execve(const char * filename,
+static int do_execve_common(const char *filename,
-        const char __user *const __user *argv,
+                                struct user_arg_ptr argv,
-        const char __user *const __user *envp,
+                                struct user_arg_ptr envp,
-        struct pt_regs * regs)
+                                struct pt_regs *regs)
 {
        struct linux_binprm *bprm;
        struct file *file;
@@ -1489,6 +1537,34 @@ out_ret:
        return retval;
 }
+int do_execve(const char *filename,
+        const char __user *const __user *__argv,
+        const char __user *const __user *__envp,
+        struct pt_regs *regs)
+{
+        struct user_arg_ptr argv = { .ptr.native = __argv };
+        struct user_arg_ptr envp = { .ptr.native = __envp };
+        return do_execve_common(filename, argv, envp, regs);
+}
+#ifdef CONFIG_COMPAT
+int compat_do_execve(char *filename,
+        compat_uptr_t __user *__argv,
+        compat_uptr_t __user *__envp,
+        struct pt_regs *regs)
+{
+        struct user_arg_ptr argv = {
+                .is_compat = true,
+                .ptr.compat = __argv,
+        };
+        struct user_arg_ptr envp = {
+                .is_compat = true,
+                .ptr.compat = __envp,
+        };
+        return do_execve_common(filename, argv, envp, regs);
+}
+#endif
 void set_binfmt(struct linux_binfmt *new)
 {
        struct mm_struct *mm = current->mm;
@@ -1659,6 +1735,7 @@ static int zap_process(struct task_struct *start, int exit_code)
        t = start;
        do {
+                task_clear_group_stop_pending(t);
                if (t != current && t->mm) {
                        sigaddset(&t->pending.signal, SIGKILL);
                        signal_wake_up(t, 1);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 0a78dae7e2cb..1dd62ed35b85 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -898,7 +898,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
                brelse(bh);
                if (!sb_set_blocksize(sb, blocksize)) {
-                        ext2_msg(sb, KERN_ERR, "error: blocksize is too small");
+                        ext2_msg(sb, KERN_ERR,
+                                "error: bad blocksize %d", blocksize);
                        goto failed_sbi;
                }
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 32f3b8695859..34b6d9bfc48a 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1416,10 +1416,19 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        frame->at = entries;
        frame->bh = bh;
        bh = bh2;
+        /*
+         * Mark buffers dirty here so that if do_split() fails we write a
+         * consistent set of buffers to disk.
+         */
+        ext3_journal_dirty_metadata(handle, frame->bh);
+        ext3_journal_dirty_metadata(handle, bh);
        de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
-        dx_release (frames);
+        if (!de) {
-        if (!(de))
+                ext3_mark_inode_dirty(handle, dir);
+                dx_release(frames);
                return retval;
+        }
+        dx_release(frames);
        return add_dirent_to_buf(handle, dentry, inode, de, bh);
 }
@@ -2189,6 +2198,7 @@ static int ext3_symlink (struct inode * dir,
        handle_t *handle;
        struct inode * inode;
        int l, err, retries = 0;
+        int credits;
        l = strlen(symname)+1;
        if (l > dir->i_sb->s_blocksize)
@@ -2196,10 +2206,26 @@ static int ext3_symlink (struct inode * dir,
        dquot_initialize(dir);
+        if (l > EXT3_N_BLOCKS * 4) {
+                /*
+                 * For non-fast symlinks, we just allocate inode and put it on
+                 * orphan list in the first transaction => we need bitmap,
+                 * group descriptor, sb, inode block, quota blocks.
+                 */
+                credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+        } else {
+                /*
+                 * Fast symlink. We have to add entry to directory
+                 * (EXT3_DATA_TRANS_BLOCKS + EXT3_INDEX_EXTRA_TRANS_BLOCKS),
+                 * allocate new inode (bitmap, group descriptor, inode block,
+                 * quota blocks, sb is already counted in previous macros).
+                 */
+                credits = EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
+                          EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+                          EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+        }
 retry:
-        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
+        handle = ext3_journal_start(dir, credits);
-                                        EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
-                                        EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
@@ -2211,21 +2237,45 @@ retry:
        if (IS_ERR(inode))
                goto out_stop;
-        if (l > sizeof (EXT3_I(inode)->i_data)) {
+        if (l > EXT3_N_BLOCKS * 4) {
                inode->i_op = &ext3_symlink_inode_operations;
                ext3_set_aops(inode);
                /*
-                 * page_symlink() calls into ext3_prepare/commit_write.
+                 * We cannot call page_symlink() with transaction started
-                 * We have a transaction open.  All is sweetness.  It also sets
+                 * because it calls into ext3_write_begin() which acquires page
-                 * i_size in generic_commit_write().
+                 * lock which ranks below transaction start (and it can also
+                 * wait for journal commit if we are running out of space). So
+                 * we have to stop transaction now and restart it when symlink
+                 * contents are written.
+                 *
+                 * To keep fs consistent in case of crash, we have to put inode
+                 * to orphan list in the meantime.
                 */
+                drop_nlink(inode);
+                err = ext3_orphan_add(handle, inode);
+                ext3_journal_stop(handle);
+                if (err)
+                        goto err_drop_inode;
                err = __page_symlink(inode, symname, l, 1);
+                if (err)
+                        goto err_drop_inode;
+                /*
+                 * Now inode is being linked into dir (EXT3_DATA_TRANS_BLOCKS
+                 * + EXT3_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
+                 */
+                handle = ext3_journal_start(dir,
+                                EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
+                                EXT3_INDEX_EXTRA_TRANS_BLOCKS + 1);
+                if (IS_ERR(handle)) {
+                        err = PTR_ERR(handle);
+                        goto err_drop_inode;
+                }
+                inc_nlink(inode);
+                err = ext3_orphan_del(handle, inode);
                if (err) {
+                        ext3_journal_stop(handle);
                        drop_nlink(inode);
-                        unlock_new_inode(inode);
+                        goto err_drop_inode;
-                        ext3_mark_inode_dirty(handle, inode);
-                        iput (inode);
-                        goto out_stop;
                }
        } else {
                inode->i_op = &ext3_fast_symlink_inode_operations;
@@ -2239,6 +2289,10 @@ out_stop:
        if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
                goto retry;
        return err;
+err_drop_inode:
+        unlock_new_inode(inode);
+        iput(inode);
+        return err;
 }
 static int ext3_link (struct dentry * old_dentry,
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index ae8200f84e39..1cc7038e273d 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -151,6 +151,13 @@ static void fat_cache_add(struct inode *inode, struct fat_cache_id *new)
                        spin_unlock(&MSDOS_I(inode)->cache_lru_lock);
                        tmp = fat_cache_alloc(inode);
+                        if (!tmp) {
+                                spin_lock(&MSDOS_I(inode)->cache_lru_lock);
+                                MSDOS_I(inode)->nr_caches--;
+                                spin_unlock(&MSDOS_I(inode)->cache_lru_lock);
+                                return;
+                        }
                        spin_lock(&MSDOS_I(inode)->cache_lru_lock);
                        cache = fat_cache_merge(inode, new);
                        if (cache != NULL) {
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index ee42b9e0b16a..4ad64732cbce 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -98,7 +98,7 @@ next:
        *bh = sb_bread(sb, phys);
        if (*bh == NULL) {
-                printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n",
+                fat_msg(sb, KERN_ERR, "Directory bread(block %llu) failed",
                       (llu)phys);
                /* skip this block */
                *pos = (iblock + 1) << sb->s_blocksize_bits;
@@ -136,9 +136,10 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
 * but ignore that right now.
 * Ahem... Stack smashing in ring 0 isn't fun. Fixed.
 */
-static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
+static int uni16_to_x8(struct super_block *sb, unsigned char *ascii,
-                       int uni_xlate, struct nls_table *nls)
+                       const wchar_t *uni, int len, struct nls_table *nls)
 {
+        int uni_xlate = MSDOS_SB(sb)->options.unicode_xlate;
        const wchar_t *ip;
        wchar_t ec;
        unsigned char *op;
@@ -166,23 +167,23 @@ static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
        }
        if (unlikely(*ip)) {
-                printk(KERN_WARNING "FAT: filename was truncated while "
+                fat_msg(sb, KERN_WARNING, "filename was truncated while "
-                       "converting.");
+                        "converting.");
        }
        *op = 0;
        return (op - ascii);
 }
-static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
+static inline int fat_uni_to_x8(struct super_block *sb, const wchar_t *uni,
                                unsigned char *buf, int size)
 {
+        struct msdos_sb_info *sbi = MSDOS_SB(sb);
        if (sbi->options.utf8)
                return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS,
                                UTF16_HOST_ENDIAN, buf, size);
        else
-                return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
+                return uni16_to_x8(sb, buf, uni, size, sbi->nls_io);
-                                   sbi->nls_io);
 }
 static inline int
@@ -419,7 +420,7 @@ parse_record:
                /* Compare shortname */
                bufuname[last_u] = 0x0000;
-                len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
+                len = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname));
                if (fat_name_match(sbi, name, name_len, bufname, len))
                        goto found;
@@ -428,7 +429,7 @@ parse_record:
                        int size = PATH_MAX - FAT_MAX_UNI_SIZE;
                        /* Compare longname */
-                        len = fat_uni_to_x8(sbi, unicode, longname, size);
+                        len = fat_uni_to_x8(sb, unicode, longname, size);
                        if (fat_name_match(sbi, name, name_len, longname, len))
                                goto found;
                }
@@ -545,7 +546,7 @@ parse_record:
                if (nr_slots) {
                        void *longname = unicode + FAT_MAX_UNI_CHARS;
                        int size = PATH_MAX - FAT_MAX_UNI_SIZE;
-                        int len = fat_uni_to_x8(sbi, unicode, longname, size);
+                        int len = fat_uni_to_x8(sb, unicode, longname, size);
                        fill_name = longname;
                        fill_len = len;
@@ -621,7 +622,7 @@ parse_record:
        if (isvfat) {
                bufuname[j] = 0x0000;
-                i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
+                i = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname));
        }
        if (nr_slots) {
                /* hack for fat_ioctl_filldir() */
@@ -979,6 +980,7 @@ static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
 int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
 {
+        struct super_block *sb = dir->i_sb;
        struct msdos_dir_entry *de;
        struct buffer_head *bh;
        int err = 0, nr_slots;
@@ -1013,8 +1015,8 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
                 */
                err = __fat_remove_entries(dir, sinfo->slot_off, nr_slots);
                if (err) {
-                        printk(KERN_WARNING
+                        fat_msg(sb, KERN_WARNING,
-                               "FAT: Couldn't remove the long name slots\n");
+                               "Couldn't remove the long name slots");
                }
        }
@@ -1265,7 +1267,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
                if (sbi->fat_bits != 32)
                        goto error;
        } else if (MSDOS_I(dir)->i_start == 0) {
-                printk(KERN_ERR "FAT: Corrupted directory (i_pos %lld)\n",
+                fat_msg(sb, KERN_ERR, "Corrupted directory (i_pos %lld)",
                       MSDOS_I(dir)->i_pos);
                err = -EIO;
                goto error;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index f50408901f7e..8276cc282dec 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -319,19 +319,20 @@ extern struct inode *fat_build_inode(struct super_block *sb,
                        struct msdos_dir_entry *de, loff_t i_pos);
 extern int fat_sync_inode(struct inode *inode);
 extern int fat_fill_super(struct super_block *sb, void *data, int silent,
-                        const struct inode_operations *fs_dir_inode_ops,
+                          int isvfat, void (*setup)(struct super_block *));
-                        int isvfat, void (*setup)(struct super_block *));
 extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
                            struct inode *i2);
 /* fat/misc.c */
 extern void
-__fat_fs_error(struct super_block *s, int report, const char *fmt, ...)
+__fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
+        __attribute__ ((format (printf, 3, 4))) __cold;
+#define fat_fs_error(sb, fmt, args...)          \
+        __fat_fs_error(sb, 1, fmt , ## args)
+#define fat_fs_error_ratelimit(sb, fmt, args...) \
+        __fat_fs_error(sb, __ratelimit(&MSDOS_SB(sb)->ratelimit), fmt , ## args)
+void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...)
        __attribute__ ((format (printf, 3, 4))) __cold;
-#define fat_fs_error(s, fmt, args...)           \
-        __fat_fs_error(s, 1, fmt , ## args)
-#define fat_fs_error_ratelimit(s, fmt, args...) \
-        __fat_fs_error(s, __ratelimit(&MSDOS_SB(s)->ratelimit), fmt , ## args)
 extern int fat_clusters_flush(struct super_block *sb);
 extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
 extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index b47d2c9f4fa1..2e81ac0df7e2 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -95,7 +95,7 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent,
 err_brelse:
        brelse(bhs[0]);
 err:
-        printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", (llu)blocknr);
+        fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr);
        return -EIO;
 }
@@ -108,7 +108,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
        fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
        fatent->bhs[0] = sb_bread(sb, blocknr);
        if (!fatent->bhs[0]) {
-                printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n",
+                fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)",
                       (llu)blocknr);
                return -EIO;
        }
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 8d68690bdcf1..cb8d8391ac0b 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -581,7 +581,8 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_bavail = sbi->free_clusters;
        buf->f_fsid.val[0] = (u32)id;
        buf->f_fsid.val[1] = (u32)(id >> 32);
-        buf->f_namelen = sbi->options.isvfat ? FAT_LFN_LEN : 12;
+        buf->f_namelen =
+                (sbi->options.isvfat ? FAT_LFN_LEN : 12) * NLS_MAX_CHARSET_SIZE;
        return 0;
 }
@@ -619,8 +620,8 @@ retry:
        bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
        if (!bh) {
-                printk(KERN_ERR "FAT: unable to read inode block "
+                fat_msg(sb, KERN_ERR, "unable to read inode block "
-                       "for updating (i_pos %lld)\n", i_pos);
+                       "for updating (i_pos %lld)", i_pos);
                return -EIO;
        }
        spin_lock(&sbi->inode_hash_lock);
@@ -976,8 +977,8 @@ static const match_table_t vfat_tokens = {
        {Opt_err, NULL}
 };
-static int parse_options(char *options, int is_vfat, int silent, int *debug,
+static int parse_options(struct super_block *sb, char *options, int is_vfat,
-                         struct fat_mount_options *opts)
+                         int silent, int *debug, struct fat_mount_options *opts)
 {
        char *p;
        substring_t args[MAX_OPT_ARGS];
@@ -1168,15 +1169,15 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
                /* obsolete mount options */
                case Opt_obsolate:
-                        printk(KERN_INFO "FAT: \"%s\" option is obsolete, "
+                        fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, "
-                               "not supported now\n", p);
+                               "not supported now", p);
                        break;
                /* unknown option */
                default:
                        if (!silent) {
-                                printk(KERN_ERR
+                                fat_msg(sb, KERN_ERR,
-                                       "FAT: Unrecognized mount option \"%s\" "
+                                       "Unrecognized mount option \"%s\" "
-                                       "or missing value\n", p);
+                                       "or missing value", p);
                        }
                        return -EINVAL;
                }
@@ -1185,7 +1186,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
 out:
        /* UTF-8 doesn't provide FAT semantics */
        if (!strcmp(opts->iocharset, "utf8")) {
-                printk(KERN_ERR "FAT: utf8 is not a recommended IO charset"
+                fat_msg(sb, KERN_ERR, "utf8 is not a recommended IO charset"
                       " for FAT filesystems, filesystem will be "
                       "case sensitive!\n");
        }
@@ -1238,8 +1239,7 @@ static int fat_read_root(struct inode *inode)
 /*
 * Read the super block of an MS-DOS FS.
 */
-int fat_fill_super(struct super_block *sb, void *data, int silent,
+int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
-                   const struct inode_operations *fs_dir_inode_ops, int isvfat,
                   void (*setup)(struct super_block *))
 {
        struct inode *root_inode = NULL, *fat_inode = NULL;
@@ -1268,11 +1268,10 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
        sb->s_magic = MSDOS_SUPER_MAGIC;
        sb->s_op = &fat_sops;
        sb->s_export_op = &fat_export_ops;
-        sbi->dir_ops = fs_dir_inode_ops;
        ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL,
                             DEFAULT_RATELIMIT_BURST);
-        error = parse_options(data, isvfat, silent, &debug, &sbi->options);
+        error = parse_options(sb, data, isvfat, silent, &debug, &sbi->options);
        if (error)
                goto out_fail;
@@ -1282,20 +1281,20 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
        sb_min_blocksize(sb, 512);
        bh = sb_bread(sb, 0);
        if (bh == NULL) {
-                printk(KERN_ERR "FAT: unable to read boot sector\n");
+                fat_msg(sb, KERN_ERR, "unable to read boot sector");
                goto out_fail;
        }
        b = (struct fat_boot_sector *) bh->b_data;
        if (!b->reserved) {
                if (!silent)
-                        printk(KERN_ERR "FAT: bogus number of reserved sectors\n");
+                        fat_msg(sb, KERN_ERR, "bogus number of reserved sectors");
                brelse(bh);
                goto out_invalid;
        }
        if (!b->fats) {
                if (!silent)
-                        printk(KERN_ERR "FAT: bogus number of FAT structure\n");
+                        fat_msg(sb, KERN_ERR, "bogus number of FAT structure");
                brelse(bh);
                goto out_invalid;
        }
@@ -1308,7 +1307,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
        media = b->media;
        if (!fat_valid_media(media)) {
                if (!silent)
-                        printk(KERN_ERR "FAT: invalid media value (0x%02x)\n",
+                        fat_msg(sb, KERN_ERR, "invalid media value (0x%02x)",
                               media);
                brelse(bh);
                goto out_invalid;
@@ -1318,7 +1317,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
            || (logical_sector_size < 512)
            || (logical_sector_size > 4096)) {
                if (!silent)
-                        printk(KERN_ERR "FAT: bogus logical sector size %u\n",
+                        fat_msg(sb, KERN_ERR, "bogus logical sector size %u",
                               logical_sector_size);
                brelse(bh);
                goto out_invalid;
@@ -1326,15 +1325,15 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
        sbi->sec_per_clus = b->sec_per_clus;
        if (!is_power_of_2(sbi->sec_per_clus)) {
                if (!silent)
-                        printk(KERN_ERR "FAT: bogus sectors per cluster %u\n",
+                        fat_msg(sb, KERN_ERR, "bogus sectors per cluster %u",
                               sbi->sec_per_clus);
                brelse(bh);
                goto out_invalid;
        }
        if (logical_sector_size < sb->s_blocksize) {
-                printk(KERN_ERR "FAT: logical sector size too small for device"
+                fat_msg(sb, KERN_ERR, "logical sector size too small for device"
-                       " (logical sector size = %u)\n", logical_sector_size);
+                       " (logical sector size = %u)", logical_sector_size);
                brelse(bh);
                goto out_fail;
        }
@@ -1342,14 +1341,14 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
                brelse(bh);
                if (!sb_set_blocksize(sb, logical_sector_size)) {
-                        printk(KERN_ERR "FAT: unable to set blocksize %u\n",
+                        fat_msg(sb, KERN_ERR, "unable to set blocksize %u",
                               logical_sector_size);
                        goto out_fail;
                }
                bh = sb_bread(sb, 0);
                if (bh == NULL) {
-                        printk(KERN_ERR "FAT: unable to read boot sector"
+                        fat_msg(sb, KERN_ERR, "unable to read boot sector"
-                               " (logical sector size = %lu)\n",
+                               " (logical sector size = %lu)",
                               sb->s_blocksize);
                        goto out_fail;
                }
@@ -1385,16 +1384,16 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
                fsinfo_bh = sb_bread(sb, sbi->fsinfo_sector);
                if (fsinfo_bh == NULL) {
-                        printk(KERN_ERR "FAT: bread failed, FSINFO block"
+                        fat_msg(sb, KERN_ERR, "bread failed, FSINFO block"
-                               " (sector = %lu)\n", sbi->fsinfo_sector);
+                               " (sector = %lu)", sbi->fsinfo_sector);
                        brelse(bh);
                        goto out_fail;
                }
                fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data;
                if (!IS_FSINFO(fsinfo)) {
-                        printk(KERN_WARNING "FAT: Invalid FSINFO signature: "
+                        fat_msg(sb, KERN_WARNING, "Invalid FSINFO signature: "
-                               "0x%08x, 0x%08x (sector = %lu)\n",
+                               "0x%08x, 0x%08x (sector = %lu)",
                               le32_to_cpu(fsinfo->signature1),
                               le32_to_cpu(fsinfo->signature2),
                               sbi->fsinfo_sector);
@@ -1415,8 +1414,8 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
        sbi->dir_entries = get_unaligned_le16(&b->dir_entries);
        if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
                if (!silent)
-                        printk(KERN_ERR "FAT: bogus directroy-entries per block"
+                        fat_msg(sb, KERN_ERR, "bogus directroy-entries per block"
-                               " (%u)\n", sbi->dir_entries);
+                               " (%u)", sbi->dir_entries);
                brelse(bh);
                goto out_invalid;
        }
@@ -1438,7 +1437,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
        total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT);
        if (total_clusters > MAX_FAT(sb)) {
                if (!silent)
-                        printk(KERN_ERR "FAT: count of clusters too big (%u)\n",
+                        fat_msg(sb, KERN_ERR, "count of clusters too big (%u)",
                               total_clusters);
                brelse(bh);
                goto out_invalid;
@@ -1471,7 +1470,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
        sprintf(buf, "cp%d", sbi->options.codepage);
        sbi->nls_disk = load_nls(buf);
        if (!sbi->nls_disk) {
-                printk(KERN_ERR "FAT: codepage %s not found\n", buf);
+                fat_msg(sb, KERN_ERR, "codepage %s not found", buf);
                goto out_fail;
        }
@@ -1479,7 +1478,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
        if (sbi->options.isvfat) {
                sbi->nls_io = load_nls(sbi->options.iocharset);
                if (!sbi->nls_io) {
-                        printk(KERN_ERR "FAT: IO charset %s not found\n",
+                        fat_msg(sb, KERN_ERR, "IO charset %s not found",
                               sbi->options.iocharset);
                        goto out_fail;
                }
@@ -1503,7 +1502,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
        insert_inode_hash(root_inode);
        sb->s_root = d_alloc_root(root_inode);
        if (!sb->s_root) {
-                printk(KERN_ERR "FAT: get root inode failed\n");
+                fat_msg(sb, KERN_ERR, "get root inode failed");
                goto out_fail;
        }
@@ -1512,8 +1511,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
 out_invalid:
        error = -EINVAL;
        if (!silent)
-                printk(KERN_INFO "VFS: Can't find a valid FAT filesystem"
+                fat_msg(sb, KERN_INFO, "Can't find a valid FAT filesystem");
-                       " on dev %s.\n", sb->s_id);
 out_fail:
        if (fat_inode)
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 970e682ea754..6d93360ca0cc 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -20,30 +20,46 @@
 * In case the file system is remounted read-only, it can be made writable
 * again by remounting it.
 */
-void __fat_fs_error(struct super_block *s, int report, const char *fmt, ...)
+void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
 {
-        struct fat_mount_options *opts = &MSDOS_SB(s)->options;
+        struct fat_mount_options *opts = &MSDOS_SB(sb)->options;
        va_list args;
+        struct va_format vaf;
        if (report) {
-                printk(KERN_ERR "FAT: Filesystem error (dev %s)\n", s->s_id);
-                printk(KERN_ERR "    ");
                va_start(args, fmt);
-                vprintk(fmt, args);
+                vaf.fmt = fmt;
+                vaf.va = &args;
+                printk(KERN_ERR "FAT-fs (%s): error, %pV\n", sb->s_id, &vaf);
                va_end(args);
-                printk("\n");
        }
        if (opts->errors == FAT_ERRORS_PANIC)
-                panic("FAT: fs panic from previous error\n");
+                panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id);
-        else if (opts->errors == FAT_ERRORS_RO && !(s->s_flags & MS_RDONLY)) {
+        else if (opts->errors == FAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) {
-                s->s_flags |= MS_RDONLY;
+                sb->s_flags |= MS_RDONLY;
-                printk(KERN_ERR "FAT: Filesystem has been set read-only\n");
+                printk(KERN_ERR "FAT-fs (%s): Filesystem has been "
+                                "set read-only\n", sb->s_id);
        }
 }
 EXPORT_SYMBOL_GPL(__fat_fs_error);
+/**
+ * fat_msg() - print preformatted FAT specific messages. Everything that is
+ * not fat_fs_error() should be fat_msg().
+ */
+void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...)
+{
+        struct va_format vaf;
+        va_list args;
+        va_start(args, fmt);
+        vaf.fmt = fmt;
+        vaf.va = &args;
+        printk("%sFAT-fs (%s): %pV\n", level, sb->s_id, &vaf);
+        va_end(args);
+}
 /* Flushes the number of free clusters on FAT32 */
 /* XXX: Need to write one per FSINFO block.  Currently only writes 1 */
 int fat_clusters_flush(struct super_block *sb)
@@ -57,15 +73,15 @@ int fat_clusters_flush(struct super_block *sb)
        bh = sb_bread(sb, sbi->fsinfo_sector);
        if (bh == NULL) {
-                printk(KERN_ERR "FAT: bread failed in fat_clusters_flush\n");
+                fat_msg(sb, KERN_ERR, "bread failed in fat_clusters_flush");
                return -EIO;
        }
        fsinfo = (struct fat_boot_fsinfo *)bh->b_data;
        /* Sanity check */
        if (!IS_FSINFO(fsinfo)) {
-                printk(KERN_ERR "FAT: Invalid FSINFO signature: "
+                fat_msg(sb, KERN_ERR, "Invalid FSINFO signature: "
-                       "0x%08x, 0x%08x (sector = %lu)\n",
+                       "0x%08x, 0x%08x (sector = %lu)",
                       le32_to_cpu(fsinfo->signature1),
                       le32_to_cpu(fsinfo->signature2),
                       sbi->fsinfo_sector);
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index c3eccbd02037..be15437c272e 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -664,14 +664,14 @@ static const struct inode_operations msdos_dir_inode_operations = {
 static void setup(struct super_block *sb)
 {
+        MSDOS_SB(sb)->dir_ops = &msdos_dir_inode_operations;
        sb->s_d_op = &msdos_dentry_operations;
        sb->s_flags |= MS_NOATIME;
 }
 static int msdos_fill_super(struct super_block *sb, void *data, int silent)
 {
-        return fat_fill_super(sb, data, silent, &msdos_dir_inode_operations,
+        return fat_fill_super(sb, data, silent, 0, setup);
-                             0, setup);
 }
 static struct dentry *msdos_mount(struct file_system_type *fs_type,
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index e2466b2f8cf2..c61a6789f36c 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1070,6 +1070,7 @@ static const struct inode_operations vfat_dir_inode_operations = {
 static void setup(struct super_block *sb)
 {
+        MSDOS_SB(sb)->dir_ops = &vfat_dir_inode_operations;
        if (MSDOS_SB(sb)->options.name_check != 's')
                sb->s_d_op = &vfat_ci_dentry_ops;
        else
@@ -1078,8 +1079,7 @@ static void setup(struct super_block *sb)
 static int vfat_fill_super(struct super_block *sb, void *data, int silent)
 {
-        return fat_fill_super(sb, data, silent, &vfat_dir_inode_operations,
+        return fat_fill_super(sb, data, silent, 1, setup);
-                             1, setup);
 }
 static struct dentry *vfat_mount(struct file_system_type *fs_type,
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 2ba6719ac612..1a4311437a8b 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -272,7 +272,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip)
 * *ip:                 VFS inode
 *
 * Description:
- *  vxfs_put_fake_inode frees all data asssociated with @ip.
+ *  vxfs_put_fake_inode frees all data associated with @ip.
 */
 void
 vxfs_put_fake_inode(struct inode *ip)
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 48a18f184d50..30afdfa7aec7 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -33,8 +33,6 @@ void fscache_enqueue_operation(struct fscache_operation *op)
        _enter("{OBJ%x OP%x,%u}",
               op->object->debug_id, op->debug_id, atomic_read(&op->usage));
-        fscache_set_op_state(op, "EnQ");
        ASSERT(list_empty(&op->pend_link));
        ASSERT(op->processor != NULL);
        ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE);
@@ -66,8 +64,6 @@ EXPORT_SYMBOL(fscache_enqueue_operation);
 static void fscache_run_op(struct fscache_object *object,
                           struct fscache_operation *op)
 {
-        fscache_set_op_state(op, "Run");
        object->n_in_progress++;
        if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
                wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
@@ -88,8 +84,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object,
        _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id);
-        fscache_set_op_state(op, "SubmitX");
        spin_lock(&object->lock);
        ASSERTCMP(object->n_ops, >=, object->n_in_progress);
        ASSERTCMP(object->n_ops, >=, object->n_exclusive);
@@ -194,8 +188,6 @@ int fscache_submit_op(struct fscache_object *object,
        ASSERTCMP(atomic_read(&op->usage), >, 0);
-        fscache_set_op_state(op, "Submit");
        spin_lock(&object->lock);
        ASSERTCMP(object->n_ops, >=, object->n_in_progress);
        ASSERTCMP(object->n_ops, >=, object->n_exclusive);
@@ -335,8 +327,6 @@ void fscache_put_operation(struct fscache_operation *op)
        if (!atomic_dec_and_test(&op->usage))
                return;
-        fscache_set_op_state(op, "Put");
        _debug("PUT OP");
        if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags))
                BUG();
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 41c441c2058d..a2a5d19ece6a 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -155,11 +155,9 @@ static void fscache_attr_changed_op(struct fscache_operation *op)
        fscache_stat(&fscache_n_attr_changed_calls);
        if (fscache_object_is_active(object)) {
-                fscache_set_op_state(op, "CallFS");
                fscache_stat(&fscache_n_cop_attr_changed);
                ret = object->cache->ops->attr_changed(object);
                fscache_stat_d(&fscache_n_cop_attr_changed);
-                fscache_set_op_state(op, "Done");
                if (ret < 0)
                        fscache_abort_object(object);
        }
@@ -190,7 +188,6 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
        fscache_operation_init(op, fscache_attr_changed_op, NULL);
        op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE);
-        fscache_set_op_name(op, "Attr");
        spin_lock(&cookie->lock);
@@ -257,7 +254,6 @@ static struct fscache_retrieval *fscache_alloc_retrieval(
        op->context     = context;
        op->start_time  = jiffies;
        INIT_LIST_HEAD(&op->to_do);
-        fscache_set_op_name(&op->op, "Retr");
        return op;
 }
@@ -368,7 +364,6 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
                _leave(" = -ENOMEM");
                return -ENOMEM;
        }
-        fscache_set_op_name(&op->op, "RetrRA1");
        spin_lock(&cookie->lock);
@@ -487,7 +482,6 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
        op = fscache_alloc_retrieval(mapping, end_io_func, context);
        if (!op)
                return -ENOMEM;
-        fscache_set_op_name(&op->op, "RetrRAN");
        spin_lock(&cookie->lock);
@@ -589,7 +583,6 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
        op = fscache_alloc_retrieval(page->mapping, NULL, NULL);
        if (!op)
                return -ENOMEM;
-        fscache_set_op_name(&op->op, "RetrAL1");
        spin_lock(&cookie->lock);
@@ -662,8 +655,6 @@ static void fscache_write_op(struct fscache_operation *_op)
        _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage));
-        fscache_set_op_state(&op->op, "GetPage");
        spin_lock(&object->lock);
        cookie = object->cookie;
@@ -698,15 +689,12 @@ static void fscache_write_op(struct fscache_operation *_op)
        spin_unlock(&cookie->stores_lock);
        spin_unlock(&object->lock);
-        fscache_set_op_state(&op->op, "Store");
        fscache_stat(&fscache_n_store_pages);
        fscache_stat(&fscache_n_cop_write_page);
        ret = object->cache->ops->write_page(op, page);
        fscache_stat_d(&fscache_n_cop_write_page);
-        fscache_set_op_state(&op->op, "EndWrite");
        fscache_end_page_write(object, page);
        if (ret < 0) {
-                fscache_set_op_state(&op->op, "Abort");
                fscache_abort_object(object);
        } else {
                fscache_enqueue_operation(&op->op);
@@ -778,7 +766,6 @@ int __fscache_write_page(struct fscache_cookie *cookie,
        fscache_operation_init(&op->op, fscache_write_op,
                               fscache_release_write_op);
        op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING);
-        fscache_set_op_name(&op->op, "Write1");
        ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
        if (ret < 0)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index e462a7a281bf..0d0e3faddcfa 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -174,7 +174,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
                if (!inode)
                        return 0;
-                if (nd->flags & LOOKUP_RCU)
+                if (nd && (nd->flags & LOOKUP_RCU))
                        return -ECHILD;
                fc = get_fuse_conn(inode);
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index f3d23ef4e876..86128202384f 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,9 +1,9 @@
 ccflags-y := -I$(src)
 obj-$(CONFIG_GFS2_FS) += gfs2.o
 gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
-        glops.o inode.o log.o lops.o main.o meta_io.o \
+        glops.o log.o lops.o main.o meta_io.o \
        aops.o dentry.o export.o file.o \
-        ops_fstype.o ops_inode.o quota.o \
+        ops_fstype.o inode.o quota.o \
        recovery.o rgrp.o super.o sys.o trans.o util.o
 gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 0f5c4f9d5d62..802ac5eeba28 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1076,8 +1076,8 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
                bd = bh->b_private;
                if (bd && bd->bd_ail)
                        goto cannot_release;
-                gfs2_assert_warn(sdp, !buffer_pinned(bh));
+                if (buffer_pinned(bh) || buffer_dirty(bh))
-                gfs2_assert_warn(sdp, !buffer_dirty(bh));
+                        goto not_possible;
                bh = bh->b_this_page;
        } while(bh != head);
        gfs2_log_unlock(sdp);
@@ -1107,6 +1107,10 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
        } while (bh != head);
        return try_to_free_buffers(page);
+not_possible: /* Should never happen */
+        WARN_ON(buffer_dirty(bh));
+        WARN_ON(buffer_pinned(bh));
 cannot_release:
        gfs2_log_unlock(sdp);
        return 0;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 74add2ddcc3f..e65493a8ac00 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -780,6 +780,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
        metadata = (height != ip->i_height - 1);
        if (metadata)
                revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
+        else if (ip->i_depth)
+                revokes = sdp->sd_inptrs;
        if (ip != GFS2_I(sdp->sd_rindex))
                error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index f789c5732b7c..091ee4779538 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -82,12 +82,9 @@
 struct qstr gfs2_qdot __read_mostly;
 struct qstr gfs2_qdotdot __read_mostly;
-typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len,
-                            u64 leaf_no, void *data);
 typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
                            const struct qstr *name, void *opaque);
 int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
                            struct buffer_head **bhp)
 {
@@ -1600,7 +1597,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
 */
 int gfs2_dir_add(struct inode *inode, const struct qstr *name,
-                 const struct gfs2_inode *nip, unsigned type)
+                 const struct gfs2_inode *nip)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
        struct buffer_head *bh;
@@ -1616,7 +1613,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
                                return PTR_ERR(dent);
                        dent = gfs2_init_dirent(inode, dent, name, bh);
                        gfs2_inum_out(nip, dent);
-                        dent->de_type = cpu_to_be16(type);
+                        dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
                        if (ip->i_diskflags & GFS2_DIF_EXHASH) {
                                leaf = (struct gfs2_leaf *)bh->b_data;
                                be16_add_cpu(&leaf->lf_entries, 1);
@@ -1628,6 +1625,8 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
                        gfs2_trans_add_bh(ip->i_gl, bh, 1);
                        ip->i_entries++;
                        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
+                        if (S_ISDIR(nip->i_inode.i_mode))
+                                inc_nlink(&ip->i_inode);
                        gfs2_dinode_out(ip, bh->b_data);
                        brelse(bh);
                        error = 0;
@@ -1672,8 +1671,9 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 * Returns: 0 on success, error code on failure
 */
-int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
+int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
 {
+        const struct qstr *name = &dentry->d_name;
        struct gfs2_dirent *dent, *prev = NULL;
        struct buffer_head *bh;
        int error;
@@ -1714,6 +1714,8 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
        gfs2_trans_add_bh(dip->i_gl, bh, 1);
        dip->i_entries--;
        dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
+        if (S_ISDIR(dentry->d_inode->i_mode))
+                drop_nlink(&dip->i_inode);
        gfs2_dinode_out(dip, bh->b_data);
        brelse(bh);
        mark_inode_dirty(&dip->i_inode);
@@ -1768,94 +1770,20 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 }
 /**
- * foreach_leaf - call a function for each leaf in a directory
- * @dip: the directory
- * @lc: the function to call for each each
- * @data: private data to pass to it
- *
- * Returns: errno
- */
-static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
-{
-        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-        struct buffer_head *bh;
-        struct gfs2_leaf *leaf;
-        u32 hsize, len;
-        u32 ht_offset, lp_offset, ht_offset_cur = -1;
-        u32 index = 0;
-        __be64 *lp;
-        u64 leaf_no;
-        int error = 0;
-        hsize = 1 << dip->i_depth;
-        if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
-                gfs2_consist_inode(dip);
-                return -EIO;
-        }
-        lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
-        if (!lp)
-                return -ENOMEM;
-        while (index < hsize) {
-                lp_offset = index & (sdp->sd_hash_ptrs - 1);
-                ht_offset = index - lp_offset;
-                if (ht_offset_cur != ht_offset) {
-                        error = gfs2_dir_read_data(dip, (char *)lp,
-                                                ht_offset * sizeof(__be64),
-                                                sdp->sd_hash_bsize, 1);
-                        if (error != sdp->sd_hash_bsize) {
-                                if (error >= 0)
-                                        error = -EIO;
-                                goto out;
-                        }
-                        ht_offset_cur = ht_offset;
-                }
-                leaf_no = be64_to_cpu(lp[lp_offset]);
-                if (leaf_no) {
-                        error = get_leaf(dip, leaf_no, &bh);
-                        if (error)
-                                goto out;
-                        leaf = (struct gfs2_leaf *)bh->b_data;
-                        len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
-                        brelse(bh);
-                        error = lc(dip, index, len, leaf_no, data);
-                        if (error)
-                                goto out;
-                        index = (index & ~(len - 1)) + len;
-                } else
-                        index++;
-        }
-        if (index != hsize) {
-                gfs2_consist_inode(dip);
-                error = -EIO;
-        }
-out:
-        kfree(lp);
-        return error;
-}
-/**
 * leaf_dealloc - Deallocate a directory leaf
 * @dip: the directory
 * @index: the hash table offset in the directory
 * @len: the number of pointers to this leaf
 * @leaf_no: the leaf number
- * @data: not used
+ * @leaf_bh: buffer_head for the starting leaf
+ * @last_dealloc: 1 if this is the final dealloc for the leaf, else 0
 *
 * Returns: errno
 */
 static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
-                        u64 leaf_no, void *data)
+                        u64 leaf_no, struct buffer_head *leaf_bh,
+                        int last_dealloc)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
        struct gfs2_leaf *tmp_leaf;
@@ -1887,14 +1815,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
                goto out_qs;
        /*  Count the number of leaves  */
+        bh = leaf_bh;
        for (blk = leaf_no; blk; blk = nblk) {
-                error = get_leaf(dip, blk, &bh);
+                if (blk != leaf_no) {
-                if (error)
+                        error = get_leaf(dip, blk, &bh);
-                        goto out_rlist;
+                        if (error)
+                                goto out_rlist;
+                }
                tmp_leaf = (struct gfs2_leaf *)bh->b_data;
                nblk = be64_to_cpu(tmp_leaf->lf_next);
-                brelse(bh);
+                if (blk != leaf_no)
+                        brelse(bh);
                gfs2_rlist_add(sdp, &rlist, blk);
                l_blocks++;
@@ -1918,13 +1850,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
        if (error)
                goto out_rg_gunlock;
+        bh = leaf_bh;
        for (blk = leaf_no; blk; blk = nblk) {
-                error = get_leaf(dip, blk, &bh);
+                if (blk != leaf_no) {
-                if (error)
+                        error = get_leaf(dip, blk, &bh);
-                        goto out_end_trans;
+                        if (error)
+                                goto out_end_trans;
+                }
                tmp_leaf = (struct gfs2_leaf *)bh->b_data;
                nblk = be64_to_cpu(tmp_leaf->lf_next);
-                brelse(bh);
+                if (blk != leaf_no)
+                        brelse(bh);
                gfs2_free_meta(dip, blk, 1);
                gfs2_add_inode_blocks(&dip->i_inode, -1);
@@ -1942,6 +1879,10 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
                goto out_end_trans;
        gfs2_trans_add_bh(dip->i_gl, dibh, 1);
+        /* On the last dealloc, make this a regular file in case we crash.
+           (We don't want to free these blocks a second time.)  */
+        if (last_dealloc)
+                dip->i_inode.i_mode = S_IFREG;
        gfs2_dinode_out(dip, dibh->b_data);
        brelse(dibh);
@@ -1975,29 +1916,67 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
        struct buffer_head *bh;
-        int error;
+        struct gfs2_leaf *leaf;
+        u32 hsize, len;
+        u32 ht_offset, lp_offset, ht_offset_cur = -1;
+        u32 index = 0, next_index;
+        __be64 *lp;
+        u64 leaf_no;
+        int error = 0, last;
-        /* Dealloc on-disk leaves to FREEMETA state */
+        hsize = 1 << dip->i_depth;
-        error = foreach_leaf(dip, leaf_dealloc, NULL);
+        if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
-        if (error)
+                gfs2_consist_inode(dip);
-                return error;
+                return -EIO;
+        }
-        /* Make this a regular file in case we crash.
+        lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
-           (We don't want to free these blocks a second time.)  */
+        if (!lp)
+                return -ENOMEM;
-        error = gfs2_trans_begin(sdp, RES_DINODE, 0);
+        while (index < hsize) {
-        if (error)
+                lp_offset = index & (sdp->sd_hash_ptrs - 1);
-                return error;
+                ht_offset = index - lp_offset;
-        error = gfs2_meta_inode_buffer(dip, &bh);
+                if (ht_offset_cur != ht_offset) {
-        if (!error) {
+                        error = gfs2_dir_read_data(dip, (char *)lp,
-                gfs2_trans_add_bh(dip->i_gl, bh, 1);
+                                                ht_offset * sizeof(__be64),
-                ((struct gfs2_dinode *)bh->b_data)->di_mode =
+                                                sdp->sd_hash_bsize, 1);
-                                                cpu_to_be32(S_IFREG);
+                        if (error != sdp->sd_hash_bsize) {
-                brelse(bh);
+                                if (error >= 0)
+                                        error = -EIO;
+                                goto out;
+                        }
+                        ht_offset_cur = ht_offset;
+                }
+                leaf_no = be64_to_cpu(lp[lp_offset]);
+                if (leaf_no) {
+                        error = get_leaf(dip, leaf_no, &bh);
+                        if (error)
+                                goto out;
+                        leaf = (struct gfs2_leaf *)bh->b_data;
+                        len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
+                        next_index = (index & ~(len - 1)) + len;
+                        last = ((next_index >= hsize) ? 1 : 0);
+                        error = leaf_dealloc(dip, index, len, leaf_no, bh,
+                                             last);
+                        brelse(bh);
+                        if (error)
+                                goto out;
+                        index = next_index;
+                } else
+                        index++;
        }
-        gfs2_trans_end(sdp);
+        if (index != hsize) {
+                gfs2_consist_inode(dip);
+                error = -EIO;
+        }
+out:
+        kfree(lp);
        return error;
 }
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index a98f644bd3df..e686af11becd 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -22,8 +22,8 @@ extern struct inode *gfs2_dir_search(struct inode *dir,
 extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
                          const struct gfs2_inode *ip);
 extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
-                        const struct gfs2_inode *ip, unsigned int type);
+                        const struct gfs2_inode *ip);
-extern int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
+extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
 extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
                         filldir_t filldir);
 extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index b5a5e60df0d5..fe9945f2ff72 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -139,7 +139,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
        struct gfs2_sbd *sdp = sb->s_fs_info;
        struct inode *inode;
-        inode = gfs2_ilookup(sb, inum->no_addr);
+        inode = gfs2_ilookup(sb, inum->no_addr, 0);
        if (inode) {
                if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
                        iput(inode);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index e48310885c48..a9f5cbe45cd9 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -545,18 +545,10 @@ static int gfs2_close(struct inode *inode, struct file *file)
 /**
 * gfs2_fsync - sync the dirty data for a file (across the cluster)
 * @file: the file that points to the dentry (we ignore this)
- * @dentry: the dentry that points to the inode to sync
+ * @datasync: set if we can ignore timestamp changes
 *
- * The VFS will flush "normal" data for us. We only need to worry
+ * The VFS will flush data for us. We only need to worry
- * about metadata here. For journaled data, we just do a log flush
+ * about metadata here.
- * as we can't avoid it. Otherwise we can just bale out if datasync
- * is set. For stuffed inodes we must flush the log in order to
- * ensure that all data is on disk.
- *
- * The call to write_inode_now() is there to write back metadata and
- * the inode itself. It does also try and write the data, but thats
- * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite()
- * for us.
 *
 * Returns: errno
 */
@@ -565,22 +557,20 @@ static int gfs2_fsync(struct file *file, int datasync)
 {
        struct inode *inode = file->f_mapping->host;
        int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
-        int ret = 0;
+        struct gfs2_inode *ip = GFS2_I(inode);
+        int ret;
-        if (gfs2_is_jdata(GFS2_I(inode))) {
-                gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
-                return 0;
-        }
-        if (sync_state != 0) {
+        if (datasync)
-                if (!datasync)
+                sync_state &= ~I_DIRTY_SYNC;
-                        ret = write_inode_now(inode, 0);
-                if (gfs2_is_stuffed(GFS2_I(inode)))
+        if (sync_state) {
-                        gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
+                ret = sync_inode_metadata(inode, 1);
+                if (ret)
+                        return ret;
+                gfs2_ail_flush(ip->i_gl);
        }
-        return ret;
+        return 0;
 }
 /**
@@ -826,6 +816,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
        loff_t bytes, max_bytes;
        struct gfs2_alloc *al;
        int error;
+        loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
        loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
        next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
@@ -833,13 +824,15 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
        if (mode & ~FALLOC_FL_KEEP_SIZE)
                return -EOPNOTSUPP;
-        offset = (offset >> sdp->sd_sb.sb_bsize_shift) <<
+        offset &= bsize_mask;
-                 sdp->sd_sb.sb_bsize_shift;
        len = next - offset;
        bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
        if (!bytes)
                bytes = UINT_MAX;
+        bytes &= bsize_mask;
+        if (bytes == 0)
+                bytes = sdp->sd_sb.sb_bsize;
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
        error = gfs2_glock_nq(&ip->i_gh);
@@ -870,6 +863,9 @@ retry:
                if (error) {
                        if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
                                bytes >>= 1;
+                                bytes &= bsize_mask;
+                                if (bytes == 0)
+                                        bytes = sdp->sd_sb.sb_bsize;
                                goto retry;
                        }
                        goto out_qunlock;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 7a4fb630a320..2792a790e50b 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -143,14 +143,9 @@ static int demote_ok(const struct gfs2_glock *gl)
 {
        const struct gfs2_glock_operations *glops = gl->gl_ops;
-        /* assert_spin_locked(&gl->gl_spin); */
        if (gl->gl_state == LM_ST_UNLOCKED)
                return 0;
-        if (test_bit(GLF_LFLUSH, &gl->gl_flags))
+        if (!list_empty(&gl->gl_holders))
-                return 0;
-        if ((gl->gl_name.ln_type != LM_TYPE_INODE) &&
-            !list_empty(&gl->gl_holders))
                return 0;
        if (glops->go_demote_ok)
                return glops->go_demote_ok(gl);
@@ -158,6 +153,31 @@ static int demote_ok(const struct gfs2_glock *gl)
 }
+void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
+{ /* Queue @gl at the tail of lru_list and set GLF_LRU, all under lru_lock. */
+        spin_lock(&lru_lock);
+        if (!list_empty(&gl->gl_lru)) /* gl_lru already linked: unlink it, lru_count stays as is */
+                list_del_init(&gl->gl_lru);
+        else /* not linked yet: account for the new entry */
+                atomic_inc(&lru_count);
+        list_add_tail(&gl->gl_lru, &lru_list);
+        set_bit(GLF_LRU, &gl->gl_flags);
+        spin_unlock(&lru_lock);
+}
+static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
+{ /* Unlink @gl's gl_lru node under lru_lock; does nothing if the node is not linked. */
+        spin_lock(&lru_lock);
+        if (!list_empty(&gl->gl_lru)) {
+                list_del_init(&gl->gl_lru);
+                atomic_dec(&lru_count);
+                clear_bit(GLF_LRU, &gl->gl_flags); /* flag is cleared only when an unlink happened */
+        }
+        spin_unlock(&lru_lock);
+}
 /**
 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
 * @gl: the glock
@@ -168,24 +188,8 @@ static int demote_ok(const struct gfs2_glock *gl)
 static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
 {
-        if (demote_ok(gl)) {
+        if (demote_ok(gl))
-                spin_lock(&lru_lock);
+                gfs2_glock_add_to_lru(gl);
-                if (!list_empty(&gl->gl_lru))
-                        list_del_init(&gl->gl_lru);
-                else
-                        atomic_inc(&lru_count);
-                list_add_tail(&gl->gl_lru, &lru_list);
-                spin_unlock(&lru_lock);
-        }
-}
-void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
-{
-        spin_lock(&gl->gl_spin);
-        __gfs2_glock_schedule_for_reclaim(gl);
-        spin_unlock(&gl->gl_spin);
 }
 /**
@@ -217,12 +221,7 @@ void gfs2_glock_put(struct gfs2_glock *gl)
                spin_lock_bucket(gl->gl_hash);
                hlist_bl_del_rcu(&gl->gl_list);
                spin_unlock_bucket(gl->gl_hash);
-                spin_lock(&lru_lock);
+                gfs2_glock_remove_from_lru(gl);
-                if (!list_empty(&gl->gl_lru)) {
-                        list_del_init(&gl->gl_lru);
-                        atomic_dec(&lru_count);
-                }
-                spin_unlock(&lru_lock);
                GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
                GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
                trace_gfs2_glock_put(gl);
@@ -542,11 +541,6 @@ __acquires(&gl->gl_spin)
        clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
        gfs2_glock_hold(gl);
-        if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED ||
-            gl->gl_state == LM_ST_DEFERRED) &&
-            !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
-                lck_flags |= LM_FLAG_TRY_1CB;
        if (sdp->sd_lockstruct.ls_ops->lm_lock) {
                /* lock_dlm */
                ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
@@ -648,7 +642,7 @@ static void delete_work_func(struct work_struct *work)
        /* Note: Unsafe to dereference ip as we don't hold right refs/locks */
        if (ip)
-                inode = gfs2_ilookup(sdp->sd_vfs, no_addr);
+                inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1);
        else
                inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
        if (inode && !IS_ERR(inode)) {
@@ -1025,6 +1019,9 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
                return -EIO;
+        if (test_bit(GLF_LRU, &gl->gl_flags))
+                gfs2_glock_remove_from_lru(gl);
        spin_lock(&gl->gl_spin);
        add_to_queue(gh);
        if ((LM_FLAG_NOEXP & gh->gh_flags) &&
@@ -1082,7 +1079,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
                    !test_bit(GLF_DEMOTE, &gl->gl_flags))
                        fast_path = 1;
        }
-        __gfs2_glock_schedule_for_reclaim(gl);
+        if (!test_bit(GLF_LFLUSH, &gl->gl_flags))
+                __gfs2_glock_schedule_for_reclaim(gl);
        trace_gfs2_glock_queue(gh, 0);
        spin_unlock(&gl->gl_spin);
        if (likely(fast_path))
@@ -1348,11 +1346,14 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
 }
-static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int gfs2_shrink_glock_memory(struct shrinker *shrink,
+                                    struct shrink_control *sc)
 {
        struct gfs2_glock *gl;
        int may_demote;
        int nr_skipped = 0;
+        int nr = sc->nr_to_scan;
+        gfp_t gfp_mask = sc->gfp_mask;
        LIST_HEAD(skipped);
        if (nr == 0)
@@ -1365,6 +1366,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
        while(nr && !list_empty(&lru_list)) {
                gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
                list_del_init(&gl->gl_lru);
+                clear_bit(GLF_LRU, &gl->gl_flags);
                atomic_dec(&lru_count);
                /* Test for being demotable */
@@ -1387,6 +1389,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
                }
                nr_skipped++;
                list_add(&gl->gl_lru, &skipped);
+                set_bit(GLF_LRU, &gl->gl_flags);
        }
        list_splice(&skipped, &lru_list);
        atomic_add(nr_skipped, &lru_count);
@@ -1459,12 +1462,7 @@ static void thaw_glock(struct gfs2_glock *gl)
 static void clear_glock(struct gfs2_glock *gl)
 {
-        spin_lock(&lru_lock);
+        gfs2_glock_remove_from_lru(gl);
-        if (!list_empty(&gl->gl_lru)) {
-                list_del_init(&gl->gl_lru);
-                atomic_dec(&lru_count);
-        }
-        spin_unlock(&lru_lock);
        spin_lock(&gl->gl_spin);
        if (gl->gl_state != LM_ST_UNLOCKED)
@@ -1599,9 +1597,11 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
        return 0;
 }
-static const char *gflags2str(char *buf, const unsigned long *gflags)
+static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
 {
+        const unsigned long *gflags = &gl->gl_flags;
        char *p = buf;
        if (test_bit(GLF_LOCK, gflags))
                *p++ = 'l';
        if (test_bit(GLF_DEMOTE, gflags))
@@ -1624,6 +1624,10 @@ static const char *gflags2str(char *buf, const unsigned long *gflags)
                *p++ = 'F';
        if (test_bit(GLF_QUEUED, gflags))
                *p++ = 'q';
+        if (test_bit(GLF_LRU, gflags))
+                *p++ = 'L';
+        if (gl->gl_object)
+                *p++ = 'o';
        *p = 0;
        return buf;
 }
@@ -1658,14 +1662,15 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
        dtime *= 1000000/HZ; /* demote time in uSec */
        if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
                dtime = 0;
-        gfs2_print_dbg(seq, "G:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n",
+        gfs2_print_dbg(seq, "G:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n",
                  state2str(gl->gl_state),
                  gl->gl_name.ln_type,
                  (unsigned long long)gl->gl_name.ln_number,
-                  gflags2str(gflags_buf, &gl->gl_flags),
+                  gflags2str(gflags_buf, gl),
                  state2str(gl->gl_target),
                  state2str(gl->gl_demote_state), dtime,
                  atomic_read(&gl->gl_ail_count),
+                  atomic_read(&gl->gl_revokes),
                  atomic_read(&gl->gl_ref));
        list_for_each_entry(gh, &gl->gl_holders, gh_list) {
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index aea160690e94..6b2f757b9281 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -225,11 +225,10 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
 extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
 extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
-extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
 extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
 extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
 extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
-extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
+extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl);
 extern void gfs2_glock_free(struct gfs2_glock *gl);
 extern int __init gfs2_glock_init(void);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 25eeb2bcee47..8ef70f464731 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,33 +28,18 @@
 #include "trans.h"
 /**
- * ail_empty_gl - remove all buffers for a given lock from the AIL
+ * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
 * @gl: the glock
 *
 * None of the buffers should be dirty, locked, or pinned.
 */
-static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
+static void __gfs2_ail_flush(struct gfs2_glock *gl)
 {
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct list_head *head = &gl->gl_ail_list;
        struct gfs2_bufdata *bd;
        struct buffer_head *bh;
-        struct gfs2_trans tr;
-        memset(&tr, 0, sizeof(tr));
-        tr.tr_revokes = atomic_read(&gl->gl_ail_count);
-        if (!tr.tr_revokes)
-                return;
-        /* A shortened, inline version of gfs2_trans_begin() */
-        tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
-        tr.tr_ip = (unsigned long)__builtin_return_address(0);
-        INIT_LIST_HEAD(&tr.tr_list_buf);
-        gfs2_log_reserve(sdp, tr.tr_reserved);
-        BUG_ON(current->journal_info);
-        current->journal_info = &tr;
        spin_lock(&sdp->sd_ail_lock);
        while (!list_empty(head)) {
@@ -76,7 +61,47 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
        }
        gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
        spin_unlock(&sdp->sd_ail_lock);
+}
+static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
+{ /* Run __gfs2_ail_flush() on @gl inside a hand-built, on-stack transaction, then flush the log. */
+        struct gfs2_sbd *sdp = gl->gl_sbd;
+        struct gfs2_trans tr;
+        memset(&tr, 0, sizeof(tr));
+        tr.tr_revokes = atomic_read(&gl->gl_ail_count); /* revoke count is taken from the glock's gl_ail_count */
+        if (!tr.tr_revokes) /* count is zero: nothing to flush */
+                return;
+        /* A shortened, inline version of gfs2_trans_begin() */
+        tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
+        tr.tr_ip = (unsigned long)__builtin_return_address(0);
+        INIT_LIST_HEAD(&tr.tr_list_buf);
+        gfs2_log_reserve(sdp, tr.tr_reserved); /* any return value is not checked here */
+        BUG_ON(current->journal_info); /* a journal_info already set on this task is treated as a bug */
+        current->journal_info = &tr;
+        __gfs2_ail_flush(gl);
+        gfs2_trans_end(sdp);
+        gfs2_log_flush(sdp, NULL);
+}
+void gfs2_ail_flush(struct gfs2_glock *gl)
+{ /* Same steps as gfs2_ail_empty_gl(), but the transaction is opened with gfs2_trans_begin(). */
+        struct gfs2_sbd *sdp = gl->gl_sbd;
+        unsigned int revokes = atomic_read(&gl->gl_ail_count);
+        int ret;
+        if (!revokes) /* gl_ail_count is zero: nothing to flush */
+                return;
+        ret = gfs2_trans_begin(sdp, 0, revokes);
+        if (ret) /* the error is dropped: this function returns void */
+                return;
+        __gfs2_ail_flush(gl);
        gfs2_trans_end(sdp);
        gfs2_log_flush(sdp, NULL);
 }
@@ -227,6 +252,119 @@ static int inode_go_demote_ok(const struct gfs2_glock *gl)
 }
 /**
+ * gfs2_set_nlink - Set the inode's link count based on on-disk info
+ * @inode: The inode in question
+ * @nlink: The link count
+ *
+ * If the link count has hit zero, it must never be raised, whatever the
+ * on-disk inode might say. When new struct inodes are created the link
+ * count is set to 1, so that we can safely use this test even when reading
+ * in on disk information for the first time.
+ */
+static void gfs2_set_nlink(struct inode *inode, u32 nlink)
+{
+        /*
+         * We will need to review setting the nlink count here in the
+         * light of the forthcoming ro bind mount work. This is a reminder
+         * to do that.
+         */
+        if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) { /* skip when unchanged, or when the in-core count is already zero */
+                if (nlink == 0)
+                        clear_nlink(inode);
+                else
+                        inode->i_nlink = nlink;
+        }
+}
+static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
+{ /* Copy the big-endian dinode at @buf into @ip; returns 0, or -EIO after gfs2_consist_inode() when a check fails. */
+        const struct gfs2_dinode *str = buf;
+        struct timespec atime;
+        u16 height, depth;
+        if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) /* buffer must describe this inode's address */
+                goto corrupt;
+        ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
+        ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
+        ip->i_inode.i_rdev = 0; /* stays 0 unless the mode is a block or char device */
+        switch (ip->i_inode.i_mode & S_IFMT) {
+        case S_IFBLK:
+        case S_IFCHR:
+                ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
+                                           be32_to_cpu(str->di_minor));
+                break;
+        };
+        ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
+        ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
+        gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
+        i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
+        gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
+        atime.tv_sec = be64_to_cpu(str->di_atime);
+        atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
+        if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) /* take the on-disk atime only if it is later than the in-core one */
+                ip->i_inode.i_atime = atime;
+        ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
+        ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
+        ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
+        ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
+        ip->i_goal = be64_to_cpu(str->di_goal_meta);
+        ip->i_generation = be64_to_cpu(str->di_generation);
+        ip->i_diskflags = be32_to_cpu(str->di_flags);
+        gfs2_set_inode_flags(&ip->i_inode);
+        height = be16_to_cpu(str->di_height);
+        if (unlikely(height > GFS2_MAX_META_HEIGHT)) /* note: the fields assigned above are not rolled back on this path */
+                goto corrupt;
+        ip->i_height = (u8)height;
+        depth = be16_to_cpu(str->di_depth);
+        if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) /* same as above: earlier assignments stay in place */
+                goto corrupt;
+        ip->i_depth = (u8)depth;
+        ip->i_entries = be32_to_cpu(str->di_entries);
+        ip->i_eattr = be64_to_cpu(str->di_eattr);
+        if (S_ISREG(ip->i_inode.i_mode))
+                gfs2_set_aops(&ip->i_inode);
+        return 0;
+corrupt:
+        gfs2_consist_inode(ip);
+        return -EIO;
+}
+/**
+ * gfs2_inode_refresh - Refresh the incore copy of the dinode
+ * @ip: The GFS2 inode
+ *
+ * Fetches the dinode buffer with gfs2_meta_inode_buffer(), returns -EIO if
+ * gfs2_metatype_check() against GFS2_METATYPE_DI reports a problem, and
+ * otherwise copies the buffer in-core with gfs2_dinode_in(). GIF_INVALID is
+ * cleared after that copy whether or not gfs2_dinode_in() succeeded.
+ *
+ * Returns: errno
+ */
+int gfs2_inode_refresh(struct gfs2_inode *ip)
+{
+        struct buffer_head *dibh;
+        int error;
+        error = gfs2_meta_inode_buffer(ip, &dibh);
+        if (error)
+                return error;
+        if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
+                brelse(dibh);
+                return -EIO;
+        }
+        error = gfs2_dinode_in(ip, dibh->b_data);
+        brelse(dibh); /* buffer reference is dropped on both the success and error outcomes */
+        clear_bit(GIF_INVALID, &ip->i_flags);
+        return error;
+}
+/**
 * inode_go_lock - operation done after an inode lock is locked by a process
 * @gl: the glock
 * @flags:
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index b3aa2e3210fd..6fce409b5a50 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -23,4 +23,6 @@ extern const struct gfs2_glock_operations gfs2_quota_glops;
 extern const struct gfs2_glock_operations gfs2_journal_glops;
 extern const struct gfs2_glock_operations *gfs2_glops_list[];
+extern void gfs2_ail_flush(struct gfs2_glock *gl);
 #endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 870a89d6d4dc..0a064e91ac70 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -20,7 +20,6 @@
 #define DIO_WAIT        0x00000010
 #define DIO_METADATA    0x00000020
-#define DIO_ALL         0x00000100
 struct gfs2_log_operations;
 struct gfs2_log_element;
@@ -200,6 +199,8 @@ enum {
        GLF_INITIAL                     = 10,
        GLF_FROZEN                      = 11,
        GLF_QUEUED                      = 12,
+        GLF_LRU                         = 13,
+        GLF_OBJECT                      = 14, /* Used only for tracing */
 };
 struct gfs2_glock {
@@ -234,6 +235,7 @@ struct gfs2_glock {
        struct list_head gl_ail_list;
        atomic_t gl_ail_count;
+        atomic_t gl_revokes;
        struct delayed_work gl_work;
        struct work_struct gl_delete;
        struct rcu_head gl_rcu;
@@ -374,8 +376,6 @@ struct gfs2_ail {
        unsigned int ai_first;
        struct list_head ai_ail1_list;
        struct list_head ai_ail2_list;
-        u64 ai_sync_gen;
 };
 struct gfs2_journal_extent {
@@ -488,7 +488,6 @@ struct gfs2_sb_host {
        char sb_lockproto[GFS2_LOCKNAME_LEN];
        char sb_locktable[GFS2_LOCKNAME_LEN];
-        u8 sb_uuid[16];
 };
 /*
@@ -654,7 +653,6 @@ struct gfs2_sbd {
        spinlock_t sd_ail_lock;
        struct list_head sd_ail1_list;
        struct list_head sd_ail2_list;
-        u64 sd_ail_sync_gen;
        /* Replay stuff */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 9134dcb89479..03e0c529063e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1,23 +1,25 @@
 /*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */
-#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
+#include <linux/namei.h>
+#include <linux/mm.h>
+#include <linux/xattr.h>
 #include <linux/posix_acl.h>
-#include <linux/sort.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
+#include <linux/fiemap.h>
 #include <linux/security.h>
-#include <linux/time.h>
+#include <asm/uaccess.h>
 #include "gfs2.h"
 #include "incore.h"
@@ -26,19 +28,14 @@
 #include "dir.h"
 #include "xattr.h"
 #include "glock.h"
-#include "glops.h"
 #include "inode.h"
-#include "log.h"
 #include "meta_io.h"
 #include "quota.h"
 #include "rgrp.h"
 #include "trans.h"
 #include "util.h"
+#include "super.h"
-struct gfs2_inum_range_host {
+#include "glops.h"
-        u64 ir_start;
-        u64 ir_length;
-};
 struct gfs2_skip_data {
        u64 no_addr;
@@ -74,14 +71,14 @@ static int iget_set(struct inode *inode, void *opaque)
        return 0;
 }
-struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
 {
        unsigned long hash = (unsigned long)no_addr;
        struct gfs2_skip_data data;
        data.no_addr = no_addr;
        data.skipped = 0;
-        data.non_block = 0;
+        data.non_block = non_block;
        return ilookup5(sb, hash, iget_test, &data);
 }
@@ -248,203 +245,6 @@ fail_iput:
        goto fail;
 }
-static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
-{
-        const struct gfs2_dinode *str = buf;
-        struct timespec atime;
-        u16 height, depth;
-        if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
-                goto corrupt;
-        ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
-        ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
-        ip->i_inode.i_rdev = 0;
-        switch (ip->i_inode.i_mode & S_IFMT) {
-        case S_IFBLK:
-        case S_IFCHR:
-                ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
-                                           be32_to_cpu(str->di_minor));
-                break;
-        };
-        ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
-        ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
-        /*
-         * We will need to review setting the nlink count here in the
-         * light of the forthcoming ro bind mount work. This is a reminder
-         * to do that.
-         */
-        ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
-        i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
-        gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
-        atime.tv_sec = be64_to_cpu(str->di_atime);
-        atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
-        if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
-                ip->i_inode.i_atime = atime;
-        ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
-        ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
-        ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
-        ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
-        ip->i_goal = be64_to_cpu(str->di_goal_meta);
-        ip->i_generation = be64_to_cpu(str->di_generation);
-        ip->i_diskflags = be32_to_cpu(str->di_flags);
-        gfs2_set_inode_flags(&ip->i_inode);
-        height = be16_to_cpu(str->di_height);
-        if (unlikely(height > GFS2_MAX_META_HEIGHT))
-                goto corrupt;
-        ip->i_height = (u8)height;
-        depth = be16_to_cpu(str->di_depth);
-        if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
-                goto corrupt;
-        ip->i_depth = (u8)depth;
-        ip->i_entries = be32_to_cpu(str->di_entries);
-        ip->i_eattr = be64_to_cpu(str->di_eattr);
-        if (S_ISREG(ip->i_inode.i_mode))
-                gfs2_set_aops(&ip->i_inode);
-        return 0;
-corrupt:
-        if (gfs2_consist_inode(ip))
-                gfs2_dinode_print(ip);
-        return -EIO;
-}
-/**
- * gfs2_inode_refresh - Refresh the incore copy of the dinode
- * @ip: The GFS2 inode
- *
- * Returns: errno
- */
-int gfs2_inode_refresh(struct gfs2_inode *ip)
-{
-        struct buffer_head *dibh;
-        int error;
-        error = gfs2_meta_inode_buffer(ip, &dibh);
-        if (error)
-                return error;
-        if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
-                brelse(dibh);
-                return -EIO;
-        }
-        error = gfs2_dinode_in(ip, dibh->b_data);
-        brelse(dibh);
-        clear_bit(GIF_INVALID, &ip->i_flags);
-        return error;
-}
-int gfs2_dinode_dealloc(struct gfs2_inode *ip)
-{
-        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-        struct gfs2_alloc *al;
-        struct gfs2_rgrpd *rgd;
-        int error;
-        if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
-                if (gfs2_consist_inode(ip))
-                        gfs2_dinode_print(ip);
-                return -EIO;
-        }
-        al = gfs2_alloc_get(ip);
-        if (!al)
-                return -ENOMEM;
-        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
-        if (error)
-                goto out;
-        error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
-        if (error)
-                goto out_qs;
-        rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
-        if (!rgd) {
-                gfs2_consist_inode(ip);
-                error = -EIO;
-                goto out_rindex_relse;
-        }
-        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
-                                   &al->al_rgd_gh);
-        if (error)
-                goto out_rindex_relse;
-        error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1);
-        if (error)
-                goto out_rg_gunlock;
-        set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
-        set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags);
-        gfs2_free_di(rgd, ip);
-        gfs2_trans_end(sdp);
-out_rg_gunlock:
-        gfs2_glock_dq_uninit(&al->al_rgd_gh);
-out_rindex_relse:
-        gfs2_glock_dq_uninit(&al->al_ri_gh);
-out_qs:
-        gfs2_quota_unhold(ip);
-out:
-        gfs2_alloc_put(ip);
-        return error;
-}
-/**
- * gfs2_change_nlink - Change nlink count on inode
- * @ip: The GFS2 inode
- * @diff: The change in the nlink count required
- *
- * Returns: errno
- */
-int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
-{
-        struct buffer_head *dibh;
-        u32 nlink;
-        int error;
-        BUG_ON(diff != 1 && diff != -1);
-        nlink = ip->i_inode.i_nlink + diff;
-        /* If we are reducing the nlink count, but the new value ends up being
-           bigger than the old one, we must have underflowed. */
-        if (diff < 0 && nlink > ip->i_inode.i_nlink) {
-                if (gfs2_consist_inode(ip))
-                        gfs2_dinode_print(ip);
-                return -EIO;
-        }
-        error = gfs2_meta_inode_buffer(ip, &dibh);
-        if (error)
-                return error;
-        if (diff > 0)
-                inc_nlink(&ip->i_inode);
-        else
-                drop_nlink(&ip->i_inode);
-        ip->i_inode.i_ctime = CURRENT_TIME;
-        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-        gfs2_dinode_out(ip, dibh->b_data);
-        brelse(dibh);
-        mark_inode_dirty(&ip->i_inode);
-        if (ip->i_inode.i_nlink == 0)
-                gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
-        return error;
-}
 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
 {
@@ -543,7 +343,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
        /*  Don't create entries in an unlinked directory  */
        if (!dip->i_inode.i_nlink)
-                return -EPERM;
+                return -ENOENT;
        error = gfs2_dir_check(&dip->i_inode, name, NULL);
        switch (error) {
@@ -613,21 +413,44 @@ out:
        return error;
 }
+static void gfs2_init_dir(struct buffer_head *dibh,
+                          const struct gfs2_inode *parent)
+{
+        struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
+        struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
+        gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
+        dent->de_inum = di->di_num; /* already GFS2 endian */
+        dent->de_type = cpu_to_be16(DT_DIR);
+        dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
+        gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
+        gfs2_inum_out(parent, dent);
+        dent->de_type = cpu_to_be16(DT_DIR);
+        
+}
 /**
 * init_dinode - Fill in a new dinode structure
- * @dip: the directory this inode is being created in
+ * @dip: The directory this inode is being created in
 * @gl: The glock covering the new inode
- * @inum: the inode number
+ * @inum: The inode number
- * @mode: the file permissions
+ * @mode: The file permissions
- * @uid:
+ * @uid: The uid of the new inode
- * @gid:
+ * @gid: The gid of the new inode
+ * @generation: The generation number of the new inode
+ * @dev: The device number (if a device node)
+ * @symname: The symlink destination (if a symlink)
+ * @size: The inode size (ignored for directories)
+ * @bhp: The buffer head (returned to caller)
 *
 */
 static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
                        const struct gfs2_inum_host *inum, unsigned int mode,
                        unsigned int uid, unsigned int gid,
-                        const u64 *generation, dev_t dev, struct buffer_head **bhp)
+                        const u64 *generation, dev_t dev, const char *symname,
+                        unsigned size, struct buffer_head **bhp)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
        struct gfs2_dinode *di;
@@ -646,7 +469,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
        di->di_uid = cpu_to_be32(uid);
        di->di_gid = cpu_to_be32(gid);
        di->di_nlink = 0;
-        di->di_size = 0;
+        di->di_size = cpu_to_be64(size);
        di->di_blocks = cpu_to_be64(1);
        di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
        di->di_major = cpu_to_be32(MAJOR(dev));
@@ -654,16 +477,6 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
        di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
        di->di_generation = cpu_to_be64(*generation);
        di->di_flags = 0;
-        if (S_ISREG(mode)) {
-                if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
-                    gfs2_tune_get(sdp, gt_new_files_jdata))
-                        di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
-        } else if (S_ISDIR(mode)) {
-                di->di_flags |= cpu_to_be32(dip->i_diskflags &
-                                            GFS2_DIF_INHERIT_JDATA);
-        }
        di->__pad1 = 0;
        di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0);
        di->di_height = 0;
@@ -677,7 +490,26 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
        di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
        di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
        memset(&di->di_reserved, 0, sizeof(di->di_reserved));
-        
+        switch(mode & S_IFMT) { 
+        case S_IFREG:
+                if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
+                    gfs2_tune_get(sdp, gt_new_files_jdata))
+                        di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
+                break;
+        case S_IFDIR:
+                di->di_flags |= cpu_to_be32(dip->i_diskflags &
+                                            GFS2_DIF_INHERIT_JDATA);
+                di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
+                di->di_size = cpu_to_be64(sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
+                di->di_entries = cpu_to_be32(2);
+                gfs2_init_dir(dibh, dip);
+                break;
+        case S_IFLNK:
+                memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size);
+                break;
+        }
        set_buffer_uptodate(dibh);
        *bhp = dibh;
@@ -685,7 +517,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
                       unsigned int mode, const struct gfs2_inum_host *inum,
-                       const u64 *generation, dev_t dev, struct buffer_head **bhp)
+                       const u64 *generation, dev_t dev, const char *symname,
+                       unsigned int size, struct buffer_head **bhp)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
        unsigned int uid, gid;
@@ -707,7 +540,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
        if (error)
                goto out_quota;
-        init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp);
+        init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, symname, size, bhp);
        gfs2_quota_change(dip, +1, uid, gid);
        gfs2_trans_end(sdp);
@@ -761,14 +594,16 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
                        goto fail_quota_locks;
        }
-        error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode));
+        error = gfs2_dir_add(&dip->i_inode, name, ip);
        if (error)
                goto fail_end_trans;
        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (error)
                goto fail_end_trans;
-        ip->i_inode.i_nlink = 1;
+        inc_nlink(&ip->i_inode);
+        if (S_ISDIR(ip->i_inode.i_mode))
+                inc_nlink(&ip->i_inode);
        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
        gfs2_dinode_out(ip, dibh->b_data);
        brelse(dibh);
@@ -815,27 +650,25 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
 }
 /**
- * gfs2_createi - Create a new inode
+ * gfs2_create_inode - Create a new inode
- * @ghs: An array of two holders
+ * @dir: The parent directory
- * @name: The name of the new file
+ * @dentry: The new dentry
- * @mode: the permissions on the new inode
+ * @mode: The permissions on the new inode
- *
+ * @dev: For device nodes, this is the device number
- * @ghs[0] is an initialized holder for the directory
+ * @symname: For symlinks, this is the link destination
- * @ghs[1] is the holder for the inode lock
+ * @size: The initial size of the inode (ignored for directories)
 *
- * If the return value is not NULL, the glocks on both the directory and the new
+ * Returns: 0 on success, or error code
- * file are held.  A transaction has been started and an inplace reservation
- * is held, as well.
- *
- * Returns: An inode
 */
-struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
+static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
-                           unsigned int mode, dev_t dev)
+                             unsigned int mode, dev_t dev, const char *symname,
+                             unsigned int size)
 {
+        const struct qstr *name = &dentry->d_name;
+        struct gfs2_holder ghs[2];
        struct inode *inode = NULL;
-        struct gfs2_inode *dip = ghs->gh_gl->gl_object;
+        struct gfs2_inode *dip = GFS2_I(dir);
-        struct inode *dir = &dip->i_inode;
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
        struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
        int error;
@@ -843,10 +676,9 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
        struct buffer_head *bh = NULL;
        if (!name->len || name->len > GFS2_FNAMESIZE)
-                return ERR_PTR(-ENAMETOOLONG);
+                return -ENAMETOOLONG;
-        gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
+        error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
-        error = gfs2_glock_nq(ghs);
        if (error)
                goto fail;
@@ -864,7 +696,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
        if (error)
                goto fail_gunlock;
-        error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh);
+        error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, symname, size, &bh);
        if (error)
                goto fail_gunlock2;
@@ -891,18 +723,852 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
        if (bh)
                brelse(bh);
-        return inode;
+        gfs2_trans_end(sdp);
+        if (dip->i_alloc->al_rgd)
+                gfs2_inplace_release(dip);
+        gfs2_quota_unlock(dip);
+        gfs2_alloc_put(dip);
+        gfs2_glock_dq_uninit_m(2, ghs);
+        mark_inode_dirty(inode);
+        d_instantiate(dentry, inode);
+        return 0;
 fail_gunlock2:
        gfs2_glock_dq_uninit(ghs + 1);
        if (inode && !IS_ERR(inode))
                iput(inode);
 fail_gunlock:
-        gfs2_glock_dq(ghs);
+        gfs2_glock_dq_uninit(ghs);
 fail:
        if (bh)
                brelse(bh);
-        return ERR_PTR(error);
+        return error;
+}
+/**
+ * gfs2_create - Create a file
+ * @dir: The directory in which to create the file
+ * @dentry: The dentry of the new file
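+ * @mode: The mode of the new file
+ * @nd: The VFS lookup data (may be NULL); if LOOKUP_EXCL is set in its flags, an existing name is reported as -EEXIST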
+ *
+ * Returns: errno
+ */
+static int gfs2_create(struct inode *dir, struct dentry *dentry,
+                       int mode, struct nameidata *nd)
+{
+        struct inode *inode;
+        int ret;
+        for (;;) {
+                ret = gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0);
+                if (ret != -EEXIST || (nd && (nd->flags & LOOKUP_EXCL)))
+                        return ret;
+                inode = gfs2_lookupi(dir, &dentry->d_name, 0);
+                if (inode) {
+                        if (!IS_ERR(inode))
+                                break;
+                        return PTR_ERR(inode);
+                }
+        }
+        d_instantiate(dentry, inode);
+        return 0;
+}
+/**
+ * gfs2_lookup - Look up a filename in a directory and return its inode
+ * @dir: The directory inode
+ * @dentry: The dentry of the new inode
+ * @nd: passed from Linux VFS, ignored by us
+ *
+ * Called by the VFS layer. Lock dir and call gfs2_lookupi()
+ *
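+ * Returns: the result of d_splice_alias() if an inode was found, NULL (after
+ * d_add()ing a negative dentry) if not, or an ERR_PTR() on error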
+ */
+static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
+                                  struct nameidata *nd)
+{
+        struct inode *inode = NULL;
+        inode = gfs2_lookupi(dir, &dentry->d_name, 0);
+        if (inode && IS_ERR(inode))
+                return ERR_CAST(inode);
+        if (inode) {
+                struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
+                struct gfs2_holder gh;
+                int error;
+                error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+                if (error) {
+                        iput(inode);
+                        return ERR_PTR(error);
+                }
+                gfs2_glock_dq_uninit(&gh);
+                return d_splice_alias(inode, dentry);
+        }
+        d_add(dentry, inode);
+        return NULL;
+}
+/**
+ * gfs2_link - Link to a file
+ * @old_dentry: The dentry of the inode to link
+ * @dir: Add link to this directory
+ * @dentry: The name of the link
+ *
+ * Link the inode in "old_dentry" into the directory "dir" with the
+ * name in "dentry".
+ *
+ * Returns: errno
+ */
+static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
+                     struct dentry *dentry)
+{
+        struct gfs2_inode *dip = GFS2_I(dir);
+        struct gfs2_sbd *sdp = GFS2_SB(dir);
+        struct inode *inode = old_dentry->d_inode;
+        struct gfs2_inode *ip = GFS2_I(inode);
+        struct gfs2_holder ghs[2];
+        struct buffer_head *dibh;
+        int alloc_required;
+        int error;
+        if (S_ISDIR(inode->i_mode))
+                return -EPERM;
+        gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
+        error = gfs2_glock_nq(ghs); /* parent */
+        if (error)
+                goto out_parent;
+        error = gfs2_glock_nq(ghs + 1); /* child */
+        if (error)
+                goto out_child;
+        error = -ENOENT;
+        if (inode->i_nlink == 0)
+                goto out_gunlock;
+        error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
+        if (error)
+                goto out_gunlock;
+        error = gfs2_dir_check(dir, &dentry->d_name, NULL);
+        switch (error) {
+        case -ENOENT:
+                break;
+        case 0:
+                error = -EEXIST;
+        default:
+                goto out_gunlock;
+        }
+        error = -EINVAL;
+        if (!dip->i_inode.i_nlink)
+                goto out_gunlock;
+        error = -EFBIG;
+        if (dip->i_entries == (u32)-1)
+                goto out_gunlock;
+        error = -EPERM;
+        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+                goto out_gunlock;
+        error = -EINVAL;
+        if (!ip->i_inode.i_nlink)
+                goto out_gunlock;
+        error = -EMLINK;
+        if (ip->i_inode.i_nlink == (u32)-1)
+                goto out_gunlock;
+        alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
+        if (error < 0)
+                goto out_gunlock;
+        error = 0;
+        if (alloc_required) {
+                struct gfs2_alloc *al = gfs2_alloc_get(dip);
+                if (!al) {
+                        error = -ENOMEM;
+                        goto out_gunlock;
+                }
+                error = gfs2_quota_lock_check(dip);
+                if (error)
+                        goto out_alloc;
+                al->al_requested = sdp->sd_max_dirres;
+                error = gfs2_inplace_reserve(dip);
+                if (error)
+                        goto out_gunlock_q;
+                error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
+                                         gfs2_rg_blocks(al) +
+                                         2 * RES_DINODE + RES_STATFS +
+                                         RES_QUOTA, 0);
+                if (error)
+                        goto out_ipres;
+        } else {
+                error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
+                if (error)
+                        goto out_ipres;
+        }
+        error = gfs2_meta_inode_buffer(ip, &dibh);
+        if (error)
+                goto out_end_trans;
+        error = gfs2_dir_add(dir, &dentry->d_name, ip);
+        if (error)
+                goto out_brelse;
+        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+        inc_nlink(&ip->i_inode);
+        ip->i_inode.i_ctime = CURRENT_TIME;
+        gfs2_dinode_out(ip, dibh->b_data);
+        mark_inode_dirty(&ip->i_inode);
+out_brelse:
+        brelse(dibh);
+out_end_trans:
+        gfs2_trans_end(sdp);
+out_ipres:
+        if (alloc_required)
+                gfs2_inplace_release(dip);
+out_gunlock_q:
+        if (alloc_required)
+                gfs2_quota_unlock(dip);
+out_alloc:
+        if (alloc_required)
+                gfs2_alloc_put(dip);
+out_gunlock:
+        gfs2_glock_dq(ghs + 1);
+out_child:
+        gfs2_glock_dq(ghs);
+out_parent:
+        gfs2_holder_uninit(ghs);
+        gfs2_holder_uninit(ghs + 1);
+        if (!error) {
+                ihold(inode);
+                d_instantiate(dentry, inode);
+                mark_inode_dirty(inode);
+        }
+        return error;
+}
+/*
+ * gfs2_unlink_ok - check to see that an inode is still in a directory
+ * @dip: the directory
+ * @name: the name of the file
+ * @ip: the inode
+ *
+ * Assumes that the lock on (at least) @dip is held.
+ *
+ * Returns: 0 if the parent/child relationship is correct, errno if it isn't
+ */
+static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
+                          const struct gfs2_inode *ip)
+{
+        int error;
+        if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
+                return -EPERM;
+        if ((dip->i_inode.i_mode & S_ISVTX) &&
+            dip->i_inode.i_uid != current_fsuid() &&
+            ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
+                return -EPERM;
+        if (IS_APPEND(&dip->i_inode))
+                return -EPERM;
+        error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
+        if (error)
+                return error;
+        error = gfs2_dir_check(&dip->i_inode, name, ip);
+        if (error)
+                return error;
+        return 0;
+}
+/**
+ * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it
+ * @dip: The parent directory
+ * @dentry: The dentry naming the entry in the parent directory; its
+ *          d_inode is the inode to be removed
+ * @bh: The inode buffer for the inode to be removed
+ *
+ * Called with all the locks and in a transaction. This will only be
+ * called for a directory after it has been checked to ensure it is empty.
+ *
+ * Returns: 0 on success, or an error
+ */
+static int gfs2_unlink_inode(struct gfs2_inode *dip,
+                             const struct dentry *dentry,
+                             struct buffer_head *bh)
+{
+        struct inode *inode = dentry->d_inode;
+        struct gfs2_inode *ip = GFS2_I(inode);
+        int error;
+        error = gfs2_dir_del(dip, dentry);
+        if (error)
+                return error;
+        ip->i_entries = 0;
+        inode->i_ctime = CURRENT_TIME;
+        if (S_ISDIR(inode->i_mode))
+                clear_nlink(inode);
+        else
+                drop_nlink(inode);
+        gfs2_trans_add_bh(ip->i_gl, bh, 1);
+        gfs2_dinode_out(ip, bh->b_data);
+        mark_inode_dirty(inode);
+        if (inode->i_nlink == 0)
+                gfs2_unlink_di(inode);
+        return 0;
+}
+/**
+ * gfs2_unlink - Unlink an inode (this does rmdir as well)
+ * @dir: The inode of the directory containing the inode to unlink
+ * @dentry: The file itself
+ *
+ * This routine uses the type of the inode as a flag to figure out
+ * whether this is an unlink or an rmdir.
+ *
+ * Returns: errno
+ */
+static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
+{
+        struct gfs2_inode *dip = GFS2_I(dir);
+        struct gfs2_sbd *sdp = GFS2_SB(dir);
+        struct inode *inode = dentry->d_inode;
+        struct gfs2_inode *ip = GFS2_I(inode);
+        struct buffer_head *bh;
+        struct gfs2_holder ghs[3];
+        struct gfs2_rgrpd *rgd;
+        struct gfs2_holder ri_gh;
+        int error;
+        error = gfs2_rindex_hold(sdp, &ri_gh);
+        if (error)
+                return error;
+        gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+        gfs2_holder_init(ip->i_gl,  LM_ST_EXCLUSIVE, 0, ghs + 1);
+        rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
+        gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
+        error = gfs2_glock_nq(ghs); /* parent */
+        if (error)
+                goto out_parent;
+        error = gfs2_glock_nq(ghs + 1); /* child */
+        if (error)
+                goto out_child;
+        error = -ENOENT;
+        if (inode->i_nlink == 0)
+                goto out_rgrp;
+        if (S_ISDIR(inode->i_mode)) {
+                error = -ENOTEMPTY;
+                if (ip->i_entries > 2 || inode->i_nlink > 2)
+                        goto out_rgrp;
+        }
+        error = gfs2_glock_nq(ghs + 2); /* rgrp */
+        if (error)
+                goto out_rgrp;
+        error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
+        if (error)
+                goto out_gunlock;
+        error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0);
+        if (error)
+                goto out_gunlock;
+        error = gfs2_meta_inode_buffer(ip, &bh);
+        if (error)
+                goto out_end_trans;
+        error = gfs2_unlink_inode(dip, dentry, bh);
+        brelse(bh);
+out_end_trans:
+        gfs2_trans_end(sdp);
+out_gunlock:
+        gfs2_glock_dq(ghs + 2);
+out_rgrp:
+        gfs2_holder_uninit(ghs + 2);
+        gfs2_glock_dq(ghs + 1);
+out_child:
+        gfs2_holder_uninit(ghs + 1);
+        gfs2_glock_dq(ghs);
+out_parent:
+        gfs2_holder_uninit(ghs);
+        gfs2_glock_dq_uninit(&ri_gh);
+        return error;
+}
+/**
+ * gfs2_symlink - Create a symlink
+ * @dir: The directory to create the symlink in
+ * @dentry: The dentry to put the symlink in
+ * @symname: The thing which the link points to
+ *
+ * Returns: errno
+ */
+static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
+                        const char *symname)
+{
+        struct gfs2_sbd *sdp = GFS2_SB(dir);
+        unsigned int size;
+        size = strlen(symname);
+        if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
+                return -ENAMETOOLONG;
+        return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size);
+}
+/**
+ * gfs2_mkdir - Make a directory
+ * @dir: The parent directory of the new one
+ * @dentry: The dentry of the new directory
+ * @mode: The mode of the new directory
+ *
+ * Returns: errno
+ */
+static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+        return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0);
+}
+/**
+ * gfs2_mknod - Make a special file
+ * @dir: The directory in which the special file will reside
+ * @dentry: The dentry of the special file
+ * @mode: The mode of the special file
+ * @dev: The device specification of the special file
+ *
+ * Returns: errno
+ */
+static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
+                      dev_t dev)
+{
+        return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0);
+}
+/*
+ * gfs2_ok_to_move - check if it's ok to move a directory to another directory
+ * @this: move this
+ * @to: to here
+ *
+ * Follow @to back to the root and make sure we don't encounter @this
+ * Assumes we already hold the rename lock.
+ *
+ * Returns: errno
+ */
+static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
+{
+        struct inode *dir = &to->i_inode;
+        struct super_block *sb = dir->i_sb;
+        struct inode *tmp;
+        int error = 0;
+        igrab(dir);
+        for (;;) {
+                if (dir == &this->i_inode) {
+                        error = -EINVAL;
+                        break;
+                }
+                if (dir == sb->s_root->d_inode) {
+                        error = 0;
+                        break;
+                }
+                tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
+                if (IS_ERR(tmp)) {
+                        error = PTR_ERR(tmp);
+                        break;
+                }
+                iput(dir);
+                dir = tmp;
+        }
+        iput(dir);
+        return error;
+}
+/**
+ * gfs2_rename - Rename a file
+ * @odir: Parent directory of old file name
+ * @odentry: The old dentry of the file
+ * @ndir: Parent directory of new file name
+ * @ndentry: The new dentry of the file
+ *
+ * Returns: errno
+ */
+static int gfs2_rename(struct inode *odir, struct dentry *odentry,
+                       struct inode *ndir, struct dentry *ndentry)
+{
+        struct gfs2_inode *odip = GFS2_I(odir);
+        struct gfs2_inode *ndip = GFS2_I(ndir);
+        struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
+        struct gfs2_inode *nip = NULL;
+        struct gfs2_sbd *sdp = GFS2_SB(odir);
+        struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
+        struct gfs2_rgrpd *nrgd;
+        unsigned int num_gh;
+        int dir_rename = 0;
+        int alloc_required = 0;
+        unsigned int x;
+        int error;
+        if (ndentry->d_inode) {
+                nip = GFS2_I(ndentry->d_inode);
+                if (ip == nip)
+                        return 0;
+        }
+        error = gfs2_rindex_hold(sdp, &ri_gh);
+        if (error)
+                return error;
+        if (odip != ndip) {
+                error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
+                                           0, &r_gh);
+                if (error)
+                        goto out;
+                if (S_ISDIR(ip->i_inode.i_mode)) {
+                        dir_rename = 1;
+                        /* don't move a directory into its subdir */
+                        error = gfs2_ok_to_move(ip, ndip);
+                        if (error)
+                                goto out_gunlock_r;
+                }
+        }
+        num_gh = 1;
+        gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+        if (odip != ndip) {
+                gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+                num_gh++;
+        }
+        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+        num_gh++;
+        if (nip) {
+                gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
+                num_gh++;
+                /* grab the resource lock for unlink flag twiddling 
+                 * this is the case of the target file already existing
+                 * so we unlink before doing the rename
+                 */
+                nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
+                if (nrgd)
+                        gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
+        }
+        for (x = 0; x < num_gh; x++) {
+                error = gfs2_glock_nq(ghs + x);
+                if (error)
+                        goto out_gunlock;
+        }
+        error = -ENOENT;
+        if (ip->i_inode.i_nlink == 0)
+                goto out_gunlock;
+        /* Check out the old directory */
+        error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
+        if (error)
+                goto out_gunlock;
+        /* Check out the new directory */
+        if (nip) {
+                error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
+                if (error)
+                        goto out_gunlock;
+                if (nip->i_inode.i_nlink == 0) {
+                        error = -EAGAIN;
+                        goto out_gunlock;
+                }
+                if (S_ISDIR(nip->i_inode.i_mode)) {
+                        if (nip->i_entries < 2) {
+                                gfs2_consist_inode(nip);
+                                error = -EIO;
+                                goto out_gunlock;
+                        }
+                        if (nip->i_entries > 2) {
+                                error = -ENOTEMPTY;
+                                goto out_gunlock;
+                        }
+                }
+        } else {
+                error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
+                if (error)
+                        goto out_gunlock;
+                error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
+                switch (error) {
+                case -ENOENT:
+                        error = 0;
+                        break;
+                case 0:
+                        error = -EEXIST;
+                default:
+                        goto out_gunlock;
+                };
+                if (odip != ndip) {
+                        if (!ndip->i_inode.i_nlink) {
+                                error = -ENOENT;
+                                goto out_gunlock;
+                        }
+                        if (ndip->i_entries == (u32)-1) {
+                                error = -EFBIG;
+                                goto out_gunlock;
+                        }
+                        if (S_ISDIR(ip->i_inode.i_mode) &&
+                            ndip->i_inode.i_nlink == (u32)-1) {
+                                error = -EMLINK;
+                                goto out_gunlock;
+                        }
+                }
+        }
+        /* Check out the dir to be renamed */
+        if (dir_rename) {
+                error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
+                if (error)
+                        goto out_gunlock;
+        }
+        if (nip == NULL)
+                alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
+        error = alloc_required;
+        if (error < 0)
+                goto out_gunlock;
+        error = 0;
+        if (alloc_required) {
+                struct gfs2_alloc *al = gfs2_alloc_get(ndip);
+                if (!al) {
+                        error = -ENOMEM;
+                        goto out_gunlock;
+                }
+                error = gfs2_quota_lock_check(ndip);
+                if (error)
+                        goto out_alloc;
+                al->al_requested = sdp->sd_max_dirres;
+                error = gfs2_inplace_reserve_ri(ndip);
+                if (error)
+                        goto out_gunlock_q;
+                error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
+                                         gfs2_rg_blocks(al) +
+                                         4 * RES_DINODE + 4 * RES_LEAF +
+                                         RES_STATFS + RES_QUOTA + 4, 0);
+                if (error)
+                        goto out_ipreserv;
+        } else {
+                error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
+                                         5 * RES_LEAF + 4, 0);
+                if (error)
+                        goto out_gunlock;
+        }
+        /* Remove the target file, if it exists */
+        if (nip) {
+                struct buffer_head *bh;
+                error = gfs2_meta_inode_buffer(nip, &bh);
+                if (error)
+                        goto out_end_trans;
+                error = gfs2_unlink_inode(ndip, ndentry, bh);
+                brelse(bh);
+        }
+        if (dir_rename) {
+                error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
+                if (error)
+                        goto out_end_trans;
+        } else {
+                struct buffer_head *dibh;
+                error = gfs2_meta_inode_buffer(ip, &dibh);
+                if (error)
+                        goto out_end_trans;
+                ip->i_inode.i_ctime = CURRENT_TIME;
+                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+                gfs2_dinode_out(ip, dibh->b_data);
+                brelse(dibh);
+        }
+        error = gfs2_dir_del(odip, odentry);
+        if (error)
+                goto out_end_trans;
+        error = gfs2_dir_add(ndir, &ndentry->d_name, ip);
+        if (error)
+                goto out_end_trans;
+out_end_trans:
+        gfs2_trans_end(sdp);
+out_ipreserv:
+        if (alloc_required)
+                gfs2_inplace_release(ndip);
+out_gunlock_q:
+        if (alloc_required)
+                gfs2_quota_unlock(ndip);
+out_alloc:
+        if (alloc_required)
+                gfs2_alloc_put(ndip);
+out_gunlock:
+        while (x--) {
+                gfs2_glock_dq(ghs + x);
+                gfs2_holder_uninit(ghs + x);
+        }
+out_gunlock_r:
+        if (r_gh.gh_gl)
+                gfs2_glock_dq_uninit(&r_gh);
+out:
+        gfs2_glock_dq_uninit(&ri_gh);
+        return error;
+}
+/**
+ * gfs2_follow_link - Follow a symbolic link
+ * @dentry: The dentry of the link
+ * @nd: The nameidata that receives the link text via nd_set_link()
+ *
+ * The link text is copied from the dinode block into a kzalloc()ed buffer.
+ *
+ * Returns: always NULL; the buffer or an ERR_PTR() is handed to nd_set_link()
+ */
+static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+        struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+        struct gfs2_holder i_gh;
+        struct buffer_head *dibh;
+        unsigned int size;
+        char *buf;
+        int error;
+        gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
+        error = gfs2_glock_nq(&i_gh);
+        if (error) {
+                gfs2_holder_uninit(&i_gh);
+                nd_set_link(nd, ERR_PTR(error));
+                return NULL;
+        }
+        size = (unsigned int)i_size_read(&ip->i_inode);
+        if (size == 0) {
+                gfs2_consist_inode(ip);
+                buf = ERR_PTR(-EIO);
+                goto out;
+        }
+        error = gfs2_meta_inode_buffer(ip, &dibh);
+        if (error) {
+                buf = ERR_PTR(error);
+                goto out;
+        }
+        buf = kzalloc(size + 1, GFP_NOFS);
+        if (!buf)
+                buf = ERR_PTR(-ENOMEM);
+        else
+                memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size);
+        brelse(dibh);
+out:
+        gfs2_glock_dq_uninit(&i_gh);
+        nd_set_link(nd, buf);
+        return NULL;
+}
+static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+{
+        char *s = nd_get_link(nd);
+        if (!IS_ERR(s))
+                kfree(s);
+}
+/**
+ * gfs2_permission - Check access permissions on an inode
+ * @inode: The inode
+ * @mask: The mask to be tested
+ * @flags: Indicates whether this is an RCU path walk or not
+ *
+ * This may be called from the VFS directly, or from within GFS2 with the
+ * inode locked, so we look to see if the glock is already locked and only
+ * lock the glock if that has not already been done.
+ *
+ * Returns: errno (-ECHILD if the glock would have to be taken during an RCU path walk)
+ */
+int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
+{
+        struct gfs2_inode *ip;
+        struct gfs2_holder i_gh;
+        int error;
+        int unlock = 0;
+        ip = GFS2_I(inode);
+        if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
+                if (flags & IPERM_FLAG_RCU)
+                        return -ECHILD;
+                error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+                if (error)
+                        return error;
+                unlock = 1;
+        }
+        if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
+                error = -EACCES;
+        else
+                error = generic_permission(inode, mask, flags, gfs2_check_acl);
+        if (unlock)
+                gfs2_glock_dq_uninit(&i_gh);
+        return error;
 }
 static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
@@ -928,8 +1594,6 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 * @ip:
 * @attr:
 *
- * Called with a reference on the vnode.
- *
 * Returns: errno
 */
@@ -949,60 +1613,280 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
        return error;
 }
-void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
+static int setattr_chown(struct inode *inode, struct iattr *attr)
-{
+{
-        struct gfs2_dinode *str = buf;
+        struct gfs2_inode *ip = GFS2_I(inode);
+        struct gfs2_sbd *sdp = GFS2_SB(inode);
-        str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+        u32 ouid, ogid, nuid, ngid;
-        str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
+        int error;
-        str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
-        str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
+        ouid = inode->i_uid;
-        str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
+        ogid = inode->i_gid;
-        str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
+        nuid = attr->ia_uid;
-        str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
+        ngid = attr->ia_gid;
-        str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
-        str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
+        if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
-        str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
+                ouid = nuid = NO_QUOTA_CHANGE;
-        str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
+        if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
-        str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+                ogid = ngid = NO_QUOTA_CHANGE;
-        str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
-        str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
+        if (!gfs2_alloc_get(ip))
+                return -ENOMEM;
-        str->di_goal_meta = cpu_to_be64(ip->i_goal);
-        str->di_goal_data = cpu_to_be64(ip->i_goal);
+        error = gfs2_quota_lock(ip, nuid, ngid);
-        str->di_generation = cpu_to_be64(ip->i_generation);
+        if (error)
+                goto out_alloc;
-        str->di_flags = cpu_to_be32(ip->i_diskflags);
-        str->di_height = cpu_to_be16(ip->i_height);
+        if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
-        str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
+                error = gfs2_quota_check(ip, nuid, ngid);
-                                             !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
+                if (error)
-                                             GFS2_FORMAT_DE : 0);
+                        goto out_gunlock_q;
-        str->di_depth = cpu_to_be16(ip->i_depth);
+        }
-        str->di_entries = cpu_to_be32(ip->i_entries);
+        error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
-        str->di_eattr = cpu_to_be64(ip->i_eattr);
+        if (error)
-        str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
+                goto out_gunlock_q;
-        str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
-        str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
+        error = gfs2_setattr_simple(ip, attr);
-}
+        if (error)
+                goto out_end_trans;
-void gfs2_dinode_print(const struct gfs2_inode *ip)
-{
+        if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
-        printk(KERN_INFO "  no_formal_ino = %llu\n",
+                u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
-               (unsigned long long)ip->i_no_formal_ino);
+                gfs2_quota_change(ip, -blocks, ouid, ogid);
-        printk(KERN_INFO "  no_addr = %llu\n",
+                gfs2_quota_change(ip, blocks, nuid, ngid);
-               (unsigned long long)ip->i_no_addr);
+        }
-        printk(KERN_INFO "  i_size = %llu\n",
-               (unsigned long long)i_size_read(&ip->i_inode));
+out_end_trans:
-        printk(KERN_INFO "  blocks = %llu\n",
+        gfs2_trans_end(sdp);
-               (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode));
+out_gunlock_q:
-        printk(KERN_INFO "  i_goal = %llu\n",
+        gfs2_quota_unlock(ip);
-               (unsigned long long)ip->i_goal);
+out_alloc:
-        printk(KERN_INFO "  i_diskflags = 0x%.8X\n", ip->i_diskflags);
+        gfs2_alloc_put(ip);
-        printk(KERN_INFO "  i_height = %u\n", ip->i_height);
+        return error;
-        printk(KERN_INFO "  i_depth = %u\n", ip->i_depth);
+}
-        printk(KERN_INFO "  i_entries = %u\n", ip->i_entries);
-        printk(KERN_INFO "  i_eattr = %llu\n",
+/**
-               (unsigned long long)ip->i_eattr);
+ * gfs2_setattr - Change attributes on an inode
+ * @dentry: The dentry which is changing
+ * @attr: The structure describing the change
+ *
+ * The VFS layer wants to change one or more of an inode's attributes.  Write
+ * that change out to disk.
+ *
+ * Returns: errno
+ */
+static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
+{
+        struct inode *inode = dentry->d_inode;
+        struct gfs2_inode *ip = GFS2_I(inode);
+        struct gfs2_holder i_gh;
+        int error;
+        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+        if (error)
+                return error;
+        error = -EPERM;
+        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+                goto out;
+        error = inode_change_ok(inode, attr);
+        if (error)
+                goto out;
+        if (attr->ia_valid & ATTR_SIZE)
+                error = gfs2_setattr_size(inode, attr->ia_size);
+        else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
+                error = setattr_chown(inode, attr);
+        else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
+                error = gfs2_acl_chmod(ip, attr);
+        else
+                error = gfs2_setattr_simple(ip, attr);
+out:
+        gfs2_glock_dq_uninit(&i_gh);
+        if (!error)
+                mark_inode_dirty(inode);
+        return error;
+}
+/**
+ * gfs2_getattr - Read out an inode's attributes
+ * @mnt: The vfsmount the inode is being accessed from
+ * @dentry: The dentry to stat
+ * @stat: The inode's stats
+ *
+ * This may be called from the VFS directly, or from within GFS2 with the
+ * inode locked, so we look to see if the glock is already locked and only
+ * lock the glock if that has not already been done. Note that it is the NFS
+ * readdirplus operation which causes this to be called (from filldir)
+ * with the glock already held.
+ *
+ * Returns: errno
+ */
+static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
+                        struct kstat *stat)
+{
+        struct inode *inode = dentry->d_inode;
+        struct gfs2_inode *ip = GFS2_I(inode);
+        struct gfs2_holder gh;
+        int error;
+        int unlock = 0;
+        if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
+                error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+                if (error)
+                        return error;
+                unlock = 1;
+        }
+        generic_fillattr(inode, stat);
+        if (unlock)
+                gfs2_glock_dq_uninit(&gh);
+        return 0;
+}
+static int gfs2_setxattr(struct dentry *dentry, const char *name,
+                         const void *data, size_t size, int flags)
+{
+        struct inode *inode = dentry->d_inode;
+        struct gfs2_inode *ip = GFS2_I(inode);
+        struct gfs2_holder gh;
+        int ret;
+        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+        ret = gfs2_glock_nq(&gh);
+        if (ret == 0) {
+                ret = generic_setxattr(dentry, name, data, size, flags);
+                gfs2_glock_dq(&gh);
+        }
+        gfs2_holder_uninit(&gh);
+        return ret;
+}
+static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
+                             void *data, size_t size)
+{
+        struct inode *inode = dentry->d_inode;
+        struct gfs2_inode *ip = GFS2_I(inode);
+        struct gfs2_holder gh;
+        int ret;
+        gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+        ret = gfs2_glock_nq(&gh);
+        if (ret == 0) {
+                ret = generic_getxattr(dentry, name, data, size);
+                gfs2_glock_dq(&gh);
+        }
+        gfs2_holder_uninit(&gh);
+        return ret;
+}
+static int gfs2_removexattr(struct dentry *dentry, const char *name)
+{
+        struct inode *inode = dentry->d_inode;
+        struct gfs2_inode *ip = GFS2_I(inode);
+        struct gfs2_holder gh;
+        int ret;
+        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+        ret = gfs2_glock_nq(&gh);
+        if (ret == 0) {
+                ret = generic_removexattr(dentry, name);
+                gfs2_glock_dq(&gh);
+        }
+        gfs2_holder_uninit(&gh);
+        return ret;
+}
+static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+                       u64 start, u64 len)
+{
+        struct gfs2_inode *ip = GFS2_I(inode);
+        struct gfs2_holder gh;
+        int ret;
+        ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+        if (ret)
+                return ret;
+        mutex_lock(&inode->i_mutex);
+        ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+        if (ret)
+                goto out;
+        if (gfs2_is_stuffed(ip)) {
+                u64 phys = ip->i_no_addr << inode->i_blkbits;
+                u64 size = i_size_read(inode);
+                u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
+                            FIEMAP_EXTENT_DATA_INLINE;
+                phys += sizeof(struct gfs2_dinode);
+                phys += start;
+                if (start + len > size)
+                        len = size - start;
+                if (start < size)
+                        ret = fiemap_fill_next_extent(fieinfo, start, phys,
+                                                      len, flags);
+                if (ret == 1)
+                        ret = 0;
+        } else {
+                ret = __generic_block_fiemap(inode, fieinfo, start, len,
+                                             gfs2_block_map);
+        }
+        gfs2_glock_dq_uninit(&gh);
+out:
+        mutex_unlock(&inode->i_mutex);
+        return ret;
 }
+const struct inode_operations gfs2_file_iops = {
+        .permission = gfs2_permission,
+        .setattr = gfs2_setattr,
+        .getattr = gfs2_getattr,
+        .setxattr = gfs2_setxattr,
+        .getxattr = gfs2_getxattr,
+        .listxattr = gfs2_listxattr,
+        .removexattr = gfs2_removexattr,
+        .fiemap = gfs2_fiemap,
+};
+const struct inode_operations gfs2_dir_iops = {
+        .create = gfs2_create,
+        .lookup = gfs2_lookup,
+        .link = gfs2_link,
+        .unlink = gfs2_unlink,
+        .symlink = gfs2_symlink,
+        .mkdir = gfs2_mkdir,
+        .rmdir = gfs2_unlink,
+        .mknod = gfs2_mknod,
+        .rename = gfs2_rename,
+        .permission = gfs2_permission,
+        .setattr = gfs2_setattr,
+        .getattr = gfs2_getattr,
+        .setxattr = gfs2_setxattr,
+        .getxattr = gfs2_getxattr,
+        .listxattr = gfs2_listxattr,
+        .removexattr = gfs2_removexattr,
+        .fiemap = gfs2_fiemap,
+};
+const struct inode_operations gfs2_symlink_iops = {
+        .readlink = generic_readlink,
+        .follow_link = gfs2_follow_link,
+        .put_link = gfs2_put_link,
+        .permission = gfs2_permission,
+        .setattr = gfs2_setattr,
+        .getattr = gfs2_getattr,
+        .setxattr = gfs2_setxattr,
+        .getxattr = gfs2_getxattr,
+        .listxattr = gfs2_listxattr,
+        .removexattr = gfs2_removexattr,
+        .fiemap = gfs2_fiemap,
+};
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 099ca305e518..31606076f701 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -102,22 +102,16 @@ extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
 extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
                                         u64 *no_formal_ino,
                                         unsigned int blktype);
-extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
+extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int nonblock);
 extern int gfs2_inode_refresh(struct gfs2_inode *ip);
-extern int gfs2_dinode_dealloc(struct gfs2_inode *inode);
-extern int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
 extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
                                  int is_root);
-extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
-                                  const struct qstr *name,
-                                  unsigned int mode, dev_t dev);
 extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags);
 extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
 extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
 extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
-extern void gfs2_dinode_print(const struct gfs2_inode *ip);
 extern const struct inode_operations gfs2_file_iops;
 extern const struct inode_operations gfs2_dir_iops;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 5b102c1887fd..903115f2bb34 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -18,6 +18,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/bio.h>
+#include <linux/writeback.h>
 #include "gfs2.h"
 #include "incore.h"
@@ -83,55 +84,97 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
 /**
 * gfs2_ail1_start_one - Start I/O on a part of the AIL
 * @sdp: the filesystem
- * @tr: the part of the AIL
+ * @wbc: The writeback control structure
+ * @ai: The ail structure
 *
 */
-static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
+                               struct writeback_control *wbc,
+                               struct gfs2_ail *ai)
 __releases(&sdp->sd_ail_lock)
 __acquires(&sdp->sd_ail_lock)
 {
+        struct gfs2_glock *gl = NULL;
+        struct address_space *mapping;
        struct gfs2_bufdata *bd, *s;
        struct buffer_head *bh;
-        int retry;
-        do {
+        list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, bd_ail_st_list) {
-                retry = 0;
+                bh = bd->bd_bh;
-                list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
+                gfs2_assert(sdp, bd->bd_ail == ai);
-                                                 bd_ail_st_list) {
-                        bh = bd->bd_bh;
-                        gfs2_assert(sdp, bd->bd_ail == ai);
+                if (!buffer_busy(bh)) {
+                        if (!buffer_uptodate(bh))
+                                gfs2_io_error_bh(sdp, bh);
+                        list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+                        continue;
+                }
-                        if (!buffer_busy(bh)) {
+                if (!buffer_dirty(bh))
-                                if (!buffer_uptodate(bh))
+                        continue;
-                                        gfs2_io_error_bh(sdp, bh);
+                if (gl == bd->bd_gl)
-                                list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+                        continue;
-                                continue;
+                gl = bd->bd_gl;
-                        }
+                list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
+                mapping = bh->b_page->mapping;
+                if (!mapping)
+                        continue;
+                spin_unlock(&sdp->sd_ail_lock);
+                generic_writepages(mapping, wbc);
+                spin_lock(&sdp->sd_ail_lock);
+                if (wbc->nr_to_write <= 0)
+                        break;
+                return 1;
+        }
-                        if (!buffer_dirty(bh))
+        return 0;
-                                continue;
+}
-                        list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
-                        get_bh(bh);
+/**
-                        spin_unlock(&sdp->sd_ail_lock);
+ * gfs2_ail1_flush - start writeback of some ail1 entries
-                        lock_buffer(bh);
+ * @sdp: The super block
-                        if (test_clear_buffer_dirty(bh)) {
+ * @wbc: The writeback control structure
-                                bh->b_end_io = end_buffer_write_sync;
+ *
-                                submit_bh(WRITE_SYNC, bh);
+ * Writes back some ail1 entries, according to the limits in the
-                        } else {
+ * writeback control structure
-                                unlock_buffer(bh);
+ */
-                                brelse(bh);
-                        }
+void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
-                        spin_lock(&sdp->sd_ail_lock);
+{
+        struct list_head *head = &sdp->sd_ail1_list;
-                        retry = 1;
+        struct gfs2_ail *ai;
+        trace_gfs2_ail_flush(sdp, wbc, 1);
+        spin_lock(&sdp->sd_ail_lock);
+restart:
+        list_for_each_entry_reverse(ai, head, ai_list) {
+                if (wbc->nr_to_write <= 0)
                        break;
-                }
+                if (gfs2_ail1_start_one(sdp, wbc, ai))
-        } while (retry);
+                        goto restart;
+        }
+        spin_unlock(&sdp->sd_ail_lock);
+        trace_gfs2_ail_flush(sdp, wbc, 0);
+}
+/**
+ * gfs2_ail1_start - start writeback of all ail1 entries
+ * @sdp: The superblock
+ */
+static void gfs2_ail1_start(struct gfs2_sbd *sdp)
+{
+        struct writeback_control wbc = {
+                .sync_mode = WB_SYNC_NONE,
+                .nr_to_write = LONG_MAX,
+                .range_start = 0,
+                .range_end = LLONG_MAX,
+        };
+        return gfs2_ail1_flush(sdp, &wbc);
 }
 /**
@@ -141,7 +184,7 @@ __acquires(&sdp->sd_ail_lock)
 *
 */
-static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags)
+static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
 {
        struct gfs2_bufdata *bd, *s;
        struct buffer_head *bh;
@@ -149,76 +192,63 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
        list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
                                         bd_ail_st_list) {
                bh = bd->bd_bh;
                gfs2_assert(sdp, bd->bd_ail == ai);
+                if (buffer_busy(bh))
-                if (buffer_busy(bh)) {
+                        continue;
-                        if (flags & DIO_ALL)
-                                continue;
-                        else
-                                break;
-                }
                if (!buffer_uptodate(bh))
                        gfs2_io_error_bh(sdp, bh);
                list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
        }
-        return list_empty(&ai->ai_ail1_list);
 }
-static void gfs2_ail1_start(struct gfs2_sbd *sdp)
+/**
-{
+ * gfs2_ail1_empty - Try to empty the ail1 lists
-        struct list_head *head;
+ * @sdp: The superblock
-        u64 sync_gen;
+ *
-        struct gfs2_ail *ai;
+ * Tries to empty the ail1 lists, starting with the oldest first
-        int done = 0;
+ */
-        spin_lock(&sdp->sd_ail_lock);
-        head = &sdp->sd_ail1_list;
-        if (list_empty(head)) {
-                spin_unlock(&sdp->sd_ail_lock);
-                return;
-        }
-        sync_gen = sdp->sd_ail_sync_gen++;
-        while(!done) {
-                done = 1;
-                list_for_each_entry_reverse(ai, head, ai_list) {
-                        if (ai->ai_sync_gen >= sync_gen)
-                                continue;
-                        ai->ai_sync_gen = sync_gen;
-                        gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */
-                        done = 0;
-                        break;
-                }
-        }
-        spin_unlock(&sdp->sd_ail_lock);
-}
-static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
+static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
 {
        struct gfs2_ail *ai, *s;
        int ret;
        spin_lock(&sdp->sd_ail_lock);
        list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
-                if (gfs2_ail1_empty_one(sdp, ai, flags))
+                gfs2_ail1_empty_one(sdp, ai);
+                if (list_empty(&ai->ai_ail1_list))
                        list_move(&ai->ai_list, &sdp->sd_ail2_list);
-                else if (!(flags & DIO_ALL))
+                else
                        break;
        }
        ret = list_empty(&sdp->sd_ail1_list);
        spin_unlock(&sdp->sd_ail_lock);
        return ret;
 }
+static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
+{
+        struct gfs2_ail *ai;
+        struct gfs2_bufdata *bd;
+        struct buffer_head *bh;
+        spin_lock(&sdp->sd_ail_lock);
+        list_for_each_entry_reverse(ai, &sdp->sd_ail1_list, ai_list) {
+                list_for_each_entry(bd, &ai->ai_ail1_list, bd_ail_st_list) {
+                        bh = bd->bd_bh;
+                        if (!buffer_locked(bh))
+                                continue;
+                        get_bh(bh);
+                        spin_unlock(&sdp->sd_ail_lock);
+                        wait_on_buffer(bh);
+                        brelse(bh);
+                        return;
+                }
+        }
+        spin_unlock(&sdp->sd_ail_lock);
+}
 /**
 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
@@ -574,7 +604,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
        set_buffer_uptodate(bh);
        clear_buffer_dirty(bh);
-        gfs2_ail1_empty(sdp, 0);
+        gfs2_ail1_empty(sdp);
        tail = current_tail(sdp);
        lh = (struct gfs2_log_header *)bh->b_data;
@@ -869,9 +899,9 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
        gfs2_log_flush(sdp, NULL);
        for (;;) {
                gfs2_ail1_start(sdp);
-                if (gfs2_ail1_empty(sdp, DIO_ALL))
+                gfs2_ail1_wait(sdp);
+                if (gfs2_ail1_empty(sdp))
                        break;
-                msleep(10);
        }
 }
@@ -905,20 +935,20 @@ int gfs2_logd(void *data)
                preflush = atomic_read(&sdp->sd_log_pinned);
                if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
-                        gfs2_ail1_empty(sdp, DIO_ALL);
+                        gfs2_ail1_empty(sdp);
                        gfs2_log_flush(sdp, NULL);
-                        gfs2_ail1_empty(sdp, DIO_ALL);
                }
                if (gfs2_ail_flush_reqd(sdp)) {
                        gfs2_ail1_start(sdp);
-                        io_schedule();
+                        gfs2_ail1_wait(sdp);
-                        gfs2_ail1_empty(sdp, 0);
+                        gfs2_ail1_empty(sdp);
                        gfs2_log_flush(sdp, NULL);
-                        gfs2_ail1_empty(sdp, DIO_ALL);
                }
-                wake_up(&sdp->sd_log_waitq);
+                if (!gfs2_ail_flush_reqd(sdp))
+                        wake_up(&sdp->sd_log_waitq);
                t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
                if (freezing(current))
                        refrigerator();
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 0d007f920234..ab0621698b73 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -12,6 +12,7 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/writeback.h>
 #include "incore.h"
 /**
@@ -59,6 +60,7 @@ extern struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
 extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
 extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
 extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
+extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
 extern void gfs2_log_shutdown(struct gfs2_sbd *sdp);
 extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 51d27f00ebb4..05bbb124699f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -40,7 +40,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
 {
        struct gfs2_bufdata *bd;
-        gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
+        BUG_ON(!current->journal_info);
        clear_buffer_dirty(bh);
        if (test_set_buffer_pinned(bh))
@@ -65,6 +65,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to unpin
 * @ai:
+ * @flags: The inode dirty flags
 *
 */
@@ -73,10 +74,8 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
 {
        struct gfs2_bufdata *bd = bh->b_private;
-        gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
+        BUG_ON(!buffer_uptodate(bh));
+        BUG_ON(!buffer_pinned(bh));
-        if (!buffer_pinned(bh))
-                gfs2_assert_withdraw(sdp, 0);
        lock_buffer(bh);
        mark_buffer_dirty(bh);
@@ -95,8 +94,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
        list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
        spin_unlock(&sdp->sd_ail_lock);
-        if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags))
+        clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
-                gfs2_glock_schedule_for_reclaim(bd->bd_gl);
        trace_gfs2_pin(bd, 0);
        unlock_buffer(bh);
        atomic_dec(&sdp->sd_log_pinned);
@@ -322,12 +320,16 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
 static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
 {
+        struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
+        struct gfs2_glock *gl = bd->bd_gl;
        struct gfs2_trans *tr;
        tr = current->journal_info;
        tr->tr_touched = 1;
        tr->tr_num_revoke++;
        sdp->sd_log_num_revoke++;
+        atomic_inc(&gl->gl_revokes);
+        set_bit(GLF_LFLUSH, &gl->gl_flags);
        list_add(&le->le_list, &sdp->sd_log_le_revoke);
 }
@@ -350,9 +352,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
        ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
        offset = sizeof(struct gfs2_log_descriptor);
-        while (!list_empty(head)) {
+        list_for_each_entry(bd, head, bd_le.le_list) {
-                bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
-                list_del_init(&bd->bd_le.le_list);
                sdp->sd_log_num_revoke--;
                if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
@@ -367,8 +367,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
                }
                *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
-                kmem_cache_free(gfs2_bufdata_cachep, bd);
                offset += sizeof(u64);
        }
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
@@ -376,6 +374,22 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
        submit_bh(WRITE_SYNC, bh);
 }
+static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+{
+        struct list_head *head = &sdp->sd_log_le_revoke;
+        struct gfs2_bufdata *bd;
+        struct gfs2_glock *gl;
+        while (!list_empty(head)) {
+                bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
+                list_del_init(&bd->bd_le.le_list);
+                gl = bd->bd_gl;
+                atomic_dec(&gl->gl_revokes);
+                clear_bit(GLF_LFLUSH, &gl->gl_flags);
+                kmem_cache_free(gfs2_bufdata_cachep, bd);
+        }
+}
 static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
                                  struct gfs2_log_header_host *head, int pass)
 {
@@ -749,6 +763,7 @@ const struct gfs2_log_operations gfs2_buf_lops = {
 const struct gfs2_log_operations gfs2_revoke_lops = {
        .lo_add = revoke_lo_add,
        .lo_before_commit = revoke_lo_before_commit,
+        .lo_after_commit = revoke_lo_after_commit,
        .lo_before_scan = revoke_lo_before_scan,
        .lo_scan_elements = revoke_lo_scan_elements,
        .lo_after_scan = revoke_lo_after_scan,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 888a5f5a1a58..cfa327d33194 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -53,6 +53,7 @@ static void gfs2_init_glock_once(void *foo)
        INIT_LIST_HEAD(&gl->gl_lru);
        INIT_LIST_HEAD(&gl->gl_ail_list);
        atomic_set(&gl->gl_ail_count, 0);
+        atomic_set(&gl->gl_revokes, 0);
 }
 static void gfs2_init_gl_aspace_once(void *foo)
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 675349b5a133..747238cd9f96 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -31,6 +31,7 @@
 #include "rgrp.h"
 #include "trans.h"
 #include "util.h"
+#include "trace_gfs2.h"
 static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
 {
@@ -310,6 +311,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
        struct gfs2_bufdata *bd = bh->b_private;
        if (test_clear_buffer_pinned(bh)) {
+                trace_gfs2_pin(bd, 0);
                atomic_dec(&sdp->sd_log_pinned);
                list_del_init(&bd->bd_le.le_list);
                if (meta) {
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 6a1d9ba16411..22c526593131 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -77,8 +77,6 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
 #define buffer_busy(bh) \
 ((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
-#define buffer_in_io(bh) \
-((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
 #endif /* __DIO_DOT_H__ */
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index d3c69eb91c74..8ac9ae189b53 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -126,8 +126,10 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 * changed.
 */
-static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
+static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
 {
+        struct gfs2_sb_host *sb = &sdp->sd_sb;
        if (sb->sb_magic != GFS2_MAGIC ||
            sb->sb_type != GFS2_METATYPE_SB) {
                if (!silent)
@@ -157,8 +159,10 @@ static void end_bio_io_page(struct bio *bio, int error)
        unlock_page(page);
 }
-static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
+static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
 {
+        struct gfs2_sb_host *sb = &sdp->sd_sb;
+        struct super_block *s = sdp->sd_vfs;
        const struct gfs2_sb *str = buf;
        sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
@@ -175,7 +179,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
        memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
        memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
-        memcpy(sb->sb_uuid, str->sb_uuid, 16);
+        memcpy(s->s_uuid, str->sb_uuid, 16);
 }
 /**
@@ -197,7 +201,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
 * Returns: 0 on success or error
 */
-static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
+static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
 {
        struct super_block *sb = sdp->sd_vfs;
        struct gfs2_sb *p;
@@ -227,10 +231,10 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
                return -EIO;
        }
        p = kmap(page);
-        gfs2_sb_in(&sdp->sd_sb, p);
+        gfs2_sb_in(sdp, p);
        kunmap(page);
        __free_page(page);
-        return 0;
+        return gfs2_check_sb(sdp, silent);
 }
 /**
@@ -247,17 +251,13 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
        unsigned int x;
        int error;
-        error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+        error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
        if (error) {
                if (!silent)
                        fs_err(sdp, "can't read superblock\n");
                return error;
        }
-        error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
-        if (error)
-                return error;
        sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
                               GFS2_BASIC_BLOCK_SHIFT;
        sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
@@ -340,14 +340,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
        /*  Try to autodetect  */
        if (!proto[0] || !table[0]) {
-                error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+                error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
                if (error)
                        return error;
-                error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
-                if (error)
-                        goto out;
                if (!proto[0])
                        proto = sdp->sd_sb.sb_lockproto;
                if (!table[0])
@@ -364,7 +360,6 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
        while ((table = strchr(table, '/')))
                *table = '_';
-out:
        return error;
 }
@@ -1119,8 +1114,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
        if (sdp->sd_args.ar_statfs_quantum) {
                sdp->sd_tune.gt_statfs_slow = 0;
                sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum;
-        }
+        } else {
-        else {
                sdp->sd_tune.gt_statfs_slow = 1;
                sdp->sd_tune.gt_statfs_quantum = 30;
        }
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
deleted file mode 100644
index 09e436a50723..000000000000
--- a/fs/gfs2/ops_inode.c
+++ /dev/null
@@ -1,1344 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <linux/namei.h>
-#include <linux/mm.h>
-#include <linux/xattr.h>
-#include <linux/posix_acl.h>
-#include <linux/gfs2_ondisk.h>
-#include <linux/crc32.h>
-#include <linux/fiemap.h>
-#include <asm/uaccess.h>
-#include "gfs2.h"
-#include "incore.h"
-#include "acl.h"
-#include "bmap.h"
-#include "dir.h"
-#include "xattr.h"
-#include "glock.h"
-#include "inode.h"
-#include "meta_io.h"
-#include "quota.h"
-#include "rgrp.h"
-#include "trans.h"
-#include "util.h"
-#include "super.h"
-/**
- * gfs2_create - Create a file
- * @dir: The directory in which to create the file
- * @dentry: The dentry of the new file
- * @mode: The mode of the new file
- *
- * Returns: errno
- */
-static int gfs2_create(struct inode *dir, struct dentry *dentry,
-                       int mode, struct nameidata *nd)
-{
-        struct gfs2_inode *dip = GFS2_I(dir);
-        struct gfs2_sbd *sdp = GFS2_SB(dir);
-        struct gfs2_holder ghs[2];
-        struct inode *inode;
-        gfs2_holder_init(dip->i_gl, 0, 0, ghs);
-        for (;;) {
-                inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0);
-                if (!IS_ERR(inode)) {
-                        gfs2_trans_end(sdp);
-                        if (dip->i_alloc->al_rgd)
-                                gfs2_inplace_release(dip);
-                        gfs2_quota_unlock(dip);
-                        gfs2_alloc_put(dip);
-                        gfs2_glock_dq_uninit_m(2, ghs);
-                        mark_inode_dirty(inode);
-                        break;
-                } else if (PTR_ERR(inode) != -EEXIST ||
-                           (nd && nd->flags & LOOKUP_EXCL)) {
-                        gfs2_holder_uninit(ghs);
-                        return PTR_ERR(inode);
-                }
-                inode = gfs2_lookupi(dir, &dentry->d_name, 0);
-                if (inode) {
-                        if (!IS_ERR(inode)) {
-                                gfs2_holder_uninit(ghs);
-                                break;
-                        } else {
-                                gfs2_holder_uninit(ghs);
-                                return PTR_ERR(inode);
-                        }
-                }
-        }
-        d_instantiate(dentry, inode);
-        return 0;
-}
-/**
- * gfs2_lookup - Look up a filename in a directory and return its inode
- * @dir: The directory inode
- * @dentry: The dentry of the new inode
- * @nd: passed from Linux VFS, ignored by us
- *
- * Called by the VFS layer. Lock dir and call gfs2_lookupi()
- *
- * Returns: errno
- */
-static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
-                                  struct nameidata *nd)
-{
-        struct inode *inode = NULL;
-        inode = gfs2_lookupi(dir, &dentry->d_name, 0);
-        if (inode && IS_ERR(inode))
-                return ERR_CAST(inode);
-        if (inode) {
-                struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
-                struct gfs2_holder gh;
-                int error;
-                error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
-                if (error) {
-                        iput(inode);
-                        return ERR_PTR(error);
-                }
-                gfs2_glock_dq_uninit(&gh);
-                return d_splice_alias(inode, dentry);
-        }
-        d_add(dentry, inode);
-        return NULL;
-}
-/**
- * gfs2_link - Link to a file
- * @old_dentry: The inode to link
- * @dir: Add link to this directory
- * @dentry: The name of the link
- *
- * Link the inode in "old_dentry" into the directory "dir" with the
- * name in "dentry".
- *
- * Returns: errno
- */
-static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
-                     struct dentry *dentry)
-{
-        struct gfs2_inode *dip = GFS2_I(dir);
-        struct gfs2_sbd *sdp = GFS2_SB(dir);
-        struct inode *inode = old_dentry->d_inode;
-        struct gfs2_inode *ip = GFS2_I(inode);
-        struct gfs2_holder ghs[2];
-        int alloc_required;
-        int error;
-        if (S_ISDIR(inode->i_mode))
-                return -EPERM;
-        gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
-        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
-        error = gfs2_glock_nq(ghs); /* parent */
-        if (error)
-                goto out_parent;
-        error = gfs2_glock_nq(ghs + 1); /* child */
-        if (error)
-                goto out_child;
-        error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
-        if (error)
-                goto out_gunlock;
-        error = gfs2_dir_check(dir, &dentry->d_name, NULL);
-        switch (error) {
-        case -ENOENT:
-                break;
-        case 0:
-                error = -EEXIST;
-        default:
-                goto out_gunlock;
-        }
-        error = -EINVAL;
-        if (!dip->i_inode.i_nlink)
-                goto out_gunlock;
-        error = -EFBIG;
-        if (dip->i_entries == (u32)-1)
-                goto out_gunlock;
-        error = -EPERM;
-        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                goto out_gunlock;
-        error = -EINVAL;
-        if (!ip->i_inode.i_nlink)
-                goto out_gunlock;
-        error = -EMLINK;
-        if (ip->i_inode.i_nlink == (u32)-1)
-                goto out_gunlock;
-        alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
-        if (error < 0)
-                goto out_gunlock;
-        error = 0;
-        if (alloc_required) {
-                struct gfs2_alloc *al = gfs2_alloc_get(dip);
-                if (!al) {
-                        error = -ENOMEM;
-                        goto out_gunlock;
-                }
-                error = gfs2_quota_lock_check(dip);
-                if (error)
-                        goto out_alloc;
-                al->al_requested = sdp->sd_max_dirres;
-                error = gfs2_inplace_reserve(dip);
-                if (error)
-                        goto out_gunlock_q;
-                error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-                                         gfs2_rg_blocks(al) +
-                                         2 * RES_DINODE + RES_STATFS +
-                                         RES_QUOTA, 0);
-                if (error)
-                        goto out_ipres;
-        } else {
-                error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
-                if (error)
-                        goto out_ipres;
-        }
-        error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode));
-        if (error)
-                goto out_end_trans;
-        error = gfs2_change_nlink(ip, +1);
-out_end_trans:
-        gfs2_trans_end(sdp);
-out_ipres:
-        if (alloc_required)
-                gfs2_inplace_release(dip);
-out_gunlock_q:
-        if (alloc_required)
-                gfs2_quota_unlock(dip);
-out_alloc:
-        if (alloc_required)
-                gfs2_alloc_put(dip);
-out_gunlock:
-        gfs2_glock_dq(ghs + 1);
-out_child:
-        gfs2_glock_dq(ghs);
-out_parent:
-        gfs2_holder_uninit(ghs);
-        gfs2_holder_uninit(ghs + 1);
-        if (!error) {
-                ihold(inode);
-                d_instantiate(dentry, inode);
-                mark_inode_dirty(inode);
-        }
-        return error;
-}
-/*
- * gfs2_unlink_ok - check to see that a inode is still in a directory
- * @dip: the directory
- * @name: the name of the file
- * @ip: the inode
- *
- * Assumes that the lock on (at least) @dip is held.
- *
- * Returns: 0 if the parent/child relationship is correct, errno if it isn't
- */
-static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-                          const struct gfs2_inode *ip)
-{
-        int error;
-        if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
-                return -EPERM;
-        if ((dip->i_inode.i_mode & S_ISVTX) &&
-            dip->i_inode.i_uid != current_fsuid() &&
-            ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
-                return -EPERM;
-        if (IS_APPEND(&dip->i_inode))
-                return -EPERM;
-        error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
-        if (error)
-                return error;
-        error = gfs2_dir_check(&dip->i_inode, name, ip);
-        if (error)
-                return error;
-        return 0;
-}
-/**
- * gfs2_unlink - Unlink a file
- * @dir: The inode of the directory containing the file to unlink
- * @dentry: The file itself
- *
- * Unlink a file.  Call gfs2_unlinki()
- *
- * Returns: errno
- */
-static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
-{
-        struct gfs2_inode *dip = GFS2_I(dir);
-        struct gfs2_sbd *sdp = GFS2_SB(dir);
-        struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
-        struct gfs2_holder ghs[3];
-        struct gfs2_rgrpd *rgd;
-        struct gfs2_holder ri_gh;
-        int error;
-        error = gfs2_rindex_hold(sdp, &ri_gh);
-        if (error)
-                return error;
-        gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
-        gfs2_holder_init(ip->i_gl,  LM_ST_EXCLUSIVE, 0, ghs + 1);
-        rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
-        gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
-        error = gfs2_glock_nq(ghs); /* parent */
-        if (error)
-                goto out_parent;
-        error = gfs2_glock_nq(ghs + 1); /* child */
-        if (error)
-                goto out_child;
-        error = gfs2_glock_nq(ghs + 2); /* rgrp */
-        if (error)
-                goto out_rgrp;
-        error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
-        if (error)
-                goto out_gunlock;
-        error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
-        if (error)
-                goto out_gunlock;
-        error = gfs2_dir_del(dip, &dentry->d_name);
-        if (error)
-                goto out_end_trans;
-        error = gfs2_change_nlink(ip, -1);
-out_end_trans:
-        gfs2_trans_end(sdp);
-out_gunlock:
-        gfs2_glock_dq(ghs + 2);
-out_rgrp:
-        gfs2_holder_uninit(ghs + 2);
-        gfs2_glock_dq(ghs + 1);
-out_child:
-        gfs2_holder_uninit(ghs + 1);
-        gfs2_glock_dq(ghs);
-out_parent:
-        gfs2_holder_uninit(ghs);
-        gfs2_glock_dq_uninit(&ri_gh);
-        return error;
-}
-/**
- * gfs2_symlink - Create a symlink
- * @dir: The directory to create the symlink in
- * @dentry: The dentry to put the symlink in
- * @symname: The thing which the link points to
- *
- * Returns: errno
- */
-static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
-                        const char *symname)
-{
-        struct gfs2_inode *dip = GFS2_I(dir), *ip;
-        struct gfs2_sbd *sdp = GFS2_SB(dir);
-        struct gfs2_holder ghs[2];
-        struct inode *inode;
-        struct buffer_head *dibh;
-        int size;
-        int error;
-        /* Must be stuffed with a null terminator for gfs2_follow_link() */
-        size = strlen(symname);
-        if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
-                return -ENAMETOOLONG;
-        gfs2_holder_init(dip->i_gl, 0, 0, ghs);
-        inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0);
-        if (IS_ERR(inode)) {
-                gfs2_holder_uninit(ghs);
-                return PTR_ERR(inode);
-        }
-        ip = ghs[1].gh_gl->gl_object;
-        i_size_write(inode, size);
-        error = gfs2_meta_inode_buffer(ip, &dibh);
-        if (!gfs2_assert_withdraw(sdp, !error)) {
-                gfs2_dinode_out(ip, dibh->b_data);
-                memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
-                       size);
-                brelse(dibh);
-        }
-        gfs2_trans_end(sdp);
-        if (dip->i_alloc->al_rgd)
-                gfs2_inplace_release(dip);
-        gfs2_quota_unlock(dip);
-        gfs2_alloc_put(dip);
-        gfs2_glock_dq_uninit_m(2, ghs);
-        d_instantiate(dentry, inode);
-        mark_inode_dirty(inode);
-        return 0;
-}
-/**
- * gfs2_mkdir - Make a directory
- * @dir: The parent directory of the new one
- * @dentry: The dentry of the new directory
- * @mode: The mode of the new directory
- *
- * Returns: errno
- */
-static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
-        struct gfs2_inode *dip = GFS2_I(dir), *ip;
-        struct gfs2_sbd *sdp = GFS2_SB(dir);
-        struct gfs2_holder ghs[2];
-        struct inode *inode;
-        struct buffer_head *dibh;
-        int error;
-        gfs2_holder_init(dip->i_gl, 0, 0, ghs);
-        inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0);
-        if (IS_ERR(inode)) {
-                gfs2_holder_uninit(ghs);
-                return PTR_ERR(inode);
-        }
-        ip = ghs[1].gh_gl->gl_object;
-        ip->i_inode.i_nlink = 2;
-        i_size_write(inode, sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
-        ip->i_diskflags |= GFS2_DIF_JDATA;
-        ip->i_entries = 2;
-        error = gfs2_meta_inode_buffer(ip, &dibh);
-        if (!gfs2_assert_withdraw(sdp, !error)) {
-                struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
-                struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
-                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-                gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
-                dent->de_inum = di->di_num; /* already GFS2 endian */
-                dent->de_type = cpu_to_be16(DT_DIR);
-                di->di_entries = cpu_to_be32(1);
-                dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
-                gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
-                gfs2_inum_out(dip, dent);
-                dent->de_type = cpu_to_be16(DT_DIR);
-                gfs2_dinode_out(ip, di);
-                brelse(dibh);
-        }
-        error = gfs2_change_nlink(dip, +1);
-        gfs2_assert_withdraw(sdp, !error); /* dip already pinned */
-        gfs2_trans_end(sdp);
-        if (dip->i_alloc->al_rgd)
-                gfs2_inplace_release(dip);
-        gfs2_quota_unlock(dip);
-        gfs2_alloc_put(dip);
-        gfs2_glock_dq_uninit_m(2, ghs);
-        d_instantiate(dentry, inode);
-        mark_inode_dirty(inode);
-        return 0;
-}
-/**
- * gfs2_rmdiri - Remove a directory
- * @dip: The parent directory of the directory to be removed
- * @name: The name of the directory to be removed
- * @ip: The GFS2 inode of the directory to be removed
- *
- * Assumes Glocks on dip and ip are held
- *
- * Returns: errno
- */
-static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
-                       struct gfs2_inode *ip)
-{
-        int error;
-        if (ip->i_entries != 2) {
-                if (gfs2_consist_inode(ip))
-                        gfs2_dinode_print(ip);
-                return -EIO;
-        }
-        error = gfs2_dir_del(dip, name);
-        if (error)
-                return error;
-        error = gfs2_change_nlink(dip, -1);
-        if (error)
-                return error;
-        error = gfs2_dir_del(ip, &gfs2_qdot);
-        if (error)
-                return error;
-        error = gfs2_dir_del(ip, &gfs2_qdotdot);
-        if (error)
-                return error;
-        /* It looks odd, but it really should be done twice */
-        error = gfs2_change_nlink(ip, -1);
-        if (error)
-                return error;
-        error = gfs2_change_nlink(ip, -1);
-        if (error)
-                return error;
-        return error;
-}
-/**
- * gfs2_rmdir - Remove a directory
- * @dir: The parent directory of the directory to be removed
- * @dentry: The dentry of the directory to remove
- *
- * Remove a directory. Call gfs2_rmdiri()
- *
- * Returns: errno
- */
-static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
-{
-        struct gfs2_inode *dip = GFS2_I(dir);
-        struct gfs2_sbd *sdp = GFS2_SB(dir);
-        struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
-        struct gfs2_holder ghs[3];
-        struct gfs2_rgrpd *rgd;
-        struct gfs2_holder ri_gh;
-        int error;
-        error = gfs2_rindex_hold(sdp, &ri_gh);
-        if (error)
-                return error;
-        gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
-        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
-        rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
-        gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
-        error = gfs2_glock_nq(ghs); /* parent */
-        if (error)
-                goto out_parent;
-        error = gfs2_glock_nq(ghs + 1); /* child */
-        if (error)
-                goto out_child;
-        error = gfs2_glock_nq(ghs + 2); /* rgrp */
-        if (error)
-                goto out_rgrp;
-        error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
-        if (error)
-                goto out_gunlock;
-        if (ip->i_entries < 2) {
-                if (gfs2_consist_inode(ip))
-                        gfs2_dinode_print(ip);
-                error = -EIO;
-                goto out_gunlock;
-        }
-        if (ip->i_entries > 2) {
-                error = -ENOTEMPTY;
-                goto out_gunlock;
-        }
-        error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF + RES_RG_BIT, 0);
-        if (error)
-                goto out_gunlock;
-        error = gfs2_rmdiri(dip, &dentry->d_name, ip);
-        gfs2_trans_end(sdp);
-out_gunlock:
-        gfs2_glock_dq(ghs + 2);
-out_rgrp:
-        gfs2_holder_uninit(ghs + 2);
-        gfs2_glock_dq(ghs + 1);
-out_child:
-        gfs2_holder_uninit(ghs + 1);
-        gfs2_glock_dq(ghs);
-out_parent:
-        gfs2_holder_uninit(ghs);
-        gfs2_glock_dq_uninit(&ri_gh);
-        return error;
-}
-/**
- * gfs2_mknod - Make a special file
- * @dir: The directory in which the special file will reside
- * @dentry: The dentry of the special file
- * @mode: The mode of the special file
- * @rdev: The device specification of the special file
- *
- */
-static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
-                      dev_t dev)
-{
-        struct gfs2_inode *dip = GFS2_I(dir);
-        struct gfs2_sbd *sdp = GFS2_SB(dir);
-        struct gfs2_holder ghs[2];
-        struct inode *inode;
-        gfs2_holder_init(dip->i_gl, 0, 0, ghs);
-        inode = gfs2_createi(ghs, &dentry->d_name, mode, dev);
-        if (IS_ERR(inode)) {
-                gfs2_holder_uninit(ghs);
-                return PTR_ERR(inode);
-        }
-        gfs2_trans_end(sdp);
-        if (dip->i_alloc->al_rgd)
-                gfs2_inplace_release(dip);
-        gfs2_quota_unlock(dip);
-        gfs2_alloc_put(dip);
-        gfs2_glock_dq_uninit_m(2, ghs);
-        d_instantiate(dentry, inode);
-        mark_inode_dirty(inode);
-        return 0;
-}
-/*
- * gfs2_ok_to_move - check if it's ok to move a directory to another directory
- * @this: move this
- * @to: to here
- *
- * Follow @to back to the root and make sure we don't encounter @this
- * Assumes we already hold the rename lock.
- *
- * Returns: errno
- */
-static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
-{
-        struct inode *dir = &to->i_inode;
-        struct super_block *sb = dir->i_sb;
-        struct inode *tmp;
-        int error = 0;
-        igrab(dir);
-        for (;;) {
-                if (dir == &this->i_inode) {
-                        error = -EINVAL;
-                        break;
-                }
-                if (dir == sb->s_root->d_inode) {
-                        error = 0;
-                        break;
-                }
-                tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
-                if (IS_ERR(tmp)) {
-                        error = PTR_ERR(tmp);
-                        break;
-                }
-                iput(dir);
-                dir = tmp;
-        }
-        iput(dir);
-        return error;
-}
-/**
- * gfs2_rename - Rename a file
- * @odir: Parent directory of old file name
- * @odentry: The old dentry of the file
- * @ndir: Parent directory of new file name
- * @ndentry: The new dentry of the file
- *
- * Returns: errno
- */
-static int gfs2_rename(struct inode *odir, struct dentry *odentry,
-                       struct inode *ndir, struct dentry *ndentry)
-{
-        struct gfs2_inode *odip = GFS2_I(odir);
-        struct gfs2_inode *ndip = GFS2_I(ndir);
-        struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
-        struct gfs2_inode *nip = NULL;
-        struct gfs2_sbd *sdp = GFS2_SB(odir);
-        struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
-        struct gfs2_rgrpd *nrgd;
-        unsigned int num_gh;
-        int dir_rename = 0;
-        int alloc_required = 0;
-        unsigned int x;
-        int error;
-        if (ndentry->d_inode) {
-                nip = GFS2_I(ndentry->d_inode);
-                if (ip == nip)
-                        return 0;
-        }
-        error = gfs2_rindex_hold(sdp, &ri_gh);
-        if (error)
-                return error;
-        if (odip != ndip) {
-                error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
-                                           0, &r_gh);
-                if (error)
-                        goto out;
-                if (S_ISDIR(ip->i_inode.i_mode)) {
-                        dir_rename = 1;
-                        /* don't move a dirctory into it's subdir */
-                        error = gfs2_ok_to_move(ip, ndip);
-                        if (error)
-                                goto out_gunlock_r;
-                }
-        }
-        num_gh = 1;
-        gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
-        if (odip != ndip) {
-                gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
-                num_gh++;
-        }
-        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
-        num_gh++;
-        if (nip) {
-                gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
-                num_gh++;
-                /* grab the resource lock for unlink flag twiddling 
-                 * this is the case of the target file already existing
-                 * so we unlink before doing the rename
-                 */
-                nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
-                if (nrgd)
-                        gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
-        }
-        for (x = 0; x < num_gh; x++) {
-                error = gfs2_glock_nq(ghs + x);
-                if (error)
-                        goto out_gunlock;
-        }
-        /* Check out the old directory */
-        error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
-        if (error)
-                goto out_gunlock;
-        /* Check out the new directory */
-        if (nip) {
-                error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
-                if (error)
-                        goto out_gunlock;
-                if (S_ISDIR(nip->i_inode.i_mode)) {
-                        if (nip->i_entries < 2) {
-                                if (gfs2_consist_inode(nip))
-                                        gfs2_dinode_print(nip);
-                                error = -EIO;
-                                goto out_gunlock;
-                        }
-                        if (nip->i_entries > 2) {
-                                error = -ENOTEMPTY;
-                                goto out_gunlock;
-                        }
-                }
-        } else {
-                error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
-                if (error)
-                        goto out_gunlock;
-                error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
-                switch (error) {
-                case -ENOENT:
-                        error = 0;
-                        break;
-                case 0:
-                        error = -EEXIST;
-                default:
-                        goto out_gunlock;
-                };
-                if (odip != ndip) {
-                        if (!ndip->i_inode.i_nlink) {
-                                error = -EINVAL;
-                                goto out_gunlock;
-                        }
-                        if (ndip->i_entries == (u32)-1) {
-                                error = -EFBIG;
-                                goto out_gunlock;
-                        }
-                        if (S_ISDIR(ip->i_inode.i_mode) &&
-                            ndip->i_inode.i_nlink == (u32)-1) {
-                                error = -EMLINK;
-                                goto out_gunlock;
-                        }
-                }
-        }
-        /* Check out the dir to be renamed */
-        if (dir_rename) {
-                error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
-                if (error)
-                        goto out_gunlock;
-        }
-        if (nip == NULL)
-                alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
-        error = alloc_required;
-        if (error < 0)
-                goto out_gunlock;
-        error = 0;
-        if (alloc_required) {
-                struct gfs2_alloc *al = gfs2_alloc_get(ndip);
-                if (!al) {
-                        error = -ENOMEM;
-                        goto out_gunlock;
-                }
-                error = gfs2_quota_lock_check(ndip);
-                if (error)
-                        goto out_alloc;
-                al->al_requested = sdp->sd_max_dirres;
-                error = gfs2_inplace_reserve_ri(ndip);
-                if (error)
-                        goto out_gunlock_q;
-                error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-                                         gfs2_rg_blocks(al) +
-                                         4 * RES_DINODE + 4 * RES_LEAF +
-                                         RES_STATFS + RES_QUOTA + 4, 0);
-                if (error)
-                        goto out_ipreserv;
-        } else {
-                error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
-                                         5 * RES_LEAF + 4, 0);
-                if (error)
-                        goto out_gunlock;
-        }
-        /* Remove the target file, if it exists */
-        if (nip) {
-                if (S_ISDIR(nip->i_inode.i_mode))
-                        error = gfs2_rmdiri(ndip, &ndentry->d_name, nip);
-                else {
-                        error = gfs2_dir_del(ndip, &ndentry->d_name);
-                        if (error)
-                                goto out_end_trans;
-                        error = gfs2_change_nlink(nip, -1);
-                }
-                if (error)
-                        goto out_end_trans;
-        }
-        if (dir_rename) {
-                error = gfs2_change_nlink(ndip, +1);
-                if (error)
-                        goto out_end_trans;
-                error = gfs2_change_nlink(odip, -1);
-                if (error)
-                        goto out_end_trans;
-                error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
-                if (error)
-                        goto out_end_trans;
-        } else {
-                struct buffer_head *dibh;
-                error = gfs2_meta_inode_buffer(ip, &dibh);
-                if (error)
-                        goto out_end_trans;
-                ip->i_inode.i_ctime = CURRENT_TIME;
-                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-                gfs2_dinode_out(ip, dibh->b_data);
-                brelse(dibh);
-        }
-        error = gfs2_dir_del(odip, &odentry->d_name);
-        if (error)
-                goto out_end_trans;
-        error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode));
-        if (error)
-                goto out_end_trans;
-out_end_trans:
-        gfs2_trans_end(sdp);
-out_ipreserv:
-        if (alloc_required)
-                gfs2_inplace_release(ndip);
-out_gunlock_q:
-        if (alloc_required)
-                gfs2_quota_unlock(ndip);
-out_alloc:
-        if (alloc_required)
-                gfs2_alloc_put(ndip);
-out_gunlock:
-        while (x--) {
-                gfs2_glock_dq(ghs + x);
-                gfs2_holder_uninit(ghs + x);
-        }
-out_gunlock_r:
-        if (r_gh.gh_gl)
-                gfs2_glock_dq_uninit(&r_gh);
-out:
-        gfs2_glock_dq_uninit(&ri_gh);
-        return error;
-}
-/**
- * gfs2_follow_link - Follow a symbolic link
- * @dentry: The dentry of the link
- * @nd: Data that we pass to vfs_follow_link()
- *
- * This can handle symlinks of any size.
- *
- * Returns: 0 on success or error code
- */
-static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-        struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
-        struct gfs2_holder i_gh;
-        struct buffer_head *dibh;
-        unsigned int x, size;
-        char *buf;
-        int error;
-        gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
-        error = gfs2_glock_nq(&i_gh);
-        if (error) {
-                gfs2_holder_uninit(&i_gh);
-                nd_set_link(nd, ERR_PTR(error));
-                return NULL;
-        }
-        size = (unsigned int)i_size_read(&ip->i_inode);
-        if (size == 0) {
-                gfs2_consist_inode(ip);
-                buf = ERR_PTR(-EIO);
-                goto out;
-        }
-        error = gfs2_meta_inode_buffer(ip, &dibh);
-        if (error) {
-                buf = ERR_PTR(error);
-                goto out;
-        }
-        x = size + 1;
-        buf = kmalloc(x, GFP_NOFS);
-        if (!buf)
-                buf = ERR_PTR(-ENOMEM);
-        else
-                memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
-        brelse(dibh);
-out:
-        gfs2_glock_dq_uninit(&i_gh);
-        nd_set_link(nd, buf);
-        return NULL;
-}
-static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
-{
-        char *s = nd_get_link(nd);
-        if (!IS_ERR(s))
-                kfree(s);
-}
-/**
- * gfs2_permission -
- * @inode: The inode
- * @mask: The mask to be tested
- * @flags: Indicates whether this is an RCU path walk or not
- *
- * This may be called from the VFS directly, or from within GFS2 with the
- * inode locked, so we look to see if the glock is already locked and only
- * lock the glock if its not already been done.
- *
- * Returns: errno
- */
-int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
-{
-        struct gfs2_inode *ip;
-        struct gfs2_holder i_gh;
-        int error;
-        int unlock = 0;
-        ip = GFS2_I(inode);
-        if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
-                if (flags & IPERM_FLAG_RCU)
-                        return -ECHILD;
-                error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-                if (error)
-                        return error;
-                unlock = 1;
-        }
-        if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
-                error = -EACCES;
-        else
-                error = generic_permission(inode, mask, flags, gfs2_check_acl);
-        if (unlock)
-                gfs2_glock_dq_uninit(&i_gh);
-        return error;
-}
-static int setattr_chown(struct inode *inode, struct iattr *attr)
-{
-        struct gfs2_inode *ip = GFS2_I(inode);
-        struct gfs2_sbd *sdp = GFS2_SB(inode);
-        u32 ouid, ogid, nuid, ngid;
-        int error;
-        ouid = inode->i_uid;
-        ogid = inode->i_gid;
-        nuid = attr->ia_uid;
-        ngid = attr->ia_gid;
-        if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
-                ouid = nuid = NO_QUOTA_CHANGE;
-        if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
-                ogid = ngid = NO_QUOTA_CHANGE;
-        if (!gfs2_alloc_get(ip))
-                return -ENOMEM;
-        error = gfs2_quota_lock(ip, nuid, ngid);
-        if (error)
-                goto out_alloc;
-        if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
-                error = gfs2_quota_check(ip, nuid, ngid);
-                if (error)
-                        goto out_gunlock_q;
-        }
-        error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
-        if (error)
-                goto out_gunlock_q;
-        error = gfs2_setattr_simple(ip, attr);
-        if (error)
-                goto out_end_trans;
-        if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
-                u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
-                gfs2_quota_change(ip, -blocks, ouid, ogid);
-                gfs2_quota_change(ip, blocks, nuid, ngid);
-        }
-out_end_trans:
-        gfs2_trans_end(sdp);
-out_gunlock_q:
-        gfs2_quota_unlock(ip);
-out_alloc:
-        gfs2_alloc_put(ip);
-        return error;
-}
-/**
- * gfs2_setattr - Change attributes on an inode
- * @dentry: The dentry which is changing
- * @attr: The structure describing the change
- *
- * The VFS layer wants to change one or more of an inodes attributes.  Write
- * that change out to disk.
- *
- * Returns: errno
- */
-static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
-{
-        struct inode *inode = dentry->d_inode;
-        struct gfs2_inode *ip = GFS2_I(inode);
-        struct gfs2_holder i_gh;
-        int error;
-        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
-        if (error)
-                return error;
-        error = -EPERM;
-        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                goto out;
-        error = inode_change_ok(inode, attr);
-        if (error)
-                goto out;
-        if (attr->ia_valid & ATTR_SIZE)
-                error = gfs2_setattr_size(inode, attr->ia_size);
-        else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
-                error = setattr_chown(inode, attr);
-        else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
-                error = gfs2_acl_chmod(ip, attr);
-        else
-                error = gfs2_setattr_simple(ip, attr);
-out:
-        gfs2_glock_dq_uninit(&i_gh);
-        if (!error)
-                mark_inode_dirty(inode);
-        return error;
-}
-/**
- * gfs2_getattr - Read out an inode's attributes
- * @mnt: The vfsmount the inode is being accessed from
- * @dentry: The dentry to stat
- * @stat: The inode's stats
- *
- * This may be called from the VFS directly, or from within GFS2 with the
- * inode locked, so we look to see if the glock is already locked and only
- * lock the glock if its not already been done. Note that its the NFS
- * readdirplus operation which causes this to be called (from filldir)
- * with the glock already held.
- *
- * Returns: errno
- */
-static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
-                        struct kstat *stat)
-{
-        struct inode *inode = dentry->d_inode;
-        struct gfs2_inode *ip = GFS2_I(inode);
-        struct gfs2_holder gh;
-        int error;
-        int unlock = 0;
-        if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
-                error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
-                if (error)
-                        return error;
-                unlock = 1;
-        }
-        generic_fillattr(inode, stat);
-        if (unlock)
-                gfs2_glock_dq_uninit(&gh);
-        return 0;
-}
-static int gfs2_setxattr(struct dentry *dentry, const char *name,
-                         const void *data, size_t size, int flags)
-{
-        struct inode *inode = dentry->d_inode;
-        struct gfs2_inode *ip = GFS2_I(inode);
-        struct gfs2_holder gh;
-        int ret;
-        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
-        ret = gfs2_glock_nq(&gh);
-        if (ret == 0) {
-                ret = generic_setxattr(dentry, name, data, size, flags);
-                gfs2_glock_dq(&gh);
-        }
-        gfs2_holder_uninit(&gh);
-        return ret;
-}
-static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
-                             void *data, size_t size)
-{
-        struct inode *inode = dentry->d_inode;
-        struct gfs2_inode *ip = GFS2_I(inode);
-        struct gfs2_holder gh;
-        int ret;
-        gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
-        ret = gfs2_glock_nq(&gh);
-        if (ret == 0) {
-                ret = generic_getxattr(dentry, name, data, size);
-                gfs2_glock_dq(&gh);
-        }
-        gfs2_holder_uninit(&gh);
-        return ret;
-}
-static int gfs2_removexattr(struct dentry *dentry, const char *name)
-{
-        struct inode *inode = dentry->d_inode;
-        struct gfs2_inode *ip = GFS2_I(inode);
-        struct gfs2_holder gh;
-        int ret;
-        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
-        ret = gfs2_glock_nq(&gh);
-        if (ret == 0) {
-                ret = generic_removexattr(dentry, name);
-                gfs2_glock_dq(&gh);
-        }
-        gfs2_holder_uninit(&gh);
-        return ret;
-}
-static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
-                       u64 start, u64 len)
-{
-        struct gfs2_inode *ip = GFS2_I(inode);
-        struct gfs2_holder gh;
-        int ret;
-        ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
-        if (ret)
-                return ret;
-        mutex_lock(&inode->i_mutex);
-        ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
-        if (ret)
-                goto out;
-        if (gfs2_is_stuffed(ip)) {
-                u64 phys = ip->i_no_addr << inode->i_blkbits;
-                u64 size = i_size_read(inode);
-                u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
-                            FIEMAP_EXTENT_DATA_INLINE;
-                phys += sizeof(struct gfs2_dinode);
-                phys += start;
-                if (start + len > size)
-                        len = size - start;
-                if (start < size)
-                        ret = fiemap_fill_next_extent(fieinfo, start, phys,
-                                                      len, flags);
-                if (ret == 1)
-                        ret = 0;
-        } else {
-                ret = __generic_block_fiemap(inode, fieinfo, start, len,
-                                             gfs2_block_map);
-        }
-        gfs2_glock_dq_uninit(&gh);
-out:
-        mutex_unlock(&inode->i_mutex);
-        return ret;
-}
-const struct inode_operations gfs2_file_iops = {
-        .permission = gfs2_permission,
-        .setattr = gfs2_setattr,
-        .getattr = gfs2_getattr,
-        .setxattr = gfs2_setxattr,
-        .getxattr = gfs2_getxattr,
-        .listxattr = gfs2_listxattr,
-        .removexattr = gfs2_removexattr,
-        .fiemap = gfs2_fiemap,
-};
-const struct inode_operations gfs2_dir_iops = {
-        .create = gfs2_create,
-        .lookup = gfs2_lookup,
-        .link = gfs2_link,
-        .unlink = gfs2_unlink,
-        .symlink = gfs2_symlink,
-        .mkdir = gfs2_mkdir,
-        .rmdir = gfs2_rmdir,
-        .mknod = gfs2_mknod,
-        .rename = gfs2_rename,
-        .permission = gfs2_permission,
-        .setattr = gfs2_setattr,
-        .getattr = gfs2_getattr,
-        .setxattr = gfs2_setxattr,
-        .getxattr = gfs2_getxattr,
-        .listxattr = gfs2_listxattr,
-        .removexattr = gfs2_removexattr,
-        .fiemap = gfs2_fiemap,
-};
-const struct inode_operations gfs2_symlink_iops = {
-        .readlink = generic_readlink,
-        .follow_link = gfs2_follow_link,
-        .put_link = gfs2_put_link,
-        .permission = gfs2_permission,
-        .setattr = gfs2_setattr,
-        .getattr = gfs2_getattr,
-        .setxattr = gfs2_setxattr,
-        .getxattr = gfs2_getxattr,
-        .listxattr = gfs2_listxattr,
-        .removexattr = gfs2_removexattr,
-        .fiemap = gfs2_fiemap,
-};
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e23d9864c418..42e8d23bc047 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -38,6 +38,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
@@ -77,19 +78,20 @@ static LIST_HEAD(qd_lru_list);
 static atomic_t qd_lru_count = ATOMIC_INIT(0);
 static DEFINE_SPINLOCK(qd_lru_lock);
-int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc)
 {
        struct gfs2_quota_data *qd;
        struct gfs2_sbd *sdp;
+        int nr_to_scan = sc->nr_to_scan;
-        if (nr == 0)
+        if (nr_to_scan == 0)
                goto out;
-        if (!(gfp_mask & __GFP_FS))
+        if (!(sc->gfp_mask & __GFP_FS))
                return -1;
        spin_lock(&qd_lru_lock);
-        while (nr && !list_empty(&qd_lru_list)) {
+        while (nr_to_scan && !list_empty(&qd_lru_list)) {
                qd = list_entry(qd_lru_list.next,
                                struct gfs2_quota_data, qd_reclaim);
                sdp = qd->qd_gl->gl_sbd;
@@ -110,7 +112,7 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
                spin_unlock(&qd_lru_lock);
                kmem_cache_free(gfs2_quotad_cachep, qd);
                spin_lock(&qd_lru_lock);
-                nr--;
+                nr_to_scan--;
        }
        spin_unlock(&qd_lru_lock);
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index e7d236ca48bd..90bf1c302a98 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -12,6 +12,7 @@
 struct gfs2_inode;
 struct gfs2_sbd;
+struct shrink_control;
 #define NO_QUOTA_CHANGE ((u32)-1)
@@ -51,7 +52,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
        return ret;
 }
-extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask);
+extern int gfs2_shrink_qd_memory(struct shrinker *shrink,
+                                 struct shrink_control *sc);
 extern const struct quotactl_ops gfs2_quotactl_ops;
 #endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 6fcae8469f6d..9b780df3fd54 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -78,10 +78,11 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
 static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
                               unsigned char *buf2, unsigned int offset,
-                               unsigned int buflen, u32 block,
+                               struct gfs2_bitmap *bi, u32 block,
                               unsigned char new_state)
 {
        unsigned char *byte1, *byte2, *end, cur_state;
+        unsigned int buflen = bi->bi_len;
        const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
        byte1 = buf1 + offset + (block / GFS2_NBBY);
@@ -92,6 +93,16 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
        cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
        if (unlikely(!valid_change[new_state * 4 + cur_state])) {
+                printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, "
+                       "new_state=%d\n",
+                       (unsigned long long)block, cur_state, new_state);
+                printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n",
+                       (unsigned long long)rgd->rd_addr,
+                       (unsigned long)bi->bi_start);
+                printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n",
+                       (unsigned long)bi->bi_offset,
+                       (unsigned long)bi->bi_len);
+                dump_stack();
                gfs2_consist_rgrpd(rgd);
                return;
        }
@@ -381,6 +392,7 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp)
                if (gl) {
                        gl->gl_object = NULL;
+                        gfs2_glock_add_to_lru(gl);
                        gfs2_glock_put(gl);
                }
@@ -1365,7 +1377,7 @@ skip:
        gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
        gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
-                    bi->bi_len, blk, new_state);
+                    bi, blk, new_state);
        goal = blk;
        while (*n < elen) {
                goal++;
@@ -1375,7 +1387,7 @@ skip:
                    GFS2_BLKST_FREE)
                        break;
                gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
-                            bi->bi_len, goal, new_state);
+                            bi, goal, new_state);
                (*n)++;
        }
 out:
@@ -1432,7 +1444,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
                }
                gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
                gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset,
-                            bi->bi_len, buf_blk, new_state);
+                            bi, buf_blk, new_state);
        }
        return rgd;
@@ -1617,6 +1629,10 @@ void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
        gfs2_trans_add_rg(rgd);
+        /* Directories keep their data in the metadata address space */
+        if (ip->i_depth)
+                gfs2_meta_wipe(ip, bstart, blen);
 }
 /**
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b9f28e66dad1..ed540e7018be 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -23,6 +23,7 @@
 #include <linux/time.h>
 #include <linux/wait.h>
 #include <linux/writeback.h>
+#include <linux/backing-dev.h>
 #include "gfs2.h"
 #include "incore.h"
@@ -700,11 +701,47 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
        mutex_unlock(&sdp->sd_freeze_lock);
 }
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
+{
+        struct gfs2_dinode *str = buf; /* buf is filled in as a struct gfs2_dinode */
+        str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); /* header: magic, type, format */
+        str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
+        str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
+        str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
+        str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
+        str->di_mode = cpu_to_be32(ip->i_inode.i_mode); /* every field is stored via cpu_to_be*() */
+        str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
+        str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
+        str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
+        str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
+        str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
+        str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); /* seconds here; tv_nsec goes to di_*_nsec below */
+        str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
+        str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
+        str->di_goal_meta = cpu_to_be64(ip->i_goal); /* both goal fields come from the single ip->i_goal */
+        str->di_goal_data = cpu_to_be64(ip->i_goal);
+        str->di_generation = cpu_to_be64(ip->i_generation);
+        str->di_flags = cpu_to_be32(ip->i_diskflags);
+        str->di_height = cpu_to_be16(ip->i_height);
+        str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
+                                             !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
+                                             GFS2_FORMAT_DE : 0); /* GFS2_FORMAT_DE only for a directory without GFS2_DIF_EXHASH, else 0 */
+        str->di_depth = cpu_to_be16(ip->i_depth);
+        str->di_entries = cpu_to_be32(ip->i_entries);
+        str->di_eattr = cpu_to_be64(ip->i_eattr);
+        str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
+        str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
+        str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
+}
 /**
 * gfs2_write_inode - Make sure the inode is stable on the disk
 * @inode: The inode
- * @sync: synchronous write flag
+ * @wbc: The writeback control structure
 *
 * Returns: errno
 */
@@ -713,15 +750,17 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
+        struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
+        struct backing_dev_info *bdi = metamapping->backing_dev_info;
        struct gfs2_holder gh;
        struct buffer_head *bh;
        struct timespec atime;
        struct gfs2_dinode *di;
-        int ret = 0;
+        int ret = -EAGAIN;
-        /* Check this is a "normal" inode, etc */
+        /* Skip timestamp update, if this is from a memalloc */
        if (current->flags & PF_MEMALLOC)
-                return 0;
+                goto do_flush;
        ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
        if (ret)
                goto do_flush;
@@ -745,6 +784,13 @@ do_unlock:
 do_flush:
        if (wbc->sync_mode == WB_SYNC_ALL)
                gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
+        filemap_fdatawrite(metamapping);
+        if (bdi->dirty_exceeded)
+                gfs2_ail1_flush(sdp, wbc);
+        if (!ret && (wbc->sync_mode == WB_SYNC_ALL))
+                ret = filemap_fdatawait(metamapping);
+        if (ret)
+                mark_inode_dirty_sync(inode);
        return ret;
 }
@@ -874,8 +920,9 @@ restart:
 static int gfs2_sync_fs(struct super_block *sb, int wait)
 {
-        if (wait && sb->s_fs_info)
+        struct gfs2_sbd *sdp = sb->s_fs_info; /* read s_fs_info once instead of twice */
-                gfs2_log_flush(sb->s_fs_info, NULL);
+        if (wait && sdp) /* log is flushed only when wait is set and s_fs_info is non-NULL */
+                gfs2_log_flush(sdp, NULL);
         return 0;
 }
@@ -1308,6 +1355,78 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
        return 0;
 }
+/* Truncate (from offset 0) the glock's address space and the inode's own mapping. */
+static void gfs2_final_release_pages(struct gfs2_inode *ip)
+{
+        struct gfs2_glock *glock = ip->i_gl;
+        struct address_space *meta = gfs2_glock2aspace(glock);
+        truncate_inode_pages(meta, 0);
+        truncate_inode_pages(&ip->i_inode.i_data, 0);
+        /* The flush/dirty flags are cleared only when no revokes are outstanding. */
+        if (atomic_read(&glock->gl_revokes) != 0)
+                return;
+        clear_bit(GLF_LFLUSH, &glock->gl_flags);
+        clear_bit(GLF_DIRTY, &glock->gl_flags);
+}
+static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
+{
+        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+        struct gfs2_alloc *al;
+        struct gfs2_rgrpd *rgd;
+        int error;
+        if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { /* any block count other than exactly 1 is reported as an inconsistency */
+                gfs2_consist_inode(ip);
+                return -EIO;
+        }
+        al = gfs2_alloc_get(ip); /* al supplies the two holders (al_ri_gh, al_rgd_gh) used below */
+        if (!al)
+                return -ENOMEM;
+        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+        if (error)
+                goto out;
+        error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+        if (error)
+                goto out_qs;
+        rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); /* resource group looked up from the inode's own block address */
+        if (!rgd) {
+                gfs2_consist_inode(ip);
+                error = -EIO;
+                goto out_rindex_relse;
+        }
+        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
+                                   &al->al_rgd_gh);
+        if (error)
+                goto out_rindex_relse;
+        error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
+                                 sdp->sd_jdesc->jd_blocks);
+        if (error)
+                goto out_rg_gunlock;
+        gfs2_free_di(rgd, ip); /* the free and the page release both happen between trans_begin and trans_end */
+        gfs2_final_release_pages(ip);
+        gfs2_trans_end(sdp); /* success falls through the labels below, releasing in reverse order of acquisition */
+out_rg_gunlock:
+        gfs2_glock_dq_uninit(&al->al_rgd_gh);
+out_rindex_relse:
+        gfs2_glock_dq_uninit(&al->al_ri_gh);
+out_qs:
+        gfs2_quota_unhold(ip);
+out:
+        gfs2_alloc_put(ip);
+        return error; /* 0 on the fall-through success path, otherwise the first failing step's code */
+}
 /*
 * We have to (at the moment) hold the inodes main lock to cover
 * the gap between unlocking the shared lock on the iopen lock and
@@ -1371,15 +1490,13 @@ static void gfs2_evict_inode(struct inode *inode)
        }
        error = gfs2_dinode_dealloc(ip);
-        if (error)
+        goto out_unlock;
-                goto out_unlock;
 out_truncate:
        error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
        if (error)
                goto out_unlock;
-        /* Needs to be done before glock release & also in a transaction */
+        gfs2_final_release_pages(ip);
-        truncate_inode_pages(&inode->i_data, 0);
        gfs2_trans_end(sdp);
 out_unlock:
@@ -1394,6 +1511,7 @@ out:
        end_writeback(inode);
        ip->i_gl->gl_object = NULL;
+        gfs2_glock_add_to_lru(ip->i_gl);
        gfs2_glock_put(ip->i_gl);
        ip->i_gl = NULL;
        if (ip->i_iopen_gh.gh_gl) {
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 748ccb557c18..e20eab37bc80 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -81,7 +81,8 @@ static int gfs2_uuid_valid(const u8 *uuid)
 static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
 {
-        const u8 *uuid = sdp->sd_sb.sb_uuid;
+        struct super_block *s = sdp->sd_vfs;
+        const u8 *uuid = s->s_uuid;
        buf[0] = '\0';
        if (!gfs2_uuid_valid(uuid))
                return 0;
@@ -616,7 +617,8 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
                       struct kobj_uevent_env *env)
 {
        struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
-        const u8 *uuid = sdp->sd_sb.sb_uuid;
+        struct super_block *s = sdp->sd_vfs;
+        const u8 *uuid = s->s_uuid;
        add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
        add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index cedb0bb96d96..5d07609ec57d 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -10,6 +10,7 @@
 #include <linux/buffer_head.h>
 #include <linux/dlmconstants.h>
 #include <linux/gfs2_ondisk.h>
+#include <linux/writeback.h>
 #include "incore.h"
 #include "glock.h"
@@ -40,7 +41,9 @@
        {(1UL << GLF_REPLY_PENDING),            "r" },          \
        {(1UL << GLF_INITIAL),                  "I" },          \
        {(1UL << GLF_FROZEN),                   "F" },          \
-        {(1UL << GLF_QUEUED),                   "q" })
+        {(1UL << GLF_QUEUED),                   "q" },          \
+        {(1UL << GLF_LRU),                      "L" },          \
+        {(1UL << GLF_OBJECT),                   "o" })
 #ifndef NUMPTY
 #define NUMPTY
@@ -94,7 +97,7 @@ TRACE_EVENT(gfs2_glock_state_change,
                __entry->new_state      = glock_trace_state(new_state);
                __entry->tgt_state      = glock_trace_state(gl->gl_target);
                __entry->dmt_state      = glock_trace_state(gl->gl_demote_state);
-                __entry->flags          = gl->gl_flags;
+                __entry->flags          = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
        ),
        TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s",
@@ -127,7 +130,7 @@ TRACE_EVENT(gfs2_glock_put,
                __entry->gltype         = gl->gl_name.ln_type;
                __entry->glnum          = gl->gl_name.ln_number;
                __entry->cur_state      = glock_trace_state(gl->gl_state);
-                __entry->flags          = gl->gl_flags;
+                __entry->flags          = gl->gl_flags  | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
        ),
        TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s",
@@ -161,7 +164,7 @@ TRACE_EVENT(gfs2_demote_rq,
                __entry->glnum          = gl->gl_name.ln_number;
                __entry->cur_state      = glock_trace_state(gl->gl_state);
                __entry->dmt_state      = glock_trace_state(gl->gl_demote_state);
-                __entry->flags          = gl->gl_flags;
+                __entry->flags          = gl->gl_flags  | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
        ),
        TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s",
@@ -318,6 +321,33 @@ TRACE_EVENT(gfs2_log_blocks,
                  MINOR(__entry->dev), __entry->blocks)
 );
+/* Writing back the AIL */
+TRACE_EVENT(gfs2_ail_flush,
+        TP_PROTO(const struct gfs2_sbd *sdp, const struct writeback_control *wbc, int start),
+        TP_ARGS(sdp, wbc, start),
+        TP_STRUCT__entry(
+                __field(        dev_t,  dev                     )
+                __field(        int, start                      ) /* non-zero prints "start", zero prints "end" */
+                __field(        int, sync_mode                  ) /* copied from wbc->sync_mode */
+                __field(        long, nr_to_write               ) /* copied from wbc->nr_to_write */
+        ),
+        TP_fast_assign(
+                __entry->dev            = sdp->sd_vfs->s_dev;
+                __entry->start          = start;
+                __entry->sync_mode      = wbc->sync_mode;
+                __entry->nr_to_write    = wbc->nr_to_write;
+        ),
+        TP_printk("%u,%u ail flush %s %s %ld", MAJOR(__entry->dev),
+                  MINOR(__entry->dev), __entry->start ? "start" : "end",
+                  __entry->sync_mode == WB_SYNC_ALL ? "all" : "none", /* any mode other than WB_SYNC_ALL prints "none" */
+                  __entry->nr_to_write)
+);
 /* Section 3 - bmap
 *
 * Objectives:
diff --git a/fs/hpfs/Kconfig b/fs/hpfs/Kconfig
index 0c39dc3ef7d7..56bd15c5bf6c 100644
--- a/fs/hpfs/Kconfig
+++ b/fs/hpfs/Kconfig
@@ -1,7 +1,6 @@
 config HPFS_FS
        tristate "OS/2 HPFS file system support"
        depends on BLOCK
-        depends on BROKEN || !PREEMPT
        help
          OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
          is the file system used for organizing files on OS/2 hard disk
diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c
index 5503e2c28910..7a5eb2c718c8 100644
--- a/fs/hpfs/alloc.c
+++ b/fs/hpfs/alloc.c
@@ -8,8 +8,6 @@
 #include "hpfs_fn.h"
-static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec);
 /*
 * Check if a sector is allocated in bitmap
 * This is really slow. Turned on only if chk==2
@@ -18,9 +16,9 @@ static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec);
 static int chk_if_allocated(struct super_block *s, secno sec, char *msg)
 {
         struct quad_buffer_head qbh;
-        unsigned *bmp;
+        u32 *bmp;
         if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "chk"))) goto fail;
-        if ((bmp[(sec & 0x3fff) >> 5] >> (sec & 0x1f)) & 1) {
+        if ((le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f)) & 1) { /* bitmap word read as little-endian; a set bit is reported as "not allocated" */
                 hpfs_error(s, "sector '%s' - %08x not allocated in bitmap", msg, sec);
                 goto fail1;
         }
@@ -28,7 +26,7 @@ static int chk_if_allocated(struct super_block *s, secno sec, char *msg)
        if (sec >= hpfs_sb(s)->sb_dirband_start && sec < hpfs_sb(s)->sb_dirband_start + hpfs_sb(s)->sb_dirband_size) {
                unsigned ssec = (sec - hpfs_sb(s)->sb_dirband_start) / 4;
                if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) goto fail;
-                if ((bmp[ssec >> 5] >> (ssec & 0x1f)) & 1) {
+                if ((le32_to_cpu(bmp[ssec >> 5]) >> (ssec & 0x1f)) & 1) {
                        hpfs_error(s, "sector '%s' - %08x not allocated in directory bitmap", msg, sec);
                        goto fail1;
                }
@@ -75,7 +73,6 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
                hpfs_error(s, "Bad allocation size: %d", n);
                return 0;
        }
-        lock_super(s);
        if (bs != ~0x3fff) {
                if (!(bmp = hpfs_map_bitmap(s, near >> 14, &qbh, "aib"))) goto uls;
        } else {
@@ -85,10 +82,6 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
                ret = bs + nr;
                goto rt;
        }
-        /*if (!tstbits(bmp, nr + n, n + forward)) {
-                ret = bs + nr + n;
-                goto rt;
-        }*/
        q = nr + n; b = 0;
        while ((a = tstbits(bmp, q, n + forward)) != 0) {
                q += a;
@@ -105,14 +98,14 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
                goto rt;
        }
        nr >>= 5;
-        /*for (i = nr + 1; i != nr; i++, i &= 0x1ff) {*/
+        /*for (i = nr + 1; i != nr; i++, i &= 0x1ff) */
        i = nr;
        do {
-                if (!bmp[i]) goto cont;
+                if (!le32_to_cpu(bmp[i])) goto cont;
-                if (n + forward >= 0x3f && bmp[i] != -1) goto cont;
+                if (n + forward >= 0x3f && le32_to_cpu(bmp[i]) != 0xffffffff) goto cont;
                q = i<<5;
                if (i > 0) {
-                        unsigned k = bmp[i-1];
+                        unsigned k = le32_to_cpu(bmp[i-1]);
                        while (k & 0x80000000) {
                                q--; k <<= 1;
                        }
@@ -132,18 +125,17 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
        } while (i != nr);
        rt:
        if (ret) {
-                if (hpfs_sb(s)->sb_chk && ((ret >> 14) != (bs >> 14) || (bmp[(ret & 0x3fff) >> 5] | ~(((1 << n) - 1) << (ret & 0x1f))) != 0xffffffff)) {
+                if (hpfs_sb(s)->sb_chk && ((ret >> 14) != (bs >> 14) || (le32_to_cpu(bmp[(ret & 0x3fff) >> 5]) | ~(((1 << n) - 1) << (ret & 0x1f))) != 0xffffffff)) {
                        hpfs_error(s, "Allocation doesn't work! Wanted %d, allocated at %08x", n, ret);
                        ret = 0;
                        goto b;
                }
-                bmp[(ret & 0x3fff) >> 5] &= ~(((1 << n) - 1) << (ret & 0x1f));
+                bmp[(ret & 0x3fff) >> 5] &= cpu_to_le32(~(((1 << n) - 1) << (ret & 0x1f)));
                hpfs_mark_4buffers_dirty(&qbh);
        }
        b:
        hpfs_brelse4(&qbh);
        uls:
-        unlock_super(s);
        return ret;
 }
@@ -155,7 +147,7 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
 *                              sectors
 */
-secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forward, int lock)
+secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forward)
 {
        secno sec;
        int i;
@@ -167,7 +159,6 @@ secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forwa
                forward = -forward;
                f_p = 1;
        }
-        if (lock) hpfs_lock_creation(s);
        n_bmps = (sbi->sb_fs_size + 0x4000 - 1) >> 14;
        if (near && near < sbi->sb_fs_size) {
                if ((sec = alloc_in_bmp(s, near, n, f_p ? forward : forward/4))) goto ret;
@@ -214,18 +205,17 @@ secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forwa
        ret:
        if (sec && f_p) {
                for (i = 0; i < forward; i++) {
-                        if (!hpfs_alloc_if_possible_nolock(s, sec + i + 1)) {
+                        if (!hpfs_alloc_if_possible(s, sec + i + 1)) {
                                hpfs_error(s, "Prealloc doesn't work! Wanted %d, allocated at %08x, can't allocate %d", forward, sec, i);
                                sec = 0;
                                break;
                        }
                }
        }
-        if (lock) hpfs_unlock_creation(s);
        return sec;
 }
-static secno alloc_in_dirband(struct super_block *s, secno near, int lock)
+static secno alloc_in_dirband(struct super_block *s, secno near)
 {
        unsigned nr = near;
        secno sec;
@@ -236,49 +226,35 @@ static secno alloc_in_dirband(struct super_block *s, secno near, int lock)
                nr = sbi->sb_dirband_start + sbi->sb_dirband_size - 4;
        nr -= sbi->sb_dirband_start;
        nr >>= 2;
-        if (lock) hpfs_lock_creation(s);
        sec = alloc_in_bmp(s, (~0x3fff) | nr, 1, 0);
-        if (lock) hpfs_unlock_creation(s);
        if (!sec) return 0;
        return ((sec & 0x3fff) << 2) + sbi->sb_dirband_start;
 }
 /* Alloc sector if it's free */
-static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec)
+int hpfs_alloc_if_possible(struct super_block *s, secno sec)
 {
        struct quad_buffer_head qbh;
-        unsigned *bmp;
+        u32 *bmp;
-        lock_super(s);
        if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "aip"))) goto end;
-        if (bmp[(sec & 0x3fff) >> 5] & (1 << (sec & 0x1f))) {
+        if (le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) & (1 << (sec & 0x1f))) {
-                bmp[(sec & 0x3fff) >> 5] &= ~(1 << (sec & 0x1f));
+                bmp[(sec & 0x3fff) >> 5] &= cpu_to_le32(~(1 << (sec & 0x1f)));
                hpfs_mark_4buffers_dirty(&qbh);
                hpfs_brelse4(&qbh);
-                unlock_super(s);
                return 1;
        }
        hpfs_brelse4(&qbh);
        end:
-        unlock_super(s);
        return 0;
 }
-int hpfs_alloc_if_possible(struct super_block *s, secno sec)
-{
-        int r;
-        hpfs_lock_creation(s);
-        r = hpfs_alloc_if_possible_nolock(s, sec);
-        hpfs_unlock_creation(s);
-        return r;
-}
 /* Free sectors in bitmaps */
 void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n)
 {
        struct quad_buffer_head qbh;
-        unsigned *bmp;
+        u32 *bmp;
        struct hpfs_sb_info *sbi = hpfs_sb(s);
        /*printk("2 - ");*/
        if (!n) return;
@@ -286,26 +262,22 @@ void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n)
                hpfs_error(s, "Trying to free reserved sector %08x", sec);
                return;
        }
-        lock_super(s);
        sbi->sb_max_fwd_alloc += n > 0xffff ? 0xffff : n;
        if (sbi->sb_max_fwd_alloc > 0xffffff) sbi->sb_max_fwd_alloc = 0xffffff;
        new_map:
        if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "free"))) {
-                unlock_super(s);
                return;
        }       
        new_tst:
-        if ((bmp[(sec & 0x3fff) >> 5] >> (sec & 0x1f) & 1)) {
+        if ((le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f) & 1)) {
                hpfs_error(s, "sector %08x not allocated", sec);
                hpfs_brelse4(&qbh);
-                unlock_super(s);
                return;
        }
-        bmp[(sec & 0x3fff) >> 5] |= 1 << (sec & 0x1f);
+        bmp[(sec & 0x3fff) >> 5] |= cpu_to_le32(1 << (sec & 0x1f));
        if (!--n) {
                hpfs_mark_4buffers_dirty(&qbh);
                hpfs_brelse4(&qbh);
-                unlock_super(s);
                return;
        }       
        if (!(++sec & 0x3fff)) {
@@ -327,13 +299,13 @@ int hpfs_check_free_dnodes(struct super_block *s, int n)
        int n_bmps = (hpfs_sb(s)->sb_fs_size + 0x4000 - 1) >> 14;
        int b = hpfs_sb(s)->sb_c_bitmap & 0x0fffffff;
        int i, j;
-        unsigned *bmp;
+        u32 *bmp;
        struct quad_buffer_head qbh;
        if ((bmp = hpfs_map_dnode_bitmap(s, &qbh))) {
                for (j = 0; j < 512; j++) {
                        unsigned k;
-                        if (!bmp[j]) continue;
+                        if (!le32_to_cpu(bmp[j])) continue;
-                        for (k = bmp[j]; k; k >>= 1) if (k & 1) if (!--n) {
+                        for (k = le32_to_cpu(bmp[j]); k; k >>= 1) if (k & 1) if (!--n) {
                                hpfs_brelse4(&qbh);
                                return 0;
                        }
@@ -352,10 +324,10 @@ int hpfs_check_free_dnodes(struct super_block *s, int n)
        chk_bmp:
        if (bmp) {
                for (j = 0; j < 512; j++) {
-                        unsigned k;
+                        u32 k;
-                        if (!bmp[j]) continue;
+                        if (!le32_to_cpu(bmp[j])) continue;
                        for (k = 0xf; k; k <<= 4)
-                                if ((bmp[j] & k) == k) {
+                                if ((le32_to_cpu(bmp[j]) & k) == k) {
                                        if (!--n) {
                                                hpfs_brelse4(&qbh);
                                                return 0;
@@ -379,44 +351,40 @@ void hpfs_free_dnode(struct super_block *s, dnode_secno dno)
                hpfs_free_sectors(s, dno, 4);
        } else {
                struct quad_buffer_head qbh;
-                unsigned *bmp;
+                u32 *bmp;
                unsigned ssec = (dno - hpfs_sb(s)->sb_dirband_start) / 4;
-                lock_super(s);
                if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) {
-                        unlock_super(s);
                        return;
                }
-                bmp[ssec >> 5] |= 1 << (ssec & 0x1f);
+                bmp[ssec >> 5] |= cpu_to_le32(1 << (ssec & 0x1f));
                hpfs_mark_4buffers_dirty(&qbh);
                hpfs_brelse4(&qbh);
-                unlock_super(s);
        }
 }
 struct dnode *hpfs_alloc_dnode(struct super_block *s, secno near,
-                         dnode_secno *dno, struct quad_buffer_head *qbh,
+                         dnode_secno *dno, struct quad_buffer_head *qbh)
-                         int lock)
 {
        struct dnode *d;
        if (hpfs_count_one_bitmap(s, hpfs_sb(s)->sb_dmap) > FREE_DNODES_ADD) {
-                if (!(*dno = alloc_in_dirband(s, near, lock)))
+                if (!(*dno = alloc_in_dirband(s, near)))
-                        if (!(*dno = hpfs_alloc_sector(s, near, 4, 0, lock))) return NULL;
+                        if (!(*dno = hpfs_alloc_sector(s, near, 4, 0))) return NULL;
        } else {
-                if (!(*dno = hpfs_alloc_sector(s, near, 4, 0, lock)))
+                if (!(*dno = hpfs_alloc_sector(s, near, 4, 0)))
-                        if (!(*dno = alloc_in_dirband(s, near, lock))) return NULL;
+                        if (!(*dno = alloc_in_dirband(s, near))) return NULL;
        }
        if (!(d = hpfs_get_4sectors(s, *dno, qbh))) {
                hpfs_free_dnode(s, *dno);
                return NULL;
        }
        memset(d, 0, 2048);
-        d->magic = DNODE_MAGIC;
+        d->magic = cpu_to_le32(DNODE_MAGIC);
-        d->first_free = 52;
+        d->first_free = cpu_to_le32(52);
        d->dirent[0] = 32;
        d->dirent[2] = 8;
        d->dirent[30] = 1;
        d->dirent[31] = 255;
-        d->self = *dno;
+        d->self = cpu_to_le32(*dno);
        return d;
 }
@@ -424,16 +392,16 @@ struct fnode *hpfs_alloc_fnode(struct super_block *s, secno near, fnode_secno *f
                          struct buffer_head **bh)
 {
        struct fnode *f;
-        if (!(*fno = hpfs_alloc_sector(s, near, 1, FNODE_ALLOC_FWD, 1))) return NULL;
+        if (!(*fno = hpfs_alloc_sector(s, near, 1, FNODE_ALLOC_FWD))) return NULL;
        if (!(f = hpfs_get_sector(s, *fno, bh))) {
                hpfs_free_sectors(s, *fno, 1);
                return NULL;
        }       
        memset(f, 0, 512);
-        f->magic = FNODE_MAGIC;
+        f->magic = cpu_to_le32(FNODE_MAGIC);
-        f->ea_offs = 0xc4;
+        f->ea_offs = cpu_to_le16(0xc4);
        f->btree.n_free_nodes = 8;
-        f->btree.first_free = 8;
+        f->btree.first_free = cpu_to_le16(8);
        return f;
 }
@@ -441,16 +409,16 @@ struct anode *hpfs_alloc_anode(struct super_block *s, secno near, anode_secno *a
                          struct buffer_head **bh)
 {
        struct anode *a;
-        if (!(*ano = hpfs_alloc_sector(s, near, 1, ANODE_ALLOC_FWD, 1))) return NULL;
+        if (!(*ano = hpfs_alloc_sector(s, near, 1, ANODE_ALLOC_FWD))) return NULL;
        if (!(a = hpfs_get_sector(s, *ano, bh))) {
                hpfs_free_sectors(s, *ano, 1);
                return NULL;
        }
        memset(a, 0, 512);
-        a->magic = ANODE_MAGIC;
+        a->magic = cpu_to_le32(ANODE_MAGIC);
-        a->self = *ano;
+        a->self = cpu_to_le32(*ano);
        a->btree.n_free_nodes = 40;
        a->btree.n_used_nodes = 0;
-        a->btree.first_free = 8;
+        a->btree.first_free = cpu_to_le16(8);
        return a;
 }
diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c
index 6a2f04bf3df0..08b503e8ed29 100644
--- a/fs/hpfs/anode.c
+++ b/fs/hpfs/anode.c
@@ -22,8 +22,8 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode,
        if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, a, &c1, &c2, "hpfs_bplus_lookup")) return -1;
        if (btree->internal) {
                for (i = 0; i < btree->n_used_nodes; i++)
-                        if (btree->u.internal[i].file_secno > sec) {
+                        if (le32_to_cpu(btree->u.internal[i].file_secno) > sec) {
-                                a = btree->u.internal[i].down;
+                                a = le32_to_cpu(btree->u.internal[i].down);
                                brelse(bh);
                                if (!(anode = hpfs_map_anode(s, a, &bh))) return -1;
                                btree = &anode->btree;
@@ -34,18 +34,18 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode,
                return -1;
        }
        for (i = 0; i < btree->n_used_nodes; i++)
-                if (btree->u.external[i].file_secno <= sec &&
+                if (le32_to_cpu(btree->u.external[i].file_secno) <= sec &&
-                    btree->u.external[i].file_secno + btree->u.external[i].length > sec) {
+                    le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) > sec) {
-                        a = btree->u.external[i].disk_secno + sec - btree->u.external[i].file_secno;
+                        a = le32_to_cpu(btree->u.external[i].disk_secno) + sec - le32_to_cpu(btree->u.external[i].file_secno);
                        if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, a, 1, "data")) {
                                brelse(bh);
                                return -1;
                        }
                        if (inode) {
                                struct hpfs_inode_info *hpfs_inode = hpfs_i(inode);
-                                hpfs_inode->i_file_sec = btree->u.external[i].file_secno;
+                                hpfs_inode->i_file_sec = le32_to_cpu(btree->u.external[i].file_secno);
-                                hpfs_inode->i_disk_sec = btree->u.external[i].disk_secno;
+                                hpfs_inode->i_disk_sec = le32_to_cpu(btree->u.external[i].disk_secno);
-                                hpfs_inode->i_n_secs = btree->u.external[i].length;
+                                hpfs_inode->i_n_secs = le32_to_cpu(btree->u.external[i].length);
                        }
                        brelse(bh);
                        return a;
@@ -83,8 +83,8 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
                return -1;
        }
        if (btree->internal) {
-                a = btree->u.internal[n].down;
+                a = le32_to_cpu(btree->u.internal[n].down);
-                btree->u.internal[n].file_secno = -1;
+                btree->u.internal[n].file_secno = cpu_to_le32(-1);
                mark_buffer_dirty(bh);
                brelse(bh);
                if (hpfs_sb(s)->sb_chk)
@@ -94,15 +94,15 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
                goto go_down;
        }
        if (n >= 0) {
-                if (btree->u.external[n].file_secno + btree->u.external[n].length != fsecno) {
+                if (le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length) != fsecno) {
                        hpfs_error(s, "allocated size %08x, trying to add sector %08x, %cnode %08x",
-                                btree->u.external[n].file_secno + btree->u.external[n].length, fsecno,
+                                le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length), fsecno,
                                fnod?'f':'a', node);
                        brelse(bh);
                        return -1;
                }
-                if (hpfs_alloc_if_possible(s, se = btree->u.external[n].disk_secno + btree->u.external[n].length)) {
+                if (hpfs_alloc_if_possible(s, se = le32_to_cpu(btree->u.external[n].disk_secno) + le32_to_cpu(btree->u.external[n].length))) {
-                        btree->u.external[n].length++;
+                        btree->u.external[n].length = cpu_to_le32(le32_to_cpu(btree->u.external[n].length) + 1);
                        mark_buffer_dirty(bh);
                        brelse(bh);
                        return se;
@@ -115,20 +115,20 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
                }
                se = !fnod ? node : (node + 16384) & ~16383;
        }       
-        if (!(se = hpfs_alloc_sector(s, se, 1, fsecno*ALLOC_M>ALLOC_FWD_MAX ? ALLOC_FWD_MAX : fsecno*ALLOC_M<ALLOC_FWD_MIN ? ALLOC_FWD_MIN : fsecno*ALLOC_M, 1))) {
+        if (!(se = hpfs_alloc_sector(s, se, 1, fsecno*ALLOC_M>ALLOC_FWD_MAX ? ALLOC_FWD_MAX : fsecno*ALLOC_M<ALLOC_FWD_MIN ? ALLOC_FWD_MIN : fsecno*ALLOC_M))) {
                brelse(bh);
                return -1;
        }
-        fs = n < 0 ? 0 : btree->u.external[n].file_secno + btree->u.external[n].length;
+        fs = n < 0 ? 0 : le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length);
        if (!btree->n_free_nodes) {
-                up = a != node ? anode->up : -1;
+                up = a != node ? le32_to_cpu(anode->up) : -1;
                if (!(anode = hpfs_alloc_anode(s, a, &na, &bh1))) {
                        brelse(bh);
                        hpfs_free_sectors(s, se, 1);
                        return -1;
                }
                if (a == node && fnod) {
-                        anode->up = node;
+                        anode->up = cpu_to_le32(node);
                        anode->btree.fnode_parent = 1;
                        anode->btree.n_used_nodes = btree->n_used_nodes;
                        anode->btree.first_free = btree->first_free;
@@ -137,9 +137,9 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
                        btree->internal = 1;
                        btree->n_free_nodes = 11;
                        btree->n_used_nodes = 1;
-                        btree->first_free = (char *)&(btree->u.internal[1]) - (char *)btree;
+                        btree->first_free = cpu_to_le16((char *)&(btree->u.internal[1]) - (char *)btree);
-                        btree->u.internal[0].file_secno = -1;
+                        btree->u.internal[0].file_secno = cpu_to_le32(-1);
-                        btree->u.internal[0].down = na;
+                        btree->u.internal[0].down = cpu_to_le32(na);
                        mark_buffer_dirty(bh);
                } else if (!(ranode = hpfs_alloc_anode(s, /*a*/0, &ra, &bh2))) {
                        brelse(bh);
@@ -153,15 +153,15 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
                btree = &anode->btree;
        }
        btree->n_free_nodes--; n = btree->n_used_nodes++;
-        btree->first_free += 12;
+        btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 12);
-        btree->u.external[n].disk_secno = se;
+        btree->u.external[n].disk_secno = cpu_to_le32(se);
-        btree->u.external[n].file_secno = fs;
+        btree->u.external[n].file_secno = cpu_to_le32(fs);
-        btree->u.external[n].length = 1;
+        btree->u.external[n].length = cpu_to_le32(1);
        mark_buffer_dirty(bh);
        brelse(bh);
        if ((a == node && fnod) || na == -1) return se;
        c2 = 0;
-        while (up != -1) {
+        while (up != (anode_secno)-1) {
                struct anode *new_anode;
                if (hpfs_sb(s)->sb_chk)
                        if (hpfs_stop_cycles(s, up, &c1, &c2, "hpfs_add_sector_to_btree #2")) return -1;
@@ -174,47 +174,47 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
                }
                if (btree->n_free_nodes) {
                        btree->n_free_nodes--; n = btree->n_used_nodes++;
-                        btree->first_free += 8;
+                        btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 8);
-                        btree->u.internal[n].file_secno = -1;
+                        btree->u.internal[n].file_secno = cpu_to_le32(-1);
-                        btree->u.internal[n].down = na;
+                        btree->u.internal[n].down = cpu_to_le32(na);
-                        btree->u.internal[n-1].file_secno = fs;
+                        btree->u.internal[n-1].file_secno = cpu_to_le32(fs);
                        mark_buffer_dirty(bh);
                        brelse(bh);
                        brelse(bh2);
                        hpfs_free_sectors(s, ra, 1);
                        if ((anode = hpfs_map_anode(s, na, &bh))) {
-                                anode->up = up;
+                                anode->up = cpu_to_le32(up);
                                anode->btree.fnode_parent = up == node && fnod;
                                mark_buffer_dirty(bh);
                                brelse(bh);
                        }
                        return se;
                }
-                up = up != node ? anode->up : -1;
+                up = up != node ? le32_to_cpu(anode->up) : -1;
-                btree->u.internal[btree->n_used_nodes - 1].file_secno = /*fs*/-1;
+                btree->u.internal[btree->n_used_nodes - 1].file_secno = cpu_to_le32(/*fs*/-1);
                mark_buffer_dirty(bh);
                brelse(bh);
                a = na;
                if ((new_anode = hpfs_alloc_anode(s, a, &na, &bh))) {
                        anode = new_anode;
-                        /*anode->up = up != -1 ? up : ra;*/
+                        /*anode->up = cpu_to_le32(up != -1 ? up : ra);*/
                        anode->btree.internal = 1;
                        anode->btree.n_used_nodes = 1;
                        anode->btree.n_free_nodes = 59;
-                        anode->btree.first_free = 16;
+                        anode->btree.first_free = cpu_to_le16(16);
-                        anode->btree.u.internal[0].down = a;
+                        anode->btree.u.internal[0].down = cpu_to_le32(a);
-                        anode->btree.u.internal[0].file_secno = -1;
+                        anode->btree.u.internal[0].file_secno = cpu_to_le32(-1);
                        mark_buffer_dirty(bh);
                        brelse(bh);
                        if ((anode = hpfs_map_anode(s, a, &bh))) {
-                                anode->up = na;
+                                anode->up = cpu_to_le32(na);
                                mark_buffer_dirty(bh);
                                brelse(bh);
                        }
                } else na = a;
        }
        if ((anode = hpfs_map_anode(s, na, &bh))) {
-                anode->up = node;
+                anode->up = cpu_to_le32(node);
                if (fnod) anode->btree.fnode_parent = 1;
                mark_buffer_dirty(bh);
                brelse(bh);
@@ -232,14 +232,14 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
                }
                btree = &fnode->btree;
        }
-        ranode->up = node;
+        ranode->up = cpu_to_le32(node);
-        memcpy(&ranode->btree, btree, btree->first_free);
+        memcpy(&ranode->btree, btree, le16_to_cpu(btree->first_free));
        if (fnod) ranode->btree.fnode_parent = 1;
        ranode->btree.n_free_nodes = (ranode->btree.internal ? 60 : 40) - ranode->btree.n_used_nodes;
        if (ranode->btree.internal) for (n = 0; n < ranode->btree.n_used_nodes; n++) {
                struct anode *unode;
-                if ((unode = hpfs_map_anode(s, ranode->u.internal[n].down, &bh1))) {
+                if ((unode = hpfs_map_anode(s, le32_to_cpu(ranode->u.internal[n].down), &bh1))) {
-                        unode->up = ra;
+                        unode->up = cpu_to_le32(ra);
                        unode->btree.fnode_parent = 0;
                        mark_buffer_dirty(bh1);
                        brelse(bh1);
@@ -248,11 +248,11 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
        btree->internal = 1;
        btree->n_free_nodes = fnod ? 10 : 58;
        btree->n_used_nodes = 2;
-        btree->first_free = (char *)&btree->u.internal[2] - (char *)btree;
+        btree->first_free = cpu_to_le16((char *)&btree->u.internal[2] - (char *)btree);
-        btree->u.internal[0].file_secno = fs;
+        btree->u.internal[0].file_secno = cpu_to_le32(fs);
-        btree->u.internal[0].down = ra;
+        btree->u.internal[0].down = cpu_to_le32(ra);
-        btree->u.internal[1].file_secno = -1;
+        btree->u.internal[1].file_secno = cpu_to_le32(-1);
-        btree->u.internal[1].down = na;
+        btree->u.internal[1].down = cpu_to_le32(na);
        mark_buffer_dirty(bh);
        brelse(bh);
        mark_buffer_dirty(bh2);
@@ -279,7 +279,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree)
        go_down:
        d2 = 0;
        while (btree1->internal) {
-                ano = btree1->u.internal[pos].down;
+                ano = le32_to_cpu(btree1->u.internal[pos].down);
                if (level) brelse(bh);
                if (hpfs_sb(s)->sb_chk)
                        if (hpfs_stop_cycles(s, ano, &d1, &d2, "hpfs_remove_btree #1"))
@@ -290,7 +290,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree)
                pos = 0;
        }
        for (i = 0; i < btree1->n_used_nodes; i++)
-                hpfs_free_sectors(s, btree1->u.external[i].disk_secno, btree1->u.external[i].length);
+                hpfs_free_sectors(s, le32_to_cpu(btree1->u.external[i].disk_secno), le32_to_cpu(btree1->u.external[i].length));
        go_up:
        if (!level) return;
        brelse(bh);
@@ -298,13 +298,13 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree)
                if (hpfs_stop_cycles(s, ano, &c1, &c2, "hpfs_remove_btree #2")) return;
        hpfs_free_sectors(s, ano, 1);
        oano = ano;
-        ano = anode->up;
+        ano = le32_to_cpu(anode->up);
        if (--level) {
                if (!(anode = hpfs_map_anode(s, ano, &bh))) return;
                btree1 = &anode->btree;
        } else btree1 = btree;
        for (i = 0; i < btree1->n_used_nodes; i++) {
-                if (btree1->u.internal[i].down == oano) {
+                if (le32_to_cpu(btree1->u.internal[i].down) == oano) {
                        if ((pos = i + 1) < btree1->n_used_nodes)
                                goto go_down;
                        else
@@ -411,7 +411,7 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs)
                if (fno) {
                        btree->n_free_nodes = 8;
                        btree->n_used_nodes = 0;
-                        btree->first_free = 8;
+                        btree->first_free = cpu_to_le16(8);
                        btree->internal = 0;
                        mark_buffer_dirty(bh);
                } else hpfs_free_sectors(s, f, 1);
@@ -421,22 +421,22 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs)
        while (btree->internal) {
                nodes = btree->n_used_nodes + btree->n_free_nodes;
                for (i = 0; i < btree->n_used_nodes; i++)
-                        if (btree->u.internal[i].file_secno >= secs) goto f;
+                        if (le32_to_cpu(btree->u.internal[i].file_secno) >= secs) goto f;
                brelse(bh);
                hpfs_error(s, "internal btree %08x doesn't end with -1", node);
                return;
                f:
                for (j = i + 1; j < btree->n_used_nodes; j++)
-                        hpfs_ea_remove(s, btree->u.internal[j].down, 1, 0);
+                        hpfs_ea_remove(s, le32_to_cpu(btree->u.internal[j].down), 1, 0);
                btree->n_used_nodes = i + 1;
                btree->n_free_nodes = nodes - btree->n_used_nodes;
-                btree->first_free = 8 + 8 * btree->n_used_nodes;
+                btree->first_free = cpu_to_le16(8 + 8 * btree->n_used_nodes);
                mark_buffer_dirty(bh);
-                if (btree->u.internal[i].file_secno == secs) {
+                if (btree->u.internal[i].file_secno == cpu_to_le32(secs)) {
                        brelse(bh);
                        return;
                }
-                node = btree->u.internal[i].down;
+                node = le32_to_cpu(btree->u.internal[i].down);
                brelse(bh);
                if (hpfs_sb(s)->sb_chk)
                        if (hpfs_stop_cycles(s, node, &c1, &c2, "hpfs_truncate_btree"))
@@ -446,25 +446,25 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs)
        }       
        nodes = btree->n_used_nodes + btree->n_free_nodes;
        for (i = 0; i < btree->n_used_nodes; i++)
-                if (btree->u.external[i].file_secno + btree->u.external[i].length >= secs) goto ff;
+                if (le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) >= secs) goto ff;
        brelse(bh);
        return;
        ff:
-        if (secs <= btree->u.external[i].file_secno) {
+        if (secs <= le32_to_cpu(btree->u.external[i].file_secno)) {
                hpfs_error(s, "there is an allocation error in file %08x, sector %08x", f, secs);
                if (i) i--;
        }
-        else if (btree->u.external[i].file_secno + btree->u.external[i].length > secs) {
+        else if (le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) > secs) {
-                hpfs_free_sectors(s, btree->u.external[i].disk_secno + secs -
+                hpfs_free_sectors(s, le32_to_cpu(btree->u.external[i].disk_secno) + secs -
-                        btree->u.external[i].file_secno, btree->u.external[i].length
+                        le32_to_cpu(btree->u.external[i].file_secno), le32_to_cpu(btree->u.external[i].length)
-                        - secs + btree->u.external[i].file_secno); /* I hope gcc optimizes this :-) */
+                        - secs + le32_to_cpu(btree->u.external[i].file_secno)); /* I hope gcc optimizes this :-) */
-                btree->u.external[i].length = secs - btree->u.external[i].file_secno;
+                btree->u.external[i].length = cpu_to_le32(secs - le32_to_cpu(btree->u.external[i].file_secno));
        }
        for (j = i + 1; j < btree->n_used_nodes; j++)
-                hpfs_free_sectors(s, btree->u.external[j].disk_secno, btree->u.external[j].length);
+                hpfs_free_sectors(s, le32_to_cpu(btree->u.external[j].disk_secno), le32_to_cpu(btree->u.external[j].length));
        btree->n_used_nodes = i + 1;
        btree->n_free_nodes = nodes - btree->n_used_nodes;
-        btree->first_free = 8 + 12 * btree->n_used_nodes;
+        btree->first_free = cpu_to_le16(8 + 12 * btree->n_used_nodes);
        mark_buffer_dirty(bh);
        brelse(bh);
 }
@@ -480,12 +480,12 @@ void hpfs_remove_fnode(struct super_block *s, fnode_secno fno)
        struct extended_attribute *ea_end;
        if (!(fnode = hpfs_map_fnode(s, fno, &bh))) return;
        if (!fnode->dirflag) hpfs_remove_btree(s, &fnode->btree);
-        else hpfs_remove_dtree(s, fnode->u.external[0].disk_secno);
+        else hpfs_remove_dtree(s, le32_to_cpu(fnode->u.external[0].disk_secno));
        ea_end = fnode_end_ea(fnode);
        for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea))
                if (ea->indirect)
                        hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea));
-        hpfs_ea_ext_remove(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l);
+        hpfs_ea_ext_remove(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l));
        brelse(bh);
        hpfs_free_sectors(s, fno, 1);
 }
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c
index 793cb9d943d2..9ecde27d1e29 100644
--- a/fs/hpfs/buffer.c
+++ b/fs/hpfs/buffer.c
@@ -9,22 +9,6 @@
 #include <linux/slab.h>
 #include "hpfs_fn.h"
-void hpfs_lock_creation(struct super_block *s)
-{
-#ifdef DEBUG_LOCKS
-        printk("lock creation\n");
-#endif
-        mutex_lock(&hpfs_sb(s)->hpfs_creation_de);
-}
-void hpfs_unlock_creation(struct super_block *s)
-{
-#ifdef DEBUG_LOCKS
-        printk("unlock creation\n");
-#endif
-        mutex_unlock(&hpfs_sb(s)->hpfs_creation_de);
-}
 /* Map a sector into a buffer and return pointers to it and to the buffer. */
 void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head **bhp,
@@ -32,6 +16,8 @@ void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head
 {
        struct buffer_head *bh;
+        hpfs_lock_assert(s);
        cond_resched();
        *bhp = bh = sb_bread(s, secno);
@@ -50,6 +36,8 @@ void *hpfs_get_sector(struct super_block *s, unsigned secno, struct buffer_head
        struct buffer_head *bh;
        /*return hpfs_map_sector(s, secno, bhp, 0);*/
+        hpfs_lock_assert(s);
        cond_resched();
        if ((*bhp = bh = sb_getblk(s, secno)) != NULL) {
@@ -70,6 +58,8 @@ void *hpfs_map_4sectors(struct super_block *s, unsigned secno, struct quad_buffe
        struct buffer_head *bh;
        char *data;
+        hpfs_lock_assert(s);
        cond_resched();
        if (secno & 3) {
@@ -125,6 +115,8 @@ void *hpfs_get_4sectors(struct super_block *s, unsigned secno,
 {
        cond_resched();
+        hpfs_lock_assert(s);
        if (secno & 3) {
                printk("HPFS: hpfs_get_4sectors: unaligned read\n");
                return NULL;
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index b3d7c0ddb609..f46ae025bfb5 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -88,9 +88,9 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
                        hpfs_error(inode->i_sb, "not a directory, fnode %08lx",
                                        (unsigned long)inode->i_ino);
                }
-                if (hpfs_inode->i_dno != fno->u.external[0].disk_secno) {
+                if (hpfs_inode->i_dno != le32_to_cpu(fno->u.external[0].disk_secno)) {
                        e = 1;
-                        hpfs_error(inode->i_sb, "corrupted inode: i_dno == %08x, fnode -> dnode == %08x", hpfs_inode->i_dno, fno->u.external[0].disk_secno);
+                        hpfs_error(inode->i_sb, "corrupted inode: i_dno == %08x, fnode -> dnode == %08x", hpfs_inode->i_dno, le32_to_cpu(fno->u.external[0].disk_secno));
                }
                brelse(bh);
                if (e) {
@@ -156,7 +156,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
                        goto again;
                }
                tempname = hpfs_translate_name(inode->i_sb, de->name, de->namelen, lc, de->not_8x3);
-                if (filldir(dirent, tempname, de->namelen, old_pos, de->fnode, DT_UNKNOWN) < 0) {
+                if (filldir(dirent, tempname, de->namelen, old_pos, le32_to_cpu(de->fnode), DT_UNKNOWN) < 0) {
                        filp->f_pos = old_pos;
                        if (tempname != de->name) kfree(tempname);
                        hpfs_brelse4(&qbh);
@@ -221,7 +221,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
         * Get inode number, what we're after.
         */
-        ino = de->fnode;
+        ino = le32_to_cpu(de->fnode);
        /*
         * Go find or make an inode.
@@ -236,7 +236,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
                hpfs_init_inode(result);
                if (de->directory)
                        hpfs_read_inode(result);
-                else if (de->ea_size && hpfs_sb(dir->i_sb)->sb_eas)
+                else if (le32_to_cpu(de->ea_size) && hpfs_sb(dir->i_sb)->sb_eas)
                        hpfs_read_inode(result);
                else {
                        result->i_mode |= S_IFREG;
@@ -250,8 +250,6 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
        hpfs_result = hpfs_i(result);
        if (!de->directory) hpfs_result->i_parent_dir = dir->i_ino;
-        hpfs_decide_conv(result, name, len);
        if (de->has_acl || de->has_xtd_perm) if (!(dir->i_sb->s_flags & MS_RDONLY)) {
                hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures");
                goto bail1;
@@ -263,19 +261,19 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
         */
        if (!result->i_ctime.tv_sec) {
-                if (!(result->i_ctime.tv_sec = local_to_gmt(dir->i_sb, de->creation_date)))
+                if (!(result->i_ctime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->creation_date))))
                        result->i_ctime.tv_sec = 1;
                result->i_ctime.tv_nsec = 0;
-                result->i_mtime.tv_sec = local_to_gmt(dir->i_sb, de->write_date);
+                result->i_mtime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->write_date));
                result->i_mtime.tv_nsec = 0;
-                result->i_atime.tv_sec = local_to_gmt(dir->i_sb, de->read_date);
+                result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->read_date));
                result->i_atime.tv_nsec = 0;
-                hpfs_result->i_ea_size = de->ea_size;
+                hpfs_result->i_ea_size = le32_to_cpu(de->ea_size);
                if (!hpfs_result->i_ea_mode && de->read_only)
                        result->i_mode &= ~0222;
                if (!de->directory) {
                        if (result->i_size == -1) {
-                                result->i_size = de->file_size;
+                                result->i_size = le32_to_cpu(de->file_size);
                                result->i_data.a_ops = &hpfs_aops;
                                hpfs_i(result)->mmu_private = result->i_size;
                        /*
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index 9b2ffadfc8c4..1e0e2ac30fd3 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -14,11 +14,11 @@ static loff_t get_pos(struct dnode *d, struct hpfs_dirent *fde)
        struct hpfs_dirent *de_end = dnode_end_de(d);
        int i = 1;
        for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) {
-                if (de == fde) return ((loff_t) d->self << 4) | (loff_t)i;
+                if (de == fde) return ((loff_t) le32_to_cpu(d->self) << 4) | (loff_t)i;
                i++;
        }
        printk("HPFS: get_pos: not_found\n");
-        return ((loff_t)d->self << 4) | (loff_t)1;
+        return ((loff_t)le32_to_cpu(d->self) << 4) | (loff_t)1;
 }
 void hpfs_add_pos(struct inode *inode, loff_t *pos)
@@ -130,29 +130,30 @@ static void set_last_pointer(struct super_block *s, struct dnode *d, dnode_secno
 {
        struct hpfs_dirent *de;
        if (!(de = dnode_last_de(d))) {
-                hpfs_error(s, "set_last_pointer: empty dnode %08x", d->self);
+                hpfs_error(s, "set_last_pointer: empty dnode %08x", le32_to_cpu(d->self));
                return;
        }
        if (hpfs_sb(s)->sb_chk) {
                if (de->down) {
                        hpfs_error(s, "set_last_pointer: dnode %08x has already last pointer %08x",
-                                d->self, de_down_pointer(de));
+                                le32_to_cpu(d->self), de_down_pointer(de));
                        return;
                }
-                if (de->length != 32) {
+                if (le16_to_cpu(de->length) != 32) {
-                        hpfs_error(s, "set_last_pointer: bad last dirent in dnode %08x", d->self);
+                        hpfs_error(s, "set_last_pointer: bad last dirent in dnode %08x", le32_to_cpu(d->self));
                        return;
                }
        }
        if (ptr) {
-                if ((d->first_free += 4) > 2048) {
+                d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + 4);
-                        hpfs_error(s,"set_last_pointer: too long dnode %08x", d->self);
+                if (le32_to_cpu(d->first_free) > 2048) {
-                        d->first_free -= 4;
+                        hpfs_error(s, "set_last_pointer: too long dnode %08x", le32_to_cpu(d->self));
+                        d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) - 4);
                        return;
                }
-                de->length = 36;
+                de->length = cpu_to_le16(36);
                de->down = 1;
-                *(dnode_secno *)((char *)de + 32) = ptr;
+                *(dnode_secno *)((char *)de + 32) = cpu_to_le32(ptr);
        }
 }
@@ -168,7 +169,7 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d,
        for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) {
                int c = hpfs_compare_names(s, name, namelen, de->name, de->namelen, de->last);
                if (!c) {
-                        hpfs_error(s, "name (%c,%d) already exists in dnode %08x", *name, namelen, d->self);
+                        hpfs_error(s, "name (%c,%d) already exists in dnode %08x", *name, namelen, le32_to_cpu(d->self));
                        return NULL;
                }
                if (c < 0) break;
@@ -176,15 +177,14 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d,
        memmove((char *)de + d_size, de, (char *)de_end - (char *)de);
        memset(de, 0, d_size);
        if (down_ptr) {
-                *(int *)((char *)de + d_size - 4) = down_ptr;
+                *(dnode_secno *)((char *)de + d_size - 4) = cpu_to_le32(down_ptr);
                de->down = 1;
        }
-        de->length = d_size;
+        de->length = cpu_to_le16(d_size);
-        if (down_ptr) de->down = 1;
        de->not_8x3 = hpfs_is_name_long(name, namelen);
        de->namelen = namelen;
        memcpy(de->name, name, namelen);
-        d->first_free += d_size;
+        d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + d_size);
        return de;
 }
@@ -194,25 +194,25 @@ static void hpfs_delete_de(struct super_block *s, struct dnode *d,
                           struct hpfs_dirent *de)
 {
        if (de->last) {
-                hpfs_error(s, "attempt to delete last dirent in dnode %08x", d->self);
+                hpfs_error(s, "attempt to delete last dirent in dnode %08x", le32_to_cpu(d->self));
                return;
        }
-        d->first_free -= de->length;
+        d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) - le16_to_cpu(de->length));
-        memmove(de, de_next_de(de), d->first_free + (char *)d - (char *)de);
+        memmove(de, de_next_de(de), le32_to_cpu(d->first_free) + (char *)d - (char *)de);
 }
 static void fix_up_ptrs(struct super_block *s, struct dnode *d)
 {
        struct hpfs_dirent *de;
        struct hpfs_dirent *de_end = dnode_end_de(d);
-        dnode_secno dno = d->self;
+        dnode_secno dno = le32_to_cpu(d->self);
        for (de = dnode_first_de(d); de < de_end; de = de_next_de(de))
                if (de->down) {
                        struct quad_buffer_head qbh;
                        struct dnode *dd;
                        if ((dd = hpfs_map_dnode(s, de_down_pointer(de), &qbh))) {
-                                if (dd->up != dno || dd->root_dnode) {
+                                if (le32_to_cpu(dd->up) != dno || dd->root_dnode) {
-                                        dd->up = dno;
+                                        dd->up = cpu_to_le32(dno);
                                        dd->root_dnode = 0;
                                        hpfs_mark_4buffers_dirty(&qbh);
                                }
@@ -262,7 +262,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
                        kfree(nname);
                        return 1;
                }
-        if (d->first_free + de_size(namelen, down_ptr) <= 2048) {
+        if (le32_to_cpu(d->first_free) + de_size(namelen, down_ptr) <= 2048) {
                loff_t t;
                copy_de(de=hpfs_add_de(i->i_sb, d, name, namelen, down_ptr), new_de);
                t = get_pos(d, de);
@@ -286,11 +286,11 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
                kfree(nname);
                return 1;
        }       
-        memcpy(nd, d, d->first_free);
+        memcpy(nd, d, le32_to_cpu(d->first_free));
        copy_de(de = hpfs_add_de(i->i_sb, nd, name, namelen, down_ptr), new_de);
        for_all_poss(i, hpfs_pos_ins, get_pos(nd, de), 1);
        h = ((char *)dnode_last_de(nd) - (char *)nd) / 2 + 10;
-        if (!(ad = hpfs_alloc_dnode(i->i_sb, d->up, &adno, &qbh1, 0))) {
+        if (!(ad = hpfs_alloc_dnode(i->i_sb, le32_to_cpu(d->up), &adno, &qbh1))) {
                hpfs_error(i->i_sb, "unable to alloc dnode - dnode tree will be corrupted");
                hpfs_brelse4(&qbh);
                kfree(nd);
@@ -313,20 +313,21 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
        down_ptr = adno;
        set_last_pointer(i->i_sb, ad, de->down ? de_down_pointer(de) : 0);
        de = de_next_de(de);
-        memmove((char *)nd + 20, de, nd->first_free + (char *)nd - (char *)de);
+        memmove((char *)nd + 20, de, le32_to_cpu(nd->first_free) + (char *)nd - (char *)de);
-        nd->first_free -= (char *)de - (char *)nd - 20;
+        nd->first_free = cpu_to_le32(le32_to_cpu(nd->first_free) - ((char *)de - (char *)nd - 20));
-        memcpy(d, nd, nd->first_free);
+        memcpy(d, nd, le32_to_cpu(nd->first_free));
        for_all_poss(i, hpfs_pos_del, (loff_t)dno << 4, pos);
        fix_up_ptrs(i->i_sb, ad);
        if (!d->root_dnode) {
-                dno = ad->up = d->up;
+                ad->up = d->up;
+                dno = le32_to_cpu(ad->up);
                hpfs_mark_4buffers_dirty(&qbh);
                hpfs_brelse4(&qbh);
                hpfs_mark_4buffers_dirty(&qbh1);
                hpfs_brelse4(&qbh1);
                goto go_up;
        }
-        if (!(rd = hpfs_alloc_dnode(i->i_sb, d->up, &rdno, &qbh2, 0))) {
+        if (!(rd = hpfs_alloc_dnode(i->i_sb, le32_to_cpu(d->up), &rdno, &qbh2))) {
                hpfs_error(i->i_sb, "unable to alloc dnode - dnode tree will be corrupted");
                hpfs_brelse4(&qbh);
                hpfs_brelse4(&qbh1);
@@ -338,7 +339,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
        i->i_blocks += 4;
        rd->root_dnode = 1;
        rd->up = d->up;
-        if (!(fnode = hpfs_map_fnode(i->i_sb, d->up, &bh))) {
+        if (!(fnode = hpfs_map_fnode(i->i_sb, le32_to_cpu(d->up), &bh))) {
                hpfs_free_dnode(i->i_sb, rdno);
                hpfs_brelse4(&qbh);
                hpfs_brelse4(&qbh1);
@@ -347,10 +348,11 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
                kfree(nname);
                return 1;
        }
-        fnode->u.external[0].disk_secno = rdno;
+        fnode->u.external[0].disk_secno = cpu_to_le32(rdno);
        mark_buffer_dirty(bh);
        brelse(bh);
-        d->up = ad->up = hpfs_i(i)->i_dno = rdno;
+        hpfs_i(i)->i_dno = rdno;
+        d->up = ad->up = cpu_to_le32(rdno);
        d->root_dnode = ad->root_dnode = 0;
        hpfs_mark_4buffers_dirty(&qbh);
        hpfs_brelse4(&qbh);
@@ -373,7 +375,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
 int hpfs_add_dirent(struct inode *i,
                    const unsigned char *name, unsigned namelen,
-                    struct hpfs_dirent *new_de, int cdepth)
+                    struct hpfs_dirent *new_de)
 {
        struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
        struct dnode *d;
@@ -403,7 +405,6 @@ int hpfs_add_dirent(struct inode *i,
                }
        }
        hpfs_brelse4(&qbh);
-        if (!cdepth) hpfs_lock_creation(i->i_sb);
        if (hpfs_check_free_dnodes(i->i_sb, FREE_DNODES_ADD)) {
                c = 1;
                goto ret;
@@ -411,7 +412,6 @@ int hpfs_add_dirent(struct inode *i,
        i->i_version++;
        c = hpfs_add_to_dnode(i, dno, name, namelen, new_de, 0);
        ret:
-        if (!cdepth) hpfs_unlock_creation(i->i_sb);
        return c;
 }
@@ -437,9 +437,9 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
                                return 0;
                if (!(dnode = hpfs_map_dnode(i->i_sb, dno, &qbh))) return 0;
                if (hpfs_sb(i->i_sb)->sb_chk) {
-                        if (dnode->up != chk_up) {
+                        if (le32_to_cpu(dnode->up) != chk_up) {
                                hpfs_error(i->i_sb, "move_to_top: up pointer from %08x should be %08x, is %08x",
-                                        dno, chk_up, dnode->up);
+                                        dno, chk_up, le32_to_cpu(dnode->up));
                                hpfs_brelse4(&qbh);
                                return 0;
                        }
@@ -455,7 +455,7 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
                hpfs_brelse4(&qbh);
        }
        while (!(de = dnode_pre_last_de(dnode))) {
-                dnode_secno up = dnode->up;
+                dnode_secno up = le32_to_cpu(dnode->up);
                hpfs_brelse4(&qbh);
                hpfs_free_dnode(i->i_sb, dno);
                i->i_size -= 2048;
@@ -474,8 +474,8 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
                        hpfs_brelse4(&qbh);
                        return 0;
                }
-                dnode->first_free -= 4;
+                dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4);
-                de->length -= 4;
+                de->length = cpu_to_le16(le16_to_cpu(de->length) - 4);
                de->down = 0;
                hpfs_mark_4buffers_dirty(&qbh);
                dno = up;
@@ -483,12 +483,12 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
        t = get_pos(dnode, de);
        for_all_poss(i, hpfs_pos_subst, t, 4);
        for_all_poss(i, hpfs_pos_subst, t + 1, 5);
-        if (!(nde = kmalloc(de->length, GFP_NOFS))) {
+        if (!(nde = kmalloc(le16_to_cpu(de->length), GFP_NOFS))) {
                hpfs_error(i->i_sb, "out of memory for dirent - directory will be corrupted");
                hpfs_brelse4(&qbh);
                return 0;
        }
-        memcpy(nde, de, de->length);
+        memcpy(nde, de, le16_to_cpu(de->length));
        ddno = de->down ? de_down_pointer(de) : 0;
        hpfs_delete_de(i->i_sb, dnode, de);
        set_last_pointer(i->i_sb, dnode, ddno);
@@ -517,11 +517,11 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
        try_it_again:
        if (hpfs_stop_cycles(i->i_sb, dno, &c1, &c2, "delete_empty_dnode")) return;
        if (!(dnode = hpfs_map_dnode(i->i_sb, dno, &qbh))) return;
-        if (dnode->first_free > 56) goto end;
+        if (le32_to_cpu(dnode->first_free) > 56) goto end;
-        if (dnode->first_free == 52 || dnode->first_free == 56) {
+        if (le32_to_cpu(dnode->first_free) == 52 || le32_to_cpu(dnode->first_free) == 56) {
                struct hpfs_dirent *de_end;
                int root = dnode->root_dnode;
-                up = dnode->up;
+                up = le32_to_cpu(dnode->up);
                de = dnode_first_de(dnode);
                down = de->down ? de_down_pointer(de) : 0;
                if (hpfs_sb(i->i_sb)->sb_chk) if (root && !down) {
@@ -545,13 +545,13 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
                                return;
                            }
                        if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
-                                d1->up = up;
+                                d1->up = cpu_to_le32(up);
                                d1->root_dnode = 1;
                                hpfs_mark_4buffers_dirty(&qbh1);
                                hpfs_brelse4(&qbh1);
                        }
                        if ((fnode = hpfs_map_fnode(i->i_sb, up, &bh))) {
-                                fnode->u.external[0].disk_secno = down;
+                                fnode->u.external[0].disk_secno = cpu_to_le32(down);
                                mark_buffer_dirty(bh);
                                brelse(bh);
                        }
@@ -570,22 +570,22 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
                for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, ((loff_t)up << 4) | p);
                if (!down) {
                        de->down = 0;
-                        de->length -= 4;
+                        de->length = cpu_to_le16(le16_to_cpu(de->length) - 4);
-                        dnode->first_free -= 4;
+                        dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4);
                        memmove(de_next_de(de), (char *)de_next_de(de) + 4,
-                                (char *)dnode + dnode->first_free - (char *)de_next_de(de));
+                                (char *)dnode + le32_to_cpu(dnode->first_free) - (char *)de_next_de(de));
                } else {
                        struct dnode *d1;
                        struct quad_buffer_head qbh1;
-                        *(dnode_secno *) ((void *) de + de->length - 4) = down;
+                        *(dnode_secno *) ((void *) de + le16_to_cpu(de->length) - 4) = cpu_to_le32(down); /* on-disk down pointer is little-endian, like the other down-pointer stores in this file */
                        if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
-                                d1->up = up;
+                                d1->up = cpu_to_le32(up);
                                hpfs_mark_4buffers_dirty(&qbh1);
                                hpfs_brelse4(&qbh1);
                        }
                }
        } else {
-                hpfs_error(i->i_sb, "delete_empty_dnode: dnode %08x, first_free == %03x", dno, dnode->first_free);
+                hpfs_error(i->i_sb, "delete_empty_dnode: dnode %08x, first_free == %03x", dno, le32_to_cpu(dnode->first_free));
                goto end;
        }
@@ -596,18 +596,18 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
                struct quad_buffer_head qbh1;
                if (!de_next->down) goto endm;
                ndown = de_down_pointer(de_next);
-                if (!(de_cp = kmalloc(de->length, GFP_NOFS))) {
+                if (!(de_cp = kmalloc(le16_to_cpu(de->length), GFP_NOFS))) {
                        printk("HPFS: out of memory for dtree balancing\n");
                        goto endm;
                }
-                memcpy(de_cp, de, de->length);
+                memcpy(de_cp, de, le16_to_cpu(de->length));
                hpfs_delete_de(i->i_sb, dnode, de);
                hpfs_mark_4buffers_dirty(&qbh);
                hpfs_brelse4(&qbh);
                for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | p, 4);
                for_all_poss(i, hpfs_pos_del, ((loff_t)up << 4) | p, 1);
                if (de_cp->down) if ((d1 = hpfs_map_dnode(i->i_sb, de_down_pointer(de_cp), &qbh1))) {
-                        d1->up = ndown;
+                        d1->up = cpu_to_le32(ndown);
                        hpfs_mark_4buffers_dirty(&qbh1);
                        hpfs_brelse4(&qbh1);
                }
@@ -635,7 +635,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
                        struct hpfs_dirent *del = dnode_last_de(d1);
                        dlp = del->down ? de_down_pointer(del) : 0;
                        if (!dlp && down) {
-                                if (d1->first_free > 2044) {
+                                if (le32_to_cpu(d1->first_free) > 2044) {
                                        if (hpfs_sb(i->i_sb)->sb_chk >= 2) {
                                                printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n");
                                                printk("HPFS: warning: terminating balancing operation\n");
@@ -647,38 +647,38 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
                                        printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n");
                                        printk("HPFS: warning: goin'on\n");
                                }
-                                del->length += 4;
+                                del->length = cpu_to_le16(le16_to_cpu(del->length) + 4);
                                del->down = 1;
-                                d1->first_free += 4;
+                                d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) + 4);
                        }
                        if (dlp && !down) {
-                                del->length -= 4;
+                                del->length = cpu_to_le16(le16_to_cpu(del->length) - 4);
                                del->down = 0;
-                                d1->first_free -= 4;
+                                d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) - 4);
                        } else if (down)
-                                *(dnode_secno *) ((void *) del + del->length - 4) = down;
+                                *(dnode_secno *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down);
                } else goto endm;
-                if (!(de_cp = kmalloc(de_prev->length, GFP_NOFS))) {
+                if (!(de_cp = kmalloc(le16_to_cpu(de_prev->length), GFP_NOFS))) {
                        printk("HPFS: out of memory for dtree balancing\n");
                        hpfs_brelse4(&qbh1);
                        goto endm;
                }
                hpfs_mark_4buffers_dirty(&qbh1);
                hpfs_brelse4(&qbh1);
-                memcpy(de_cp, de_prev, de_prev->length);
+                memcpy(de_cp, de_prev, le16_to_cpu(de_prev->length));
                hpfs_delete_de(i->i_sb, dnode, de_prev);
                if (!de_prev->down) {
-                        de_prev->length += 4;
+                        de_prev->length = cpu_to_le16(le16_to_cpu(de_prev->length) + 4);
                        de_prev->down = 1;
-                        dnode->first_free += 4;
+                        dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) + 4);
                }
-                *(dnode_secno *) ((void *) de_prev + de_prev->length - 4) = ndown;
+                *(dnode_secno *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown);
                hpfs_mark_4buffers_dirty(&qbh);
                hpfs_brelse4(&qbh);
                for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | (p - 1), 4);
                for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | p, ((loff_t)up << 4) | (p - 1));
                if (down) if ((d1 = hpfs_map_dnode(i->i_sb, de_down_pointer(de), &qbh1))) {
-                        d1->up = ndown;
+                        d1->up = cpu_to_le32(ndown);
                        hpfs_mark_4buffers_dirty(&qbh1);
                        hpfs_brelse4(&qbh1);
                }
@@ -701,7 +701,6 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
 {
        struct dnode *dnode = qbh->data;
        dnode_secno down = 0;
-        int lock = 0;
        loff_t t;
        if (de->first || de->last) {
                hpfs_error(i->i_sb, "hpfs_remove_dirent: attempt to delete first or last dirent in dnode %08x", dno);
@@ -710,11 +709,8 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
        }
        if (de->down) down = de_down_pointer(de);
        if (depth && (de->down || (de == dnode_first_de(dnode) && de_next_de(de)->last))) {
-                lock = 1;
-                hpfs_lock_creation(i->i_sb);
                if (hpfs_check_free_dnodes(i->i_sb, FREE_DNODES_DEL)) {
                        hpfs_brelse4(qbh);
-                        hpfs_unlock_creation(i->i_sb);
                        return 2;
                }
        }
@@ -727,11 +723,9 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
                dnode_secno a = move_to_top(i, down, dno);
                for_all_poss(i, hpfs_pos_subst, 5, t);
                if (a) delete_empty_dnode(i, a);
-                if (lock) hpfs_unlock_creation(i->i_sb);
                return !a;
        }
        delete_empty_dnode(i, dno);
-        if (lock) hpfs_unlock_creation(i->i_sb);
        return 0;
 }
@@ -751,8 +745,8 @@ void hpfs_count_dnodes(struct super_block *s, dnode_secno dno, int *n_dnodes,
        ptr = 0;
        go_up:
        if (!(dnode = hpfs_map_dnode(s, dno, &qbh))) return;
-        if (hpfs_sb(s)->sb_chk) if (odno && odno != -1 && dnode->up != odno)
+        if (hpfs_sb(s)->sb_chk) if (odno && odno != -1 && le32_to_cpu(dnode->up) != odno)
-                hpfs_error(s, "hpfs_count_dnodes: bad up pointer; dnode %08x, down %08x points to %08x", odno, dno, dnode->up);
+                hpfs_error(s, "hpfs_count_dnodes: bad up pointer; dnode %08x, down %08x points to %08x", odno, dno, le32_to_cpu(dnode->up));
        de = dnode_first_de(dnode);
        if (ptr) while(1) {
                if (de->down) if (de_down_pointer(de) == ptr) goto process_de;
@@ -776,7 +770,7 @@ void hpfs_count_dnodes(struct super_block *s, dnode_secno dno, int *n_dnodes,
        if (!de->first && !de->last && n_items) (*n_items)++;
        if ((de = de_next_de(de)) < dnode_end_de(dnode)) goto next_de;
        ptr = dno;
-        dno = dnode->up;
+        dno = le32_to_cpu(dnode->up);
        if (dnode->root_dnode) {
                hpfs_brelse4(&qbh);
                return;
@@ -824,8 +818,8 @@ dnode_secno hpfs_de_as_down_as_possible(struct super_block *s, dnode_secno dno)
                        return d;
        if (!(de = map_nth_dirent(s, d, 1, &qbh, NULL))) return dno;
        if (hpfs_sb(s)->sb_chk)
-                if (up && ((struct dnode *)qbh.data)->up != up)
+                if (up && le32_to_cpu(((struct dnode *)qbh.data)->up) != up)
-                        hpfs_error(s, "hpfs_de_as_down_as_possible: bad up pointer; dnode %08x, down %08x points to %08x", up, d, ((struct dnode *)qbh.data)->up);
+                        hpfs_error(s, "hpfs_de_as_down_as_possible: bad up pointer; dnode %08x, down %08x points to %08x", up, d, le32_to_cpu(((struct dnode *)qbh.data)->up));
        if (!de->down) {
                hpfs_brelse4(&qbh);
                return d;
@@ -874,7 +868,7 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
        /* Going up */
        if (dnode->root_dnode) goto bail;
-        if (!(up_dnode = hpfs_map_dnode(inode->i_sb, dnode->up, &qbh0)))
+        if (!(up_dnode = hpfs_map_dnode(inode->i_sb, le32_to_cpu(dnode->up), &qbh0)))
                goto bail;
        end_up_de = dnode_end_de(up_dnode);
@@ -882,16 +876,16 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
        for (up_de = dnode_first_de(up_dnode); up_de < end_up_de;
             up_de = de_next_de(up_de)) {
                if (!(++c & 077)) hpfs_error(inode->i_sb,
-                        "map_pos_dirent: pos crossed dnode boundary; dnode = %08x", dnode->up);
+                        "map_pos_dirent: pos crossed dnode boundary; dnode = %08x", le32_to_cpu(dnode->up));
                if (up_de->down && de_down_pointer(up_de) == dno) {
-                        *posp = ((loff_t) dnode->up << 4) + c;
+                        *posp = ((loff_t) le32_to_cpu(dnode->up) << 4) + c;
                        hpfs_brelse4(&qbh0);
                        return de;
                }
        }
        
        hpfs_error(inode->i_sb, "map_pos_dirent: pointer to dnode %08x not found in parent dnode %08x",
-                dno, dnode->up);
+                dno, le32_to_cpu(dnode->up));
        hpfs_brelse4(&qbh0);
        
        bail:
@@ -1017,17 +1011,17 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
                /*name2[15] = 0xff;*/
                name1len = 15; name2len = 256;
        }
-        if (!(upf = hpfs_map_fnode(s, f->up, &bh))) {
+        if (!(upf = hpfs_map_fnode(s, le32_to_cpu(f->up), &bh))) {
                kfree(name2);
                return NULL;
        }       
        if (!upf->dirflag) {
                brelse(bh);
-                hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, f->up);
+                hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, le32_to_cpu(f->up));
                kfree(name2);
                return NULL;
        }
-        dno = upf->u.external[0].disk_secno;
+        dno = le32_to_cpu(upf->u.external[0].disk_secno);
        brelse(bh);
        go_down:
        downd = 0;
@@ -1049,7 +1043,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
                return NULL;
        }
        next_de:
-        if (de->fnode == fno) {
+        if (le32_to_cpu(de->fnode) == fno) {
                kfree(name2);
                return de;
        }
@@ -1065,7 +1059,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
                goto go_down;
        }
        f:
-        if (de->fnode == fno) {
+        if (le32_to_cpu(de->fnode) == fno) {
                kfree(name2);
                return de;
        }
@@ -1074,7 +1068,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
        if ((de = de_next_de(de)) < de_end) goto next_de;
        if (d->root_dnode) goto not_found;
        downd = dno;
-        dno = d->up;
+        dno = le32_to_cpu(d->up);
        hpfs_brelse4(qbh);
        if (hpfs_sb(s)->sb_chk)
                if (hpfs_stop_cycles(s, downd, &d1, &d2, "map_fnode_dirent #2")) {
diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c
index 45e53d972b42..d8b84d113c89 100644
--- a/fs/hpfs/ea.c
+++ b/fs/hpfs/ea.c
@@ -24,7 +24,7 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len)
                }
                if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return;
                if (ea->indirect) {
-                        if (ea->valuelen != 8) {
+                        if (ea_valuelen(ea) != 8) {
                                hpfs_error(s, "ea->indirect set while ea->valuelen!=8, %s %08x, pos %08x",
                                        ano ? "anode" : "sectors", a, pos);
                                return;
@@ -33,7 +33,7 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len)
                                return;
                        hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea));
                }
-                pos += ea->namelen + ea->valuelen + 5;
+                pos += ea->namelen + ea_valuelen(ea) + 5;
        }
        if (!ano) hpfs_free_sectors(s, a, (len+511) >> 9);
        else {
@@ -76,24 +76,24 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key,
        unsigned pos;
        int ano, len;
        secno a;
+        char ex[4 + 255 + 1 + 8];
        struct extended_attribute *ea;
        struct extended_attribute *ea_end = fnode_end_ea(fnode);
        for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea))
                if (!strcmp(ea->name, key)) {
                        if (ea->indirect)
                                goto indirect;
-                        if (ea->valuelen >= size)
+                        if (ea_valuelen(ea) >= size)
                                return -EINVAL;
-                        memcpy(buf, ea_data(ea), ea->valuelen);
+                        memcpy(buf, ea_data(ea), ea_valuelen(ea));
-                        buf[ea->valuelen] = 0;
+                        buf[ea_valuelen(ea)] = 0;
                        return 0;
                }
-        a = fnode->ea_secno;
+        a = le32_to_cpu(fnode->ea_secno);
-        len = fnode->ea_size_l;
+        len = le32_to_cpu(fnode->ea_size_l);
        ano = fnode->ea_anode;
        pos = 0;
        while (pos < len) {
-                char ex[4 + 255 + 1 + 8];
                ea = (struct extended_attribute *)ex;
                if (pos + 4 > len) {
                        hpfs_error(s, "EAs don't end correctly, %s %08x, len %08x",
@@ -106,14 +106,14 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key,
                if (!strcmp(ea->name, key)) {
                        if (ea->indirect)
                                goto indirect;
-                        if (ea->valuelen >= size)
+                        if (ea_valuelen(ea) >= size)
                                return -EINVAL;
-                        if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea->valuelen, buf))
+                        if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea_valuelen(ea), buf))
                                return -EIO;
-                        buf[ea->valuelen] = 0;
+                        buf[ea_valuelen(ea)] = 0;
                        return 0;
                }
-                pos += ea->namelen + ea->valuelen + 5;
+                pos += ea->namelen + ea_valuelen(ea) + 5;
        }
        return -ENOENT;
 indirect:
@@ -138,16 +138,16 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si
                if (!strcmp(ea->name, key)) {
                        if (ea->indirect)
                                return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea));
-                        if (!(ret = kmalloc((*size = ea->valuelen) + 1, GFP_NOFS))) {
+                        if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) {
                                printk("HPFS: out of memory for EA\n");
                                return NULL;
                        }
-                        memcpy(ret, ea_data(ea), ea->valuelen);
+                        memcpy(ret, ea_data(ea), ea_valuelen(ea));
-                        ret[ea->valuelen] = 0;
+                        ret[ea_valuelen(ea)] = 0;
                        return ret;
                }
-        a = fnode->ea_secno;
+        a = le32_to_cpu(fnode->ea_secno);
-        len = fnode->ea_size_l;
+        len = le32_to_cpu(fnode->ea_size_l);
        ano = fnode->ea_anode;
        pos = 0;
        while (pos < len) {
@@ -164,18 +164,18 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si
                if (!strcmp(ea->name, key)) {
                        if (ea->indirect)
                                return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea));
-                        if (!(ret = kmalloc((*size = ea->valuelen) + 1, GFP_NOFS))) {
+                        if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) {
                                printk("HPFS: out of memory for EA\n");
                                return NULL;
                        }
-                        if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea->valuelen, ret)) {
+                        if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea_valuelen(ea), ret)) {
                                kfree(ret);
                                return NULL;
                        }
-                        ret[ea->valuelen] = 0;
+                        ret[ea_valuelen(ea)] = 0;
                        return ret;
                }
-                pos += ea->namelen + ea->valuelen + 5;
+                pos += ea->namelen + ea_valuelen(ea) + 5;
        }
        return NULL;
 }
@@ -202,13 +202,13 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
                        if (ea->indirect) {
                                if (ea_len(ea) == size)
                                        set_indirect_ea(s, ea->anode, ea_sec(ea), data, size);
-                        } else if (ea->valuelen == size) {
+                        } else if (ea_valuelen(ea) == size) {
                                memcpy(ea_data(ea), data, size);
                        }
                        return;
                }
-        a = fnode->ea_secno;
+        a = le32_to_cpu(fnode->ea_secno);
-        len = fnode->ea_size_l;
+        len = le32_to_cpu(fnode->ea_size_l);
        ano = fnode->ea_anode;
        pos = 0;
        while (pos < len) {
@@ -228,68 +228,70 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
                                        set_indirect_ea(s, ea->anode, ea_sec(ea), data, size);
                        }
                        else {
-                                if (ea->valuelen == size)
+                                if (ea_valuelen(ea) == size)
                                        hpfs_ea_write(s, a, ano, pos + 4 + ea->namelen + 1, size, data);
                        }
                        return;
                }
-                pos += ea->namelen + ea->valuelen + 5;
+                pos += ea->namelen + ea_valuelen(ea) + 5;
        }
-        if (!fnode->ea_offs) {
+        if (!le16_to_cpu(fnode->ea_offs)) {
-                /*if (fnode->ea_size_s) {
+                /*if (le16_to_cpu(fnode->ea_size_s)) {
                        hpfs_error(s, "fnode %08x: ea_size_s == %03x, ea_offs == 0",
-                                inode->i_ino, fnode->ea_size_s);
+                                inode->i_ino, le16_to_cpu(fnode->ea_size_s));
                        return;
                }*/
-                fnode->ea_offs = 0xc4;
+                fnode->ea_offs = cpu_to_le16(0xc4);
        }
-        if (fnode->ea_offs < 0xc4 || fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200) {
+        if (le16_to_cpu(fnode->ea_offs) < 0xc4 || le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200) {
                hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x",
                        (unsigned long)inode->i_ino,
-                        fnode->ea_offs, fnode->ea_size_s);
+                        le16_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s));
                return;
        }
-        if ((fnode->ea_size_s || !fnode->ea_size_l) &&
+        if ((le16_to_cpu(fnode->ea_size_s) || !le32_to_cpu(fnode->ea_size_l)) &&
-             fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s + strlen(key) + size + 5 <= 0x200) {
+             le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) + strlen(key) + size + 5 <= 0x200) {
                ea = fnode_end_ea(fnode);
                *(char *)ea = 0;
                ea->namelen = strlen(key);
-                ea->valuelen = size;
+                ea->valuelen_lo = size;
+                ea->valuelen_hi = size >> 8;
                strcpy(ea->name, key);
                memcpy(ea_data(ea), data, size);
-                fnode->ea_size_s += strlen(key) + size + 5;
+                fnode->ea_size_s = cpu_to_le16(le16_to_cpu(fnode->ea_size_s) + strlen(key) + size + 5);
                goto ret;
        }
        /* Most the code here is 99.9993422% unused. I hope there are no bugs.
           But what .. HPFS.IFS has also bugs in ea management. */
-        if (fnode->ea_size_s && !fnode->ea_size_l) {
+        if (le16_to_cpu(fnode->ea_size_s) && !le32_to_cpu(fnode->ea_size_l)) {
                secno n;
                struct buffer_head *bh;
                char *data;
-                if (!(n = hpfs_alloc_sector(s, fno, 1, 0, 1))) return;
+                if (!(n = hpfs_alloc_sector(s, fno, 1, 0))) return;
                if (!(data = hpfs_get_sector(s, n, &bh))) {
                        hpfs_free_sectors(s, n, 1);
                        return;
                }
-                memcpy(data, fnode_ea(fnode), fnode->ea_size_s);
+                memcpy(data, fnode_ea(fnode), le16_to_cpu(fnode->ea_size_s));
-                fnode->ea_size_l = fnode->ea_size_s;
+                fnode->ea_size_l = cpu_to_le32(le16_to_cpu(fnode->ea_size_s));
-                fnode->ea_size_s = 0;
+                fnode->ea_size_s = cpu_to_le16(0);
-                fnode->ea_secno = n;
+                fnode->ea_secno = cpu_to_le32(n);
-                fnode->ea_anode = 0;
+                fnode->ea_anode = 0;
                mark_buffer_dirty(bh);
                brelse(bh);
        }
-        pos = fnode->ea_size_l + 5 + strlen(key) + size;
+        pos = le32_to_cpu(fnode->ea_size_l) + 5 + strlen(key) + size;
-        len = (fnode->ea_size_l + 511) >> 9;
+        len = (le32_to_cpu(fnode->ea_size_l) + 511) >> 9;
        if (pos >= 30000) goto bail;
        while (((pos + 511) >> 9) > len) {
                if (!len) {
-                        if (!(fnode->ea_secno = hpfs_alloc_sector(s, fno, 1, 0, 1)))
+                        secno q = hpfs_alloc_sector(s, fno, 1, 0);
-                                goto bail;
+                        if (!q) goto bail;
+                        fnode->ea_secno = cpu_to_le32(q);
                        fnode->ea_anode = 0;
                        len++;
                } else if (!fnode->ea_anode) {
-                        if (hpfs_alloc_if_possible(s, fnode->ea_secno + len)) {
+                        if (hpfs_alloc_if_possible(s, le32_to_cpu(fnode->ea_secno) + len)) {
                                len++;
                        } else {
                                /* Aargh... don't know how to create ea anodes :-( */
@@ -298,26 +300,26 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
                                anode_secno a_s;
                                if (!(anode = hpfs_alloc_anode(s, fno, &a_s, &bh)))
                                        goto bail;
-                                anode->up = fno;
+                                anode->up = cpu_to_le32(fno);
                                anode->btree.fnode_parent = 1;
                                anode->btree.n_free_nodes--;
                                anode->btree.n_used_nodes++;
-                                anode->btree.first_free += 12;
+                                anode->btree.first_free = cpu_to_le16(le16_to_cpu(anode->btree.first_free) + 12);
-                                anode->u.external[0].disk_secno = fnode->ea_secno;
+                                anode->u.external[0].disk_secno = cpu_to_le32(le32_to_cpu(fnode->ea_secno));
-                                anode->u.external[0].file_secno = 0;
+                                anode->u.external[0].file_secno = cpu_to_le32(0);
-                                anode->u.external[0].length = len;
+                                anode->u.external[0].length = cpu_to_le32(len);
                                mark_buffer_dirty(bh);
                                brelse(bh);
                                fnode->ea_anode = 1;
-                                fnode->ea_secno = a_s;*/
+                                fnode->ea_secno = cpu_to_le32(a_s);*/
                                secno new_sec;
                                int i;
-                                if (!(new_sec = hpfs_alloc_sector(s, fno, 1, 1 - ((pos + 511) >> 9), 1)))
+                                if (!(new_sec = hpfs_alloc_sector(s, fno, 1, 1 - ((pos + 511) >> 9))))
                                        goto bail;
                                for (i = 0; i < len; i++) {
                                        struct buffer_head *bh1, *bh2;
                                        void *b1, *b2;
-                                        if (!(b1 = hpfs_map_sector(s, fnode->ea_secno + i, &bh1, len - i - 1))) {
+                                        if (!(b1 = hpfs_map_sector(s, le32_to_cpu(fnode->ea_secno) + i, &bh1, len - i - 1))) {
                                                hpfs_free_sectors(s, new_sec, (pos + 511) >> 9);
                                                goto bail;
                                        }
@@ -331,13 +333,13 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
                                        mark_buffer_dirty(bh2);
                                        brelse(bh2);
                                }
-                                hpfs_free_sectors(s, fnode->ea_secno, len);
+                                hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno), len);
-                                fnode->ea_secno = new_sec;
+                                fnode->ea_secno = cpu_to_le32(new_sec);
                                len = (pos + 511) >> 9;
                        }
                }
                if (fnode->ea_anode) {
-                        if (hpfs_add_sector_to_btree(s, fnode->ea_secno,
+                        if (hpfs_add_sector_to_btree(s, le32_to_cpu(fnode->ea_secno),
                                                     0, len) != -1) {
                                len++;
                        } else {
@@ -349,17 +351,17 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
        h[1] = strlen(key);
        h[2] = size & 0xff;
        h[3] = size >> 8;
-        if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l, 4, h)) goto bail;
+        if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l), 4, h)) goto bail;
-        if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l + 4, h[1] + 1, key)) goto bail;
+        if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 4, h[1] + 1, key)) goto bail;
-        if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l + 5 + h[1], size, data)) goto bail;
+        if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 5 + h[1], size, data)) goto bail;
-        fnode->ea_size_l = pos;
+        fnode->ea_size_l = cpu_to_le32(pos);
        ret:
        hpfs_i(inode)->i_ea_size += 5 + strlen(key) + size;
        return;
        bail:
-        if (fnode->ea_secno)
+        if (le32_to_cpu(fnode->ea_secno))
-                if (fnode->ea_anode) hpfs_truncate_btree(s, fnode->ea_secno, 1, (fnode->ea_size_l + 511) >> 9);
+                if (fnode->ea_anode) hpfs_truncate_btree(s, le32_to_cpu(fnode->ea_secno), 1, (le32_to_cpu(fnode->ea_size_l) + 511) >> 9);
-                else hpfs_free_sectors(s, fnode->ea_secno + ((fnode->ea_size_l + 511) >> 9), len - ((fnode->ea_size_l + 511) >> 9));
+                else hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno) + ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9), len - ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9));
-        else fnode->ea_secno = fnode->ea_size_l = 0;
+        else fnode->ea_secno = fnode->ea_size_l = cpu_to_le32(0);
 }
        
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 9b9eb6933e43..89c500ee5213 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -20,8 +20,8 @@ static int hpfs_file_release(struct inode *inode, struct file *file)
 int hpfs_file_fsync(struct file *file, int datasync)
 {
-        /*return file_fsync(file, datasync);*/
+        struct inode *inode = file->f_mapping->host;
-        return 0; /* Don't fsync :-) */
+        return sync_blockdev(inode->i_sb->s_bdev);
 }
 /*
@@ -48,38 +48,46 @@ static secno hpfs_bmap(struct inode *inode, unsigned file_secno)
 static void hpfs_truncate(struct inode *i)
 {
        if (IS_IMMUTABLE(i)) return /*-EPERM*/;
-        hpfs_lock(i->i_sb);
+        hpfs_lock_assert(i->i_sb);
        hpfs_i(i)->i_n_secs = 0;
        i->i_blocks = 1 + ((i->i_size + 511) >> 9);
        hpfs_i(i)->mmu_private = i->i_size;
        hpfs_truncate_btree(i->i_sb, i->i_ino, 1, ((i->i_size + 511) >> 9));
        hpfs_write_inode(i);
        hpfs_i(i)->i_n_secs = 0;
-        hpfs_unlock(i->i_sb);
 }
 static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
 {
+        int r;
        secno s;
+        hpfs_lock(inode->i_sb);
        s = hpfs_bmap(inode, iblock);
        if (s) {
                map_bh(bh_result, inode->i_sb, s);
-                return 0;
+                goto ret_0;
        }
-        if (!create) return 0;
+        if (!create) goto ret_0;
        if (iblock<<9 != hpfs_i(inode)->mmu_private) {
                BUG();
-                return -EIO;
+                r = -EIO;
+                goto ret_r;
        }
        if ((s = hpfs_add_sector_to_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1)) == -1) {
                hpfs_truncate_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1);
-                return -ENOSPC;
+                r = -ENOSPC;
+                goto ret_r;
        }
        inode->i_blocks++;
        hpfs_i(inode)->mmu_private += 512;
        set_buffer_new(bh_result);
        map_bh(bh_result, inode->i_sb, s);
-        return 0;
+        ret_0:
+        r = 0;
+        ret_r:
+        hpfs_unlock(inode->i_sb);
+        return r;
 }
 static int hpfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -130,8 +138,11 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf,
        ssize_t retval;
        retval = do_sync_write(file, buf, count, ppos);
-        if (retval > 0)
+        if (retval > 0) {
+                hpfs_lock(file->f_path.dentry->d_sb);
                hpfs_i(file->f_path.dentry->d_inode)->i_dirty = 1;
+                hpfs_unlock(file->f_path.dentry->d_sb);
+        }
        return retval;
 }
diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h
index 0e84c73cd9c4..8b0650aae328 100644
--- a/fs/hpfs/hpfs.h
+++ b/fs/hpfs/hpfs.h
@@ -19,9 +19,13 @@
   For definitive information on HPFS, ask somebody else -- this is guesswork.
   There are certain to be many mistakes. */
+#if !defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)
+#error unknown endian
+#endif
 /* Notation */
-typedef unsigned secno;                 /* sector number, partition relative */
+typedef u32 secno;                      /* sector number, partition relative */
 typedef secno dnode_secno;              /* sector number of a dnode */
 typedef secno fnode_secno;              /* sector number of an fnode */
@@ -38,28 +42,28 @@ typedef u32 time32_t;		/* 32-bit time_t type */
 struct hpfs_boot_block
 {
-  unsigned char jmp[3];
+  u8 jmp[3];
-  unsigned char oem_id[8];
+  u8 oem_id[8];
-  unsigned char bytes_per_sector[2];    /* 512 */
+  u8 bytes_per_sector[2];       /* 512 */
-  unsigned char sectors_per_cluster;
+  u8 sectors_per_cluster;
-  unsigned char n_reserved_sectors[2];
+  u8 n_reserved_sectors[2];
-  unsigned char n_fats;
+  u8 n_fats;
-  unsigned char n_rootdir_entries[2];
+  u8 n_rootdir_entries[2];
-  unsigned char n_sectors_s[2];
+  u8 n_sectors_s[2];
-  unsigned char media_byte;
+  u8 media_byte;
-  unsigned short sectors_per_fat;
+  u16 sectors_per_fat;
-  unsigned short sectors_per_track;
+  u16 sectors_per_track;
-  unsigned short heads_per_cyl;
+  u16 heads_per_cyl;
-  unsigned int n_hidden_sectors;
+  u32 n_hidden_sectors;
-  unsigned int n_sectors_l;             /* size of partition */
+  u32 n_sectors_l;              /* size of partition */
-  unsigned char drive_number;
+  u8 drive_number;
-  unsigned char mbz;
+  u8 mbz;
-  unsigned char sig_28h;                /* 28h */
+  u8 sig_28h;                   /* 28h */
-  unsigned char vol_serno[4];
+  u8 vol_serno[4];
-  unsigned char vol_label[11];
+  u8 vol_label[11];
-  unsigned char sig_hpfs[8];            /* "HPFS    " */
+  u8 sig_hpfs[8];               /* "HPFS    " */
-  unsigned char pad[448];
+  u8 pad[448];
-  unsigned short magic;                 /* aa55 */
+  u16 magic;                    /* aa55 */
 };
@@ -71,31 +75,29 @@ struct hpfs_boot_block
 struct hpfs_super_block
 {
-  unsigned magic;                       /* f995 e849 */
+  u32 magic;                            /* f995 e849 */
-  unsigned magic1;                      /* fa53 e9c5, more magic? */
+  u32 magic1;                           /* fa53 e9c5, more magic? */
-  /*unsigned huh202;*/                  /* ?? 202 = N. of B. in 1.00390625 S.*/
+  u8 version;                           /* version of a filesystem  usually 2 */
-  char version;                         /* version of a filesystem  usually 2 */
+  u8 funcversion;                       /* functional version - oldest version
-  char funcversion;                     /* functional version - oldest version
                                           of filesystem that can understand
                                           this disk */
-  unsigned short int zero;              /* 0 */
+  u16 zero;                             /* 0 */
  fnode_secno root;                     /* fnode of root directory */
  secno n_sectors;                      /* size of filesystem */
-  unsigned n_badblocks;                 /* number of bad blocks */
+  u32 n_badblocks;                      /* number of bad blocks */
  secno bitmaps;                        /* pointers to free space bit maps */
-  unsigned zero1;                       /* 0 */
+  u32 zero1;                            /* 0 */
  secno badblocks;                      /* bad block list */
-  unsigned zero3;                       /* 0 */
+  u32 zero3;                            /* 0 */
  time32_t last_chkdsk;                 /* date last checked, 0 if never */
-  /*unsigned zero4;*/                   /* 0 */
+  time32_t last_optimize;               /* date last optimized, 0 if never */
-  time32_t last_optimize;                       /* date last optimized, 0 if never */
  secno n_dir_band;                     /* number of sectors in dir band */
  secno dir_band_start;                 /* first sector in dir band */
  secno dir_band_end;                   /* last sector in dir band */
  secno dir_band_bitmap;                /* free space map, 1 dnode per bit */
-  char volume_name[32];                 /* not used */
+  u8 volume_name[32];                   /* not used */
  secno user_id_table;                  /* 8 preallocated sectors - user id */
-  unsigned zero6[103];                  /* 0 */
+  u32 zero6[103];                       /* 0 */
 };
@@ -107,44 +109,65 @@ struct hpfs_super_block
 struct hpfs_spare_block
 {
-  unsigned magic;                       /* f991 1849 */
+  u32 magic;                            /* f991 1849 */
-  unsigned magic1;                      /* fa52 29c5, more magic? */
+  u32 magic1;                           /* fa52 29c5, more magic? */
-  unsigned dirty: 1;                    /* 0 clean, 1 "improperly stopped" */
+#ifdef __LITTLE_ENDIAN
-  /*unsigned flag1234: 4;*/             /* unknown flags */
+  u8 dirty: 1;                          /* 0 clean, 1 "improperly stopped" */
-  unsigned sparedir_used: 1;            /* spare dirblks used */
+  u8 sparedir_used: 1;                  /* spare dirblks used */
-  unsigned hotfixes_used: 1;            /* hotfixes used */
+  u8 hotfixes_used: 1;                  /* hotfixes used */
-  unsigned bad_sector: 1;               /* bad sector, corrupted disk (???) */
+  u8 bad_sector: 1;                     /* bad sector, corrupted disk (???) */
-  unsigned bad_bitmap: 1;               /* bad bitmap */
+  u8 bad_bitmap: 1;                     /* bad bitmap */
-  unsigned fast: 1;                     /* partition was fast formatted */
+  u8 fast: 1;                           /* partition was fast formatted */
-  unsigned old_wrote: 1;                /* old version wrote to partion */
+  u8 old_wrote: 1;                      /* old version wrote to partition */
-  unsigned old_wrote_1: 1;              /* old version wrote to partion (?) */
+  u8 old_wrote_1: 1;                    /* old version wrote to partition (?) */
-  unsigned install_dasd_limits: 1;      /* HPFS386 flags */
+#else
-  unsigned resynch_dasd_limits: 1;
+  u8 old_wrote_1: 1;                    /* old version wrote to partition (?) */
-  unsigned dasd_limits_operational: 1;
+  u8 old_wrote: 1;                      /* old version wrote to partition */
-  unsigned multimedia_active: 1;
+  u8 fast: 1;                           /* partition was fast formatted */
-  unsigned dce_acls_active: 1;
+  u8 bad_bitmap: 1;                     /* bad bitmap */
-  unsigned dasd_limits_dirty: 1;
+  u8 bad_sector: 1;                     /* bad sector, corrupted disk (???) */
-  unsigned flag67: 2;
+  u8 hotfixes_used: 1;                  /* hotfixes used */
-  unsigned char mm_contlgulty;
+  u8 sparedir_used: 1;                  /* spare dirblks used */
-  unsigned char unused;
+  u8 dirty: 1;                          /* 0 clean, 1 "improperly stopped" */
+#endif
+#ifdef __LITTLE_ENDIAN
+  u8 install_dasd_limits: 1;            /* HPFS386 flags */
+  u8 resynch_dasd_limits: 1;
+  u8 dasd_limits_operational: 1;
+  u8 multimedia_active: 1;
+  u8 dce_acls_active: 1;
+  u8 dasd_limits_dirty: 1;
+  u8 flag67: 2;
+#else
+  u8 flag67: 2;
+  u8 dasd_limits_dirty: 1;
+  u8 dce_acls_active: 1;
+  u8 multimedia_active: 1;
+  u8 dasd_limits_operational: 1;
+  u8 resynch_dasd_limits: 1;
+  u8 install_dasd_limits: 1;            /* HPFS386 flags */
+#endif
+  u8 mm_contlgulty;
+  u8 unused;
  secno hotfix_map;                     /* info about remapped bad sectors */
-  unsigned n_spares_used;               /* number of hotfixes */
+  u32 n_spares_used;                    /* number of hotfixes */
-  unsigned n_spares;                    /* number of spares in hotfix map */
+  u32 n_spares;                         /* number of spares in hotfix map */
-  unsigned n_dnode_spares_free;         /* spare dnodes unused */
+  u32 n_dnode_spares_free;              /* spare dnodes unused */
-  unsigned n_dnode_spares;              /* length of spare_dnodes[] list,
+  u32 n_dnode_spares;                   /* length of spare_dnodes[] list,
                                           follows in this block*/
  secno code_page_dir;                  /* code page directory block */
-  unsigned n_code_pages;                /* number of code pages */
+  u32 n_code_pages;                     /* number of code pages */
-  /*unsigned large_numbers[2];*/        /* ?? */
+  u32 super_crc;                        /* on HPFS386 and LAN Server this is
-  unsigned super_crc;                   /* on HPFS386 and LAN Server this is
                                           checksum of superblock, on normal
                                           OS/2 unused */
-  unsigned spare_crc;                   /* on HPFS386 checksum of spareblock */
+  u32 spare_crc;                        /* on HPFS386 checksum of spareblock */
-  unsigned zero1[15];                   /* unused */
+  u32 zero1[15];                        /* unused */
  dnode_secno spare_dnodes[100];        /* emergency free dnode list */
-  unsigned zero2[1];                    /* room for more? */
+  u32 zero2[1];                         /* room for more? */
 };
 /* The bad block list is 4 sectors long.  The first word must be zero,
@@ -179,18 +202,18 @@ struct hpfs_spare_block
 struct code_page_directory
 {
-  unsigned magic;                       /* 4945 21f7 */
+  u32 magic;                            /* 4945 21f7 */
-  unsigned n_code_pages;                /* number of pointers following */
+  u32 n_code_pages;                     /* number of pointers following */
-  unsigned zero1[2];
+  u32 zero1[2];
  struct {
-    unsigned short ix;                  /* index */
+    u16 ix;                             /* index */
-    unsigned short code_page_number;    /* code page number */
+    u16 code_page_number;               /* code page number */
-    unsigned bounds;                    /* matches corresponding word
+    u32 bounds;                         /* matches corresponding word
                                           in data block */
    secno code_page_data;               /* sector number of a code_page_data
                                           containing c.p. array */
-    unsigned short index;               /* index in c.p. array in that sector*/
+    u16 index;                          /* index in c.p. array in that sector*/
-    unsigned short unknown;             /* some unknown value; usually 0;
+    u16 unknown;                        /* some unknown value; usually 0;
                                           2 in Japanese version */
  } array[31];                          /* unknown length */
 };
@@ -201,21 +224,21 @@ struct code_page_directory
 struct code_page_data
 {
-  unsigned magic;                       /* 8945 21f7 */
+  u32 magic;                            /* 8945 21f7 */
-  unsigned n_used;                      /* # elements used in c_p_data[] */
+  u32 n_used;                           /* # elements used in c_p_data[] */
-  unsigned bounds[3];                   /* looks a bit like
+  u32 bounds[3];                        /* looks a bit like
                                             (beg1,end1), (beg2,end2)
                                           one byte each */
-  unsigned short offs[3];               /* offsets from start of sector
+  u16 offs[3];                          /* offsets from start of sector
                                           to start of c_p_data[ix] */
  struct {
-    unsigned short ix;                  /* index */
+    u16 ix;                             /* index */
-    unsigned short code_page_number;    /* code page number */
+    u16 code_page_number;               /* code page number */
-    unsigned short unknown;             /* the same as in cp directory */
+    u16 unknown;                        /* the same as in cp directory */
-    unsigned char map[128];             /* upcase table for chars 80..ff */
+    u8 map[128];                        /* upcase table for chars 80..ff */
-    unsigned short zero2;
+    u16 zero2;
  } code_page[3];
-  unsigned char incognita[78];
+  u8 incognita[78];
 };
@@ -255,50 +278,84 @@ struct code_page_data
 #define DNODE_MAGIC   0x77e40aae
 struct dnode {
-  unsigned magic;                       /* 77e4 0aae */
+  u32 magic;                            /* 77e4 0aae */
-  unsigned first_free;                  /* offset from start of dnode to
+  u32 first_free;                       /* offset from start of dnode to
                                           first free dir entry */
-  unsigned root_dnode:1;                /* Is it root dnode? */
+#ifdef __LITTLE_ENDIAN
-  unsigned increment_me:31;             /* some kind of activity counter?
+  u8 root_dnode: 1;                     /* Is it root dnode? */
-                                           Neither HPFS.IFS nor CHKDSK cares
+  u8 increment_me: 7;                   /* some kind of activity counter? */
+                                        /* Neither HPFS.IFS nor CHKDSK cares
+                                           if you change this word */
+#else
+  u8 increment_me: 7;                   /* some kind of activity counter? */
+                                        /* Neither HPFS.IFS nor CHKDSK cares
                                           if you change this word */
+  u8 root_dnode: 1;                     /* Is it root dnode? */
+#endif
+  u8 increment_me2[3];
  secno up;                             /* (root dnode) directory's fnode
                                           (nonroot) parent dnode */
  dnode_secno self;                     /* pointer to this dnode */
-  unsigned char dirent[2028];           /* one or more dirents */
+  u8 dirent[2028];                      /* one or more dirents */
 };
 struct hpfs_dirent {
-  unsigned short length;                /* offset to next dirent */
+  u16 length;                           /* offset to next dirent */
-  unsigned first: 1;                    /* set on phony ^A^A (".") entry */
-  unsigned has_acl: 1;
+#ifdef __LITTLE_ENDIAN
-  unsigned down: 1;                     /* down pointer present (after name) */
+  u8 first: 1;                          /* set on phony ^A^A (".") entry */
-  unsigned last: 1;                     /* set on phony \377 entry */
+  u8 has_acl: 1;
-  unsigned has_ea: 1;                   /* entry has EA */
+  u8 down: 1;                           /* down pointer present (after name) */
-  unsigned has_xtd_perm: 1;             /* has extended perm list (???) */
+  u8 last: 1;                           /* set on phony \377 entry */
-  unsigned has_explicit_acl: 1;
+  u8 has_ea: 1;                         /* entry has EA */
-  unsigned has_needea: 1;               /* ?? some EA has NEEDEA set
+  u8 has_xtd_perm: 1;                   /* has extended perm list (???) */
+  u8 has_explicit_acl: 1;
+  u8 has_needea: 1;                     /* ?? some EA has NEEDEA set
+                                           I have no idea why this is
+                                           interesting in a dir entry */
+#else
+  u8 has_needea: 1;                     /* ?? some EA has NEEDEA set
                                           I have no idea why this is
                                           interesting in a dir entry */
-  unsigned read_only: 1;                /* dos attrib */
+  u8 has_explicit_acl: 1;
-  unsigned hidden: 1;                   /* dos attrib */
+  u8 has_xtd_perm: 1;                   /* has extended perm list (???) */
-  unsigned system: 1;                   /* dos attrib */
+  u8 has_ea: 1;                         /* entry has EA */
-  unsigned flag11: 1;                   /* would be volume label dos attrib */
+  u8 last: 1;                           /* set on phony \377 entry */
-  unsigned directory: 1;                /* dos attrib */
+  u8 down: 1;                           /* down pointer present (after name) */
-  unsigned archive: 1;                  /* dos attrib */
+  u8 has_acl: 1;
-  unsigned not_8x3: 1;                  /* name is not 8.3 */
+  u8 first: 1;                          /* set on phony ^A^A (".") entry */
-  unsigned flag15: 1;
+#endif
+#ifdef __LITTLE_ENDIAN
+  u8 read_only: 1;                      /* dos attrib */
+  u8 hidden: 1;                         /* dos attrib */
+  u8 system: 1;                         /* dos attrib */
+  u8 flag11: 1;                         /* would be volume label dos attrib */
+  u8 directory: 1;                      /* dos attrib */
+  u8 archive: 1;                        /* dos attrib */
+  u8 not_8x3: 1;                        /* name is not 8.3 */
+  u8 flag15: 1;
+#else
+  u8 flag15: 1;
+  u8 not_8x3: 1;                        /* name is not 8.3 */
+  u8 archive: 1;                        /* dos attrib */
+  u8 directory: 1;                      /* dos attrib */
+  u8 flag11: 1;                         /* would be volume label dos attrib */
+  u8 system: 1;                         /* dos attrib */
+  u8 hidden: 1;                         /* dos attrib */
+  u8 read_only: 1;                      /* dos attrib */
+#endif
  fnode_secno fnode;                    /* fnode giving allocation info */
  time32_t write_date;                  /* mtime */
-  unsigned file_size;                   /* file length, bytes */
+  u32 file_size;                        /* file length, bytes */
  time32_t read_date;                   /* atime */
  time32_t creation_date;                       /* ctime */
-  unsigned ea_size;                     /* total EA length, bytes */
+  u32 ea_size;                          /* total EA length, bytes */
-  unsigned char no_of_acls : 3;         /* number of ACL's */
+  u8 no_of_acls;                        /* number of ACL's (low 3 bits) */
-  unsigned char reserver : 5;
+  u8 ix;                                /* code page index (of filename), see
-  unsigned char ix;                     /* code page index (of filename), see
                                           struct code_page_data */
-  unsigned char namelen, name[1];       /* file name */
+  u8 namelen, name[1];                  /* file name */
  /* dnode_secno down;    btree down pointer, if present,
                          follows name on next word boundary, or maybe it
                          precedes next dirent, which is on a word boundary. */
@@ -318,38 +375,50 @@ struct hpfs_dirent {
 struct bplus_leaf_node
 {
-  unsigned file_secno;                  /* first file sector in extent */
+  u32 file_secno;                       /* first file sector in extent */
-  unsigned length;                      /* length, sectors */
+  u32 length;                           /* length, sectors */
  secno disk_secno;                     /* first corresponding disk sector */
 };
 struct bplus_internal_node
 {
-  unsigned file_secno;                  /* subtree maps sectors < this  */
+  u32 file_secno;                       /* subtree maps sectors < this  */
  anode_secno down;                     /* pointer to subtree */
 };
 struct bplus_header
 {
-  unsigned hbff: 1;     /* high bit of first free entry offset */
+#ifdef __LITTLE_ENDIAN
-  unsigned flag1: 1;
+  u8 hbff: 1;                   /* high bit of first free entry offset */
-  unsigned flag2: 1;
+  u8 flag1234: 4;
-  unsigned flag3: 1;
+  u8 fnode_parent: 1;                   /* ? we're pointed to by an fnode,
-  unsigned flag4: 1;
-  unsigned fnode_parent: 1;             /* ? we're pointed to by an fnode,
                                           the data btree or some ea or the
                                           main ea bootage pointer ea_secno */
                                        /* also can get set in fnodes, which
                                           may be a chkdsk glitch or may mean
                                           this bit is irrelevant in fnodes,
                                           or this interpretation is all wet */
-  unsigned binary_search: 1;            /* suggest binary search (unused) */
+  u8 binary_search: 1;                  /* suggest binary search (unused) */
-  unsigned internal: 1;                 /* 1 -> (internal) tree of anodes
+  u8 internal: 1;                       /* 1 -> (internal) tree of anodes
+                                           0 -> (leaf) list of extents */
+#else
+  u8 internal: 1;                       /* 1 -> (internal) tree of anodes
                                           0 -> (leaf) list of extents */
-  unsigned char fill[3];
+  u8 binary_search: 1;                  /* suggest binary search (unused) */
-  unsigned char n_free_nodes;           /* free nodes in following array */
+  u8 fnode_parent: 1;                   /* ? we're pointed to by an fnode,
-  unsigned char n_used_nodes;           /* used nodes in following array */
+                                           the data btree or some ea or the
-  unsigned short first_free;            /* offset from start of header to
+                                           main ea bootage pointer ea_secno */
+                                        /* also can get set in fnodes, which
+                                           may be a chkdsk glitch or may mean
+                                           this bit is irrelevant in fnodes,
+                                           or this interpretation is all wet */
+  u8 flag1234: 4;
+  u8 hbff: 1;                   /* high bit of first free entry offset */
+#endif
+  u8 fill[3];
+  u8 n_free_nodes;                      /* free nodes in following array */
+  u8 n_used_nodes;                      /* used nodes in following array */
+  u16 first_free;                       /* offset from start of header to
                                           first free node in array */
  union {
    struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving
@@ -369,37 +438,38 @@ struct bplus_header
 struct fnode
 {
-  unsigned magic;                       /* f7e4 0aae */
+  u32 magic;                            /* f7e4 0aae */
-  unsigned zero1[2];                    /* read history */
+  u32 zero1[2];                         /* read history */
-  unsigned char len, name[15];          /* true length, truncated name */
+  u8 len, name[15];                     /* true length, truncated name */
  fnode_secno up;                       /* pointer to file's directory fnode */
-  /*unsigned zero2[3];*/
  secno acl_size_l;
  secno acl_secno;
-  unsigned short acl_size_s;
+  u16 acl_size_s;
-  char acl_anode;
+  u8 acl_anode;
-  char zero2;                           /* history bit count */
+  u8 zero2;                             /* history bit count */
-  unsigned ea_size_l;                   /* length of disk-resident ea's */
+  u32 ea_size_l;                        /* length of disk-resident ea's */
  secno ea_secno;                       /* first sector of disk-resident ea's*/
-  unsigned short ea_size_s;             /* length of fnode-resident ea's */
+  u16 ea_size_s;                        /* length of fnode-resident ea's */
-  unsigned flag0: 1;
+#ifdef __LITTLE_ENDIAN
-  unsigned ea_anode: 1;                 /* 1 -> ea_secno is an anode */
+  u8 flag0: 1;
-  unsigned flag2: 1;
+  u8 ea_anode: 1;                       /* 1 -> ea_secno is an anode */
-  unsigned flag3: 1;
+  u8 flag234567: 6;
-  unsigned flag4: 1;
+#else
-  unsigned flag5: 1;
+  u8 flag234567: 6;
-  unsigned flag6: 1;
+  u8 ea_anode: 1;                       /* 1 -> ea_secno is an anode */
-  unsigned flag7: 1;
+  u8 flag0: 1;
-  unsigned dirflag: 1;                  /* 1 -> directory.  first & only extent
+#endif
+#ifdef __LITTLE_ENDIAN
+  u8 dirflag: 1;                        /* 1 -> directory.  first & only extent
                                           points to dnode. */
-  unsigned flag9: 1;
+  u8 flag9012345: 7;
-  unsigned flag10: 1;
+#else
-  unsigned flag11: 1;
+  u8 flag9012345: 7;
-  unsigned flag12: 1;
+  u8 dirflag: 1;                        /* 1 -> directory.  first & only extent
-  unsigned flag13: 1;
+                                           points to dnode. */
-  unsigned flag14: 1;
+#endif
-  unsigned flag15: 1;
  struct bplus_header btree;            /* b+ tree, 8 extents or 12 subtrees */
  union {
@@ -407,17 +477,16 @@ struct fnode
    struct bplus_internal_node internal[12];
  } u;
-  unsigned file_size;                   /* file length, bytes */
+  u32 file_size;                        /* file length, bytes */
-  unsigned n_needea;                    /* number of EA's with NEEDEA set */
+  u32 n_needea;                         /* number of EA's with NEEDEA set */
-  char user_id[16];                     /* unused */
+  u8 user_id[16];                       /* unused */
-  unsigned short ea_offs;               /* offset from start of fnode
+  u16 ea_offs;                          /* offset from start of fnode
                                           to first fnode-resident ea */
-  char dasd_limit_treshhold;
+  u8 dasd_limit_treshhold;
-  char dasd_limit_delta;
+  u8 dasd_limit_delta;
-  unsigned dasd_limit;
+  u32 dasd_limit;
-  unsigned dasd_usage;
+  u32 dasd_usage;
-  /*unsigned zero5[2];*/
+  u8 ea[316];                           /* zero or more EA's, packed together
-  unsigned char ea[316];                /* zero or more EA's, packed together
                                           with no alignment padding.
                                           (Do not use this name, get here
                                           via fnode + ea_offs. I think.) */
@@ -430,7 +499,7 @@ struct fnode
 struct anode
 {
-  unsigned magic;                       /* 37e4 0aae */
+  u32 magic;                            /* 37e4 0aae */
  anode_secno self;                     /* pointer to this anode */
  secno up;                             /* parent anode or fnode */
@@ -440,7 +509,7 @@ struct anode
    struct bplus_internal_node internal[60];
  } u;
-  unsigned fill[3];                     /* unused */
+  u32 fill[3];                          /* unused */
 };
@@ -461,25 +530,31 @@ struct anode
 struct extended_attribute
 {
-  unsigned indirect: 1;                 /* 1 -> value gives sector number
+#ifdef __LITTLE_ENDIAN
+  u8 indirect: 1;                       /* 1 -> value gives sector number
                                           where real value starts */
-  unsigned anode: 1;                    /* 1 -> sector is an anode
+  u8 anode: 1;                          /* 1 -> sector is an anode
+                                           that points to fragmented value */
+  u8 flag23456: 5;
+  u8 needea: 1;                         /* required ea */
+#else
+  u8 needea: 1;                         /* required ea */
+  u8 flag23456: 5;
+  u8 anode: 1;                          /* 1 -> sector is an anode
                                           that points to fragmented value */
-  unsigned flag2: 1;
+  u8 indirect: 1;                       /* 1 -> value gives sector number
-  unsigned flag3: 1;
+                                           where real value starts */
-  unsigned flag4: 1;
+#endif
-  unsigned flag5: 1;
+  u8 namelen;                           /* length of name, bytes */
-  unsigned flag6: 1;
+  u8 valuelen_lo;                       /* length of value, bytes */
-  unsigned needea: 1;                   /* required ea */
+  u8 valuelen_hi;                       /* length of value, bytes */
-  unsigned char namelen;                /* length of name, bytes */
+  u8 name[0];
-  unsigned short valuelen;              /* length of value, bytes */
-  unsigned char name[0];
  /*
-    unsigned char name[namelen];        ascii attrib name
+    u8 name[namelen];                   ascii attrib name
-    unsigned char nul;                  terminating '\0', not counted
+    u8 nul;                             terminating '\0', not counted
-    unsigned char value[valuelen];      value, arbitrary
+    u8 value[valuelen];                 value, arbitrary
      if this.indirect, valuelen is 8 and the value is
-        unsigned length;                real length of value, bytes
+        u32 length;                     real length of value, bytes
        secno secno;                    sector address where it starts
      if this.anode, the above sector number is the root of an anode tree
        which points to the value.
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index c15adbca07ff..dd552f862c8f 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -13,6 +13,7 @@
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/slab.h>
+#include <asm/unaligned.h>
 #include "hpfs.h"
@@ -51,18 +52,16 @@ struct hpfs_inode_info {
        unsigned i_disk_sec;    /* (files) minimalist cache of alloc info */
        unsigned i_n_secs;      /* (files) minimalist cache of alloc info */
        unsigned i_ea_size;     /* size of extended attributes */
-        unsigned i_conv : 2;    /* (files) crlf->newline hackery */
        unsigned i_ea_mode : 1; /* file's permission is stored in ea */
        unsigned i_ea_uid : 1;  /* file's uid is stored in ea */
        unsigned i_ea_gid : 1;  /* file's gid is stored in ea */
        unsigned i_dirty : 1;
-        struct mutex i_mutex;
-        struct mutex i_parent_mutex;
        loff_t **i_rddir_off;
        struct inode vfs_inode;
 };
 struct hpfs_sb_info {
+        struct mutex hpfs_mutex;        /* global hpfs lock */
        ino_t sb_root;                  /* inode number of root dir */
        unsigned sb_fs_size;            /* file system size, sectors */
        unsigned sb_bitmaps;            /* sector number of bitmap list */
@@ -74,7 +73,6 @@ struct hpfs_sb_info {
        uid_t sb_uid;                   /* uid from mount options */
        gid_t sb_gid;                   /* gid from mount options */
        umode_t sb_mode;                /* mode from mount options */
-        unsigned sb_conv : 2;           /* crlf->newline hackery */
        unsigned sb_eas : 2;            /* eas: 0-ignore, 1-ro, 2-rw */
        unsigned sb_err : 2;            /* on errs: 0-cont, 1-ro, 2-panic */
        unsigned sb_chk : 2;            /* checks: 0-no, 1-normal, 2-strict */
@@ -87,20 +85,9 @@ struct hpfs_sb_info {
        unsigned *sb_bmp_dir;           /* main bitmap directory */
        unsigned sb_c_bitmap;           /* current bitmap */
        unsigned sb_max_fwd_alloc;      /* max forwad allocation */
-        struct mutex hpfs_creation_de;  /* when creating dirents, nobody else
-                                           can alloc blocks */
-        /*unsigned sb_mounting : 1;*/
        int sb_timeshift;
 };
-/*
- * conv= options
- */
-#define CONV_BINARY 0                   /* no conversion */
-#define CONV_TEXT 1                     /* crlf->newline */
-#define CONV_AUTO 2                     /* decide based on file contents */
 /* Four 512-byte buffers and the 2k block obtained by concatenating them */
 struct quad_buffer_head {
@@ -113,7 +100,7 @@ struct quad_buffer_head {
 static inline dnode_secno de_down_pointer (struct hpfs_dirent *de)
 {
  CHKCOND(de->down,("HPFS: de_down_pointer: !de->down\n"));
-  return *(dnode_secno *) ((void *) de + de->length - 4);
+  return le32_to_cpu(*(dnode_secno *) ((void *) de + le16_to_cpu(de->length) - 4));
 }
 /* The first dir entry in a dnode */
@@ -127,41 +114,46 @@ static inline struct hpfs_dirent *dnode_first_de (struct dnode *dnode)
 static inline struct hpfs_dirent *dnode_end_de (struct dnode *dnode)
 {
-  CHKCOND(dnode->first_free>=0x14 && dnode->first_free<=0xa00,("HPFS: dnode_end_de: dnode->first_free = %d\n",(int)dnode->first_free));
+  CHKCOND(le32_to_cpu(dnode->first_free)>=0x14 && le32_to_cpu(dnode->first_free)<=0xa00,("HPFS: dnode_end_de: dnode->first_free = %x\n",(unsigned)le32_to_cpu(dnode->first_free)));
-  return (void *) dnode + dnode->first_free;
+  return (void *) dnode + le32_to_cpu(dnode->first_free);
 }
 /* The dir entry after dir entry de */
 static inline struct hpfs_dirent *de_next_de (struct hpfs_dirent *de)
 {
-  CHKCOND(de->length>=0x20 && de->length<0x800,("HPFS: de_next_de: de->length = %d\n",(int)de->length));
+  CHKCOND(le16_to_cpu(de->length)>=0x20 && le16_to_cpu(de->length)<0x800,("HPFS: de_next_de: de->length = %x\n",(unsigned)le16_to_cpu(de->length)));
-  return (void *) de + de->length;
+  return (void *) de + le16_to_cpu(de->length);
 }
 static inline struct extended_attribute *fnode_ea(struct fnode *fnode)
 {
-        return (struct extended_attribute *)((char *)fnode + fnode->ea_offs + fnode->acl_size_s);
+        return (struct extended_attribute *)((char *)fnode + le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s));
 }
 static inline struct extended_attribute *fnode_end_ea(struct fnode *fnode)
 {
-        return (struct extended_attribute *)((char *)fnode + fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s);
+        return (struct extended_attribute *)((char *)fnode + le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s));
+}
+static inline unsigned ea_valuelen(struct extended_attribute *ea)
+{
+        return ea->valuelen_lo + 256 * ea->valuelen_hi; /* 16-bit length from its low/high bytes */
 }
 static inline struct extended_attribute *next_ea(struct extended_attribute *ea)
 {
-        return (struct extended_attribute *)((char *)ea + 5 + ea->namelen + ea->valuelen);
+        return (struct extended_attribute *)((char *)ea + 5 + ea->namelen + ea_valuelen(ea));
 }
 static inline secno ea_sec(struct extended_attribute *ea)
 {
-        return *(secno *)((char *)ea + 9 + ea->namelen);
+        return le32_to_cpu(get_unaligned((secno *)((char *)ea + 9 + ea->namelen)));
 }
 static inline secno ea_len(struct extended_attribute *ea)
 {
-        return *(secno *)((char *)ea + 5 + ea->namelen);
+        return le32_to_cpu(get_unaligned((secno *)((char *)ea + 5 + ea->namelen)));
 }
 static inline char *ea_data(struct extended_attribute *ea)
@@ -186,13 +178,13 @@ static inline void copy_de(struct hpfs_dirent *dst, struct hpfs_dirent *src)
        dst->not_8x3 = n;
 }
-static inline unsigned tstbits(unsigned *bmp, unsigned b, unsigned n)
+static inline unsigned tstbits(u32 *bmp, unsigned b, unsigned n)
 {
        int i;
        if ((b >= 0x4000) || (b + n - 1 >= 0x4000)) return n;
-        if (!((bmp[(b & 0x3fff) >> 5] >> (b & 0x1f)) & 1)) return 1;
+        if (!((le32_to_cpu(bmp[(b & 0x3fff) >> 5]) >> (b & 0x1f)) & 1)) return 1;
        for (i = 1; i < n; i++)
-                if (/*b+i < 0x4000 &&*/ !((bmp[((b+i) & 0x3fff) >> 5] >> ((b+i) & 0x1f)) & 1))
+                if (!((le32_to_cpu(bmp[((b+i) & 0x3fff) >> 5]) >> ((b+i) & 0x1f)) & 1))
                        return i + 1;
        return 0;
 }
@@ -200,12 +192,12 @@ static inline unsigned tstbits(unsigned *bmp, unsigned b, unsigned n)
 /* alloc.c */
 int hpfs_chk_sectors(struct super_block *, secno, int, char *);
-secno hpfs_alloc_sector(struct super_block *, secno, unsigned, int, int);
+secno hpfs_alloc_sector(struct super_block *, secno, unsigned, int);
 int hpfs_alloc_if_possible(struct super_block *, secno);
 void hpfs_free_sectors(struct super_block *, secno, unsigned);
 int hpfs_check_free_dnodes(struct super_block *, int);
 void hpfs_free_dnode(struct super_block *, secno);
-struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *, int);
+struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *);
 struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **);
 struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **);
@@ -222,8 +214,6 @@ void hpfs_remove_fnode(struct super_block *, fnode_secno fno);
 /* buffer.c */
-void hpfs_lock_creation(struct super_block *);
-void hpfs_unlock_creation(struct super_block *);
 void *hpfs_map_sector(struct super_block *, unsigned, struct buffer_head **, int);
 void *hpfs_get_sector(struct super_block *, unsigned, struct buffer_head **);
 void *hpfs_map_4sectors(struct super_block *, unsigned, struct quad_buffer_head *, int);
@@ -247,7 +237,7 @@ void hpfs_del_pos(struct inode *, loff_t *);
 struct hpfs_dirent *hpfs_add_de(struct super_block *, struct dnode *,
                                const unsigned char *, unsigned, secno);
 int hpfs_add_dirent(struct inode *, const unsigned char *, unsigned,
-                    struct hpfs_dirent *, int);
+                    struct hpfs_dirent *);
 int hpfs_remove_dirent(struct inode *, dnode_secno, struct hpfs_dirent *, struct quad_buffer_head *, int);
 void hpfs_count_dnodes(struct super_block *, dnode_secno, int *, int *, int *);
 dnode_secno hpfs_de_as_down_as_possible(struct super_block *, dnode_secno dno);
@@ -303,7 +293,6 @@ int hpfs_compare_names(struct super_block *, const unsigned char *, unsigned,
                       const unsigned char *, unsigned, int);
 int hpfs_is_name_long(const unsigned char *, unsigned);
 void hpfs_adjust_length(const unsigned char *, unsigned *);
-void hpfs_decide_conv(struct inode *, const unsigned char *, unsigned);
 /* namei.c */
@@ -346,21 +335,26 @@ static inline time32_t gmt_to_local(struct super_block *s, time_t t)
 /*
 * Locking:
 *
- * hpfs_lock() is a leftover from the big kernel lock.
+ * hpfs_lock() locks the whole filesystem. It must be taken
- * Right now, these functions are empty and only left
+ * on any method called by the VFS.
- * for documentation purposes. The file system no longer
- * works on SMP systems, so the lock is not needed
- * any more.
 *
- * If someone is interested in making it work again, this
+ * We don't do any per-file locking anymore, it is hard to
- * would be the place to start by adding a per-superblock
+ * review and HPFS is not performance-sensitive anyway.
- * mutex and fixing all the bugs and performance issues
- * caused by that.
 */
 static inline void hpfs_lock(struct super_block *s)
 {
+        struct hpfs_sb_info *sbi = hpfs_sb(s);
+        mutex_lock(&sbi->hpfs_mutex);
 }
 static inline void hpfs_unlock(struct super_block *s)
 {
+        struct hpfs_sb_info *sbi = hpfs_sb(s);
+        mutex_unlock(&sbi->hpfs_mutex);
+}
+static inline void hpfs_lock_assert(struct super_block *s)
+{
+        struct hpfs_sb_info *sbi = hpfs_sb(s);
+        WARN_ON(!mutex_is_locked(&sbi->hpfs_mutex));
 }
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 87f1f787e767..338cd8368451 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -17,7 +17,6 @@ void hpfs_init_inode(struct inode *i)
        i->i_uid = hpfs_sb(sb)->sb_uid;
        i->i_gid = hpfs_sb(sb)->sb_gid;
        i->i_mode = hpfs_sb(sb)->sb_mode;
-        hpfs_inode->i_conv = hpfs_sb(sb)->sb_conv;
        i->i_size = -1;
        i->i_blocks = -1;
        
@@ -116,8 +115,8 @@ void hpfs_read_inode(struct inode *i)
                i->i_mode |= S_IFDIR;
                i->i_op = &hpfs_dir_iops;
                i->i_fop = &hpfs_dir_ops;
-                hpfs_inode->i_parent_dir = fnode->up;
+                hpfs_inode->i_parent_dir = le32_to_cpu(fnode->up);
-                hpfs_inode->i_dno = fnode->u.external[0].disk_secno;
+                hpfs_inode->i_dno = le32_to_cpu(fnode->u.external[0].disk_secno);
                if (hpfs_sb(sb)->sb_chk >= 2) {
                        struct buffer_head *bh0;
                        if (hpfs_map_fnode(sb, hpfs_inode->i_parent_dir, &bh0)) brelse(bh0);
@@ -133,7 +132,7 @@ void hpfs_read_inode(struct inode *i)
                i->i_op = &hpfs_file_iops;
                i->i_fop = &hpfs_file_ops;
                i->i_nlink = 1;
-                i->i_size = fnode->file_size;
+                i->i_size = le32_to_cpu(fnode->file_size);
                i->i_blocks = ((i->i_size + 511) >> 9) + 1;
                i->i_data.a_ops = &hpfs_aops;
                hpfs_i(i)->mmu_private = i->i_size;
@@ -144,7 +143,7 @@ void hpfs_read_inode(struct inode *i)
 static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode)
 {
        struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
-        /*if (fnode->acl_size_l || fnode->acl_size_s) {
+        /*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) {
                   Some unknown structures like ACL may be in fnode,
                   we'd better not overwrite them
                hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino);
@@ -187,9 +186,7 @@ void hpfs_write_inode(struct inode *i)
                kfree(hpfs_inode->i_rddir_off);
                hpfs_inode->i_rddir_off = NULL;
        }
-        mutex_lock(&hpfs_inode->i_parent_mutex);
        if (!i->i_nlink) {
-                mutex_unlock(&hpfs_inode->i_parent_mutex);
                return;
        }
        parent = iget_locked(i->i_sb, hpfs_inode->i_parent_dir);
@@ -200,14 +197,9 @@ void hpfs_write_inode(struct inode *i)
                        hpfs_read_inode(parent);
                        unlock_new_inode(parent);
                }
-                mutex_lock(&hpfs_inode->i_mutex);
                hpfs_write_inode_nolock(i);
-                mutex_unlock(&hpfs_inode->i_mutex);
                iput(parent);
-        } else {
-                mark_inode_dirty(i);
        }
-        mutex_unlock(&hpfs_inode->i_parent_mutex);
 }
 void hpfs_write_inode_nolock(struct inode *i)
@@ -226,30 +218,30 @@ void hpfs_write_inode_nolock(struct inode *i)
                }
        } else de = NULL;
        if (S_ISREG(i->i_mode)) {
-                fnode->file_size = i->i_size;
+                fnode->file_size = cpu_to_le32(i->i_size);
-                if (de) de->file_size = i->i_size;
+                if (de) de->file_size = cpu_to_le32(i->i_size);
        } else if (S_ISDIR(i->i_mode)) {
-                fnode->file_size = 0;
+                fnode->file_size = cpu_to_le32(0);
-                if (de) de->file_size = 0;
+                if (de) de->file_size = cpu_to_le32(0);
        }
        hpfs_write_inode_ea(i, fnode);
        if (de) {
-                de->write_date = gmt_to_local(i->i_sb, i->i_mtime.tv_sec);
+                de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec));
-                de->read_date = gmt_to_local(i->i_sb, i->i_atime.tv_sec);
+                de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec));
-                de->creation_date = gmt_to_local(i->i_sb, i->i_ctime.tv_sec);
+                de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec));
                de->read_only = !(i->i_mode & 0222);
-                de->ea_size = hpfs_inode->i_ea_size;
+                de->ea_size = cpu_to_le32(hpfs_inode->i_ea_size);
                hpfs_mark_4buffers_dirty(&qbh);
                hpfs_brelse4(&qbh);
        }
        if (S_ISDIR(i->i_mode)) {
                if ((de = map_dirent(i, hpfs_inode->i_dno, "\001\001", 2, NULL, &qbh))) {
-                        de->write_date = gmt_to_local(i->i_sb, i->i_mtime.tv_sec);
+                        de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec));
-                        de->read_date = gmt_to_local(i->i_sb, i->i_atime.tv_sec);
+                        de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec));
-                        de->creation_date = gmt_to_local(i->i_sb, i->i_ctime.tv_sec);
+                        de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec));
                        de->read_only = !(i->i_mode & 0222);
-                        de->ea_size = /*hpfs_inode->i_ea_size*/0;
+                        de->ea_size = cpu_to_le32(/*hpfs_inode->i_ea_size*/0);
-                        de->file_size = 0;
+                        de->file_size = cpu_to_le32(0);
                        hpfs_mark_4buffers_dirty(&qbh);
                        hpfs_brelse4(&qbh);
                } else
@@ -269,6 +261,10 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
        hpfs_lock(inode->i_sb);
        if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root)
                goto out_unlock;
+        if ((attr->ia_valid & ATTR_UID) && attr->ia_uid >= 0x10000)
+                goto out_unlock;
+        if ((attr->ia_valid & ATTR_GID) && attr->ia_gid >= 0x10000)
+                goto out_unlock;
        if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size)
                goto out_unlock;
@@ -284,7 +280,6 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
        }
        setattr_copy(inode, attr);
-        mark_inode_dirty(inode);
        hpfs_write_inode(inode);
diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c
index 840d033ecee8..a790821366a7 100644
--- a/fs/hpfs/map.c
+++ b/fs/hpfs/map.c
@@ -21,7 +21,7 @@ unsigned int *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block,
                hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id);
                return NULL;
        }
-        sec = hpfs_sb(s)->sb_bmp_dir[bmp_block];
+        sec = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block]);
        if (!sec || sec > hpfs_sb(s)->sb_fs_size-4) {
                hpfs_error(s, "invalid bitmap block pointer %08x -> %08x at %s", bmp_block, sec, id);
                return NULL;
@@ -46,18 +46,18 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps)
        struct code_page_data *cpd;
        struct code_page_directory *cp = hpfs_map_sector(s, cps, &bh, 0);
        if (!cp) return NULL;
-        if (cp->magic != CP_DIR_MAGIC) {
+        if (le32_to_cpu(cp->magic) != CP_DIR_MAGIC) {
-                printk("HPFS: Code page directory magic doesn't match (magic = %08x)\n", cp->magic);
+                printk("HPFS: Code page directory magic doesn't match (magic = %08x)\n", le32_to_cpu(cp->magic));
                brelse(bh);
                return NULL;
        }
-        if (!cp->n_code_pages) {
+        if (!le32_to_cpu(cp->n_code_pages)) {
                printk("HPFS: n_code_pages == 0\n");
                brelse(bh);
                return NULL;
        }
-        cpds = cp->array[0].code_page_data;
+        cpds = le32_to_cpu(cp->array[0].code_page_data);
-        cpi = cp->array[0].index;
+        cpi = le16_to_cpu(cp->array[0].index);
        brelse(bh);
        if (cpi >= 3) {
@@ -66,12 +66,12 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps)
        }
        
        if (!(cpd = hpfs_map_sector(s, cpds, &bh, 0))) return NULL;
-        if ((unsigned)cpd->offs[cpi] > 0x178) {
+        if (le16_to_cpu(cpd->offs[cpi]) > 0x178) {
                printk("HPFS: Code page index out of sector\n");
                brelse(bh);
                return NULL;
        }
-        ptr = (unsigned char *)cpd + cpd->offs[cpi] + 6;
+        ptr = (unsigned char *)cpd + le16_to_cpu(cpd->offs[cpi]) + 6;
        if (!(cp_table = kmalloc(256, GFP_KERNEL))) {
                printk("HPFS: out of memory for code page table\n");
                brelse(bh);
@@ -125,7 +125,7 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
                if (hpfs_sb(s)->sb_chk) {
                        struct extended_attribute *ea;
                        struct extended_attribute *ea_end;
-                        if (fnode->magic != FNODE_MAGIC) {
+                        if (le32_to_cpu(fnode->magic) != FNODE_MAGIC) {
                                hpfs_error(s, "bad magic on fnode %08lx",
                                        (unsigned long)ino);
                                goto bail;
@@ -138,7 +138,7 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
                                            (unsigned long)ino);
                                        goto bail;
                                }
-                                if (fnode->btree.first_free !=
+                                if (le16_to_cpu(fnode->btree.first_free) !=
                                    8 + fnode->btree.n_used_nodes * (fnode->btree.internal ? 8 : 12)) {
                                        hpfs_error(s,
                                            "bad first_free pointer in fnode %08lx",
@@ -146,12 +146,12 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
                                        goto bail;
                                }
                        }
-                        if (fnode->ea_size_s && ((signed int)fnode->ea_offs < 0xc4 ||
+                        if (le16_to_cpu(fnode->ea_size_s) && (le16_to_cpu(fnode->ea_offs) < 0xc4 ||
-                           (signed int)fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200)) {
+                           le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200)) {
                                hpfs_error(s,
                                        "bad EA info in fnode %08lx: ea_offs == %04x ea_size_s == %04x",
                                        (unsigned long)ino,
-                                        fnode->ea_offs, fnode->ea_size_s);
+                                        le16_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s));
                                goto bail;
                        }
                        ea = fnode_ea(fnode);
@@ -178,16 +178,20 @@ struct anode *hpfs_map_anode(struct super_block *s, anode_secno ano, struct buff
        if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, ano, 1, "anode")) return NULL;
        if ((anode = hpfs_map_sector(s, ano, bhp, ANODE_RD_AHEAD)))
                if (hpfs_sb(s)->sb_chk) {
-                        if (anode->magic != ANODE_MAGIC || anode->self != ano) {
+                        if (le32_to_cpu(anode->magic) != ANODE_MAGIC) {
                                hpfs_error(s, "bad magic on anode %08x", ano);
                                goto bail;
                        }
+                        if (le32_to_cpu(anode->self) != ano) {
+                                hpfs_error(s, "self pointer invalid on anode %08x", ano);
+                                goto bail;
+                        }
                        if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes !=
                            (anode->btree.internal ? 60 : 40)) {
                                hpfs_error(s, "bad number of nodes in anode %08x", ano);
                                goto bail;
                        }
-                        if (anode->btree.first_free !=
+                        if (le16_to_cpu(anode->btree.first_free) !=
                            8 + anode->btree.n_used_nodes * (anode->btree.internal ? 8 : 12)) {
                                hpfs_error(s, "bad first_free pointer in anode %08x", ano);
                                goto bail;
@@ -219,26 +223,26 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno,
                        unsigned p, pp = 0;
                        unsigned char *d = (unsigned char *)dnode;
                        int b = 0;
-                        if (dnode->magic != DNODE_MAGIC) {
+                        if (le32_to_cpu(dnode->magic) != DNODE_MAGIC) {
                                hpfs_error(s, "bad magic on dnode %08x", secno);
                                goto bail;
                        }
-                        if (dnode->self != secno)
+                        if (le32_to_cpu(dnode->self) != secno)
-                                hpfs_error(s, "bad self pointer on dnode %08x self = %08x", secno, dnode->self);
+                                hpfs_error(s, "bad self pointer on dnode %08x self = %08x", secno, le32_to_cpu(dnode->self));
                        /* Check dirents - bad dirents would cause infinite
                           loops or shooting to memory */
-                        if (dnode->first_free > 2048/* || dnode->first_free < 84*/) {
+                        if (le32_to_cpu(dnode->first_free) > 2048) {
-                                hpfs_error(s, "dnode %08x has first_free == %08x", secno, dnode->first_free);
+                                hpfs_error(s, "dnode %08x has first_free == %08x", secno, le32_to_cpu(dnode->first_free));
                                goto bail;
                        }
-                        for (p = 20; p < dnode->first_free; p += d[p] + (d[p+1] << 8)) {
+                        for (p = 20; p < le32_to_cpu(dnode->first_free); p += d[p] + (d[p+1] << 8)) {
                                struct hpfs_dirent *de = (struct hpfs_dirent *)((char *)dnode + p);
-                                if (de->length > 292 || (de->length < 32) || (de->length & 3) || p + de->length > 2048) {
+                                if (le16_to_cpu(de->length) > 292 || (le16_to_cpu(de->length) < 32) || (le16_to_cpu(de->length) & 3) || p + le16_to_cpu(de->length) > 2048) {
                                        hpfs_error(s, "bad dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp);
                                        goto bail;
                                }
-                                if (((31 + de->namelen + de->down*4 + 3) & ~3) != de->length) {
+                                if (((31 + de->namelen + de->down*4 + 3) & ~3) != le16_to_cpu(de->length)) {
-                                        if (((31 + de->namelen + de->down*4 + 3) & ~3) < de->length && s->s_flags & MS_RDONLY) goto ok;
+                                        if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & MS_RDONLY) goto ok;
                                        hpfs_error(s, "namelen does not match dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp);
                                        goto bail;
                                }
@@ -251,7 +255,7 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno,
                                pp = p;
                                
                        }
-                        if (p != dnode->first_free) {
+                        if (p != le32_to_cpu(dnode->first_free)) {
                                hpfs_error(s, "size on last dirent does not match first_free; dnode %08x", secno);
                                goto bail;
                        }
@@ -277,7 +281,7 @@ dnode_secno hpfs_fnode_dno(struct super_block *s, ino_t ino)
        if (!fnode)
                return 0;
-        dno = fnode->u.external[0].disk_secno;
+        dno = le32_to_cpu(fnode->u.external[0].disk_secno);
        brelse(bh);
        return dno;
 }
diff --git a/fs/hpfs/name.c b/fs/hpfs/name.c
index f24736d7a439..9acdf338def0 100644
--- a/fs/hpfs/name.c
+++ b/fs/hpfs/name.c
@@ -8,39 +8,6 @@
 #include "hpfs_fn.h"
-static const char *text_postfix[]={
-".ASM", ".BAS", ".BAT", ".C", ".CC", ".CFG", ".CMD", ".CON", ".CPP", ".DEF",
-".DOC", ".DPR", ".ERX", ".H", ".HPP", ".HTM", ".HTML", ".JAVA", ".LOG", ".PAS",
-".RC", ".TEX", ".TXT", ".Y", ""};
-static const char *text_prefix[]={
-"AUTOEXEC.", "CHANGES", "COPYING", "CONFIG.", "CREDITS", "FAQ", "FILE_ID.DIZ",
-"MAKEFILE", "READ.ME", "README", "TERMCAP", ""};
-void hpfs_decide_conv(struct inode *inode, const unsigned char *name, unsigned len)
-{
-        struct hpfs_inode_info *hpfs_inode = hpfs_i(inode);
-        int i;
-        if (hpfs_inode->i_conv != CONV_AUTO) return;
-        for (i = 0; *text_postfix[i]; i++) {
-                int l = strlen(text_postfix[i]);
-                if (l <= len)
-                        if (!hpfs_compare_names(inode->i_sb, text_postfix[i], l, name + len - l, l, 0))
-                                goto text;
-        }
-        for (i = 0; *text_prefix[i]; i++) {
-                int l = strlen(text_prefix[i]);
-                if (l <= len)
-                        if (!hpfs_compare_names(inode->i_sb, text_prefix[i], l, name, l, 0))
-                                goto text;
-        }
-        hpfs_inode->i_conv = CONV_BINARY;
-        return;
-        text:
-        hpfs_inode->i_conv = CONV_TEXT;
-        return;
-}
 static inline int not_allowed_char(unsigned char c)
 {
        return c<' ' || c=='"' || c=='*' || c=='/' || c==':' || c=='<' ||
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index d3db95f51a4e..ff0ce21c0867 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -29,7 +29,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh);
        if (!fnode)
                goto bail;
-        dnode = hpfs_alloc_dnode(dir->i_sb, fno, &dno, &qbh0, 1);
+        dnode = hpfs_alloc_dnode(dir->i_sb, fno, &dno, &qbh0);
        if (!dnode)
                goto bail1;
        memset(&dee, 0, sizeof dee);
@@ -37,8 +37,8 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        if (!(mode & 0222)) dee.read_only = 1;
        /*dee.archive = 0;*/
        dee.hidden = name[0] == '.';
-        dee.fnode = fno;
+        dee.fnode = cpu_to_le32(fno);
-        dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds());
+        dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
        result = new_inode(dir->i_sb);
        if (!result)
                goto bail2;
@@ -46,7 +46,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        result->i_ino = fno;
        hpfs_i(result)->i_parent_dir = dir->i_ino;
        hpfs_i(result)->i_dno = dno;
-        result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date);
+        result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
        result->i_ctime.tv_nsec = 0; 
        result->i_mtime.tv_nsec = 0; 
        result->i_atime.tv_nsec = 0; 
@@ -60,8 +60,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        if (dee.read_only)
                result->i_mode &= ~0222;
-        mutex_lock(&hpfs_i(dir)->i_mutex);
+        r = hpfs_add_dirent(dir, name, len, &dee);
-        r = hpfs_add_dirent(dir, name, len, &dee, 0);
        if (r == 1)
                goto bail3;
        if (r == -1) {
@@ -70,21 +69,21 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        }
        fnode->len = len;
        memcpy(fnode->name, name, len > 15 ? 15 : len);
-        fnode->up = dir->i_ino;
+        fnode->up = cpu_to_le32(dir->i_ino);
        fnode->dirflag = 1;
        fnode->btree.n_free_nodes = 7;
        fnode->btree.n_used_nodes = 1;
-        fnode->btree.first_free = 0x14;
+        fnode->btree.first_free = cpu_to_le16(0x14);
-        fnode->u.external[0].disk_secno = dno;
+        fnode->u.external[0].disk_secno = cpu_to_le32(dno);
-        fnode->u.external[0].file_secno = -1;
+        fnode->u.external[0].file_secno = cpu_to_le32(-1);
        dnode->root_dnode = 1;
-        dnode->up = fno;
+        dnode->up = cpu_to_le32(fno);
        de = hpfs_add_de(dir->i_sb, dnode, "\001\001", 2, 0);
-        de->creation_date = de->write_date = de->read_date = gmt_to_local(dir->i_sb, get_seconds());
+        de->creation_date = de->write_date = de->read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
        if (!(mode & 0222)) de->read_only = 1;
        de->first = de->directory = 1;
        /*de->hidden = de->system = 0;*/
-        de->fnode = fno;
+        de->fnode = cpu_to_le32(fno);
        mark_buffer_dirty(bh);
        brelse(bh);
        hpfs_mark_4buffers_dirty(&qbh0);
@@ -101,11 +100,9 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
                hpfs_write_inode_nolock(result);
        }
        d_instantiate(dentry, result);
-        mutex_unlock(&hpfs_i(dir)->i_mutex);
        hpfs_unlock(dir->i_sb);
        return 0;
 bail3:
-        mutex_unlock(&hpfs_i(dir)->i_mutex);
        iput(result);
 bail2:
        hpfs_brelse4(&qbh0);
@@ -140,8 +137,8 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
        if (!(mode & 0222)) dee.read_only = 1;
        dee.archive = 1;
        dee.hidden = name[0] == '.';
-        dee.fnode = fno;
+        dee.fnode = cpu_to_le32(fno);
-        dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds());
+        dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
        result = new_inode(dir->i_sb);
        if (!result)
@@ -154,9 +151,8 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
        result->i_op = &hpfs_file_iops;
        result->i_fop = &hpfs_file_ops;
        result->i_nlink = 1;
-        hpfs_decide_conv(result, name, len);
        hpfs_i(result)->i_parent_dir = dir->i_ino;
-        result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date);
+        result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
        result->i_ctime.tv_nsec = 0;
        result->i_mtime.tv_nsec = 0;
        result->i_atime.tv_nsec = 0;
@@ -168,8 +164,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
        result->i_data.a_ops = &hpfs_aops;
        hpfs_i(result)->mmu_private = 0;
-        mutex_lock(&hpfs_i(dir)->i_mutex);
+        r = hpfs_add_dirent(dir, name, len, &dee);
-        r = hpfs_add_dirent(dir, name, len, &dee, 0);
        if (r == 1)
                goto bail2;
        if (r == -1) {
@@ -178,7 +173,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
        }
        fnode->len = len;
        memcpy(fnode->name, name, len > 15 ? 15 : len);
-        fnode->up = dir->i_ino;
+        fnode->up = cpu_to_le32(dir->i_ino);
        mark_buffer_dirty(bh);
        brelse(bh);
@@ -193,12 +188,10 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
                hpfs_write_inode_nolock(result);
        }
        d_instantiate(dentry, result);
-        mutex_unlock(&hpfs_i(dir)->i_mutex);
        hpfs_unlock(dir->i_sb);
        return 0;
 bail2:
-        mutex_unlock(&hpfs_i(dir)->i_mutex);
        iput(result);
 bail1:
        brelse(bh);
@@ -232,8 +225,8 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
        if (!(mode & 0222)) dee.read_only = 1;
        dee.archive = 1;
        dee.hidden = name[0] == '.';
-        dee.fnode = fno;
+        dee.fnode = cpu_to_le32(fno);
-        dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds());
+        dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
        result = new_inode(dir->i_sb);
        if (!result)
@@ -242,7 +235,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
        hpfs_init_inode(result);
        result->i_ino = fno;
        hpfs_i(result)->i_parent_dir = dir->i_ino;
-        result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date);
+        result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
        result->i_ctime.tv_nsec = 0;
        result->i_mtime.tv_nsec = 0;
        result->i_atime.tv_nsec = 0;
@@ -254,8 +247,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
        result->i_blocks = 1;
        init_special_inode(result, mode, rdev);
-        mutex_lock(&hpfs_i(dir)->i_mutex);
+        r = hpfs_add_dirent(dir, name, len, &dee);
-        r = hpfs_add_dirent(dir, name, len, &dee, 0);
        if (r == 1)
                goto bail2;
        if (r == -1) {
@@ -264,19 +256,17 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
        }
        fnode->len = len;
        memcpy(fnode->name, name, len > 15 ? 15 : len);
-        fnode->up = dir->i_ino;
+        fnode->up = cpu_to_le32(dir->i_ino);
        mark_buffer_dirty(bh);
        insert_inode_hash(result);
        hpfs_write_inode_nolock(result);
        d_instantiate(dentry, result);
-        mutex_unlock(&hpfs_i(dir)->i_mutex);
        brelse(bh);
        hpfs_unlock(dir->i_sb);
        return 0;
 bail2:
-        mutex_unlock(&hpfs_i(dir)->i_mutex);
        iput(result);
 bail1:
        brelse(bh);
@@ -310,8 +300,8 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
        memset(&dee, 0, sizeof dee);
        dee.archive = 1;
        dee.hidden = name[0] == '.';
-        dee.fnode = fno;
+        dee.fnode = cpu_to_le32(fno);
-        dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds());
+        dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
        result = new_inode(dir->i_sb);
        if (!result)
@@ -319,7 +309,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
        result->i_ino = fno;
        hpfs_init_inode(result);
        hpfs_i(result)->i_parent_dir = dir->i_ino;
-        result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date);
+        result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
        result->i_ctime.tv_nsec = 0;
        result->i_mtime.tv_nsec = 0;
        result->i_atime.tv_nsec = 0;
@@ -333,8 +323,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
        result->i_op = &page_symlink_inode_operations;
        result->i_data.a_ops = &hpfs_symlink_aops;
-        mutex_lock(&hpfs_i(dir)->i_mutex);
+        r = hpfs_add_dirent(dir, name, len, &dee);
-        r = hpfs_add_dirent(dir, name, len, &dee, 0);
        if (r == 1)
                goto bail2;
        if (r == -1) {
@@ -343,7 +332,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
        }
        fnode->len = len;
        memcpy(fnode->name, name, len > 15 ? 15 : len);
-        fnode->up = dir->i_ino;
+        fnode->up = cpu_to_le32(dir->i_ino);
        hpfs_set_ea(result, fnode, "SYMLINK", symlink, strlen(symlink));
        mark_buffer_dirty(bh);
        brelse(bh);
@@ -352,11 +341,9 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
        hpfs_write_inode_nolock(result);
        d_instantiate(dentry, result);
-        mutex_unlock(&hpfs_i(dir)->i_mutex);
        hpfs_unlock(dir->i_sb);
        return 0;
 bail2:
-        mutex_unlock(&hpfs_i(dir)->i_mutex);
        iput(result);
 bail1:
        brelse(bh);
@@ -374,7 +361,6 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
        struct hpfs_dirent *de;
        struct inode *inode = dentry->d_inode;
        dnode_secno dno;
-        fnode_secno fno;
        int r;
        int rep = 0;
        int err;
@@ -382,8 +368,6 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
        hpfs_lock(dir->i_sb);
        hpfs_adjust_length(name, &len);
 again:
-        mutex_lock(&hpfs_i(inode)->i_parent_mutex);
-        mutex_lock(&hpfs_i(dir)->i_mutex);
        err = -ENOENT;
        de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
        if (!de)
@@ -397,7 +381,6 @@ again:
        if (de->directory)
                goto out1;
-        fno = de->fnode;
        r = hpfs_remove_dirent(dir, dno, de, &qbh, 1);
        switch (r) {
        case 1:
@@ -410,8 +393,6 @@ again:
                if (rep++)
                        break;
-                mutex_unlock(&hpfs_i(dir)->i_mutex);
-                mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
                dentry_unhash(dentry);
                if (!d_unhashed(dentry)) {
                        hpfs_unlock(dir->i_sb);
@@ -442,8 +423,6 @@ again:
 out1:
        hpfs_brelse4(&qbh);
 out:
-        mutex_unlock(&hpfs_i(dir)->i_mutex);
-        mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
        hpfs_unlock(dir->i_sb);
        return err;
 }
@@ -456,7 +435,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
        struct hpfs_dirent *de;
        struct inode *inode = dentry->d_inode;
        dnode_secno dno;
-        fnode_secno fno;
        int n_items = 0;
        int err;
        int r;
@@ -465,8 +443,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
        hpfs_adjust_length(name, &len);
        hpfs_lock(dir->i_sb);
-        mutex_lock(&hpfs_i(inode)->i_parent_mutex);
-        mutex_lock(&hpfs_i(dir)->i_mutex);
        err = -ENOENT;
        de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
        if (!de)
@@ -485,7 +461,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
        if (n_items)
                goto out1;
-        fno = de->fnode;
        r = hpfs_remove_dirent(dir, dno, de, &qbh, 1);
        switch (r) {
        case 1:
@@ -504,8 +479,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
 out1:
        hpfs_brelse4(&qbh);
 out:
-        mutex_unlock(&hpfs_i(dir)->i_mutex);
-        mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
        hpfs_unlock(dir->i_sb);
        return err;
 }
@@ -571,12 +544,6 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        hpfs_lock(i->i_sb);
        /* order doesn't matter, due to VFS exclusion */
-        mutex_lock(&hpfs_i(i)->i_parent_mutex);
-        if (new_inode)
-                mutex_lock(&hpfs_i(new_inode)->i_parent_mutex);
-        mutex_lock(&hpfs_i(old_dir)->i_mutex);
-        if (new_dir != old_dir)
-                mutex_lock(&hpfs_i(new_dir)->i_mutex);
        
        /* Erm? Moving over the empty non-busy directory is perfectly legal */
        if (new_inode && S_ISDIR(new_inode->i_mode)) {
@@ -613,9 +580,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (new_dir == old_dir) hpfs_brelse4(&qbh);
-        hpfs_lock_creation(i->i_sb);
+        if ((r = hpfs_add_dirent(new_dir, new_name, new_len, &de))) {
-        if ((r = hpfs_add_dirent(new_dir, new_name, new_len, &de, 1))) {
-                hpfs_unlock_creation(i->i_sb);
                if (r == -1) hpfs_error(new_dir->i_sb, "hpfs_rename: dirent already exists!");
                err = r == 1 ? -ENOSPC : -EFSERROR;
                if (new_dir != old_dir) hpfs_brelse4(&qbh);
@@ -624,20 +589,17 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        
        if (new_dir == old_dir)
                if (!(dep = map_dirent(old_dir, hpfs_i(old_dir)->i_dno, old_name, old_len, &dno, &qbh))) {
-                        hpfs_unlock_creation(i->i_sb);
                        hpfs_error(i->i_sb, "lookup succeeded but map dirent failed at #2");
                        err = -ENOENT;
                        goto end1;
                }
        if ((r = hpfs_remove_dirent(old_dir, dno, dep, &qbh, 0))) {
-                hpfs_unlock_creation(i->i_sb);
                hpfs_error(i->i_sb, "hpfs_rename: could not remove dirent");
                err = r == 2 ? -ENOSPC : -EFSERROR;
                goto end1;
        }
-        hpfs_unlock_creation(i->i_sb);
-        
        end:
        hpfs_i(i)->i_parent_dir = new_dir->i_ino;
        if (S_ISDIR(i->i_mode)) {
@@ -645,22 +607,14 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                drop_nlink(old_dir);
        }
        if ((fnode = hpfs_map_fnode(i->i_sb, i->i_ino, &bh))) {
-                fnode->up = new_dir->i_ino;
+                fnode->up = cpu_to_le32(new_dir->i_ino);
                fnode->len = new_len;
                memcpy(fnode->name, new_name, new_len>15?15:new_len);
                if (new_len < 15) memset(&fnode->name[new_len], 0, 15 - new_len);
                mark_buffer_dirty(bh);
                brelse(bh);
        }
-        hpfs_i(i)->i_conv = hpfs_sb(i->i_sb)->sb_conv;
-        hpfs_decide_conv(i, new_name, new_len);
 end1:
-        if (old_dir != new_dir)
-                mutex_unlock(&hpfs_i(new_dir)->i_mutex);
-        mutex_unlock(&hpfs_i(old_dir)->i_mutex);
-        mutex_unlock(&hpfs_i(i)->i_parent_mutex);
-        if (new_inode)
-                mutex_unlock(&hpfs_i(new_inode)->i_parent_mutex);
        hpfs_unlock(i->i_sb);
        return err;
 }
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index c89b40808587..98580a3b5005 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -18,15 +18,16 @@
 /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
-static void mark_dirty(struct super_block *s)
+static void mark_dirty(struct super_block *s, int remount)
 {
-        if (hpfs_sb(s)->sb_chkdsk && !(s->s_flags & MS_RDONLY)) {
+        if (hpfs_sb(s)->sb_chkdsk && (remount || !(s->s_flags & MS_RDONLY))) {
                struct buffer_head *bh;
                struct hpfs_spare_block *sb;
                if ((sb = hpfs_map_sector(s, 17, &bh, 0))) {
                        sb->dirty = 1;
                        sb->old_wrote = 0;
                        mark_buffer_dirty(bh);
+                        sync_dirty_buffer(bh);
                        brelse(bh);
                }
        }
@@ -40,10 +41,12 @@ static void unmark_dirty(struct super_block *s)
        struct buffer_head *bh;
        struct hpfs_spare_block *sb;
        if (s->s_flags & MS_RDONLY) return;
+        sync_blockdev(s->s_bdev);
        if ((sb = hpfs_map_sector(s, 17, &bh, 0))) {
                sb->dirty = hpfs_sb(s)->sb_chkdsk > 1 - hpfs_sb(s)->sb_was_error;
                sb->old_wrote = hpfs_sb(s)->sb_chkdsk >= 2 && !hpfs_sb(s)->sb_was_error;
                mark_buffer_dirty(bh);
+                sync_dirty_buffer(bh);
                brelse(bh);
        }
 }
@@ -63,13 +66,13 @@ void hpfs_error(struct super_block *s, const char *fmt, ...)
        if (!hpfs_sb(s)->sb_was_error) {
                if (hpfs_sb(s)->sb_err == 2) {
                        printk("; crashing the system because you wanted it\n");
-                        mark_dirty(s);
+                        mark_dirty(s, 0);
                        panic("HPFS panic");
                } else if (hpfs_sb(s)->sb_err == 1) {
                        if (s->s_flags & MS_RDONLY) printk("; already mounted read-only\n");
                        else {
                                printk("; remounting read-only\n");
-                                mark_dirty(s);
+                                mark_dirty(s, 0);
                                s->s_flags |= MS_RDONLY;
                        }
                } else if (s->s_flags & MS_RDONLY) printk("; going on - but anything won't be destroyed because it's read-only\n");
@@ -102,9 +105,12 @@ static void hpfs_put_super(struct super_block *s)
 {
        struct hpfs_sb_info *sbi = hpfs_sb(s);
+        hpfs_lock(s);
+        unmark_dirty(s);
+        hpfs_unlock(s);
        kfree(sbi->sb_cp_table);
        kfree(sbi->sb_bmp_dir);
-        unmark_dirty(s);
        s->s_fs_info = NULL;
        kfree(sbi);
 }
@@ -129,7 +135,7 @@ static unsigned count_bitmaps(struct super_block *s)
        n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14;
        count = 0;
        for (n = 0; n < n_bands; n++)
-                count += hpfs_count_one_bitmap(s, hpfs_sb(s)->sb_bmp_dir[n]);
+                count += hpfs_count_one_bitmap(s, le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[n]));
        return count;
 }
@@ -188,8 +194,6 @@ static void init_once(void *foo)
 {
        struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
-        mutex_init(&ei->i_mutex);
-        mutex_init(&ei->i_parent_mutex);
        inode_init_once(&ei->vfs_inode);
 }
@@ -218,7 +222,6 @@ static void destroy_inodecache(void)
 enum {
        Opt_help, Opt_uid, Opt_gid, Opt_umask, Opt_case_lower, Opt_case_asis,
-        Opt_conv_binary, Opt_conv_text, Opt_conv_auto,
        Opt_check_none, Opt_check_normal, Opt_check_strict,
        Opt_err_cont, Opt_err_ro, Opt_err_panic,
        Opt_eas_no, Opt_eas_ro, Opt_eas_rw,
@@ -233,9 +236,6 @@ static const match_table_t tokens = {
        {Opt_umask, "umask=%o"},
        {Opt_case_lower, "case=lower"},
        {Opt_case_asis, "case=asis"},
-        {Opt_conv_binary, "conv=binary"},
-        {Opt_conv_text, "conv=text"},
-        {Opt_conv_auto, "conv=auto"},
        {Opt_check_none, "check=none"},
        {Opt_check_normal, "check=normal"},
        {Opt_check_strict, "check=strict"},
@@ -253,7 +253,7 @@ static const match_table_t tokens = {
 };
 static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask,
-                      int *lowercase, int *conv, int *eas, int *chk, int *errs,
+                      int *lowercase, int *eas, int *chk, int *errs,
                      int *chkdsk, int *timeshift)
 {
        char *p;
@@ -295,15 +295,6 @@ static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask,
                case Opt_case_asis:
                        *lowercase = 0;
                        break;
-                case Opt_conv_binary:
-                        *conv = CONV_BINARY;
-                        break;
-                case Opt_conv_text:
-                        *conv = CONV_TEXT;
-                        break;
-                case Opt_conv_auto:
-                        *conv = CONV_AUTO;
-                        break;
                case Opt_check_none:
                        *chk = 0;
                        break;
@@ -370,9 +361,6 @@ HPFS filesystem options:\n\
      umask=xxx         set mode of files that don't have mode specified in eas\n\
      case=lower        lowercase all files\n\
      case=asis         do not lowercase files (default)\n\
-      conv=binary       do not convert CR/LF -> LF (default)\n\
-      conv=auto         convert only files with known text extensions\n\
-      conv=text         convert all files\n\
      check=none        no fs checks - kernel may crash on corrupted filesystem\n\
      check=normal      do some checks - it should not crash (default)\n\
      check=strict      do extra time-consuming checks, used for debugging\n\
@@ -394,7 +382,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
        uid_t uid;
        gid_t gid;
        umode_t umask;
-        int lowercase, conv, eas, chk, errs, chkdsk, timeshift;
+        int lowercase, eas, chk, errs, chkdsk, timeshift;
        int o;
        struct hpfs_sb_info *sbi = hpfs_sb(s);
        char *new_opts = kstrdup(data, GFP_KERNEL);
@@ -405,11 +393,11 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
        lock_super(s);
        uid = sbi->sb_uid; gid = sbi->sb_gid;
        umask = 0777 & ~sbi->sb_mode;
-        lowercase = sbi->sb_lowercase; conv = sbi->sb_conv;
+        lowercase = sbi->sb_lowercase;
        eas = sbi->sb_eas; chk = sbi->sb_chk; chkdsk = sbi->sb_chkdsk;
        errs = sbi->sb_err; timeshift = sbi->sb_timeshift;
-        if (!(o = parse_opts(data, &uid, &gid, &umask, &lowercase, &conv,
+        if (!(o = parse_opts(data, &uid, &gid, &umask, &lowercase,
            &eas, &chk, &errs, &chkdsk, &timeshift))) {
                printk("HPFS: bad mount options.\n");
                goto out_err;
@@ -427,11 +415,11 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
        sbi->sb_uid = uid; sbi->sb_gid = gid;
        sbi->sb_mode = 0777 & ~umask;
-        sbi->sb_lowercase = lowercase; sbi->sb_conv = conv;
+        sbi->sb_lowercase = lowercase;
        sbi->sb_eas = eas; sbi->sb_chk = chk; sbi->sb_chkdsk = chkdsk;
        sbi->sb_err = errs; sbi->sb_timeshift = timeshift;
-        if (!(*flags & MS_RDONLY)) mark_dirty(s);
+        if (!(*flags & MS_RDONLY)) mark_dirty(s, 1);
        replace_mount_options(s, new_opts);
@@ -471,7 +459,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
        uid_t uid;
        gid_t gid;
        umode_t umask;
-        int lowercase, conv, eas, chk, errs, chkdsk, timeshift;
+        int lowercase, eas, chk, errs, chkdsk, timeshift;
        dnode_secno root_dno;
        struct hpfs_dirent *de = NULL;
@@ -479,11 +467,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
        int o;
-        if (num_possible_cpus() > 1) {
-                printk(KERN_ERR "HPFS is not SMP safe\n");
-                return -EINVAL;
-        }
        save_mount_options(s, options);
        sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
@@ -495,20 +478,20 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
        sbi->sb_bmp_dir = NULL;
        sbi->sb_cp_table = NULL;
-        mutex_init(&sbi->hpfs_creation_de);
+        mutex_init(&sbi->hpfs_mutex);
+        hpfs_lock(s);
        uid = current_uid();
        gid = current_gid();
        umask = current_umask();
        lowercase = 0;
-        conv = CONV_BINARY;
        eas = 2;
        chk = 1;
        errs = 1;
        chkdsk = 1;
        timeshift = 0;
-        if (!(o = parse_opts(options, &uid, &gid, &umask, &lowercase, &conv,
+        if (!(o = parse_opts(options, &uid, &gid, &umask, &lowercase,
            &eas, &chk, &errs, &chkdsk, &timeshift))) {
                printk("HPFS: bad mount options.\n");
                goto bail0;
@@ -526,9 +509,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
        if (!(spareblock = hpfs_map_sector(s, 17, &bh2, 0))) goto bail3;
        /* Check magics */
-        if (/*bootblock->magic != BB_MAGIC
+        if (/*le16_to_cpu(bootblock->magic) != BB_MAGIC
-            ||*/ superblock->magic != SB_MAGIC
+            ||*/ le32_to_cpu(superblock->magic) != SB_MAGIC
-            || spareblock->magic != SP_MAGIC) {
+            || le32_to_cpu(spareblock->magic) != SP_MAGIC) {
                if (!silent) printk("HPFS: Bad magic ... probably not HPFS\n");
                goto bail4;
        }
@@ -549,19 +532,18 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
        s->s_op = &hpfs_sops;
        s->s_d_op = &hpfs_dentry_operations;
-        sbi->sb_root = superblock->root;
+        sbi->sb_root = le32_to_cpu(superblock->root);
-        sbi->sb_fs_size = superblock->n_sectors;
+        sbi->sb_fs_size = le32_to_cpu(superblock->n_sectors);
-        sbi->sb_bitmaps = superblock->bitmaps;
+        sbi->sb_bitmaps = le32_to_cpu(superblock->bitmaps);
-        sbi->sb_dirband_start = superblock->dir_band_start;
+        sbi->sb_dirband_start = le32_to_cpu(superblock->dir_band_start);
-        sbi->sb_dirband_size = superblock->n_dir_band;
+        sbi->sb_dirband_size = le32_to_cpu(superblock->n_dir_band);
-        sbi->sb_dmap = superblock->dir_band_bitmap;
+        sbi->sb_dmap = le32_to_cpu(superblock->dir_band_bitmap);
        sbi->sb_uid = uid;
        sbi->sb_gid = gid;
        sbi->sb_mode = 0777 & ~umask;
        sbi->sb_n_free = -1;
        sbi->sb_n_free_dnodes = -1;
        sbi->sb_lowercase = lowercase;
-        sbi->sb_conv = conv;
        sbi->sb_eas = eas;
        sbi->sb_chk = chk;
        sbi->sb_chkdsk = chkdsk;
@@ -573,7 +555,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
        sbi->sb_max_fwd_alloc = 0xffffff;
        
        /* Load bitmap directory */
-        if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, superblock->bitmaps)))
+        if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps))))
                goto bail4;
        
        /* Check for general fs errors*/
@@ -591,20 +573,20 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
                mark_buffer_dirty(bh2);
        }
-        if (spareblock->hotfixes_used || spareblock->n_spares_used) {
+        if (le32_to_cpu(spareblock->hotfixes_used) || le32_to_cpu(spareblock->n_spares_used)) {
                if (errs >= 2) {
                        printk("HPFS: Hotfixes not supported here, try chkdsk\n");
-                        mark_dirty(s);
+                        mark_dirty(s, 0);
                        goto bail4;
                }
                hpfs_error(s, "hotfixes not supported here, try chkdsk");
                if (errs == 0) printk("HPFS: Proceeding, but your filesystem will be probably corrupted by this driver...\n");
                else printk("HPFS: This driver may read bad files or crash when operating on disk with hotfixes.\n");
        }
-        if (spareblock->n_dnode_spares != spareblock->n_dnode_spares_free) {
+        if (le32_to_cpu(spareblock->n_dnode_spares) != le32_to_cpu(spareblock->n_dnode_spares_free)) {
                if (errs >= 2) {
                        printk("HPFS: Spare dnodes used, try chkdsk\n");
-                        mark_dirty(s);
+                        mark_dirty(s, 0);
                        goto bail4;
                }
                hpfs_error(s, "warning: spare dnodes used, try chkdsk");
@@ -612,26 +594,26 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
        }
        if (chk) {
                unsigned a;
-                if (superblock->dir_band_end - superblock->dir_band_start + 1 != superblock->n_dir_band ||
+                if (le32_to_cpu(superblock->dir_band_end) - le32_to_cpu(superblock->dir_band_start) + 1 != le32_to_cpu(superblock->n_dir_band) ||
-                    superblock->dir_band_end < superblock->dir_band_start || superblock->n_dir_band > 0x4000) {
+                    le32_to_cpu(superblock->dir_band_end) < le32_to_cpu(superblock->dir_band_start) || le32_to_cpu(superblock->n_dir_band) > 0x4000) {
                        hpfs_error(s, "dir band size mismatch: dir_band_start==%08x, dir_band_end==%08x, n_dir_band==%08x",
-                                superblock->dir_band_start, superblock->dir_band_end, superblock->n_dir_band);
+                                le32_to_cpu(superblock->dir_band_start), le32_to_cpu(superblock->dir_band_end), le32_to_cpu(superblock->n_dir_band));
                        goto bail4;
                }
                a = sbi->sb_dirband_size;
                sbi->sb_dirband_size = 0;
-                if (hpfs_chk_sectors(s, superblock->dir_band_start, superblock->n_dir_band, "dir_band") ||
+                if (hpfs_chk_sectors(s, le32_to_cpu(superblock->dir_band_start), le32_to_cpu(superblock->n_dir_band), "dir_band") ||
-                    hpfs_chk_sectors(s, superblock->dir_band_bitmap, 4, "dir_band_bitmap") ||
+                    hpfs_chk_sectors(s, le32_to_cpu(superblock->dir_band_bitmap), 4, "dir_band_bitmap") ||
-                    hpfs_chk_sectors(s, superblock->bitmaps, 4, "bitmaps")) {
+                    hpfs_chk_sectors(s, le32_to_cpu(superblock->bitmaps), 4, "bitmaps")) {
-                        mark_dirty(s);
+                        mark_dirty(s, 0);
                        goto bail4;
                }
                sbi->sb_dirband_size = a;
        } else printk("HPFS: You really don't want any checks? You are crazy...\n");
        /* Load code page table */
-        if (spareblock->n_code_pages)
+        if (le32_to_cpu(spareblock->n_code_pages))
-                if (!(sbi->sb_cp_table = hpfs_load_code_page(s, spareblock->code_page_dir)))
+                if (!(sbi->sb_cp_table = hpfs_load_code_page(s, le32_to_cpu(spareblock->code_page_dir))))
                        printk("HPFS: Warning: code page support is disabled\n");
        brelse(bh2);
@@ -660,13 +642,13 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
        if (!de)
                hpfs_error(s, "unable to find root dir");
        else {
-                root->i_atime.tv_sec = local_to_gmt(s, de->read_date);
+                root->i_atime.tv_sec = local_to_gmt(s, le32_to_cpu(de->read_date));
                root->i_atime.tv_nsec = 0;
-                root->i_mtime.tv_sec = local_to_gmt(s, de->write_date);
+                root->i_mtime.tv_sec = local_to_gmt(s, le32_to_cpu(de->write_date));
                root->i_mtime.tv_nsec = 0;
-                root->i_ctime.tv_sec = local_to_gmt(s, de->creation_date);
+                root->i_ctime.tv_sec = local_to_gmt(s, le32_to_cpu(de->creation_date));
                root->i_ctime.tv_nsec = 0;
-                hpfs_i(root)->i_ea_size = de->ea_size;
+                hpfs_i(root)->i_ea_size = le16_to_cpu(de->ea_size);
                hpfs_i(root)->i_parent_dir = root->i_ino;
                if (root->i_size == -1)
                        root->i_size = 2048;
@@ -674,6 +656,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
                        root->i_blocks = 5;
                hpfs_brelse4(&qbh);
        }
+        hpfs_unlock(s);
        return 0;
 bail4:  brelse(bh2);
@@ -681,6 +664,7 @@ bail3:	brelse(bh1);
 bail2:  brelse(bh0);
 bail1:
 bail0:
+        hpfs_unlock(s);
        kfree(sbi->sb_bmp_dir);
        kfree(sbi->sb_cp_table);
        s->s_fs_info = NULL;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b9eeb1cd03ff..7aafeb8fa300 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -412,10 +412,10 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
        pgoff = offset >> PAGE_SHIFT;
        i_size_write(inode, offset);
-        spin_lock(&mapping->i_mmap_lock);
+        mutex_lock(&mapping->i_mmap_mutex);
        if (!prio_tree_empty(&mapping->i_mmap))
                hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
-        spin_unlock(&mapping->i_mmap_lock);
+        mutex_unlock(&mapping->i_mmap_mutex);
        truncate_hugepages(inode, offset);
        return 0;
 }
@@ -921,7 +921,8 @@ static int can_do_hugetlb_shm(void)
        return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
 }
-struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
+struct file *hugetlb_file_setup(const char *name, size_t size,
+                                vm_flags_t acctflag,
                                struct user_struct **user, int creat_flags)
 {
        int error = -ENOMEM;
diff --git a/fs/inode.c b/fs/inode.c
index 33c963d08ab4..990d284877a1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -24,6 +24,7 @@
 #include <linux/mount.h>
 #include <linux/async.h>
 #include <linux/posix_acl.h>
+#include <linux/prefetch.h>
 #include <linux/ima.h>
 #include <linux/cred.h>
 #include "internal.h"
@@ -325,12 +326,11 @@ void address_space_init_once(struct address_space *mapping)
        memset(mapping, 0, sizeof(*mapping));
        INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
        spin_lock_init(&mapping->tree_lock);
-        spin_lock_init(&mapping->i_mmap_lock);
+        mutex_init(&mapping->i_mmap_mutex);
        INIT_LIST_HEAD(&mapping->private_list);
        spin_lock_init(&mapping->private_lock);
        INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
        INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
-        mutex_init(&mapping->unmap_mutex);
 }
 EXPORT_SYMBOL(address_space_init_once);
@@ -751,8 +751,12 @@ static void prune_icache(int nr_to_scan)
 * This function is passed the number of inodes to scan, and it returns the
 * total number of remaining possibly-reclaimable inodes.
 */
-static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int shrink_icache_memory(struct shrinker *shrink,
+                                struct shrink_control *sc)
 {
+        int nr = sc->nr_to_scan;
+        gfp_t gfp_mask = sc->gfp_mask;
        if (nr) {
                /*
                 * Nasty deadlock avoidance.  We may hold various FS locks,
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 69b180459463..72ffa974b0b8 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -302,12 +302,6 @@ void journal_commit_transaction(journal_t *journal)
         * all outstanding updates to complete.
         */
-#ifdef COMMIT_STATS
-        spin_lock(&journal->j_list_lock);
-        summarise_journal_usage(journal);
-        spin_unlock(&journal->j_list_lock);
-#endif
        /* Do we need to erase the effects of a prior journal_flush? */
        if (journal->j_flags & JFS_FLUSHED) {
                jbd_debug(3, "super block updated\n");
@@ -722,8 +716,13 @@ wait_for_iobuf:
                   required. */
                JBUFFER_TRACE(jh, "file as BJ_Forget");
                journal_file_buffer(jh, commit_transaction, BJ_Forget);
-                /* Wake up any transactions which were waiting for this
+                /*
-                   IO to complete */
+                 * Wake up any transactions which were waiting for this
+                 * IO to complete. The barrier must be here so that changes
+                 * by journal_file_buffer() take effect before wake_up_bit()
+                 * does the waitqueue check.
+                 */
+                smp_mb();
                wake_up_bit(&bh->b_state, BH_Unshadow);
                JBUFFER_TRACE(jh, "brelse shadowed buffer");
                __brelse(bh);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b3713afaaa9e..e2d4285fbe90 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -437,9 +437,12 @@ int __log_space_left(journal_t *journal)
 int __log_start_commit(journal_t *journal, tid_t target)
 {
        /*
-         * Are we already doing a recent enough commit?
+         * The only transaction we can possibly wait upon is the
+         * currently running transaction (if it exists).  Otherwise,
+         * the target tid must be an old one.
         */
-        if (!tid_geq(journal->j_commit_request, target)) {
+        if (journal->j_running_transaction &&
+            journal->j_running_transaction->t_tid == target) {
                /*
                 * We want a new commit: OK, mark the request and wakeup the
                 * commit thread.  We do _not_ do the commit ourselves.
@@ -451,7 +454,14 @@ int __log_start_commit(journal_t *journal, tid_t target)
                          journal->j_commit_sequence);
                wake_up(&journal->j_wait_commit);
                return 1;
-        }
+        } else if (!tid_geq(journal->j_commit_request, target))
+                /* This should never happen, but if it does, preserve
+                   the evidence before kjournald goes into a loop and
+                   increments j_commit_sequence beyond all recognition. */
+                WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n",
+                    journal->j_commit_request, journal->j_commit_sequence,
+                    target, journal->j_running_transaction ?
+                    journal->j_running_transaction->t_tid : 0);
        return 0;
 }
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 60d2319651b2..f7ee81a065da 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -266,7 +266,8 @@ static handle_t *new_handle(int nblocks)
 * This function is visible to journal users (like ext3fs), so is not
 * called with the journal already locked.
 *
- * Return a pointer to a newly allocated handle, or NULL on failure
+ * Return a pointer to a newly allocated handle, or an ERR_PTR() value
+ * on failure.
 */
 handle_t *journal_start(journal_t *journal, int nblocks)
 {
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6e28000a4b21..29148a81c783 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -338,12 +338,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
         * all outstanding updates to complete.
         */
-#ifdef COMMIT_STATS
-        spin_lock(&journal->j_list_lock);
-        summarise_journal_usage(journal);
-        spin_unlock(&journal->j_list_lock);
-#endif
        /* Do we need to erase the effects of a prior jbd2_journal_flush? */
        if (journal->j_flags & JBD2_FLUSHED) {
                jbd_debug(3, "super block updated\n");
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 1adc8d455f0e..df0de27c2733 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -10,6 +10,7 @@
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/gfp.h>
+#include <linux/prefetch.h>
 #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 9e22085231b3..d8d09380c7de 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -481,7 +481,7 @@ static int inode_write_alias(struct super_block *sb,
                        val = inode_val0(inode);
                        break;
                case INODE_USED_OFS:
-                        val = cpu_to_be64(li->li_used_bytes);;
+                        val = cpu_to_be64(li->li_used_bytes);
                        break;
                case INODE_SIZE_OFS:
                        val = cpu_to_be64(i_size_read(inode));
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 33435e4b14d2..ce03a182c771 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -480,10 +480,6 @@ static int logfs_read_sb(struct super_block *sb, int read_only)
                        !read_only)
                return -EIO;
-        mutex_init(&super->s_dirop_mutex);
-        mutex_init(&super->s_object_alias_mutex);
-        INIT_LIST_HEAD(&super->s_freeing_list);
        ret = logfs_init_rw(sb);
        if (ret)
                return ret;
@@ -601,6 +597,10 @@ static struct dentry *logfs_mount(struct file_system_type *type, int flags,
        if (!super)
                return ERR_PTR(-ENOMEM);
+        mutex_init(&super->s_dirop_mutex);
+        mutex_init(&super->s_object_alias_mutex);
+        INIT_LIST_HEAD(&super->s_freeing_list);
        if (!devname)
                err = logfs_get_sb_bdev(super, type, devname);
        else if (strncmp(devname, "mtd", 3))
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 2f174be06555..8c32ef3ba88e 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -90,7 +90,8 @@ static DEFINE_SPINLOCK(mb_cache_spinlock);
 * What the mbcache registers as to get shrunk dynamically.
 */
-static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
+static int mb_cache_shrink_fn(struct shrinker *shrink,
+                              struct shrink_control *sc);
 static struct shrinker mb_cache_shrinker = {
        .shrink = mb_cache_shrink_fn,
@@ -156,18 +157,19 @@ forget:
 * gets low.
 *
 * @shrink: (ignored)
- * @nr_to_scan: Number of objects to scan
+ * @sc: shrink_control passed from reclaim
- * @gfp_mask: (ignored)
 *
 * Returns the number of objects which are present in the cache.
 */
 static int
-mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc)
 {
        LIST_HEAD(free_list);
        struct mb_cache *cache;
        struct mb_cache_entry *entry, *tmp;
        int count = 0;
+        int nr_to_scan = sc->nr_to_scan;
+        gfp_t gfp_mask = sc->gfp_mask;
        mb_debug("trying to free %d entries", nr_to_scan);
        spin_lock(&mb_cache_spinlock);
diff --git a/fs/namei.c b/fs/namei.c
index f90f0593092a..2358b326b221 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -179,7 +179,7 @@ EXPORT_SYMBOL(putname);
 static int acl_permission_check(struct inode *inode, int mask, unsigned int flags,
                int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
 {
-        umode_t                 mode = inode->i_mode;
+        unsigned int mode = inode->i_mode;
        mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
@@ -1296,12 +1296,12 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
 {
        int res;
-        BUG_ON(nd->depth >= MAX_NESTED_LINKS);
        if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
                path_put_conditional(path, nd);
                path_put(&nd->path);
                return -ELOOP;
        }
+        BUG_ON(nd->depth >= MAX_NESTED_LINKS);
        nd->depth++;
        current->link_count++;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 0250e4ce4893..202f370526a7 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -461,7 +461,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 #endif
        struct ncp_entry_info finfo;
-        data.wdog_pid = NULL;
+        memset(&data, 0, sizeof(data));
        server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL);
        if (!server)
                return -ENOMEM;
@@ -496,7 +496,6 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
                                struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data;
                                data.flags = md->flags;
-                                data.int_flags = 0;
                                data.mounted_uid = md->mounted_uid;
                                data.wdog_pid = find_get_pid(md->wdog_pid);
                                data.ncp_fd = md->ncp_fd;
@@ -507,7 +506,6 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
                                data.file_mode = md->file_mode;
                                data.dir_mode = md->dir_mode;
                                data.info_fd = -1;
-                                data.mounted_vol[0] = 0;
                        }
                        break;
                default:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7237672216c8..424e47773a84 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2042,11 +2042,14 @@ static void nfs_access_free_list(struct list_head *head)
        }
 }
-int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+int nfs_access_cache_shrinker(struct shrinker *shrink,
+                              struct shrink_control *sc)
 {
        LIST_HEAD(head);
        struct nfs_inode *nfsi, *next;
        struct nfs_access_entry *cache;
+        int nr_to_scan = sc->nr_to_scan;
+        gfp_t gfp_mask = sc->gfp_mask;
        if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
                return (nr_to_scan == 0) ? 0 : -1;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ce118ce885dd..2df6ca7b5898 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -234,7 +234,7 @@ extern int nfs_init_client(struct nfs_client *clp,
 /* dir.c */
 extern int nfs_access_cache_shrinker(struct shrinker *shrink,
-                                        int nr_to_scan, gfp_t gfp_mask);
+                                        struct shrink_control *sc);
 /* inode.c */
 extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 6f8192f4cfc7..be79dc9f386d 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -117,6 +117,8 @@ static int filelayout_async_handle_error(struct rpc_task *task,
        case -EKEYEXPIRED:
                rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
                break;
+        case -NFS4ERR_RETRY_UNCACHED_REP:
+                break;
        default:
                dprintk("%s DS error. Retry through MDS %d\n", __func__,
                        task->tk_status);
@@ -416,7 +418,8 @@ static int
 filelayout_check_layout(struct pnfs_layout_hdr *lo,
                        struct nfs4_filelayout_segment *fl,
                        struct nfs4_layoutget_res *lgr,
-                        struct nfs4_deviceid *id)
+                        struct nfs4_deviceid *id,
+                        gfp_t gfp_flags)
 {
        struct nfs4_file_layout_dsaddr *dsaddr;
        int status = -EINVAL;
@@ -439,7 +442,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
        /* find and reference the deviceid */
        dsaddr = nfs4_fl_find_get_deviceid(id);
        if (dsaddr == NULL) {
-                dsaddr = get_device_info(lo->plh_inode, id);
+                dsaddr = get_device_info(lo->plh_inode, id, gfp_flags);
                if (dsaddr == NULL)
                        goto out;
        }
@@ -500,7 +503,8 @@ static int
 filelayout_decode_layout(struct pnfs_layout_hdr *flo,
                         struct nfs4_filelayout_segment *fl,
                         struct nfs4_layoutget_res *lgr,
-                         struct nfs4_deviceid *id)
+                         struct nfs4_deviceid *id,
+                         gfp_t gfp_flags)
 {
        struct xdr_stream stream;
        struct xdr_buf buf = {
@@ -516,7 +520,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
        dprintk("%s: set_layout_map Begin\n", __func__);
-        scratch = alloc_page(GFP_KERNEL);
+        scratch = alloc_page(gfp_flags);
        if (!scratch)
                return -ENOMEM;
@@ -554,13 +558,13 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
                goto out_err;
        fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
-                               GFP_KERNEL);
+                               gfp_flags);
        if (!fl->fh_array)
                goto out_err;
        for (i = 0; i < fl->num_fh; i++) {
                /* Do we want to use a mempool here? */
-                fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
+                fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags);
                if (!fl->fh_array[i])
                        goto out_err_free;
@@ -605,19 +609,20 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
 static struct pnfs_layout_segment *
 filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
-                      struct nfs4_layoutget_res *lgr)
+                      struct nfs4_layoutget_res *lgr,
+                      gfp_t gfp_flags)
 {
        struct nfs4_filelayout_segment *fl;
        int rc;
        struct nfs4_deviceid id;
        dprintk("--> %s\n", __func__);
-        fl = kzalloc(sizeof(*fl), GFP_KERNEL);
+        fl = kzalloc(sizeof(*fl), gfp_flags);
        if (!fl)
                return NULL;
-        rc = filelayout_decode_layout(layoutid, fl, lgr, &id);
+        rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags);
-        if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) {
+        if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) {
                _filelayout_free_lseg(fl);
                return NULL;
        }
@@ -633,7 +638,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
                int size = (fl->stripe_type == STRIPE_SPARSE) ?
                        fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
-                fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL);
+                fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags);
                if (!fl->commit_buckets) {
                        filelayout_free_lseg(&fl->generic_hdr);
                        return NULL;
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 7c44579f5832..2b461d77b43a 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -104,6 +104,6 @@ extern struct nfs4_file_layout_dsaddr *
 nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id);
 extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
 struct nfs4_file_layout_dsaddr *
-get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
+get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
 #endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index de5350f2b249..db07c7af1395 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -225,11 +225,11 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
 }
 static struct nfs4_pnfs_ds *
-nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
+nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags)
 {
        struct nfs4_pnfs_ds *tmp_ds, *ds;
-        ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL);
+        ds = kzalloc(sizeof(*tmp_ds), gfp_flags);
        if (!ds)
                goto out;
@@ -261,7 +261,7 @@ out:
 * Currently only support ipv4, and one multi-path address.
 */
 static struct nfs4_pnfs_ds *
-decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
+decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags)
 {
        struct nfs4_pnfs_ds *ds = NULL;
        char *buf;
@@ -303,7 +303,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
                        rlen);
                goto out_err;
        }
-        buf = kmalloc(rlen + 1, GFP_KERNEL);
+        buf = kmalloc(rlen + 1, gfp_flags);
        if (!buf) {
                dprintk("%s: Not enough memory\n", __func__);
                goto out_err;
@@ -333,7 +333,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
        sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
        port = htons((tmp[0] << 8) | (tmp[1]));
-        ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
+        ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags);
        dprintk("%s: Decoded address and port %s\n", __func__, buf);
 out_free:
        kfree(buf);
@@ -343,7 +343,7 @@ out_err:
 /* Decode opaque device data and return the result */
 static struct nfs4_file_layout_dsaddr*
-decode_device(struct inode *ino, struct pnfs_device *pdev)
+decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
 {
        int i;
        u32 cnt, num;
@@ -362,7 +362,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
        struct page *scratch;
        /* set up xdr stream */
-        scratch = alloc_page(GFP_KERNEL);
+        scratch = alloc_page(gfp_flags);
        if (!scratch)
                goto out_err;
@@ -384,7 +384,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
        }
        /* read stripe indices */
-        stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL);
+        stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags);
        if (!stripe_indices)
                goto out_err_free_scratch;
@@ -423,7 +423,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
        dsaddr = kzalloc(sizeof(*dsaddr) +
                        (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
-                        GFP_KERNEL);
+                        gfp_flags);
        if (!dsaddr)
                goto out_err_free_stripe_indices;
@@ -452,7 +452,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
                for (j = 0; j < mp_count; j++) {
                        if (j == 0) {
                                dsaddr->ds_list[i] = decode_and_add_ds(&stream,
-                                        ino);
+                                        ino, gfp_flags);
                                if (dsaddr->ds_list[i] == NULL)
                                        goto out_err_free_deviceid;
                        } else {
@@ -503,12 +503,12 @@ out_err:
 * available devices.
 */
 static struct nfs4_file_layout_dsaddr *
-decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
+decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags)
 {
        struct nfs4_file_layout_dsaddr *d, *new;
        long hash;
-        new = decode_device(inode, dev);
+        new = decode_device(inode, dev, gfp_flags);
        if (!new) {
                printk(KERN_WARNING "%s: Could not decode or add device\n",
                        __func__);
@@ -537,7 +537,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
 * of available devices, and return it.
 */
 struct nfs4_file_layout_dsaddr *
-get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
+get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags)
 {
        struct pnfs_device *pdev = NULL;
        u32 max_resp_sz;
@@ -556,17 +556,17 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
        dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
                __func__, inode, max_resp_sz, max_pages);
-        pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
+        pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags);
        if (pdev == NULL)
                return NULL;
-        pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
+        pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
        if (pages == NULL) {
                kfree(pdev);
                return NULL;
        }
        for (i = 0; i < max_pages; i++) {
-                pages[i] = alloc_page(GFP_KERNEL);
+                pages[i] = alloc_page(gfp_flags);
                if (!pages[i])
                        goto out_free;
        }
@@ -587,7 +587,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
         * Found new device, need to decode it and then add it to the
         * list of known devices for this mountpoint.
         */
-        dsaddr = decode_and_add_device(inode, pdev);
+        dsaddr = decode_and_add_device(inode, pdev, gfp_flags);
 out_free:
        for (i = 0; i < max_pages; i++)
                __free_page(pages[i]);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 69c0f3c5ee7a..cf1b339c3937 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -300,6 +300,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
                        ret = nfs4_delay(server->client, &exception->timeout);
                        if (ret != 0)
                                break;
+                case -NFS4ERR_RETRY_UNCACHED_REP:
                case -NFS4ERR_OLD_STATEID:
                        exception->retry = 1;
                        break;
@@ -3695,6 +3696,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
                        rpc_delay(task, NFS4_POLL_RETRY_MAX);
                        task->tk_status = 0;
                        return -EAGAIN;
+                case -NFS4ERR_RETRY_UNCACHED_REP:
                case -NFS4ERR_OLD_STATEID:
                        task->tk_status = 0;
                        return -EAGAIN;
@@ -4844,6 +4846,8 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
                dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
                rpc_delay(task, NFS4_POLL_RETRY_MIN);
                task->tk_status = 0;
+                /* fall through */
+        case -NFS4ERR_RETRY_UNCACHED_REP:
                nfs_restart_rpc(task, data->clp);
                return;
        }
@@ -5479,6 +5483,8 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
                break;
        case -NFS4ERR_DELAY:
                rpc_delay(task, NFS4_POLL_RETRY_MAX);
+                /* fall through */
+        case -NFS4ERR_RETRY_UNCACHED_REP:
                return -EAGAIN;
        default:
                nfs4_schedule_lease_recovery(clp);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index ff681ab65d31..f57f5281a520 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -383,6 +383,7 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
                                plh_layouts);
                dprintk("%s freeing layout for inode %lu\n", __func__,
                        lo->plh_inode->i_ino);
+                list_del_init(&lo->plh_layouts);
                pnfs_destroy_layout(NFS_I(lo->plh_inode));
        }
 }
@@ -466,7 +467,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
 static struct pnfs_layout_segment *
 send_layoutget(struct pnfs_layout_hdr *lo,
           struct nfs_open_context *ctx,
-           u32 iomode)
+           u32 iomode,
+           gfp_t gfp_flags)
 {
        struct inode *ino = lo->plh_inode;
        struct nfs_server *server = NFS_SERVER(ino);
@@ -479,7 +481,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
        dprintk("--> %s\n", __func__);
        BUG_ON(ctx == NULL);
-        lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
+        lgp = kzalloc(sizeof(*lgp), gfp_flags);
        if (lgp == NULL)
                return NULL;
@@ -487,12 +489,12 @@ send_layoutget(struct pnfs_layout_hdr *lo,
        max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
        max_pages = max_resp_sz >> PAGE_SHIFT;
-        pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
+        pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
        if (!pages)
                goto out_err_free;
        for (i = 0; i < max_pages; i++) {
-                pages[i] = alloc_page(GFP_KERNEL);
+                pages[i] = alloc_page(gfp_flags);
                if (!pages[i])
                        goto out_err_free;
        }
@@ -508,6 +510,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
        lgp->args.layout.pages = pages;
        lgp->args.layout.pglen = max_pages * PAGE_SIZE;
        lgp->lsegpp = &lseg;
+        lgp->gfp_flags = gfp_flags;
        /* Synchronously retrieve layout information from server and
         * store in lseg.
@@ -665,11 +668,11 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
 }
 static struct pnfs_layout_hdr *
-alloc_init_layout_hdr(struct inode *ino)
+alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags)
 {
        struct pnfs_layout_hdr *lo;
-        lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
+        lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
        if (!lo)
                return NULL;
        atomic_set(&lo->plh_refcount, 1);
@@ -681,7 +684,7 @@ alloc_init_layout_hdr(struct inode *ino)
 }
 static struct pnfs_layout_hdr *
-pnfs_find_alloc_layout(struct inode *ino)
+pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags)
 {
        struct nfs_inode *nfsi = NFS_I(ino);
        struct pnfs_layout_hdr *new = NULL;
@@ -696,7 +699,7 @@ pnfs_find_alloc_layout(struct inode *ino)
                        return nfsi->layout;
        }
        spin_unlock(&ino->i_lock);
-        new = alloc_init_layout_hdr(ino);
+        new = alloc_init_layout_hdr(ino, gfp_flags);
        spin_lock(&ino->i_lock);
        if (likely(nfsi->layout == NULL))       /* Won the race? */
@@ -756,7 +759,8 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
 struct pnfs_layout_segment *
 pnfs_update_layout(struct inode *ino,
                   struct nfs_open_context *ctx,
-                   enum pnfs_iomode iomode)
+                   enum pnfs_iomode iomode,
+                   gfp_t gfp_flags)
 {
        struct nfs_inode *nfsi = NFS_I(ino);
        struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
@@ -767,7 +771,7 @@ pnfs_update_layout(struct inode *ino,
        if (!pnfs_enabled_sb(NFS_SERVER(ino)))
                return NULL;
        spin_lock(&ino->i_lock);
-        lo = pnfs_find_alloc_layout(ino);
+        lo = pnfs_find_alloc_layout(ino, gfp_flags);
        if (lo == NULL) {
                dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
                goto out_unlock;
@@ -807,7 +811,7 @@ pnfs_update_layout(struct inode *ino,
                spin_unlock(&clp->cl_lock);
        }
-        lseg = send_layoutget(lo, ctx, iomode);
+        lseg = send_layoutget(lo, ctx, iomode, gfp_flags);
        if (!lseg && first) {
                spin_lock(&clp->cl_lock);
                list_del_init(&lo->plh_layouts);
@@ -846,7 +850,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
                goto out;
        }
        /* Inject layout blob into I/O device driver */
-        lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
+        lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
        if (!lseg || IS_ERR(lseg)) {
                if (!lseg)
                        status = -ENOMEM;
@@ -899,7 +903,8 @@ static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
                /* This is first coelesce call for a series of nfs_pages */
                pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
                                                   prev->wb_context,
-                                                   IOMODE_READ);
+                                                   IOMODE_READ,
+                                                   GFP_KERNEL);
        }
        return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
 }
@@ -921,7 +926,8 @@ static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
                /* This is first coelesce call for a series of nfs_pages */
                pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
                                                   prev->wb_context,
-                                                   IOMODE_RW);
+                                                   IOMODE_RW,
+                                                   GFP_NOFS);
        }
        return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
 }
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index bc4827202e7a..0c015bad9e7a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -70,7 +70,7 @@ struct pnfs_layoutdriver_type {
        const u32 id;
        const char *name;
        struct module *owner;
-        struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
+        struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags);
        void (*free_lseg) (struct pnfs_layout_segment *lseg);
        /* test for nfs page cache coalescing */
@@ -126,7 +126,7 @@ void get_layout_hdr(struct pnfs_layout_hdr *lo);
 void put_lseg(struct pnfs_layout_segment *lseg);
 struct pnfs_layout_segment *
 pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
-                   enum pnfs_iomode access_type);
+                   enum pnfs_iomode access_type, gfp_t gfp_flags);
 void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
 void unset_pnfs_layoutdriver(struct nfs_server *);
 enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
@@ -245,7 +245,7 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
 static inline struct pnfs_layout_segment *
 pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
-                   enum pnfs_iomode access_type)
+                   enum pnfs_iomode access_type, gfp_t gfp_flags)
 {
        return NULL;
 }
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7cded2b12a05..2bcf0dc306a1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -288,7 +288,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
        atomic_set(&req->wb_complete, requests);
        BUG_ON(desc->pg_lseg != NULL);
-        lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
+        lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL);
        ClearPageError(page);
        offset = 0;
        nbytes = desc->pg_count;
@@ -351,7 +351,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
        }
        req = nfs_list_entry(data->pages.next);
        if ((!lseg) && list_is_singular(&data->pages))
-                lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
+                lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL);
        ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
                                0, lseg);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3bd5d7e80f6c..49c715b4ac92 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -939,7 +939,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
        atomic_set(&req->wb_complete, requests);
        BUG_ON(desc->pg_lseg);
-        lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
+        lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS);
        ClearPageError(page);
        offset = 0;
        nbytes = desc->pg_count;
@@ -1013,7 +1013,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
        }
        req = nfs_list_entry(data->pages.next);
        if ((!lseg) && list_is_singular(&data->pages))
-                lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
+                lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS);
        if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
            (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 5232d3e8fb2f..a2e2402b2afb 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -8,7 +8,7 @@
 *                      Statistsics for the reply cache
 *      fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache>
 *                      statistics for filehandle lookup
- *      io <bytes-read> <bytes-writtten>
+ *      io <bytes-read> <bytes-written>
 *                      statistics for IO throughput
 *      th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%> 
 *                      time (seconds) when nfsd thread usage above thresholds
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 0a0a66d98cce..eed4d7b26249 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -489,8 +489,8 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
 void nilfs_palloc_commit_alloc_entry(struct inode *inode,
                                     struct nilfs_palloc_req *req)
 {
-        nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh);
+        mark_buffer_dirty(req->pr_bitmap_bh);
-        nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh);
+        mark_buffer_dirty(req->pr_desc_bh);
        nilfs_mdt_mark_dirty(inode);
        brelse(req->pr_bitmap_bh);
@@ -527,8 +527,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
        kunmap(req->pr_bitmap_bh->b_page);
        kunmap(req->pr_desc_bh->b_page);
-        nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh);
+        mark_buffer_dirty(req->pr_desc_bh);
-        nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh);
+        mark_buffer_dirty(req->pr_bitmap_bh);
        nilfs_mdt_mark_dirty(inode);
        brelse(req->pr_bitmap_bh);
@@ -646,7 +646,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
        unsigned long group, group_offset;
        int i, j, n, ret;
-        for (i = 0; i < nitems; i += n) {
+        for (i = 0; i < nitems; i = j) {
                group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset);
                ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh);
                if (ret < 0)
@@ -683,8 +683,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
                kunmap(bitmap_bh->b_page);
                kunmap(desc_bh->b_page);
-                nilfs_mdt_mark_buffer_dirty(desc_bh);
+                mark_buffer_dirty(desc_bh);
-                nilfs_mdt_mark_buffer_dirty(bitmap_bh);
+                mark_buffer_dirty(bitmap_bh);
                nilfs_mdt_mark_dirty(inode);
                brelse(bitmap_bh);
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 4723f04e9b12..aadbd0b5e3e8 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -34,7 +34,9 @@
 struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
 {
-        return NILFS_I_NILFS(bmap->b_inode)->ns_dat;
+        struct the_nilfs *nilfs = bmap->b_inode->i_sb->s_fs_info;
+        return nilfs->ns_dat;
 }
 static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 609cd223eea8..a35ae35e6932 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -34,12 +34,6 @@
 #include "page.h"
 #include "btnode.h"
-void nilfs_btnode_cache_init(struct address_space *btnc,
-                             struct backing_dev_info *bdi)
-{
-        nilfs_mapping_init(btnc, bdi);
-}
 void nilfs_btnode_cache_clear(struct address_space *btnc)
 {
        invalidate_mapping_pages(btnc, 0, -1);
@@ -62,7 +56,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
                BUG();
        }
        memset(bh->b_data, 0, 1 << inode->i_blkbits);
-        bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
+        bh->b_bdev = inode->i_sb->s_bdev;
        bh->b_blocknr = blocknr;
        set_buffer_mapped(bh);
        set_buffer_uptodate(bh);
@@ -94,10 +88,11 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
        if (pblocknr == 0) {
                pblocknr = blocknr;
                if (inode->i_ino != NILFS_DAT_INO) {
-                        struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
+                        struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
                        /* blocknr is a virtual block number */
-                        err = nilfs_dat_translate(dat, blocknr, &pblocknr);
+                        err = nilfs_dat_translate(nilfs->ns_dat, blocknr,
+                                                  &pblocknr);
                        if (unlikely(err)) {
                                brelse(bh);
                                goto out_locked;
@@ -120,7 +115,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
                goto found;
        }
        set_buffer_mapped(bh);
-        bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
+        bh->b_bdev = inode->i_sb->s_bdev;
        bh->b_blocknr = pblocknr; /* set block address for read */
        bh->b_end_io = end_buffer_read_sync;
        get_bh(bh);
@@ -259,7 +254,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
                                       "invalid oldkey %lld (newkey=%lld)",
                                       (unsigned long long)oldkey,
                                       (unsigned long long)newkey);
-                nilfs_btnode_mark_dirty(obh);
+                mark_buffer_dirty(obh);
                spin_lock_irq(&btnc->tree_lock);
                radix_tree_delete(&btnc->page_tree, oldkey);
@@ -271,7 +266,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
                unlock_page(opage);
        } else {
                nilfs_copy_buffer(nbh, obh);
-                nilfs_btnode_mark_dirty(nbh);
+                mark_buffer_dirty(nbh);
                nbh->b_blocknr = newkey;
                ctxt->bh = nbh;
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 1b8ebd888c28..3a4dd2d8d3fc 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
        struct buffer_head *newbh;
 };
-void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
 void nilfs_btnode_cache_clear(struct address_space *);
 struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
                                              __u64 blocknr);
@@ -51,7 +50,4 @@ void nilfs_btnode_commit_change_key(struct address_space *,
 void nilfs_btnode_abort_change_key(struct address_space *,
                                   struct nilfs_btnode_chkey_ctxt *);
-#define nilfs_btnode_mark_dirty(bh)     nilfs_mark_buffer_dirty(bh)
 #endif  /* _NILFS_BTNODE_H */
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index d451ae0e0bf3..7eafe468a29c 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -714,7 +714,7 @@ static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
                                nilfs_btree_get_nonroot_node(path, level),
                                path[level].bp_index, key);
                        if (!buffer_dirty(path[level].bp_bh))
-                                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                                mark_buffer_dirty(path[level].bp_bh);
                } while ((path[level].bp_index == 0) &&
                         (++level < nilfs_btree_height(btree) - 1));
        }
@@ -739,7 +739,7 @@ static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
                nilfs_btree_node_insert(node, path[level].bp_index,
                                        *keyp, *ptrp, ncblk);
                if (!buffer_dirty(path[level].bp_bh))
-                        nilfs_btnode_mark_dirty(path[level].bp_bh);
+                        mark_buffer_dirty(path[level].bp_bh);
                if (path[level].bp_index == 0)
                        nilfs_btree_promote_key(btree, path, level + 1,
@@ -777,9 +777,9 @@ static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
        nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
        if (!buffer_dirty(path[level].bp_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                mark_buffer_dirty(path[level].bp_bh);
        if (!buffer_dirty(path[level].bp_sib_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+                mark_buffer_dirty(path[level].bp_sib_bh);
        nilfs_btree_promote_key(btree, path, level + 1,
                                nilfs_btree_node_get_key(node, 0));
@@ -823,9 +823,9 @@ static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
        nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
        if (!buffer_dirty(path[level].bp_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                mark_buffer_dirty(path[level].bp_bh);
        if (!buffer_dirty(path[level].bp_sib_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+                mark_buffer_dirty(path[level].bp_sib_bh);
        path[level + 1].bp_index++;
        nilfs_btree_promote_key(btree, path, level + 1,
@@ -870,9 +870,9 @@ static void nilfs_btree_split(struct nilfs_bmap *btree,
        nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
        if (!buffer_dirty(path[level].bp_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                mark_buffer_dirty(path[level].bp_bh);
        if (!buffer_dirty(path[level].bp_sib_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+                mark_buffer_dirty(path[level].bp_sib_bh);
        newkey = nilfs_btree_node_get_key(right, 0);
        newptr = path[level].bp_newreq.bpr_ptr;
@@ -919,7 +919,7 @@ static void nilfs_btree_grow(struct nilfs_bmap *btree,
        nilfs_btree_node_set_level(root, level + 1);
        if (!buffer_dirty(path[level].bp_sib_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+                mark_buffer_dirty(path[level].bp_sib_bh);
        path[level].bp_bh = path[level].bp_sib_bh;
        path[level].bp_sib_bh = NULL;
@@ -1194,7 +1194,7 @@ static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
                nilfs_btree_node_delete(node, path[level].bp_index,
                                        keyp, ptrp, ncblk);
                if (!buffer_dirty(path[level].bp_bh))
-                        nilfs_btnode_mark_dirty(path[level].bp_bh);
+                        mark_buffer_dirty(path[level].bp_bh);
                if (path[level].bp_index == 0)
                        nilfs_btree_promote_key(btree, path, level + 1,
                                nilfs_btree_node_get_key(node, 0));
@@ -1226,9 +1226,9 @@ static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
        nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
        if (!buffer_dirty(path[level].bp_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                mark_buffer_dirty(path[level].bp_bh);
        if (!buffer_dirty(path[level].bp_sib_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+                mark_buffer_dirty(path[level].bp_sib_bh);
        nilfs_btree_promote_key(btree, path, level + 1,
                                nilfs_btree_node_get_key(node, 0));
@@ -1258,9 +1258,9 @@ static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
        nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
        if (!buffer_dirty(path[level].bp_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                mark_buffer_dirty(path[level].bp_bh);
        if (!buffer_dirty(path[level].bp_sib_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+                mark_buffer_dirty(path[level].bp_sib_bh);
        path[level + 1].bp_index++;
        nilfs_btree_promote_key(btree, path, level + 1,
@@ -1289,7 +1289,7 @@ static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
        nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
        if (!buffer_dirty(path[level].bp_sib_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
+                mark_buffer_dirty(path[level].bp_sib_bh);
        nilfs_btnode_delete(path[level].bp_bh);
        path[level].bp_bh = path[level].bp_sib_bh;
@@ -1315,7 +1315,7 @@ static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
        nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
        if (!buffer_dirty(path[level].bp_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                mark_buffer_dirty(path[level].bp_bh);
        nilfs_btnode_delete(path[level].bp_sib_bh);
        path[level].bp_sib_bh = NULL;
@@ -1709,7 +1709,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
                nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
                nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
                if (!buffer_dirty(bh))
-                        nilfs_btnode_mark_dirty(bh);
+                        mark_buffer_dirty(bh);
                if (!nilfs_bmap_dirty(btree))
                        nilfs_bmap_set_dirty(btree);
@@ -1787,7 +1787,7 @@ static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
 {
        while ((++level < nilfs_btree_height(btree) - 1) &&
               !buffer_dirty(path[level].bp_bh))
-                nilfs_btnode_mark_dirty(path[level].bp_bh);
+                mark_buffer_dirty(path[level].bp_bh);
        return 0;
 }
@@ -2229,7 +2229,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
        }
        if (!buffer_dirty(bh))
-                nilfs_btnode_mark_dirty(bh);
+                mark_buffer_dirty(bh);
        brelse(bh);
        if (!nilfs_bmap_dirty(btree))
                nilfs_bmap_set_dirty(btree);
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 5ff15a8a1024..c9b342c8b503 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -216,14 +216,14 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
                if (!nilfs_cpfile_is_in_first(cpfile, cno))
                        nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh,
                                                                 kaddr, 1);
-                nilfs_mdt_mark_buffer_dirty(cp_bh);
+                mark_buffer_dirty(cp_bh);
                kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
                header = nilfs_cpfile_block_get_header(cpfile, header_bh,
                                                       kaddr);
                le64_add_cpu(&header->ch_ncheckpoints, 1);
                kunmap_atomic(kaddr, KM_USER0);
-                nilfs_mdt_mark_buffer_dirty(header_bh);
+                mark_buffer_dirty(header_bh);
                nilfs_mdt_mark_dirty(cpfile);
        }
@@ -326,7 +326,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
                }
                if (nicps > 0) {
                        tnicps += nicps;
-                        nilfs_mdt_mark_buffer_dirty(cp_bh);
+                        mark_buffer_dirty(cp_bh);
                        nilfs_mdt_mark_dirty(cpfile);
                        if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
                                count =
@@ -358,7 +358,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
                header = nilfs_cpfile_block_get_header(cpfile, header_bh,
                                                       kaddr);
                le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
-                nilfs_mdt_mark_buffer_dirty(header_bh);
+                mark_buffer_dirty(header_bh);
                nilfs_mdt_mark_dirty(cpfile);
                kunmap_atomic(kaddr, KM_USER0);
        }
@@ -671,10 +671,10 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
        le64_add_cpu(&header->ch_nsnapshots, 1);
        kunmap_atomic(kaddr, KM_USER0);
-        nilfs_mdt_mark_buffer_dirty(prev_bh);
+        mark_buffer_dirty(prev_bh);
-        nilfs_mdt_mark_buffer_dirty(curr_bh);
+        mark_buffer_dirty(curr_bh);
-        nilfs_mdt_mark_buffer_dirty(cp_bh);
+        mark_buffer_dirty(cp_bh);
-        nilfs_mdt_mark_buffer_dirty(header_bh);
+        mark_buffer_dirty(header_bh);
        nilfs_mdt_mark_dirty(cpfile);
        brelse(prev_bh);
@@ -774,10 +774,10 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
        le64_add_cpu(&header->ch_nsnapshots, -1);
        kunmap_atomic(kaddr, KM_USER0);
-        nilfs_mdt_mark_buffer_dirty(next_bh);
+        mark_buffer_dirty(next_bh);
-        nilfs_mdt_mark_buffer_dirty(prev_bh);
+        mark_buffer_dirty(prev_bh);
-        nilfs_mdt_mark_buffer_dirty(cp_bh);
+        mark_buffer_dirty(cp_bh);
-        nilfs_mdt_mark_buffer_dirty(header_bh);
+        mark_buffer_dirty(header_bh);
        nilfs_mdt_mark_dirty(cpfile);
        brelse(prev_bh);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 59e5fe742f7b..fcc2f869af16 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -54,7 +54,7 @@ static int nilfs_dat_prepare_entry(struct inode *dat,
 static void nilfs_dat_commit_entry(struct inode *dat,
                                   struct nilfs_palloc_req *req)
 {
-        nilfs_mdt_mark_buffer_dirty(req->pr_entry_bh);
+        mark_buffer_dirty(req->pr_entry_bh);
        nilfs_mdt_mark_dirty(dat);
        brelse(req->pr_entry_bh);
 }
@@ -361,7 +361,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
        entry->de_blocknr = cpu_to_le64(blocknr);
        kunmap_atomic(kaddr, KM_USER0);
-        nilfs_mdt_mark_buffer_dirty(entry_bh);
+        mark_buffer_dirty(entry_bh);
        nilfs_mdt_mark_dirty(dat);
        brelse(entry_bh);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 397e73258631..d7eeca62febd 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -111,7 +111,6 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        nilfs_transaction_commit(inode->i_sb);
 mapped:
-        SetPageChecked(page);
        wait_on_page_writeback(page);
        return VM_FAULT_LOCKED;
 }
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 1c2a3e23f8b2..08a07a218d26 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -48,9 +48,6 @@
 #include "dat.h"
 #include "ifile.h"
-static const struct address_space_operations def_gcinode_aops = {
-};
 /*
 * nilfs_gccache_submit_read_data() - add data buffer and submit read request
 * @inode - gc inode
@@ -87,9 +84,9 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
                goto out;
        if (pbn == 0) {
-                struct inode *dat_inode = NILFS_I_NILFS(inode)->ns_dat;
+                struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
-                                          /* use original dat, not gc dat. */
-                err = nilfs_dat_translate(dat_inode, vbn, &pbn);
+                err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn);
                if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
                        brelse(bh);
                        goto failed;
@@ -103,7 +100,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
        }
        if (!buffer_mapped(bh)) {
-                bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
+                bh->b_bdev = inode->i_sb->s_bdev;
                set_buffer_mapped(bh);
        }
        bh->b_blocknr = pbn;
@@ -160,15 +157,11 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
        if (buffer_dirty(bh))
                return -EEXIST;
-        if (buffer_nilfs_node(bh)) {
+        if (buffer_nilfs_node(bh) && nilfs_btree_broken_node_block(bh)) {
-                if (nilfs_btree_broken_node_block(bh)) {
+                clear_buffer_uptodate(bh);
-                        clear_buffer_uptodate(bh);
+                return -EIO;
-                        return -EIO;
-                }
-                nilfs_btnode_mark_dirty(bh);
-        } else {
-                nilfs_mark_buffer_dirty(bh);
        }
+        mark_buffer_dirty(bh);
        return 0;
 }
@@ -178,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode)
        inode->i_mode = S_IFREG;
        mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
-        inode->i_mapping->a_ops = &def_gcinode_aops;
+        inode->i_mapping->a_ops = &empty_aops;
        inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi;
        ii->i_flags = 0;
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index bfc73d3a30ed..684d76300a80 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -80,7 +80,7 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
                return ret;
        }
        nilfs_palloc_commit_alloc_entry(ifile, &req);
-        nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh);
+        mark_buffer_dirty(req.pr_entry_bh);
        nilfs_mdt_mark_dirty(ifile);
        *out_ino = (ino_t)req.pr_entry_nr;
        *out_bh = req.pr_entry_bh;
@@ -128,7 +128,7 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
        raw_inode->i_flags = 0;
        kunmap_atomic(kaddr, KM_USER0);
-        nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh);
+        mark_buffer_dirty(req.pr_entry_bh);
        brelse(req.pr_entry_bh);
        nilfs_palloc_commit_free_entry(ifile, &req);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index c0aa27490c02..587f18432832 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -74,14 +74,14 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
                    struct buffer_head *bh_result, int create)
 {
        struct nilfs_inode_info *ii = NILFS_I(inode);
+        struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
        __u64 blknum = 0;
        int err = 0, ret;
-        struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
        unsigned maxblocks = bh_result->b_size >> inode->i_blkbits;
-        down_read(&NILFS_MDT(dat)->mi_sem);
+        down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
        ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
-        up_read(&NILFS_MDT(dat)->mi_sem);
+        up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
        if (ret >= 0) { /* found */
                map_bh(bh_result, inode->i_sb, blknum);
                if (ret > 0)
@@ -596,6 +596,16 @@ void nilfs_write_inode_common(struct inode *inode,
        raw_inode->i_flags = cpu_to_le32(ii->i_flags);
        raw_inode->i_generation = cpu_to_le32(inode->i_generation);
+        if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) {
+                struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+                /* zero-fill unused portion in the case of super root block */
+                raw_inode->i_xattr = 0;
+                raw_inode->i_pad = 0;
+                memset((void *)raw_inode + sizeof(*raw_inode), 0,
+                       nilfs->ns_inode_size - sizeof(*raw_inode));
+        }
        if (has_bmap)
                nilfs_bmap_write(ii->i_bmap, raw_inode);
        else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
@@ -872,8 +882,7 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
                        return -EINVAL; /* NILFS_I_DIRTY may remain for
                                           freeing inode */
                }
-                list_del(&ii->i_dirty);
+                list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
-                list_add_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
                set_bit(NILFS_I_QUEUED, &ii->i_state);
        }
        spin_unlock(&nilfs->ns_inode_lock);
@@ -892,7 +901,7 @@ int nilfs_mark_inode_dirty(struct inode *inode)
                return err;
        }
        nilfs_update_inode(inode, ibh);
-        nilfs_mdt_mark_buffer_dirty(ibh);
+        mark_buffer_dirty(ibh);
        nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
        brelse(ibh);
        return 0;
@@ -931,7 +940,7 @@ void nilfs_dirty_inode(struct inode *inode)
 int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                 __u64 start, __u64 len)
 {
-        struct the_nilfs *nilfs = NILFS_I_NILFS(inode);
+        struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
        __u64 logical = 0, phys = 0, size = 0;
        __u32 flags = 0;
        loff_t isize;
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index f2469ba6246b..41d6743d303c 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -698,6 +698,63 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
        return 0;
 }
+/*
+ * nilfs_ioctl_resize - handle the NILFS_IOCTL_RESIZE request
+ * @inode: inode of the file the ioctl was issued on
+ * @filp: file the ioctl was issued on; its mount is held writable
+ * @argp: user pointer to a __u64 holding the requested new size
+ *
+ * Returns -EPERM without CAP_SYS_ADMIN, the error from mnt_want_write()
+ * if write access to the mount is refused, -EFAULT if the size cannot
+ * be copied from user space, and otherwise whatever nilfs_resize_fs()
+ * returns.
+ */
+static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
+                              void __user *argp)
+{
+        __u64 newsize;
+        int ret;
+        if (!capable(CAP_SYS_ADMIN))
+                return -EPERM;
+        ret = mnt_want_write(filp->f_path.mnt);
+        if (ret)
+                return ret;
+        /* write access is held from here on; always drop it before returning */
+        if (copy_from_user(&newsize, argp, sizeof(newsize)))
+                ret = -EFAULT;
+        else
+                ret = nilfs_resize_fs(inode->i_sb, newsize);
+        mnt_drop_write(filp->f_path.mnt);
+        return ret;
+}
+/*
+ * nilfs_ioctl_set_alloc_range - handle NILFS_IOCTL_SET_ALLOC_RANGE
+ * @inode: inode of the file the ioctl was issued on
+ * @argp: user pointer to two __u64 byte offsets, lower then upper
+ *
+ * Converts the byte range to segment numbers and passes them to
+ * nilfs_sufile_set_alloc_range().  The lower offset is rounded up to a
+ * segment boundary; the upper segment is derived from
+ * NILFS_SB2_OFFSET_BYTES(range[1]) and then decremented by one.
+ *
+ * Returns -EPERM without CAP_SYS_ADMIN, -EFAULT if the range cannot be
+ * copied from user space, -ERANGE if the upper offset exceeds the block
+ * device size or is too small to hold one whole segment, and otherwise
+ * whatever nilfs_sufile_set_alloc_range() returns.
+ */
+static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
+{
+        struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+        __u64 range[2];
+        __u64 minseg, maxseg;
+        unsigned long segbytes;
+        int ret = -EPERM;
+        if (!capable(CAP_SYS_ADMIN))
+                goto out;
+        ret = -EFAULT;
+        if (copy_from_user(range, argp, sizeof(__u64[2])))
+                goto out;
+        ret = -ERANGE;
+        if (range[1] > i_size_read(inode->i_sb->s_bdev->bd_inode))
+                goto out;
+        segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize;
+        minseg = range[0] + segbytes - 1;
+        do_div(minseg, segbytes);
+        /*
+         * NOTE(review): NILFS_SB2_OFFSET_BYTES() is defined outside this
+         * patch hunk; upstream it steps back one 4096-byte block, so a
+         * smaller upper offset would wrap around - confirm against the
+         * header.  ret still holds -ERANGE here.
+         */
+        if (range[1] < 4096)
+                goto out;
+        maxseg = NILFS_SB2_OFFSET_BYTES(range[1]);
+        /* less than one segment: the decrement below would wrap to U64_MAX */
+        if (maxseg < segbytes)
+                goto out;
+        do_div(maxseg, segbytes);
+        maxseg--;
+        ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg);
+out:
+        return ret;
+}
 static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
                                unsigned int cmd, void __user *argp,
                                size_t membsz,
@@ -763,6 +820,10 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                return nilfs_ioctl_clean_segments(inode, filp, cmd, argp);
        case NILFS_IOCTL_SYNC:
                return nilfs_ioctl_sync(inode, filp, cmd, argp);
+        case NILFS_IOCTL_RESIZE:
+                return nilfs_ioctl_resize(inode, filp, argp);
+        case NILFS_IOCTL_SET_ALLOC_RANGE:
+                return nilfs_ioctl_set_alloc_range(inode, argp);
        default:
                return -ENOTTY;
        }
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index a649b05f7069..800e8d78a83b 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -66,7 +66,7 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
        kunmap_atomic(kaddr, KM_USER0);
        set_buffer_uptodate(bh);
-        nilfs_mark_buffer_dirty(bh);
+        mark_buffer_dirty(bh);
        nilfs_mdt_mark_dirty(inode);
        return 0;
 }
@@ -355,7 +355,7 @@ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
        err = nilfs_mdt_read_block(inode, block, 0, &bh);
        if (unlikely(err))
                return err;
-        nilfs_mark_buffer_dirty(bh);
+        mark_buffer_dirty(bh);
        nilfs_mdt_mark_dirty(inode);
        brelse(bh);
        return 0;
@@ -450,9 +450,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
        INIT_LIST_HEAD(&shadow->frozen_buffers);
        address_space_init_once(&shadow->frozen_data);
-        nilfs_mapping_init(&shadow->frozen_data, bdi);
+        nilfs_mapping_init(&shadow->frozen_data, inode, bdi);
        address_space_init_once(&shadow->frozen_btnodes);
-        nilfs_mapping_init(&shadow->frozen_btnodes, bdi);
+        nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi);
        mi->mi_shadow = shadow;
        return 0;
 }
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index ed68563ec708..ab20a4baa50f 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -64,11 +64,6 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
        return inode->i_private;
 }
-static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode)
-{
-        return inode->i_sb->s_fs_info;
-}
 /* Default GFP flags using highmem */
 #define NILFS_MDT_GFP      (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM)
@@ -93,8 +88,6 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh);
 struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode,
                                                struct buffer_head *bh);
-#define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh)
 static inline void nilfs_mdt_mark_dirty(struct inode *inode)
 {
        if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state))
@@ -108,7 +101,7 @@ static inline void nilfs_mdt_clear_dirty(struct inode *inode)
 static inline __u64 nilfs_mdt_cno(struct inode *inode)
 {
-        return NILFS_I_NILFS(inode)->ns_cno;
+        return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno;
 }
 #define nilfs_mdt_bgl_lock(inode, bg) \
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index a8dd344303cb..a9c6a531f80c 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -80,12 +80,6 @@ static inline struct inode *NILFS_BTNC_I(struct address_space *btnc)
        return &ii->vfs_inode;
 }
-static inline struct inode *NILFS_AS_I(struct address_space *mapping)
-{
-        return (mapping->host) ? :
-                container_of(mapping, struct inode, i_data);
-}
 /*
 * Dynamic state flags of NILFS on-memory inode (i_state)
 */
@@ -298,6 +292,7 @@ struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
                                               int flip);
 int nilfs_commit_super(struct super_block *sb, int flag);
 int nilfs_cleanup_super(struct super_block *sb);
+int nilfs_resize_fs(struct super_block *sb, __u64 newsize);
 int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
                            struct nilfs_root **root);
 int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 1168059c7efd..65221a04c6f0 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -37,8 +37,7 @@
 #define NILFS_BUFFER_INHERENT_BITS  \
        ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
-         (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \
+         (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Checked))
-         (1UL << BH_NILFS_Checked))
 static struct buffer_head *
 __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
@@ -59,19 +58,6 @@ __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
        return bh;
 }
-/*
- * Since the page cache of B-tree node pages or data page cache of pseudo
- * inodes does not have a valid mapping->host pointer, calling
- * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
- * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
- * To avoid this problem, the old style mark_buffer_dirty() is used instead.
- */
-void nilfs_mark_buffer_dirty(struct buffer_head *bh)
-{
-        if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
-                __set_page_dirty_nobuffers(bh->b_page);
-}
 struct buffer_head *nilfs_grab_buffer(struct inode *inode,
                                      struct address_space *mapping,
                                      unsigned long blkoff,
@@ -183,7 +169,7 @@ int nilfs_page_buffers_clean(struct page *page)
 void nilfs_page_bug(struct page *page)
 {
        struct address_space *m;
-        unsigned long ino = 0;
+        unsigned long ino;
        if (unlikely(!page)) {
                printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
@@ -191,11 +177,8 @@ void nilfs_page_bug(struct page *page)
        }
        m = page->mapping;
-        if (m) {
+        ino = m ? m->host->i_ino : 0;
-                struct inode *inode = NILFS_AS_I(m);
-                if (inode != NULL)
-                        ino = inode->i_ino;
-        }
        printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
               "mapping=%p ino=%lu\n",
               page, atomic_read(&page->_count),
@@ -217,56 +200,6 @@ void nilfs_page_bug(struct page *page)
 }
 /**
- * nilfs_alloc_private_page - allocate a private page with buffer heads
- *
- * Return Value: On success, a pointer to the allocated page is returned.
- * On error, NULL is returned.
- */
-struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
-                                      unsigned long state)
-{
-        struct buffer_head *bh, *head, *tail;
-        struct page *page;
-        page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
-        if (unlikely(!page))
-                return NULL;
-        lock_page(page);
-        head = alloc_page_buffers(page, size, 0);
-        if (unlikely(!head)) {
-                unlock_page(page);
-                __free_page(page);
-                return NULL;
-        }
-        bh = head;
-        do {
-                bh->b_state = (1UL << BH_NILFS_Allocated) | state;
-                tail = bh;
-                bh->b_bdev = bdev;
-                bh = bh->b_this_page;
-        } while (bh);
-        tail->b_this_page = head;
-        attach_page_buffers(page, head);
-        return page;
-}
-void nilfs_free_private_page(struct page *page)
-{
-        BUG_ON(!PageLocked(page));
-        BUG_ON(page->mapping);
-        if (page_has_buffers(page) && !try_to_free_buffers(page))
-                NILFS_PAGE_BUG(page, "failed to free page");
-        unlock_page(page);
-        __free_page(page);
-}
-/**
 * nilfs_copy_page -- copy the page with buffers
 * @dst: destination page
 * @src: source page
@@ -492,10 +425,10 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
        return nc;
 }
-void nilfs_mapping_init(struct address_space *mapping,
+void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
                        struct backing_dev_info *bdi)
 {
-        mapping->host = NULL;
+        mapping->host = inode;
        mapping->flags = 0;
        mapping_set_gfp_mask(mapping, GFP_NOFS);
        mapping->assoc_mapping = NULL;
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index f06b79ad7493..fb7de71605a0 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -38,14 +38,12 @@ enum {
        BH_NILFS_Redirected,
 };
-BUFFER_FNS(NILFS_Allocated, nilfs_allocated)    /* nilfs private buffers */
 BUFFER_FNS(NILFS_Node, nilfs_node)              /* nilfs node buffers */
 BUFFER_FNS(NILFS_Volatile, nilfs_volatile)
 BUFFER_FNS(NILFS_Checked, nilfs_checked)        /* buffer is verified */
 BUFFER_FNS(NILFS_Redirected, nilfs_redirected)  /* redirected to a copy */
-void nilfs_mark_buffer_dirty(struct buffer_head *bh);
 int __nilfs_clear_page_dirty(struct page *);
 struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *,
@@ -54,14 +52,11 @@ void nilfs_forget_buffer(struct buffer_head *);
 void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *);
 int nilfs_page_buffers_clean(struct page *);
 void nilfs_page_bug(struct page *);
-struct page *nilfs_alloc_private_page(struct block_device *, int,
-                                      unsigned long);
-void nilfs_free_private_page(struct page *);
 int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
 void nilfs_copy_back_pages(struct address_space *, struct address_space *);
 void nilfs_clear_dirty_pages(struct address_space *);
-void nilfs_mapping_init(struct address_space *mapping,
+void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
                        struct backing_dev_info *bdi);
 unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
 unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index ba4a64518f38..a604ac0331b2 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -387,9 +387,9 @@ static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr,
 static void dispose_recovery_list(struct list_head *head)
 {
        while (!list_empty(head)) {
-                struct nilfs_recovery_block *rb
+                struct nilfs_recovery_block *rb;
-                        = list_entry(head->next,
-                                     struct nilfs_recovery_block, list);
+                rb = list_first_entry(head, struct nilfs_recovery_block, list);
                list_del(&rb->list);
                kfree(rb);
        }
@@ -416,9 +416,9 @@ static int nilfs_segment_list_add(struct list_head *head, __u64 segnum)
 void nilfs_dispose_segment_list(struct list_head *head)
 {
        while (!list_empty(head)) {
-                struct nilfs_segment_entry *ent
+                struct nilfs_segment_entry *ent;
-                        = list_entry(head->next,
-                                     struct nilfs_segment_entry, list);
+                ent = list_first_entry(head, struct nilfs_segment_entry, list);
                list_del(&ent->list);
                kfree(ent);
        }
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 2853ff20f85a..850a7c0228fb 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -239,12 +239,15 @@ nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf,
                                    u32 seed)
 {
        struct nilfs_super_root *raw_sr;
+        struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info;
+        unsigned srsize;
        u32 crc;
        raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data;
+        srsize = NILFS_SR_BYTES(nilfs->ns_inode_size);
        crc = crc32_le(seed,
                       (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum),
-                       NILFS_SR_BYTES - sizeof(raw_sr->sr_sum));
+                       srsize - sizeof(raw_sr->sr_sum));
        raw_sr->sr_sum = cpu_to_le32(crc);
 }
@@ -254,18 +257,6 @@ static void nilfs_release_buffers(struct list_head *list)
        list_for_each_entry_safe(bh, n, list, b_assoc_buffers) {
                list_del_init(&bh->b_assoc_buffers);
-                if (buffer_nilfs_allocated(bh)) {
-                        struct page *clone_page = bh->b_page;
-                        /* remove clone page */
-                        brelse(bh);
-                        page_cache_release(clone_page); /* for each bh */
-                        if (page_count(clone_page) <= 2) {
-                                lock_page(clone_page);
-                                nilfs_free_private_page(clone_page);
-                        }
-                        continue;
-                }
                brelse(bh);
        }
 }
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index afe4f2183454..141646e88fb5 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -655,13 +655,10 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
                if (unlikely(page->index > last))
                        break;
-                if (mapping->host) {
+                lock_page(page);
-                        lock_page(page);
+                if (!page_has_buffers(page))
-                        if (!page_has_buffers(page))
+                        create_empty_buffers(page, 1 << inode->i_blkbits, 0);
-                                create_empty_buffers(page,
+                unlock_page(page);
-                                                     1 << inode->i_blkbits, 0);
-                        unlock_page(page);
-                }
                bh = head = page_buffers(page);
                do {
@@ -809,7 +806,7 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
                /* The following code is duplicated with cpfile.  But, it is
                   needed to collect the checkpoint even if it was not newly
                   created */
-                nilfs_mdt_mark_buffer_dirty(bh_cp);
+                mark_buffer_dirty(bh_cp);
                nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
                nilfs_cpfile_put_checkpoint(
                        nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
@@ -889,12 +886,14 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
 {
        struct buffer_head *bh_sr;
        struct nilfs_super_root *raw_sr;
-        unsigned isz = nilfs->ns_inode_size;
+        unsigned isz, srsz;
        bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
        raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
+        isz = nilfs->ns_inode_size;
+        srsz = NILFS_SR_BYTES(isz);
-        raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES);
+        raw_sr->sr_bytes = cpu_to_le16(srsz);
        raw_sr->sr_nongc_ctime
                = cpu_to_le64(nilfs_doing_gc() ?
                              nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
@@ -906,6 +905,7 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
                                 NILFS_SR_CPFILE_OFFSET(isz), 1);
        nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
                                 NILFS_SR_SUFILE_OFFSET(isz), 1);
+        memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
 }
 static void nilfs_redirty_inodes(struct list_head *head)
@@ -954,8 +954,8 @@ static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
 dispose_buffers:
        while (!list_empty(listp)) {
-                bh = list_entry(listp->next, struct buffer_head,
+                bh = list_first_entry(listp, struct buffer_head,
-                                b_assoc_buffers);
+                                      b_assoc_buffers);
                list_del_init(&bh->b_assoc_buffers);
                brelse(bh);
        }
@@ -1500,10 +1500,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
                        nblocks = le32_to_cpu(finfo->fi_nblocks);
                        ndatablk = le32_to_cpu(finfo->fi_ndatablk);
-                        if (buffer_nilfs_node(bh))
+                        inode = bh->b_page->mapping->host;
-                                inode = NILFS_BTNC_I(bh->b_page->mapping);
-                        else
-                                inode = NILFS_AS_I(bh->b_page->mapping);
                        if (mode == SC_LSEG_DSYNC)
                                sc_op = &nilfs_sc_dsync_ops;
@@ -1556,83 +1553,24 @@ static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
        return 0;
 }
-static int
+static void nilfs_begin_page_io(struct page *page)
-nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out)
-{
-        struct page *clone_page;
-        struct buffer_head *bh, *head, *bh2;
-        void *kaddr;
-        bh = head = page_buffers(page);
-        clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0);
-        if (unlikely(!clone_page))
-                return -ENOMEM;
-        bh2 = page_buffers(clone_page);
-        kaddr = kmap_atomic(page, KM_USER0);
-        do {
-                if (list_empty(&bh->b_assoc_buffers))
-                        continue;
-                get_bh(bh2);
-                page_cache_get(clone_page); /* for each bh */
-                memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size);
-                bh2->b_blocknr = bh->b_blocknr;
-                list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers);
-                list_add_tail(&bh->b_assoc_buffers, out);
-        } while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head);
-        kunmap_atomic(kaddr, KM_USER0);
-        if (!TestSetPageWriteback(clone_page))
-                account_page_writeback(clone_page);
-        unlock_page(clone_page);
-        return 0;
-}
-static int nilfs_test_page_to_be_frozen(struct page *page)
-{
-        struct address_space *mapping = page->mapping;
-        if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode))
-                return 0;
-        if (page_mapped(page)) {
-                ClearPageChecked(page);
-                return 1;
-        }
-        return PageChecked(page);
-}
-static int nilfs_begin_page_io(struct page *page, struct list_head *out)
 {
        if (!page || PageWriteback(page))
                /* For split b-tree node pages, this function may be called
                   twice.  We ignore the 2nd or later calls by this check. */
-                return 0;
+                return;
        lock_page(page);
        clear_page_dirty_for_io(page);
        set_page_writeback(page);
        unlock_page(page);
-        if (nilfs_test_page_to_be_frozen(page)) {
-                int err = nilfs_copy_replace_page_buffers(page, out);
-                if (unlikely(err))
-                        return err;
-        }
-        return 0;
 }
-static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
+static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
-                                       struct page **failed_page)
 {
        struct nilfs_segment_buffer *segbuf;
        struct page *bd_page = NULL, *fs_page = NULL;
-        struct list_head *list = &sci->sc_copied_buffers;
-        int err;
-        *failed_page = NULL;
        list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
                struct buffer_head *bh;
@@ -1662,11 +1600,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
                                break;
                        }
                        if (bh->b_page != fs_page) {
-                                err = nilfs_begin_page_io(fs_page, list);
+                                nilfs_begin_page_io(fs_page);
-                                if (unlikely(err)) {
-                                        *failed_page = fs_page;
-                                        goto out;
-                                }
                                fs_page = bh->b_page;
                        }
                }
@@ -1677,11 +1611,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
                set_page_writeback(bd_page);
                unlock_page(bd_page);
        }
-        err = nilfs_begin_page_io(fs_page, list);
+        nilfs_begin_page_io(fs_page);
-        if (unlikely(err))
-                *failed_page = fs_page;
- out:
-        return err;
 }
 static int nilfs_segctor_write(struct nilfs_sc_info *sci,
@@ -1694,24 +1624,6 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci,
        return ret;
 }
-static void __nilfs_end_page_io(struct page *page, int err)
-{
-        if (!err) {
-                if (!nilfs_page_buffers_clean(page))
-                        __set_page_dirty_nobuffers(page);
-                ClearPageError(page);
-        } else {
-                __set_page_dirty_nobuffers(page);
-                SetPageError(page);
-        }
-        if (buffer_nilfs_allocated(page_buffers(page))) {
-                if (TestClearPageWriteback(page))
-                        dec_zone_page_state(page, NR_WRITEBACK);
-        } else
-                end_page_writeback(page);
-}
 static void nilfs_end_page_io(struct page *page, int err)
 {
        if (!page)
@@ -1738,40 +1650,19 @@ static void nilfs_end_page_io(struct page *page, int err)
                return;
        }
-        __nilfs_end_page_io(page, err);
+        if (!err) {
-}
+                if (!nilfs_page_buffers_clean(page))
+                        __set_page_dirty_nobuffers(page);
-static void nilfs_clear_copied_buffers(struct list_head *list, int err)
+                ClearPageError(page);
-{
+        } else {
-        struct buffer_head *bh, *head;
+                __set_page_dirty_nobuffers(page);
-        struct page *page;
+                SetPageError(page);
-        while (!list_empty(list)) {
-                bh = list_entry(list->next, struct buffer_head,
-                                b_assoc_buffers);
-                page = bh->b_page;
-                page_cache_get(page);
-                head = bh = page_buffers(page);
-                do {
-                        if (!list_empty(&bh->b_assoc_buffers)) {
-                                list_del_init(&bh->b_assoc_buffers);
-                                if (!err) {
-                                        set_buffer_uptodate(bh);
-                                        clear_buffer_dirty(bh);
-                                        clear_buffer_delay(bh);
-                                        clear_buffer_nilfs_volatile(bh);
-                                }
-                                brelse(bh); /* for b_assoc_buffers */
-                        }
-                } while ((bh = bh->b_this_page) != head);
-                __nilfs_end_page_io(page, err);
-                page_cache_release(page);
        }
+        end_page_writeback(page);
 }
-static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
+static void nilfs_abort_logs(struct list_head *logs, int err)
-                             int err)
 {
        struct nilfs_segment_buffer *segbuf;
        struct page *bd_page = NULL, *fs_page = NULL;
@@ -1801,8 +1692,6 @@ static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
                        }
                        if (bh->b_page != fs_page) {
                                nilfs_end_page_io(fs_page, err);
-                                if (fs_page && fs_page == failed_page)
-                                        return;
                                fs_page = bh->b_page;
                        }
                }
@@ -1821,12 +1710,11 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
        list_splice_tail_init(&sci->sc_write_logs, &logs);
        ret = nilfs_wait_on_logs(&logs);
-        nilfs_abort_logs(&logs, NULL, ret ? : err);
+        nilfs_abort_logs(&logs, ret ? : err);
        list_splice_tail_init(&sci->sc_segbufs, &logs);
        nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
        nilfs_free_incomplete_logs(&logs, nilfs);
-        nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err);
        if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
                ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
@@ -1920,8 +1808,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
        nilfs_end_page_io(fs_page, 0);
-        nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0);
        nilfs_drop_collected_inodes(&sci->sc_dirty_files);
        if (nilfs_doing_gc())
@@ -1979,7 +1865,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
                                              "failed to get inode block.\n");
                                return err;
                        }
-                        nilfs_mdt_mark_buffer_dirty(ibh);
+                        mark_buffer_dirty(ibh);
                        nilfs_mdt_mark_dirty(ifile);
                        spin_lock(&nilfs->ns_inode_lock);
                        if (likely(!ii->i_bh))
@@ -1991,8 +1877,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
                clear_bit(NILFS_I_QUEUED, &ii->i_state);
                set_bit(NILFS_I_BUSY, &ii->i_state);
-                list_del(&ii->i_dirty);
+                list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
-                list_add_tail(&ii->i_dirty, &sci->sc_dirty_files);
        }
        spin_unlock(&nilfs->ns_inode_lock);
@@ -2014,8 +1899,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
                clear_bit(NILFS_I_BUSY, &ii->i_state);
                brelse(ii->i_bh);
                ii->i_bh = NULL;
-                list_del(&ii->i_dirty);
+                list_move_tail(&ii->i_dirty, &ti->ti_garbage);
-                list_add_tail(&ii->i_dirty, &ti->ti_garbage);
        }
        spin_unlock(&nilfs->ns_inode_lock);
 }
@@ -2026,7 +1910,6 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
 static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
 {
        struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
-        struct page *failed_page;
        int err;
        sci->sc_stage.scnt = NILFS_ST_INIT;
@@ -2081,11 +1964,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
                nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
                /* Write partial segments */
-                err = nilfs_segctor_prepare_write(sci, &failed_page);
+                nilfs_segctor_prepare_write(sci);
-                if (err) {
-                        nilfs_abort_logs(&sci->sc_segbufs, failed_page, err);
-                        goto failed_to_write;
-                }
                nilfs_add_checksums_on_logs(&sci->sc_segbufs,
                                            nilfs->ns_crc_seed);
@@ -2687,7 +2566,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
        INIT_LIST_HEAD(&sci->sc_segbufs);
        INIT_LIST_HEAD(&sci->sc_write_logs);
        INIT_LIST_HEAD(&sci->sc_gc_inodes);
-        INIT_LIST_HEAD(&sci->sc_copied_buffers);
        init_timer(&sci->sc_timer);
        sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
@@ -2741,8 +2619,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
        if (flag || !nilfs_segctor_confirm(sci))
                nilfs_segctor_write_out(sci);
-        WARN_ON(!list_empty(&sci->sc_copied_buffers));
        if (!list_empty(&sci->sc_dirty_files)) {
                nilfs_warning(sci->sc_super, __func__,
                              "dirty file(s) after the final construction\n");
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 6c02a86745fb..38a1d0013314 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -92,7 +92,6 @@ struct nilfs_segsum_pointer {
 * @sc_nblk_inc: Block count of current generation
 * @sc_dirty_files: List of files to be written
 * @sc_gc_inodes: List of GC inodes having blocks to be written
- * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data
 * @sc_freesegs: array of segment numbers to be freed
 * @sc_nfreesegs: number of segments on @sc_freesegs
 * @sc_dsync_inode: inode whose data pages are written for a sync operation
@@ -136,7 +135,6 @@ struct nilfs_sc_info {
        struct list_head        sc_dirty_files;
        struct list_head        sc_gc_inodes;
-        struct list_head        sc_copied_buffers;
        __u64                  *sc_freesegs;
        size_t                  sc_nfreesegs;
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 1d6f488ccae8..0a0aba617d8a 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -33,7 +33,9 @@
 struct nilfs_sufile_info {
        struct nilfs_mdt_info mi;
-        unsigned long ncleansegs;
+        unsigned long ncleansegs;/* number of clean segments */
+        __u64 allocmin;         /* lower limit of allocatable segment range */
+        __u64 allocmax;         /* upper limit of allocatable segment range */
 };
 static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile)
@@ -96,6 +98,13 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum,
                                   create, NULL, bhp);
 }
+static int nilfs_sufile_delete_segment_usage_block(struct inode *sufile,
+                                                   __u64 segnum)
+{
+        return nilfs_mdt_delete_block(sufile,
+                                      nilfs_sufile_get_blkoff(sufile, segnum));
+}
 static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
                                     u64 ncleanadd, u64 ndirtyadd)
 {
@@ -108,7 +117,7 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
        le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
        kunmap_atomic(kaddr, KM_USER0);
-        nilfs_mdt_mark_buffer_dirty(header_bh);
+        mark_buffer_dirty(header_bh);
 }
 /**
@@ -248,6 +257,35 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create,
 }
 /**
+ * nilfs_sufile_set_alloc_range - limit range of segment to be allocated
+ * @sufile: inode of segment usage file
+ * @start: minimum segment number of allocatable region (inclusive)
+ * @end: maximum segment number of allocatable region (inclusive)
+ *
+ * Return Value: On success, 0 is returned.  On error, one of the
+ * following negative error codes is returned.
+ *
+ * %-ERANGE - invalid segment region
+ */
+int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end)
+{
+        struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
+        __u64 nsegs;
+        int ret = -ERANGE;
+        down_write(&NILFS_MDT(sufile)->mi_sem);
+        nsegs = nilfs_sufile_get_nsegments(sufile);
+        if (start <= end && end < nsegs) {
+                sui->allocmin = start;
+                sui->allocmax = end;
+                ret = 0;
+        }
+        up_write(&NILFS_MDT(sufile)->mi_sem);
+        return ret;
+}
+/**
 * nilfs_sufile_alloc - allocate a segment
 * @sufile: inode of segment usage file
 * @segnump: pointer to segment number
@@ -269,11 +307,12 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
        struct buffer_head *header_bh, *su_bh;
        struct nilfs_sufile_header *header;
        struct nilfs_segment_usage *su;
+        struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
        size_t susz = NILFS_MDT(sufile)->mi_entry_size;
        __u64 segnum, maxsegnum, last_alloc;
        void *kaddr;
-        unsigned long nsegments, ncleansegs, nsus;
+        unsigned long nsegments, ncleansegs, nsus, cnt;
-        int ret, i, j;
+        int ret, j;
        down_write(&NILFS_MDT(sufile)->mi_sem);
@@ -287,13 +326,31 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
        kunmap_atomic(kaddr, KM_USER0);
        nsegments = nilfs_sufile_get_nsegments(sufile);
+        maxsegnum = sui->allocmax;
        segnum = last_alloc + 1;
-        maxsegnum = nsegments - 1;
+        if (segnum < sui->allocmin || segnum > sui->allocmax)
-        for (i = 0; i < nsegments; i += nsus) {
+                segnum = sui->allocmin;
-                if (segnum >= nsegments) {
-                        /* wrap around */
+        for (cnt = 0; cnt < nsegments; cnt += nsus) {
-                        segnum = 0;
+                if (segnum > maxsegnum) {
-                        maxsegnum = last_alloc;
+                        if (cnt < sui->allocmax - sui->allocmin + 1) {
+                                /*
+                                 * wrap around in the limited region.
+                                 * if allocation started from
+                                 * sui->allocmin, this never happens.
+                                 */
+                                segnum = sui->allocmin;
+                                maxsegnum = last_alloc;
+                        } else if (segnum > sui->allocmin &&
+                                   sui->allocmax + 1 < nsegments) {
+                                segnum = sui->allocmax + 1;
+                                maxsegnum = nsegments - 1;
+                        } else if (sui->allocmin > 0)  {
+                                segnum = 0;
+                                maxsegnum = sui->allocmin - 1;
+                        } else {
+                                break; /* never happens */
+                        }
                }
                ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1,
                                                           &su_bh);
@@ -319,9 +376,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
                        header->sh_last_alloc = cpu_to_le64(segnum);
                        kunmap_atomic(kaddr, KM_USER0);
-                        NILFS_SUI(sufile)->ncleansegs--;
+                        sui->ncleansegs--;
-                        nilfs_mdt_mark_buffer_dirty(header_bh);
+                        mark_buffer_dirty(header_bh);
-                        nilfs_mdt_mark_buffer_dirty(su_bh);
+                        mark_buffer_dirty(su_bh);
                        nilfs_mdt_mark_dirty(sufile);
                        brelse(su_bh);
                        *segnump = segnum;
@@ -364,7 +421,7 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
        nilfs_sufile_mod_counter(header_bh, -1, 1);
        NILFS_SUI(sufile)->ncleansegs--;
-        nilfs_mdt_mark_buffer_dirty(su_bh);
+        mark_buffer_dirty(su_bh);
        nilfs_mdt_mark_dirty(sufile);
 }
@@ -395,7 +452,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
        nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
        NILFS_SUI(sufile)->ncleansegs -= clean;
-        nilfs_mdt_mark_buffer_dirty(su_bh);
+        mark_buffer_dirty(su_bh);
        nilfs_mdt_mark_dirty(sufile);
 }
@@ -421,7 +478,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
        sudirty = nilfs_segment_usage_dirty(su);
        nilfs_segment_usage_set_clean(su);
        kunmap_atomic(kaddr, KM_USER0);
-        nilfs_mdt_mark_buffer_dirty(su_bh);
+        mark_buffer_dirty(su_bh);
        nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
        NILFS_SUI(sufile)->ncleansegs++;
@@ -441,7 +498,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
        ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
        if (!ret) {
-                nilfs_mdt_mark_buffer_dirty(bh);
+                mark_buffer_dirty(bh);
                nilfs_mdt_mark_dirty(sufile);
                brelse(bh);
        }
@@ -476,7 +533,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
        su->su_nblocks = cpu_to_le32(nblocks);
        kunmap_atomic(kaddr, KM_USER0);
-        nilfs_mdt_mark_buffer_dirty(bh);
+        mark_buffer_dirty(bh);
        nilfs_mdt_mark_dirty(sufile);
        brelse(bh);
@@ -505,7 +562,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
 {
        struct buffer_head *header_bh;
        struct nilfs_sufile_header *header;
-        struct the_nilfs *nilfs = NILFS_I_NILFS(sufile);
+        struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
        void *kaddr;
        int ret;
@@ -555,11 +612,183 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
                nilfs_sufile_mod_counter(header_bh, -1, 0);
                NILFS_SUI(sufile)->ncleansegs--;
        }
-        nilfs_mdt_mark_buffer_dirty(su_bh);
+        mark_buffer_dirty(su_bh);
        nilfs_mdt_mark_dirty(sufile);
 }
 /**
+  * nilfs_sufile_truncate_range - truncate range of segment array
+  * @sufile: inode of segment usage file
+  * @start: start segment number (inclusive)
+  * @end: end segment number (inclusive)
+  *
+  * Return Value: On success, 0 is returned.  On error, one of the
+  * following negative error codes is returned.
+  *
+  * %-EIO - I/O error.
+  *
+  * %-ENOMEM - Insufficient amount of memory available.
+  *
+  * %-EINVAL - Invalid number of segments specified
+  *
+  * %-EBUSY - Dirty or active segments are present in the range
+  */
+static int nilfs_sufile_truncate_range(struct inode *sufile,
+                                       __u64 start, __u64 end)
+{
+        struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+        struct buffer_head *header_bh;
+        struct buffer_head *su_bh;
+        struct nilfs_segment_usage *su, *su2;
+        size_t susz = NILFS_MDT(sufile)->mi_entry_size;
+        unsigned long segusages_per_block;
+        unsigned long nsegs, ncleaned;
+        __u64 segnum;
+        void *kaddr;
+        ssize_t n, nc;
+        int ret;
+        int j;
+        nsegs = nilfs_sufile_get_nsegments(sufile);
+        ret = -EINVAL;
+        if (start > end || start >= nsegs)
+                goto out;
+        ret = nilfs_sufile_get_header_block(sufile, &header_bh);
+        if (ret < 0)
+                goto out;
+        segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile);
+        ncleaned = 0;
+        for (segnum = start; segnum <= end; segnum += n) {
+                n = min_t(unsigned long,
+                          segusages_per_block -
+                                  nilfs_sufile_get_offset(sufile, segnum),
+                          end - segnum + 1);
+                ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0,
+                                                           &su_bh);
+                if (ret < 0) {
+                        if (ret != -ENOENT)
+                                goto out_header;
+                        /* hole */
+                        continue;
+                }
+                kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
+                su = nilfs_sufile_block_get_segment_usage(
+                        sufile, segnum, su_bh, kaddr);
+                su2 = su;
+                for (j = 0; j < n; j++, su = (void *)su + susz) {
+                        if ((le32_to_cpu(su->su_flags) &
+                             ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) ||
+                            nilfs_segment_is_active(nilfs, segnum + j)) {
+                                ret = -EBUSY;
+                                kunmap_atomic(kaddr, KM_USER0);
+                                brelse(su_bh);
+                                goto out_header;
+                        }
+                }
+                nc = 0;
+                for (su = su2, j = 0; j < n; j++, su = (void *)su + susz) {
+                        if (nilfs_segment_usage_error(su)) {
+                                nilfs_segment_usage_set_clean(su);
+                                nc++;
+                        }
+                }
+                kunmap_atomic(kaddr, KM_USER0);
+                if (nc > 0) {
+                        mark_buffer_dirty(su_bh);
+                        ncleaned += nc;
+                }
+                brelse(su_bh);
+                if (n == segusages_per_block) {
+                        /* make hole */
+                        nilfs_sufile_delete_segment_usage_block(sufile, segnum);
+                }
+        }
+        ret = 0;
+out_header:
+        if (ncleaned > 0) {
+                NILFS_SUI(sufile)->ncleansegs += ncleaned;
+                nilfs_sufile_mod_counter(header_bh, ncleaned, 0);
+                nilfs_mdt_mark_dirty(sufile);
+        }
+        brelse(header_bh);
+out:
+        return ret;
+}
+/**
+ * nilfs_sufile_resize - resize segment array
+ * @sufile: inode of segment usage file
+ * @newnsegs: new number of segments
+ *
+ * Return Value: On success, 0 is returned.  On error, one of the
+ * following negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-ENOSPC - Enough free space is not left for shrinking
+ *
+ * %-EBUSY - Dirty or active segments exist in the region to be truncated
+ */
+int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
+{
+        struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+        struct buffer_head *header_bh;
+        struct nilfs_sufile_header *header;
+        struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
+        void *kaddr;
+        unsigned long nsegs, nrsvsegs;
+        int ret = 0;
+        down_write(&NILFS_MDT(sufile)->mi_sem);
+        nsegs = nilfs_sufile_get_nsegments(sufile);
+        if (nsegs == newnsegs)
+                goto out;
+        ret = -ENOSPC;
+        nrsvsegs = nilfs_nrsvsegs(nilfs, newnsegs);
+        if (newnsegs < nsegs && nsegs - newnsegs + nrsvsegs > sui->ncleansegs)
+                goto out;
+        ret = nilfs_sufile_get_header_block(sufile, &header_bh);
+        if (ret < 0)
+                goto out;
+        if (newnsegs > nsegs) {
+                sui->ncleansegs += newnsegs - nsegs;
+        } else /* newnsegs < nsegs */ {
+                ret = nilfs_sufile_truncate_range(sufile, newnsegs, nsegs - 1);
+                if (ret < 0)
+                        goto out_header;
+                sui->ncleansegs -= nsegs - newnsegs;
+        }
+        kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
+        header = kaddr + bh_offset(header_bh);
+        header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs);
+        kunmap_atomic(kaddr, KM_USER0);
+        mark_buffer_dirty(header_bh);
+        nilfs_mdt_mark_dirty(sufile);
+        nilfs_set_nsegments(nilfs, newnsegs);
+out_header:
+        brelse(header_bh);
+out:
+        up_write(&NILFS_MDT(sufile)->mi_sem);
+        return ret;
+}
+/**
 * nilfs_sufile_get_suinfo -
 * @sufile: inode of segment usage file
 * @segnum: segment number to start looking
@@ -583,7 +812,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
        struct nilfs_segment_usage *su;
        struct nilfs_suinfo *si = buf;
        size_t susz = NILFS_MDT(sufile)->mi_entry_size;
-        struct the_nilfs *nilfs = NILFS_I_NILFS(sufile);
+        struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
        void *kaddr;
        unsigned long nsegs, segusages_per_block;
        ssize_t n;
@@ -679,6 +908,9 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
        kunmap_atomic(kaddr, KM_USER0);
        brelse(header_bh);
+        sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1;
+        sui->allocmin = 0;
        unlock_new_inode(sufile);
 out:
        *inodep = sufile;
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index a943fbacb45b..e84bc5b51fc1 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -31,11 +31,12 @@
 static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
 {
-        return NILFS_I_NILFS(sufile)->ns_nsegments;
+        return ((struct the_nilfs *)sufile->i_sb->s_fs_info)->ns_nsegments;
 }
 unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile);
+int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end);
 int nilfs_sufile_alloc(struct inode *, __u64 *);
 int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum);
 int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
@@ -61,6 +62,7 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
 void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
                               struct buffer_head *);
+int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs);
 int nilfs_sufile_read(struct super_block *sb, size_t susize,
                      struct nilfs_inode *raw_inode, struct inode **inodep);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 062cca065195..8351c44a7320 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -56,6 +56,7 @@
 #include "btnode.h"
 #include "page.h"
 #include "cpfile.h"
+#include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */
 #include "ifile.h"
 #include "dat.h"
 #include "segment.h"
@@ -165,7 +166,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
        ii->i_state = 0;
        ii->i_cno = 0;
        ii->vfs_inode.i_version = 1;
-        nilfs_btnode_cache_init(&ii->i_btnode_cache, sb->s_bdi);
+        nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode, sb->s_bdi);
        return &ii->vfs_inode;
 }
@@ -347,6 +348,134 @@ int nilfs_cleanup_super(struct super_block *sb)
        return ret;
 }
+/**
+ * nilfs_move_2nd_super - relocate secondary super block
+ * @sb: super block instance
+ * @sb2off: new offset of the secondary super block (in bytes)
+ */
+static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
+{
+        struct the_nilfs *nilfs = sb->s_fs_info;
+        struct buffer_head *nsbh;
+        struct nilfs_super_block *nsbp;
+        sector_t blocknr, newblocknr;
+        unsigned long offset;
+        int sb2i = -1;  /* array index of the secondary superblock */
+        int ret = 0;
+        /* nilfs->ns_sem must be locked by the caller. */
+        if (nilfs->ns_sbh[1] &&
+            nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) {
+                sb2i = 1;
+                blocknr = nilfs->ns_sbh[1]->b_blocknr;
+        } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) {
+                sb2i = 0;
+                blocknr = nilfs->ns_sbh[0]->b_blocknr;
+        }
+        if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off)
+                goto out;  /* super block location is unchanged */
+        /* Get new super block buffer */
+        newblocknr = sb2off >> nilfs->ns_blocksize_bits;
+        offset = sb2off & (nilfs->ns_blocksize - 1);
+        nsbh = sb_getblk(sb, newblocknr);
+        if (!nsbh) {
+                printk(KERN_WARNING
+                       "NILFS warning: unable to move secondary superblock "
+                       "to block %llu\n", (unsigned long long)newblocknr);
+                ret = -EIO;
+                goto out;
+        }
+        nsbp = (void *)nsbh->b_data + offset;
+        memset(nsbp, 0, nilfs->ns_blocksize);
+        if (sb2i >= 0) {
+                memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);
+                brelse(nilfs->ns_sbh[sb2i]);
+                nilfs->ns_sbh[sb2i] = nsbh;
+                nilfs->ns_sbp[sb2i] = nsbp;
+        } else if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) {
+                /* secondary super block will be restored to index 1 */
+                nilfs->ns_sbh[1] = nsbh;
+                nilfs->ns_sbp[1] = nsbp;
+        } else {
+                brelse(nsbh);
+        }
+out:
+        return ret;
+}
+/**
+ * nilfs_resize_fs - resize the filesystem
+ * @sb: super block instance
+ * @newsize: new size of the filesystem (in bytes)
+ */
+int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
+{
+        struct the_nilfs *nilfs = sb->s_fs_info;
+        struct nilfs_super_block **sbp;
+        __u64 devsize, newnsegs;
+        loff_t sb2off;
+        int ret;
+        ret = -ERANGE;
+        devsize = i_size_read(sb->s_bdev->bd_inode);
+        if (newsize > devsize)
+                goto out;
+        /*
+         * Write lock is required to protect some functions depending
+         * on the number of segments, the number of reserved segments,
+         * and so forth.
+         */
+        down_write(&nilfs->ns_segctor_sem);
+        sb2off = NILFS_SB2_OFFSET_BYTES(newsize);
+        newnsegs = sb2off >> nilfs->ns_blocksize_bits;
+        do_div(newnsegs, nilfs->ns_blocks_per_segment);
+        ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs);
+        up_write(&nilfs->ns_segctor_sem);
+        if (ret < 0)
+                goto out;
+        ret = nilfs_construct_segment(sb);
+        if (ret < 0)
+                goto out;
+        down_write(&nilfs->ns_sem);
+        nilfs_move_2nd_super(sb, sb2off);
+        ret = -EIO;
+        sbp = nilfs_prepare_super(sb, 0);
+        if (likely(sbp)) {
+                nilfs_set_log_cursor(sbp[0], nilfs);
+                /*
+                 * Drop NILFS_RESIZE_FS flag for compatibility with
+                 * mount-time resize which may be implemented in a
+                 * future release.
+                 */
+                sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) &
+                                              ~NILFS_RESIZE_FS);
+                sbp[0]->s_dev_size = cpu_to_le64(newsize);
+                sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments);
+                if (sbp[1])
+                        memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
+                ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
+        }
+        up_write(&nilfs->ns_sem);
+        /*
+         * Reset the range of allocatable segments last.  This order
+         * is important in the case of expansion because the secondary
+         * superblock must be protected from log write until migration
+         * completes.
+         */
+        if (!ret)
+                nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1);
+out:
+        return ret;
+}
 static void nilfs_put_super(struct super_block *sb)
 {
        struct the_nilfs *nilfs = sb->s_fs_info;
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index d2acd1a651f3..d32714094375 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -363,6 +363,24 @@ static unsigned long long nilfs_max_size(unsigned int blkbits)
        return res;
 }
+/**
+ * nilfs_nrsvsegs - calculate the number of reserved segments
+ * @nilfs: nilfs object
+ * @nsegs: total number of segments
+ */
+unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs)
+{
+        return max_t(unsigned long, NILFS_MIN_NRSVSEGS,
+                     DIV_ROUND_UP(nsegs * nilfs->ns_r_segments_percentage,
+                                  100));
+}
+void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
+{
+        nilfs->ns_nsegments = nsegs;
+        nilfs->ns_nrsvsegs = nilfs_nrsvsegs(nilfs, nsegs);
+}
 static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
                                   struct nilfs_super_block *sbp)
 {
@@ -389,13 +407,9 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
        }
        nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
-        nilfs->ns_nsegments = le64_to_cpu(sbp->s_nsegments);
        nilfs->ns_r_segments_percentage =
                le32_to_cpu(sbp->s_r_segments_percentage);
-        nilfs->ns_nrsvsegs =
+        nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
-                max_t(unsigned long, NILFS_MIN_NRSVSEGS,
-                      DIV_ROUND_UP(nilfs->ns_nsegments *
-                                   nilfs->ns_r_segments_percentage, 100));
        nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
        return 0;
 }
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index f4968145c2a3..9992b11312ff 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -268,6 +268,8 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev);
 void destroy_nilfs(struct the_nilfs *nilfs);
 int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data);
 int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
+unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs);
+void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs);
 int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t);
 int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
 struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 643720209a98..9a3e6bbff27b 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -539,25 +539,41 @@ static int o2hb_verify_crc(struct o2hb_region *reg,
 /* We want to make sure that nobody is heartbeating on top of us --
 * this will help detect an invalid configuration. */
-static int o2hb_check_last_timestamp(struct o2hb_region *reg)
+static void o2hb_check_last_timestamp(struct o2hb_region *reg)
 {
-        int node_num, ret;
        struct o2hb_disk_slot *slot;
        struct o2hb_disk_heartbeat_block *hb_block;
+        char *errstr;
-        node_num = o2nm_this_node();
+        slot = &reg->hr_slots[o2nm_this_node()];
-        ret = 1;
-        slot = &reg->hr_slots[node_num];
        /* Don't check on our 1st timestamp */
-        if (slot->ds_last_time) {
+        if (!slot->ds_last_time)
-                hb_block = slot->ds_raw_block;
+                return;
-                if (le64_to_cpu(hb_block->hb_seq) != slot->ds_last_time)
+        hb_block = slot->ds_raw_block;
-                        ret = 0;
+        if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time &&
-        }
+            le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation &&
+            hb_block->hb_node == slot->ds_node_num)
+                return;
-        return ret;
+#define ERRSTR1         "Another node is heartbeating on device"
+#define ERRSTR2         "Heartbeat generation mismatch on device"
+#define ERRSTR3         "Heartbeat sequence mismatch on device"
+        if (hb_block->hb_node != slot->ds_node_num)
+                errstr = ERRSTR1;
+        else if (le64_to_cpu(hb_block->hb_generation) !=
+                 slot->ds_last_generation)
+                errstr = ERRSTR2;
+        else
+                errstr = ERRSTR3;
+        mlog(ML_ERROR, "%s (%s): expected(%u:0x%llx, 0x%llx), "
+             "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_dev_name,
+             slot->ds_node_num, (unsigned long long)slot->ds_last_generation,
+             (unsigned long long)slot->ds_last_time, hb_block->hb_node,
+             (unsigned long long)le64_to_cpu(hb_block->hb_generation),
+             (unsigned long long)le64_to_cpu(hb_block->hb_seq));
 }
 static inline void o2hb_prepare_block(struct o2hb_region *reg,
@@ -983,9 +999,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
        /* With an up to date view of the slots, we can check that no
         * other node has been improperly configured to heartbeat in
         * our slot. */
-        if (!o2hb_check_last_timestamp(reg))
+        o2hb_check_last_timestamp(reg);
-                mlog(ML_ERROR, "Device \"%s\": another node is heartbeating "
-                     "in our slot!\n", reg->hr_dev_name);
        /* fill in the proper info for our next heartbeat */
        o2hb_prepare_block(reg, reg->hr_generation);
@@ -999,8 +1013,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
        }
        i = -1;
-        while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
+        while((i = find_next_bit(configured_nodes,
+                                 O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
                change |= o2hb_check_slot(reg, &reg->hr_slots[i]);
        }
@@ -1690,6 +1704,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
        struct file *filp = NULL;
        struct inode *inode = NULL;
        ssize_t ret = -EINVAL;
+        int live_threshold;
        if (reg->hr_bdev)
                goto out;
@@ -1766,8 +1781,18 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
         * A node is considered live after it has beat LIVE_THRESHOLD
         * times.  We're not steady until we've given them a chance
         * _after_ our first read.
+         * The default threshold is bare minimum so as to limit the delay
+         * during mounts. For global heartbeat, the threshold doubled for the
+         * first region.
         */
-        atomic_set(&reg->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1);
+        live_threshold = O2HB_LIVE_THRESHOLD;
+        if (o2hb_global_heartbeat_active()) {
+                spin_lock(&o2hb_live_lock);
+                if (o2hb_pop_count(&o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1)
+                        live_threshold <<= 1;
+                spin_unlock(&o2hb_live_lock);
+        }
+        atomic_set(&reg->hr_steady_iterations, live_threshold + 1);
        hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s",
                              reg->hr_item.ci_name);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 9fe5b8fd658f..8582e3f4f120 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2868,7 +2868,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
                bytes = blocks_wanted << sb->s_blocksize_bits;
        struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
        struct ocfs2_inode_info *oi = OCFS2_I(dir);
-        struct ocfs2_alloc_context *data_ac;
+        struct ocfs2_alloc_context *data_ac = NULL;
        struct ocfs2_alloc_context *meta_ac = NULL;
        struct buffer_head *dirdata_bh = NULL;
        struct buffer_head *dx_root_bh = NULL;
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 7540a492eaba..3b179d6cbde0 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1614,7 +1614,8 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
        spin_unlock(&dlm->spinlock);
        /* Support for global heartbeat and node info was added in 1.1 */
-        if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) {
+        if (dlm->dlm_locking_proto.pv_major > 1 ||
+            dlm->dlm_locking_proto.pv_minor > 0) {
                status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map);
                if (status) {
                        mlog_errno(status);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index fede57ed005f..84d166328cf7 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2574,6 +2574,9 @@ fail:
                res->state &= ~DLM_LOCK_RES_MIGRATING;
                wake = 1;
                spin_unlock(&res->spinlock);
+                if (dlm_is_host_down(ret))
+                        dlm_wait_for_node_death(dlm, target,
+                                                DLM_NODE_DEATH_WAIT_MAX);
                goto leave;
        }
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 41565ae52856..89659d6dc206 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1607,6 +1607,9 @@ static void ocfs2_calc_trunc_pos(struct inode *inode,
        range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
        if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
+                /*
+                 * remove an entire extent record.
+                 */
                *trunc_cpos = le32_to_cpu(rec->e_cpos);
                /*
                 * Skip holes if any.
@@ -1617,7 +1620,16 @@ static void ocfs2_calc_trunc_pos(struct inode *inode,
                *blkno = le64_to_cpu(rec->e_blkno);
                *trunc_end = le32_to_cpu(rec->e_cpos);
        } else if (range > trunc_start) {
+                /*
+                 * remove a partial extent record, which means we're
+                 * removing the last extent record.
+                 */
                *trunc_cpos = trunc_start;
+                /*
+                 * skip hole if any.
+                 */
+                if (range < *trunc_end)
+                        *trunc_end = range;
                *trunc_len = *trunc_end - trunc_start;
                coff = trunc_start - le32_to_cpu(rec->e_cpos);
                *blkno = le64_to_cpu(rec->e_blkno) +
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index b141a44605ca..295d56454e8b 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1260,6 +1260,9 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
 {
        struct ocfs2_journal *journal = osb->journal;
+        if (ocfs2_is_hard_readonly(osb))
+                return;
        /* No need to queue up our truncate_log as regular cleanup will catch
         * that */
        ocfs2_queue_recovery_completion(journal, osb->slot_num,
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 5d32749c896d..3c7606cff1ab 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3706,7 +3706,7 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
        context->cow_start = cow_start;
        context->cow_len = cow_len;
        context->ref_tree = ref_tree;
-        context->ref_root_bh = ref_root_bh;;
+        context->ref_root_bh = ref_root_bh;
        context->cow_object = xv;
        context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index d545e97d99c3..8ed4d3433199 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -255,7 +255,11 @@ ssize_t part_discard_alignment_show(struct device *dev,
                                   struct device_attribute *attr, char *buf)
 {
        struct hd_struct *p = dev_to_part(dev);
-        return sprintf(buf, "%u\n", p->discard_alignment);
+        struct gendisk *disk = dev_to_disk(dev);
+        return sprintf(buf, "%u\n",
+                        queue_limit_discard_alignment(&disk->queue->limits,
+                                                        p->start_sect));
 }
 ssize_t part_stat_show(struct device *dev,
@@ -449,8 +453,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
        p->start_sect = start;
        p->alignment_offset =
                queue_limit_alignment_offset(&disk->queue->limits, start);
-        p->discard_alignment =
-                queue_limit_discard_alignment(&disk->queue->limits, start);
        p->nr_sects = len;
        p->partno = partno;
        p->policy = get_disk_ro(disk);
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index ac0ccb5026a2..19d6750d1d6c 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -348,6 +348,12 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
                goto fail;
        }
+        /* Check that sizeof_partition_entry has the correct value */
+        if (le32_to_cpu((*gpt)->sizeof_partition_entry) != sizeof(gpt_entry)) {
+                pr_debug("GUID Partitition Entry Size check failed.\n");
+                goto fail;
+        }
        if (!(*ptes = alloc_read_gpt_entries(state, *gpt)))
                goto fail;
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index ce4f62440425..af9fdf046769 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -565,7 +565,7 @@ static bool ldm_validate_partition_table(struct parsed_partitions *state)
        data = read_part_sector(state, 0, &sect);
        if (!data) {
-                ldm_crit ("Disk read failed.");
+                ldm_info ("Disk read failed.");
                return false;
        }
@@ -1335,6 +1335,11 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
        list_add_tail (&f->list, frags);
 found:
+        if (rec >= f->num) {
+                ldm_error("REC value (%d) exceeds NUM value (%d)", rec, f->num);
+                return false;
+        }
        if (f->map & (1 << rec)) {
                ldm_error ("Duplicate VBLK, part %d.", rec);
                f->map &= 0x7F;                 /* Mark the group as broken */
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index df434c5f28fb..c1c729335924 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -20,6 +20,7 @@ proc-y	+= stat.o
 proc-y  += uptime.o
 proc-y  += version.o
 proc-y  += softirqs.o
+proc-y  += namespaces.o
 proc-$(CONFIG_PROC_SYSCTL)      += proc_sysctl.o
 proc-$(CONFIG_NET)              += proc_net.o
 proc-$(CONFIG_PROC_KCORE)       += kcore.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dfa532730e55..dc8bca72b002 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -600,7 +600,7 @@ static int proc_fd_access_allowed(struct inode *inode)
        return allowed;
 }
-static int proc_setattr(struct dentry *dentry, struct iattr *attr)
+int proc_setattr(struct dentry *dentry, struct iattr *attr)
 {
        int error;
        struct inode *inode = dentry->d_inode;
@@ -1736,8 +1736,7 @@ static int task_dumpable(struct task_struct *task)
        return 0;
 }
+struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
-static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
 {
        struct inode * inode;
        struct proc_inode *ei;
@@ -1779,7 +1778,7 @@ out_unlock:
        return NULL;
 }
-static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
        struct inode *inode = dentry->d_inode;
        struct task_struct *task;
@@ -1820,7 +1819,7 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
 * made this apply to all per process world readable and executable
 * directories.
 */
-static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
+int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
        struct inode *inode;
        struct task_struct *task;
@@ -1862,7 +1861,7 @@ static int pid_delete_dentry(const struct dentry * dentry)
        return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
 }
-static const struct dentry_operations pid_dentry_operations =
+const struct dentry_operations pid_dentry_operations =
 {
        .d_revalidate   = pid_revalidate,
        .d_delete       = pid_delete_dentry,
@@ -1870,9 +1869,6 @@ static const struct dentry_operations pid_dentry_operations =
 /* Lookups */
-typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
-                                struct task_struct *, const void *);
 /*
 * Fill a directory entry.
 *
@@ -1885,8 +1881,8 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
 * reported by readdir in sync with the inode numbers reported
 * by stat.
 */
-static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
-        char *name, int len,
+        const char *name, int len,
        instantiate_t instantiate, struct task_struct *task, const void *ptr)
 {
        struct dentry *child, *dir = filp->f_path.dentry;
@@ -2820,6 +2816,7 @@ static const struct pid_entry tgid_base_stuff[] = {
        DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
        DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
        DIR("fdinfo",     S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
+        DIR("ns",         S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
 #ifdef CONFIG_NET
        DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
 #endif
@@ -3168,6 +3165,7 @@ out_no_task:
 static const struct pid_entry tid_base_stuff[] = {
        DIR("fd",        S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
        DIR("fdinfo",    S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
+        DIR("ns",        S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
        REG("environ",   S_IRUSR, proc_environ_operations),
        INF("auxv",      S_IRUSR, proc_pid_auxv),
        ONE("status",    S_IRUGO, proc_pid_status),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index f1281339b6fa..f1637f17c37c 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -674,6 +674,7 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
        }
        return ent;
 }
+EXPORT_SYMBOL(proc_mkdir_mode);
 struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
                struct proc_dir_entry *parent)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d15aa1b1cc8f..74b48cfa1bb2 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -28,6 +28,7 @@ static void proc_evict_inode(struct inode *inode)
 {
        struct proc_dir_entry *de;
        struct ctl_table_header *head;
+        const struct proc_ns_operations *ns_ops;
        truncate_inode_pages(&inode->i_data, 0);
        end_writeback(inode);
@@ -44,6 +45,10 @@ static void proc_evict_inode(struct inode *inode)
                rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
                sysctl_head_put(head);
        }
+        /* Release any associated namespace */
+        ns_ops = PROC_I(inode)->ns_ops;
+        if (ns_ops && ns_ops->put)
+                ns_ops->put(PROC_I(inode)->ns);
 }
 static struct kmem_cache * proc_inode_cachep;
@@ -62,6 +67,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
        ei->pde = NULL;
        ei->sysctl = NULL;
        ei->sysctl_entry = NULL;
+        ei->ns = NULL;
+        ei->ns_ops = NULL;
        inode = &ei->vfs_inode;
        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
        return inode;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c03e8d3a3a5b..7838e5cfec14 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -61,6 +61,14 @@ extern const struct file_operations proc_pagemap_operations;
 extern const struct file_operations proc_net_operations;
 extern const struct inode_operations proc_net_inode_operations;
+struct proc_maps_private {        /* seq_file private data; embedded in struct numa_maps_private */
+        struct pid *pid;        /* numa_maps_open() stores proc_pid(inode) here */
+        struct task_struct *task;        /* handed to get_vma_policy() by show_numa_map() */
+#ifdef CONFIG_MMU
+        struct vm_area_struct *tail_vma;        /* show_numa_map() zeroes m->version on reaching this VMA */
+#endif
+};
 void proc_init_inodecache(void);
 static inline struct pid *proc_pid(struct inode *inode)
@@ -119,3 +127,21 @@ struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
 */
 int proc_readdir(struct file *, void *, filldir_t);
 struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
+/* Lookups */
+typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
+                                struct task_struct *, const void *);
+int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+        const char *name, int len,
+        instantiate_t instantiate, struct task_struct *task, const void *ptr);
+int pid_revalidate(struct dentry *dentry, struct nameidata *nd);
+struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
+extern const struct dentry_operations pid_dentry_operations;
+int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
+int proc_setattr(struct dentry *dentry, struct iattr *attr);
+extern const struct inode_operations proc_ns_dir_inode_operations;
+extern const struct file_operations proc_ns_dir_operations;
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
new file mode 100644
index 000000000000..781dec5bd682
--- /dev/null
+++ b/fs/proc/namespaces.c
@@ -0,0 +1,198 @@
+#include <linux/proc_fs.h>
+#include <linux/nsproxy.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/fs_struct.h>
+#include <linux/mount.h>
+#include <linux/path.h>
+#include <linux/namei.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <net/net_namespace.h>
+#include <linux/mnt_namespace.h>
+#include <linux/ipc_namespace.h>
+#include <linux/pid_namespace.h>
+#include "internal.h"
+static const struct proc_ns_operations *ns_entries[] = {        /* namespace types offered in the per-task "ns" directory; empty if no *_NS option is set */
+#ifdef CONFIG_NET_NS
+        &netns_operations,
+#endif
+#ifdef CONFIG_UTS_NS
+        &utsns_operations,
+#endif
+#ifdef CONFIG_IPC_NS
+        &ipcns_operations,
+#endif
+};
+static const struct file_operations ns_file_operations = {        /* i_fop of each ns file; proc_ns_fget() recognises ns files by this address */
+        .llseek         = no_llseek,
+};
+/*
+ * Create the inode behind one entry of a task's "ns" directory and attach
+ * it to @dentry.  @ptr is the struct proc_ns_operations of the namespace
+ * type being instantiated.
+ *
+ * Returns NULL once the dentry has been added and pid_revalidate() accepts
+ * it; returns ERR_PTR(-ENOENT) if no inode could be made, if ->get() yields
+ * no namespace, or if pid_revalidate() rejects the dentry.
+ */
+static struct dentry *proc_ns_instantiate(struct inode *dir,
+        struct dentry *dentry, struct task_struct *task, const void *ptr)
+{
+        const struct proc_ns_operations *ns_ops = ptr;
+        struct inode *inode;
+        struct proc_inode *ei;
+        struct dentry *error = ERR_PTR(-ENOENT);
+        inode = proc_pid_make_inode(dir->i_sb, task);
+        if (!inode)
+                goto out;
+        ei = PROC_I(inode);
+        inode->i_mode = S_IFREG|S_IRUSR;
+        inode->i_fop  = &ns_file_operations;
+        /*
+         * Obtain the namespace before publishing ns_ops in the inode.
+         * proc_evict_inode() calls ns_ops->put() whenever ns_ops is set, so
+         * ns_ops must still be NULL (as proc_alloc_inode() leaves it) when
+         * the iput() on the failure path runs; otherwise ->put() would be
+         * handed a NULL namespace.
+         */
+        ei->ns        = ns_ops->get(task);
+        if (!ei->ns)
+                goto out_iput;
+        ei->ns_ops    = ns_ops;
+        dentry->d_op = &pid_dentry_operations;
+        d_add(dentry, inode);
+        /* Close the race of the process dying before we return the dentry */
+        if (pid_revalidate(dentry, NULL))
+                error = NULL;
+out:
+        return error;
+out_iput:
+        iput(inode);
+        goto out;
+}
+/*
+ * Emit the directory entry for one namespace type: forwards to
+ * proc_fill_cache() with the entry's name, its length, and
+ * proc_ns_instantiate() as the instantiate callback.
+ */
+static int proc_ns_fill_cache(struct file *filp, void *dirent,
+        filldir_t filldir, struct task_struct *task,
+        const struct proc_ns_operations *ops)
+{
+        const char *name = ops->name;
+        return proc_fill_cache(filp, dirent, filldir, name, strlen(name),
+                                proc_ns_instantiate, task, ops);
+}
+/*
+ * readdir for a task's "ns" directory.  Positions 0 and 1 are "." and "..";
+ * position N + 2 is ns_entries[N].
+ *
+ * Returns -ENOENT if the task is gone, -EPERM if ptrace_may_access() denies
+ * read access, 0 if a fill callback reported failure, and 1 once the end of
+ * the table has been reached.
+ */
+static int proc_ns_dir_readdir(struct file *filp, void *dirent,
+                                filldir_t filldir)
+{
+        struct dentry *dir = filp->f_path.dentry;
+        struct inode *dir_inode = dir->d_inode;
+        struct task_struct *task = get_proc_task(dir_inode);
+        ino_t ino;
+        int pos, idx;
+        int ret;
+        if (!task)
+                return -ENOENT;
+        if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
+                ret = -EPERM;
+                goto out;
+        }
+        ret = 0;
+        pos = filp->f_pos;
+        if (pos == 0) {
+                ino = dir_inode->i_ino;
+                if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
+                        goto out;
+                pos++;
+                filp->f_pos++;
+        }
+        /* also entered straight after "." has been emitted above */
+        if (pos == 1) {
+                ino = parent_ino(dir);
+                if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
+                        goto out;
+                pos++;
+                filp->f_pos++;
+        }
+        /* every remaining position indexes the namespace table */
+        idx = pos - 2;
+        if (idx >= ARRAY_SIZE(ns_entries)) {
+                ret = 1;
+                goto out;
+        }
+        for (; idx < ARRAY_SIZE(ns_entries); idx++) {
+                if (proc_ns_fill_cache(filp, dirent, filldir,
+                                        task, ns_entries[idx]) < 0)
+                        goto out;
+                filp->f_pos++;
+        }
+        ret = 1;
+out:
+        put_task_struct(task);
+        return ret;
+}
+const struct file_operations proc_ns_dir_operations = {        /* file ops of the "ns" directory named in the base.c pid_entry tables */
+        .read           = generic_read_dir,
+        .readdir        = proc_ns_dir_readdir,
+};
+/*
+ * Look up one name in a task's "ns" directory.
+ *
+ * Returns ERR_PTR(-ENOENT) if the task is gone or the name matches no entry
+ * of ns_entries, ERR_PTR(-EPERM) if ptrace_may_access() refuses read access,
+ * and otherwise whatever proc_ns_instantiate() returns for the match.
+ */
+static struct dentry *proc_ns_dir_lookup(struct inode *dir,
+                                struct dentry *dentry, struct nameidata *nd)
+{
+        struct dentry *error;
+        struct task_struct *task = get_proc_task(dir);
+        const struct proc_ns_operations **entry, **end;
+        unsigned int len = dentry->d_name.len;
+        error = ERR_PTR(-ENOENT);
+        if (!task)
+                goto out_no_task;
+        error = ERR_PTR(-EPERM);
+        if (!ptrace_may_access(task, PTRACE_MODE_READ))
+                goto out;
+        /*
+         * Walk the half-open range [ns_entries, end).  The table is empty
+         * when none of CONFIG_{NET,UTS,IPC}_NS is enabled; a pointer to the
+         * "last element" would then be built from ARRAY_SIZE() - 1, which
+         * wraps around and points outside the array.
+         */
+        end = ns_entries + ARRAY_SIZE(ns_entries);
+        for (entry = ns_entries; entry < end; entry++) {
+                if (strlen((*entry)->name) != len)
+                        continue;
+                if (!memcmp(dentry->d_name.name, (*entry)->name, len))
+                        break;
+        }
+        error = ERR_PTR(-ENOENT);
+        if (entry == end)
+                goto out;
+        error = proc_ns_instantiate(dir, dentry, task, *entry);
+out:
+        put_task_struct(task);
+out_no_task:
+        return error;
+}
+const struct inode_operations proc_ns_dir_inode_operations = {        /* inode ops of the "ns" directory named in the base.c pid_entry tables */
+        .lookup         = proc_ns_dir_lookup,
+        .getattr        = pid_getattr,
+        .setattr        = proc_setattr,
+};
+/*
+ * Resolve @fd to a file, accepting only files whose f_op is
+ * ns_file_operations.
+ *
+ * Returns the file with the reference taken by fget(), ERR_PTR(-EBADF) if
+ * fget() finds nothing for @fd, or ERR_PTR(-EINVAL) (after dropping the
+ * reference) if the file is not an ns file.
+ */
+struct file *proc_ns_fget(int fd)
+{
+        struct file *file = fget(fd);
+        if (!file)
+                return ERR_PTR(-EBADF);
+        if (file->f_op == &ns_file_operations)
+                return file;
+        fput(file);
+        return ERR_PTR(-EINVAL);
+}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2e7addfd9803..db15935fa757 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -211,10 +211,10 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 {
        struct mm_struct *mm = vma->vm_mm;
        struct file *file = vma->vm_file;
-        int flags = vma->vm_flags;
+        vm_flags_t flags = vma->vm_flags;
        unsigned long ino = 0;
        unsigned long long pgoff = 0;
-        unsigned long start;
+        unsigned long start, end;
        dev_t dev = 0;
        int len;
@@ -227,13 +227,15 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
        /* We don't show the stack guard page in /proc/maps */
        start = vma->vm_start;
-        if (vma->vm_flags & VM_GROWSDOWN)
+        if (stack_guard_page_start(vma, start))
-                if (!vma_stack_continue(vma->vm_prev, vma->vm_start))
+                start += PAGE_SIZE;
-                        start += PAGE_SIZE;
+        end = vma->vm_end;
+        if (stack_guard_page_end(vma, end))
+                end -= PAGE_SIZE;
        seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
                        start,
-                        vma->vm_end,
+                        end,
                        flags & VM_READ ? 'r' : '-',
                        flags & VM_WRITE ? 'w' : '-',
                        flags & VM_EXEC ? 'x' : '-',
@@ -856,7 +858,192 @@ const struct file_operations proc_pagemap_operations = {
 #endif /* CONFIG_PROC_PAGE_MONITOR */
 #ifdef CONFIG_NUMA
-extern int show_numa_map(struct seq_file *m, void *v);
+struct numa_maps {        /* page counters filled in by gather_stats(); reset per VMA by show_numa_map() */
+        struct vm_area_struct *vma;        /* VMA being walked; passed to vm_normal_page() */
+        unsigned long pages;        /* every page counted */
+        unsigned long anon;        /* PageAnon() pages */
+        unsigned long active;        /* PageActive() or PageUnevictable() pages */
+        unsigned long writeback;        /* PageWriteback() pages */
+        unsigned long mapcount_max;        /* largest page_mapcount() seen */
+        unsigned long dirty;        /* pages with a dirty pte or PageDirty() */
+        unsigned long swapcache;        /* PageSwapCache() pages */
+        unsigned long node[MAX_NUMNODES];        /* page count indexed by page_to_nid() */
+};
+struct numa_maps_private {        /* m->private of the numa_maps seq_file; allocated in numa_maps_open() */
+        struct proc_maps_private proc_maps;        /* NOTE(review): the shared seq ops (m_start, ...) presumably read m->private as this type, so it likely has to stay the first member - confirm */
+        struct numa_maps md;        /* statistics area, cleared for each VMA by show_numa_map() */
+};
+/*
+ * Fold one page into the counters of @md.  @pte_dirty is the dirty state of
+ * the pte through which the page was found; a page counts as dirty if either
+ * that pte or the page itself is dirty.
+ */
+static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty)
+{
+        int mapcount = page_mapcount(page);
+        md->pages++;
+        md->node[page_to_nid(page)]++;
+        if (mapcount > md->mapcount_max)
+                md->mapcount_max = mapcount;
+        if (PageAnon(page))
+                md->anon++;
+        if (pte_dirty || PageDirty(page))
+                md->dirty++;
+        if (PageWriteback(page))
+                md->writeback++;
+        if (PageSwapCache(page))
+                md->swapcache++;
+        if (PageActive(page) || PageUnevictable(page))
+                md->active++;
+}
+/*
+ * pmd_entry callback for the numa_maps page walk: visits each pte in
+ * [@addr, @end) under the pte lock and passes every present, normal,
+ * non-reserved page that lives on an N_HIGH_MEMORY node to gather_stats().
+ * walk->private is the struct numa_maps being filled.  Always returns 0.
+ */
+static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
+                unsigned long end, struct mm_walk *walk)
+{
+        struct numa_maps *md = walk->private;
+        spinlock_t *ptl;
+        pte_t *first_pte;
+        pte_t *pte;
+        first_pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+        pte = first_pte;
+        do {
+                struct page *page;
+                if (pte_present(*pte)) {
+                        page = vm_normal_page(md->vma, addr, *pte);
+                        if (page && !PageReserved(page) &&
+                            node_isset(page_to_nid(page),
+                                       node_states[N_HIGH_MEMORY]))
+                                gather_stats(page, md, pte_dirty(*pte));
+                }
+        } while (pte++, addr += PAGE_SIZE, addr != end);
+        pte_unmap_unlock(first_pte, ptl);
+        return 0;
+}
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * hugetlb_entry callback for the numa_maps page walk: counts the page behind
+ * a non-empty huge pte via gather_stats().  walk->private is the
+ * struct numa_maps being filled.  Always returns 0.
+ */
+static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
+                unsigned long addr, unsigned long end, struct mm_walk *walk)
+{
+        struct page *page;
+        if (pte_none(*pte))
+                return 0;
+        page = pte_page(*pte);
+        if (page)
+                gather_stats(page, walk->private, pte_dirty(*pte));
+        return 0;
+}
+#else
+/* Without CONFIG_HUGETLB_PAGE the callback does nothing and returns 0. */
+static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
+                unsigned long addr, unsigned long end, struct mm_walk *walk)
+{
+        return 0;
+}
+#endif
+/*
+ * seq_file show routine for numa_maps: prints one line for the VMA @v with
+ * its start address, memory policy string, an optional file/heap/stack tag
+ * and, when the page walk counted any pages, the per-VMA and per-node
+ * counters.  Always returns 0.
+ */
+static int show_numa_map(struct seq_file *m, void *v)
+{
+        struct numa_maps_private *numa_priv = m->private;
+        struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
+        struct numa_maps *md = &numa_priv->md;
+        struct vm_area_struct *vma = v;
+        struct mm_struct *mm = vma->vm_mm;
+        struct file *file = vma->vm_file;
+        struct mm_walk walk = {};
+        struct mempolicy *pol;
+        char buffer[50];
+        int nid;
+        if (!mm)
+                return 0;
+        /* Ensure we start with an empty set of numa_maps statistics. */
+        memset(md, 0, sizeof(*md));
+        md->vma = vma;
+        walk.mm = mm;
+        walk.private = md;
+        walk.pmd_entry = gather_pte_stats;
+        walk.hugetlb_entry = gather_hugetbl_stats;
+        pol = get_vma_policy(proc_priv->task, vma, vma->vm_start);
+        mpol_to_str(buffer, sizeof(buffer), pol, 0);
+        mpol_cond_put(pol);
+        seq_printf(m, "%08lx %s", vma->vm_start, buffer);
+        if (file) {
+                seq_printf(m, " file=");
+                seq_path(m, &file->f_path, "\n\t= ");
+        } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
+                seq_printf(m, " heap");
+        } else if (vma->vm_start <= mm->start_stack &&
+                        vma->vm_end >= mm->start_stack) {
+                seq_printf(m, " stack");
+        }
+        walk_page_range(vma->vm_start, vma->vm_end, &walk);
+        /* counters are only printed when the walk found at least one page */
+        if (md->pages) {
+                if (md->anon)
+                        seq_printf(m, " anon=%lu", md->anon);
+                if (md->dirty)
+                        seq_printf(m, " dirty=%lu", md->dirty);
+                if (md->pages != md->anon && md->pages != md->dirty)
+                        seq_printf(m, " mapped=%lu", md->pages);
+                if (md->mapcount_max > 1)
+                        seq_printf(m, " mapmax=%lu", md->mapcount_max);
+                if (md->swapcache)
+                        seq_printf(m, " swapcache=%lu", md->swapcache);
+                if (md->active < md->pages && !is_vm_hugetlb_page(vma))
+                        seq_printf(m, " active=%lu", md->active);
+                if (md->writeback)
+                        seq_printf(m, " writeback=%lu", md->writeback);
+                for_each_node_state(nid, N_HIGH_MEMORY)
+                        if (md->node[nid])
+                                seq_printf(m, " N%d=%lu", nid, md->node[nid]);
+        }
+        seq_putc(m, '\n');
+        /* m->version is only updated while the seq buffer has room left */
+        if (m->count < m->size) {
+                if (vma != proc_priv->tail_vma)
+                        m->version = vma->vm_start;
+                else
+                        m->version = 0;
+        }
+        return 0;
+}
 static const struct seq_operations proc_pid_numa_maps_op = {
        .start  = m_start,
@@ -867,7 +1054,20 @@ static const struct seq_operations proc_pid_numa_maps_op = {
 static int numa_maps_open(struct inode *inode, struct file *file)
 {
-        return do_maps_open(inode, file, &proc_pid_numa_maps_op);
+        struct numa_maps_private *priv;  /* per-open state, zero-initialised by kzalloc() below */
+        int ret = -ENOMEM;  /* returned unchanged when the allocation fails */
+        priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+        if (priv) {
+                priv->proc_maps.pid = proc_pid(inode);
+                ret = seq_open(file, &proc_pid_numa_maps_op);
+                if (!ret) {
+                        struct seq_file *m = file->private_data;
+                        m->private = priv;  /* hand priv to the seq_file; presumably freed by the seq_release_private release hook - confirm */
+                } else {
+                        kfree(priv);  /* seq_open() failed, priv was never attached to anything */
+                }
+        }
+        return ret;  /* 0 on success, -ENOMEM or the seq_open() error otherwise */
 }
 const struct file_operations proc_numa_maps_operations = {
@@ -876,4 +1076,4 @@ const struct file_operations proc_numa_maps_operations = {
        .llseek         = seq_lseek,
        .release        = seq_release_private,
 };
-#endif
+#endif /* CONFIG_NUMA */
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index f835a25625ff..f2c3ff20ea68 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -152,21 +152,27 @@ EXPORT_SYMBOL_GPL(pstore_register);
 void pstore_get_records(void)
 {
        struct pstore_info *psi = psinfo;
-        size_t                  size;
+        ssize_t                 size;
        u64                     id;
        enum pstore_type_id     type;
        struct timespec         time;
-        int                     failed = 0;
+        int                     failed = 0, rc;
        if (!psi)
                return;
        mutex_lock(&psinfo->buf_mutex);
+        rc = psi->open(psi);
+        if (rc)
+                goto out;
        while ((size = psi->read(&id, &type, &time)) > 0) {
-                if (pstore_mkfile(type, psi->name, id, psi->buf, size,
+                if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
                                  time, psi->erase))
                        failed++;
        }
+        psi->close(psi);
+out:
        mutex_unlock(&psinfo->buf_mutex);
        if (failed)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index d3c032f5fa0a..5b572c89e6c4 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -691,8 +691,11 @@ static void prune_dqcache(int count)
 * This is called from kswapd when we think we need some
 * more memory
 */
-static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int shrink_dqcache_memory(struct shrinker *shrink,
+                                 struct shrink_control *sc)
 {
+        int nr = sc->nr_to_scan;
        if (nr) {
                spin_lock(&dq_list_lock);
                prune_dqcache(nr);
diff --git a/fs/splice.c b/fs/splice.c
index 50a5d978da16..aa866d309695 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -162,6 +162,14 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
        .get = generic_pipe_buf_get,
 };
+static void wakeup_pipe_readers(struct pipe_inode_info *pipe)  /* notify potential readers of @pipe; shared by the three call sites this patch converts */
+{
+        smp_mb();  /* barrier precedes the lockless waitqueue_active() test; presumably orders the pipe update against it - confirm */
+        if (waitqueue_active(&pipe->wait))  /* skip the wake-up call when nobody is queued */
+                wake_up_interruptible(&pipe->wait);
+        kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);  /* SIGIO/POLL_IN to the fasync_readers list, sent regardless of the waitqueue test */
+}
 /**
 * splice_to_pipe - fill passed data into a pipe
 * @pipe:       pipe to fill
@@ -247,12 +255,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
        pipe_unlock(pipe);
-        if (do_wakeup) {
+        if (do_wakeup)
-                smp_mb();
+                wakeup_pipe_readers(pipe);
-                if (waitqueue_active(&pipe->wait))
-                        wake_up_interruptible(&pipe->wait);
-                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
-        }
        while (page_nr < spd_pages)
                spd->spd_release(spd, page_nr++);
@@ -1892,12 +1896,9 @@ retry:
        /*
         * If we put data in the output pipe, wakeup any potential readers.
         */
-        if (ret > 0) {
+        if (ret > 0)
-                smp_mb();
+                wakeup_pipe_readers(opipe);
-                if (waitqueue_active(&opipe->wait))
-                        wake_up_interruptible(&opipe->wait);
-                kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
-        }
        if (input_wakeup)
                wakeup_pipe_writers(ipipe);
@@ -1976,12 +1977,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
        /*
         * If we put data in the output pipe, wakeup any potential readers.
         */
-        if (ret > 0) {
+        if (ret > 0)
-                smp_mb();
+                wakeup_pipe_readers(opipe);
-                if (waitqueue_active(&opipe->wait))
-                        wake_up_interruptible(&opipe->wait);
-                kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
-        }
        return ret;
 }
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index efc309fa3035..7797218d0b30 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -42,7 +42,7 @@ config SQUASHFS_LZO
        select LZO_DECOMPRESS
        help
          Saying Y here includes support for reading Squashfs file systems
-          compressed with LZO compresssion.  LZO compression is mainly
+          compressed with LZO compression.  LZO compression is mainly
          aimed at embedded systems with slower CPUs where the overheads
          of zlib are too high.
@@ -57,7 +57,7 @@ config SQUASHFS_XZ
        select XZ_DEC
        help
          Saying Y here includes support for reading Squashfs file systems
-          compressed with XZ compresssion.  XZ gives better compression than
+          compressed with XZ compression.  XZ gives better compression than
          the default zlib compression, at the expense of greater CPU and
          memory overhead.
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index c37b520132ff..4b5a3fbb1f1f 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -29,7 +29,7 @@
 * plus functions layered ontop of the generic cache implementation to
 * access the metadata and fragment caches.
 *
- * To avoid out of memory and fragmentation isssues with vmalloc the cache
+ * To avoid out of memory and fragmentation issues with vmalloc the cache
 * uses sequences of kmalloced PAGE_CACHE_SIZE buffers.
 *
 * It should be noted that the cache is not used for file datablocks, these
diff --git a/fs/super.c b/fs/super.c
index 8a06881b1920..c04f7e0b7ed2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -948,8 +948,7 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
         * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
         * but s_maxbytes was an unsigned long long for many releases. Throw
         * this warning for a little while to try and catch filesystems that
-         * violate this rule. This warning should be either removed or
+         * violate this rule.
-         * converted to a BUG() in 2.6.34.
         */
        WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
                "negative value (%lld)\n", type->name, sb->s_maxbytes);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index da3fefe91a8f..1ad8c93c1b85 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -24,13 +24,6 @@
 #include "sysfs.h"
-/* used in crash dumps to help with debugging */
-static char last_sysfs_file[PATH_MAX];
-void sysfs_printk_last_file(void)
-{
-        printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file);
-}
 /*
 * There's one sysfs_buffer for each open file and one
 * sysfs_open_dirent for each sysfs_dirent with one or more open
@@ -337,11 +330,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
        struct sysfs_buffer *buffer;
        const struct sysfs_ops *ops;
        int error = -EACCES;
-        char *p;
-        p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
-        if (!IS_ERR(p))
-                memmove(last_sysfs_file, p, strlen(p) + 1);
        /* need attr_sd for attr and ops, its parent for kobj */
        if (!sysfs_get_active(attr_sd))
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index c8769dc222d8..194414f8298c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -101,9 +101,9 @@ int sysfs_create_group(struct kobject *kobj,
 }
 /**
- * sysfs_update_group - given a directory kobject, create an attribute group
+ * sysfs_update_group - given a directory kobject, update an attribute group
- * @kobj:       The kobject to create the group on
+ * @kobj:       The kobject to update the group on
- * @grp:        The attribute group to create
+ * @grp:        The attribute group to update
 *
 * This function updates an attribute group.  Unlike
 * sysfs_create_group(), it will explicitly not warn or error if any
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 8c4fc1425b3e..f67acbdda5e8 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -22,16 +22,24 @@
 #include <linux/anon_inodes.h>
 #include <linux/timerfd.h>
 #include <linux/syscalls.h>
+#include <linux/rcupdate.h>
 struct timerfd_ctx {
        struct hrtimer tmr;
        ktime_t tintv;
+        ktime_t moffs;  /* snapshot of ktime_get_monotonic_offset(); KTIME_MAX marks a pending cancel */
        wait_queue_head_t wqh;
        u64 ticks;
        int expired;
        int clockid;
+        struct rcu_head rcu;  /* used by kfree_rcu() in timerfd_release() */
+        struct list_head clist;  /* linkage on the global cancel_list */
+        bool might_cancel;  /* true while the context is linked on cancel_list */
 };
+static LIST_HEAD(cancel_list);  /* contexts to notify from timerfd_clock_was_set(); read under RCU */
+static DEFINE_SPINLOCK(cancel_lock);  /* serialises list_add_rcu()/list_del_rcu() on cancel_list */
 /*
 * This gets called when the timer event triggers. We set the "expired"
 * flag, but we do not re-arm the timer (in case it's necessary,
@@ -51,6 +59,63 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
        return HRTIMER_NORESTART;
 }
+/*
+ * Called when the clock was set to cancel the timers in the cancel
+ * list.
+ *
+ * Walks cancel_list under rcu_read_lock(), skipping entries whose
+ * might_cancel flag is already clear.  For each remaining context the
+ * saved monotonic offset is compared, under ctx->wqh.lock, with the
+ * current one; when they differ the saved value is overwritten with
+ * KTIME_MAX and the waiters on ctx->wqh are woken.  timerfd_canceled()
+ * later tests for that KTIME_MAX marker.
+ */
+void timerfd_clock_was_set(void)
+{
+        ktime_t moffs = ktime_get_monotonic_offset();
+        struct timerfd_ctx *ctx;
+        unsigned long flags;
+        rcu_read_lock();
+        list_for_each_entry_rcu(ctx, &cancel_list, clist) {
+                if (!ctx->might_cancel)
+                        continue;
+                spin_lock_irqsave(&ctx->wqh.lock, flags);
+                if (ctx->moffs.tv64 != moffs.tv64) {
+                        ctx->moffs.tv64 = KTIME_MAX;
+                        wake_up_locked(&ctx->wqh);
+                }
+                spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+        }
+        rcu_read_unlock();
+}
+static void timerfd_remove_cancel(struct timerfd_ctx *ctx)  /* unlink @ctx from cancel_list if it is currently linked */
+{
+        if (ctx->might_cancel) {  /* flag doubles as "is on cancel_list" */
+                ctx->might_cancel = false;  /* cleared first; timerfd_clock_was_set() skips entries with the flag clear */
+                spin_lock(&cancel_lock);
+                list_del_rcu(&ctx->clist);  /* RCU removal: readers walk the list under rcu_read_lock() */
+                spin_unlock(&cancel_lock);
+        }
+}
+static bool timerfd_canceled(struct timerfd_ctx *ctx)  /* true once per cancel mark left by timerfd_clock_was_set() */
+{
+        if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)  /* not a cancelable timer, or no KTIME_MAX marker set */
+                return false;
+        ctx->moffs = ktime_get_monotonic_offset();  /* consume the marker: re-snapshot so the next call returns false */
+        return true;
+}
+static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)  /* link or unlink @ctx on cancel_list according to @flags */
+{
+        if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) &&
+            (flags & TFD_TIMER_CANCEL_ON_SET)) {  /* only absolute CLOCK_REALTIME timers that asked for cancel-on-set qualify */
+                if (!ctx->might_cancel) {  /* already linked: nothing to do */
+                        ctx->might_cancel = true;  /* NOTE(review): flag is written before cancel_lock is taken - confirm callers serialise concurrent settime calls */
+                        spin_lock(&cancel_lock);
+                        list_add_rcu(&ctx->clist, &cancel_list);
+                        spin_unlock(&cancel_lock);
+                }
+        } else if (ctx->might_cancel) {  /* same test is repeated inside timerfd_remove_cancel() */
+                timerfd_remove_cancel(ctx);
+        }
+}
 static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
 {
        ktime_t remaining;
@@ -59,11 +124,12 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
        return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
 }
-static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
+static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
-                          const struct itimerspec *ktmr)
+                         const struct itimerspec *ktmr)
 {
        enum hrtimer_mode htmode;
        ktime_t texp;
+        int clockid = ctx->clockid;
        htmode = (flags & TFD_TIMER_ABSTIME) ?
                HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
@@ -72,19 +138,24 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
        ctx->expired = 0;
        ctx->ticks = 0;
        ctx->tintv = timespec_to_ktime(ktmr->it_interval);
-        hrtimer_init(&ctx->tmr, ctx->clockid, htmode);
+        hrtimer_init(&ctx->tmr, clockid, htmode);
        hrtimer_set_expires(&ctx->tmr, texp);
        ctx->tmr.function = timerfd_tmrproc;
-        if (texp.tv64 != 0)
+        if (texp.tv64 != 0) {
                hrtimer_start(&ctx->tmr, texp, htmode);
+                if (timerfd_canceled(ctx))
+                        return -ECANCELED;
+        }
+        return 0;
 }
 static int timerfd_release(struct inode *inode, struct file *file)
 {
        struct timerfd_ctx *ctx = file->private_data;
+        timerfd_remove_cancel(ctx);  /* unlink from cancel_list before the context is freed */
        hrtimer_cancel(&ctx->tmr);
-        kfree(ctx);
+        kfree_rcu(ctx, rcu);  /* deferred free: timerfd_clock_was_set() walks cancel_list under rcu_read_lock() */
        return 0;
 }
@@ -118,8 +189,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
                res = -EAGAIN;
        else
                res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
+        /*
+         * If clock has changed, we do not care about the
+         * ticks and we do not rearm the timer. Userspace must
+         * reevaluate anyway.
+         */
+        if (timerfd_canceled(ctx)) {
+                ctx->ticks = 0;
+                ctx->expired = 0;
+                res = -ECANCELED;
+        }
        if (ctx->ticks) {
                ticks = ctx->ticks;
                if (ctx->expired && ctx->tintv.tv64) {
                        /*
                         * If tintv.tv64 != 0, this is a periodic timer that
@@ -183,6 +267,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
        init_waitqueue_head(&ctx->wqh);
        ctx->clockid = clockid;
        hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
+        ctx->moffs = ktime_get_monotonic_offset();
        ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
                               O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
@@ -199,6 +284,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
        struct file *file;
        struct timerfd_ctx *ctx;
        struct itimerspec ktmr, kotmr;
+        int ret;
        if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
                return -EFAULT;
@@ -213,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
                return PTR_ERR(file);
        ctx = file->private_data;
+        timerfd_setup_cancel(ctx, flags);
        /*
         * We need to stop the existing timer before reprogramming
         * it to the new values.
@@ -240,14 +328,14 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
        /*
         * Re-program the timer to the new value ...
         */
-        timerfd_setup(ctx, flags, &ktmr);
+        ret = timerfd_setup(ctx, flags, &ktmr);
        spin_unlock_irq(&ctx->wqh.lock);
        fput(file);
        if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
                return -EFAULT;
-        return 0;
+        return ret;
 }
 SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 8b3a7da531eb..315de66e52b2 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -106,7 +106,7 @@ static long long get_liability(struct ubifs_info *c)
        long long liab;
        spin_lock(&c->space_lock);
-        liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth;
+        liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth;
        spin_unlock(&c->space_lock);
        return liab;
 }
@@ -180,7 +180,7 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
        int idx_lebs;
        long long idx_size;
-        idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
+        idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx;
        /* And make sure we have thrice the index size of space reserved */
        idx_size += idx_size << 1;
        /*
@@ -292,13 +292,13 @@ static int can_use_rp(struct ubifs_info *c)
 * budgeted index space to the size of the current index, multiplies this by 3,
 * and makes sure this does not exceed the amount of free LEBs.
 *
- * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
+ * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables:
 * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
 *    be large, because UBIFS does not do any index consolidation as long as
 *    there is free space. IOW, the index may take a lot of LEBs, but the LEBs
 *    will contain a lot of dirt.
- * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW,
+ * o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW,
- *    the index may be consolidated to take up to @c->min_idx_lebs LEBs.
+ *    the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs.
 *
 * This function returns zero in case of success, and %-ENOSPC in case of
 * failure.
@@ -343,13 +343,13 @@ static int do_budget_space(struct ubifs_info *c)
               c->lst.taken_empty_lebs;
        if (unlikely(rsvd_idx_lebs > lebs)) {
                dbg_budg("out of indexing space: min_idx_lebs %d (old %d), "
-                         "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs,
+                         "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs,
                         rsvd_idx_lebs);
                return -ENOSPC;
        }
        available = ubifs_calc_available(c, min_idx_lebs);
-        outstanding = c->budg_data_growth + c->budg_dd_growth;
+        outstanding = c->bi.data_growth + c->bi.dd_growth;
        if (unlikely(available < outstanding)) {
                dbg_budg("out of data space: available %lld, outstanding %lld",
@@ -360,7 +360,7 @@ static int do_budget_space(struct ubifs_info *c)
        if (available - outstanding <= c->rp_size && !can_use_rp(c))
                return -ENOSPC;
-        c->min_idx_lebs = min_idx_lebs;
+        c->bi.min_idx_lebs = min_idx_lebs;
        return 0;
 }
@@ -393,11 +393,11 @@ static int calc_data_growth(const struct ubifs_info *c,
 {
        int data_growth;
-        data_growth = req->new_ino  ? c->inode_budget : 0;
+        data_growth = req->new_ino  ? c->bi.inode_budget : 0;
        if (req->new_page)
-                data_growth += c->page_budget;
+                data_growth += c->bi.page_budget;
        if (req->new_dent)
-                data_growth += c->dent_budget;
+                data_growth += c->bi.dent_budget;
        data_growth += req->new_ino_d;
        return data_growth;
 }
@@ -413,12 +413,12 @@ static int calc_dd_growth(const struct ubifs_info *c,
 {
        int dd_growth;
-        dd_growth = req->dirtied_page ? c->page_budget : 0;
+        dd_growth = req->dirtied_page ? c->bi.page_budget : 0;
        if (req->dirtied_ino)
-                dd_growth += c->inode_budget << (req->dirtied_ino - 1);
+                dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1);
        if (req->mod_dent)
-                dd_growth += c->dent_budget;
+                dd_growth += c->bi.dent_budget;
        dd_growth += req->dirtied_ino_d;
        return dd_growth;
 }
@@ -460,19 +460,19 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
 again:
        spin_lock(&c->space_lock);
-        ubifs_assert(c->budg_idx_growth >= 0);
+        ubifs_assert(c->bi.idx_growth >= 0);
-        ubifs_assert(c->budg_data_growth >= 0);
+        ubifs_assert(c->bi.data_growth >= 0);
-        ubifs_assert(c->budg_dd_growth >= 0);
+        ubifs_assert(c->bi.dd_growth >= 0);
-        if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) {
+        if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) {
                dbg_budg("no space");
                spin_unlock(&c->space_lock);
                return -ENOSPC;
        }
-        c->budg_idx_growth += idx_growth;
+        c->bi.idx_growth += idx_growth;
-        c->budg_data_growth += data_growth;
+        c->bi.data_growth += data_growth;
-        c->budg_dd_growth += dd_growth;
+        c->bi.dd_growth += dd_growth;
        err = do_budget_space(c);
        if (likely(!err)) {
@@ -484,9 +484,9 @@ again:
        }
        /* Restore the old values */
-        c->budg_idx_growth -= idx_growth;
+        c->bi.idx_growth -= idx_growth;
-        c->budg_data_growth -= data_growth;
+        c->bi.data_growth -= data_growth;
-        c->budg_dd_growth -= dd_growth;
+        c->bi.dd_growth -= dd_growth;
        spin_unlock(&c->space_lock);
        if (req->fast) {
@@ -506,9 +506,9 @@ again:
                        goto again;
                }
                dbg_budg("FS is full, -ENOSPC");
-                c->nospace = 1;
+                c->bi.nospace = 1;
                if (can_use_rp(c) || c->rp_size == 0)
-                        c->nospace_rp = 1;
+                        c->bi.nospace_rp = 1;
                smp_wmb();
        } else
                ubifs_err("cannot budget space, error %d", err);
@@ -523,8 +523,8 @@ again:
 * This function releases the space budgeted by 'ubifs_budget_space()'. Note,
 * since the index changes (which were budgeted for in @req->idx_growth) will
 * only be written to the media on commit, this function moves the index budget
- * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be
+ * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed
- * zeroed by the commit operation.
+ * by the commit operation.
 */
 void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
 {
@@ -553,23 +553,23 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
        if (!req->data_growth && !req->dd_growth)
                return;
-        c->nospace = c->nospace_rp = 0;
+        c->bi.nospace = c->bi.nospace_rp = 0;
        smp_wmb();
        spin_lock(&c->space_lock);
-        c->budg_idx_growth -= req->idx_growth;
+        c->bi.idx_growth -= req->idx_growth;
-        c->budg_uncommitted_idx += req->idx_growth;
+        c->bi.uncommitted_idx += req->idx_growth;
-        c->budg_data_growth -= req->data_growth;
+        c->bi.data_growth -= req->data_growth;
-        c->budg_dd_growth -= req->dd_growth;
+        c->bi.dd_growth -= req->dd_growth;
-        c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+        c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
-        ubifs_assert(c->budg_idx_growth >= 0);
+        ubifs_assert(c->bi.idx_growth >= 0);
-        ubifs_assert(c->budg_data_growth >= 0);
+        ubifs_assert(c->bi.data_growth >= 0);
-        ubifs_assert(c->budg_dd_growth >= 0);
+        ubifs_assert(c->bi.dd_growth >= 0);
-        ubifs_assert(c->min_idx_lebs < c->main_lebs);
+        ubifs_assert(c->bi.min_idx_lebs < c->main_lebs);
-        ubifs_assert(!(c->budg_idx_growth & 7));
+        ubifs_assert(!(c->bi.idx_growth & 7));
-        ubifs_assert(!(c->budg_data_growth & 7));
+        ubifs_assert(!(c->bi.data_growth & 7));
-        ubifs_assert(!(c->budg_dd_growth & 7));
+        ubifs_assert(!(c->bi.dd_growth & 7));
        spin_unlock(&c->space_lock);
 }
@@ -586,13 +586,13 @@ void ubifs_convert_page_budget(struct ubifs_info *c)
 {
        spin_lock(&c->space_lock);
        /* Release the index growth reservation */
-        c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+        c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
        /* Release the data growth reservation */
-        c->budg_data_growth -= c->page_budget;
+        c->bi.data_growth -= c->bi.page_budget;
        /* Increase the dirty data growth reservation instead */
-        c->budg_dd_growth += c->page_budget;
+        c->bi.dd_growth += c->bi.page_budget;
        /* And re-calculate the indexing space reservation */
-        c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+        c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
        spin_unlock(&c->space_lock);
 }
@@ -612,7 +612,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
        memset(&req, 0, sizeof(struct ubifs_budget_req));
        /* The "no space" flags will be cleared because dd_growth is > 0 */
-        req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8);
+        req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8);
        ubifs_release_budget(c, &req);
 }
@@ -682,9 +682,9 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
        int rsvd_idx_lebs, lebs;
        long long available, outstanding, free;
-        ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
+        ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
-        outstanding = c->budg_data_growth + c->budg_dd_growth;
+        outstanding = c->bi.data_growth + c->bi.dd_growth;
-        available = ubifs_calc_available(c, c->min_idx_lebs);
+        available = ubifs_calc_available(c, c->bi.min_idx_lebs);
        /*
         * When reporting free space to user-space, UBIFS guarantees that it is
@@ -697,8 +697,8 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
         * Note, the calculations below are similar to what we have in
         * 'do_budget_space()', so refer there for comments.
         */
-        if (c->min_idx_lebs > c->lst.idx_lebs)
+        if (c->bi.min_idx_lebs > c->lst.idx_lebs)
-                rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
+                rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
        else
                rsvd_idx_lebs = 0;
        lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 1bd01ded7123..87cd0ead8633 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -182,7 +182,7 @@ static int do_commit(struct ubifs_info *c)
        c->mst_node->root_len    = cpu_to_le32(zroot.len);
        c->mst_node->ihead_lnum  = cpu_to_le32(c->ihead_lnum);
        c->mst_node->ihead_offs  = cpu_to_le32(c->ihead_offs);
-        c->mst_node->index_size  = cpu_to_le64(c->old_idx_sz);
+        c->mst_node->index_size  = cpu_to_le64(c->bi.old_idx_sz);
        c->mst_node->lpt_lnum    = cpu_to_le32(c->lpt_lnum);
        c->mst_node->lpt_offs    = cpu_to_le32(c->lpt_offs);
        c->mst_node->nhead_lnum  = cpu_to_le32(c->nhead_lnum);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 004d3745dc45..0bb2bcef0de9 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -34,7 +34,6 @@
 #include <linux/moduleparam.h>
 #include <linux/debugfs.h>
 #include <linux/math64.h>
-#include <linux/slab.h>
 #ifdef CONFIG_UBIFS_FS_DEBUG
@@ -43,15 +42,12 @@ DEFINE_SPINLOCK(dbg_lock);
 static char dbg_key_buf0[128];
 static char dbg_key_buf1[128];
-unsigned int ubifs_msg_flags;
 unsigned int ubifs_chk_flags;
 unsigned int ubifs_tst_flags;
-module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR);
 module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
 module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(debug_msgs, "Debug message type flags");
 MODULE_PARM_DESC(debug_chks, "Debug check flags");
 MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
@@ -317,6 +313,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
                printk(KERN_DEBUG "\tflags          %#x\n", sup_flags);
                printk(KERN_DEBUG "\t  big_lpt      %u\n",
                       !!(sup_flags & UBIFS_FLG_BIGLPT));
+                printk(KERN_DEBUG "\t  space_fixup  %u\n",
+                       !!(sup_flags & UBIFS_FLG_SPACE_FIXUP));
                printk(KERN_DEBUG "\tmin_io_size    %u\n",
                       le32_to_cpu(sup->min_io_size));
                printk(KERN_DEBUG "\tleb_size       %u\n",
@@ -602,7 +600,7 @@ void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
        spin_unlock(&dbg_lock);
 }
-void dbg_dump_budg(struct ubifs_info *c)
+void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
 {
        int i;
        struct rb_node *rb;
@@ -610,26 +608,42 @@ void dbg_dump_budg(struct ubifs_info *c)
        struct ubifs_gced_idx_leb *idx_gc;
        long long available, outstanding, free;
-        ubifs_assert(spin_is_locked(&c->space_lock));
+        spin_lock(&c->space_lock);
        spin_lock(&dbg_lock);
-        printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, "
+        printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, "
-               "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid,
+               "total budget sum %lld\n", current->pid,
-               c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth);
+               bi->data_growth + bi->dd_growth,
-        printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, "
+               bi->data_growth + bi->dd_growth + bi->idx_growth);
-               "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth,
+        printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, "
-               c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth,
+               "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth,
-               c->freeable_cnt);
+               bi->idx_growth);
-        printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, "
+        printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, "
-               "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs,
+               "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz,
-               c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt);
+               bi->uncommitted_idx);
+        printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n",
+               bi->page_budget, bi->inode_budget, bi->dent_budget);
+        printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n",
+               bi->nospace, bi->nospace_rp);
+        printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
+               c->dark_wm, c->dead_wm, c->max_idx_node_sz);
+        if (bi != &c->bi)
+                /*
+                 * If we are dumping saved budgeting data, do not print
+                 * additional information which is about the current state, not
+                 * the old one which corresponded to the saved budgeting data.
+                 */
+                goto out_unlock;
+        printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
+               c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt);
        printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
               "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
               atomic_long_read(&c->dirty_zn_cnt),
               atomic_long_read(&c->clean_zn_cnt));
-        printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
-               c->dark_wm, c->dead_wm, c->max_idx_node_sz);
        printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
               c->gc_lnum, c->ihead_lnum);
        /* If we are in R/O mode, journal heads do not exist */
        if (c->jheads)
                for (i = 0; i < c->jhead_cnt; i++)
@@ -648,13 +662,15 @@ void dbg_dump_budg(struct ubifs_info *c)
        printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
        /* Print budgeting predictions */
-        available = ubifs_calc_available(c, c->min_idx_lebs);
+        available = ubifs_calc_available(c, c->bi.min_idx_lebs);
-        outstanding = c->budg_data_growth + c->budg_dd_growth;
+        outstanding = c->bi.data_growth + c->bi.dd_growth;
        free = ubifs_get_free_space_nolock(c);
        printk(KERN_DEBUG "Budgeting predictions:\n");
        printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n",
               available, outstanding, free);
+out_unlock:
        spin_unlock(&dbg_lock);
+        spin_unlock(&c->space_lock);
 }
 void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
@@ -729,7 +745,13 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
                if (bud->lnum == lp->lnum) {
                        int head = 0;
                        for (i = 0; i < c->jhead_cnt; i++) {
-                                if (lp->lnum == c->jheads[i].wbuf.lnum) {
+                                /*
+                                 * Note, if we are in R/O mode or in the middle
+                                 * of mounting/re-mounting, the write-buffers do
+                                 * not exist.
+                                 */
+                                if (c->jheads &&
+                                    lp->lnum == c->jheads[i].wbuf.lnum) {
                                        printk(KERN_CONT ", jhead %s",
                                               dbg_jhead(i));
                                        head = 1;
@@ -976,6 +998,8 @@ void dbg_save_space_info(struct ubifs_info *c)
        spin_lock(&c->space_lock);
        memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats));
+        memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info));
+        d->saved_idx_gc_cnt = c->idx_gc_cnt;
        /*
         * We use a dirty hack here and zero out @c->freeable_cnt, because it
@@ -1042,14 +1066,14 @@ int dbg_check_space_info(struct ubifs_info *c)
 out:
        ubifs_msg("saved lprops statistics dump");
        dbg_dump_lstats(&d->saved_lst);
-        ubifs_get_lp_stats(c, &lst);
+        ubifs_msg("saved budgeting info dump");
+        dbg_dump_budg(c, &d->saved_bi);
+        ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt);
        ubifs_msg("current lprops statistics dump");
+        ubifs_get_lp_stats(c, &lst);
        dbg_dump_lstats(&lst);
+        ubifs_msg("current budgeting info dump");
-        spin_lock(&c->space_lock);
+        dbg_dump_budg(c, &c->bi);
-        dbg_dump_budg(c);
-        spin_unlock(&c->space_lock);
        dump_stack();
        return -EINVAL;
 }
@@ -1793,6 +1817,8 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
        struct rb_node **p, *parent = NULL;
        struct fsck_inode *fscki;
        ino_t inum = key_inum_flash(c, &ino->key);
+        struct inode *inode;
+        struct ubifs_inode *ui;
        p = &fsckd->inodes.rb_node;
        while (*p) {
@@ -1816,19 +1842,46 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
        if (!fscki)
                return ERR_PTR(-ENOMEM);
+        inode = ilookup(c->vfs_sb, inum);
        fscki->inum = inum;
-        fscki->nlink = le32_to_cpu(ino->nlink);
+        /*
-        fscki->size = le64_to_cpu(ino->size);
+         * If the inode is present in the VFS inode cache, use it instead of
-        fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
+         * the on-flash inode which might be out-of-date. E.g., the size might
-        fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
+         * be out-of-date. If we do not do this, the following may happen, for
-        fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
+         * example:
-        fscki->mode = le32_to_cpu(ino->mode);
+         *   1. A power cut happens
+         *   2. We mount the file-system R/O, the replay process fixes up the
+         *      inode size in the VFS cache, but not on-flash.
+         *   3. 'check_leaf()' fails because it hits a data node beyond inode
+         *      size.
+         */
+        if (!inode) {
+                fscki->nlink = le32_to_cpu(ino->nlink);
+                fscki->size = le64_to_cpu(ino->size);
+                fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
+                fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
+                fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
+                fscki->mode = le32_to_cpu(ino->mode);
+        } else {
+                ui = ubifs_inode(inode);
+                fscki->nlink = inode->i_nlink;
+                fscki->size = inode->i_size;
+                fscki->xattr_cnt = ui->xattr_cnt;
+                fscki->xattr_sz = ui->xattr_size;
+                fscki->xattr_nms = ui->xattr_names;
+                fscki->mode = inode->i_mode;
+                iput(inode);
+        }
        if (S_ISDIR(fscki->mode)) {
                fscki->calc_sz = UBIFS_INO_NODE_SZ;
                fscki->calc_cnt = 2;
        }
        rb_link_node(&fscki->rb, parent, p);
        rb_insert_color(&fscki->rb, &fsckd->inodes);
        return fscki;
 }
@@ -2421,7 +2474,8 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
                hashb = key_block(c, &sb->key);
                if (hasha > hashb) {
-                        ubifs_err("larger hash %u goes before %u", hasha, hashb);
+                        ubifs_err("larger hash %u goes before %u",
+                                  hasha, hashb);
                        goto error_dump;
                }
        }
@@ -2437,14 +2491,12 @@ error_dump:
        return 0;
 }
-static int invocation_cnt;
 int dbg_force_in_the_gaps(void)
 {
-        if (!dbg_force_in_the_gaps_enabled)
+        if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
                return 0;
-        /* Force in-the-gaps every 8th commit */
-        return !((invocation_cnt++) & 0x7);
+        return !(random32() & 7);
 }
 /* Failure mode for recovery testing */
@@ -2632,7 +2684,7 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
                 int len, int check)
 {
        if (in_failure_mode(desc))
-                return -EIO;
+                return -EROFS;
        return ubi_leb_read(desc, lnum, buf, offset, len, check);
 }
@@ -2642,7 +2694,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
        int err, failing;
        if (in_failure_mode(desc))
-                return -EIO;
+                return -EROFS;
        failing = do_fail(desc, lnum, 1);
        if (failing)
                cut_data(buf, len);
@@ -2650,7 +2702,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
        if (err)
                return err;
        if (failing)
-                return -EIO;
+                return -EROFS;
        return 0;
 }
@@ -2660,12 +2712,12 @@ int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
        int err;
        if (do_fail(desc, lnum, 1))
-                return -EIO;
+                return -EROFS;
        err = ubi_leb_change(desc, lnum, buf, len, dtype);
        if (err)
                return err;
        if (do_fail(desc, lnum, 1))
-                return -EIO;
+                return -EROFS;
        return 0;
 }
@@ -2674,12 +2726,12 @@ int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum)
        int err;
        if (do_fail(desc, lnum, 0))
-                return -EIO;
+                return -EROFS;
        err = ubi_leb_erase(desc, lnum);
        if (err)
                return err;
        if (do_fail(desc, lnum, 0))
-                return -EIO;
+                return -EROFS;
        return 0;
 }
@@ -2688,19 +2740,19 @@ int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum)
        int err;
        if (do_fail(desc, lnum, 0))
-                return -EIO;
+                return -EROFS;
        err = ubi_leb_unmap(desc, lnum);
        if (err)
                return err;
        if (do_fail(desc, lnum, 0))
-                return -EIO;
+                return -EROFS;
        return 0;
 }
 int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum)
 {
        if (in_failure_mode(desc))
-                return -EIO;
+                return -EROFS;
        return ubi_is_mapped(desc, lnum);
 }
@@ -2709,12 +2761,12 @@ int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
        int err;
        if (do_fail(desc, lnum, 0))
-                return -EIO;
+                return -EROFS;
        err = ubi_leb_map(desc, lnum, dtype);
        if (err)
                return err;
        if (do_fail(desc, lnum, 0))
-                return -EIO;
+                return -EROFS;
        return 0;
 }
@@ -2784,7 +2836,7 @@ void dbg_debugfs_exit(void)
 static int open_debugfs_file(struct inode *inode, struct file *file)
 {
        file->private_data = inode->i_private;
-        return 0;
+        return nonseekable_open(inode, file);
 }
 static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
@@ -2795,18 +2847,15 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
        if (file->f_path.dentry == d->dfs_dump_lprops)
                dbg_dump_lprops(c);
-        else if (file->f_path.dentry == d->dfs_dump_budg) {
+        else if (file->f_path.dentry == d->dfs_dump_budg)
-                spin_lock(&c->space_lock);
+                dbg_dump_budg(c, &c->bi);
-                dbg_dump_budg(c);
+        else if (file->f_path.dentry == d->dfs_dump_tnc) {
-                spin_unlock(&c->space_lock);
-        } else if (file->f_path.dentry == d->dfs_dump_tnc) {
                mutex_lock(&c->tnc_mutex);
                dbg_dump_tnc(c);
                mutex_unlock(&c->tnc_mutex);
        } else
                return -EINVAL;
-        *ppos += count;
        return count;
 }
@@ -2814,7 +2863,7 @@ static const struct file_operations dfs_fops = {
        .open = open_debugfs_file,
        .write = write_debugfs_file,
        .owner = THIS_MODULE,
-        .llseek = default_llseek,
+        .llseek = no_llseek,
 };
 /**
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index e6493cac193d..a811ac4a26bb 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -31,6 +31,8 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
 #ifdef CONFIG_UBIFS_FS_DEBUG
+#include <linux/random.h>
 /**
 * ubifs_debug_info - per-FS debugging information.
 * @old_zroot: old index root - used by 'dbg_check_old_index()'
@@ -50,13 +52,15 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
 * @new_ihead_offs: used by debugging to check @c->ihead_offs
 *
 * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()')
- * @saved_free: saved free space (used by 'dbg_save_space_info()')
+ * @saved_bi: saved budgeting information
+ * @saved_free: saved amount of free space
+ * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt
 *
- * dfs_dir_name: name of debugfs directory containing this file-system's files
+ * @dfs_dir_name: name of debugfs directory containing this file-system's files
- * dfs_dir: direntry object of the file-system debugfs directory
+ * @dfs_dir: direntry object of the file-system debugfs directory
- * dfs_dump_lprops: "dump lprops" debugfs knob
+ * @dfs_dump_lprops: "dump lprops" debugfs knob
- * dfs_dump_budg: "dump budgeting information" debugfs knob
+ * @dfs_dump_budg: "dump budgeting information" debugfs knob
- * dfs_dump_tnc: "dump TNC" debugfs knob
+ * @dfs_dump_tnc: "dump TNC" debugfs knob
 */
 struct ubifs_debug_info {
        struct ubifs_zbranch old_zroot;
@@ -76,7 +80,9 @@ struct ubifs_debug_info {
        int new_ihead_offs;
        struct ubifs_lp_stats saved_lst;
+        struct ubifs_budg_info saved_bi;
        long long saved_free;
+        int saved_idx_gc_cnt;
        char dfs_dir_name[100];
        struct dentry *dfs_dir;
@@ -101,23 +107,7 @@ struct ubifs_debug_info {
        }                                                                      \
 } while (0)
-#define dbg_dump_stack() do {                                                  \
+#define dbg_dump_stack() dump_stack()
-        if (!dbg_failure_mode)                                                 \
-                dump_stack();                                                  \
-} while (0)
-/* Generic debugging messages */
-#define dbg_msg(fmt, ...) do {                                                 \
-        spin_lock(&dbg_lock);                                                  \
-        printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid,   \
-               __func__, ##__VA_ARGS__);                                       \
-        spin_unlock(&dbg_lock);                                                \
-} while (0)
-#define dbg_do_msg(typ, fmt, ...) do {                                         \
-        if (ubifs_msg_flags & typ)                                             \
-                dbg_msg(fmt, ##__VA_ARGS__);                                   \
-} while (0)
 #define dbg_err(fmt, ...) do {                                                 \
        spin_lock(&dbg_lock);                                                  \
@@ -137,77 +127,40 @@ const char *dbg_key_str1(const struct ubifs_info *c,
 #define DBGKEY(key) dbg_key_str0(c, (key))
 #define DBGKEY1(key) dbg_key_str1(c, (key))
-/* General messages */
+#define ubifs_dbg_msg(type, fmt, ...) do {                        \
-#define dbg_gen(fmt, ...)   dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__)
+        spin_lock(&dbg_lock);                                     \
+        pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
+        spin_unlock(&dbg_lock);                                   \
+} while (0)
+/* Just debugging messages not related to any specific UBIFS subsystem */
+#define dbg_msg(fmt, ...)   ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__)
+/* General messages */
+#define dbg_gen(fmt, ...)   ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
 /* Additional journal messages */
-#define dbg_jnl(fmt, ...)   dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__)
+#define dbg_jnl(fmt, ...)   ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__)
 /* Additional TNC messages */
-#define dbg_tnc(fmt, ...)   dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__)
+#define dbg_tnc(fmt, ...)   ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__)
 /* Additional lprops messages */
-#define dbg_lp(fmt, ...)    dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__)
+#define dbg_lp(fmt, ...)    ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__)
 /* Additional LEB find messages */
-#define dbg_find(fmt, ...)  dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__)
+#define dbg_find(fmt, ...)  ubifs_dbg_msg("find", fmt, ##__VA_ARGS__)
 /* Additional mount messages */
-#define dbg_mnt(fmt, ...)   dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__)
+#define dbg_mnt(fmt, ...)   ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__)
 /* Additional I/O messages */
-#define dbg_io(fmt, ...)    dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__)
+#define dbg_io(fmt, ...)    ubifs_dbg_msg("io", fmt, ##__VA_ARGS__)
 /* Additional commit messages */
-#define dbg_cmt(fmt, ...)   dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__)
+#define dbg_cmt(fmt, ...)   ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__)
 /* Additional budgeting messages */
-#define dbg_budg(fmt, ...)  dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__)
+#define dbg_budg(fmt, ...)  ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__)
 /* Additional log messages */
-#define dbg_log(fmt, ...)   dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__)
+#define dbg_log(fmt, ...)   ubifs_dbg_msg("log", fmt, ##__VA_ARGS__)
 /* Additional gc messages */
-#define dbg_gc(fmt, ...)    dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__)
+#define dbg_gc(fmt, ...)    ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__)
 /* Additional scan messages */
-#define dbg_scan(fmt, ...)  dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__)
+#define dbg_scan(fmt, ...)  ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__)
 /* Additional recovery messages */
-#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
+#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
-/*
- * Debugging message type flags.
- *
- * UBIFS_MSG_GEN: general messages
- * UBIFS_MSG_JNL: journal messages
- * UBIFS_MSG_MNT: mount messages
- * UBIFS_MSG_CMT: commit messages
- * UBIFS_MSG_FIND: LEB find messages
- * UBIFS_MSG_BUDG: budgeting messages
- * UBIFS_MSG_GC: garbage collection messages
- * UBIFS_MSG_TNC: TNC messages
- * UBIFS_MSG_LP: lprops messages
- * UBIFS_MSG_IO: I/O messages
- * UBIFS_MSG_LOG: log messages
- * UBIFS_MSG_SCAN: scan messages
- * UBIFS_MSG_RCVRY: recovery messages
- */
-enum {
-        UBIFS_MSG_GEN   = 0x1,
-        UBIFS_MSG_JNL   = 0x2,
-        UBIFS_MSG_MNT   = 0x4,
-        UBIFS_MSG_CMT   = 0x8,
-        UBIFS_MSG_FIND  = 0x10,
-        UBIFS_MSG_BUDG  = 0x20,
-        UBIFS_MSG_GC    = 0x40,
-        UBIFS_MSG_TNC   = 0x80,
-        UBIFS_MSG_LP    = 0x100,
-        UBIFS_MSG_IO    = 0x200,
-        UBIFS_MSG_LOG   = 0x400,
-        UBIFS_MSG_SCAN  = 0x800,
-        UBIFS_MSG_RCVRY = 0x1000,
-};
 /*
 * Debugging check flags.
@@ -233,11 +186,9 @@ enum {
 /*
 * Special testing flags.
 *
- * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method
 * UBIFS_TST_RCVRY: failure mode for recovery testing
 */
 enum {
-        UBIFS_TST_FORCE_IN_THE_GAPS = 0x2,
        UBIFS_TST_RCVRY             = 0x4,
 };
@@ -262,7 +213,7 @@ void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum,
                       int offs);
 void dbg_dump_budget_req(const struct ubifs_budget_req *req);
 void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
-void dbg_dump_budg(struct ubifs_info *c);
+void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi);
 void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
 void dbg_dump_lprops(struct ubifs_info *c);
 void dbg_dump_lpt_info(struct ubifs_info *c);
@@ -304,18 +255,16 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head);
 int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head);
 /* Force the use of in-the-gaps method for testing */
+static inline int dbg_force_in_the_gaps_enabled(void)
-#define dbg_force_in_the_gaps_enabled \
+{
-        (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS)
+        return ubifs_chk_flags & UBIFS_CHK_GEN;
+}
 int dbg_force_in_the_gaps(void);
 /* Failure mode for recovery testing */
 #define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
 #ifndef UBIFS_DBG_PRESERVE_UBI
 #define ubi_leb_read   dbg_leb_read
 #define ubi_leb_write  dbg_leb_write
 #define ubi_leb_change dbg_leb_change
@@ -323,7 +272,6 @@ int dbg_force_in_the_gaps(void);
 #define ubi_leb_unmap  dbg_leb_unmap
 #define ubi_is_mapped  dbg_is_mapped
 #define ubi_leb_map    dbg_leb_map
 #endif
 int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
@@ -370,33 +318,33 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
                       __func__, __LINE__, current->pid);                      \
 } while (0)
-#define dbg_err(fmt, ...)   do {                                               \
+#define dbg_err(fmt, ...)   do {                   \
-        if (0)                                                                 \
+        if (0)                                     \
-                ubifs_err(fmt, ##__VA_ARGS__);                                 \
+                ubifs_err(fmt, ##__VA_ARGS__);     \
 } while (0)
-#define dbg_msg(fmt, ...) do {                                                 \
+#define ubifs_dbg_msg(fmt, ...) do {               \
-        if (0)                                                                 \
+        if (0)                                     \
-                printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n",         \
+                pr_debug(fmt "\n", ##__VA_ARGS__); \
-                       current->pid, __func__, ##__VA_ARGS__);                 \
 } while (0)
 #define dbg_dump_stack()
 #define ubifs_assert_cmt_locked(c)
-#define dbg_gen(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_msg(fmt, ...)   ubifs_dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_jnl(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_gen(fmt, ...)   ubifs_dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_tnc(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_jnl(fmt, ...)   ubifs_dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_lp(fmt, ...)    dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_tnc(fmt, ...)   ubifs_dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_find(fmt, ...)  dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_lp(fmt, ...)    ubifs_dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_mnt(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_find(fmt, ...)  ubifs_dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_io(fmt, ...)    dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_mnt(fmt, ...)   ubifs_dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_cmt(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_io(fmt, ...)    ubifs_dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_budg(fmt, ...)  dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_cmt(fmt, ...)   ubifs_dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_log(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_budg(fmt, ...)  ubifs_dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_gc(fmt, ...)    dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_log(fmt, ...)   ubifs_dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_scan(fmt, ...)  dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_gc(fmt, ...)    ubifs_dbg_msg(fmt, ##__VA_ARGS__)
-#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_scan(fmt, ...)  ubifs_dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
 #define DBGKEY(key)  ((char *)(key))
 #define DBGKEY1(key) ((char *)(key))
@@ -420,7 +368,9 @@ static inline void
 dbg_dump_budget_req(const struct ubifs_budget_req *req)           { return; }
 static inline void
 dbg_dump_lstats(const struct ubifs_lp_stats *lst)                 { return; }
-static inline void dbg_dump_budg(struct ubifs_info *c)            { return; }
+static inline void
+dbg_dump_budg(struct ubifs_info *c,
+              const struct ubifs_budg_info *bi)                   { return; }
 static inline void dbg_dump_lprop(const struct ubifs_info *c,
                                  const struct ubifs_lprops *lp)  { return; }
 static inline void dbg_dump_lprops(struct ubifs_info *c)          { return; }
@@ -482,8 +432,8 @@ dbg_check_nondata_nodes_order(struct ubifs_info *c,
                              struct list_head *head)             { return 0; }
 static inline int dbg_force_in_the_gaps(void)                     { return 0; }
-#define dbg_force_in_the_gaps_enabled 0
+#define dbg_force_in_the_gaps_enabled() 0
-#define dbg_failure_mode              0
+#define dbg_failure_mode                0
 static inline int dbg_debugfs_init(void)                          { return 0; }
 static inline void dbg_debugfs_exit(void)                         { return; }
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index d80810bb4c37..c2b80943560d 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -603,7 +603,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
                ubifs_release_budget(c, &req);
        else {
                /* We've deleted something - clean the "no space" flags */
-                c->nospace = c->nospace_rp = 0;
+                c->bi.nospace = c->bi.nospace_rp = 0;
                smp_wmb();
        }
        return 0;
@@ -695,7 +695,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
                ubifs_release_budget(c, &req);
        else {
                /* We've deleted something - clean the "no space" flags */
-                c->nospace = c->nospace_rp = 0;
+                c->bi.nospace = c->bi.nospace_rp = 0;
                smp_wmb();
        }
        return 0;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index b286db79c686..5e7fccfc4b29 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -212,7 +212,7 @@ static void release_new_page_budget(struct ubifs_info *c)
 */
 static void release_existing_page_budget(struct ubifs_info *c)
 {
-        struct ubifs_budget_req req = { .dd_growth = c->page_budget};
+        struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget};
        ubifs_release_budget(c, &req);
 }
@@ -971,11 +971,11 @@ static int do_writepage(struct page *page, int len)
 * the page locked, and it locks @ui_mutex. However, write-back does take inode
 * @i_mutex, which means other VFS operations may be run on this inode at the
 * same time. And the problematic one is truncation to smaller size, from where
- * we have to call 'truncate_setsize()', which first changes @inode->i_size, then
+ * we have to call 'truncate_setsize()', which first changes @inode->i_size,
- * drops the truncated pages. And while dropping the pages, it takes the page
+ * then drops the truncated pages. And while dropping the pages, it takes the
- * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with
+ * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()'
- * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This
+ * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'.
- * means that @inode->i_size is changed while @ui_mutex is unlocked.
+ * This means that @inode->i_size is changed while @ui_mutex is unlocked.
 *
 * XXX(truncate): with the new truncate sequence this is not true anymore,
 * and the calls to truncate_setsize can be move around freely.  They should
@@ -1189,7 +1189,7 @@ out_budg:
        if (budgeted)
                ubifs_release_budget(c, &req);
        else {
-                c->nospace = c->nospace_rp = 0;
+                c->bi.nospace = c->bi.nospace_rp = 0;
                smp_wmb();
        }
        return err;
@@ -1312,7 +1312,11 @@ int ubifs_fsync(struct file *file, int datasync)
        dbg_gen("syncing inode %lu", inode->i_ino);
-        if (inode->i_sb->s_flags & MS_RDONLY)
+        if (c->ro_mount)
+                /*
+                 * For some really strange reasons VFS does not filter out
+                 * 'fsync()' for R/O mounted file-systems as per 2.6.39.
+                 */
                return 0;
        /*
@@ -1432,10 +1436,11 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
 }
 /*
- * mmap()d file has taken write protection fault and is being made
+ * mmap()d file has taken write protection fault and is being made writable.
- * writable. UBIFS must ensure page is budgeted for.
+ * UBIFS must ensure page is budgeted for.
 */
-static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
+                                 struct vm_fault *vmf)
 {
        struct page *page = vmf->page;
        struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
@@ -1536,7 +1541,6 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
        int err;
-        /* 'generic_file_mmap()' takes care of NOMMU case */
        err = generic_file_mmap(file, vma);
        if (err)
                return err;
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 1d54383d1269..2559d174e004 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -252,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
                 * But if the index takes fewer LEBs than it is reserved for it,
                 * this function must avoid picking those reserved LEBs.
                 */
-                if (c->min_idx_lebs >= c->lst.idx_lebs) {
+                if (c->bi.min_idx_lebs >= c->lst.idx_lebs) {
-                        rsvd_idx_lebs = c->min_idx_lebs -  c->lst.idx_lebs;
+                        rsvd_idx_lebs = c->bi.min_idx_lebs -  c->lst.idx_lebs;
                        exclude_index = 1;
                }
                spin_unlock(&c->space_lock);
@@ -276,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
                        pick_free = 0;
        } else {
                spin_lock(&c->space_lock);
-                exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs);
+                exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs);
                spin_unlock(&c->space_lock);
        }
@@ -501,8 +501,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
        /* Check if there are enough empty LEBs for commit */
        spin_lock(&c->space_lock);
-        if (c->min_idx_lebs > c->lst.idx_lebs)
+        if (c->bi.min_idx_lebs > c->lst.idx_lebs)
-                rsvd_idx_lebs = c->min_idx_lebs -  c->lst.idx_lebs;
+                rsvd_idx_lebs = c->bi.min_idx_lebs -  c->lst.idx_lebs;
        else
                rsvd_idx_lebs = 0;
        lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 151f10882820..ded29f6224c2 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -100,6 +100,10 @@ static int switch_gc_head(struct ubifs_info *c)
        if (err)
                return err;
+        err = ubifs_wbuf_sync_nolock(wbuf);
+        if (err)
+                return err;
        err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
        if (err)
                return err;
@@ -118,7 +122,7 @@ static int switch_gc_head(struct ubifs_info *c)
 * This function compares data nodes @a and @b. Returns %1 if @a has greater
 * inode or block number, and %-1 otherwise.
 */
-int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
+static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
 {
        ino_t inuma, inumb;
        struct ubifs_info *c = priv;
@@ -161,7 +165,8 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
 * first and sorted by length in descending order. Directory entry nodes go
 * after inode nodes and are sorted in ascending hash valuer order.
 */
-int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
+static int nondata_nodes_cmp(void *priv, struct list_head *a,
+                             struct list_head *b)
 {
        ino_t inuma, inumb;
        struct ubifs_info *c = priv;
@@ -473,6 +478,37 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
        ubifs_assert(c->gc_lnum != lnum);
        ubifs_assert(wbuf->lnum != lnum);
+        if (lp->free + lp->dirty == c->leb_size) {
+                /* Special case - a free LEB */
+                dbg_gc("LEB %d is free, return it", lp->lnum);
+                ubifs_assert(!(lp->flags & LPROPS_INDEX));
+                if (lp->free != c->leb_size) {
+                        /*
+                         * Write buffers must be sync'd before unmapping
+                         * freeable LEBs, because one of them may contain data
+                         * which obsoletes something in 'lp->lnum'.
+                         */
+                        err = gc_sync_wbufs(c);
+                        if (err)
+                                return err;
+                        err = ubifs_change_one_lp(c, lp->lnum, c->leb_size,
+                                                  0, 0, 0, 0);
+                        if (err)
+                                return err;
+                }
+                err = ubifs_leb_unmap(c, lp->lnum);
+                if (err)
+                        return err;
+                if (c->gc_lnum == -1) {
+                        c->gc_lnum = lnum;
+                        return LEB_RETAINED;
+                }
+                return LEB_FREED;
+        }
        /*
         * We scan the entire LEB even though we only really need to scan up to
         * (c->leb_size - lp->free).
@@ -682,37 +718,6 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
                       "(min. space %d)", lp.lnum, lp.free, lp.dirty,
                       lp.free + lp.dirty, min_space);
-                if (lp.free + lp.dirty == c->leb_size) {
-                        /* An empty LEB was returned */
-                        dbg_gc("LEB %d is free, return it", lp.lnum);
-                        /*
-                         * ubifs_find_dirty_leb() doesn't return freeable index
-                         * LEBs.
-                         */
-                        ubifs_assert(!(lp.flags & LPROPS_INDEX));
-                        if (lp.free != c->leb_size) {
-                                /*
-                                 * Write buffers must be sync'd before
-                                 * unmapping freeable LEBs, because one of them
-                                 * may contain data which obsoletes something
-                                 * in 'lp.pnum'.
-                                 */
-                                ret = gc_sync_wbufs(c);
-                                if (ret)
-                                        goto out;
-                                ret = ubifs_change_one_lp(c, lp.lnum,
-                                                          c->leb_size, 0, 0, 0,
-                                                          0);
-                                if (ret)
-                                        goto out;
-                        }
-                        ret = ubifs_leb_unmap(c, lp.lnum);
-                        if (ret)
-                                goto out;
-                        ret = lp.lnum;
-                        break;
-                }
                space_before = c->leb_size - wbuf->offs - wbuf->used;
                if (wbuf->lnum == -1)
                        space_before = 0;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index dfd168b7807e..166951e0dcd3 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -393,7 +393,7 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
        ubifs_assert(wbuf->size % c->min_io_size == 0);
        ubifs_assert(!c->ro_media && !c->ro_mount);
        if (c->leb_size - wbuf->offs >= c->max_write_size)
-                ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size ));
+                ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
        if (c->ro_error)
                return -EROFS;
@@ -452,8 +452,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
 * @dtype: data type
 *
 * This function targets the write-buffer to logical eraseblock @lnum:@offs.
- * The write-buffer is synchronized if it is not empty. Returns zero in case of
+ * The write-buffer has to be empty. Returns zero in case of success and a
- * success and a negative error code in case of failure.
+ * negative error code in case of failure.
 */
 int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
                           int dtype)
@@ -465,13 +465,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
        ubifs_assert(offs >= 0 && offs <= c->leb_size);
        ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
        ubifs_assert(lnum != wbuf->lnum);
+        ubifs_assert(wbuf->used == 0);
-        if (wbuf->used > 0) {
-                int err = ubifs_wbuf_sync_nolock(wbuf);
-                if (err)
-                        return err;
-        }
        spin_lock(&wbuf->lock);
        wbuf->lnum = lnum;
@@ -573,7 +567,7 @@ out_timers:
 int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
 {
        struct ubifs_info *c = wbuf->c;
-        int err, written, n, aligned_len = ALIGN(len, 8), offs;
+        int err, written, n, aligned_len = ALIGN(len, 8);
        dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
               dbg_ntype(((struct ubifs_ch *)buf)->node_type),
@@ -588,7 +582,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
        ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
        ubifs_assert(!c->ro_media && !c->ro_mount);
        if (c->leb_size - wbuf->offs >= c->max_write_size)
-                ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size ));
+                ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
        if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
                err = -ENOSPC;
@@ -636,7 +630,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
                goto exit;
        }
-        offs = wbuf->offs;
        written = 0;
        if (wbuf->used) {
@@ -653,7 +646,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
                if (err)
                        goto out;
-                offs += wbuf->size;
+                wbuf->offs += wbuf->size;
                len -= wbuf->avail;
                aligned_len -= wbuf->avail;
                written += wbuf->avail;
@@ -672,7 +665,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
                if (err)
                        goto out;
-                offs += wbuf->size;
+                wbuf->offs += wbuf->size;
                len -= wbuf->size;
                aligned_len -= wbuf->size;
                written += wbuf->size;
@@ -687,12 +680,13 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
        n = aligned_len >> c->max_write_shift;
        if (n) {
                n <<= c->max_write_shift;
-                dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
+                dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
-                err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
+                       wbuf->offs);
-                                    wbuf->dtype);
+                err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written,
+                                    wbuf->offs, n, wbuf->dtype);
                if (err)
                        goto out;
-                offs += n;
+                wbuf->offs += n;
                aligned_len -= n;
                len -= n;
                written += n;
@@ -707,7 +701,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
                 */
                memcpy(wbuf->buf, buf + written, len);
-        wbuf->offs = offs;
        if (c->leb_size - wbuf->offs >= c->max_write_size)
                wbuf->size = c->max_write_size;
        else
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index aed25e864227..34b1679e6e3a 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -141,14 +141,8 @@ again:
         * LEB with some empty space.
         */
        lnum = ubifs_find_free_space(c, len, &offs, squeeze);
-        if (lnum >= 0) {
+        if (lnum >= 0)
-                /* Found an LEB, add it to the journal head */
-                err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
-                if (err)
-                        goto out_return;
-                /* A new bud was successfully allocated and added to the log */
                goto out;
-        }
        err = lnum;
        if (err != -ENOSPC)
@@ -203,12 +197,23 @@ again:
                return 0;
        }
-        err = ubifs_add_bud_to_log(c, jhead, lnum, 0);
-        if (err)
-                goto out_return;
        offs = 0;
 out:
+        /*
+         * Make sure we synchronize the write-buffer before we add the new bud
+         * to the log. Otherwise we may have a power cut after the log
+         * reference node for the last bud (@lnum) is written but before the
+         * write-buffer data are written to the next-to-last bud
+         * (@wbuf->lnum). And the effect would be that the recovery would see
+         * that there is corruption in the next-to-last bud.
+         */
+        err = ubifs_wbuf_sync_nolock(wbuf);
+        if (err)
+                goto out_return;
+        err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
+        if (err)
+                goto out_return;
        err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype);
        if (err)
                goto out_unlock;
@@ -380,10 +385,8 @@ out:
        if (err == -ENOSPC) {
                /* This are some budgeting problems, print useful information */
                down_write(&c->commit_sem);
-                spin_lock(&c->space_lock);
                dbg_dump_stack();
-                dbg_dump_budg(c);
+                dbg_dump_budg(c, &c->bi);
-                spin_unlock(&c->space_lock);
                dbg_dump_lprops(c);
                cmt_retries = dbg_check_lprops(c);
                up_write(&c->commit_sem);
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 4d0cb1241460..affea9494ae2 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -100,20 +100,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum)
 }
 /**
- * next_log_lnum - switch to the next log LEB.
- * @c: UBIFS file-system description object
- * @lnum: current log LEB
- */
-static inline int next_log_lnum(const struct ubifs_info *c, int lnum)
-{
-        lnum += 1;
-        if (lnum > c->log_last)
-                lnum = UBIFS_LOG_LNUM;
-        return lnum;
-}
-/**
 * empty_log_bytes - calculate amount of empty space in the log.
 * @c: UBIFS file-system description object
 */
@@ -175,26 +161,6 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
 }
 /**
- * ubifs_create_buds_lists - create journal head buds lists for remount rw.
- * @c: UBIFS file-system description object
- */
-void ubifs_create_buds_lists(struct ubifs_info *c)
-{
-        struct rb_node *p;
-        spin_lock(&c->buds_lock);
-        p = rb_first(&c->buds);
-        while (p) {
-                struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb);
-                struct ubifs_jhead *jhead = &c->jheads[bud->jhead];
-                list_add_tail(&bud->list, &jhead->buds_list);
-                p = rb_next(p);
-        }
-        spin_unlock(&c->buds_lock);
-}
-/**
 * ubifs_add_bud_to_log - add a new bud to the log.
 * @c: UBIFS file-system description object
 * @jhead: journal head the bud belongs to
@@ -277,7 +243,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
        ref->jhead = cpu_to_le32(jhead);
        if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
-                c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
+                c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
                c->lhead_offs = 0;
        }
@@ -445,7 +411,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
        /* Switch to the next log LEB */
        if (c->lhead_offs) {
-                c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
+                c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
                c->lhead_offs = 0;
        }
@@ -466,7 +432,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
        c->lhead_offs += len;
        if (c->lhead_offs == c->leb_size) {
-                c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
+                c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
                c->lhead_offs = 0;
        }
@@ -553,7 +519,7 @@ int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
        }
        mutex_lock(&c->log_mutex);
        for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
-             lnum = next_log_lnum(c, lnum)) {
+             lnum = ubifs_next_log_lnum(c, lnum)) {
                dbg_log("unmap log LEB %d", lnum);
                err = ubifs_leb_unmap(c, lnum);
                if (err)
@@ -662,7 +628,7 @@ static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
                err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM);
                if (err)
                        return err;
-                *lnum = next_log_lnum(c, *lnum);
+                *lnum = ubifs_next_log_lnum(c, *lnum);
                *offs = 0;
        }
        memcpy(buf + *offs, node, len);
@@ -732,7 +698,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
                ubifs_scan_destroy(sleb);
                if (lnum == c->lhead_lnum)
                        break;
-                lnum = next_log_lnum(c, lnum);
+                lnum = ubifs_next_log_lnum(c, lnum);
        }
        if (offs) {
                int sz = ALIGN(offs, c->min_io_size);
@@ -752,7 +718,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
        /* Unmap remaining LEBs */
        lnum = write_lnum;
        do {
-                lnum = next_log_lnum(c, lnum);
+                lnum = ubifs_next_log_lnum(c, lnum);
                err = ubifs_leb_unmap(c, lnum);
                if (err)
                        return err;
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 0ee0847f2421..667884f4a615 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -1007,21 +1007,11 @@ out:
 }
 /**
- * struct scan_check_data - data provided to scan callback function.
- * @lst: LEB properties statistics
- * @err: error code
- */
-struct scan_check_data {
-        struct ubifs_lp_stats lst;
-        int err;
-};
-/**
 * scan_check_cb - scan callback.
 * @c: the UBIFS file-system description object
 * @lp: LEB properties to scan
 * @in_tree: whether the LEB properties are in main memory
- * @data: information passed to and from the caller of the scan
+ * @lst: lprops statistics to update
 *
 * This function returns a code that indicates whether the scan should continue
 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
@@ -1030,11 +1020,10 @@ struct scan_check_data {
 */
 static int scan_check_cb(struct ubifs_info *c,
                         const struct ubifs_lprops *lp, int in_tree,
-                         struct scan_check_data *data)
+                         struct ubifs_lp_stats *lst)
 {
        struct ubifs_scan_leb *sleb;
        struct ubifs_scan_node *snod;
-        struct ubifs_lp_stats *lst = &data->lst;
        int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
        void *buf = NULL;
@@ -1044,7 +1033,7 @@ static int scan_check_cb(struct ubifs_info *c,
                if (cat != (lp->flags & LPROPS_CAT_MASK)) {
                        ubifs_err("bad LEB category %d expected %d",
                                  (lp->flags & LPROPS_CAT_MASK), cat);
-                        goto out;
+                        return -EINVAL;
                }
        }
@@ -1078,7 +1067,7 @@ static int scan_check_cb(struct ubifs_info *c,
                        }
                        if (!found) {
                                ubifs_err("bad LPT list (category %d)", cat);
-                                goto out;
+                                return -EINVAL;
                        }
                }
        }
@@ -1090,45 +1079,40 @@ static int scan_check_cb(struct ubifs_info *c,
                if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) ||
                    lp != heap->arr[lp->hpos]) {
                        ubifs_err("bad LPT heap (category %d)", cat);
-                        goto out;
+                        return -EINVAL;
                }
        }
        buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
-        if (!buf) {
+        if (!buf)
-                ubifs_err("cannot allocate memory to scan LEB %d", lnum);
+                return -ENOMEM;
-                goto out;
+        /*
+         * After an unclean unmount, empty and freeable LEBs
+         * may contain garbage - do not scan them.
+         */
+        if (lp->free == c->leb_size) {
+                lst->empty_lebs += 1;
+                lst->total_free += c->leb_size;
+                lst->total_dark += ubifs_calc_dark(c, c->leb_size);
+                return LPT_SCAN_CONTINUE;
+        }
+        if (lp->free + lp->dirty == c->leb_size &&
+            !(lp->flags & LPROPS_INDEX)) {
+                lst->total_free  += lp->free;
+                lst->total_dirty += lp->dirty;
+                lst->total_dark  +=  ubifs_calc_dark(c, c->leb_size);
+                return LPT_SCAN_CONTINUE;
        }
        sleb = ubifs_scan(c, lnum, 0, buf, 0);
        if (IS_ERR(sleb)) {
-                /*
+                ret = PTR_ERR(sleb);
-                 * After an unclean unmount, empty and freeable LEBs
+                if (ret == -EUCLEAN) {
-                 * may contain garbage.
+                        dbg_dump_lprops(c);
-                 */
+                        dbg_dump_budg(c, &c->bi);
-                if (lp->free == c->leb_size) {
-                        ubifs_err("scan errors were in empty LEB "
-                                  "- continuing checking");
-                        lst->empty_lebs += 1;
-                        lst->total_free += c->leb_size;
-                        lst->total_dark += ubifs_calc_dark(c, c->leb_size);
-                        ret = LPT_SCAN_CONTINUE;
-                        goto exit;
-                }
-                if (lp->free + lp->dirty == c->leb_size &&
-                    !(lp->flags & LPROPS_INDEX)) {
-                        ubifs_err("scan errors were in freeable LEB "
-                                  "- continuing checking");
-                        lst->total_free  += lp->free;
-                        lst->total_dirty += lp->dirty;
-                        lst->total_dark  +=  ubifs_calc_dark(c, c->leb_size);
-                        ret = LPT_SCAN_CONTINUE;
-                        goto exit;
                }
-                data->err = PTR_ERR(sleb);
+                goto out;
-                ret = LPT_SCAN_STOP;
-                goto exit;
        }
        is_idx = -1;
@@ -1246,10 +1230,8 @@ static int scan_check_cb(struct ubifs_info *c,
        }
        ubifs_scan_destroy(sleb);
-        ret = LPT_SCAN_CONTINUE;
-exit:
        vfree(buf);
-        return ret;
+        return LPT_SCAN_CONTINUE;
 out_print:
        ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, "
@@ -1258,10 +1240,10 @@ out_print:
        dbg_dump_leb(c, lnum);
 out_destroy:
        ubifs_scan_destroy(sleb);
+        ret = -EINVAL;
 out:
        vfree(buf);
-        data->err = -EINVAL;
+        return ret;
-        return LPT_SCAN_STOP;
 }
 /**
@@ -1278,8 +1260,7 @@ out:
 int dbg_check_lprops(struct ubifs_info *c)
 {
        int i, err;
-        struct scan_check_data data;
+        struct ubifs_lp_stats lst;
-        struct ubifs_lp_stats *lst = &data.lst;
        if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
                return 0;
@@ -1294,29 +1275,23 @@ int dbg_check_lprops(struct ubifs_info *c)
                        return err;
        }
-        memset(lst, 0, sizeof(struct ubifs_lp_stats));
+        memset(&lst, 0, sizeof(struct ubifs_lp_stats));
-        data.err = 0;
        err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
                                    (ubifs_lpt_scan_callback)scan_check_cb,
-                                    &data);
+                                    &lst);
        if (err && err != -ENOSPC)
                goto out;
-        if (data.err) {
-                err = data.err;
-                goto out;
-        }
-        if (lst->empty_lebs != c->lst.empty_lebs ||
+        if (lst.empty_lebs != c->lst.empty_lebs ||
-            lst->idx_lebs != c->lst.idx_lebs ||
+            lst.idx_lebs != c->lst.idx_lebs ||
-            lst->total_free != c->lst.total_free ||
+            lst.total_free != c->lst.total_free ||
-            lst->total_dirty != c->lst.total_dirty ||
+            lst.total_dirty != c->lst.total_dirty ||
-            lst->total_used != c->lst.total_used) {
+            lst.total_used != c->lst.total_used) {
                ubifs_err("bad overall accounting");
                ubifs_err("calculated: empty_lebs %d, idx_lebs %d, "
                          "total_free %lld, total_dirty %lld, total_used %lld",
-                          lst->empty_lebs, lst->idx_lebs, lst->total_free,
+                          lst.empty_lebs, lst.idx_lebs, lst.total_free,
-                          lst->total_dirty, lst->total_used);
+                          lst.total_dirty, lst.total_used);
                ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, "
                          "total_free %lld, total_dirty %lld, total_used %lld",
                          c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
@@ -1325,11 +1300,11 @@ int dbg_check_lprops(struct ubifs_info *c)
                goto out;
        }
-        if (lst->total_dead != c->lst.total_dead ||
+        if (lst.total_dead != c->lst.total_dead ||
-            lst->total_dark != c->lst.total_dark) {
+            lst.total_dark != c->lst.total_dark) {
                ubifs_err("bad dead/dark space accounting");
                ubifs_err("calculated: total_dead %lld, total_dark %lld",
-                          lst->total_dead, lst->total_dark);
+                          lst.total_dead, lst.total_dark);
                ubifs_err("read from lprops: total_dead %lld, total_dark %lld",
                          c->lst.total_dead, c->lst.total_dark);
                err = -EINVAL;
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 0c9c69bd983a..dfcb5748a7dc 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -29,6 +29,12 @@
 #include <linux/slab.h>
 #include "ubifs.h"
+#ifdef CONFIG_UBIFS_FS_DEBUG
+static int dbg_populate_lsave(struct ubifs_info *c);
+#else
+#define dbg_populate_lsave(c) 0
+#endif
 /**
 * first_dirty_cnode - find first dirty cnode.
 * @c: UBIFS file-system description object
@@ -586,7 +592,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c,
                        if (nnode->nbranch[iip].lnum)
                                break;
                }
-       } while (iip >= UBIFS_LPT_FANOUT);
+        } while (iip >= UBIFS_LPT_FANOUT);
        /* Go right */
        nnode = ubifs_get_nnode(c, nnode, iip);
@@ -815,6 +821,10 @@ static void populate_lsave(struct ubifs_info *c)
                c->lpt_drty_flgs |= LSAVE_DIRTY;
                ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
        }
+        if (dbg_populate_lsave(c))
+                return;
        list_for_each_entry(lprops, &c->empty_list, list) {
                c->lsave[cnt++] = lprops->lnum;
                if (cnt >= c->lsave_cnt)
@@ -1994,4 +2004,47 @@ void dbg_dump_lpt_lebs(const struct ubifs_info *c)
               current->pid);
 }
+/**
+ * dbg_populate_lsave - debugging version of 'populate_lsave()'
+ * @c: UBIFS file-system description object
+ *
+ * This is a debugging version for 'populate_lsave()' which populates lsave
+ * with random LEBs instead of useful LEBs, which is good for test coverage.
+ * Returns zero if lsave has not been populated (this debugging feature is
+ * disabled) and non-zero if lsave has been populated.
+ */
+static int dbg_populate_lsave(struct ubifs_info *c)
+{
+        struct ubifs_lprops *lprops;
+        struct ubifs_lpt_heap *heap;
+        int i;
+        if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+                return 0;
+        if (random32() & 3)
+                return 0;
+        for (i = 0; i < c->lsave_cnt; i++)
+                c->lsave[i] = c->main_first;
+        list_for_each_entry(lprops, &c->empty_list, list)
+                c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
+        list_for_each_entry(lprops, &c->freeable_list, list)
+                c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
+        list_for_each_entry(lprops, &c->frdi_idx_list, list)
+                c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
+        heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
+        for (i = 0; i < heap->cnt; i++)
+                c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
+        heap = &c->lpt_heap[LPROPS_DIRTY - 1];
+        for (i = 0; i < heap->cnt; i++)
+                c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
+        heap = &c->lpt_heap[LPROPS_FREE - 1];
+        for (i = 0; i < heap->cnt; i++)
+                c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
+        return 1;
+}
 #endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 21f47afdacff..278c2382e8c2 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -148,7 +148,7 @@ static int validate_master(const struct ubifs_info *c)
        }
        main_sz = (long long)c->main_lebs * c->leb_size;
-        if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) {
+        if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) {
                err = 9;
                goto out;
        }
@@ -218,7 +218,7 @@ static int validate_master(const struct ubifs_info *c)
        }
        if (c->lst.total_dead + c->lst.total_dark +
-            c->lst.total_used + c->old_idx_sz > main_sz) {
+            c->lst.total_used + c->bi.old_idx_sz > main_sz) {
                err = 21;
                goto out;
        }
@@ -286,7 +286,7 @@ int ubifs_read_master(struct ubifs_info *c)
        c->gc_lnum         = le32_to_cpu(c->mst_node->gc_lnum);
        c->ihead_lnum      = le32_to_cpu(c->mst_node->ihead_lnum);
        c->ihead_offs      = le32_to_cpu(c->mst_node->ihead_offs);
-        c->old_idx_sz      = le64_to_cpu(c->mst_node->index_size);
+        c->bi.old_idx_sz   = le64_to_cpu(c->mst_node->index_size);
        c->lpt_lnum        = le32_to_cpu(c->mst_node->lpt_lnum);
        c->lpt_offs        = le32_to_cpu(c->mst_node->lpt_offs);
        c->nhead_lnum      = le32_to_cpu(c->mst_node->nhead_lnum);
@@ -305,7 +305,7 @@ int ubifs_read_master(struct ubifs_info *c)
        c->lst.total_dead  = le64_to_cpu(c->mst_node->total_dead);
        c->lst.total_dark  = le64_to_cpu(c->mst_node->total_dark);
-        c->calc_idx_sz = c->old_idx_sz;
+        c->calc_idx_sz = c->bi.old_idx_sz;
        if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
                c->no_orphs = 1;
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index c3de04dc952a..0b5296a9a4c5 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -340,4 +340,21 @@ static inline void ubifs_release_lprops(struct ubifs_info *c)
        mutex_unlock(&c->lp_mutex);
 }
+/**
+ * ubifs_next_log_lnum - switch to the next log LEB.
+ * @c: UBIFS file-system description object
+ * @lnum: current log LEB
+ *
+ * This helper function returns the log LEB number which goes next after LEB
+ * 'lnum'.
+ */
+static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum)
+{
+        lnum += 1;
+        if (lnum > c->log_last)
+                lnum = UBIFS_LOG_LNUM;
+        return lnum;
+}
 #endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 09df318e368f..bd644bf587a8 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -673,7 +673,8 @@ static int kill_orphans(struct ubifs_info *c)
                sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
                if (IS_ERR(sleb)) {
                        if (PTR_ERR(sleb) == -EUCLEAN)
-                                sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0);
+                                sleb = ubifs_recover_leb(c, lnum, 0,
+                                                         c->sbuf, 0);
                        if (IS_ERR(sleb)) {
                                err = PTR_ERR(sleb);
                                break;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 3dbad6fbd1eb..731d9e2e7b50 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -564,13 +564,16 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 }
 /**
- * drop_incomplete_group - drop nodes from an incomplete group.
+ * drop_last_node - drop the last node or group of nodes.
 * @sleb: scanned LEB information
 * @offs: offset of dropped nodes is returned here
+ * @grouped: non-zero if the whole group of nodes has to be dropped
 *
- * This function returns %1 if nodes are dropped and %0 otherwise.
+ * This is a helper function for 'ubifs_recover_leb()' which drops the last
+ * node of the scanned LEB or the last group of nodes if @grouped is not zero.
+ * This function returns %1 if a node was dropped and %0 otherwise.
 */
-static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
+static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped)
 {
        int dropped = 0;
@@ -589,6 +592,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
                kfree(snod);
                sleb->nodes_cnt -= 1;
                dropped = 1;
+                if (!grouped)
+                        break;
        }
        return dropped;
 }
@@ -609,8 +614,7 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
 struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
                                         int offs, void *sbuf, int grouped)
 {
-        int err, len = c->leb_size - offs, need_clean = 0, quiet = 1;
+        int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
-        int empty_chkd = 0, start = offs;
        struct ubifs_scan_leb *sleb;
        void *buf = sbuf + offs;
@@ -620,12 +624,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
        if (IS_ERR(sleb))
                return sleb;
-        if (sleb->ecc)
+        ubifs_assert(len >= 8);
-                need_clean = 1;
        while (len >= 8) {
-                int ret;
                dbg_scan("look at LEB %d:%d (%d bytes left)",
                         lnum, offs, len);
@@ -635,8 +635,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
                 * Scan quietly until there is an error from which we cannot
                 * recover
                 */
-                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
+                ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
                if (ret == SCANNED_A_NODE) {
                        /* A valid node, and not a padding node */
                        struct ubifs_ch *ch = buf;
@@ -649,70 +648,32 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
                        offs += node_len;
                        buf += node_len;
                        len -= node_len;
-                        continue;
+                } else if (ret > 0) {
-                }
-                if (ret > 0) {
                        /* Padding bytes or a valid padding node */
                        offs += ret;
                        buf += ret;
                        len -= ret;
-                        continue;
+                } else if (ret == SCANNED_EMPTY_SPACE ||
-                }
+                           ret == SCANNED_GARBAGE     ||
+                           ret == SCANNED_A_BAD_PAD_NODE ||
-                if (ret == SCANNED_EMPTY_SPACE) {
+                           ret == SCANNED_A_CORRUPT_NODE) {
-                        if (!is_empty(buf, len)) {
+                        dbg_rcvry("found corruption - %d", ret);
-                                if (!is_last_write(c, buf, offs))
-                                        break;
-                                clean_buf(c, &buf, lnum, &offs, &len);
-                                need_clean = 1;
-                        }
-                        empty_chkd = 1;
                        break;
-                }
+                } else {
+                        dbg_err("unexpected return value %d", ret);
-                if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
-                        if (is_last_write(c, buf, offs)) {
-                                clean_buf(c, &buf, lnum, &offs, &len);
-                                need_clean = 1;
-                                empty_chkd = 1;
-                                break;
-                        }
-                if (ret == SCANNED_A_CORRUPT_NODE)
-                        if (no_more_nodes(c, buf, len, lnum, offs)) {
-                                clean_buf(c, &buf, lnum, &offs, &len);
-                                need_clean = 1;
-                                empty_chkd = 1;
-                                break;
-                        }
-                if (quiet) {
-                        /* Redo the last scan but noisily */
-                        quiet = 0;
-                        continue;
-                }
-                switch (ret) {
-                case SCANNED_GARBAGE:
-                        dbg_err("garbage");
-                        goto corrupted;
-                case SCANNED_A_CORRUPT_NODE:
-                case SCANNED_A_BAD_PAD_NODE:
-                        dbg_err("bad node");
-                        goto corrupted;
-                default:
-                        dbg_err("unknown");
                        err = -EINVAL;
                        goto error;
                }
        }
-        if (!empty_chkd && !is_empty(buf, len)) {
+        if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
-                if (is_last_write(c, buf, offs)) {
+                if (!is_last_write(c, buf, offs))
-                        clean_buf(c, &buf, lnum, &offs, &len);
+                        goto corrupted_rescan;
-                        need_clean = 1;
+        } else if (ret == SCANNED_A_CORRUPT_NODE) {
-                } else {
+                if (!no_more_nodes(c, buf, len, lnum, offs))
+                        goto corrupted_rescan;
+        } else if (!is_empty(buf, len)) {
+                if (!is_last_write(c, buf, offs)) {
                        int corruption = first_non_ff(buf, len);
                        /*
@@ -728,29 +689,82 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
                }
        }
-        /* Drop nodes from incomplete group */
+        min_io_unit = round_down(offs, c->min_io_size);
-        if (grouped && drop_incomplete_group(sleb, &offs)) {
+        if (grouped)
-                buf = sbuf + offs;
+                /*
-                len = c->leb_size - offs;
+                 * If nodes are grouped, always drop the incomplete group at
-                clean_buf(c, &buf, lnum, &offs, &len);
+                 * the end.
-                need_clean = 1;
+                 */
-        }
+                drop_last_node(sleb, &offs, 1);
-        if (offs % c->min_io_size) {
+        /*
-                clean_buf(c, &buf, lnum, &offs, &len);
+         * While we are in the middle of the same min. I/O unit keep dropping
-                need_clean = 1;
+         * nodes. So basically, what we want is to make sure that the last min.
-        }
+         * I/O unit where we saw the corruption is dropped completely with all
+         * the uncorrupted nodes which may possibly sit there.
+         *
+         * In other words, let's name the min. I/O unit where the corruption
+         * starts B, and the previous min. I/O unit A. The below code tries to
+         * deal with a situation when half of B contains valid nodes or the end
+         * of a valid node, and the second half of B contains corrupted data or
+         * garbage. This means that UBIFS had been writing to B just before the
+         * power cut happened. I do not know how realistic is this scenario
+         * that half of the min. I/O unit had been written successfully and the
+         * other half not, but this is possible in our 'failure mode emulation'
+         * infrastructure at least.
+         *
+         * So what is the problem, why we need to drop those nodes? Why can't
+         * we just clean-up the second half of B by putting a padding node
+         * there? We can, and this works fine with one exception which was
+         * reproduced with power cut emulation testing and happens extremely
+         * rarely. The description follows, but it is worth noting that that is
+         * only about the GC head, so we could do this trick only if the bud
+         * belongs to the GC head, but it does not seem to be worth an
+         * additional "if" statement.
+         *
+         * So, imagine the file-system is full, we run GC which is moving valid
+         * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head
+         * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X
+         * and will try to continue. Imagine that LEB X is currently the
+         * dirtiest LEB, and the amount of used space in LEB Y is exactly the
+         * same as amount of free space in LEB X.
+         *
+         * And a power cut happens when nodes are moved from LEB X to LEB Y. We
+         * are here trying to recover LEB Y which is the GC head LEB. We find
+         * the min. I/O unit B as described above. Then we clean-up LEB Y by
+         * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function
+         * fails, because it cannot find a dirty LEB which could be GC'd into
+         * LEB Y! Even LEB X does not match because the amount of valid nodes
+         * there does not fit the free space in LEB Y any more! And this is
+         * because of the padding node which we added to LEB Y. The
+         * user-visible effect of this which I once observed and analysed is
+         * that we cannot mount the file-system with -ENOSPC error.
+         *
+         * So obviously, to make sure that situation does not happen we should
+         * free min. I/O unit B in LEB Y completely and the last used min. I/O
+         * unit in LEB Y should be A. This is basically what the below code
+         * tries to do.
+         */
+        while (min_io_unit == round_down(offs, c->min_io_size) &&
+               min_io_unit != offs &&
+               drop_last_node(sleb, &offs, grouped));
+        buf = sbuf + offs;
+        len = c->leb_size - offs;
+        clean_buf(c, &buf, lnum, &offs, &len);
        ubifs_end_scan(c, sleb, lnum, offs);
-        if (need_clean) {
+        err = fix_unclean_leb(c, sleb, start);
-                err = fix_unclean_leb(c, sleb, start);
+        if (err)
-                if (err)
+                goto error;
-                        goto error;
-        }
        return sleb;
+corrupted_rescan:
+        /* Re-scan the corrupted data with verbose messages */
+        dbg_err("corruptio %d", ret);
+        ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
 corrupted:
        ubifs_scanned_corruption(c, lnum, offs, buf);
        err = -EUCLEAN;
@@ -1070,6 +1084,53 @@ int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
 }
 /**
+ * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
+ * @c: UBIFS file-system description object
+ *
+ * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
+ * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
+ * zero in case of success and a negative error code in case of failure.
+ */
+static int grab_empty_leb(struct ubifs_info *c)
+{
+        int lnum, err;
+        /*
+         * Note, it is very important to first search for an empty LEB and then
+         * run the commit, not vice-versa. The reason is that there might be
+         * only one empty LEB at the moment, the one which has been the
+         * @c->gc_lnum just before the power cut happened. During the regular
+         * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
+         * one but GC can grab it. But at this moment this single empty LEB is
+         * not marked as taken, so if we run commit - what happens? Right, the
+         * commit will grab it and write the index there. Remember that the
+         * index always expands as long as there is free space, and it only
+         * starts consolidating when we run out of space.
+         *
+         * IOW, if we run commit now, we might not be able to find a free LEB
+         * after this.
+         */
+        lnum = ubifs_find_free_leb_for_idx(c);
+        if (lnum < 0) {
+                dbg_err("could not find an empty LEB");
+                dbg_dump_lprops(c);
+                dbg_dump_budg(c, &c->bi);
+                return lnum;
+        }
+        /* Reset the index flag */
+        err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
+                                  LPROPS_INDEX, 0);
+        if (err)
+                return err;
+        c->gc_lnum = lnum;
+        dbg_rcvry("found empty LEB %d, run commit", lnum);
+        return ubifs_run_commit(c);
+}
+/**
 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
 * @c: UBIFS file-system description object
 *
@@ -1091,71 +1152,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
 {
        struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
        struct ubifs_lprops lp;
-        int lnum, err;
+        int err;
+        dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
        c->gc_lnum = -1;
-        if (wbuf->lnum == -1) {
+        if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
-                dbg_rcvry("no GC head LEB");
+                return grab_empty_leb(c);
-                goto find_free;
-        }
-        /*
-         * See whether the used space in the dirtiest LEB fits in the GC head
-         * LEB.
-         */
-        if (wbuf->offs == c->leb_size) {
-                dbg_rcvry("no room in GC head LEB");
-                goto find_free;
-        }
        err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
        if (err) {
-                /*
+                if (err != -ENOSPC)
-                 * There are no dirty or empty LEBs subject to here being
-                 * enough for the index. Try to use
-                 * 'ubifs_find_free_leb_for_idx()', which will return any empty
-                 * LEBs (ignoring index requirements). If the index then
-                 * doesn't have enough LEBs the recovery commit will fail -
-                 * which is the  same result anyway i.e. recovery fails. So
-                 * there is no problem ignoring index  requirements and just
-                 * grabbing a free LEB since we have already established there
-                 * is not a dirty LEB we could have used instead.
-                 */
-                if (err == -ENOSPC) {
-                        dbg_rcvry("could not find a dirty LEB");
-                        goto find_free;
-                }
-                return err;
-        }
-        ubifs_assert(!(lp.flags & LPROPS_INDEX));
-        lnum = lp.lnum;
-        if (lp.free + lp.dirty == c->leb_size) {
-                /* An empty LEB was returned */
-                if (lp.free != c->leb_size) {
-                        err = ubifs_change_one_lp(c, lnum, c->leb_size,
-                                                  0, 0, 0, 0);
-                        if (err)
-                                return err;
-                }
-                err = ubifs_leb_unmap(c, lnum);
-                if (err)
                        return err;
-                c->gc_lnum = lnum;
-                dbg_rcvry("allocated LEB %d for GC", lnum);
+                dbg_rcvry("could not find a dirty LEB");
-                /* Run the commit */
+                return grab_empty_leb(c);
-                dbg_rcvry("committing");
-                return ubifs_run_commit(c);
-        }
-        /*
-         * There was no empty LEB so the used space in the dirtiest LEB must fit
-         * in the GC head LEB.
-         */
-        if (lp.free + lp.dirty < wbuf->offs) {
-                dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
-                          lnum, wbuf->lnum, wbuf->offs);
-                err = ubifs_return_leb(c, lnum);
-                if (err)
-                        return err;
-                goto find_free;
        }
+        ubifs_assert(!(lp.flags & LPROPS_INDEX));
+        ubifs_assert(lp.free + lp.dirty >= wbuf->offs);
        /*
         * We run the commit before garbage collection otherwise subsequent
         * mounts will see the GC and orphan deletion in a different order.
@@ -1164,11 +1180,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
        err = ubifs_run_commit(c);
        if (err)
                return err;
-        /*
-         * The data in the dirtiest LEB fits in the GC head LEB, so do the GC
+        dbg_rcvry("GC'ing LEB %d", lp.lnum);
-         * - use locking to keep 'ubifs_assert()' happy.
-         */
-        dbg_rcvry("GC'ing LEB %d", lnum);
        mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
        err = ubifs_garbage_collect_leb(c, &lp);
        if (err >= 0) {
@@ -1184,37 +1197,17 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
                        err = -EINVAL;
                return err;
        }
-        if (err != LEB_RETAINED) {
-                dbg_err("GC returned %d", err);
+        ubifs_assert(err == LEB_RETAINED);
+        if (err != LEB_RETAINED)
                return -EINVAL;
-        }
        err = ubifs_leb_unmap(c, c->gc_lnum);
        if (err)
                return err;
-        dbg_rcvry("allocated LEB %d for GC", lnum);
-        return 0;
-find_free:
+        dbg_rcvry("allocated LEB %d for GC", lp.lnum);
-        /*
+        return 0;
-         * There is no GC head LEB or the free space in the GC head LEB is too
-         * small, or there are not dirty LEBs. Allocate gc_lnum by calling
-         * 'ubifs_find_free_leb_for_idx()' so GC is not run.
-         */
-        lnum = ubifs_find_free_leb_for_idx(c);
-        if (lnum < 0) {
-                dbg_err("could not find an empty LEB");
-                return lnum;
-        }
-        /* And reset the index flag */
-        err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
-                                  LPROPS_INDEX, 0);
-        if (err)
-                return err;
-        c->gc_lnum = lnum;
-        dbg_rcvry("allocated LEB %d for GC", lnum);
-        /* Run the commit */
-        dbg_rcvry("committing");
-        return ubifs_run_commit(c);
 }
 /**
@@ -1456,7 +1449,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
        err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
        if (err)
                goto out;
-        dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ",
+        dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
                  (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
        return 0;
@@ -1505,20 +1498,27 @@ int ubifs_recover_size(struct ubifs_info *c)
                                e->i_size = le64_to_cpu(ino->size);
                        }
                }
                if (e->exists && e->i_size < e->d_size) {
-                        if (!e->inode && c->ro_mount) {
+                        if (c->ro_mount) {
                                /* Fix the inode size and pin it in memory */
                                struct inode *inode;
+                                struct ubifs_inode *ui;
+                                ubifs_assert(!e->inode);
                                inode = ubifs_iget(c->vfs_sb, e->inum);
                                if (IS_ERR(inode))
                                        return PTR_ERR(inode);
+                                ui = ubifs_inode(inode);
                                if (inode->i_size < e->d_size) {
                                        dbg_rcvry("ino %lu size %lld -> %lld",
                                                  (unsigned long)e->inum,
-                                                  e->d_size, inode->i_size);
+                                                  inode->i_size, e->d_size);
                                        inode->i_size = e->d_size;
-                                        ubifs_inode(inode)->ui_size = e->d_size;
+                                        ui->ui_size = e->d_size;
+                                        ui->synced_i_size = e->d_size;
                                        e->inode = inode;
                                        this = rb_next(this);
                                        continue;
@@ -1533,9 +1533,11 @@ int ubifs_recover_size(struct ubifs_info *c)
                                        iput(e->inode);
                        }
                }
                this = rb_next(this);
                rb_erase(&e->rb, &c->size_tree);
                kfree(e);
        }
        return 0;
 }
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index eed0fcff8d73..6617280d1679 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -33,43 +33,32 @@
 */
 #include "ubifs.h"
+#include <linux/list_sort.h>
-/*
- * Replay flags.
- *
- * REPLAY_DELETION: node was deleted
- * REPLAY_REF: node is a reference node
- */
-enum {
-        REPLAY_DELETION = 1,
-        REPLAY_REF = 2,
-};
 /**
- * struct replay_entry - replay tree entry.
+ * struct replay_entry - replay list entry.
 * @lnum: logical eraseblock number of the node
 * @offs: node offset
 * @len: node length
+ * @deletion: non-zero if this entry corresponds to a node deletion
 * @sqnum: node sequence number
- * @flags: replay flags
+ * @list: links the replay list
- * @rb: links the replay tree
 * @key: node key
 * @nm: directory entry name
 * @old_size: truncation old size
 * @new_size: truncation new size
- * @free: amount of free space in a bud
- * @dirty: amount of dirty space in a bud from padding and deletion nodes
 *
- * UBIFS journal replay must compare node sequence numbers, which means it must
+ * The replay process first scans all buds and builds the replay list, then
- * build a tree of node information to insert into the TNC.
+ * sorts the replay list in node sequence number order, and then inserts all
+ * the replay entries to the TNC.
 */
 struct replay_entry {
        int lnum;
        int offs;
        int len;
+        unsigned int deletion:1;
        unsigned long long sqnum;
-        int flags;
+        struct list_head list;
-        struct rb_node rb;
        union ubifs_key key;
        union {
                struct qstr nm;
@@ -77,10 +66,6 @@ struct replay_entry {
                        loff_t old_size;
                        loff_t new_size;
                };
-                struct {
-                        int free;
-                        int dirty;
-                };
        };
 };
@@ -88,57 +73,64 @@ struct replay_entry {
 * struct bud_entry - entry in the list of buds to replay.
 * @list: next bud in the list
 * @bud: bud description object
- * @free: free bytes in the bud
 * @sqnum: reference node sequence number
+ * @free: free bytes in the bud
+ * @dirty: dirty bytes in the bud
 */
 struct bud_entry {
        struct list_head list;
        struct ubifs_bud *bud;
-        int free;
        unsigned long long sqnum;
+        int free;
+        int dirty;
 };
 /**
 * set_bud_lprops - set free and dirty space used by a bud.
 * @c: UBIFS file-system description object
- * @r: replay entry of bud
+ * @b: bud entry which describes the bud
+ *
+ * This function makes sure the LEB properties of bud @b are set correctly
+ * after the replay. Returns zero in case of success and a negative error code
+ * in case of failure.
 */
-static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
+static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
 {
        const struct ubifs_lprops *lp;
        int err = 0, dirty;
        ubifs_get_lprops(c);
-        lp = ubifs_lpt_lookup_dirty(c, r->lnum);
+        lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
        if (IS_ERR(lp)) {
                err = PTR_ERR(lp);
                goto out;
        }
        dirty = lp->dirty;
-        if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
+        if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
                /*
                 * The LEB was added to the journal with a starting offset of
                 * zero which means the LEB must have been empty. The LEB
-                 * property values should be lp->free == c->leb_size and
+                 * property values should be @lp->free == @c->leb_size and
-                 * lp->dirty == 0, but that is not the case. The reason is that
+                 * @lp->dirty == 0, but that is not the case. The reason is that
-                 * the LEB was garbage collected. The garbage collector resets
+                 * the LEB had been garbage collected before it became the bud,
-                 * the free and dirty space without recording it anywhere except
+                 * and there was no commit in between. The garbage collector
-                 * lprops, so if there is not a commit then lprops does not have
+                 * resets the free and dirty space without recording it
-                 * that information next time the file system is mounted.
+                 * anywhere except lprops, so if there was no commit then
+                 * lprops does not have that information.
                 *
                 * We do not need to adjust free space because the scan has told
                 * us the exact value which is recorded in the replay entry as
-                 * r->free.
+                 * @b->free.
                 *
                 * However we do need to subtract from the dirty space the
                 * amount of space that the garbage collector reclaimed, which
                 * is the whole LEB minus the amount of space that was free.
                 */
-                dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
+                dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
                        lp->free, lp->dirty);
-                dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
+                dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
                        lp->free, lp->dirty);
                dirty -= c->leb_size - lp->free;
                /*
@@ -150,21 +142,48 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
                 */
                if (dirty != 0)
                        dbg_msg("LEB %d lp: %d free %d dirty "
-                                "replay: %d free %d dirty", r->lnum, lp->free,
+                                "replay: %d free %d dirty", b->bud->lnum,
-                                lp->dirty, r->free, r->dirty);
+                                lp->free, lp->dirty, b->free, b->dirty);
        }
-        lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty,
+        lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
                             lp->flags | LPROPS_TAKEN, 0);
        if (IS_ERR(lp)) {
                err = PTR_ERR(lp);
                goto out;
        }
+        /* Make sure the journal head points to the latest bud */
+        err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
+                                     b->bud->lnum, c->leb_size - b->free,
+                                     UBI_SHORTTERM);
 out:
        ubifs_release_lprops(c);
        return err;
 }
 /**
+ * set_buds_lprops - set free and dirty space for all replayed buds.
+ * @c: UBIFS file-system description object
+ *
+ * This function sets LEB properties for all replayed buds. Returns zero in
+ * case of success and a negative error code in case of failure.
+ */
+static int set_buds_lprops(struct ubifs_info *c)
+{
+        struct bud_entry *b;
+        int err;
+        list_for_each_entry(b, &c->replay_buds, list) {
+                err = set_bud_lprops(c, b);
+                if (err)
+                        return err;
+        }
+        return 0;
+}
+/**
 * trun_remove_range - apply a replay entry for a truncation to the TNC.
 * @c: UBIFS file-system description object
 * @r: replay entry of truncation
@@ -200,24 +219,22 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
 */
 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
 {
-        int err, deletion = ((r->flags & REPLAY_DELETION) != 0);
+        int err;
-        dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum,
+        dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum,
-                r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key));
+                r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key));
        /* Set c->replay_sqnum to help deal with dangling branches. */
        c->replay_sqnum = r->sqnum;
-        if (r->flags & REPLAY_REF)
+        if (is_hash_key(c, &r->key)) {
-                err = set_bud_lprops(c, r);
+                if (r->deletion)
-        else if (is_hash_key(c, &r->key)) {
-                if (deletion)
                        err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
                else
                        err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
                                               r->len, &r->nm);
        } else {
-                if (deletion)
+                if (r->deletion)
                        switch (key_type(c, &r->key)) {
                        case UBIFS_INO_KEY:
                        {
@@ -240,7 +257,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
                        return err;
                if (c->need_recovery)
-                        err = ubifs_recover_size_accum(c, &r->key, deletion,
+                        err = ubifs_recover_size_accum(c, &r->key, r->deletion,
                                                       r->new_size);
        }
@@ -248,68 +265,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
 }
 /**
- * destroy_replay_tree - destroy the replay.
+ * replay_entries_cmp - compare 2 replay entries.
- * @c: UBIFS file-system description object
+ * @priv: UBIFS file-system description object
+ * @a: first replay entry
+ * @b: second replay entry
 *
- * Destroy the replay tree.
+ * This is a comparison function for 'list_sort()' which compares 2 replay
+ * entries @a and @b by their sequence numbers. Returns %0 if @a and @b are the
+ * same entry, %1 if @a has the greater sequence number, and %-1 otherwise.
 */
-static void destroy_replay_tree(struct ubifs_info *c)
+static int replay_entries_cmp(void *priv, struct list_head *a,
+                              struct list_head *b)
 {
-        struct rb_node *this = c->replay_tree.rb_node;
+        struct replay_entry *ra, *rb;
-        struct replay_entry *r;
+        cond_resched();
-        while (this) {
+        if (a == b)
-                if (this->rb_left) {
+                return 0;
-                        this = this->rb_left;
-                        continue;
+        ra = list_entry(a, struct replay_entry, list);
-                } else if (this->rb_right) {
+        rb = list_entry(b, struct replay_entry, list);
-                        this = this->rb_right;
+        ubifs_assert(ra->sqnum != rb->sqnum);
-                        continue;
+        if (ra->sqnum > rb->sqnum)
-                }
+                return 1;
-                r = rb_entry(this, struct replay_entry, rb);
+        return -1;
-                this = rb_parent(this);
-                if (this) {
-                        if (this->rb_left == &r->rb)
-                                this->rb_left = NULL;
-                        else
-                                this->rb_right = NULL;
-                }
-                if (is_hash_key(c, &r->key))
-                        kfree(r->nm.name);
-                kfree(r);
-        }
-        c->replay_tree = RB_ROOT;
 }
 /**
- * apply_replay_tree - apply the replay tree to the TNC.
+ * apply_replay_list - apply the replay list to the TNC.
 * @c: UBIFS file-system description object
 *
- * Apply the replay tree.
+ * Apply all entries in the replay list to the TNC. Returns zero in case of
- * Returns zero in case of success and a negative error code in case of
+ * success and a negative error code in case of failure.
- * failure.
 */
-static int apply_replay_tree(struct ubifs_info *c)
+static int apply_replay_list(struct ubifs_info *c)
 {
-        struct rb_node *this = rb_first(&c->replay_tree);
+        struct replay_entry *r;
+        int err;
-        while (this) {
+        list_sort(c, &c->replay_list, &replay_entries_cmp);
-                struct replay_entry *r;
-                int err;
+        list_for_each_entry(r, &c->replay_list, list) {
                cond_resched();
-                r = rb_entry(this, struct replay_entry, rb);
                err = apply_replay_entry(c, r);
                if (err)
                        return err;
-                this = rb_next(this);
        }
        return 0;
 }
 /**
- * insert_node - insert a node to the replay tree.
+ * destroy_replay_list - destroy the replay list.
+ * @c: UBIFS file-system description object
+ *
+ * Destroy the replay list.
+ */
+static void destroy_replay_list(struct ubifs_info *c)
+{
+        struct replay_entry *r, *tmp;
+        list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
+                if (is_hash_key(c, &r->key))
+                        kfree(r->nm.name);
+                list_del(&r->list);
+                kfree(r);
+        }
+}
+/**
+ * insert_node - insert a node to the replay list
 * @c: UBIFS file-system description object
 * @lnum: node logical eraseblock number
 * @offs: node offset
@@ -321,39 +347,25 @@ static int apply_replay_tree(struct ubifs_info *c)
 * @old_size: truncation old size
 * @new_size: truncation new size
 *
- * This function inserts a scanned non-direntry node to the replay tree. The
+ * This function inserts a scanned non-direntry node to the replay list. The
- * replay tree is an RB-tree containing @struct replay_entry elements which are
+ * replay list contains @struct replay_entry elements, and we sort this list in
- * indexed by the sequence number. The replay tree is applied at the very end
+ * sequence number order before applying it. The replay list is applied at the
- * of the replay process. Since the tree is sorted in sequence number order,
+ * very end of the replay process. Since the list is sorted in sequence number
- * the older modifications are applied first. This function returns zero in
+ * order, the older modifications are applied first. This function returns zero
- * case of success and a negative error code in case of failure.
+ * in case of success and a negative error code in case of failure.
 */
 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
                       union ubifs_key *key, unsigned long long sqnum,
                       int deletion, int *used, loff_t old_size,
                       loff_t new_size)
 {
-        struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
        struct replay_entry *r;
+        dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
        if (key_inum(c, key) >= c->highest_inum)
                c->highest_inum = key_inum(c, key);
-        dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
-        while (*p) {
-                parent = *p;
-                r = rb_entry(parent, struct replay_entry, rb);
-                if (sqnum < r->sqnum) {
-                        p = &(*p)->rb_left;
-                        continue;
-                } else if (sqnum > r->sqnum) {
-                        p = &(*p)->rb_right;
-                        continue;
-                }
-                ubifs_err("duplicate sqnum in replay");
-                return -EINVAL;
-        }
        r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
        if (!r)
                return -ENOMEM;
@@ -363,19 +375,18 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
        r->lnum = lnum;
        r->offs = offs;
        r->len = len;
+        r->deletion = !!deletion;
        r->sqnum = sqnum;
-        r->flags = (deletion ? REPLAY_DELETION : 0);
+        key_copy(c, key, &r->key);
        r->old_size = old_size;
        r->new_size = new_size;
-        key_copy(c, key, &r->key);
-        rb_link_node(&r->rb, parent, p);
+        list_add_tail(&r->list, &c->replay_list);
-        rb_insert_color(&r->rb, &c->replay_tree);
        return 0;
 }
 /**
- * insert_dent - insert a directory entry node into the replay tree.
+ * insert_dent - insert a directory entry node into the replay list.
 * @c: UBIFS file-system description object
 * @lnum: node logical eraseblock number
 * @offs: node offset
@@ -387,43 +398,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
 * @deletion: non-zero if this is a deletion
 * @used: number of bytes in use in a LEB
 *
- * This function inserts a scanned directory entry node to the replay tree.
+ * This function inserts a scanned directory entry node or an extended
- * Returns zero in case of success and a negative error code in case of
+ * attribute entry to the replay list. Returns zero in case of success and a
- * failure.
+ * negative error code in case of failure.
- *
- * This function is also used for extended attribute entries because they are
- * implemented as directory entry nodes.
 */
 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
                       union ubifs_key *key, const char *name, int nlen,
                       unsigned long long sqnum, int deletion, int *used)
 {
-        struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
        struct replay_entry *r;
        char *nbuf;
+        dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
        if (key_inum(c, key) >= c->highest_inum)
                c->highest_inum = key_inum(c, key);
-        dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
-        while (*p) {
-                parent = *p;
-                r = rb_entry(parent, struct replay_entry, rb);
-                if (sqnum < r->sqnum) {
-                        p = &(*p)->rb_left;
-                        continue;
-                }
-                if (sqnum > r->sqnum) {
-                        p = &(*p)->rb_right;
-                        continue;
-                }
-                ubifs_err("duplicate sqnum in replay");
-                return -EINVAL;
-        }
        r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
        if (!r)
                return -ENOMEM;
        nbuf = kmalloc(nlen + 1, GFP_KERNEL);
        if (!nbuf) {
                kfree(r);
@@ -435,17 +428,15 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
        r->lnum = lnum;
        r->offs = offs;
        r->len = len;
+        r->deletion = !!deletion;
        r->sqnum = sqnum;
+        key_copy(c, key, &r->key);
        r->nm.len = nlen;
        memcpy(nbuf, name, nlen);
        nbuf[nlen] = '\0';
        r->nm.name = nbuf;
-        r->flags = (deletion ? REPLAY_DELETION : 0);
-        key_copy(c, key, &r->key);
-        ubifs_assert(!*p);
+        list_add_tail(&r->list, &c->replay_list);
-        rb_link_node(&r->rb, parent, p);
-        rb_insert_color(&r->rb, &c->replay_tree);
        return 0;
 }
@@ -482,29 +473,92 @@ int ubifs_validate_entry(struct ubifs_info *c,
 }
 /**
+ * is_last_bud - check if the bud is the last in the journal head.
+ * @c: UBIFS file-system description object
+ * @bud: bud description object
+ *
+ * This function checks if bud @bud is the last bud in its journal head. This
+ * information is then used by 'replay_bud()' to decide whether the bud can
+ * have corruptions or not. Indeed, only last buds can be corrupted by power
+ * cuts. Returns %1 if this is the last bud, and %0 if not. If the following
+ * bud has to be probed and reading it fails, %0 is returned as well.
+ */
+static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
+{
+        struct ubifs_jhead *jh = &c->jheads[bud->jhead];
+        struct ubifs_bud *next;
+        uint32_t data;
+        int err;
+        if (list_is_last(&bud->list, &jh->buds_list))
+                return 1;
+        /*
+         * The following is a quirk to make sure we work correctly with UBIFS
+         * images used with older UBIFS.
+         *
+         * Normally, the last bud will be the last in the journal head's list
+         * of buds. However, there is one exception if the UBIFS image belongs
+         * to older UBIFS. This is fairly unlikely: one would need to use old
+         * UBIFS, then have a power cut exactly at the right point, and then
+         * try to mount this image with new UBIFS.
+         *
+         * The exception is: it is possible to have 2 buds A and B, A goes
+         * before B, and B is the last, bud B contains no data, and bud A is
+         * corrupted at the end. The reason is that in older versions when the
+         * journal code switched the next bud (from A to B), it first added a
+         * log reference node for the new bud (B), and only after this it
+         * synchronized the write-buffer of current bud (A). But later this was
+         * changed and UBIFS started to always synchronize the write-buffer of
+         * the bud (A) before writing the log reference for the new bud (B).
+         *
+         * But because older UBIFS always synchronized A's write-buffer before
+         * writing to B, we can recognize this exceptional situation by
+         * checking the contents of bud B - if it is empty, then A can be
+         * treated as the last and we can recover it. Only the first 4 bytes
+         * of B at its start offset are examined: all 0xFF means empty.
+         *
+         * TODO: remove this piece of code in a couple of years (today it is
+         * 16.05.2011).
+         */
+        next = list_entry(bud->list.next, struct ubifs_bud, list);
+        if (!list_is_last(&next->list, &jh->buds_list))
+                return 0;
+        err = ubi_read(c->ubi, next->lnum, (char *)&data,
+                       next->start, 4);
+        if (err)
+                return 0;
+        return data == 0xFFFFFFFF;
+}
+/**
 * replay_bud - replay a bud logical eraseblock.
 * @c: UBIFS file-system description object
- * @lnum: bud logical eraseblock number to replay
+ * @b: bud entry which describes the bud
- * @offs: bud start offset
- * @jhead: journal head to which this bud belongs
- * @free: amount of free space in the bud is returned here
- * @dirty: amount of dirty space from padding and deletion nodes is returned
- * here
 *
- * This function returns zero in case of success and a negative error code in
+ * This function replays bud @b, recovers it if needed, and adds all nodes
- * case of failure.
+ * from this bud to the replay list. Returns zero in case of success and a
+ * negative error code in case of failure.
 */
-static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
+static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
-                      int *free, int *dirty)
 {
-        int err = 0, used = 0;
+        int is_last = is_last_bud(c, b->bud);
+        int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
        struct ubifs_scan_leb *sleb;
        struct ubifs_scan_node *snod;
-        struct ubifs_bud *bud;
-        dbg_mnt("replay bud LEB %d, head %d", lnum, jhead);
+        dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
-        if (c->need_recovery)
+                lnum, b->bud->jhead, offs, is_last);
-                sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
+        if (c->need_recovery && is_last)
+                /*
+                 * Recover only last LEBs in the journal heads, because power
+                 * cuts may cause corruptions only in these LEBs, because only
+                 * these LEBs could possibly be written to at the power cut
+                 * time.
+                 */
+                sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf,
+                                         b->bud->jhead != GCHD);
        else
                sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
        if (IS_ERR(sleb))
@@ -620,19 +674,13 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
                        goto out;
        }
-        bud = ubifs_search_bud(c, lnum);
+        ubifs_assert(ubifs_search_bud(c, lnum));
-        if (!bud)
-                BUG();
        ubifs_assert(sleb->endpt - offs >= used);
        ubifs_assert(sleb->endpt % c->min_io_size == 0);
-        if (sleb->endpt + c->min_io_size <= c->leb_size && !c->ro_mount)
+        b->dirty = sleb->endpt - offs - used;
-                err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum,
+        b->free = c->leb_size - sleb->endpt;
-                                             sleb->endpt, UBI_SHORTTERM);
+        dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free);
-        *dirty = sleb->endpt - offs - used;
-        *free = c->leb_size - sleb->endpt;
 out:
        ubifs_scan_destroy(sleb);
@@ -646,55 +694,6 @@ out_dump:
 }
 /**
- * insert_ref_node - insert a reference node to the replay tree.
- * @c: UBIFS file-system description object
- * @lnum: node logical eraseblock number
- * @offs: node offset
- * @sqnum: sequence number
- * @free: amount of free space in bud
- * @dirty: amount of dirty space from padding and deletion nodes
- *
- * This function inserts a reference node to the replay tree and returns zero
- * in case of success or a negative error code in case of failure.
- */
-static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
-                           unsigned long long sqnum, int free, int dirty)
-{
-        struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
-        struct replay_entry *r;
-        dbg_mnt("add ref LEB %d:%d", lnum, offs);
-        while (*p) {
-                parent = *p;
-                r = rb_entry(parent, struct replay_entry, rb);
-                if (sqnum < r->sqnum) {
-                        p = &(*p)->rb_left;
-                        continue;
-                } else if (sqnum > r->sqnum) {
-                        p = &(*p)->rb_right;
-                        continue;
-                }
-                ubifs_err("duplicate sqnum in replay tree");
-                return -EINVAL;
-        }
-        r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
-        if (!r)
-                return -ENOMEM;
-        r->lnum = lnum;
-        r->offs = offs;
-        r->sqnum = sqnum;
-        r->flags = REPLAY_REF;
-        r->free = free;
-        r->dirty = dirty;
-        rb_link_node(&r->rb, parent, p);
-        rb_insert_color(&r->rb, &c->replay_tree);
-        return 0;
-}
-/**
 * replay_buds - replay all buds.
 * @c: UBIFS file-system description object
 *
@@ -704,17 +703,16 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
 static int replay_buds(struct ubifs_info *c)
 {
        struct bud_entry *b;
-        int err, uninitialized_var(free), uninitialized_var(dirty);
+        int err;
+        unsigned long long prev_sqnum = 0;
        list_for_each_entry(b, &c->replay_buds, list) {
-                err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead,
+                err = replay_bud(c, b);
-                                 &free, &dirty);
-                if (err)
-                        return err;
-                err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
-                                      free, dirty);
                if (err)
                        return err;
+                ubifs_assert(b->sqnum > prev_sqnum);
+                prev_sqnum = b->sqnum;
        }
        return 0;
@@ -1054,25 +1052,29 @@ int ubifs_replay_journal(struct ubifs_info *c)
        if (err)
                goto out;
-        err = apply_replay_tree(c);
+        err = apply_replay_list(c);
+        if (err)
+                goto out;
+        err = set_buds_lprops(c);
        if (err)
                goto out;
        /*
-         * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable
+         * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
-         * to roughly estimate index growth. Things like @c->min_idx_lebs
+         * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
         * depend on it. This means we have to initialize it to make sure
         * budgeting works properly.
         */
-        c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
+        c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
-        c->budg_uncommitted_idx *= c->max_idx_node_sz;
+        c->bi.uncommitted_idx *= c->max_idx_node_sz;
        ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
        dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
                "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
                (unsigned long)c->highest_inum);
 out:
-        destroy_replay_tree(c);
+        destroy_replay_list(c);
        destroy_bud_list(c);
        c->replaying = 0;
        return err;
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index bf31b4729e51..c606f010e8df 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -475,7 +475,8 @@ failed:
 * @c: UBIFS file-system description object
 *
 * This function returns a pointer to the superblock node or a negative error
- * code.
+ * code. Note, the user of this function is responsible for kfree()'ing the
+ * returned superblock buffer.
 */
 struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
 {
@@ -616,6 +617,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
        c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
        memcpy(&c->uuid, &sup->uuid, 16);
        c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
+        c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP);
        /* Automatically increase file system size to the maximum size */
        c->old_leb_cnt = c->leb_cnt;
@@ -650,3 +652,152 @@ out:
        kfree(sup);
        return err;
 }
+/**
+ * fixup_leb - fixup/unmap an LEB containing free space.
+ * @c: UBIFS file-system description object
+ * @lnum: the LEB number to fix up
+ * @len: number of used bytes in LEB (starting at offset 0); asserted to be
+ *       non-negative, a multiple of @c->min_io_size and less than
+ *       @c->leb_size
+ *
+ * This function reads the contents of the given LEB number @lnum, then fixes
+ * it up, so that empty min. I/O units in the end of LEB are actually erased on
+ * flash (rather than being just all-0xff real data). If the LEB is completely
+ * empty, it is simply unmapped. Otherwise the used bytes are read into
+ * @c->sbuf and written back with 'ubi_leb_change()'. Returns the value of the
+ * UBI call performed last (presumably zero in case of success and a negative
+ * error code in case of failure).
+ */
+static int fixup_leb(struct ubifs_info *c, int lnum, int len)
+{
+        int err;
+        ubifs_assert(len >= 0);
+        ubifs_assert(len % c->min_io_size == 0);
+        ubifs_assert(len < c->leb_size);
+        if (len == 0) {
+                dbg_mnt("unmap empty LEB %d", lnum);
+                return ubi_leb_unmap(c->ubi, lnum);
+        }
+        dbg_mnt("fixup LEB %d, data len %d", lnum, len);
+        err = ubi_read(c->ubi, lnum, c->sbuf, 0, len);
+        if (err)
+                return err;
+        return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
+}
+/**
+ * fixup_free_space - find & remap all LEBs containing free space.
+ * @c: UBIFS file-system description object
+ *
+ * This function walks through all LEBs in the filesystem and fixes up those
+ * containing free/empty space. The areas are processed in this order: master
+ * area, log, LPT area, orphans area, main area. LEB properties are taken with
+ * 'ubifs_get_lprops()' for the whole walk and released before returning. The
+ * walk stops at the first failure. Returns zero if every step succeeded,
+ * otherwise the error code of the step which failed.
+ */
+static int fixup_free_space(struct ubifs_info *c)
+{
+        int lnum, err = 0;
+        struct ubifs_lprops *lprops;
+        ubifs_get_lprops(c);
+        /* Fixup LEBs in the master area */
+        for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) {
+                err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz);
+                if (err)
+                        goto out;
+        }
+        /* Unmap unused log LEBs */
+        lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
+        while (lnum != c->ltail_lnum) {
+                err = fixup_leb(c, lnum, 0);
+                if (err)
+                        goto out;
+                lnum = ubifs_next_log_lnum(c, lnum);
+        }
+        /* Fixup the current log head */
+        err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
+        if (err)
+                goto out;
+        /* Fixup LEBs in the LPT area */
+        for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
+                int free = c->ltab[lnum - c->lpt_first].free;
+                if (free > 0) {
+                        err = fixup_leb(c, lnum, c->leb_size - free);
+                        if (err)
+                                goto out;
+                }
+        }
+        /* Unmap LEBs in the orphans area */
+        for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
+                err = fixup_leb(c, lnum, 0);
+                if (err)
+                        goto out;
+        }
+        /* Fixup LEBs in the main area */
+        for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
+                lprops = ubifs_lpt_lookup(c, lnum);
+                if (IS_ERR(lprops)) {
+                        err = PTR_ERR(lprops);
+                        goto out;
+                }
+                if (lprops->free > 0) {
+                        err = fixup_leb(c, lnum, c->leb_size - lprops->free);
+                        if (err)
+                                goto out;
+                }
+        }
+out:
+        ubifs_release_lprops(c);
+        return err;
+}
+/**
+ * ubifs_fixup_free_space - find & fix all LEBs with free space.
+ * @c: UBIFS file-system description object
+ *
+ * This function fixes up LEBs containing free space on first mount, if the
+ * appropriate flag was set when the FS was created. Each LEB with one or more
+ * empty min. I/O unit (i.e. free-space-count > 0) is re-written, to make sure
+ * the free space is actually erased. E.g., this is necessary for some NAND
+ * chips, since the free space may have been programmed like real "0xff" data
+ * (generating a non-0xff ECC), causing future writes to the not-really-erased
+ * NAND pages to behave badly. After the space is fixed up, the superblock flag
+ * is cleared, so that this is skipped for all future mounts.
+ *
+ * The caller must ensure that @c->space_fixup is set and that the file-system
+ * is not mounted R/O (both are asserted). Note, @c->space_fixup is cleared
+ * before the superblock is written, so it stays cleared even if that write
+ * fails. Returns zero in case of success, otherwise the error code of the
+ * fixup, of the superblock read or of the superblock write.
+ */
+int ubifs_fixup_free_space(struct ubifs_info *c)
+{
+        int err;
+        struct ubifs_sb_node *sup;
+        ubifs_assert(c->space_fixup);
+        ubifs_assert(!c->ro_mount);
+        ubifs_msg("start fixing up free space");
+        err = fixup_free_space(c);
+        if (err)
+                return err;
+        sup = ubifs_read_sb_node(c);
+        if (IS_ERR(sup))
+                return PTR_ERR(sup);
+        /* Free-space fixup is no longer required */
+        c->space_fixup = 0;
+        sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP);
+        err = ubifs_write_sb_node(c, sup);
+        kfree(sup);
+        if (err)
+                return err;
+        ubifs_msg("free space fixup complete");
+        return err;
+}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index be6c7b008f38..6db0bdaa9f74 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -375,7 +375,7 @@ out:
                ubifs_release_dirty_inode_budget(c, ui);
        else {
                /* We've deleted something - clean the "no space" flags */
-                c->nospace = c->nospace_rp = 0;
+                c->bi.nospace = c->bi.nospace_rp = 0;
                smp_wmb();
        }
 done:
@@ -694,11 +694,11 @@ static int init_constants_sb(struct ubifs_info *c)
         * be compressed and direntries are of the maximum size.
         *
         * Note, data, which may be stored in inodes is budgeted separately, so
-         * it is not included into 'c->inode_budget'.
+         * it is not included into 'c->bi.inode_budget'.
         */
-        c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
+        c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
-        c->inode_budget = UBIFS_INO_NODE_SZ;
+        c->bi.inode_budget = UBIFS_INO_NODE_SZ;
-        c->dent_budget = UBIFS_MAX_DENT_NODE_SZ;
+        c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ;
        /*
         * When the amount of flash space used by buds becomes
@@ -742,7 +742,7 @@ static void init_constants_master(struct ubifs_info *c)
 {
        long long tmp64;
-        c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+        c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
        c->report_rp_size = ubifs_reported_space(c, c->rp_size);
        /*
@@ -1144,8 +1144,8 @@ static int check_free_space(struct ubifs_info *c)
 {
        ubifs_assert(c->dark_wm > 0);
        if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
-                ubifs_err("insufficient free space to mount in read/write mode");
+                ubifs_err("insufficient free space to mount in R/W mode");
-                dbg_dump_budg(c);
+                dbg_dump_budg(c, &c->bi);
                dbg_dump_lprops(c);
                return -ENOSPC;
        }
@@ -1257,12 +1257,12 @@ static int mount_ubifs(struct ubifs_info *c)
                goto out_free;
        }
+        err = alloc_wbufs(c);
+        if (err)
+                goto out_cbuf;
        sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
        if (!c->ro_mount) {
-                err = alloc_wbufs(c);
-                if (err)
-                        goto out_cbuf;
                /* Create background thread */
                c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
                if (IS_ERR(c->bgt)) {
@@ -1304,7 +1304,7 @@ static int mount_ubifs(struct ubifs_info *c)
        if (err)
                goto out_lpt;
-        err = dbg_check_idx_size(c, c->old_idx_sz);
+        err = dbg_check_idx_size(c, c->bi.old_idx_sz);
        if (err)
                goto out_lpt;
@@ -1313,7 +1313,7 @@ static int mount_ubifs(struct ubifs_info *c)
                goto out_journal;
        /* Calculate 'min_idx_lebs' after journal replay */
-        c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+        c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
        err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount);
        if (err)
@@ -1396,6 +1396,12 @@ static int mount_ubifs(struct ubifs_info *c)
        } else
                ubifs_assert(c->lst.taken_empty_lebs > 0);
+        if (!c->ro_mount && c->space_fixup) {
+                err = ubifs_fixup_free_space(c);
+                if (err)
+                        goto out_infos;
+        }
        err = dbg_check_filesystem(c);
        if (err)
                goto out_infos;
@@ -1442,7 +1448,8 @@ static int mount_ubifs(struct ubifs_info *c)
                c->main_lebs, c->main_first, c->leb_cnt - 1);
        dbg_msg("index LEBs:          %d", c->lst.idx_lebs);
        dbg_msg("total index bytes:   %lld (%lld KiB, %lld MiB)",
-                c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20);
+                c->bi.old_idx_sz, c->bi.old_idx_sz >> 10,
+                c->bi.old_idx_sz >> 20);
        dbg_msg("key hash type:       %d", c->key_hash_type);
        dbg_msg("tree fanout:         %d", c->fanout);
        dbg_msg("reserved GC LEB:     %d", c->gc_lnum);
@@ -1456,7 +1463,7 @@ static int mount_ubifs(struct ubifs_info *c)
        dbg_msg("node sizes:          ref %zu, cmt. start %zu, orph %zu",
                UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
        dbg_msg("max. node sizes:     data %zu, inode %zu dentry %zu, idx %d",
-                UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
+                UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
                UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout));
        dbg_msg("dead watermark:      %d", c->dead_wm);
        dbg_msg("dark watermark:      %d", c->dark_wm);
@@ -1584,6 +1591,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
                }
                sup->leb_cnt = cpu_to_le32(c->leb_cnt);
                err = ubifs_write_sb_node(c, sup);
+                kfree(sup);
                if (err)
                        goto out;
        }
@@ -1631,12 +1639,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
        if (err)
                goto out;
-        err = alloc_wbufs(c);
-        if (err)
-                goto out;
-        ubifs_create_buds_lists(c);
        /* Create background thread */
        c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
        if (IS_ERR(c->bgt)) {
@@ -1690,6 +1692,13 @@ static int ubifs_remount_rw(struct ubifs_info *c)
                 */
                err = dbg_check_space_info(c);
        }
+        if (c->space_fixup) {
+                err = ubifs_fixup_free_space(c);
+                if (err)
+                        goto out;
+        }
        mutex_unlock(&c->umount_mutex);
        return err;
@@ -1744,7 +1753,6 @@ static void ubifs_remount_ro(struct ubifs_info *c)
        if (err)
                ubifs_ro_mode(c, err);
-        free_wbufs(c);
        vfree(c->orph_buf);
        c->orph_buf = NULL;
        kfree(c->write_reserve_buf);
@@ -1773,10 +1781,9 @@ static void ubifs_put_super(struct super_block *sb)
         * to write them back because of I/O errors.
         */
        if (!c->ro_error) {
-                ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
+                ubifs_assert(c->bi.idx_growth == 0);
-                ubifs_assert(c->budg_idx_growth == 0);
+                ubifs_assert(c->bi.dd_growth == 0);
-                ubifs_assert(c->budg_dd_growth == 0);
+                ubifs_assert(c->bi.data_growth == 0);
-                ubifs_assert(c->budg_data_growth == 0);
        }
        /*
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index de485979ca39..8119b1fd8d94 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -2557,11 +2557,11 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
                if (err) {
                        /* Ensure the znode is dirtied */
                        if (znode->cnext || !ubifs_zn_dirty(znode)) {
-                                    znode = dirty_cow_bottom_up(c, znode);
+                                znode = dirty_cow_bottom_up(c, znode);
-                                    if (IS_ERR(znode)) {
+                                if (IS_ERR(znode)) {
-                                            err = PTR_ERR(znode);
+                                        err = PTR_ERR(znode);
-                                            goto out_unlock;
+                                        goto out_unlock;
-                                    }
+                                }
                        }
                        err = tnc_delete(c, znode, n);
                }
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 53288e5d604e..41920f357bbf 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -377,15 +377,13 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
                                c->gap_lebs = NULL;
                                return err;
                        }
-                        if (!dbg_force_in_the_gaps_enabled) {
+                        if (!dbg_force_in_the_gaps_enabled()) {
                                /*
                                 * Do not print scary warnings if the debugging
                                 * option which forces in-the-gaps is enabled.
                                 */
-                                ubifs_err("out of space");
+                                ubifs_warn("out of space");
-                                spin_lock(&c->space_lock);
+                                dbg_dump_budg(c, &c->bi);
-                                dbg_dump_budg(c);
-                                spin_unlock(&c->space_lock);
                                dbg_dump_lprops(c);
                        }
                        /* Try to commit anyway */
@@ -796,16 +794,16 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
        spin_lock(&c->space_lock);
        /*
         * Although we have not finished committing yet, update size of the
-         * committed index ('c->old_idx_sz') and zero out the index growth
+         * committed index ('c->bi.old_idx_sz') and zero out the index growth
         * budget. It is OK to do this now, because we've reserved all the
         * space which is needed to commit the index, and it is save for the
         * budgeting subsystem to assume the index is already committed,
         * even though it is not.
         */
-        ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
+        ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
-        c->old_idx_sz = c->calc_idx_sz;
+        c->bi.old_idx_sz = c->calc_idx_sz;
-        c->budg_uncommitted_idx = 0;
+        c->bi.uncommitted_idx = 0;
-        c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+        c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
        spin_unlock(&c->space_lock);
        mutex_unlock(&c->tnc_mutex);
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 191ca7863fe7..e24380cf46ed 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -408,9 +408,11 @@ enum {
 * Superblock flags.
 *
 * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set
+ * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed
 */
 enum {
        UBIFS_FLG_BIGLPT = 0x02,
+        UBIFS_FLG_SPACE_FIXUP = 0x04,
 };
 /**
@@ -434,7 +436,7 @@ struct ubifs_ch {
        __u8 node_type;
        __u8 group_type;
        __u8 padding[2];
-} __attribute__ ((packed));
+} __packed;
 /**
 * union ubifs_dev_desc - device node descriptor.
@@ -448,7 +450,7 @@ struct ubifs_ch {
 union ubifs_dev_desc {
        __le32 new;
        __le64 huge;
-} __attribute__ ((packed));
+} __packed;
 /**
 * struct ubifs_ino_node - inode node.
@@ -509,7 +511,7 @@ struct ubifs_ino_node {
        __le16 compr_type;
        __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */
        __u8 data[];
-} __attribute__ ((packed));
+} __packed;
 /**
 * struct ubifs_dent_node - directory entry node.
@@ -534,7 +536,7 @@ struct ubifs_dent_node {
        __le16 nlen;
        __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */
        __u8 name[];
-} __attribute__ ((packed));
+} __packed;
 /**
 * struct ubifs_data_node - data node.
@@ -555,7 +557,7 @@ struct ubifs_data_node {
        __le16 compr_type;
        __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
        __u8 data[];
-} __attribute__ ((packed));
+} __packed;
 /**
 * struct ubifs_trun_node - truncation node.
@@ -575,7 +577,7 @@ struct ubifs_trun_node {
        __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */
        __le64 old_size;
        __le64 new_size;
-} __attribute__ ((packed));
+} __packed;
 /**
 * struct ubifs_pad_node - padding node.
@@ -586,7 +588,7 @@ struct ubifs_trun_node {
 struct ubifs_pad_node {
        struct ubifs_ch ch;
        __le32 pad_len;
-} __attribute__ ((packed));
+} __packed;
 /**
 * struct ubifs_sb_node - superblock node.
@@ -644,7 +646,7 @@ struct ubifs_sb_node {
        __u8 uuid[16];
        __le32 ro_compat_version;
        __u8 padding2[3968];
-} __attribute__ ((packed));
+} __packed;
 /**
 * struct ubifs_mst_node - master node.
@@ -711,7 +713,7 @@ struct ubifs_mst_node {
        __le32 idx_lebs;
        __le32 leb_cnt;
        __u8 padding[344];
-} __attribute__ ((packed));
+} __packed;
 /**
 * struct ubifs_ref_node - logical eraseblock reference node.
@@ -727,7 +729,7 @@ struct ubifs_ref_node {
        __le32 offs;
        __le32 jhead;
        __u8 padding[28];
-} __attribute__ ((packed));
+} __packed;
 /**
 * struct ubifs_branch - key/reference/length branch
@@ -741,7 +743,7 @@ struct ubifs_branch {
        __le32 offs;
        __le32 len;
        __u8 key[];
-} __attribute__ ((packed));
+} __packed;
 /**
 * struct ubifs_idx_node - indexing node.
@@ -755,7 +757,7 @@ struct ubifs_idx_node {
        __le16 child_cnt;
        __le16 level;
        __u8 branches[];
-} __attribute__ ((packed));
+} __packed;
 /**
 * struct ubifs_cs_node - commit start node.
@@ -765,7 +767,7 @@ struct ubifs_idx_node {
 struct ubifs_cs_node {
        struct ubifs_ch ch;
        __le64 cmt_no;
-} __attribute__ ((packed));
+} __packed;
 /**
 * struct ubifs_orph_node - orphan node.
@@ -777,6 +779,6 @@ struct ubifs_orph_node {
        struct ubifs_ch ch;
        __le64 cmt_no;
        __le64 inos[];
-} __attribute__ ((packed));
+} __packed;
 #endif /* __UBIFS_MEDIA_H__ */
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 8c40ad3c6721..93d1412a06f0 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -389,9 +389,9 @@ struct ubifs_gced_idx_leb {
 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
 * make sure @inode->i_size is always changed under @ui_mutex, because it
- * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock
+ * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would
- * with 'ubifs_writepage()' (see file.c). All the other inode fields are
+ * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields
- * changed under @ui_mutex, so they do not need "shadow" fields. Note, one
+ * are changed under @ui_mutex, so they do not need "shadow" fields. Note, one
 * could consider to rework locking and base it on "shadow" fields.
 */
 struct ubifs_inode {
@@ -937,6 +937,40 @@ struct ubifs_mount_opts {
        unsigned int compr_type:2;
 };
+/**
+ * struct ubifs_budg_info - UBIFS budgeting information.
+ * @idx_growth: amount of bytes budgeted for index growth
+ * @data_growth: amount of bytes budgeted for cached data
+ * @dd_growth: amount of bytes budgeted for cached data that will make
+ *             other data dirty
+ * @uncommitted_idx: amount of bytes that were budgeted for growth of the
+ *                   index, but which still have to be taken into account
+ *                   because the index has not been committed so far
+ * @old_idx_sz: size of index on flash
+ * @min_idx_lebs: minimum number of LEBs required for the index
+ * @nospace: non-zero if the file-system does not have flash space (used as
+ *           optimization)
+ * @nospace_rp: the same as @nospace, but additionally means that even reserved
+ *              pool is full
+ * @page_budget: budget for a page (constant, never changed after mount)
+ * @inode_budget: budget for an inode (constant, never changed after mount)
+ * @dent_budget: budget for a directory entry (constant, never changed after
+ *               mount)
+ */
+struct ubifs_budg_info {
+        long long idx_growth;
+        long long data_growth;
+        long long dd_growth;
+        long long uncommitted_idx;
+        unsigned long long old_idx_sz;
+        int min_idx_lebs;
+        unsigned int nospace:1;
+        unsigned int nospace_rp:1;
+        int page_budget;
+        int inode_budget;
+        int dent_budget;
+};
 struct ubifs_debug_info;
 /**
@@ -980,6 +1014,7 @@ struct ubifs_debug_info;
 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running
 *
 * @big_lpt: flag that LPT is too big to write whole during commit
+ * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up
 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
 *                   recovery)
 * @bulk_read: enable bulk-reads
@@ -1057,32 +1092,14 @@ struct ubifs_debug_info;
 * @dirty_zn_cnt: number of dirty znodes
 * @clean_zn_cnt: number of clean znodes
 *
- * @budg_idx_growth: amount of bytes budgeted for index growth
+ * @space_lock: protects @bi and @lst
- * @budg_data_growth: amount of bytes budgeted for cached data
+ * @lst: lprops statistics
- * @budg_dd_growth: amount of bytes budgeted for cached data that will make
+ * @bi: budgeting information
- *                  other data dirty
- * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index,
- *                        but which still have to be taken into account because
- *                        the index has not been committed so far
- * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
- *              @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst,
- *              @nospace, and @nospace_rp;
- * @min_idx_lebs: minimum number of LEBs required for the index
- * @old_idx_sz: size of index on flash
 * @calc_idx_sz: temporary variable which is used to calculate new index size
 *               (contains accurate new index size at end of TNC commit start)
- * @lst: lprops statistics
- * @nospace: non-zero if the file-system does not have flash space (used as
- *           optimization)
- * @nospace_rp: the same as @nospace, but additionally means that even reserved
- *              pool is full
- *
- * @page_budget: budget for a page
- * @inode_budget: budget for an inode
- * @dent_budget: budget for a directory entry
 *
 * @ref_node_alsz: size of the LEB reference node aligned to the min. flash
- * I/O unit
+ *                 I/O unit
 * @mst_node_alsz: master node aligned size
 * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary
 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
@@ -1189,7 +1206,6 @@ struct ubifs_debug_info;
 * @replaying: %1 during journal replay
 * @mounting: %1 while mounting
 * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
- * @replay_tree: temporary tree used during journal replay
 * @replay_list: temporary list used during journal replay
 * @replay_buds: list of buds to replay
 * @cs_sqnum: sequence number of first node in the log (commit start node)
@@ -1238,6 +1254,7 @@ struct ubifs_info {
        wait_queue_head_t cmt_wq;
        unsigned int big_lpt:1;
+        unsigned int space_fixup:1;
        unsigned int no_chk_data_crc:1;
        unsigned int bulk_read:1;
        unsigned int default_compr:2;
@@ -1308,21 +1325,10 @@ struct ubifs_info {
        atomic_long_t dirty_zn_cnt;
        atomic_long_t clean_zn_cnt;
-        long long budg_idx_growth;
-        long long budg_data_growth;
-        long long budg_dd_growth;
-        long long budg_uncommitted_idx;
        spinlock_t space_lock;
-        int min_idx_lebs;
-        unsigned long long old_idx_sz;
-        unsigned long long calc_idx_sz;
        struct ubifs_lp_stats lst;
-        unsigned int nospace:1;
+        struct ubifs_budg_info bi;
-        unsigned int nospace_rp:1;
+        unsigned long long calc_idx_sz;
-        int page_budget;
-        int inode_budget;
-        int dent_budget;
        int ref_node_alsz;
        int mst_node_alsz;
@@ -1430,7 +1436,6 @@ struct ubifs_info {
        unsigned int replaying:1;
        unsigned int mounting:1;
        unsigned int remounting_rw:1;
-        struct rb_root replay_tree;
        struct list_head replay_list;
        struct list_head replay_buds;
        unsigned long long cs_sqnum;
@@ -1628,6 +1633,7 @@ int ubifs_write_master(struct ubifs_info *c);
 int ubifs_read_superblock(struct ubifs_info *c);
 struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c);
 int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup);
+int ubifs_fixup_free_space(struct ubifs_info *c);
 /* replay.c */
 int ubifs_validate_entry(struct ubifs_info *c,
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 3299f469e712..16f19f55e63f 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -80,8 +80,8 @@ enum {
        SECURITY_XATTR,
 };
-static const struct inode_operations none_inode_operations;
+static const struct inode_operations empty_iops;
-static const struct file_operations none_file_operations;
+static const struct file_operations empty_fops;
 /**
 * create_xattr - create an extended attribute.
@@ -131,8 +131,8 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
        /* Re-define all operations to be "nothing" */
        inode->i_mapping->a_ops = &empty_aops;
-        inode->i_op = &none_inode_operations;
+        inode->i_op = &empty_iops;
-        inode->i_fop = &none_file_operations;
+        inode->i_fop = &empty_fops;
        inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA;
        ui = ubifs_inode(inode);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index e765743cf9f3..b4d791a83207 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -409,7 +409,7 @@ out:
 }
 /**
- * ufs_getfrag_bloc() - `get_block_t' function, interface between UFS and
+ * ufs_getfrag_block() - `get_block_t' function, interface between UFS and
 * readpage, writepage and so on
 */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 9ef9ed2cfe2e..5e68099db2a5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -33,7 +33,6 @@
 #include <linux/migrate.h>
 #include <linux/backing-dev.h>
 #include <linux/freezer.h>
-#include <linux/list_sort.h>
 #include "xfs_sb.h"
 #include "xfs_inum.h"
@@ -709,6 +708,27 @@ xfs_buf_get_empty(
        return bp;
 }
+/*
+ * Return a buffer allocated as an empty buffer and associated to external
+ * memory via xfs_buf_associate_memory() back to its empty state.
+ */
+void
+xfs_buf_set_empty(
+        struct xfs_buf          *bp,
+        size_t                  len)
+{
+        if (bp->b_pages)
+                _xfs_buf_free_pages(bp);
+        bp->b_pages = NULL;
+        bp->b_page_count = 0;
+        bp->b_addr = NULL;
+        bp->b_file_offset = 0;
+        bp->b_buffer_length = bp->b_count_desired = len;
+        bp->b_bn = XFS_BUF_DADDR_NULL;
+        bp->b_flags &= ~XBF_MAPPED;
+}
 static inline struct page *
 mem_to_page(
        void                    *addr)
@@ -1402,12 +1422,12 @@ restart:
 int
 xfs_buftarg_shrink(
        struct shrinker         *shrink,
-        int                     nr_to_scan,
+        struct shrink_control   *sc)
-        gfp_t                   mask)
 {
        struct xfs_buftarg      *btp = container_of(shrink,
                                        struct xfs_buftarg, bt_shrinker);
        struct xfs_buf          *bp;
+        int nr_to_scan = sc->nr_to_scan;
        LIST_HEAD(dispose);
        if (!nr_to_scan)
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a9a1c4512645..50a7d5fb3b73 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -178,6 +178,7 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
                                xfs_buf_flags_t);
 extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
+extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
 extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
 extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
 extern void xfs_buf_hold(xfs_buf_t *);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index b3486dfa5520..54e623bfbb85 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -586,7 +586,8 @@ xfs_file_compat_ioctl(
        case XFS_IOC_RESVSP_32:
        case XFS_IOC_UNRESVSP_32:
        case XFS_IOC_RESVSP64_32:
-        case XFS_IOC_UNRESVSP64_32: {
+        case XFS_IOC_UNRESVSP64_32:
+        case XFS_IOC_ZERO_RANGE_32: {
                struct xfs_flock64      bf;
                if (xfs_compat_flock64_copyin(&bf, arg))
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 08b605792a99..80f4060e8970 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -184,6 +184,7 @@ typedef struct compat_xfs_flock64 {
 #define XFS_IOC_UNRESVSP_32     _IOW('X', 41, struct compat_xfs_flock64)
 #define XFS_IOC_RESVSP64_32     _IOW('X', 42, struct compat_xfs_flock64)
 #define XFS_IOC_UNRESVSP64_32   _IOW('X', 43, struct compat_xfs_flock64)
+#define XFS_IOC_ZERO_RANGE_32   _IOW('X', 57, struct compat_xfs_flock64)
 typedef struct compat_xfs_fsop_geom_v1 {
        __u32           blocksize;      /* filesystem (data) block size */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 244be9cbfe78..8633521b3b2e 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -70,6 +70,7 @@
 #include <linux/ctype.h>
 #include <linux/writeback.h>
 #include <linux/capability.h>
+#include <linux/list_sort.h>
 #include <asm/page.h>
 #include <asm/div64.h>
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
index 9f76cceb678d..bd672def95ac 100644
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -41,23 +41,6 @@ __xfs_printk(
        printk("%sXFS: %pV\n", level, vaf);
 }
-void xfs_printk(
-        const char              *level,
-        const struct xfs_mount  *mp,
-        const char              *fmt, ...)
-{
-        struct va_format        vaf;
-        va_list                 args;
-        va_start(args, fmt);
-        vaf.fmt = fmt;
-        vaf.va = &args;
-        __xfs_printk(level, mp, &vaf);
-        va_end(args);
-}
 #define define_xfs_printk_level(func, kern_level)               \
 void func(const struct xfs_mount *mp, const char *fmt, ...)     \
 {                                                               \
@@ -95,8 +78,7 @@ xfs_alert_tag(
        int                     do_panic = 0;
        if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
-                xfs_printk(KERN_ALERT, mp,
+                xfs_alert(mp, "Transforming an alert into a BUG.");
-                        "XFS: Transforming an alert into a BUG.");
                do_panic = 1;
        }
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
index f1b3fc1b6c4e..7fb7ea007672 100644
--- a/fs/xfs/linux-2.6/xfs_message.h
+++ b/fs/xfs/linux-2.6/xfs_message.h
@@ -3,9 +3,6 @@
 struct xfs_mount;
-extern void xfs_printk(const char *level, const struct xfs_mount *mp,
-                      const char *fmt, ...)
-        __attribute__ ((format (printf, 3, 4)));
 extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
        __attribute__ ((format (printf, 2, 3)));
 extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
@@ -28,7 +25,9 @@ extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
 extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
        __attribute__ ((format (printf, 2, 3)));
 #else
-static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+static inline void
+__attribute__ ((format (printf, 2, 3)))
+xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
 {
 }
 #endif
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index b38e58d02299..b0aa59e51fd0 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1787,10 +1787,6 @@ init_xfs_fs(void)
        if (error)
                goto out_cleanup_procfs;
-        error = xfs_init_workqueues();
-        if (error)
-                goto out_sysctl_unregister;
        vfs_initquota();
        error = register_filesystem(&xfs_fs_type);
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index e4f9c1b0836c..8ecad5ff9f9b 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -267,6 +267,16 @@ xfs_sync_inode_attr(
        error = xfs_iflush(ip, flags);
+        /*
+         * We don't want to try again on non-blocking flushes that can't run
+         * again immediately. If an inode really must be written, then that's
+         * what the SYNC_WAIT flag is for.
+         */
+        if (error == EAGAIN) {
+                ASSERT(!(flags & SYNC_WAIT));
+                error = 0;
+        }
 out_unlock:
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
        return error;
@@ -926,6 +936,7 @@ restart:
                                        XFS_LOOKUP_BATCH,
                                        XFS_ICI_RECLAIM_TAG);
                        if (!nr_found) {
+                                done = 1;
                                rcu_read_unlock();
                                break;
                        }
@@ -1021,13 +1032,14 @@ xfs_reclaim_inodes(
 static int
 xfs_reclaim_inode_shrink(
        struct shrinker *shrink,
-        int             nr_to_scan,
+        struct shrink_control *sc)
-        gfp_t           gfp_mask)
 {
        struct xfs_mount *mp;
        struct xfs_perag *pag;
        xfs_agnumber_t  ag;
        int             reclaimable;
+        int nr_to_scan = sc->nr_to_scan;
+        gfp_t gfp_mask = sc->gfp_mask;
        mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
        if (nr_to_scan) {
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 2d0bcb479075..d48b7a579ae1 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -1151,44 +1151,7 @@ TRACE_EVENT(xfs_bunmap,
 );
-#define XFS_BUSY_SYNC \
+DECLARE_EVENT_CLASS(xfs_busy_class,
-        { 0,    "async" }, \
-        { 1,    "sync" }
-TRACE_EVENT(xfs_alloc_busy,
-        TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno,
-                 xfs_agblock_t agbno, xfs_extlen_t len, int sync),
-        TP_ARGS(trans, agno, agbno, len, sync),
-        TP_STRUCT__entry(
-                __field(dev_t, dev)
-                __field(struct xfs_trans *, tp)
-                __field(int, tid)
-                __field(xfs_agnumber_t, agno)
-                __field(xfs_agblock_t, agbno)
-                __field(xfs_extlen_t, len)
-                __field(int, sync)
-        ),
-        TP_fast_assign(
-                __entry->dev = trans->t_mountp->m_super->s_dev;
-                __entry->tp = trans;
-                __entry->tid = trans->t_ticket->t_tid;
-                __entry->agno = agno;
-                __entry->agbno = agbno;
-                __entry->len = len;
-                __entry->sync = sync;
-        ),
-        TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s",
-                  MAJOR(__entry->dev), MINOR(__entry->dev),
-                  __entry->tp,
-                  __entry->tid,
-                  __entry->agno,
-                  __entry->agbno,
-                  __entry->len,
-                  __print_symbolic(__entry->sync, XFS_BUSY_SYNC))
-);
-TRACE_EVENT(xfs_alloc_unbusy,
        TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
                 xfs_agblock_t agbno, xfs_extlen_t len),
        TP_ARGS(mp, agno, agbno, len),
@@ -1210,35 +1173,45 @@ TRACE_EVENT(xfs_alloc_unbusy,
                  __entry->agbno,
                  __entry->len)
 );
+#define DEFINE_BUSY_EVENT(name) \
+DEFINE_EVENT(xfs_busy_class, name, \
+        TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                 xfs_agblock_t agbno, xfs_extlen_t len), \
+        TP_ARGS(mp, agno, agbno, len))
+DEFINE_BUSY_EVENT(xfs_alloc_busy);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
-#define XFS_BUSY_STATES \
+TRACE_EVENT(xfs_alloc_busy_trim,
-        { 0,    "missing" }, \
-        { 1,    "found" }
-TRACE_EVENT(xfs_alloc_busysearch,
        TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
-                 xfs_agblock_t agbno, xfs_extlen_t len, int found),
+                 xfs_agblock_t agbno, xfs_extlen_t len,
-        TP_ARGS(mp, agno, agbno, len, found),
+                 xfs_agblock_t tbno, xfs_extlen_t tlen),
+        TP_ARGS(mp, agno, agbno, len, tbno, tlen),
        TP_STRUCT__entry(
                __field(dev_t, dev)
                __field(xfs_agnumber_t, agno)
                __field(xfs_agblock_t, agbno)
                __field(xfs_extlen_t, len)
-                __field(int, found)
+                __field(xfs_agblock_t, tbno)
+                __field(xfs_extlen_t, tlen)
        ),
        TP_fast_assign(
                __entry->dev = mp->m_super->s_dev;
                __entry->agno = agno;
                __entry->agbno = agbno;
                __entry->len = len;
-                __entry->found = found;
+                __entry->tbno = tbno;
+                __entry->tlen = tlen;
        ),
-        TP_printk("dev %d:%d agno %u agbno %u len %u %s",
+        TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->agno,
                  __entry->agbno,
                  __entry->len,
-                  __print_symbolic(__entry->found, XFS_BUSY_STATES))
+                  __entry->tbno,
+                  __entry->tlen)
 );
 TRACE_EVENT(xfs_trans_commit_lsn,
@@ -1418,7 +1391,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
                  __entry->wasfromfl,
                  __entry->isfl,
                  __entry->userdata,
-                  __entry->firstblock)
+                  (unsigned long long)__entry->firstblock)
 )
 #define DEFINE_ALLOC_EVENT(name) \
@@ -1433,11 +1406,14 @@ DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
 DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
 DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
 DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
 DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
 DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
 DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
 DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
 DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
 DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
 DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
 DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 69228aa8605a..b94dace4e785 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -60,7 +60,7 @@ STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);
 STATIC int      xfs_qm_init_quotainos(xfs_mount_t *);
 STATIC int      xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int      xfs_qm_shake(struct shrinker *, int, gfp_t);
+STATIC int      xfs_qm_shake(struct shrinker *, struct shrink_control *);
 static struct shrinker xfs_qm_shaker = {
        .shrink = xfs_qm_shake,
@@ -2009,10 +2009,10 @@ xfs_qm_shake_freelist(
 STATIC int
 xfs_qm_shake(
        struct shrinker *shrink,
-        int             nr_to_scan,
+        struct shrink_control *sc)
-        gfp_t           gfp_mask)
 {
        int     ndqused, nfree, n;
+        gfp_t gfp_mask = sc->gfp_mask;
        if (!kmem_shake_allow(gfp_mask))
                return 0;
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 58632cc17f2d..da0a561ffba2 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -187,7 +187,6 @@ struct xfs_busy_extent {
        xfs_agnumber_t  agno;
        xfs_agblock_t   bno;
        xfs_extlen_t    length;
-        xlog_tid_t      tid;            /* transaction that created this */
 };
 /*
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 27d64d752eab..acdced86413c 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -41,19 +41,13 @@
 #define XFSA_FIXUP_BNO_OK       1
 #define XFSA_FIXUP_CNT_OK       2
-/*
- * Prototypes for per-ag allocation routines
- */
 STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
-        xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
+                xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
+STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *,
-/*
+                xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *);
- * Internal functions.
- */
 /*
 * Lookup the record equal to [bno, len] in the btree given by cur.
@@ -154,19 +148,21 @@ xfs_alloc_compute_aligned(
        xfs_extlen_t    *reslen)        /* result length */
 {
        xfs_agblock_t   bno;
-        xfs_extlen_t    diff;
        xfs_extlen_t    len;
-        if (args->alignment > 1 && foundlen >= args->minlen) {
+        /* Trim busy sections out of found extent */
-                bno = roundup(foundbno, args->alignment);
+        xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len);
-                diff = bno - foundbno;
-                len = diff >= foundlen ? 0 : foundlen - diff;
+        if (args->alignment > 1 && len >= args->minlen) {
+                xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
+                xfs_extlen_t    diff = aligned_bno - bno;
+                *resbno = aligned_bno;
+                *reslen = diff >= len ? 0 : len - diff;
        } else {
-                bno = foundbno;
+                *resbno = bno;
-                len = foundlen;
+                *reslen = len;
        }
-        *resbno = bno;
-        *reslen = len;
 }
 /*
@@ -280,7 +276,6 @@ xfs_alloc_fix_minleft(
                return 1;
        agf = XFS_BUF_TO_AGF(args->agbp);
        diff = be32_to_cpu(agf->agf_freeblks)
-                + be32_to_cpu(agf->agf_flcount)
                - args->len - args->minleft;
        if (diff >= 0)
                return 1;
@@ -541,16 +536,8 @@ xfs_alloc_ag_vextent(
                if (error)
                        return error;
-                /*
+                ASSERT(!xfs_alloc_busy_search(args->mp, args->agno,
-                 * Search the busylist for these blocks and mark the
+                                              args->agbno, args->len));
-                 * transaction as synchronous if blocks are found. This
-                 * avoids the need to block due to a synchronous log
-                 * force to ensure correct ordering as the synchronous
-                 * transaction will guarantee that for us.
-                 */
-                if (xfs_alloc_busy_search(args->mp, args->agno,
-                                        args->agbno, args->len))
-                        xfs_trans_set_sync(args->tp);
        }
        if (!args->isfl) {
@@ -577,14 +564,14 @@ xfs_alloc_ag_vextent_exact(
 {
        xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
        xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
-        xfs_agblock_t   end;    /* end of allocated extent */
        int             error;
        xfs_agblock_t   fbno;   /* start block of found extent */
-        xfs_agblock_t   fend;   /* end block of found extent */
        xfs_extlen_t    flen;   /* length of found extent */
+        xfs_agblock_t   tbno;   /* start block of trimmed extent */
+        xfs_extlen_t    tlen;   /* length of trimmed extent */
+        xfs_agblock_t   tend;   /* end block of trimmed extent */
+        xfs_agblock_t   end;    /* end of allocated extent */
        int             i;      /* success/failure of operation */
-        xfs_agblock_t   maxend; /* end of maximal extent */
-        xfs_agblock_t   minend; /* end of minimal extent */
        xfs_extlen_t    rlen;   /* length of returned extent */
        ASSERT(args->alignment == 1);
@@ -614,14 +601,22 @@ xfs_alloc_ag_vextent_exact(
                goto error0;
        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
        ASSERT(fbno <= args->agbno);
-        minend = args->agbno + args->minlen;
-        maxend = args->agbno + args->maxlen;
-        fend = fbno + flen;
        /*
-         * Give up if the freespace isn't long enough for the minimum request.
+         * Check for overlapping busy extents.
+         */
+        xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen);
+        /*
+         * Give up if the start of the extent is busy, or the freespace isn't
+         * long enough for the minimum request.
         */
-        if (fend < minend)
+        if (tbno > args->agbno)
+                goto not_found;
+        if (tlen < args->minlen)
+                goto not_found;
+        tend = tbno + tlen;
+        if (tend < args->agbno + args->minlen)
                goto not_found;
        /*
@@ -630,14 +625,14 @@ xfs_alloc_ag_vextent_exact(
         *
         * Fix the length according to mod and prod if given.
         */
-        end = XFS_AGBLOCK_MIN(fend, maxend);
+        end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen);
        args->len = end - args->agbno;
        xfs_alloc_fix_len(args);
        if (!xfs_alloc_fix_minleft(args))
                goto not_found;
        rlen = args->len;
-        ASSERT(args->agbno + rlen <= fend);
+        ASSERT(args->agbno + rlen <= tend);
        end = args->agbno + rlen;
        /*
@@ -686,11 +681,11 @@ xfs_alloc_find_best_extent(
        struct xfs_btree_cur    **scur, /* searching cursor */
        xfs_agblock_t           gdiff,  /* difference for search comparison */
        xfs_agblock_t           *sbno,  /* extent found by search */
-        xfs_extlen_t            *slen,
+        xfs_extlen_t            *slen,  /* extent length */
-        xfs_extlen_t            *slena, /* aligned length */
+        xfs_agblock_t           *sbnoa, /* aligned extent found by search */
+        xfs_extlen_t            *slena, /* aligned extent length */
        int                     dir)    /* 0 = search right, 1 = search left */
 {
-        xfs_agblock_t           bno;
        xfs_agblock_t           new;
        xfs_agblock_t           sdiff;
        int                     error;
@@ -708,16 +703,16 @@ xfs_alloc_find_best_extent(
                if (error)
                        goto error0;
                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-                xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena);
+                xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
                /*
                 * The good extent is closer than this one.
                 */
                if (!dir) {
-                        if (bno >= args->agbno + gdiff)
+                        if (*sbnoa >= args->agbno + gdiff)
                                goto out_use_good;
                } else {
-                        if (bno <= args->agbno - gdiff)
+                        if (*sbnoa <= args->agbno - gdiff)
                                goto out_use_good;
                }
@@ -729,8 +724,8 @@ xfs_alloc_find_best_extent(
                        xfs_alloc_fix_len(args);
                        sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-                                                       args->alignment, *sbno,
+                                                       args->alignment, *sbnoa,
-                                                       *slen, &new);
+                                                       *slena, &new);
                        /*
                         * Choose closer size and invalidate other cursor.
@@ -780,7 +775,7 @@ xfs_alloc_ag_vextent_near(
        xfs_agblock_t   gtbnoa;         /* aligned ... */
        xfs_extlen_t    gtdiff;         /* difference to right side entry */
        xfs_extlen_t    gtlen;          /* length of right side entry */
-        xfs_extlen_t    gtlena = 0;     /* aligned ... */
+        xfs_extlen_t    gtlena;         /* aligned ... */
        xfs_agblock_t   gtnew;          /* useful start bno of right side */
        int             error;          /* error code */
        int             i;              /* result code, temporary */
@@ -789,9 +784,10 @@ xfs_alloc_ag_vextent_near(
        xfs_agblock_t   ltbnoa;         /* aligned ... */
        xfs_extlen_t    ltdiff;         /* difference to left side entry */
        xfs_extlen_t    ltlen;          /* length of left side entry */
-        xfs_extlen_t    ltlena = 0;     /* aligned ... */
+        xfs_extlen_t    ltlena;         /* aligned ... */
        xfs_agblock_t   ltnew;          /* useful start bno of left side */
        xfs_extlen_t    rlen;           /* length of returned extent */
+        int             forced = 0;
 #if defined(DEBUG) && defined(__KERNEL__)
        /*
         * Randomly don't execute the first algorithm.
@@ -800,13 +796,20 @@ xfs_alloc_ag_vextent_near(
        dofirst = random32() & 1;
 #endif
+restart:
+        bno_cur_lt = NULL;
+        bno_cur_gt = NULL;
+        ltlen = 0;
+        gtlena = 0;
+        ltlena = 0;
        /*
         * Get a cursor for the by-size btree.
         */
        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
                args->agno, XFS_BTNUM_CNT);
-        ltlen = 0;
-        bno_cur_lt = bno_cur_gt = NULL;
        /*
         * See if there are any free extents as big as maxlen.
         */
@@ -822,11 +825,13 @@ xfs_alloc_ag_vextent_near(
                        goto error0;
                if (i == 0 || ltlen == 0) {
                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+                        trace_xfs_alloc_near_noentry(args);
                        return 0;
                }
                ASSERT(i == 1);
        }
        args->wasfromfl = 0;
        /*
         * First algorithm.
         * If the requested extent is large wrt the freespaces available
@@ -890,7 +895,7 @@ xfs_alloc_ag_vextent_near(
                        if (args->len < blen)
                                continue;
                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-                                args->alignment, ltbno, ltlen, &ltnew);
+                                args->alignment, ltbnoa, ltlena, &ltnew);
                        if (ltnew != NULLAGBLOCK &&
                            (args->len > blen || ltdiff < bdiff)) {
                                bdiff = ltdiff;
@@ -1042,11 +1047,12 @@ xfs_alloc_ag_vextent_near(
                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
                        xfs_alloc_fix_len(args);
                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-                                args->alignment, ltbno, ltlen, &ltnew);
+                                args->alignment, ltbnoa, ltlena, &ltnew);
                        error = xfs_alloc_find_best_extent(args,
                                                &bno_cur_lt, &bno_cur_gt,
-                                                ltdiff, &gtbno, &gtlen, &gtlena,
+                                                ltdiff, &gtbno, &gtlen,
+                                                &gtbnoa, &gtlena,
                                                0 /* search right */);
                } else {
                        ASSERT(gtlena >= args->minlen);
@@ -1057,11 +1063,12 @@ xfs_alloc_ag_vextent_near(
                        args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
                        xfs_alloc_fix_len(args);
                        gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-                                args->alignment, gtbno, gtlen, &gtnew);
+                                args->alignment, gtbnoa, gtlena, &gtnew);
                        error = xfs_alloc_find_best_extent(args,
                                                &bno_cur_gt, &bno_cur_lt,
-                                                gtdiff, &ltbno, &ltlen, &ltlena,
+                                                gtdiff, &ltbno, &ltlen,
+                                                &ltbnoa, &ltlena,
                                                1 /* search left */);
                }
@@ -1073,6 +1080,12 @@ xfs_alloc_ag_vextent_near(
         * If we couldn't get anything, give up.
         */
        if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
+                if (!forced++) {
+                        trace_xfs_alloc_near_busy(args);
+                        xfs_log_force(args->mp, XFS_LOG_SYNC);
+                        goto restart;
+                }
                trace_xfs_alloc_size_neither(args);
                args->agbno = NULLAGBLOCK;
                return 0;
@@ -1107,12 +1120,13 @@ xfs_alloc_ag_vextent_near(
                return 0;
        }
        rlen = args->len;
-        (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
+        (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
-                ltlen, &ltnew);
+                                     ltbnoa, ltlena, &ltnew);
        ASSERT(ltnew >= ltbno);
-        ASSERT(ltnew + rlen <= ltbno + ltlen);
+        ASSERT(ltnew + rlen <= ltbnoa + ltlena);
        ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
        args->agbno = ltnew;
        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
                        ltnew, rlen, XFSA_FIXUP_BNO_OK)))
                goto error0;
@@ -1155,26 +1169,35 @@ xfs_alloc_ag_vextent_size(
        int             i;              /* temp status variable */
        xfs_agblock_t   rbno;           /* returned block number */
        xfs_extlen_t    rlen;           /* length of returned extent */
+        int             forced = 0;
+restart:
        /*
         * Allocate and initialize a cursor for the by-size btree.
         */
        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
                args->agno, XFS_BTNUM_CNT);
        bno_cur = NULL;
        /*
         * Look for an entry >= maxlen+alignment-1 blocks.
         */
        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
                        args->maxlen + args->alignment - 1, &i)))
                goto error0;
        /*
-         * If none, then pick up the last entry in the tree unless the
+         * If none or we have busy extents that we cannot allocate from, then
-         * tree is empty.
+         * we have to settle for a smaller extent. In the case that there are
+         * no large extents, this will return the last entry in the tree unless
+         * the tree is empty. In the case that there are only busy large
+         * extents, this will return the largest small extent unless there
+         * are no smaller extents available.
         */
-        if (!i) {
+        if (!i || forced > 1) {
-                if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno,
+                error = xfs_alloc_ag_vextent_small(args, cnt_cur,
-                                &flen, &i)))
+                                                   &fbno, &flen, &i);
+                if (error)
                        goto error0;
                if (i == 0 || flen == 0) {
                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -1182,22 +1205,56 @@ xfs_alloc_ag_vextent_size(
                        return 0;
                }
                ASSERT(i == 1);
+                xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
+        } else {
+                /*
+                 * Search for a non-busy extent that is large enough.
+                 * If we are at low space, don't check, or if we fall off
+                 * the end of the btree, turn off the busy check and
+                 * restart.
+                 */
+                for (;;) {
+                        error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
+                        if (error)
+                                goto error0;
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                        xfs_alloc_compute_aligned(args, fbno, flen,
+                                                  &rbno, &rlen);
+                        if (rlen >= args->maxlen)
+                                break;
+                        error = xfs_btree_increment(cnt_cur, 0, &i);
+                        if (error)
+                                goto error0;
+                        if (i == 0) {
+                                /*
+                                 * Our only valid extents must have been busy.
+                                 * Make it unbusy by forcing the log out and
+                                 * retrying. If we've been here before, forcing
+                                 * the log isn't making the extents available,
+                                 * which means they have probably been freed in
+                                 * this transaction.  In that case, we have to
+                                 * give up on them and we'll attempt a minlen
+                                 * allocation the next time around.
+                                 */
+                                xfs_btree_del_cursor(cnt_cur,
+                                                     XFS_BTREE_NOERROR);
+                                trace_xfs_alloc_size_busy(args);
+                                if (!forced++)
+                                        xfs_log_force(args->mp, XFS_LOG_SYNC);
+                                goto restart;
+                        }
+                }
        }
-        /*
-         * There's a freespace as big as maxlen+alignment-1, get it.
-         */
-        else {
-                if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
-                        goto error0;
-                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-        }
        /*
         * In the first case above, we got the last entry in the
         * by-size btree.  Now we check to see if the space hits maxlen
         * once aligned; if not, we search left for something better.
         * This can't happen in the second case above.
         */
-        xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
        XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
                        (rlen <= flen && rbno + rlen <= fbno + flen), error0);
@@ -1251,13 +1308,19 @@ xfs_alloc_ag_vextent_size(
         * Fix up the length.
         */
        args->len = rlen;
-        xfs_alloc_fix_len(args);
+        if (rlen < args->minlen) {
-        if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
+                if (!forced++) {
-                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-                trace_xfs_alloc_size_nominleft(args);
+                        trace_xfs_alloc_size_busy(args);
-                args->agbno = NULLAGBLOCK;
+                        xfs_log_force(args->mp, XFS_LOG_SYNC);
-                return 0;
+                        goto restart;
+                }
+                goto out_nominleft;
        }
+        xfs_alloc_fix_len(args);
+        if (!xfs_alloc_fix_minleft(args))
+                goto out_nominleft;
        rlen = args->len;
        XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
        /*
@@ -1287,6 +1350,12 @@ error0:
        if (bno_cur)
                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
        return error;
+out_nominleft:
+        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+        trace_xfs_alloc_size_nominleft(args);
+        args->agbno = NULLAGBLOCK;
+        return 0;
 }
 /*
@@ -1326,6 +1395,9 @@ xfs_alloc_ag_vextent_small(
                if (error)
                        goto error0;
                if (fbno != NULLAGBLOCK) {
+                        xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1,
+                                             args->userdata);
                        if (args->userdata) {
                                xfs_buf_t       *bp;
@@ -1617,18 +1689,6 @@ xfs_free_ag_extent(
        trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
-        /*
-         * Since blocks move to the free list without the coordination
-         * used in xfs_bmap_finish, we can't allow block to be available
-         * for reallocation and non-transaction writing (user data)
-         * until we know that the transaction that moved it to the free
-         * list is permanently on disk.  We track the blocks by declaring
-         * these blocks as "busy"; the busy list is maintained on a per-ag
-         * basis and each transaction records which entries should be removed
-         * when the iclog commits to disk.  If a busy block is allocated,
-         * the iclog is pushed up to the LSN that freed the block.
-         */
-        xfs_alloc_busy_insert(tp, agno, bno, len);
        return 0;
 error0:
@@ -1923,21 +1983,6 @@ xfs_alloc_get_freelist(
        xfs_alloc_log_agf(tp, agbp, logflags);
        *bnop = bno;
-        /*
-         * As blocks are freed, they are added to the per-ag busy list and
-         * remain there until the freeing transaction is committed to disk.
-         * Now that we have allocated blocks, this list must be searched to see
-         * if a block is being reused.  If one is, then the freeing transaction
-         * must be pushed to disk before this transaction.
-         *
-         * We do this by setting the current transaction to a sync transaction
-         * which guarantees that the freeing transaction is on disk before this
-         * transaction. This is done instead of a synchronous log force here so
-         * that we don't sit and wait with the AGF locked in the transaction
-         * during the log force.
-         */
-        if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1))
-                xfs_trans_set_sync(tp);
        return 0;
 }
@@ -2423,105 +2468,13 @@ xfs_free_extent(
        }
        error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
+        if (!error)
+                xfs_alloc_busy_insert(tp, args.agno, args.agbno, len);
 error0:
        xfs_perag_put(args.pag);
        return error;
 }
-/*
- * AG Busy list management
- * The busy list contains block ranges that have been freed but whose
- * transactions have not yet hit disk.  If any block listed in a busy
- * list is reused, the transaction that freed it must be forced to disk
- * before continuing to use the block.
- *
- * xfs_alloc_busy_insert - add to the per-ag busy list
- * xfs_alloc_busy_clear - remove an item from the per-ag busy list
- * xfs_alloc_busy_search - search for a busy extent
- */
-/*
- * Insert a new extent into the busy tree.
- *
- * The busy extent tree is indexed by the start block of the busy extent.
- * there can be multiple overlapping ranges in the busy extent tree but only
- * ever one entry at a given start block. The reason for this is that
- * multi-block extents can be freed, then smaller chunks of that extent
- * allocated and freed again before the first transaction commit is on disk.
- * If the exact same start block is freed a second time, we have to wait for
- * that busy extent to pass out of the tree before the new extent is inserted.
- * There are two main cases we have to handle here.
- *
- * The first case is a transaction that triggers a "free - allocate - free"
- * cycle. This can occur during btree manipulations as a btree block is freed
- * to the freelist, then allocated from the free list, then freed again. In
- * this case, the second extxpnet free is what triggers the duplicate and as
- * such the transaction IDs should match. Because the extent was allocated in
- * this transaction, the transaction must be marked as synchronous. This is
- * true for all cases where the free/alloc/free occurs in the one transaction,
- * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case.
- * This serves to catch violations of the second case quite effectively.
- *
- * The second case is where the free/alloc/free occur in different
- * transactions. In this case, the thread freeing the extent the second time
- * can't mark the extent busy immediately because it is already tracked in a
- * transaction that may be committing.  When the log commit for the existing
- * busy extent completes, the busy extent will be removed from the tree. If we
- * allow the second busy insert to continue using that busy extent structure,
- * it can be freed before this transaction is safely in the log.  Hence our
- * only option in this case is to force the log to remove the existing busy
- * extent from the list before we insert the new one with the current
- * transaction ID.
- *
- * The problem we are trying to avoid in the free-alloc-free in separate
- * transactions is most easily described with a timeline:
- *
- *      Thread 1        Thread 2        Thread 3        xfslogd
- *      xact alloc
- *      free X
- *      mark busy
- *      commit xact
- *      free xact
- *                      xact alloc
- *                      alloc X
- *                      busy search
- *                      mark xact sync
- *                      commit xact
- *                      free xact
- *                      force log
- *                      checkpoint starts
- *                      ....
- *                                      xact alloc
- *                                      free X
- *                                      mark busy
- *                                      finds match
- *                                      *** KABOOM! ***
- *                                      ....
- *                                                      log IO completes
- *                                                      unbusy X
- *                      checkpoint completes
- *
- * By issuing a log force in thread 3 @ "KABOOM", the thread will block until
- * the checkpoint completes, and the busy extent it matched will have been
- * removed from the tree when it is woken. Hence it can then continue safely.
- *
- * However, to ensure this matching process is robust, we need to use the
- * transaction ID for identifying transaction, as delayed logging results in
- * the busy extent and transaction lifecycles being different. i.e. the busy
- * extent is active for a lot longer than the transaction.  Hence the
- * transaction structure can be freed and reallocated, then mark the same
- * extent busy again in the new transaction. In this case the new transaction
- * will have a different tid but can have the same address, and hence we need
- * to check against the tid.
- *
- * Future: for delayed logging, we could avoid the log force if the extent was
- * first freed in the current checkpoint sequence. This, however, requires the
- * ability to pin the current checkpoint in memory until this transaction
- * commits to ensure that both the original free and the current one combine
- * logically into the one checkpoint. If the checkpoint sequences are
- * different, however, we still need to wait on a log force.
- */
 void
 xfs_alloc_busy_insert(
        struct xfs_trans        *tp,
@@ -2533,9 +2486,7 @@ xfs_alloc_busy_insert(
        struct xfs_busy_extent  *busyp;
        struct xfs_perag        *pag;
        struct rb_node          **rbp;
-        struct rb_node          *parent;
+        struct rb_node          *parent = NULL;
-        int                     match;
        new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
        if (!new) {
@@ -2544,7 +2495,7 @@ xfs_alloc_busy_insert(
                 * block, make this a synchronous transaction to insure that
                 * the block is not reused before this transaction commits.
                 */
-                trace_xfs_alloc_busy(tp, agno, bno, len, 1);
+                trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
                xfs_trans_set_sync(tp);
                return;
        }
@@ -2552,66 +2503,28 @@ xfs_alloc_busy_insert(
        new->agno = agno;
        new->bno = bno;
        new->length = len;
-        new->tid = xfs_log_get_trans_ident(tp);
        INIT_LIST_HEAD(&new->list);
        /* trace before insert to be able to see failed inserts */
-        trace_xfs_alloc_busy(tp, agno, bno, len, 0);
+        trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);
        pag = xfs_perag_get(tp->t_mountp, new->agno);
-restart:
        spin_lock(&pag->pagb_lock);
        rbp = &pag->pagb_tree.rb_node;
-        parent = NULL;
+        while (*rbp) {
-        busyp = NULL;
-        match = 0;
-        while (*rbp && match >= 0) {
                parent = *rbp;
                busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
                if (new->bno < busyp->bno) {
-                        /* may overlap, but exact start block is lower */
                        rbp = &(*rbp)->rb_left;
-                        if (new->bno + new->length > busyp->bno)
+                        ASSERT(new->bno + new->length <= busyp->bno);
-                                match = busyp->tid == new->tid ? 1 : -1;
                } else if (new->bno > busyp->bno) {
-                        /* may overlap, but exact start block is higher */
                        rbp = &(*rbp)->rb_right;
-                        if (bno < busyp->bno + busyp->length)
+                        ASSERT(bno >= busyp->bno + busyp->length);
-                                match = busyp->tid == new->tid ? 1 : -1;
                } else {
-                        match = busyp->tid == new->tid ? 1 : -1;
+                        ASSERT(0);
-                        break;
                }
        }
-        if (match < 0) {
-                /* overlap marked busy in different transaction */
-                spin_unlock(&pag->pagb_lock);
-                xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
-                goto restart;
-        }
-        if (match > 0) {
-                /*
-                 * overlap marked busy in same transaction. Update if exact
-                 * start block match, otherwise combine the busy extents into
-                 * a single range.
-                 */
-                if (busyp->bno == new->bno) {
-                        busyp->length = max(busyp->length, new->length);
-                        spin_unlock(&pag->pagb_lock);
-                        ASSERT(tp->t_flags & XFS_TRANS_SYNC);
-                        xfs_perag_put(pag);
-                        kmem_free(new);
-                        return;
-                }
-                rb_erase(&busyp->rb_node, &pag->pagb_tree);
-                new->length = max(busyp->bno + busyp->length,
-                                        new->bno + new->length) -
-                                min(busyp->bno, new->bno);
-                new->bno = min(busyp->bno, new->bno);
-        } else
-                busyp = NULL;
        rb_link_node(&new->rb_node, parent, rbp);
        rb_insert_color(&new->rb_node, &pag->pagb_tree);
@@ -2619,7 +2532,6 @@ restart:
        list_add(&new->list, &tp->t_busy);
        spin_unlock(&pag->pagb_lock);
        xfs_perag_put(pag);
-        kmem_free(busyp);
 }
 /*
@@ -2668,31 +2580,443 @@ xfs_alloc_busy_search(
                }
        }
        spin_unlock(&pag->pagb_lock);
-        trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match);
        xfs_perag_put(pag);
        return match;
 }
+/*
+ * The found free extent [fbno, fend] overlaps part or all of the given busy
+ * extent.  If the overlap covers the beginning, the end, or all of the busy
+ * extent, the overlapping portion can be made unbusy and used for the
+ * allocation.  We can't split a busy extent because we can't modify a
+ * transaction/CIL context busy list, but we can update an entry's block
+ * number or length.
+ *
+ * Must be called with pag->pagb_lock held.  On the paths that force the log
+ * the lock is dropped and retaken around the force, so the caller must not
+ * assume the busy extent tree is unchanged when false is returned.
+ *
+ * Parameters:
+ *   mp        mount used for the log force and the tracepoints
+ *   pag       per-AG structure owning the busy extent rbtree and its lock
+ *   busyp     busy extent overlapping the range being reused
+ *   fbno      first block of the range being reused
+ *   flen      length of the range being reused
+ *   userdata  if true, never trim; always force the log and retry
+ *
+ * Returns true if the extent can safely be reused, or false if the search
+ * needs to be restarted.
+ */
+STATIC bool
+xfs_alloc_busy_update_extent(
+        struct xfs_mount        *mp,
+        struct xfs_perag        *pag,
+        struct xfs_busy_extent  *busyp,
+        xfs_agblock_t           fbno,
+        xfs_extlen_t            flen,
+        bool                    userdata)
+{
+        xfs_agblock_t           fend = fbno + flen;
+        xfs_agblock_t           bbno = busyp->bno;
+        xfs_agblock_t           bend = bbno + busyp->length;
+        /*
+         * If there is a busy extent overlapping a user allocation, we have
+         * no choice but to force the log and retry the search.
+         *
+         * Fortunately this does not happen during normal operation, but
+         * only if the filesystem is very low on space and has to dip into
+         * the AGFL for normal allocations.
+         */
+        if (userdata)
+                goto out_force_log;
+        if (bbno < fbno && bend > fend) {
+                /*
+                 * Case 1:
+                 *    bbno           bend
+                 *    +BBBBBBBBBBBBBBBBB+
+                 *        +---------+
+                 *        fbno   fend
+                 */
+                /*
+                 * We would have to split the busy extent to be able to track
+                 * it correctly, which we cannot do because we would have to
+                 * modify the list of busy extents attached to the transaction
+                 * or CIL context, which is immutable.
+                 *
+                 * Force out the log to clear the busy extent and retry the
+                 * search.
+                 */
+                goto out_force_log;
+        } else if (bbno >= fbno && bend <= fend) {
+                /*
+                 * Case 2:
+                 *    bbno           bend
+                 *    +BBBBBBBBBBBBBBBBB+
+                 *    +-----------------+
+                 *    fbno           fend
+                 *
+                 * Case 3:
+                 *    bbno           bend
+                 *    +BBBBBBBBBBBBBBBBB+
+                 *    +--------------------------+
+                 *    fbno                    fend
+                 *
+                 * Case 4:
+                 *             bbno           bend
+                 *             +BBBBBBBBBBBBBBBBB+
+                 *    +--------------------------+
+                 *    fbno                    fend
+                 *
+                 * Case 5:
+                 *             bbno           bend
+                 *             +BBBBBBBBBBBBBBBBB+
+                 *    +-----------------------------------+
+                 *    fbno                             fend
+                 *
+                 */
+                /*
+                 * The busy extent is fully covered by the extent we are
+                 * allocating, and can simply be removed from the rbtree.
+                 * However we cannot remove it from the immutable list
+                 * tracking busy extents in the transaction or CIL context,
+                 * so set the length to zero to mark it invalid.
+                 *
+                 * We also need to restart the busy extent search from the
+                 * tree root, because erasing the node can rearrange the
+                 * tree topology.
+                 */
+                rb_erase(&busyp->rb_node, &pag->pagb_tree);
+                busyp->length = 0;
+                return false;
+        } else if (fend < bend) {
+                /*
+                 * Case 6:
+                 *              bbno           bend
+                 *             +BBBBBBBBBBBBBBBBB+
+                 *             +---------+
+                 *             fbno   fend
+                 *
+                 * Case 7:
+                 *             bbno           bend
+                 *             +BBBBBBBBBBBBBBBBB+
+                 *    +------------------+
+                 *    fbno            fend
+                 *
+                 */
+                /*
+                 * Only the front of the busy extent is being reused, so the
+                 * part that stays busy is [fend, bend).  Move the start up
+                 * and shrink the length to match; moving the start alone
+                 * would leave the busy range running past bend.
+                 */
+                busyp->bno = fend;
+                busyp->length = bend - fend;
+        } else if (bbno < fbno) {
+                /*
+                 * Case 8:
+                 *    bbno           bend
+                 *    +BBBBBBBBBBBBBBBBB+
+                 *        +-------------+
+                 *        fbno       fend
+                 *
+                 * Case 9:
+                 *    bbno           bend
+                 *    +BBBBBBBBBBBBBBBBB+
+                 *        +----------------------+
+                 *        fbno                fend
+                 */
+                /*
+                 * Only the tail of the busy extent is being reused, so the
+                 * part that stays busy is [bbno, fbno).
+                 */
+                busyp->length = fbno - busyp->bno;
+        } else {
+                /* Every overlapping layout is handled above. */
+                ASSERT(0);
+        }
+        trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
+        return true;
+out_force_log:
+        /* The log force is issued with pagb_lock dropped, then retaken. */
+        spin_unlock(&pag->pagb_lock);
+        xfs_log_force(mp, XFS_LOG_SYNC);
+        trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
+        spin_lock(&pag->pagb_lock);
+        return false;
+}
+/*
+ * For a given extent [fbno, flen], make sure we can reuse it safely.
+ *
+ * Takes a reference on the per-AG structure for agno and walks its busy
+ * extent rbtree under pag->pagb_lock.  Each busy extent that overlaps
+ * [fbno, fbno + flen) is handed to xfs_alloc_busy_update_extent(); when
+ * that helper returns false the walk starts again from the tree root.
+ * When it returns true the same node is examined again on the next loop
+ * iteration (presumably it no longer overlaps after being trimmed, so the
+ * descent continues).  The loop only ends once the descent runs off the
+ * tree, after which the lock and the per-AG reference are released.
+ *
+ * userdata is passed through to xfs_alloc_busy_update_extent() unchanged.
+ */
 void
-xfs_alloc_busy_clear(
+xfs_alloc_busy_reuse(
        struct xfs_mount        *mp,
-        struct xfs_busy_extent  *busyp)
+        xfs_agnumber_t          agno,
+        xfs_agblock_t           fbno,
+        xfs_extlen_t            flen,
+        bool                    userdata)
 {
        struct xfs_perag        *pag;
+        struct rb_node          *rbp;
-        trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno,
+        ASSERT(flen > 0);
-                                                busyp->length);
-        ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno,
+        pag = xfs_perag_get(mp, agno);
-                                                busyp->length) == 1);
+        spin_lock(&pag->pagb_lock);
+restart:
+        rbp = pag->pagb_tree.rb_node;
+        while (rbp) {
+                struct xfs_busy_extent *busyp =
+                        rb_entry(rbp, struct xfs_busy_extent, rb_node);
+                xfs_agblock_t   bbno = busyp->bno;
+                xfs_agblock_t   bend = bbno + busyp->length;
-        list_del_init(&busyp->list);
+                if (fbno + flen <= bbno) {      /* range ends at or before this busy extent */
+                        rbp = rbp->rb_left;
+                        continue;
+                } else if (fbno >= bend) {      /* range starts at or after its end */
+                        rbp = rbp->rb_right;
+                        continue;
+                }
-        pag = xfs_perag_get(mp, busyp->agno);
+                if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
-        spin_lock(&pag->pagb_lock);
+                                                  userdata))
-        rb_erase(&busyp->rb_node, &pag->pagb_tree);
+                        goto restart;
+        }
        spin_unlock(&pag->pagb_lock);
        xfs_perag_put(pag);
+}
+/*
+ * For a given extent [bno, len], search the busy extent list to find a
+ * subset of the extent that is not busy.  If *rlen is smaller than
+ * args->minlen no suitable extent could be found, and the higher level
+ * code needs to force out the log and retry the allocation.
+ */
+STATIC void
+xfs_alloc_busy_trim(
+        struct xfs_alloc_arg    *args,
+        xfs_agblock_t           bno,
+        xfs_extlen_t            len,
+        xfs_agblock_t           *rbno,
+        xfs_extlen_t            *rlen)
+{
+        xfs_agblock_t           fbno;
+        xfs_extlen_t            flen;
+        struct rb_node          *rbp;
+        ASSERT(len > 0);
+        spin_lock(&args->pag->pagb_lock);
+restart:
+        fbno = bno;
+        flen = len;
+        rbp = args->pag->pagb_tree.rb_node;
+        while (rbp && flen >= args->minlen) {
+                struct xfs_busy_extent *busyp =
+                        rb_entry(rbp, struct xfs_busy_extent, rb_node);
+                xfs_agblock_t   fend = fbno + flen;
+                xfs_agblock_t   bbno = busyp->bno;
+                xfs_agblock_t   bend = bbno + busyp->length;
+                if (fend <= bbno) {
+                        rbp = rbp->rb_left;
+                        continue;
+                } else if (fbno >= bend) {
+                        rbp = rbp->rb_right;
+                        continue;
+                }
+                /*
+                 * If this is a metadata allocation, try to reuse the busy
+                 * extent instead of trimming the allocation.
+                 */
+                if (!args->userdata) {
+                        if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
+                                                          busyp, fbno, flen,
+                                                          false))
+                                goto restart;
+                        continue;
+                }
+                if (bbno <= fbno) {
+                        /* start overlap */
+                        /*
+                         * Case 1:
+                         *    bbno           bend
+                         *    +BBBBBBBBBBBBBBBBB+
+                         *        +---------+
+                         *        fbno   fend
+                         *
+                         * Case 2:
+                         *    bbno           bend
+                         *    +BBBBBBBBBBBBBBBBB+
+                         *    +-------------+
+                         *    fbno       fend
+                         *
+                         * Case 3:
+                         *    bbno           bend
+                         *    +BBBBBBBBBBBBBBBBB+
+                         *        +-------------+
+                         *        fbno       fend
+                         *
+                         * Case 4:
+                         *    bbno           bend
+                         *    +BBBBBBBBBBBBBBBBB+
+                         *    +-----------------+
+                         *    fbno           fend
+                         *
+                         * No unbusy region in extent, return failure.
+                         */
+                        if (fend <= bend)
+                                goto fail;
+                        /*
+                         * Case 5:
+                         *    bbno           bend
+                         *    +BBBBBBBBBBBBBBBBB+
+                         *        +----------------------+
+                         *        fbno                fend
+                         *
+                         * Case 6:
+                         *    bbno           bend
+                         *    +BBBBBBBBBBBBBBBBB+
+                         *    +--------------------------+
+                         *    fbno                    fend
+                         *
+                         * Needs to be trimmed to:
+                         *                       +-------+
+                         *                       fbno fend
+                         */
+                        fbno = bend;
+                } else if (bend >= fend) {
+                        /* end overlap */
+                        /*
+                         * Case 7:
+                         *             bbno           bend
+                         *             +BBBBBBBBBBBBBBBBB+
+                         *    +------------------+
+                         *    fbno            fend
+                         *
+                         * Case 8:
+                         *             bbno           bend
+                         *             +BBBBBBBBBBBBBBBBB+
+                         *    +--------------------------+
+                         *    fbno                    fend
+                         *
+                         * Needs to be trimmed to:
+                         *    +-------+
+                         *    fbno fend
+                         */
+                        fend = bbno;
+                } else {
+                        /* middle overlap */
+                        /*
+                         * Case 9:
+                         *             bbno           bend
+                         *             +BBBBBBBBBBBBBBBBB+
+                         *    +-----------------------------------+
+                         *    fbno                             fend
+                         *
+                         * Can be trimmed to:
+                         *    +-------+        OR         +-------+
+                         *    fbno fend                   fbno fend
+                         *
+                         * Backward allocation leads to significant
+                         * fragmentation of directories, which degrades
+                         * directory performance, therefore we always want to
+                         * choose the option that produces forward allocation
+                         * patterns.
+                         * Preferring the lower bno extent will make the next
+                         * request use "fend" as the start of the next
+                         * allocation;  if the segment is no longer busy at
+                         * that point, we'll get a contiguous allocation, but
+                         * even if it is still busy, we will get a forward
+                         * allocation.
+                         * We try to avoid choosing the segment at "bend",
+                         * because that can lead to the next allocation
+                         * taking the segment at "fbno", which would be a
+                         * backward allocation.  We only use the segment at
+                         * "fbno" if it is much larger than the current
+                         * requested size, because in that case there's a
+                         * good chance subsequent allocations will be
+                         * contiguous.
+                         */
+                        if (bbno - fbno >= args->maxlen) {
+                                /* left candidate fits perfectly */
+                                fend = bbno;
+                        } else if (fend - bend >= args->maxlen * 4) {
+                                /* right candidate has enough free space */
+                                fbno = bend;
+                        } else if (bbno - fbno >= args->minlen) {
+                                /* left candidate fits minimum requirement */
+                                fend = bbno;
+                        } else {
+                                goto fail;
+                        }
+                }
+                flen = fend - fbno;
+        }
+        spin_unlock(&args->pag->pagb_lock);
+        if (fbno != bno || flen != len) {
+                trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len,
+                                          fbno, flen);
+        }
+        *rbno = fbno;
+        *rlen = flen;
+        return;
+fail:
+        /*
+         * Return a zero extent length as a failure indication.  All callers
+         * re-check if the trimmed extent satisfies the minlen requirement.
+         */
+        spin_unlock(&args->pag->pagb_lock);
+        trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
+        *rbno = fbno;
+        *rlen = 0;
+}
+static void
+xfs_alloc_busy_clear_one(
+        struct xfs_mount        *mp,
+        struct xfs_perag        *pag,
+        struct xfs_busy_extent  *busyp)
+{
+        if (busyp->length) {
+                trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
+                                                busyp->length);
+                rb_erase(&busyp->rb_node, &pag->pagb_tree);
+        }
+        list_del_init(&busyp->list);
        kmem_free(busyp);
 }
+void
+xfs_alloc_busy_clear(
+        struct xfs_mount        *mp,
+        struct list_head        *list)
+{
+        struct xfs_busy_extent  *busyp, *n;
+        struct xfs_perag        *pag = NULL;
+        xfs_agnumber_t          agno = NULLAGNUMBER;
+        list_for_each_entry_safe(busyp, n, list, list) {
+                if (busyp->agno != agno) {
+                        if (pag) {
+                                spin_unlock(&pag->pagb_lock);
+                                xfs_perag_put(pag);
+                        }
+                        pag = xfs_perag_get(mp, busyp->agno);
+                        spin_lock(&pag->pagb_lock);
+                        agno = busyp->agno;
+                }
+                xfs_alloc_busy_clear_one(mp, pag, busyp);
+        }
+        if (pag) {
+                spin_unlock(&pag->pagb_lock);
+                xfs_perag_put(pag);
+        }
+}
+/*
+ * Callback for list_sort to sort busy extents by the AG they reside in.
+ */
+int
+xfs_busy_extent_ag_cmp(
+        void                    *priv,
+        struct list_head        *a,
+        struct list_head        *b)
+{
+        return container_of(a, struct xfs_busy_extent, list)->agno -
+                container_of(b, struct xfs_busy_extent, list)->agno;
+}
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index d0b3bc72005b..240ad288f2f9 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -140,11 +140,24 @@ xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
        xfs_agblock_t bno, xfs_extlen_t len);
 void
-xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp);
+xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list);
 int
 xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
        xfs_agblock_t bno, xfs_extlen_t len);
+void
+xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
+        xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
+int
+xfs_busy_extent_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
+static inline void xfs_alloc_busy_sort(struct list_head *list)
+{
+        list_sort(NULL, list, xfs_busy_extent_ag_cmp);
+}
 #endif  /* __KERNEL__ */
 /*
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 3916925e2584..8b469d53599f 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -95,6 +95,8 @@ xfs_allocbt_alloc_block(
                return 0;
        }
+        xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
        xfs_trans_agbtree_delta(cur->bc_tp, 1);
        new->s = cpu_to_be32(bno);
@@ -118,17 +120,6 @@ xfs_allocbt_free_block(
        if (error)
                return error;
-        /*
-         * Since blocks move to the free list without the coordination used in
-         * xfs_bmap_finish, we can't allow block to be available for
-         * reallocation and non-transaction writing (user data) until we know
-         * that the transaction that moved it to the free list is permanently
-         * on disk. We track the blocks by declaring these blocks as "busy";
-         * the busy list is maintained on a per-ag basis and each transaction
-         * records which entries should be removed when the iclog commits to
-         * disk. If a busy block is allocated, the iclog is pushed up to the
-         * LSN that freed the block.
-         */
        xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
        xfs_trans_agbtree_delta(cur->bc_tp, -1);
        return 0;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index be628677c288..9a84a85c03b1 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -202,7 +202,7 @@ xfs_swap_extents(
        xfs_inode_t     *tip,   /* tmp inode */
        xfs_swapext_t   *sxp)
 {
-        xfs_mount_t     *mp;
+        xfs_mount_t     *mp = ip->i_mount;
        xfs_trans_t     *tp;
        xfs_bstat_t     *sbp = &sxp->sx_stat;
        xfs_ifork_t     *tempifp, *ifp, *tifp;
@@ -212,16 +212,12 @@ xfs_swap_extents(
        int             taforkblks = 0;
        __uint64_t      tmp;
-        mp = ip->i_mount;
        tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
        if (!tempifp) {
                error = XFS_ERROR(ENOMEM);
                goto out;
        }
-        sbp = &sxp->sx_stat;
        /*
         * we have to do two separate lock calls here to keep lockdep
         * happy. If we try to get all the locks in one call, lock will
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a37480a6e023..c8e3349c287c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1354,7 +1354,7 @@ xfs_itruncate_start(
                return 0;
        }
        last_byte = xfs_file_last_byte(ip);
-        trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte);
+        trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte);
        if (last_byte > toss_start) {
                if (flags & XFS_ITRUNC_DEFINITE) {
                        xfs_tosspages(ip, toss_start,
@@ -1470,7 +1470,7 @@ xfs_itruncate_finish(
         * file but the log buffers containing the free and reallocation
         * don't, then we'd end up with garbage in the blocks being freed.
         * As long as we make the new_size permanent before actually
-         * freeing any blocks it doesn't matter if they get writtten to.
+         * freeing any blocks it doesn't matter if they get written to.
         *
         * The callers must signal into us whether or not the size
         * setting here must be synchronous.  There are a few cases
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 576fdfe81d60..09983a3344a5 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -970,7 +970,6 @@ xfs_iflush_abort(
 {
        xfs_inode_log_item_t    *iip = ip->i_itemp;
-        iip = ip->i_itemp;
        if (iip) {
                struct xfs_ail  *ailp = iip->ili_item.li_ailp;
                if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index b612ce4520ae..211930246f20 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1449,6 +1449,13 @@ xlog_dealloc_log(xlog_t *log)
        xlog_cil_destroy(log);
+        /*
+         * always need to ensure that the extra buffer does not point to memory
+         * owned by another log buffer before we free it.
+         */
+        xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size);
+        xfs_buf_free(log->l_xbuf);
        iclog = log->l_iclog;
        for (i=0; i<log->l_iclog_bufs; i++) {
                xfs_buf_free(iclog->ic_bp);
@@ -1458,7 +1465,6 @@ xlog_dealloc_log(xlog_t *log)
        }
        spinlock_destroy(&log->l_icloglock);
-        xfs_buf_free(log->l_xbuf);
        log->l_mp->m_log = NULL;
        kmem_free(log);
 }       /* xlog_dealloc_log */
@@ -3248,13 +3254,6 @@ xfs_log_ticket_get(
        return ticket;
 }
-xlog_tid_t
-xfs_log_get_trans_ident(
-        struct xfs_trans        *tp)
-{
-        return tp->t_ticket->t_tid;
-}
 /*
 * Allocate and initialise a new log ticket.
 */
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 3bd3291ef8d2..78c9039994af 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -189,8 +189,6 @@ void	  xlog_iodone(struct xfs_buf *);
 struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
 void      xfs_log_ticket_put(struct xlog_ticket *ticket);
-xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
 void    xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
                                struct xfs_log_vec *log_vector,
                                xfs_lsn_t *commit_lsn, int flags);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 9ca59be08977..7d56e88a3f0e 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -361,13 +361,12 @@ xlog_cil_committed(
        int     abort)
 {
        struct xfs_cil_ctx      *ctx = args;
-        struct xfs_busy_extent  *busyp, *n;
        xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
                                        ctx->start_lsn, abort);
-        list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
+        xfs_alloc_busy_sort(&ctx->busy_extents);
-                xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp);
+        xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, &ctx->busy_extents);
        spin_lock(&ctx->cil->xc_cil_lock);
        list_del(&ctx->committing);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 5864850e9e34..2d3b6a498d63 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -146,6 +146,8 @@ static inline uint xlog_get_client_id(__be32 i)
                                           shutdown */
 #define XLOG_TAIL_WARN          0x10    /* log tail verify warning issued */
+typedef __uint32_t xlog_tid_t;
 #ifdef __KERNEL__
 /*
 * Below are states for covering allocation transactions.
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 5cc464a17c93..04142caedb2b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -205,6 +205,35 @@ xlog_bread(
 }
 /*
+ * Read at an offset into the buffer. Returns with the buffer in its original
+ * state regardless of the result of the read.
+ */
+STATIC int
+xlog_bread_offset(
+        xlog_t          *log,
+        xfs_daddr_t     blk_no,         /* block to read from */
+        int             nbblks,         /* blocks to read */
+        xfs_buf_t       *bp,
+        xfs_caddr_t     offset)
+{
+        xfs_caddr_t     orig_offset = XFS_BUF_PTR(bp);
+        int             orig_len = bp->b_buffer_length;
+        int             error, error2;
+        error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks));
+        if (error)
+                return error;
+        error = xlog_bread_noalign(log, blk_no, nbblks, bp);
+        /* must reset buffer pointer even on error */
+        error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len);
+        if (error)
+                return error;
+        return error2;
+}
+/*
 * Write out the buffer at the given block for the given number of blocks.
 * The buffer is kept locked across the write and is returned locked.
 * This can only be used for synchronous log writes.
@@ -1229,20 +1258,12 @@ xlog_write_log_records(
                 */
                ealign = round_down(end_block, sectbb);
                if (j == 0 && (start_block + endcount > ealign)) {
-                        offset = XFS_BUF_PTR(bp);
+                        offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block);
-                        balign = BBTOB(ealign - start_block);
+                        error = xlog_bread_offset(log, ealign, sectbb,
-                        error = XFS_BUF_SET_PTR(bp, offset + balign,
+                                                        bp, offset);
-                                                BBTOB(sectbb));
                        if (error)
                                break;
-                        error = xlog_bread_noalign(log, ealign, sectbb, bp);
-                        if (error)
-                                break;
-                        error = XFS_BUF_SET_PTR(bp, offset, bufblks);
-                        if (error)
-                                break;
                }
                offset = xlog_align(log, start_block, endcount, bp);
@@ -3448,19 +3469,9 @@ xlog_do_recovery_pass(
                                 *   - order is important.
                                 */
                                wrapped_hblks = hblks - split_hblks;
-                                error = XFS_BUF_SET_PTR(hbp,
+                                error = xlog_bread_offset(log, 0,
-                                                offset + BBTOB(split_hblks),
+                                                wrapped_hblks, hbp,
-                                                BBTOB(hblks - split_hblks));
+                                                offset + BBTOB(split_hblks));
-                                if (error)
-                                        goto bread_err2;
-                                error = xlog_bread_noalign(log, 0,
-                                                           wrapped_hblks, hbp);
-                                if (error)
-                                        goto bread_err2;
-                                error = XFS_BUF_SET_PTR(hbp, offset,
-                                                        BBTOB(hblks));
                                if (error)
                                        goto bread_err2;
                        }
@@ -3511,19 +3522,9 @@ xlog_do_recovery_pass(
                                  *   _first_, then the log start (LR header end)
                                  *   - order is important.
                                  */
-                                error = XFS_BUF_SET_PTR(dbp,
+                                error = xlog_bread_offset(log, 0,
-                                                offset + BBTOB(split_bblks),
+                                                bblks - split_bblks, dbp,
-                                                BBTOB(bblks - split_bblks));
+                                                offset + BBTOB(split_bblks));
-                                if (error)
-                                        goto bread_err2;
-                                error = xlog_bread_noalign(log, wrapped_hblks,
-                                                bblks - split_bblks,
-                                                dbp);
-                                if (error)
-                                        goto bread_err2;
-                                error = XFS_BUF_SET_PTR(dbp, offset, h_size);
                                if (error)
                                        goto bread_err2;
                        }
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index bb3f9a7b24ed..b49b82363d20 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1900,7 +1900,7 @@ xfs_mod_incore_sb_batch(
        uint                    nmsb,
        int                     rsvd)
 {
-        xfs_mod_sb_t            *msbp = &msb[0];
+        xfs_mod_sb_t            *msbp;
        int                     error = 0;
        /*
@@ -1910,7 +1910,7 @@ xfs_mod_incore_sb_batch(
         * changes will be atomic.
         */
        spin_lock(&mp->m_sb_lock);
-        for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
+        for (msbp = msb; msbp < (msb + nmsb); msbp++) {
                ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
                       msbp->msb_field > XFS_SBS_FDBLOCKS);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 76922793f64f..d1f24858ccc4 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -608,10 +608,8 @@ STATIC void
 xfs_trans_free(
        struct xfs_trans        *tp)
 {
-        struct xfs_busy_extent  *busyp, *n;
+        xfs_alloc_busy_sort(&tp->t_busy);
+        xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy);
-        list_for_each_entry_safe(busyp, n, &tp->t_busy, list)
-                xfs_alloc_busy_clear(tp->t_mountp, busyp);
        atomic_dec(&tp->t_mountp->m_active_trans);
        xfs_trans_free_dqinfo(tp);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index acdb92f14d51..5fc2380092c8 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -346,20 +346,23 @@ xfs_ail_delete(
 */
 STATIC void
 xfs_ail_worker(
-        struct work_struct *work)
+        struct work_struct      *work)
 {
-        struct xfs_ail  *ailp = container_of(to_delayed_work(work),
+        struct xfs_ail          *ailp = container_of(to_delayed_work(work),
                                        struct xfs_ail, xa_work);
-        long            tout;
+        xfs_mount_t             *mp = ailp->xa_mount;
-        xfs_lsn_t       target =  ailp->xa_target;
-        xfs_lsn_t       lsn;
-        xfs_log_item_t  *lip;
-        int             flush_log, count, stuck;
-        xfs_mount_t     *mp = ailp->xa_mount;
        struct xfs_ail_cursor   *cur = &ailp->xa_cursors;
-        int             push_xfsbufd = 0;
+        xfs_log_item_t          *lip;
+        xfs_lsn_t               lsn;
+        xfs_lsn_t               target;
+        long                    tout = 10;
+        int                     flush_log = 0;
+        int                     stuck = 0;
+        int                     count = 0;
+        int                     push_xfsbufd = 0;
        spin_lock(&ailp->xa_lock);
+        target = ailp->xa_target;
        xfs_trans_ail_cursor_init(ailp, cur);
        lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn);
        if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
@@ -368,8 +371,7 @@ xfs_ail_worker(
                 */
                xfs_trans_ail_cursor_done(ailp, cur);
                spin_unlock(&ailp->xa_lock);
-                ailp->xa_last_pushed_lsn = 0;
+                goto out_done;
-                return;
        }
        XFS_STATS_INC(xs_push_ail);
@@ -386,8 +388,7 @@ xfs_ail_worker(
         * lots of contention on the AIL lists.
         */
        lsn = lip->li_lsn;
-        flush_log = stuck = count = 0;
+        while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) {
-        while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
                int     lock_result;
                /*
                 * If we can lock the item without sleeping, unlock the AIL
@@ -480,21 +481,25 @@ xfs_ail_worker(
        }
        /* assume we have more work to do in a short while */
-        tout = 10;
+out_done:
        if (!count) {
                /* We're past our target or empty, so idle */
                ailp->xa_last_pushed_lsn = 0;
                /*
-                 * Check for an updated push target before clearing the
+                 * We clear the XFS_AIL_PUSHING_BIT first before checking
-                 * XFS_AIL_PUSHING_BIT. If the target changed, we've got more
+                 * whether the target has changed. If the target has changed,
-                 * work to do. Wait a bit longer before starting that work.
+                 * this pushes the requeue race directly onto the result of the
+                 * atomic test/set bit, so we are guaranteed that either the
+                 * pusher that changed the target or ourselves will requeue
+                 * the work (but not both).
                 */
+                clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
                smp_rmb();
-                if (ailp->xa_target == target) {
+                if (XFS_LSN_CMP(ailp->xa_target, target) == 0 ||
-                        clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
+                    test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
                        return;
-                }
                tout = 50;
        } else if (XFS_LSN_CMP(lsn, target) >= 0) {
                /*
@@ -553,7 +558,7 @@ xfs_ail_push(
         * the XFS_AIL_PUSHING_BIT.
         */
        smp_wmb();
-        ailp->xa_target = threshold_lsn;
+        xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn);
        if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
                queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0);
 }
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 26d1867d8156..65584b55607d 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -73,8 +73,6 @@ typedef	__int32_t	xfs_tid_t;	/* transaction identifier */
 typedef __uint32_t      xfs_dablk_t;    /* dir/attr block number (in file) */
 typedef __uint32_t      xfs_dahash_t;   /* dir/attr hash value */
-typedef __uint32_t      xlog_tid_t;     /* transaction ID type */
 /*
 * These types are 64 bits on disk but are either 32 or 64 bits in memory.
 * Disk based types: