Diffstat (limited to 'fs')
-rw-r--r--  fs/Kconfig.binfmt | 2
-rw-r--r--  fs/afs/mntpt.c | 2
-rw-r--r--  fs/binfmt_elf_fdpic.c | 45
-rw-r--r--  fs/bio-integrity.c | 1
-rw-r--r--  fs/block_dev.c | 5
-rw-r--r--  fs/buffer.c | 48
-rw-r--r--  fs/cifs/cifs_dfs_ref.c | 2
-rw-r--r--  fs/configfs/configfs_internal.h | 3
-rw-r--r--  fs/configfs/dir.c | 210
-rw-r--r--  fs/configfs/symlink.c | 26
-rw-r--r--  fs/devpts/inode.c | 16
-rw-r--r--  fs/dlm/lock.c | 4
-rw-r--r--  fs/dlm/lowcomms.c | 4
-rw-r--r--  fs/dlm/user.c | 2
-rw-r--r--  fs/dquot.c | 33
-rw-r--r--  fs/ecryptfs/crypto.c | 30
-rw-r--r--  fs/exec.c | 1
-rw-r--r--  fs/ext2/inode.c | 1
-rw-r--r--  fs/ext3/inode.c | 67
-rw-r--r--  fs/ext3/super.c | 3
-rw-r--r--  fs/ext4/inode.c | 92
-rw-r--r--  fs/ext4/super.c | 3
-rw-r--r--  fs/fcntl.c | 140
-rw-r--r--  fs/file.c | 61
-rw-r--r--  fs/jffs2/summary.c | 40
-rw-r--r--  fs/jffs2/summary.h | 6
-rw-r--r--  fs/libfs.c | 4
-rw-r--r--  fs/namei.c | 17
-rw-r--r--  fs/namespace.c | 16
-rw-r--r--  fs/nfs/namespace.c | 2
-rw-r--r--  fs/ocfs2/aops.c | 29
-rw-r--r--  fs/ocfs2/file.c | 2
-rw-r--r--  fs/ocfs2/journal.c | 173
-rw-r--r--  fs/ocfs2/journal.h | 3
-rw-r--r--  fs/ocfs2/ocfs2.h | 2
-rw-r--r--  fs/ocfs2/ocfs2_fs.h | 5
-rw-r--r--  fs/ocfs2/super.c | 12
-rw-r--r--  fs/omfs/bitmap.c | 6
-rw-r--r--  fs/omfs/dir.c | 2
-rw-r--r--  fs/omfs/file.c | 6
-rw-r--r--  fs/open.c | 56
-rw-r--r--  fs/proc/generic.c | 27
-rw-r--r--  fs/reiserfs/super.c | 16
-rw-r--r--  fs/romfs/inode.c | 37
44 files changed, 792 insertions(+), 470 deletions(-)
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 3263084eef9..4a551af6f3f 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -30,7 +30,7 @@ config COMPAT_BINFMT_ELF
30config BINFMT_ELF_FDPIC 30config BINFMT_ELF_FDPIC
31 bool "Kernel support for FDPIC ELF binaries" 31 bool "Kernel support for FDPIC ELF binaries"
32 default y 32 default y
33 depends on (FRV || BLACKFIN) 33 depends on (FRV || BLACKFIN || (SUPERH32 && !MMU))
34 help 34 help
35 ELF FDPIC binaries are based on ELF, but allow the individual load 35 ELF FDPIC binaries are based on ELF, but allow the individual load
36 segments of a binary to be located in memory independently of each 36 segments of a binary to be located in memory independently of each
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 2f5503902c3..78db4953a80 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -232,7 +232,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
232 } 232 }
233 233
234 mntget(newmnt); 234 mntget(newmnt);
235 err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts); 235 err = do_add_mount(newmnt, &nd->path, MNT_SHRINKABLE, &afs_vfsmounts);
236 switch (err) { 236 switch (err) {
237 case 0: 237 case 0:
238 path_put(&nd->path); 238 path_put(&nd->path);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index fdeadab2f18..80c1f952ef7 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -470,6 +470,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
470 char __user *u_platform, *p; 470 char __user *u_platform, *p;
471 long hwcap; 471 long hwcap;
472 int loop; 472 int loop;
473 int nr; /* reset for each csp adjustment */
473 474
474 /* we're going to shovel a whole load of stuff onto the stack */ 475 /* we're going to shovel a whole load of stuff onto the stack */
475#ifdef CONFIG_MMU 476#ifdef CONFIG_MMU
@@ -542,10 +543,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
542 /* force 16 byte _final_ alignment here for generality */ 543 /* force 16 byte _final_ alignment here for generality */
543#define DLINFO_ITEMS 13 544#define DLINFO_ITEMS 13
544 545
545 nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0); 546 nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0) + AT_VECTOR_SIZE_ARCH;
546#ifdef DLINFO_ARCH_ITEMS
547 nitems += DLINFO_ARCH_ITEMS;
548#endif
549 547
550 csp = sp; 548 csp = sp;
551 sp -= nitems * 2 * sizeof(unsigned long); 549 sp -= nitems * 2 * sizeof(unsigned long);
@@ -557,39 +555,46 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
557 sp -= sp & 15UL; 555 sp -= sp & 15UL;
558 556
559 /* put the ELF interpreter info on the stack */ 557 /* put the ELF interpreter info on the stack */
560#define NEW_AUX_ENT(nr, id, val) \ 558#define NEW_AUX_ENT(id, val) \
561 do { \ 559 do { \
562 struct { unsigned long _id, _val; } __user *ent; \ 560 struct { unsigned long _id, _val; } __user *ent; \
563 \ 561 \
564 ent = (void __user *) csp; \ 562 ent = (void __user *) csp; \
565 __put_user((id), &ent[nr]._id); \ 563 __put_user((id), &ent[nr]._id); \
566 __put_user((val), &ent[nr]._val); \ 564 __put_user((val), &ent[nr]._val); \
565 nr++; \
567 } while (0) 566 } while (0)
568 567
568 nr = 0;
569 csp -= 2 * sizeof(unsigned long); 569 csp -= 2 * sizeof(unsigned long);
570 NEW_AUX_ENT(0, AT_NULL, 0); 570 NEW_AUX_ENT(AT_NULL, 0);
571 if (k_platform) { 571 if (k_platform) {
572 nr = 0;
572 csp -= 2 * sizeof(unsigned long); 573 csp -= 2 * sizeof(unsigned long);
573 NEW_AUX_ENT(0, AT_PLATFORM, 574 NEW_AUX_ENT(AT_PLATFORM,
574 (elf_addr_t) (unsigned long) u_platform); 575 (elf_addr_t) (unsigned long) u_platform);
575 } 576 }
576 577
578 nr = 0;
577 csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long); 579 csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long);
578 NEW_AUX_ENT( 0, AT_HWCAP, hwcap); 580 NEW_AUX_ENT(AT_HWCAP, hwcap);
579 NEW_AUX_ENT( 1, AT_PAGESZ, PAGE_SIZE); 581 NEW_AUX_ENT(AT_PAGESZ, PAGE_SIZE);
580 NEW_AUX_ENT( 2, AT_CLKTCK, CLOCKS_PER_SEC); 582 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
581 NEW_AUX_ENT( 3, AT_PHDR, exec_params->ph_addr); 583 NEW_AUX_ENT(AT_PHDR, exec_params->ph_addr);
582 NEW_AUX_ENT( 4, AT_PHENT, sizeof(struct elf_phdr)); 584 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
583 NEW_AUX_ENT( 5, AT_PHNUM, exec_params->hdr.e_phnum); 585 NEW_AUX_ENT(AT_PHNUM, exec_params->hdr.e_phnum);
584 NEW_AUX_ENT( 6, AT_BASE, interp_params->elfhdr_addr); 586 NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr);
585 NEW_AUX_ENT( 7, AT_FLAGS, 0); 587 NEW_AUX_ENT(AT_FLAGS, 0);
586 NEW_AUX_ENT( 8, AT_ENTRY, exec_params->entry_addr); 588 NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr);
587 NEW_AUX_ENT( 9, AT_UID, (elf_addr_t) current->uid); 589 NEW_AUX_ENT(AT_UID, (elf_addr_t) current->uid);
588 NEW_AUX_ENT(10, AT_EUID, (elf_addr_t) current->euid); 590 NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->euid);
589 NEW_AUX_ENT(11, AT_GID, (elf_addr_t) current->gid); 591 NEW_AUX_ENT(AT_GID, (elf_addr_t) current->gid);
590 NEW_AUX_ENT(12, AT_EGID, (elf_addr_t) current->egid); 592 NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->egid);
591 593
592#ifdef ARCH_DLINFO 594#ifdef ARCH_DLINFO
595 nr = 0;
596 csp -= AT_VECTOR_SIZE_ARCH * 2 * sizeof(unsigned long);
597
593 /* ARCH_DLINFO must come last so platform specific code can enforce 598 /* ARCH_DLINFO must come last so platform specific code can enforce
594 * special alignment requirements on the AUXV if necessary (eg. PPC). 599 * special alignment requirements on the AUXV if necessary (eg. PPC).
595 */ 600 */
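The hunk above replaces the hand-numbered NEW_AUX_ENT(nr, id, val) calls with a macro that keeps its own running index: `nr` is reset before each block of entries (each time `csp` is lowered) and incremented inside the macro, and the arch-specific entry count now comes from AT_VECTOR_SIZE_ARCH instead of an optional DLINFO_ARCH_ITEMS define. Below is a minimal userspace sketch of the self-incrementing pattern only, with stand-in AT_* constants and a flat array in place of the new process's user stack.

```c
#include <stdio.h>

/*
 * Simplified illustration of the NEW_AUX_ENT() change above: each
 * auxiliary-vector entry is an (id, value) pair, and the running index
 * 'nr' is advanced by the macro itself instead of being passed in by
 * hand.  The constants and the 'ent' array are stand-ins, not the
 * kernel's real stack layout.
 */
#define AT_NULL   0
#define AT_PAGESZ 6
#define AT_ENTRY  9

struct aux_ent { unsigned long id, val; };

#define NEW_AUX_ENT(id_, val_)          \
	do {                            \
		ent[nr].id  = (id_);    \
		ent[nr].val = (val_);   \
		nr++;                   \
	} while (0)

int main(void)
{
	struct aux_ent ent[8];
	int nr = 0;                     /* reset before each group of entries */

	NEW_AUX_ENT(AT_PAGESZ, 4096);
	NEW_AUX_ENT(AT_ENTRY, 0x400000);
	NEW_AUX_ENT(AT_NULL, 0);        /* terminator */

	for (int i = 0; i < nr; i++)
		printf("id=%lu val=%#lx\n", ent[i].id, ent[i].val);
	return 0;
}
```

Letting the macro advance the index removes the risk of two entries landing in the same slot when the list is reordered or an entry is added.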
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 63e2ee63058..c3e174b35fe 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -705,7 +705,6 @@ void __init bio_integrity_init_slab(void)
705 bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, 705 bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
706 SLAB_HWCACHE_ALIGN|SLAB_PANIC); 706 SLAB_HWCACHE_ALIGN|SLAB_PANIC);
707} 707}
708EXPORT_SYMBOL(bio_integrity_init_slab);
709 708
710static int __init integrity_init(void) 709static int __init integrity_init(void)
711{ 710{
diff --git a/fs/block_dev.c b/fs/block_dev.c
index dcf37cada36..aff54219e04 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -941,8 +941,10 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
941 * hooks: /n/, see "layering violations". 941 * hooks: /n/, see "layering violations".
942 */ 942 */
943 ret = devcgroup_inode_permission(bdev->bd_inode, perm); 943 ret = devcgroup_inode_permission(bdev->bd_inode, perm);
944 if (ret != 0) 944 if (ret != 0) {
945 bdput(bdev);
945 return ret; 946 return ret;
947 }
946 948
947 ret = -ENXIO; 949 ret = -ENXIO;
948 file->f_mapping = bdev->bd_inode->i_mapping; 950 file->f_mapping = bdev->bd_inode->i_mapping;
@@ -1234,6 +1236,7 @@ fail:
1234 bdev = ERR_PTR(error); 1236 bdev = ERR_PTR(error);
1235 goto out; 1237 goto out;
1236} 1238}
1239EXPORT_SYMBOL(lookup_bdev);
1237 1240
1238/** 1241/**
1239 * open_bdev_excl - open a block device by name and set it up for use 1242 * open_bdev_excl - open a block device by name and set it up for use
diff --git a/fs/buffer.c b/fs/buffer.c
index f9580501963..4dbe52948e8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -580,7 +580,7 @@ EXPORT_SYMBOL(mark_buffer_async_write);
580/* 580/*
581 * The buffer's backing address_space's private_lock must be held 581 * The buffer's backing address_space's private_lock must be held
582 */ 582 */
583static inline void __remove_assoc_queue(struct buffer_head *bh) 583static void __remove_assoc_queue(struct buffer_head *bh)
584{ 584{
585 list_del_init(&bh->b_assoc_buffers); 585 list_del_init(&bh->b_assoc_buffers);
586 WARN_ON(!bh->b_assoc_map); 586 WARN_ON(!bh->b_assoc_map);
@@ -2096,6 +2096,52 @@ int generic_write_end(struct file *file, struct address_space *mapping,
2096EXPORT_SYMBOL(generic_write_end); 2096EXPORT_SYMBOL(generic_write_end);
2097 2097
2098/* 2098/*
2099 * block_is_partially_uptodate checks whether buffers within a page are
2100 * uptodate or not.
2101 *
2102 * Returns true if all buffers which correspond to a file portion
2103 * we want to read are uptodate.
2104 */
2105int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
2106 unsigned long from)
2107{
2108 struct inode *inode = page->mapping->host;
2109 unsigned block_start, block_end, blocksize;
2110 unsigned to;
2111 struct buffer_head *bh, *head;
2112 int ret = 1;
2113
2114 if (!page_has_buffers(page))
2115 return 0;
2116
2117 blocksize = 1 << inode->i_blkbits;
2118 to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
2119 to = from + to;
2120 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2121 return 0;
2122
2123 head = page_buffers(page);
2124 bh = head;
2125 block_start = 0;
2126 do {
2127 block_end = block_start + blocksize;
2128 if (block_end > from && block_start < to) {
2129 if (!buffer_uptodate(bh)) {
2130 ret = 0;
2131 break;
2132 }
2133 if (block_end >= to)
2134 break;
2135 }
2136 block_start = block_end;
2137 bh = bh->b_this_page;
2138 } while (bh != head);
2139
2140 return ret;
2141}
2142EXPORT_SYMBOL(block_is_partially_uptodate);
2143
2144/*
2099 * Generic "read page" function for block devices that have the normal 2145 * Generic "read page" function for block devices that have the normal
2100 * get_block functionality. This is most of the block device filesystems. 2146 * get_block functionality. This is most of the block device filesystems.
2101 * Reads the page asynchronously --- the unlock_buffer() and 2147 * Reads the page asynchronously --- the unlock_buffer() and
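The new block_is_partially_uptodate() above walks the page's buffer_heads and reports whether every buffer overlapping the requested byte range is uptodate, so the read path can serve a partially cached page without forcing a full readpage. The following is a simplified userspace-only sketch of the same range check, assuming a 4 KiB page, 1 KiB blocks, and a plain flag array in place of the buffer ring.

```c
#include <stdbool.h>

/*
 * Sketch of the check block_is_partially_uptodate() performs above:
 * given per-block "uptodate" flags for one page, report whether every
 * block overlapping the byte range [from, from + count) is uptodate.
 * PAGE_SIZE, BLOCK_SIZE and the flag array are assumptions made for
 * illustration.
 */
#define PAGE_SIZE  4096u
#define BLOCK_SIZE 1024u
#define NBLOCKS    (PAGE_SIZE / BLOCK_SIZE)

static bool range_is_uptodate(const bool uptodate[NBLOCKS],
			      unsigned from, unsigned count)
{
	unsigned to = from + count;

	if (to > PAGE_SIZE)
		to = PAGE_SIZE;

	for (unsigned block_start = 0; block_start < PAGE_SIZE;
	     block_start += BLOCK_SIZE) {
		unsigned block_end = block_start + BLOCK_SIZE;

		/* only blocks overlapping [from, to) matter */
		if (block_end > from && block_start < to) {
			if (!uptodate[block_start / BLOCK_SIZE])
				return false;
			if (block_end >= to)
				break;
		}
	}
	return true;
}

int main(void)
{
	bool up[NBLOCKS] = { true, true, false, true };

	/* bytes 0..2047 live in blocks 0 and 1, which are uptodate */
	return range_is_uptodate(up, 0, 2048) ? 0 : 1;
}
```

Filesystems opt in by pointing .is_partially_uptodate at this helper in their address_space_operations, as the ext2/ext3/ext4 hunks later in this diff do.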
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index d82374c9e32..d2c8eef84f3 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -226,7 +226,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
226 int err; 226 int err;
227 227
228 mntget(newmnt); 228 mntget(newmnt);
229 err = do_add_mount(newmnt, nd, nd->path.mnt->mnt_flags, mntlist); 229 err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist);
230 switch (err) { 230 switch (err) {
231 case 0: 231 case 0:
232 path_put(&nd->path); 232 path_put(&nd->path);
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index da015c12e3e..762d287123c 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -49,8 +49,10 @@ struct configfs_dirent {
49#define CONFIGFS_USET_DEFAULT 0x0080 49#define CONFIGFS_USET_DEFAULT 0x0080
50#define CONFIGFS_USET_DROPPING 0x0100 50#define CONFIGFS_USET_DROPPING 0x0100
51#define CONFIGFS_USET_IN_MKDIR 0x0200 51#define CONFIGFS_USET_IN_MKDIR 0x0200
52#define CONFIGFS_USET_CREATING 0x0400
52#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) 53#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR)
53 54
55extern struct mutex configfs_symlink_mutex;
54extern spinlock_t configfs_dirent_lock; 56extern spinlock_t configfs_dirent_lock;
55 57
56extern struct vfsmount * configfs_mount; 58extern struct vfsmount * configfs_mount;
@@ -66,6 +68,7 @@ extern void configfs_inode_exit(void);
66extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); 68extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
67extern int configfs_make_dirent(struct configfs_dirent *, 69extern int configfs_make_dirent(struct configfs_dirent *,
68 struct dentry *, void *, umode_t, int); 70 struct dentry *, void *, umode_t, int);
71extern int configfs_dirent_is_ready(struct configfs_dirent *);
69 72
70extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int); 73extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int);
71extern void configfs_hash_and_remove(struct dentry * dir, const char * name); 74extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 179589be063..7a8db78a91d 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -185,7 +185,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
185 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name); 185 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
186 if (!error) 186 if (!error)
187 error = configfs_make_dirent(p->d_fsdata, d, k, mode, 187 error = configfs_make_dirent(p->d_fsdata, d, k, mode,
188 CONFIGFS_DIR); 188 CONFIGFS_DIR | CONFIGFS_USET_CREATING);
189 if (!error) { 189 if (!error) {
190 error = configfs_create(d, mode, init_dir); 190 error = configfs_create(d, mode, init_dir);
191 if (!error) { 191 if (!error) {
@@ -209,6 +209,9 @@ static int create_dir(struct config_item * k, struct dentry * p,
209 * configfs_create_dir - create a directory for a config_item. 209 * configfs_create_dir - create a directory for a config_item.
210 * @item: config_item we're creating directory for. 210 * @item: config_item we're creating directory for.
211 * @dentry: config_item's dentry. 211 * @dentry: config_item's dentry.
212 *
213 * Note: user-created entries won't be allowed under this new directory
214 * until it is validated by configfs_dir_set_ready()
212 */ 215 */
213 216
214static int configfs_create_dir(struct config_item * item, struct dentry *dentry) 217static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
@@ -231,6 +234,44 @@ static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
231 return error; 234 return error;
232} 235}
233 236
237/*
238 * Allow userspace to create new entries under a new directory created with
239 * configfs_create_dir(), and under all of its children directories recursively.
240 * @sd configfs_dirent of the new directory to validate
241 *
242 * Caller must hold configfs_dirent_lock.
243 */
244static void configfs_dir_set_ready(struct configfs_dirent *sd)
245{
246 struct configfs_dirent *child_sd;
247
248 sd->s_type &= ~CONFIGFS_USET_CREATING;
249 list_for_each_entry(child_sd, &sd->s_children, s_sibling)
250 if (child_sd->s_type & CONFIGFS_USET_CREATING)
251 configfs_dir_set_ready(child_sd);
252}
253
254/*
255 * Check that a directory does not belong to a directory hierarchy being
256 * attached and not validated yet.
257 * @sd configfs_dirent of the directory to check
258 *
259 * @return non-zero iff the directory was validated
260 *
261 * Note: takes configfs_dirent_lock, so the result may change from false to true
262 * in two consecutive calls, but never from true to false.
263 */
264int configfs_dirent_is_ready(struct configfs_dirent *sd)
265{
266 int ret;
267
268 spin_lock(&configfs_dirent_lock);
269 ret = !(sd->s_type & CONFIGFS_USET_CREATING);
270 spin_unlock(&configfs_dirent_lock);
271
272 return ret;
273}
274
234int configfs_create_link(struct configfs_symlink *sl, 275int configfs_create_link(struct configfs_symlink *sl,
235 struct dentry *parent, 276 struct dentry *parent,
236 struct dentry *dentry) 277 struct dentry *dentry)
@@ -283,6 +324,8 @@ static void remove_dir(struct dentry * d)
283 * The only thing special about this is that we remove any files in 324 * The only thing special about this is that we remove any files in
284 * the directory before we remove the directory, and we've inlined 325 * the directory before we remove the directory, and we've inlined
285 * what used to be configfs_rmdir() below, instead of calling separately. 326 * what used to be configfs_rmdir() below, instead of calling separately.
327 *
328 * Caller holds the mutex of the item's inode
286 */ 329 */
287 330
288static void configfs_remove_dir(struct config_item * item) 331static void configfs_remove_dir(struct config_item * item)
@@ -330,7 +373,19 @@ static struct dentry * configfs_lookup(struct inode *dir,
330 struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata; 373 struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
331 struct configfs_dirent * sd; 374 struct configfs_dirent * sd;
332 int found = 0; 375 int found = 0;
333 int err = 0; 376 int err;
377
378 /*
379 * Fake invisibility if dir belongs to a group/default groups hierarchy
380 * being attached
381 *
382 * This forbids userspace to read/write attributes of items which may
383 * not complete their initialization, since the dentries of the
384 * attributes won't be instantiated.
385 */
386 err = -ENOENT;
387 if (!configfs_dirent_is_ready(parent_sd))
388 goto out;
334 389
335 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 390 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
336 if (sd->s_type & CONFIGFS_NOT_PINNED) { 391 if (sd->s_type & CONFIGFS_NOT_PINNED) {
@@ -353,6 +408,7 @@ static struct dentry * configfs_lookup(struct inode *dir,
353 return simple_lookup(dir, dentry, nd); 408 return simple_lookup(dir, dentry, nd);
354 } 409 }
355 410
411out:
356 return ERR_PTR(err); 412 return ERR_PTR(err);
357} 413}
358 414
@@ -370,13 +426,17 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
370 struct configfs_dirent *sd; 426 struct configfs_dirent *sd;
371 int ret; 427 int ret;
372 428
429 /* Mark that we're trying to drop the group */
430 parent_sd->s_type |= CONFIGFS_USET_DROPPING;
431
373 ret = -EBUSY; 432 ret = -EBUSY;
374 if (!list_empty(&parent_sd->s_links)) 433 if (!list_empty(&parent_sd->s_links))
375 goto out; 434 goto out;
376 435
377 ret = 0; 436 ret = 0;
378 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 437 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
379 if (sd->s_type & CONFIGFS_NOT_PINNED) 438 if (!sd->s_element ||
439 (sd->s_type & CONFIGFS_NOT_PINNED))
380 continue; 440 continue;
381 if (sd->s_type & CONFIGFS_USET_DEFAULT) { 441 if (sd->s_type & CONFIGFS_USET_DEFAULT) {
382 /* Abort if racing with mkdir() */ 442 /* Abort if racing with mkdir() */
@@ -385,8 +445,6 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
385 *wait_mutex = &sd->s_dentry->d_inode->i_mutex; 445 *wait_mutex = &sd->s_dentry->d_inode->i_mutex;
386 return -EAGAIN; 446 return -EAGAIN;
387 } 447 }
388 /* Mark that we're trying to drop the group */
389 sd->s_type |= CONFIGFS_USET_DROPPING;
390 448
391 /* 449 /*
392 * Yup, recursive. If there's a problem, blame 450 * Yup, recursive. If there's a problem, blame
@@ -414,12 +472,11 @@ static void configfs_detach_rollback(struct dentry *dentry)
414 struct configfs_dirent *parent_sd = dentry->d_fsdata; 472 struct configfs_dirent *parent_sd = dentry->d_fsdata;
415 struct configfs_dirent *sd; 473 struct configfs_dirent *sd;
416 474
417 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 475 parent_sd->s_type &= ~CONFIGFS_USET_DROPPING;
418 if (sd->s_type & CONFIGFS_USET_DEFAULT) { 476
477 list_for_each_entry(sd, &parent_sd->s_children, s_sibling)
478 if (sd->s_type & CONFIGFS_USET_DEFAULT)
419 configfs_detach_rollback(sd->s_dentry); 479 configfs_detach_rollback(sd->s_dentry);
420 sd->s_type &= ~CONFIGFS_USET_DROPPING;
421 }
422 }
423} 480}
424 481
425static void detach_attrs(struct config_item * item) 482static void detach_attrs(struct config_item * item)
@@ -558,36 +615,21 @@ static int create_default_group(struct config_group *parent_group,
558static int populate_groups(struct config_group *group) 615static int populate_groups(struct config_group *group)
559{ 616{
560 struct config_group *new_group; 617 struct config_group *new_group;
561 struct dentry *dentry = group->cg_item.ci_dentry;
562 int ret = 0; 618 int ret = 0;
563 int i; 619 int i;
564 620
565 if (group->default_groups) { 621 if (group->default_groups) {
566 /*
567 * FYI, we're faking mkdir here
568 * I'm not sure we need this semaphore, as we're called
569 * from our parent's mkdir. That holds our parent's
570 * i_mutex, so afaik lookup cannot continue through our
571 * parent to find us, let alone mess with our tree.
572 * That said, taking our i_mutex is closer to mkdir
573 * emulation, and shouldn't hurt.
574 */
575 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
576
577 for (i = 0; group->default_groups[i]; i++) { 622 for (i = 0; group->default_groups[i]; i++) {
578 new_group = group->default_groups[i]; 623 new_group = group->default_groups[i];
579 624
580 ret = create_default_group(group, new_group); 625 ret = create_default_group(group, new_group);
581 if (ret) 626 if (ret) {
627 detach_groups(group);
582 break; 628 break;
629 }
583 } 630 }
584
585 mutex_unlock(&dentry->d_inode->i_mutex);
586 } 631 }
587 632
588 if (ret)
589 detach_groups(group);
590
591 return ret; 633 return ret;
592} 634}
593 635
@@ -702,7 +744,15 @@ static int configfs_attach_item(struct config_item *parent_item,
702 if (!ret) { 744 if (!ret) {
703 ret = populate_attrs(item); 745 ret = populate_attrs(item);
704 if (ret) { 746 if (ret) {
747 /*
748 * We are going to remove an inode and its dentry but
749 * the VFS may already have hit and used them. Thus,
750 * we must lock them as rmdir() would.
751 */
752 mutex_lock(&dentry->d_inode->i_mutex);
705 configfs_remove_dir(item); 753 configfs_remove_dir(item);
754 dentry->d_inode->i_flags |= S_DEAD;
755 mutex_unlock(&dentry->d_inode->i_mutex);
706 d_delete(dentry); 756 d_delete(dentry);
707 } 757 }
708 } 758 }
@@ -710,6 +760,7 @@ static int configfs_attach_item(struct config_item *parent_item,
710 return ret; 760 return ret;
711} 761}
712 762
763/* Caller holds the mutex of the item's inode */
713static void configfs_detach_item(struct config_item *item) 764static void configfs_detach_item(struct config_item *item)
714{ 765{
715 detach_attrs(item); 766 detach_attrs(item);
@@ -728,16 +779,30 @@ static int configfs_attach_group(struct config_item *parent_item,
728 sd = dentry->d_fsdata; 779 sd = dentry->d_fsdata;
729 sd->s_type |= CONFIGFS_USET_DIR; 780 sd->s_type |= CONFIGFS_USET_DIR;
730 781
782 /*
783 * FYI, we're faking mkdir in populate_groups()
784 * We must lock the group's inode to avoid races with the VFS
785 * which can already hit the inode and try to add/remove entries
786 * under it.
787 *
788 * We must also lock the inode to remove it safely in case of
789 * error, as rmdir() would.
790 */
791 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
731 ret = populate_groups(to_config_group(item)); 792 ret = populate_groups(to_config_group(item));
732 if (ret) { 793 if (ret) {
733 configfs_detach_item(item); 794 configfs_detach_item(item);
734 d_delete(dentry); 795 dentry->d_inode->i_flags |= S_DEAD;
735 } 796 }
797 mutex_unlock(&dentry->d_inode->i_mutex);
798 if (ret)
799 d_delete(dentry);
736 } 800 }
737 801
738 return ret; 802 return ret;
739} 803}
740 804
805/* Caller holds the mutex of the group's inode */
741static void configfs_detach_group(struct config_item *item) 806static void configfs_detach_group(struct config_item *item)
742{ 807{
743 detach_groups(to_config_group(item)); 808 detach_groups(to_config_group(item));
@@ -1035,7 +1100,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1035 struct configfs_subsystem *subsys; 1100 struct configfs_subsystem *subsys;
1036 struct configfs_dirent *sd; 1101 struct configfs_dirent *sd;
1037 struct config_item_type *type; 1102 struct config_item_type *type;
1038 struct module *owner = NULL; 1103 struct module *subsys_owner = NULL, *new_item_owner = NULL;
1039 char *name; 1104 char *name;
1040 1105
1041 if (dentry->d_parent == configfs_sb->s_root) { 1106 if (dentry->d_parent == configfs_sb->s_root) {
@@ -1044,6 +1109,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1044 } 1109 }
1045 1110
1046 sd = dentry->d_parent->d_fsdata; 1111 sd = dentry->d_parent->d_fsdata;
1112
1113 /*
1114 * Fake invisibility if dir belongs to a group/default groups hierarchy
1115 * being attached
1116 */
1117 if (!configfs_dirent_is_ready(sd)) {
1118 ret = -ENOENT;
1119 goto out;
1120 }
1121
1047 if (!(sd->s_type & CONFIGFS_USET_DIR)) { 1122 if (!(sd->s_type & CONFIGFS_USET_DIR)) {
1048 ret = -EPERM; 1123 ret = -EPERM;
1049 goto out; 1124 goto out;
@@ -1062,10 +1137,25 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1062 goto out_put; 1137 goto out_put;
1063 } 1138 }
1064 1139
1140 /*
1141 * The subsystem may belong to a different module than the item
1142 * being created. We don't want to safely pin the new item but
1143 * fail to pin the subsystem it sits under.
1144 */
1145 if (!subsys->su_group.cg_item.ci_type) {
1146 ret = -EINVAL;
1147 goto out_put;
1148 }
1149 subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
1150 if (!try_module_get(subsys_owner)) {
1151 ret = -EINVAL;
1152 goto out_put;
1153 }
1154
1065 name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); 1155 name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
1066 if (!name) { 1156 if (!name) {
1067 ret = -ENOMEM; 1157 ret = -ENOMEM;
1068 goto out_put; 1158 goto out_subsys_put;
1069 } 1159 }
1070 1160
1071 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); 1161 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
@@ -1094,10 +1184,10 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1094 kfree(name); 1184 kfree(name);
1095 if (ret) { 1185 if (ret) {
1096 /* 1186 /*
1097 * If item == NULL, then link_obj() was never called. 1187 * If ret != 0, then link_obj() was never called.
1098 * There are no extra references to clean up. 1188 * There are no extra references to clean up.
1099 */ 1189 */
1100 goto out_put; 1190 goto out_subsys_put;
1101 } 1191 }
1102 1192
1103 /* 1193 /*
@@ -1111,8 +1201,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1111 goto out_unlink; 1201 goto out_unlink;
1112 } 1202 }
1113 1203
1114 owner = type->ct_owner; 1204 new_item_owner = type->ct_owner;
1115 if (!try_module_get(owner)) { 1205 if (!try_module_get(new_item_owner)) {
1116 ret = -EINVAL; 1206 ret = -EINVAL;
1117 goto out_unlink; 1207 goto out_unlink;
1118 } 1208 }
@@ -1142,6 +1232,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1142 1232
1143 spin_lock(&configfs_dirent_lock); 1233 spin_lock(&configfs_dirent_lock);
1144 sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; 1234 sd->s_type &= ~CONFIGFS_USET_IN_MKDIR;
1235 if (!ret)
1236 configfs_dir_set_ready(dentry->d_fsdata);
1145 spin_unlock(&configfs_dirent_lock); 1237 spin_unlock(&configfs_dirent_lock);
1146 1238
1147out_unlink: 1239out_unlink:
@@ -1159,9 +1251,13 @@ out_unlink:
1159 mutex_unlock(&subsys->su_mutex); 1251 mutex_unlock(&subsys->su_mutex);
1160 1252
1161 if (module_got) 1253 if (module_got)
1162 module_put(owner); 1254 module_put(new_item_owner);
1163 } 1255 }
1164 1256
1257out_subsys_put:
1258 if (ret)
1259 module_put(subsys_owner);
1260
1165out_put: 1261out_put:
1166 /* 1262 /*
1167 * link_obj()/link_group() took a reference from child->parent, 1263 * link_obj()/link_group() took a reference from child->parent,
@@ -1180,7 +1276,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1180 struct config_item *item; 1276 struct config_item *item;
1181 struct configfs_subsystem *subsys; 1277 struct configfs_subsystem *subsys;
1182 struct configfs_dirent *sd; 1278 struct configfs_dirent *sd;
1183 struct module *owner = NULL; 1279 struct module *subsys_owner = NULL, *dead_item_owner = NULL;
1184 int ret; 1280 int ret;
1185 1281
1186 if (dentry->d_parent == configfs_sb->s_root) 1282 if (dentry->d_parent == configfs_sb->s_root)
@@ -1207,6 +1303,15 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1207 return -EINVAL; 1303 return -EINVAL;
1208 } 1304 }
1209 1305
1306 /* configfs_mkdir() shouldn't have allowed this */
1307 BUG_ON(!subsys->su_group.cg_item.ci_type);
1308 subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
1309
1310 /*
1311 * Ensure that no racing symlink() will make detach_prep() fail while
1312 * the new link is temporarily attached
1313 */
1314 mutex_lock(&configfs_symlink_mutex);
1210 spin_lock(&configfs_dirent_lock); 1315 spin_lock(&configfs_dirent_lock);
1211 do { 1316 do {
1212 struct mutex *wait_mutex; 1317 struct mutex *wait_mutex;
@@ -1215,6 +1320,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1215 if (ret) { 1320 if (ret) {
1216 configfs_detach_rollback(dentry); 1321 configfs_detach_rollback(dentry);
1217 spin_unlock(&configfs_dirent_lock); 1322 spin_unlock(&configfs_dirent_lock);
1323 mutex_unlock(&configfs_symlink_mutex);
1218 if (ret != -EAGAIN) { 1324 if (ret != -EAGAIN) {
1219 config_item_put(parent_item); 1325 config_item_put(parent_item);
1220 return ret; 1326 return ret;
@@ -1224,10 +1330,12 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1224 mutex_lock(wait_mutex); 1330 mutex_lock(wait_mutex);
1225 mutex_unlock(wait_mutex); 1331 mutex_unlock(wait_mutex);
1226 1332
1333 mutex_lock(&configfs_symlink_mutex);
1227 spin_lock(&configfs_dirent_lock); 1334 spin_lock(&configfs_dirent_lock);
1228 } 1335 }
1229 } while (ret == -EAGAIN); 1336 } while (ret == -EAGAIN);
1230 spin_unlock(&configfs_dirent_lock); 1337 spin_unlock(&configfs_dirent_lock);
1338 mutex_unlock(&configfs_symlink_mutex);
1231 1339
1232 /* Get a working ref for the duration of this function */ 1340 /* Get a working ref for the duration of this function */
1233 item = configfs_get_config_item(dentry); 1341 item = configfs_get_config_item(dentry);
@@ -1236,7 +1344,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1236 config_item_put(parent_item); 1344 config_item_put(parent_item);
1237 1345
1238 if (item->ci_type) 1346 if (item->ci_type)
1239 owner = item->ci_type->ct_owner; 1347 dead_item_owner = item->ci_type->ct_owner;
1240 1348
1241 if (sd->s_type & CONFIGFS_USET_DIR) { 1349 if (sd->s_type & CONFIGFS_USET_DIR) {
1242 configfs_detach_group(item); 1350 configfs_detach_group(item);
@@ -1258,7 +1366,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1258 /* Drop our reference from above */ 1366 /* Drop our reference from above */
1259 config_item_put(item); 1367 config_item_put(item);
1260 1368
1261 module_put(owner); 1369 module_put(dead_item_owner);
1370 module_put(subsys_owner);
1262 1371
1263 return 0; 1372 return 0;
1264} 1373}
@@ -1314,13 +1423,24 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
1314{ 1423{
1315 struct dentry * dentry = file->f_path.dentry; 1424 struct dentry * dentry = file->f_path.dentry;
1316 struct configfs_dirent * parent_sd = dentry->d_fsdata; 1425 struct configfs_dirent * parent_sd = dentry->d_fsdata;
1426 int err;
1317 1427
1318 mutex_lock(&dentry->d_inode->i_mutex); 1428 mutex_lock(&dentry->d_inode->i_mutex);
1319 file->private_data = configfs_new_dirent(parent_sd, NULL); 1429 /*
1430 * Fake invisibility if dir belongs to a group/default groups hierarchy
1431 * being attached
1432 */
1433 err = -ENOENT;
1434 if (configfs_dirent_is_ready(parent_sd)) {
1435 file->private_data = configfs_new_dirent(parent_sd, NULL);
1436 if (IS_ERR(file->private_data))
1437 err = PTR_ERR(file->private_data);
1438 else
1439 err = 0;
1440 }
1320 mutex_unlock(&dentry->d_inode->i_mutex); 1441 mutex_unlock(&dentry->d_inode->i_mutex);
1321 1442
1322 return IS_ERR(file->private_data) ? PTR_ERR(file->private_data) : 0; 1443 return err;
1323
1324} 1444}
1325 1445
1326static int configfs_dir_close(struct inode *inode, struct file *file) 1446static int configfs_dir_close(struct inode *inode, struct file *file)
@@ -1491,6 +1611,10 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1491 if (err) { 1611 if (err) {
1492 d_delete(dentry); 1612 d_delete(dentry);
1493 dput(dentry); 1613 dput(dentry);
1614 } else {
1615 spin_lock(&configfs_dirent_lock);
1616 configfs_dir_set_ready(dentry->d_fsdata);
1617 spin_unlock(&configfs_dirent_lock);
1494 } 1618 }
1495 } 1619 }
1496 1620
@@ -1517,11 +1641,13 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
1517 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, 1641 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex,
1518 I_MUTEX_PARENT); 1642 I_MUTEX_PARENT);
1519 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 1643 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
1644 mutex_lock(&configfs_symlink_mutex);
1520 spin_lock(&configfs_dirent_lock); 1645 spin_lock(&configfs_dirent_lock);
1521 if (configfs_detach_prep(dentry, NULL)) { 1646 if (configfs_detach_prep(dentry, NULL)) {
1522 printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); 1647 printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
1523 } 1648 }
1524 spin_unlock(&configfs_dirent_lock); 1649 spin_unlock(&configfs_dirent_lock);
1650 mutex_unlock(&configfs_symlink_mutex);
1525 configfs_detach_group(&group->cg_item); 1651 configfs_detach_group(&group->cg_item);
1526 dentry->d_inode->i_flags |= S_DEAD; 1652 dentry->d_inode->i_flags |= S_DEAD;
1527 mutex_unlock(&dentry->d_inode->i_mutex); 1653 mutex_unlock(&dentry->d_inode->i_mutex);
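The dir.c changes above add a two-phase attach: new directories carry CONFIGFS_USET_CREATING, configfs_dirent_is_ready() makes lookup, mkdir, readdir and symlink treat them as absent, and configfs_dir_set_ready() publishes the whole hierarchy in one pass under configfs_dirent_lock once attachment succeeds. Below is a minimal sketch of that publish-under-a-lock pattern, with a hypothetical node type and a pthread mutex standing in for the kernel spinlock.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

/*
 * Sketch (not kernel code) of the CONFIGFS_USET_CREATING scheme: a node
 * starts life with 'creating' set and is invisible to readers until the
 * whole subtree is attached, at which point it is published recursively
 * while the tree lock is held.
 */
struct node {
	bool creating;                  /* still being attached */
	struct node *child;             /* first child */
	struct node *sibling;           /* next sibling */
};

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;

/* Caller holds tree_lock, mirroring configfs_dir_set_ready(). */
static void node_set_ready(struct node *n)
{
	n->creating = false;
	for (struct node *c = n->child; c; c = c->sibling)
		if (c->creating)
			node_set_ready(c);
}

/* Mirrors configfs_dirent_is_ready(): may flip false->true, never back. */
static bool node_is_ready(struct node *n)
{
	pthread_mutex_lock(&tree_lock);
	bool ready = !n->creating;
	pthread_mutex_unlock(&tree_lock);
	return ready;
}

int main(void)
{
	struct node child = { .creating = true };
	struct node root  = { .creating = true, .child = &child };

	/* invisible while attaching; publish the whole subtree at once */
	pthread_mutex_lock(&tree_lock);
	node_set_ready(&root);
	pthread_mutex_unlock(&tree_lock);

	return node_is_ready(&child) ? 0 : 1;
}
```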
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 0004d18c40a..bf74973b049 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -31,6 +31,9 @@
31#include <linux/configfs.h> 31#include <linux/configfs.h>
32#include "configfs_internal.h" 32#include "configfs_internal.h"
33 33
34/* Protects attachments of new symlinks */
35DEFINE_MUTEX(configfs_symlink_mutex);
36
34static int item_depth(struct config_item * item) 37static int item_depth(struct config_item * item)
35{ 38{
36 struct config_item * p = item; 39 struct config_item * p = item;
@@ -73,11 +76,20 @@ static int create_link(struct config_item *parent_item,
73 struct configfs_symlink *sl; 76 struct configfs_symlink *sl;
74 int ret; 77 int ret;
75 78
79 ret = -ENOENT;
80 if (!configfs_dirent_is_ready(target_sd))
81 goto out;
76 ret = -ENOMEM; 82 ret = -ENOMEM;
77 sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); 83 sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
78 if (sl) { 84 if (sl) {
79 sl->sl_target = config_item_get(item); 85 sl->sl_target = config_item_get(item);
80 spin_lock(&configfs_dirent_lock); 86 spin_lock(&configfs_dirent_lock);
87 if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
88 spin_unlock(&configfs_dirent_lock);
89 config_item_put(item);
90 kfree(sl);
91 return -ENOENT;
92 }
81 list_add(&sl->sl_list, &target_sd->s_links); 93 list_add(&sl->sl_list, &target_sd->s_links);
82 spin_unlock(&configfs_dirent_lock); 94 spin_unlock(&configfs_dirent_lock);
83 ret = configfs_create_link(sl, parent_item->ci_dentry, 95 ret = configfs_create_link(sl, parent_item->ci_dentry,
@@ -91,6 +103,7 @@ static int create_link(struct config_item *parent_item,
91 } 103 }
92 } 104 }
93 105
106out:
94 return ret; 107 return ret;
95} 108}
96 109
@@ -120,6 +133,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
120{ 133{
121 int ret; 134 int ret;
122 struct nameidata nd; 135 struct nameidata nd;
136 struct configfs_dirent *sd;
123 struct config_item *parent_item; 137 struct config_item *parent_item;
124 struct config_item *target_item; 138 struct config_item *target_item;
125 struct config_item_type *type; 139 struct config_item_type *type;
@@ -128,9 +142,19 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
128 if (dentry->d_parent == configfs_sb->s_root) 142 if (dentry->d_parent == configfs_sb->s_root)
129 goto out; 143 goto out;
130 144
145 sd = dentry->d_parent->d_fsdata;
146 /*
147 * Fake invisibility if dir belongs to a group/default groups hierarchy
148 * being attached
149 */
150 ret = -ENOENT;
151 if (!configfs_dirent_is_ready(sd))
152 goto out;
153
131 parent_item = configfs_get_config_item(dentry->d_parent); 154 parent_item = configfs_get_config_item(dentry->d_parent);
132 type = parent_item->ci_type; 155 type = parent_item->ci_type;
133 156
157 ret = -EPERM;
134 if (!type || !type->ct_item_ops || 158 if (!type || !type->ct_item_ops ||
135 !type->ct_item_ops->allow_link) 159 !type->ct_item_ops->allow_link)
136 goto out_put; 160 goto out_put;
@@ -141,7 +165,9 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
141 165
142 ret = type->ct_item_ops->allow_link(parent_item, target_item); 166 ret = type->ct_item_ops->allow_link(parent_item, target_item);
143 if (!ret) { 167 if (!ret) {
168 mutex_lock(&configfs_symlink_mutex);
144 ret = create_link(parent_item, target_item, dentry); 169 ret = create_link(parent_item, target_item, dentry);
170 mutex_unlock(&configfs_symlink_mutex);
145 if (ret && type->ct_item_ops->drop_link) 171 if (ret && type->ct_item_ops->drop_link)
146 type->ct_item_ops->drop_link(parent_item, 172 type->ct_item_ops->drop_link(parent_item,
147 target_item); 173 target_item);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 285b64a8b06..488eb424f66 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -29,7 +29,7 @@
29#define DEVPTS_DEFAULT_MODE 0600 29#define DEVPTS_DEFAULT_MODE 0600
30 30
31extern int pty_limit; /* Config limit on Unix98 ptys */ 31extern int pty_limit; /* Config limit on Unix98 ptys */
32static DEFINE_IDR(allocated_ptys); 32static DEFINE_IDA(allocated_ptys);
33static DEFINE_MUTEX(allocated_ptys_lock); 33static DEFINE_MUTEX(allocated_ptys_lock);
34 34
35static struct vfsmount *devpts_mnt; 35static struct vfsmount *devpts_mnt;
@@ -180,24 +180,24 @@ static struct dentry *get_node(int num)
180int devpts_new_index(void) 180int devpts_new_index(void)
181{ 181{
182 int index; 182 int index;
183 int idr_ret; 183 int ida_ret;
184 184
185retry: 185retry:
186 if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) { 186 if (!ida_pre_get(&allocated_ptys, GFP_KERNEL)) {
187 return -ENOMEM; 187 return -ENOMEM;
188 } 188 }
189 189
190 mutex_lock(&allocated_ptys_lock); 190 mutex_lock(&allocated_ptys_lock);
191 idr_ret = idr_get_new(&allocated_ptys, NULL, &index); 191 ida_ret = ida_get_new(&allocated_ptys, &index);
192 if (idr_ret < 0) { 192 if (ida_ret < 0) {
193 mutex_unlock(&allocated_ptys_lock); 193 mutex_unlock(&allocated_ptys_lock);
194 if (idr_ret == -EAGAIN) 194 if (ida_ret == -EAGAIN)
195 goto retry; 195 goto retry;
196 return -EIO; 196 return -EIO;
197 } 197 }
198 198
199 if (index >= pty_limit) { 199 if (index >= pty_limit) {
200 idr_remove(&allocated_ptys, index); 200 ida_remove(&allocated_ptys, index);
201 mutex_unlock(&allocated_ptys_lock); 201 mutex_unlock(&allocated_ptys_lock);
202 return -EIO; 202 return -EIO;
203 } 203 }
@@ -208,7 +208,7 @@ retry:
208void devpts_kill_index(int idx) 208void devpts_kill_index(int idx)
209{ 209{
210 mutex_lock(&allocated_ptys_lock); 210 mutex_lock(&allocated_ptys_lock);
211 idr_remove(&allocated_ptys, idx); 211 ida_remove(&allocated_ptys, idx);
212 mutex_unlock(&allocated_ptys_lock); 212 mutex_unlock(&allocated_ptys_lock);
213} 213}
214 214
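The devpts hunk above switches the pty index allocator from an IDR to an IDA, since only a small integer is needed rather than an id-to-pointer mapping; the pre-get/retry-on-EAGAIN flow and the pty_limit check are unchanged. A rough userspace sketch of what the code asks of the IDA follows, using a fixed bitmap and an assumed PTY_LIMIT; locking is omitted, whereas the kernel serializes with allocated_ptys_lock.

```c
/*
 * Userspace sketch of devpts_new_index()/devpts_kill_index() above:
 * hand out the lowest free small integer and give it back later.  A
 * fixed bitmap stands in for the kernel's ida; PTY_LIMIT is an assumed
 * stand-in for the pty_limit sysctl.
 */
#define PTY_LIMIT 256
#define BITS_PER_WORD (8 * sizeof(unsigned long))

static unsigned long used[PTY_LIMIT / BITS_PER_WORD + 1];

static int pty_new_index(void)
{
	for (int idx = 0; idx < PTY_LIMIT; idx++) {
		unsigned long bit = 1UL << (idx % BITS_PER_WORD);
		unsigned long *word = &used[idx / BITS_PER_WORD];

		if (!(*word & bit)) {
			*word |= bit;   /* lowest free index, like ida_get_new() */
			return idx;
		}
	}
	return -1;                      /* all indices in use (kernel returns -EIO) */
}

static void pty_kill_index(int idx)
{
	used[idx / BITS_PER_WORD] &= ~(1UL << (idx % BITS_PER_WORD));
}

int main(void)
{
	int a = pty_new_index();        /* 0 */
	int b = pty_new_index();        /* 1 */

	pty_kill_index(a);
	return (pty_new_index() == a && b == 1) ? 0 : 1;
}
```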
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 2d3d1027ce2..724ddac9153 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -363,6 +363,7 @@ static int search_rsb_list(struct list_head *head, char *name, int len,
363 if (len == r->res_length && !memcmp(name, r->res_name, len)) 363 if (len == r->res_length && !memcmp(name, r->res_name, len))
364 goto found; 364 goto found;
365 } 365 }
366 *r_ret = NULL;
366 return -EBADR; 367 return -EBADR;
367 368
368 found: 369 found:
@@ -1782,7 +1783,8 @@ static void grant_pending_locks(struct dlm_rsb *r)
1782 1783
1783 list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) { 1784 list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) {
1784 if (lkb->lkb_bastfn && lock_requires_bast(lkb, high, cw)) { 1785 if (lkb->lkb_bastfn && lock_requires_bast(lkb, high, cw)) {
1785 if (cw && high == DLM_LOCK_PR) 1786 if (cw && high == DLM_LOCK_PR &&
1787 lkb->lkb_grmode == DLM_LOCK_PR)
1786 queue_bast(r, lkb, DLM_LOCK_CW); 1788 queue_bast(r, lkb, DLM_LOCK_CW);
1787 else 1789 else
1788 queue_bast(r, lkb, high); 1790 queue_bast(r, lkb, high);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 637018c891e..3962262f991 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -891,8 +891,10 @@ static void tcp_connect_to_sock(struct connection *con)
891 goto out_err; 891 goto out_err;
892 892
893 memset(&saddr, 0, sizeof(saddr)); 893 memset(&saddr, 0, sizeof(saddr));
894 if (dlm_nodeid_to_addr(con->nodeid, &saddr)) 894 if (dlm_nodeid_to_addr(con->nodeid, &saddr)) {
895 sock_release(sock);
895 goto out_err; 896 goto out_err;
897 }
896 898
897 sock->sk->sk_user_data = con; 899 sock->sk->sk_user_data = con;
898 con->rx_action = receive_from_sock; 900 con->rx_action = receive_from_sock;
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index f976f303c19..929e48ae759 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -539,7 +539,7 @@ static ssize_t device_write(struct file *file, const char __user *buf,
539 539
540 /* do we really need this? can a write happen after a close? */ 540 /* do we really need this? can a write happen after a close? */
541 if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) && 541 if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) &&
542 test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags)) 542 (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags)))
543 return -EINVAL; 543 return -EINVAL;
544 544
545 sigfillset(&allsigs); 545 sigfillset(&allsigs);
diff --git a/fs/dquot.c b/fs/dquot.c
index 1346eebe74c..8ec4d6cc763 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1793,6 +1793,21 @@ static int vfs_quota_on_remount(struct super_block *sb, int type)
1793 return ret; 1793 return ret;
1794} 1794}
1795 1795
1796int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
1797 struct path *path)
1798{
1799 int error = security_quota_on(path->dentry);
1800 if (error)
1801 return error;
1802 /* Quota file not on the same filesystem? */
1803 if (path->mnt->mnt_sb != sb)
1804 error = -EXDEV;
1805 else
1806 error = vfs_quota_on_inode(path->dentry->d_inode, type,
1807 format_id);
1808 return error;
1809}
1810
1796/* Actual function called from quotactl() */ 1811/* Actual function called from quotactl() */
1797int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, 1812int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
1798 int remount) 1813 int remount)
@@ -1804,19 +1819,10 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
1804 return vfs_quota_on_remount(sb, type); 1819 return vfs_quota_on_remount(sb, type);
1805 1820
1806 error = path_lookup(path, LOOKUP_FOLLOW, &nd); 1821 error = path_lookup(path, LOOKUP_FOLLOW, &nd);
1807 if (error < 0) 1822 if (!error) {
1808 return error; 1823 error = vfs_quota_on_path(sb, type, format_id, &nd.path);
1809 error = security_quota_on(nd.path.dentry); 1824 path_put(&nd.path);
1810 if (error) 1825 }
1811 goto out_path;
1812 /* Quota file not on the same filesystem? */
1813 if (nd.path.mnt->mnt_sb != sb)
1814 error = -EXDEV;
1815 else
1816 error = vfs_quota_on_inode(nd.path.dentry->d_inode, type,
1817 format_id);
1818out_path:
1819 path_put(&nd.path);
1820 return error; 1826 return error;
1821} 1827}
1822 1828
@@ -2185,6 +2191,7 @@ EXPORT_SYMBOL(unregister_quota_format);
2185EXPORT_SYMBOL(dqstats); 2191EXPORT_SYMBOL(dqstats);
2186EXPORT_SYMBOL(dq_data_lock); 2192EXPORT_SYMBOL(dq_data_lock);
2187EXPORT_SYMBOL(vfs_quota_on); 2193EXPORT_SYMBOL(vfs_quota_on);
2194EXPORT_SYMBOL(vfs_quota_on_path);
2188EXPORT_SYMBOL(vfs_quota_on_mount); 2195EXPORT_SYMBOL(vfs_quota_on_mount);
2189EXPORT_SYMBOL(vfs_quota_off); 2196EXPORT_SYMBOL(vfs_quota_off);
2190EXPORT_SYMBOL(vfs_quota_sync); 2197EXPORT_SYMBOL(vfs_quota_sync);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 7b99917ffad..06db79d05c1 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -475,8 +475,8 @@ int ecryptfs_encrypt_page(struct page *page)
475{ 475{
476 struct inode *ecryptfs_inode; 476 struct inode *ecryptfs_inode;
477 struct ecryptfs_crypt_stat *crypt_stat; 477 struct ecryptfs_crypt_stat *crypt_stat;
478 char *enc_extent_virt = NULL; 478 char *enc_extent_virt;
479 struct page *enc_extent_page; 479 struct page *enc_extent_page = NULL;
480 loff_t extent_offset; 480 loff_t extent_offset;
481 int rc = 0; 481 int rc = 0;
482 482
@@ -492,14 +492,14 @@ int ecryptfs_encrypt_page(struct page *page)
492 page->index); 492 page->index);
493 goto out; 493 goto out;
494 } 494 }
495 enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER); 495 enc_extent_page = alloc_page(GFP_USER);
496 if (!enc_extent_virt) { 496 if (!enc_extent_page) {
497 rc = -ENOMEM; 497 rc = -ENOMEM;
498 ecryptfs_printk(KERN_ERR, "Error allocating memory for " 498 ecryptfs_printk(KERN_ERR, "Error allocating memory for "
499 "encrypted extent\n"); 499 "encrypted extent\n");
500 goto out; 500 goto out;
501 } 501 }
502 enc_extent_page = virt_to_page(enc_extent_virt); 502 enc_extent_virt = kmap(enc_extent_page);
503 for (extent_offset = 0; 503 for (extent_offset = 0;
504 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); 504 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
505 extent_offset++) { 505 extent_offset++) {
@@ -527,7 +527,10 @@ int ecryptfs_encrypt_page(struct page *page)
527 } 527 }
528 } 528 }
529out: 529out:
530 kfree(enc_extent_virt); 530 if (enc_extent_page) {
531 kunmap(enc_extent_page);
532 __free_page(enc_extent_page);
533 }
531 return rc; 534 return rc;
532} 535}
533 536
@@ -609,8 +612,8 @@ int ecryptfs_decrypt_page(struct page *page)
609{ 612{
610 struct inode *ecryptfs_inode; 613 struct inode *ecryptfs_inode;
611 struct ecryptfs_crypt_stat *crypt_stat; 614 struct ecryptfs_crypt_stat *crypt_stat;
612 char *enc_extent_virt = NULL; 615 char *enc_extent_virt;
613 struct page *enc_extent_page; 616 struct page *enc_extent_page = NULL;
614 unsigned long extent_offset; 617 unsigned long extent_offset;
615 int rc = 0; 618 int rc = 0;
616 619
@@ -627,14 +630,14 @@ int ecryptfs_decrypt_page(struct page *page)
627 page->index); 630 page->index);
628 goto out; 631 goto out;
629 } 632 }
630 enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER); 633 enc_extent_page = alloc_page(GFP_USER);
631 if (!enc_extent_virt) { 634 if (!enc_extent_page) {
632 rc = -ENOMEM; 635 rc = -ENOMEM;
633 ecryptfs_printk(KERN_ERR, "Error allocating memory for " 636 ecryptfs_printk(KERN_ERR, "Error allocating memory for "
634 "encrypted extent\n"); 637 "encrypted extent\n");
635 goto out; 638 goto out;
636 } 639 }
637 enc_extent_page = virt_to_page(enc_extent_virt); 640 enc_extent_virt = kmap(enc_extent_page);
638 for (extent_offset = 0; 641 for (extent_offset = 0;
639 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); 642 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
640 extent_offset++) { 643 extent_offset++) {
@@ -662,7 +665,10 @@ int ecryptfs_decrypt_page(struct page *page)
662 } 665 }
663 } 666 }
664out: 667out:
665 kfree(enc_extent_virt); 668 if (enc_extent_page) {
669 kunmap(enc_extent_page);
670 __free_page(enc_extent_page);
671 }
666 return rc; 672 return rc;
667} 673}
668 674
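The ecryptfs change above stops kmalloc()ing a PAGE_CACHE_SIZE buffer and deriving its struct page with virt_to_page(); it allocates a real page with alloc_page(), maps it with kmap(), and releases it with kunmap()/__free_page() on both the success and the error path. The snippet below is only a loose userspace analogue of that "own the whole page explicitly" pattern, with mmap()/munmap() standing in for the kernel page APIs.

```c
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

/*
 * Loose analogue of the ecryptfs change: instead of taking a heap
 * allocation and assuming it maps cleanly onto one page, ask the
 * system for a whole page explicitly and release that same page when
 * done.  This illustrates the ownership pattern only, not kernel API
 * usage.
 */
int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	void *page = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (page == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	memset(page, 0, page_size);     /* use the page as a scratch extent */
	printf("got a %ld-byte page at %p\n", page_size, page);

	munmap(page, page_size);        /* freed on error and success alike */
	return 0;
}
```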
diff --git a/fs/exec.c b/fs/exec.c
index 9696bbf0f0b..32993beecbe 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -32,6 +32,7 @@
32#include <linux/swap.h> 32#include <linux/swap.h>
33#include <linux/string.h> 33#include <linux/string.h>
34#include <linux/init.h> 34#include <linux/init.h>
35#include <linux/pagemap.h>
35#include <linux/highmem.h> 36#include <linux/highmem.h>
36#include <linux/spinlock.h> 37#include <linux/spinlock.h>
37#include <linux/key.h> 38#include <linux/key.h>
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 384fc0d1dd7..991d6dfeb51 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -791,6 +791,7 @@ const struct address_space_operations ext2_aops = {
791 .direct_IO = ext2_direct_IO, 791 .direct_IO = ext2_direct_IO,
792 .writepages = ext2_writepages, 792 .writepages = ext2_writepages,
793 .migratepage = buffer_migrate_page, 793 .migratepage = buffer_migrate_page,
794 .is_partially_uptodate = block_is_partially_uptodate,
794}; 795};
795 796
796const struct address_space_operations ext2_aops_xip = { 797const struct address_space_operations ext2_aops_xip = {
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 3bf07d70b91..507d8689b11 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1767,44 +1767,47 @@ static int ext3_journalled_set_page_dirty(struct page *page)
1767} 1767}
1768 1768
1769static const struct address_space_operations ext3_ordered_aops = { 1769static const struct address_space_operations ext3_ordered_aops = {
1770 .readpage = ext3_readpage, 1770 .readpage = ext3_readpage,
1771 .readpages = ext3_readpages, 1771 .readpages = ext3_readpages,
1772 .writepage = ext3_ordered_writepage, 1772 .writepage = ext3_ordered_writepage,
1773 .sync_page = block_sync_page, 1773 .sync_page = block_sync_page,
1774 .write_begin = ext3_write_begin, 1774 .write_begin = ext3_write_begin,
1775 .write_end = ext3_ordered_write_end, 1775 .write_end = ext3_ordered_write_end,
1776 .bmap = ext3_bmap, 1776 .bmap = ext3_bmap,
1777 .invalidatepage = ext3_invalidatepage, 1777 .invalidatepage = ext3_invalidatepage,
1778 .releasepage = ext3_releasepage, 1778 .releasepage = ext3_releasepage,
1779 .direct_IO = ext3_direct_IO, 1779 .direct_IO = ext3_direct_IO,
1780 .migratepage = buffer_migrate_page, 1780 .migratepage = buffer_migrate_page,
1781 .is_partially_uptodate = block_is_partially_uptodate,
1781}; 1782};
1782 1783
1783static const struct address_space_operations ext3_writeback_aops = { 1784static const struct address_space_operations ext3_writeback_aops = {
1784 .readpage = ext3_readpage, 1785 .readpage = ext3_readpage,
1785 .readpages = ext3_readpages, 1786 .readpages = ext3_readpages,
1786 .writepage = ext3_writeback_writepage, 1787 .writepage = ext3_writeback_writepage,
1787 .sync_page = block_sync_page, 1788 .sync_page = block_sync_page,
1788 .write_begin = ext3_write_begin, 1789 .write_begin = ext3_write_begin,
1789 .write_end = ext3_writeback_write_end, 1790 .write_end = ext3_writeback_write_end,
1790 .bmap = ext3_bmap, 1791 .bmap = ext3_bmap,
1791 .invalidatepage = ext3_invalidatepage, 1792 .invalidatepage = ext3_invalidatepage,
1792 .releasepage = ext3_releasepage, 1793 .releasepage = ext3_releasepage,
1793 .direct_IO = ext3_direct_IO, 1794 .direct_IO = ext3_direct_IO,
1794 .migratepage = buffer_migrate_page, 1795 .migratepage = buffer_migrate_page,
1796 .is_partially_uptodate = block_is_partially_uptodate,
1795}; 1797};
1796 1798
1797static const struct address_space_operations ext3_journalled_aops = { 1799static const struct address_space_operations ext3_journalled_aops = {
1798 .readpage = ext3_readpage, 1800 .readpage = ext3_readpage,
1799 .readpages = ext3_readpages, 1801 .readpages = ext3_readpages,
1800 .writepage = ext3_journalled_writepage, 1802 .writepage = ext3_journalled_writepage,
1801 .sync_page = block_sync_page, 1803 .sync_page = block_sync_page,
1802 .write_begin = ext3_write_begin, 1804 .write_begin = ext3_write_begin,
1803 .write_end = ext3_journalled_write_end, 1805 .write_end = ext3_journalled_write_end,
1804 .set_page_dirty = ext3_journalled_set_page_dirty, 1806 .set_page_dirty = ext3_journalled_set_page_dirty,
1805 .bmap = ext3_bmap, 1807 .bmap = ext3_bmap,
1806 .invalidatepage = ext3_invalidatepage, 1808 .invalidatepage = ext3_invalidatepage,
1807 .releasepage = ext3_releasepage, 1809 .releasepage = ext3_releasepage,
1810 .is_partially_uptodate = block_is_partially_uptodate,
1808}; 1811};
1809 1812
1810void ext3_set_aops(struct inode *inode) 1813void ext3_set_aops(struct inode *inode)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8ddced38467..f38a5afc39a 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2810,8 +2810,9 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2810 journal_unlock_updates(EXT3_SB(sb)->s_journal); 2810 journal_unlock_updates(EXT3_SB(sb)->s_journal);
2811 } 2811 }
2812 2812
2813 err = vfs_quota_on_path(sb, type, format_id, &nd.path);
2813 path_put(&nd.path); 2814 path_put(&nd.path);
2814 return vfs_quota_on(sb, type, format_id, path, remount); 2815 return err;
2815} 2816}
2816 2817
2817/* Read data from quotafile - avoid pagecache and such because we cannot afford 2818/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8ca2763df09..9843b046c23 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2806,59 +2806,63 @@ static int ext4_journalled_set_page_dirty(struct page *page)
2806} 2806}
2807 2807
2808static const struct address_space_operations ext4_ordered_aops = { 2808static const struct address_space_operations ext4_ordered_aops = {
2809 .readpage = ext4_readpage, 2809 .readpage = ext4_readpage,
2810 .readpages = ext4_readpages, 2810 .readpages = ext4_readpages,
2811 .writepage = ext4_normal_writepage, 2811 .writepage = ext4_normal_writepage,
2812 .sync_page = block_sync_page, 2812 .sync_page = block_sync_page,
2813 .write_begin = ext4_write_begin, 2813 .write_begin = ext4_write_begin,
2814 .write_end = ext4_ordered_write_end, 2814 .write_end = ext4_ordered_write_end,
2815 .bmap = ext4_bmap, 2815 .bmap = ext4_bmap,
2816 .invalidatepage = ext4_invalidatepage, 2816 .invalidatepage = ext4_invalidatepage,
2817 .releasepage = ext4_releasepage, 2817 .releasepage = ext4_releasepage,
2818 .direct_IO = ext4_direct_IO, 2818 .direct_IO = ext4_direct_IO,
2819 .migratepage = buffer_migrate_page, 2819 .migratepage = buffer_migrate_page,
2820 .is_partially_uptodate = block_is_partially_uptodate,
2820}; 2821};
2821 2822
2822static const struct address_space_operations ext4_writeback_aops = { 2823static const struct address_space_operations ext4_writeback_aops = {
2823 .readpage = ext4_readpage, 2824 .readpage = ext4_readpage,
2824 .readpages = ext4_readpages, 2825 .readpages = ext4_readpages,
2825 .writepage = ext4_normal_writepage, 2826 .writepage = ext4_normal_writepage,
2826 .sync_page = block_sync_page, 2827 .sync_page = block_sync_page,
2827 .write_begin = ext4_write_begin, 2828 .write_begin = ext4_write_begin,
2828 .write_end = ext4_writeback_write_end, 2829 .write_end = ext4_writeback_write_end,
2829 .bmap = ext4_bmap, 2830 .bmap = ext4_bmap,
2830 .invalidatepage = ext4_invalidatepage, 2831 .invalidatepage = ext4_invalidatepage,
2831 .releasepage = ext4_releasepage, 2832 .releasepage = ext4_releasepage,
2832 .direct_IO = ext4_direct_IO, 2833 .direct_IO = ext4_direct_IO,
2833 .migratepage = buffer_migrate_page, 2834 .migratepage = buffer_migrate_page,
2835 .is_partially_uptodate = block_is_partially_uptodate,
2834}; 2836};
2835 2837
2836static const struct address_space_operations ext4_journalled_aops = { 2838static const struct address_space_operations ext4_journalled_aops = {
2837 .readpage = ext4_readpage, 2839 .readpage = ext4_readpage,
2838 .readpages = ext4_readpages, 2840 .readpages = ext4_readpages,
2839 .writepage = ext4_journalled_writepage, 2841 .writepage = ext4_journalled_writepage,
2840 .sync_page = block_sync_page, 2842 .sync_page = block_sync_page,
2841 .write_begin = ext4_write_begin, 2843 .write_begin = ext4_write_begin,
2842 .write_end = ext4_journalled_write_end, 2844 .write_end = ext4_journalled_write_end,
2843 .set_page_dirty = ext4_journalled_set_page_dirty, 2845 .set_page_dirty = ext4_journalled_set_page_dirty,
2844 .bmap = ext4_bmap, 2846 .bmap = ext4_bmap,
2845 .invalidatepage = ext4_invalidatepage, 2847 .invalidatepage = ext4_invalidatepage,
2846 .releasepage = ext4_releasepage, 2848 .releasepage = ext4_releasepage,
2849 .is_partially_uptodate = block_is_partially_uptodate,
2847}; 2850};
2848 2851
2849static const struct address_space_operations ext4_da_aops = { 2852static const struct address_space_operations ext4_da_aops = {
2850 .readpage = ext4_readpage, 2853 .readpage = ext4_readpage,
2851 .readpages = ext4_readpages, 2854 .readpages = ext4_readpages,
2852 .writepage = ext4_da_writepage, 2855 .writepage = ext4_da_writepage,
2853 .writepages = ext4_da_writepages, 2856 .writepages = ext4_da_writepages,
2854 .sync_page = block_sync_page, 2857 .sync_page = block_sync_page,
2855 .write_begin = ext4_da_write_begin, 2858 .write_begin = ext4_da_write_begin,
2856 .write_end = ext4_da_write_end, 2859 .write_end = ext4_da_write_end,
2857 .bmap = ext4_bmap, 2860 .bmap = ext4_bmap,
2858 .invalidatepage = ext4_da_invalidatepage, 2861 .invalidatepage = ext4_da_invalidatepage,
2859 .releasepage = ext4_releasepage, 2862 .releasepage = ext4_releasepage,
2860 .direct_IO = ext4_direct_IO, 2863 .direct_IO = ext4_direct_IO,
2861 .migratepage = buffer_migrate_page, 2864 .migratepage = buffer_migrate_page,
2865 .is_partially_uptodate = block_is_partially_uptodate,
2862}; 2866};
2863 2867
2864void ext4_set_aops(struct inode *inode) 2868void ext4_set_aops(struct inode *inode)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b5479b1dff1..1e69f29a8c5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3352,8 +3352,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3352 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3352 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3353 } 3353 }
3354 3354
3355 err = vfs_quota_on_path(sb, type, format_id, &nd.path);
3355 path_put(&nd.path); 3356 path_put(&nd.path);
3356 return vfs_quota_on(sb, type, format_id, path, remount); 3357 return err;
3357} 3358}
3358 3359
3359/* Read data from quotafile - avoid pagecache and such because we cannot afford 3360/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 61d62513681..ac4f7db9f13 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -49,73 +49,6 @@ static int get_close_on_exec(unsigned int fd)
49 return res; 49 return res;
50} 50}
51 51
52/*
53 * locate_fd finds a free file descriptor in the open_fds fdset,
54 * expanding the fd arrays if necessary. Must be called with the
55 * file_lock held for write.
56 */
57
58static int locate_fd(unsigned int orig_start, int cloexec)
59{
60 struct files_struct *files = current->files;
61 unsigned int newfd;
62 unsigned int start;
63 int error;
64 struct fdtable *fdt;
65
66 spin_lock(&files->file_lock);
67repeat:
68 fdt = files_fdtable(files);
69 /*
70 * Someone might have closed fd's in the range
71 * orig_start..fdt->next_fd
72 */
73 start = orig_start;
74 if (start < files->next_fd)
75 start = files->next_fd;
76
77 newfd = start;
78 if (start < fdt->max_fds)
79 newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
80 fdt->max_fds, start);
81
82 error = expand_files(files, newfd);
83 if (error < 0)
84 goto out;
85
86 /*
87 * If we needed to expand the fs array we
88 * might have blocked - try again.
89 */
90 if (error)
91 goto repeat;
92
93 if (start <= files->next_fd)
94 files->next_fd = newfd + 1;
95
96 FD_SET(newfd, fdt->open_fds);
97 if (cloexec)
98 FD_SET(newfd, fdt->close_on_exec);
99 else
100 FD_CLR(newfd, fdt->close_on_exec);
101 error = newfd;
102
103out:
104 spin_unlock(&files->file_lock);
105 return error;
106}
107
108static int dupfd(struct file *file, unsigned int start, int cloexec)
109{
110 int fd = locate_fd(start, cloexec);
111 if (fd >= 0)
112 fd_install(fd, file);
113 else
114 fput(file);
115
116 return fd;
117}
118
119asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) 52asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
120{ 53{
121 int err = -EBADF; 54 int err = -EBADF;
@@ -130,31 +63,35 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
130 return -EINVAL; 63 return -EINVAL;
131 64
132 spin_lock(&files->file_lock); 65 spin_lock(&files->file_lock);
133 if (!(file = fcheck(oldfd)))
134 goto out_unlock;
135 get_file(file); /* We are now finished with oldfd */
136
137 err = expand_files(files, newfd); 66 err = expand_files(files, newfd);
67 file = fcheck(oldfd);
68 if (unlikely(!file))
69 goto Ebadf;
138 if (unlikely(err < 0)) { 70 if (unlikely(err < 0)) {
139 if (err == -EMFILE) 71 if (err == -EMFILE)
140 err = -EBADF; 72 goto Ebadf;
141 goto out_fput; 73 goto out_unlock;
142 } 74 }
143 75 /*
144 /* To avoid races with open() and dup(), we will mark the fd as 76 * We need to detect attempts to do dup2() over allocated but still
145 * in-use in the open-file bitmap throughout the entire dup2() 77 * not finished descriptor. NB: OpenBSD avoids that at the price of
146 * process. This is quite safe: do_close() uses the fd array 78 * extra work in their equivalent of fget() - they insert struct
147 * entry, not the bitmap, to decide what work needs to be 79 * file immediately after grabbing descriptor, mark it larval if
148 * done. --sct */ 80 * more work (e.g. actual opening) is needed and make sure that
149 /* Doesn't work. open() might be there first. --AV */ 81 * fget() treats larval files as absent. Potentially interesting,
150 82 * but while extra work in fget() is trivial, locking implications
151 /* Yes. It's a race. In user space. Nothing sane to do */ 83 * and amount of surgery on open()-related paths in VFS are not.
84 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
85 * deadlocks in rather amusing ways, AFAICS. All of that is out of
86 * scope of POSIX or SUS, since neither considers shared descriptor
87 * tables and this condition does not arise without those.
88 */
152 err = -EBUSY; 89 err = -EBUSY;
153 fdt = files_fdtable(files); 90 fdt = files_fdtable(files);
154 tofree = fdt->fd[newfd]; 91 tofree = fdt->fd[newfd];
155 if (!tofree && FD_ISSET(newfd, fdt->open_fds)) 92 if (!tofree && FD_ISSET(newfd, fdt->open_fds))
156 goto out_fput; 93 goto out_unlock;
157 94 get_file(file);
158 rcu_assign_pointer(fdt->fd[newfd], file); 95 rcu_assign_pointer(fdt->fd[newfd], file);
159 FD_SET(newfd, fdt->open_fds); 96 FD_SET(newfd, fdt->open_fds);
160 if (flags & O_CLOEXEC) 97 if (flags & O_CLOEXEC)
@@ -165,17 +102,14 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
165 102
166 if (tofree) 103 if (tofree)
167 filp_close(tofree, files); 104 filp_close(tofree, files);
168 err = newfd;
169out:
170 return err;
171out_unlock:
172 spin_unlock(&files->file_lock);
173 goto out;
174 105
175out_fput: 106 return newfd;
107
108Ebadf:
109 err = -EBADF;
110out_unlock:
176 spin_unlock(&files->file_lock); 111 spin_unlock(&files->file_lock);
177 fput(file); 112 return err;
178 goto out;
179} 113}
180 114
181asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) 115asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
@@ -194,10 +128,15 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
194asmlinkage long sys_dup(unsigned int fildes) 128asmlinkage long sys_dup(unsigned int fildes)
195{ 129{
196 int ret = -EBADF; 130 int ret = -EBADF;
197 struct file * file = fget(fildes); 131 struct file *file = fget(fildes);
198 132
199 if (file) 133 if (file) {
200 ret = dupfd(file, 0, 0); 134 ret = get_unused_fd();
135 if (ret >= 0)
136 fd_install(ret, file);
137 else
138 fput(file);
139 }
201 return ret; 140 return ret;
202} 141}
203 142
@@ -322,8 +261,11 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
322 case F_DUPFD_CLOEXEC: 261 case F_DUPFD_CLOEXEC:
323 if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) 262 if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
324 break; 263 break;
325 get_file(filp); 264 err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
326 err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC); 265 if (err >= 0) {
266 get_file(filp);
267 fd_install(err, filp);
268 }
327 break; 269 break;
328 case F_GETFD: 270 case F_GETFD:
329 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; 271 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
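
The F_DUPFD_CLOEXEC and dup3() paths reworked above are visible from user space, so their behaviour can be checked without touching kernel internals. A minimal sketch, assuming a Linux/glibc toolchain that exposes dup3() under _GNU_SOURCE; the descriptor numbers 10 and 20 are arbitrary:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int copy, lowest;

        /* dup3() duplicates stdout onto fd 10 and sets O_CLOEXEC atomically. */
        copy = dup3(STDOUT_FILENO, 10, O_CLOEXEC);
        if (copy < 0) {
                perror("dup3");
                return 1;
        }
        printf("dup3 gave fd %d, FD_CLOEXEC=%d\n",
               copy, (fcntl(copy, F_GETFD) & FD_CLOEXEC) != 0);

        /* F_DUPFD_CLOEXEC picks the lowest free descriptor >= 20. */
        lowest = fcntl(STDOUT_FILENO, F_DUPFD_CLOEXEC, 20);
        if (lowest < 0) {
                perror("fcntl(F_DUPFD_CLOEXEC)");
                return 1;
        }
        printf("F_DUPFD_CLOEXEC gave fd %d, FD_CLOEXEC=%d\n",
               lowest, (fcntl(lowest, F_GETFD) & FD_CLOEXEC) != 0);

        close(copy);
        close(lowest);
        return 0;
}

Both calls should report FD_CLOEXEC=1, since the close-on-exec flag is applied together with the duplication rather than by a second fcntl().
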
diff --git a/fs/file.c b/fs/file.c
index d8773b19fe4..f313314f996 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,6 +6,7 @@
6 * Manage the dynamic fd arrays in the process files_struct. 6 * Manage the dynamic fd arrays in the process files_struct.
7 */ 7 */
8 8
9#include <linux/module.h>
9#include <linux/fs.h> 10#include <linux/fs.h>
10#include <linux/mm.h> 11#include <linux/mm.h>
11#include <linux/time.h> 12#include <linux/time.h>
@@ -432,3 +433,63 @@ struct files_struct init_files = {
432 }, 433 },
433 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 434 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
434}; 435};
436
437/*
438 * allocate a file descriptor, mark it busy.
439 */
440int alloc_fd(unsigned start, unsigned flags)
441{
442 struct files_struct *files = current->files;
443 unsigned int fd;
444 int error;
445 struct fdtable *fdt;
446
447 spin_lock(&files->file_lock);
448repeat:
449 fdt = files_fdtable(files);
450 fd = start;
451 if (fd < files->next_fd)
452 fd = files->next_fd;
453
454 if (fd < fdt->max_fds)
455 fd = find_next_zero_bit(fdt->open_fds->fds_bits,
456 fdt->max_fds, fd);
457
458 error = expand_files(files, fd);
459 if (error < 0)
460 goto out;
461
462 /*
463 * If we needed to expand the fs array we
464 * might have blocked - try again.
465 */
466 if (error)
467 goto repeat;
468
469 if (start <= files->next_fd)
470 files->next_fd = fd + 1;
471
472 FD_SET(fd, fdt->open_fds);
473 if (flags & O_CLOEXEC)
474 FD_SET(fd, fdt->close_on_exec);
475 else
476 FD_CLR(fd, fdt->close_on_exec);
477 error = fd;
478#if 1
479 /* Sanity check */
480 if (rcu_dereference(fdt->fd[fd]) != NULL) {
481 printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
482 rcu_assign_pointer(fdt->fd[fd], NULL);
483 }
484#endif
485
486out:
487 spin_unlock(&files->file_lock);
488 return error;
489}
490
491int get_unused_fd(void)
492{
493 return alloc_fd(0, 0);
494}
495EXPORT_SYMBOL(get_unused_fd);
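
alloc_fd() above is kernel-internal, but its search strategy (start from the larger of the caller's minimum and the next_fd hint, take the first clear bit, mark it busy, refresh the hint) can be modelled in a few lines of user-space C. This is a simplified model only: a fixed 64-slot bitmap stands in for open_fds, and there is no expand_files(), locking, or close-on-exec handling.

#include <stdint.h>
#include <stdio.h>

#define MAX_FDS 64

static uint64_t open_fds;      /* bit n set => slot n in use */
static unsigned int next_fd;   /* hint: lowest slot that may be free */

/* Model of alloc_fd(start, ...): returns a slot >= start, or -1 if full. */
static int model_alloc_fd(unsigned int start)
{
        unsigned int fd = start < next_fd ? next_fd : start;

        while (fd < MAX_FDS && (open_fds & (1ULL << fd)))
                fd++;                   /* find_next_zero_bit() stand-in */
        if (fd >= MAX_FDS)
                return -1;              /* the real code would expand_files() */

        open_fds |= 1ULL << fd;
        if (start <= next_fd)
                next_fd = fd + 1;       /* keep the allocation hint fresh */
        return fd;
}

int main(void)
{
        int a = model_alloc_fd(0);
        int b = model_alloc_fd(0);
        int c = model_alloc_fd(10);

        printf("%d %d %d\n", a, b, c);  /* prints: 0 1 10 */
        return 0;
}
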
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index 629af01e5ad..6caf1e1ee26 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -23,6 +23,8 @@
23 23
24int jffs2_sum_init(struct jffs2_sb_info *c) 24int jffs2_sum_init(struct jffs2_sb_info *c)
25{ 25{
 26	uint32_t sum_size = min_t(uint32_t, c->sector_size, MAX_SUMMARY_SIZE);
27
26 c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL); 28 c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
27 29
28 if (!c->summary) { 30 if (!c->summary) {
@@ -30,7 +32,7 @@ int jffs2_sum_init(struct jffs2_sb_info *c)
30 return -ENOMEM; 32 return -ENOMEM;
31 } 33 }
32 34
33 c->summary->sum_buf = vmalloc(c->sector_size); 35 c->summary->sum_buf = kmalloc(sum_size, GFP_KERNEL);
34 36
35 if (!c->summary->sum_buf) { 37 if (!c->summary->sum_buf) {
36 JFFS2_WARNING("Can't allocate buffer for writing out summary information!\n"); 38 JFFS2_WARNING("Can't allocate buffer for writing out summary information!\n");
@@ -49,7 +51,7 @@ void jffs2_sum_exit(struct jffs2_sb_info *c)
49 51
50 jffs2_sum_disable_collecting(c->summary); 52 jffs2_sum_disable_collecting(c->summary);
51 53
52 vfree(c->summary->sum_buf); 54 kfree(c->summary->sum_buf);
53 c->summary->sum_buf = NULL; 55 c->summary->sum_buf = NULL;
54 56
55 kfree(c->summary); 57 kfree(c->summary);
@@ -665,7 +667,7 @@ crc_err:
665/* Write summary data to flash - helper function for jffs2_sum_write_sumnode() */ 667/* Write summary data to flash - helper function for jffs2_sum_write_sumnode() */
666 668
667static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 669static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
668 uint32_t infosize, uint32_t datasize, int padsize) 670 uint32_t infosize, uint32_t datasize, int padsize)
669{ 671{
670 struct jffs2_raw_summary isum; 672 struct jffs2_raw_summary isum;
671 union jffs2_sum_mem *temp; 673 union jffs2_sum_mem *temp;
@@ -676,6 +678,26 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
676 int ret; 678 int ret;
677 size_t retlen; 679 size_t retlen;
678 680
681 if (padsize + datasize > MAX_SUMMARY_SIZE) {
682 /* It won't fit in the buffer. Abort summary for this jeb */
683 jffs2_sum_disable_collecting(c->summary);
684
685 JFFS2_WARNING("Summary too big (%d data, %d pad) in eraseblock at %08x\n",
686 datasize, padsize, jeb->offset);
687 /* Non-fatal */
688 return 0;
689 }
690 /* Is there enough space for summary? */
691 if (padsize < 0) {
692 /* don't try to write out summary for this jeb */
693 jffs2_sum_disable_collecting(c->summary);
694
695 JFFS2_WARNING("Not enough space for summary, padsize = %d\n",
696 padsize);
697 /* Non-fatal */
698 return 0;
699 }
700
679 memset(c->summary->sum_buf, 0xff, datasize); 701 memset(c->summary->sum_buf, 0xff, datasize);
680 memset(&isum, 0, sizeof(isum)); 702 memset(&isum, 0, sizeof(isum));
681 703
@@ -821,7 +843,7 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
821{ 843{
822 int datasize, infosize, padsize; 844 int datasize, infosize, padsize;
823 struct jffs2_eraseblock *jeb; 845 struct jffs2_eraseblock *jeb;
824 int ret; 846 int ret = 0;
825 847
826 dbg_summary("called\n"); 848 dbg_summary("called\n");
827 849
@@ -841,16 +863,6 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
841 infosize += padsize; 863 infosize += padsize;
842 datasize += padsize; 864 datasize += padsize;
843 865
844 /* Is there enough space for summary? */
845 if (padsize < 0) {
846 /* don't try to write out summary for this jeb */
847 jffs2_sum_disable_collecting(c->summary);
848
849 JFFS2_WARNING("Not enough space for summary, padsize = %d\n", padsize);
850 spin_lock(&c->erase_completion_lock);
851 return 0;
852 }
853
854 ret = jffs2_sum_write_data(c, jeb, infosize, datasize, padsize); 866 ret = jffs2_sum_write_data(c, jeb, infosize, datasize, padsize);
855 spin_lock(&c->erase_completion_lock); 867 spin_lock(&c->erase_completion_lock);
856 return ret; 868 return ret;
diff --git a/fs/jffs2/summary.h b/fs/jffs2/summary.h
index 8bf34f2fa5c..60207a2ae95 100644
--- a/fs/jffs2/summary.h
+++ b/fs/jffs2/summary.h
@@ -13,6 +13,12 @@
13#ifndef JFFS2_SUMMARY_H 13#ifndef JFFS2_SUMMARY_H
14#define JFFS2_SUMMARY_H 14#define JFFS2_SUMMARY_H
15 15
16/* Limit summary size to 64KiB so that we can kmalloc it. If the summary
17 is larger than that, we have to just ditch it and avoid using summary
18 for the eraseblock in question... and it probably doesn't hurt us much
19 anyway. */
20#define MAX_SUMMARY_SIZE 65536
21
16#include <linux/uio.h> 22#include <linux/uio.h>
17#include <linux/jffs2.h> 23#include <linux/jffs2.h>
18 24
diff --git a/fs/libfs.c b/fs/libfs.c
index baeb71ee1cd..1add676a19d 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -216,8 +216,8 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
216 216
217 s->s_flags = MS_NOUSER; 217 s->s_flags = MS_NOUSER;
218 s->s_maxbytes = ~0ULL; 218 s->s_maxbytes = ~0ULL;
219 s->s_blocksize = 1024; 219 s->s_blocksize = PAGE_SIZE;
220 s->s_blocksize_bits = 10; 220 s->s_blocksize_bits = PAGE_SHIFT;
221 s->s_magic = magic; 221 s->s_magic = magic;
222 s->s_op = ops ? ops : &simple_super_operations; 222 s->s_op = ops ? ops : &simple_super_operations;
223 s->s_time_gran = 1; 223 s->s_time_gran = 1;
diff --git a/fs/namei.c b/fs/namei.c
index a7b0a0b8012..4ea63ed5e79 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -274,7 +274,7 @@ int inode_permission(struct inode *inode, int mask)
274 return retval; 274 return retval;
275 275
276 return security_inode_permission(inode, 276 return security_inode_permission(inode,
277 mask & (MAY_READ|MAY_WRITE|MAY_EXEC)); 277 mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND));
278} 278}
279 279
280/** 280/**
@@ -1431,8 +1431,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1431 * 3. We should have write and exec permissions on dir 1431 * 3. We should have write and exec permissions on dir
1432 * 4. We can't do it if dir is immutable (done in permission()) 1432 * 4. We can't do it if dir is immutable (done in permission())
1433 */ 1433 */
1434static inline int may_create(struct inode *dir, struct dentry *child, 1434static inline int may_create(struct inode *dir, struct dentry *child)
1435 struct nameidata *nd)
1436{ 1435{
1437 if (child->d_inode) 1436 if (child->d_inode)
1438 return -EEXIST; 1437 return -EEXIST;
@@ -1504,7 +1503,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
1504int vfs_create(struct inode *dir, struct dentry *dentry, int mode, 1503int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1505 struct nameidata *nd) 1504 struct nameidata *nd)
1506{ 1505{
1507 int error = may_create(dir, dentry, nd); 1506 int error = may_create(dir, dentry);
1508 1507
1509 if (error) 1508 if (error)
1510 return error; 1509 return error;
@@ -1948,7 +1947,7 @@ EXPORT_SYMBOL_GPL(lookup_create);
1948 1947
1949int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 1948int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1950{ 1949{
1951 int error = may_create(dir, dentry, NULL); 1950 int error = may_create(dir, dentry);
1952 1951
1953 if (error) 1952 if (error)
1954 return error; 1953 return error;
@@ -2049,7 +2048,7 @@ asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev)
2049 2048
2050int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 2049int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2051{ 2050{
2052 int error = may_create(dir, dentry, NULL); 2051 int error = may_create(dir, dentry);
2053 2052
2054 if (error) 2053 if (error)
2055 return error; 2054 return error;
@@ -2316,7 +2315,7 @@ asmlinkage long sys_unlink(const char __user *pathname)
2316 2315
2317int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) 2316int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
2318{ 2317{
2319 int error = may_create(dir, dentry, NULL); 2318 int error = may_create(dir, dentry);
2320 2319
2321 if (error) 2320 if (error)
2322 return error; 2321 return error;
@@ -2386,7 +2385,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2386 if (!inode) 2385 if (!inode)
2387 return -ENOENT; 2386 return -ENOENT;
2388 2387
2389 error = may_create(dir, new_dentry, NULL); 2388 error = may_create(dir, new_dentry);
2390 if (error) 2389 if (error)
2391 return error; 2390 return error;
2392 2391
@@ -2595,7 +2594,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2595 return error; 2594 return error;
2596 2595
2597 if (!new_dentry->d_inode) 2596 if (!new_dentry->d_inode)
2598 error = may_create(new_dir, new_dentry, NULL); 2597 error = may_create(new_dir, new_dentry);
2599 else 2598 else
2600 error = may_delete(new_dir, new_dentry, is_dir); 2599 error = may_delete(new_dir, new_dentry, is_dir);
2601 if (error) 2600 if (error)
diff --git a/fs/namespace.c b/fs/namespace.c
index 411728c0c8b..6e283c93b50 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1667,31 +1667,31 @@ static noinline int do_new_mount(struct nameidata *nd, char *type, int flags,
1667 if (IS_ERR(mnt)) 1667 if (IS_ERR(mnt))
1668 return PTR_ERR(mnt); 1668 return PTR_ERR(mnt);
1669 1669
1670 return do_add_mount(mnt, nd, mnt_flags, NULL); 1670 return do_add_mount(mnt, &nd->path, mnt_flags, NULL);
1671} 1671}
1672 1672
1673/* 1673/*
1674 * add a mount into a namespace's mount tree 1674 * add a mount into a namespace's mount tree
1675 * - provide the option of adding the new mount to an expiration list 1675 * - provide the option of adding the new mount to an expiration list
1676 */ 1676 */
1677int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, 1677int do_add_mount(struct vfsmount *newmnt, struct path *path,
1678 int mnt_flags, struct list_head *fslist) 1678 int mnt_flags, struct list_head *fslist)
1679{ 1679{
1680 int err; 1680 int err;
1681 1681
1682 down_write(&namespace_sem); 1682 down_write(&namespace_sem);
1683 /* Something was mounted here while we slept */ 1683 /* Something was mounted here while we slept */
1684 while (d_mountpoint(nd->path.dentry) && 1684 while (d_mountpoint(path->dentry) &&
1685 follow_down(&nd->path.mnt, &nd->path.dentry)) 1685 follow_down(&path->mnt, &path->dentry))
1686 ; 1686 ;
1687 err = -EINVAL; 1687 err = -EINVAL;
1688 if (!check_mnt(nd->path.mnt)) 1688 if (!check_mnt(path->mnt))
1689 goto unlock; 1689 goto unlock;
1690 1690
1691 /* Refuse the same filesystem on the same mount point */ 1691 /* Refuse the same filesystem on the same mount point */
1692 err = -EBUSY; 1692 err = -EBUSY;
1693 if (nd->path.mnt->mnt_sb == newmnt->mnt_sb && 1693 if (path->mnt->mnt_sb == newmnt->mnt_sb &&
1694 nd->path.mnt->mnt_root == nd->path.dentry) 1694 path->mnt->mnt_root == path->dentry)
1695 goto unlock; 1695 goto unlock;
1696 1696
1697 err = -EINVAL; 1697 err = -EINVAL;
@@ -1699,7 +1699,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
1699 goto unlock; 1699 goto unlock;
1700 1700
1701 newmnt->mnt_flags = mnt_flags; 1701 newmnt->mnt_flags = mnt_flags;
1702 if ((err = graft_tree(newmnt, &nd->path))) 1702 if ((err = graft_tree(newmnt, path)))
1703 goto unlock; 1703 goto unlock;
1704 1704
1705 if (fslist) /* add to the specified expiration list */ 1705 if (fslist) /* add to the specified expiration list */
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 2f285ef7639..66df08dd1ca 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -129,7 +129,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
129 goto out_err; 129 goto out_err;
130 130
131 mntget(mnt); 131 mntget(mnt);
132 err = do_add_mount(mnt, nd, nd->path.mnt->mnt_flags|MNT_SHRINKABLE, 132 err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
133 &nfs_automount_list); 133 &nfs_automount_list);
134 if (err < 0) { 134 if (err < 0) {
135 mntput(mnt); 135 mntput(mnt);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1db080135c6..506c24fb507 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1073,12 +1073,15 @@ static void ocfs2_write_failure(struct inode *inode,
1073 for(i = 0; i < wc->w_num_pages; i++) { 1073 for(i = 0; i < wc->w_num_pages; i++) {
1074 tmppage = wc->w_pages[i]; 1074 tmppage = wc->w_pages[i];
1075 1075
1076 if (ocfs2_should_order_data(inode)) 1076 if (page_has_buffers(tmppage)) {
1077 walk_page_buffers(wc->w_handle, page_buffers(tmppage), 1077 if (ocfs2_should_order_data(inode))
1078 from, to, NULL, 1078 walk_page_buffers(wc->w_handle,
1079 ocfs2_journal_dirty_data); 1079 page_buffers(tmppage),
1080 1080 from, to, NULL,
1081 block_commit_write(tmppage, from, to); 1081 ocfs2_journal_dirty_data);
1082
1083 block_commit_write(tmppage, from, to);
1084 }
1082 } 1085 }
1083} 1086}
1084 1087
@@ -1901,12 +1904,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1901 to = PAGE_CACHE_SIZE; 1904 to = PAGE_CACHE_SIZE;
1902 } 1905 }
1903 1906
1904 if (ocfs2_should_order_data(inode)) 1907 if (page_has_buffers(tmppage)) {
1905 walk_page_buffers(wc->w_handle, page_buffers(tmppage), 1908 if (ocfs2_should_order_data(inode))
1906 from, to, NULL, 1909 walk_page_buffers(wc->w_handle,
1907 ocfs2_journal_dirty_data); 1910 page_buffers(tmppage),
1908 1911 from, to, NULL,
1909 block_commit_write(tmppage, from, to); 1912 ocfs2_journal_dirty_data);
1913 block_commit_write(tmppage, from, to);
1914 }
1910 } 1915 }
1911 1916
1912out_write_size: 1917out_write_size:
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index be2dd95d3a1..ec2ed15c3da 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1766,8 +1766,8 @@ out_inode_unlock:
1766out_rw_unlock: 1766out_rw_unlock:
1767 ocfs2_rw_unlock(inode, 1); 1767 ocfs2_rw_unlock(inode, 1);
1768 1768
1769 mutex_unlock(&inode->i_mutex);
1770out: 1769out:
1770 mutex_unlock(&inode->i_mutex);
1771 return ret; 1771 return ret;
1772} 1772}
1773 1773
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index a8c19cb3cfd..7a37240f7a3 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -57,7 +57,7 @@ static int __ocfs2_recovery_thread(void *arg);
57static int ocfs2_commit_cache(struct ocfs2_super *osb); 57static int ocfs2_commit_cache(struct ocfs2_super *osb);
58static int ocfs2_wait_on_mount(struct ocfs2_super *osb); 58static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
59static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 59static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
60 int dirty); 60 int dirty, int replayed);
61static int ocfs2_trylock_journal(struct ocfs2_super *osb, 61static int ocfs2_trylock_journal(struct ocfs2_super *osb,
62 int slot_num); 62 int slot_num);
63static int ocfs2_recover_orphans(struct ocfs2_super *osb, 63static int ocfs2_recover_orphans(struct ocfs2_super *osb,
@@ -562,8 +562,18 @@ done:
562 return status; 562 return status;
563} 563}
564 564
565static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
566{
567 le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
568}
569
570static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
571{
572 return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
573}
574
565static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 575static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
566 int dirty) 576 int dirty, int replayed)
567{ 577{
568 int status; 578 int status;
569 unsigned int flags; 579 unsigned int flags;
@@ -593,6 +603,9 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
593 flags &= ~OCFS2_JOURNAL_DIRTY_FL; 603 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
594 fe->id1.journal1.ij_flags = cpu_to_le32(flags); 604 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
595 605
606 if (replayed)
607 ocfs2_bump_recovery_generation(fe);
608
596 status = ocfs2_write_block(osb, bh, journal->j_inode); 609 status = ocfs2_write_block(osb, bh, journal->j_inode);
597 if (status < 0) 610 if (status < 0)
598 mlog_errno(status); 611 mlog_errno(status);
@@ -667,7 +680,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
667 * Do not toggle if flush was unsuccessful otherwise 680 * Do not toggle if flush was unsuccessful otherwise
668 * will leave dirty metadata in a "clean" journal 681 * will leave dirty metadata in a "clean" journal
669 */ 682 */
670 status = ocfs2_journal_toggle_dirty(osb, 0); 683 status = ocfs2_journal_toggle_dirty(osb, 0, 0);
671 if (status < 0) 684 if (status < 0)
672 mlog_errno(status); 685 mlog_errno(status);
673 } 686 }
@@ -710,7 +723,7 @@ static void ocfs2_clear_journal_error(struct super_block *sb,
710 } 723 }
711} 724}
712 725
713int ocfs2_journal_load(struct ocfs2_journal *journal, int local) 726int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
714{ 727{
715 int status = 0; 728 int status = 0;
716 struct ocfs2_super *osb; 729 struct ocfs2_super *osb;
@@ -729,7 +742,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
729 742
730 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); 743 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
731 744
732 status = ocfs2_journal_toggle_dirty(osb, 1); 745 status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
733 if (status < 0) { 746 if (status < 0) {
734 mlog_errno(status); 747 mlog_errno(status);
735 goto done; 748 goto done;
@@ -771,7 +784,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
771 goto bail; 784 goto bail;
772 } 785 }
773 786
774 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0); 787 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
775 if (status < 0) 788 if (status < 0)
776 mlog_errno(status); 789 mlog_errno(status);
777 790
@@ -1034,6 +1047,12 @@ restart:
1034 spin_unlock(&osb->osb_lock); 1047 spin_unlock(&osb->osb_lock);
1035 mlog(0, "All nodes recovered\n"); 1048 mlog(0, "All nodes recovered\n");
1036 1049
1050 /* Refresh all journal recovery generations from disk */
1051 status = ocfs2_check_journals_nolocks(osb);
1052 status = (status == -EROFS) ? 0 : status;
1053 if (status < 0)
1054 mlog_errno(status);
1055
1037 ocfs2_super_unlock(osb, 1); 1056 ocfs2_super_unlock(osb, 1);
1038 1057
1039 /* We always run recovery on our own orphan dir - the dead 1058 /* We always run recovery on our own orphan dir - the dead
@@ -1096,6 +1115,42 @@ out:
1096 mlog_exit_void(); 1115 mlog_exit_void();
1097} 1116}
1098 1117
1118static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
1119 int slot_num,
1120 struct buffer_head **bh,
1121 struct inode **ret_inode)
1122{
1123 int status = -EACCES;
1124 struct inode *inode = NULL;
1125
1126 BUG_ON(slot_num >= osb->max_slots);
1127
1128 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1129 slot_num);
1130 if (!inode || is_bad_inode(inode)) {
1131 mlog_errno(status);
1132 goto bail;
1133 }
1134 SET_INODE_JOURNAL(inode);
1135
1136 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode);
1137 if (status < 0) {
1138 mlog_errno(status);
1139 goto bail;
1140 }
1141
1142 status = 0;
1143
1144bail:
1145 if (inode) {
1146 if (status || !ret_inode)
1147 iput(inode);
1148 else
1149 *ret_inode = inode;
1150 }
1151 return status;
1152}
1153
1099/* Does the actual journal replay and marks the journal inode as 1154/* Does the actual journal replay and marks the journal inode as
1100 * clean. Will only replay if the journal inode is marked dirty. */ 1155 * clean. Will only replay if the journal inode is marked dirty. */
1101static int ocfs2_replay_journal(struct ocfs2_super *osb, 1156static int ocfs2_replay_journal(struct ocfs2_super *osb,
@@ -1109,22 +1164,36 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1109 struct ocfs2_dinode *fe; 1164 struct ocfs2_dinode *fe;
1110 journal_t *journal = NULL; 1165 journal_t *journal = NULL;
1111 struct buffer_head *bh = NULL; 1166 struct buffer_head *bh = NULL;
1167 u32 slot_reco_gen;
1112 1168
1113 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, 1169 status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
1114 slot_num); 1170 if (status) {
1115 if (inode == NULL) {
1116 status = -EACCES;
1117 mlog_errno(status); 1171 mlog_errno(status);
1118 goto done; 1172 goto done;
1119 } 1173 }
1120 if (is_bad_inode(inode)) { 1174
1121 status = -EACCES; 1175 fe = (struct ocfs2_dinode *)bh->b_data;
1122 iput(inode); 1176 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1123 inode = NULL; 1177 brelse(bh);
1124 mlog_errno(status); 1178 bh = NULL;
1179
1180 /*
1181 * As the fs recovery is asynchronous, there is a small chance that
1182 * another node mounted (and recovered) the slot before the recovery
1183 * thread could get the lock. To handle that, we dirty read the journal
1184 * inode for that slot to get the recovery generation. If it is
1185 * different than what we expected, the slot has been recovered.
1186 * If not, it needs recovery.
1187 */
1188 if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
1189 mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num,
1190 osb->slot_recovery_generations[slot_num], slot_reco_gen);
1191 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1192 status = -EBUSY;
1125 goto done; 1193 goto done;
1126 } 1194 }
1127 SET_INODE_JOURNAL(inode); 1195
1196 /* Continue with recovery as the journal has not yet been recovered */
1128 1197
1129 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); 1198 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
1130 if (status < 0) { 1199 if (status < 0) {
@@ -1138,9 +1207,12 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1138 fe = (struct ocfs2_dinode *) bh->b_data; 1207 fe = (struct ocfs2_dinode *) bh->b_data;
1139 1208
1140 flags = le32_to_cpu(fe->id1.journal1.ij_flags); 1209 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
1210 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1141 1211
1142 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { 1212 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
1143 mlog(0, "No recovery required for node %d\n", node_num); 1213 mlog(0, "No recovery required for node %d\n", node_num);
1214 /* Refresh recovery generation for the slot */
1215 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1144 goto done; 1216 goto done;
1145 } 1217 }
1146 1218
@@ -1188,6 +1260,11 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1188 flags &= ~OCFS2_JOURNAL_DIRTY_FL; 1260 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
1189 fe->id1.journal1.ij_flags = cpu_to_le32(flags); 1261 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
1190 1262
1263 /* Increment recovery generation to indicate successful recovery */
1264 ocfs2_bump_recovery_generation(fe);
1265 osb->slot_recovery_generations[slot_num] =
1266 ocfs2_get_recovery_generation(fe);
1267
1191 status = ocfs2_write_block(osb, bh, inode); 1268 status = ocfs2_write_block(osb, bh, inode);
1192 if (status < 0) 1269 if (status < 0)
1193 mlog_errno(status); 1270 mlog_errno(status);
@@ -1252,6 +1329,13 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1252 1329
1253 status = ocfs2_replay_journal(osb, node_num, slot_num); 1330 status = ocfs2_replay_journal(osb, node_num, slot_num);
1254 if (status < 0) { 1331 if (status < 0) {
1332 if (status == -EBUSY) {
1333 mlog(0, "Skipping recovery for slot %u (node %u) "
1334 "as another node has recovered it\n", slot_num,
1335 node_num);
1336 status = 0;
1337 goto done;
1338 }
1255 mlog_errno(status); 1339 mlog_errno(status);
1256 goto done; 1340 goto done;
1257 } 1341 }
@@ -1334,12 +1418,29 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1334{ 1418{
1335 unsigned int node_num; 1419 unsigned int node_num;
1336 int status, i; 1420 int status, i;
1421 struct buffer_head *bh = NULL;
1422 struct ocfs2_dinode *di;
1337 1423
1338 /* This is called with the super block cluster lock, so we 1424 /* This is called with the super block cluster lock, so we
1339 * know that the slot map can't change underneath us. */ 1425 * know that the slot map can't change underneath us. */
1340 1426
1341 spin_lock(&osb->osb_lock); 1427 spin_lock(&osb->osb_lock);
1342 for (i = 0; i < osb->max_slots; i++) { 1428 for (i = 0; i < osb->max_slots; i++) {
1429 /* Read journal inode to get the recovery generation */
1430 status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
1431 if (status) {
1432 mlog_errno(status);
1433 goto bail;
1434 }
1435 di = (struct ocfs2_dinode *)bh->b_data;
1436 osb->slot_recovery_generations[i] =
1437 ocfs2_get_recovery_generation(di);
1438 brelse(bh);
1439 bh = NULL;
1440
1441 mlog(0, "Slot %u recovery generation is %u\n", i,
1442 osb->slot_recovery_generations[i]);
1443
1343 if (i == osb->slot_num) 1444 if (i == osb->slot_num)
1344 continue; 1445 continue;
1345 1446
@@ -1603,49 +1704,41 @@ static int ocfs2_commit_thread(void *arg)
1603 return 0; 1704 return 0;
1604} 1705}
1605 1706
1606/* Look for a dirty journal without taking any cluster locks. Used for 1707/* Reads all the journal inodes without taking any cluster locks. Used
1607 * hard readonly access to determine whether the file system journals 1708 * for hard readonly access to determine whether any journal requires
1608 * require recovery. */ 1709 * recovery. Also used to refresh the recovery generation numbers after
1710 * a journal has been recovered by another node.
1711 */
1609int ocfs2_check_journals_nolocks(struct ocfs2_super *osb) 1712int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
1610{ 1713{
1611 int ret = 0; 1714 int ret = 0;
1612 unsigned int slot; 1715 unsigned int slot;
1613 struct buffer_head *di_bh; 1716 struct buffer_head *di_bh = NULL;
1614 struct ocfs2_dinode *di; 1717 struct ocfs2_dinode *di;
1615 struct inode *journal = NULL; 1718 int journal_dirty = 0;
1616 1719
1617 for(slot = 0; slot < osb->max_slots; slot++) { 1720 for(slot = 0; slot < osb->max_slots; slot++) {
1618 journal = ocfs2_get_system_file_inode(osb, 1721 ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
1619 JOURNAL_SYSTEM_INODE, 1722 if (ret) {
1620 slot);
1621 if (!journal || is_bad_inode(journal)) {
1622 ret = -EACCES;
1623 mlog_errno(ret);
1624 goto out;
1625 }
1626
1627 di_bh = NULL;
1628 ret = ocfs2_read_block(osb, OCFS2_I(journal)->ip_blkno, &di_bh,
1629 0, journal);
1630 if (ret < 0) {
1631 mlog_errno(ret); 1723 mlog_errno(ret);
1632 goto out; 1724 goto out;
1633 } 1725 }
1634 1726
1635 di = (struct ocfs2_dinode *) di_bh->b_data; 1727 di = (struct ocfs2_dinode *) di_bh->b_data;
1636 1728
1729 osb->slot_recovery_generations[slot] =
1730 ocfs2_get_recovery_generation(di);
1731
1637 if (le32_to_cpu(di->id1.journal1.ij_flags) & 1732 if (le32_to_cpu(di->id1.journal1.ij_flags) &
1638 OCFS2_JOURNAL_DIRTY_FL) 1733 OCFS2_JOURNAL_DIRTY_FL)
1639 ret = -EROFS; 1734 journal_dirty = 1;
1640 1735
1641 brelse(di_bh); 1736 brelse(di_bh);
1642 if (ret) 1737 di_bh = NULL;
1643 break;
1644 } 1738 }
1645 1739
1646out: 1740out:
1647 if (journal) 1741 if (journal_dirty)
1648 iput(journal); 1742 ret = -EROFS;
1649
1650 return ret; 1743 return ret;
1651} 1744}
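
Stripped of the buffer and locking details, the recovery-generation scheme is: cache each slot's counter, re-read it locklessly before replaying, and skip the slot if the on-disk value has already moved on. A minimal sketch of just that comparison, assuming the generations have already been read into plain integers:

#include <errno.h>
#include <stdio.h>

/*
 * Decide whether a slot still needs replay.  cached_gen is what this node
 * recorded earlier; disk_gen is a fresh lockless read of
 * ij_recovery_generation.  Returns 0 to recover, -EBUSY to skip.
 */
static int should_replay_slot(unsigned int *cached_gen, unsigned int disk_gen)
{
        if (*cached_gen != disk_gen) {
                /* Someone bumped the generation: slot already recovered. */
                *cached_gen = disk_gen;
                return -EBUSY;
        }
        return 0;
}

int main(void)
{
        unsigned int cached = 7;

        printf("%d\n", should_replay_slot(&cached, 7)); /* 0: replay      */
        printf("%d\n", should_replay_slot(&cached, 8)); /* -EBUSY: skip   */
        return 0;
}
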
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index db82be2532e..2178ebffa05 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -161,7 +161,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal,
161void ocfs2_journal_shutdown(struct ocfs2_super *osb); 161void ocfs2_journal_shutdown(struct ocfs2_super *osb);
162int ocfs2_journal_wipe(struct ocfs2_journal *journal, 162int ocfs2_journal_wipe(struct ocfs2_journal *journal,
163 int full); 163 int full);
164int ocfs2_journal_load(struct ocfs2_journal *journal, int local); 164int ocfs2_journal_load(struct ocfs2_journal *journal, int local,
165 int replayed);
165int ocfs2_check_journals_nolocks(struct ocfs2_super *osb); 166int ocfs2_check_journals_nolocks(struct ocfs2_super *osb);
166void ocfs2_recovery_thread(struct ocfs2_super *osb, 167void ocfs2_recovery_thread(struct ocfs2_super *osb,
167 int node_num); 168 int node_num);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 1cb814be8ef..7f625f2b111 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -204,6 +204,8 @@ struct ocfs2_super
204 204
205 struct ocfs2_slot_info *slot_info; 205 struct ocfs2_slot_info *slot_info;
206 206
207 u32 *slot_recovery_generations;
208
207 spinlock_t node_map_lock; 209 spinlock_t node_map_lock;
208 210
209 u64 root_blkno; 211 u64 root_blkno;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 3f194517762..4f619850ccf 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -660,7 +660,10 @@ struct ocfs2_dinode {
660 struct { /* Info for journal system 660 struct { /* Info for journal system
661 inodes */ 661 inodes */
662 __le32 ij_flags; /* Mounted, version, etc. */ 662 __le32 ij_flags; /* Mounted, version, etc. */
663 __le32 ij_pad; 663 __le32 ij_recovery_generation; /* Incremented when the
664 journal is recovered
665 after an unclean
666 shutdown */
664 } journal1; 667 } journal1;
665 } id1; /* Inode type dependant 1 */ 668 } id1; /* Inode type dependant 1 */
666/*C0*/ union { 669/*C0*/ union {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 2560b33889a..88255d3f52b 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1442,6 +1442,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
1442 } 1442 }
1443 mlog(0, "max_slots for this device: %u\n", osb->max_slots); 1443 mlog(0, "max_slots for this device: %u\n", osb->max_slots);
1444 1444
1445 osb->slot_recovery_generations =
1446 kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations),
1447 GFP_KERNEL);
1448 if (!osb->slot_recovery_generations) {
1449 status = -ENOMEM;
1450 mlog_errno(status);
1451 goto bail;
1452 }
1453
1445 init_waitqueue_head(&osb->osb_wipe_event); 1454 init_waitqueue_head(&osb->osb_wipe_event);
1446 osb->osb_orphan_wipes = kcalloc(osb->max_slots, 1455 osb->osb_orphan_wipes = kcalloc(osb->max_slots,
1447 sizeof(*osb->osb_orphan_wipes), 1456 sizeof(*osb->osb_orphan_wipes),
@@ -1703,7 +1712,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
1703 local = ocfs2_mount_local(osb); 1712 local = ocfs2_mount_local(osb);
1704 1713
1705 /* will play back anything left in the journal. */ 1714 /* will play back anything left in the journal. */
1706 status = ocfs2_journal_load(osb->journal, local); 1715 status = ocfs2_journal_load(osb->journal, local, dirty);
1707 if (status < 0) { 1716 if (status < 0) {
1708 mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status); 1717 mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status);
1709 goto finally; 1718 goto finally;
@@ -1768,6 +1777,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
1768 ocfs2_free_slot_info(osb); 1777 ocfs2_free_slot_info(osb);
1769 1778
1770 kfree(osb->osb_orphan_wipes); 1779 kfree(osb->osb_orphan_wipes);
1780 kfree(osb->slot_recovery_generations);
1771 /* FIXME 1781 /* FIXME
1772 * This belongs in journal shutdown, but because we have to 1782 * This belongs in journal shutdown, but because we have to
1773 * allocate osb->journal at the start of ocfs2_initalize_osb(), 1783 * allocate osb->journal at the start of ocfs2_initalize_osb(),
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c
index dc75f22be3f..697663b01ba 100644
--- a/fs/omfs/bitmap.c
+++ b/fs/omfs/bitmap.c
@@ -71,10 +71,10 @@ static int set_run(struct super_block *sb, int map,
71 } 71 }
72 if (set) { 72 if (set) {
73 set_bit(bit, sbi->s_imap[map]); 73 set_bit(bit, sbi->s_imap[map]);
74 set_bit(bit, (long *) bh->b_data); 74 set_bit(bit, (unsigned long *)bh->b_data);
75 } else { 75 } else {
76 clear_bit(bit, sbi->s_imap[map]); 76 clear_bit(bit, sbi->s_imap[map]);
77 clear_bit(bit, (long *) bh->b_data); 77 clear_bit(bit, (unsigned long *)bh->b_data);
78 } 78 }
79 } 79 }
80 mark_buffer_dirty(bh); 80 mark_buffer_dirty(bh);
@@ -109,7 +109,7 @@ int omfs_allocate_block(struct super_block *sb, u64 block)
109 if (!bh) 109 if (!bh)
110 goto out; 110 goto out;
111 111
112 set_bit(bit, (long *) bh->b_data); 112 set_bit(bit, (unsigned long *)bh->b_data);
113 mark_buffer_dirty(bh); 113 mark_buffer_dirty(bh);
114 brelse(bh); 114 brelse(bh);
115 } 115 }
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 05a5bc31e4b..c0757e99887 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -104,7 +104,7 @@ int omfs_make_empty(struct inode *inode, struct super_block *sb)
104 104
105 oi = (struct omfs_inode *) bh->b_data; 105 oi = (struct omfs_inode *) bh->b_data;
106 oi->i_head.h_self = cpu_to_be64(inode->i_ino); 106 oi->i_head.h_self = cpu_to_be64(inode->i_ino);
107 oi->i_sibling = ~0ULL; 107 oi->i_sibling = ~cpu_to_be64(0ULL);
108 108
109 mark_buffer_dirty(bh); 109 mark_buffer_dirty(bh);
110 brelse(bh); 110 brelse(bh);
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 66e01fae438..7e2499053e4 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -30,11 +30,11 @@ void omfs_make_empty_table(struct buffer_head *bh, int offset)
30{ 30{
31 struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset]; 31 struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset];
32 32
33 oe->e_next = ~0ULL; 33 oe->e_next = ~cpu_to_be64(0ULL);
34 oe->e_extent_count = cpu_to_be32(1), 34 oe->e_extent_count = cpu_to_be32(1),
35 oe->e_fill = cpu_to_be32(0x22), 35 oe->e_fill = cpu_to_be32(0x22),
36 oe->e_entry.e_cluster = ~0ULL; 36 oe->e_entry.e_cluster = ~cpu_to_be64(0ULL);
37 oe->e_entry.e_blocks = ~0ULL; 37 oe->e_entry.e_blocks = ~cpu_to_be64(0ULL);
38} 38}
39 39
40int omfs_shrink_inode(struct inode *inode) 40int omfs_shrink_inode(struct inode *inode)
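
~cpu_to_be64(0ULL) and ~0ULL produce the same all-ones bit pattern; the new spelling only keeps the expression typed as a big-endian quantity for sparse. A small user-space check, with <endian.h>'s htobe64 standing in for the kernel's cpu_to_be64:

#define _DEFAULT_SOURCE
#include <endian.h>
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
        uint64_t a = ~htobe64(0ULL);   /* complement of a big-endian zero */
        uint64_t b = htobe64(~0ULL);   /* big-endian view of all-ones     */

        /* All-ones has no byte order, so both spellings agree. */
        printf("a=%#" PRIx64 " b=%#" PRIx64 " equal=%d\n", a, b, a == b);
        return 0;
}
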
diff --git a/fs/open.c b/fs/open.c
index 52647be277a..07da9359481 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -963,62 +963,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
963} 963}
964EXPORT_SYMBOL(dentry_open); 964EXPORT_SYMBOL(dentry_open);
965 965
966/*
967 * Find an empty file descriptor entry, and mark it busy.
968 */
969int get_unused_fd_flags(int flags)
970{
971 struct files_struct * files = current->files;
972 int fd, error;
973 struct fdtable *fdt;
974
975 spin_lock(&files->file_lock);
976
977repeat:
978 fdt = files_fdtable(files);
979 fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,
980 files->next_fd);
981
982 /* Do we need to expand the fd array or fd set? */
983 error = expand_files(files, fd);
984 if (error < 0)
985 goto out;
986
987 if (error) {
988 /*
989 * If we needed to expand the fs array we
990 * might have blocked - try again.
991 */
992 goto repeat;
993 }
994
995 FD_SET(fd, fdt->open_fds);
996 if (flags & O_CLOEXEC)
997 FD_SET(fd, fdt->close_on_exec);
998 else
999 FD_CLR(fd, fdt->close_on_exec);
1000 files->next_fd = fd + 1;
1001#if 1
1002 /* Sanity check */
1003 if (fdt->fd[fd] != NULL) {
1004 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
1005 fdt->fd[fd] = NULL;
1006 }
1007#endif
1008 error = fd;
1009
1010out:
1011 spin_unlock(&files->file_lock);
1012 return error;
1013}
1014
1015int get_unused_fd(void)
1016{
1017 return get_unused_fd_flags(0);
1018}
1019
1020EXPORT_SYMBOL(get_unused_fd);
1021
1022static void __put_unused_fd(struct files_struct *files, unsigned int fd) 966static void __put_unused_fd(struct files_struct *files, unsigned int fd)
1023{ 967{
1024 struct fdtable *fdt = files_fdtable(files); 968 struct fdtable *fdt = files_fdtable(files);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index cb4096cc3fb..4fb81e9c94e 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -300,10 +300,10 @@ out:
300 return rtn; 300 return rtn;
301} 301}
302 302
303static DEFINE_IDR(proc_inum_idr); 303static DEFINE_IDA(proc_inum_ida);
304static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ 304static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
305 305
306#define PROC_DYNAMIC_FIRST 0xF0000000UL 306#define PROC_DYNAMIC_FIRST 0xF0000000U
307 307
308/* 308/*
309 * Return an inode number between PROC_DYNAMIC_FIRST and 309 * Return an inode number between PROC_DYNAMIC_FIRST and
@@ -311,36 +311,33 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
311 */ 311 */
312static unsigned int get_inode_number(void) 312static unsigned int get_inode_number(void)
313{ 313{
314 int i, inum = 0; 314 unsigned int i;
315 int error; 315 int error;
316 316
317retry: 317retry:
318 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) 318 if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
319 return 0; 319 return 0;
320 320
321 spin_lock(&proc_inum_lock); 321 spin_lock(&proc_inum_lock);
322 error = idr_get_new(&proc_inum_idr, NULL, &i); 322 error = ida_get_new(&proc_inum_ida, &i);
323 spin_unlock(&proc_inum_lock); 323 spin_unlock(&proc_inum_lock);
324 if (error == -EAGAIN) 324 if (error == -EAGAIN)
325 goto retry; 325 goto retry;
326 else if (error) 326 else if (error)
327 return 0; 327 return 0;
328 328
329 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; 329 if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
330 330 spin_lock(&proc_inum_lock);
331 /* inum will never be more than 0xf0ffffff, so no check 331 ida_remove(&proc_inum_ida, i);
332 * for overflow. 332 spin_unlock(&proc_inum_lock);
333 */ 333 }
334 334 return PROC_DYNAMIC_FIRST + i;
335 return inum;
336} 335}
337 336
338static void release_inode_number(unsigned int inum) 337static void release_inode_number(unsigned int inum)
339{ 338{
340 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK;
341
342 spin_lock(&proc_inum_lock); 339 spin_lock(&proc_inum_lock);
343 idr_remove(&proc_inum_idr, id); 340 ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
344 spin_unlock(&proc_inum_lock); 341 spin_unlock(&proc_inum_lock);
345} 342}
346 343
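
With the IDA handing out small integers from zero, the only subtle step above is offsetting them into the reserved 0xF0000000+ range without wrapping past UINT_MAX. A user-space sketch of that mapping, with a trivial counter standing in for ida_get_new()/ida_remove() and no locking:

#include <limits.h>
#include <stdio.h>

#define DYNAMIC_FIRST 0xF0000000U

static unsigned int next_id;            /* stand-in for ida_get_new() */

/* Map an allocator ID into the reserved inode range; 0 means failure. */
static unsigned int model_get_inode_number(void)
{
        unsigned int i = next_id++;

        if (i > UINT_MAX - DYNAMIC_FIRST) {
                next_id--;              /* give the ID back (ida_remove()) */
                return 0;               /* range exhausted */
        }
        return DYNAMIC_FIRST + i;
}

int main(void)
{
        printf("%#x %#x\n", model_get_inode_number(), model_get_inode_number());
        /* prints 0xf0000000 0xf0000001 */
        return 0;
}
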
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 879e54d35c2..282a13596c7 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2076,8 +2076,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2076 return err; 2076 return err;
2077 /* Quotafile not on the same filesystem? */ 2077 /* Quotafile not on the same filesystem? */
2078 if (nd.path.mnt->mnt_sb != sb) { 2078 if (nd.path.mnt->mnt_sb != sb) {
2079 path_put(&nd.path); 2079 err = -EXDEV;
2080 return -EXDEV; 2080 goto out;
2081 } 2081 }
2082 inode = nd.path.dentry->d_inode; 2082 inode = nd.path.dentry->d_inode;
2083 /* We must not pack tails for quota files on reiserfs for quota IO to work */ 2083 /* We must not pack tails for quota files on reiserfs for quota IO to work */
@@ -2087,8 +2087,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2087 reiserfs_warning(sb, 2087 reiserfs_warning(sb,
2088 "reiserfs: Unpacking tail of quota file failed" 2088 "reiserfs: Unpacking tail of quota file failed"
2089 " (%d). Cannot turn on quotas.", err); 2089 " (%d). Cannot turn on quotas.", err);
2090 path_put(&nd.path); 2090 err = -EINVAL;
2091 return -EINVAL; 2091 goto out;
2092 } 2092 }
2093 mark_inode_dirty(inode); 2093 mark_inode_dirty(inode);
2094 } 2094 }
@@ -2109,13 +2109,15 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2109 /* Just start temporary transaction and finish it */ 2109 /* Just start temporary transaction and finish it */
2110 err = journal_begin(&th, sb, 1); 2110 err = journal_begin(&th, sb, 1);
2111 if (err) 2111 if (err)
2112 return err; 2112 goto out;
2113 err = journal_end_sync(&th, sb, 1); 2113 err = journal_end_sync(&th, sb, 1);
2114 if (err) 2114 if (err)
2115 return err; 2115 goto out;
2116 } 2116 }
2117 err = vfs_quota_on_path(sb, type, format_id, &nd.path);
2118out:
2117 path_put(&nd.path); 2119 path_put(&nd.path);
2118 return vfs_quota_on(sb, type, format_id, path, 0); 2120 return err;
2119} 2121}
2120 2122
2121/* Read data from quotafile - avoid pagecache and such because we cannot afford 2123/* Read data from quotafile - avoid pagecache and such because we cannot afford
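
The reiserfs_quota_on() rework funnels every early return through a single label so path_put() runs exactly once on success and on every error. The same goto-cleanup shape in a self-contained user-space form, with fopen()/fclose() standing in for the path lookup and path_put(); the file name is arbitrary:

#include <errno.h>
#include <stdio.h>

/* One acquisition, one release point; all errors funnel through "out". */
static int process_file(const char *name)
{
        FILE *f = fopen(name, "r");
        int err = 0, c;

        if (!f)
                return -errno;

        c = fgetc(f);
        if (c == EOF) {
                err = -EINVAL;          /* empty file: bail via cleanup */
                goto out;
        }
        if (c != '#') {
                err = -EINVAL;          /* wrong header: same cleanup path */
                goto out;
        }
        /* ... real work would go here ... */
out:
        fclose(f);                      /* the single path_put() analogue */
        return err;
}

int main(void)
{
        printf("%d\n", process_file("/etc/hostname"));
        return 0;
}
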
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 8e51a2aaa97..60d2f822e87 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -418,7 +418,8 @@ static int
418romfs_readpage(struct file *file, struct page * page) 418romfs_readpage(struct file *file, struct page * page)
419{ 419{
420 struct inode *inode = page->mapping->host; 420 struct inode *inode = page->mapping->host;
421 loff_t offset, avail, readlen; 421 loff_t offset, size;
422 unsigned long filled;
422 void *buf; 423 void *buf;
423 int result = -EIO; 424 int result = -EIO;
424 425
@@ -430,21 +431,29 @@ romfs_readpage(struct file *file, struct page * page)
430 431
431 /* 32 bit warning -- but not for us :) */ 432 /* 32 bit warning -- but not for us :) */
432 offset = page_offset(page); 433 offset = page_offset(page);
433 if (offset < i_size_read(inode)) { 434 size = i_size_read(inode);
434 avail = inode->i_size-offset; 435 filled = 0;
435 readlen = min_t(unsigned long, avail, PAGE_SIZE); 436 result = 0;
436 if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) { 437 if (offset < size) {
437 if (readlen < PAGE_SIZE) { 438 unsigned long readlen;
438 memset(buf + readlen,0,PAGE_SIZE-readlen); 439
439 } 440 size -= offset;
440 SetPageUptodate(page); 441 readlen = size > PAGE_SIZE ? PAGE_SIZE : size;
441 result = 0; 442
443 filled = romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen);
444
445 if (filled != readlen) {
446 SetPageError(page);
447 filled = 0;
448 result = -EIO;
442 } 449 }
443 } 450 }
444 if (result) { 451
445 memset(buf, 0, PAGE_SIZE); 452 if (filled < PAGE_SIZE)
446 SetPageError(page); 453 memset(buf + filled, 0, PAGE_SIZE-filled);
447 } 454
455 if (!result)
456 SetPageUptodate(page);
448 flush_dcache_page(page); 457 flush_dcache_page(page);
449 458
450 unlock_page(page); 459 unlock_page(page);