Diffstat (limited to 'fs')
-rw-r--r--  fs/Kconfig | 4
-rw-r--r--  fs/binfmt_elf.c | 4
-rw-r--r--  fs/block_dev.c | 8
-rw-r--r--  fs/char_dev.c | 6
-rw-r--r--  fs/coda/psdev.c | 8
-rw-r--r--  fs/compat.c | 4
-rw-r--r--  fs/compat_ioctl.c | 8
-rw-r--r--  fs/configfs/mount.c | 13
-rw-r--r--  fs/debugfs/inode.c | 13
-rw-r--r--  fs/dlm/lockspace.c | 50
-rw-r--r--  fs/ecryptfs/inode.c | 20
-rw-r--r--  fs/ecryptfs/main.c | 129
-rw-r--r--  fs/ecryptfs/super.c | 1
-rw-r--r--  fs/fat/fatent.c | 28
-rw-r--r--  fs/fs-writeback.c | 2
-rw-r--r--  fs/fuse/inode.c | 26
-rw-r--r--  fs/gfs2/Makefile | 2
-rw-r--r--  fs/gfs2/bmap.c | 37
-rw-r--r--  fs/gfs2/bmap.h | 2
-rw-r--r--  fs/gfs2/daemon.c | 50
-rw-r--r--  fs/gfs2/daemon.h | 1
-rw-r--r--  fs/gfs2/dir.c | 4
-rw-r--r--  fs/gfs2/eaops.c | 84
-rw-r--r--  fs/gfs2/eattr.c | 2
-rw-r--r--  fs/gfs2/glock.c | 83
-rw-r--r--  fs/gfs2/glops.c | 110
-rw-r--r--  fs/gfs2/incore.h | 47
-rw-r--r--  fs/gfs2/inode.c | 41
-rw-r--r--  fs/gfs2/inode.h | 12
-rw-r--r--  fs/gfs2/locking/dlm/mount.c | 5
-rw-r--r--  fs/gfs2/locking/dlm/plock.c | 18
-rw-r--r--  fs/gfs2/locking/dlm/sysfs.c | 38
-rw-r--r--  fs/gfs2/locking/dlm/thread.c | 9
-rw-r--r--  fs/gfs2/log.c | 119
-rw-r--r--  fs/gfs2/log.h | 14
-rw-r--r--  fs/gfs2/lops.c | 71
-rw-r--r--  fs/gfs2/main.c | 3
-rw-r--r--  fs/gfs2/meta_io.c | 97
-rw-r--r--  fs/gfs2/meta_io.h | 1
-rw-r--r--  fs/gfs2/ops_address.c | 649
-rw-r--r--  fs/gfs2/ops_address.h | 7
-rw-r--r--  fs/gfs2/ops_file.c | 229
-rw-r--r--  fs/gfs2/ops_file.h | 24
-rw-r--r--  fs/gfs2/ops_fstype.c | 73
-rw-r--r--  fs/gfs2/ops_inode.c | 20
-rw-r--r--  fs/gfs2/ops_inode.h | 6
-rw-r--r--  fs/gfs2/ops_super.c | 1
-rw-r--r--  fs/gfs2/ops_vm.c | 169
-rw-r--r--  fs/gfs2/ops_vm.h | 18
-rw-r--r--  fs/gfs2/quota.c | 29
-rw-r--r--  fs/gfs2/recovery.c | 18
-rw-r--r--  fs/gfs2/rgrp.c | 104
-rw-r--r--  fs/gfs2/rgrp.h | 4
-rw-r--r--  fs/gfs2/super.c | 25
-rw-r--r--  fs/gfs2/sys.c | 36
-rw-r--r--  fs/gfs2/trans.c | 5
-rw-r--r--  fs/gfs2/trans.h | 1
-rw-r--r--  fs/hfs/bfind.c | 12
-rw-r--r--  fs/hfs/brec.c | 15
-rw-r--r--  fs/hfs/btree.c | 20
-rw-r--r--  fs/hfs/hfs.h | 5
-rw-r--r--  fs/jbd/transaction.c | 2
-rw-r--r--  fs/namei.c | 4
-rw-r--r--  fs/namespace.c | 11
-rw-r--r--  fs/nfs/nfs4_fs.h | 1
-rw-r--r--  fs/nfs/nfs4proc.c | 34
-rw-r--r--  fs/nfs/nfs4renewd.c | 2
-rw-r--r--  fs/nfs/nfs4state.c | 5
-rw-r--r--  fs/nfs/super.c | 12
-rw-r--r--  fs/nfsd/nfs3xdr.c | 5
-rw-r--r--  fs/nfsd/nfsxdr.c | 5
-rw-r--r--  fs/ocfs2/cluster/masklog.c | 4
-rw-r--r--  fs/ocfs2/cluster/sys.c | 83
-rw-r--r--  fs/openpromfs/inode.c | 2
-rw-r--r--  fs/partitions/check.c | 327
-rw-r--r--  fs/proc/array.c | 4
-rw-r--r--  fs/read_write.c | 63
-rw-r--r--  fs/splice.c | 8
-rw-r--r--  fs/sysfs/dir.c | 10
-rw-r--r--  fs/sysfs/file.c | 67
-rw-r--r--  fs/sysfs/symlink.c | 88
-rw-r--r--  fs/xfs/linux-2.6/xfs_file.c | 16
82 files changed, 1560 insertions(+), 1837 deletions(-)
diff --git a/fs/Kconfig b/fs/Kconfig
index 487236c65837..781b47d2f9f2 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1112,8 +1112,8 @@ config HFS_FS
         help
           If you say Y here, you will be able to mount Macintosh-formatted
           floppy disks and hard drive partitions with full read-write access.
-          Please read <file:fs/hfs/HFS.txt> to learn about the available mount
-          options.
+          Please read <file:Documentation/filesystems/hfs.txt> to learn about
+          the available mount options.
 
           To compile this file system support as a module, choose M here: the
           module will be called hfs.
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index ba8de7ca260b..f0b3171842f2 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1384,7 +1384,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
         prstatus->pr_sigpend = p->pending.signal.sig[0];
         prstatus->pr_sighold = p->blocked.sig[0];
         prstatus->pr_pid = task_pid_vnr(p);
-        prstatus->pr_ppid = task_pid_vnr(p->parent);
+        prstatus->pr_ppid = task_pid_vnr(p->real_parent);
         prstatus->pr_pgrp = task_pgrp_vnr(p);
         prstatus->pr_sid = task_session_vnr(p);
         if (thread_group_leader(p)) {
@@ -1430,7 +1430,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
         psinfo->pr_psargs[len] = 0;
 
         psinfo->pr_pid = task_pid_vnr(p);
-        psinfo->pr_ppid = task_pid_vnr(p->parent);
+        psinfo->pr_ppid = task_pid_vnr(p->real_parent);
         psinfo->pr_pgrp = task_pgrp_vnr(p);
         psinfo->pr_sid = task_session_vnr(p);
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 993f78c55221..e48a630ae266 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -738,9 +738,9 @@ EXPORT_SYMBOL(bd_release);
 static struct kobject *bdev_get_kobj(struct block_device *bdev)
 {
         if (bdev->bd_contains != bdev)
-                return kobject_get(&bdev->bd_part->kobj);
+                return kobject_get(&bdev->bd_part->dev.kobj);
         else
-                return kobject_get(&bdev->bd_disk->kobj);
+                return kobject_get(&bdev->bd_disk->dev.kobj);
 }
 
 static struct kobject *bdev_get_holder(struct block_device *bdev)
@@ -1176,7 +1176,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
                         ret = -ENXIO;
                         goto out_first;
                 }
-                kobject_get(&p->kobj);
+                kobject_get(&p->dev.kobj);
                 bdev->bd_part = p;
                 bd_set_size(bdev, (loff_t) p->nr_sects << 9);
         }
@@ -1299,7 +1299,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part)
                 module_put(owner);
 
         if (bdev->bd_contains != bdev) {
-                kobject_put(&bdev->bd_part->kobj);
+                kobject_put(&bdev->bd_part->dev.kobj);
                 bdev->bd_part = NULL;
         }
         bdev->bd_disk = NULL;
diff --git a/fs/char_dev.c b/fs/char_dev.c
index c3bfa76765c4..2c7a8b5b4598 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -510,9 +510,8 @@ struct cdev *cdev_alloc(void)
 {
         struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL);
         if (p) {
-                p->kobj.ktype = &ktype_cdev_dynamic;
                 INIT_LIST_HEAD(&p->list);
-                kobject_init(&p->kobj);
+                kobject_init(&p->kobj, &ktype_cdev_dynamic);
         }
         return p;
 }
@@ -529,8 +528,7 @@ void cdev_init(struct cdev *cdev, const struct file_operations *fops)
 {
         memset(cdev, 0, sizeof *cdev);
         INIT_LIST_HEAD(&cdev->list);
-        cdev->kobj.ktype = &ktype_cdev_default;
-        kobject_init(&cdev->kobj);
+        kobject_init(&cdev->kobj, &ktype_cdev_default);
         cdev->ops = fops;
 }
 
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index dcc6aead70f5..e3eb3556622b 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -362,8 +362,8 @@ static int init_coda_psdev(void)
                 goto out_chrdev;
         }
         for (i = 0; i < MAX_CODADEVS; i++)
-                class_device_create(coda_psdev_class, NULL,
-                                MKDEV(CODA_PSDEV_MAJOR,i), NULL, "cfs%d", i);
+                device_create(coda_psdev_class, NULL,
+                              MKDEV(CODA_PSDEV_MAJOR,i), "cfs%d", i);
         coda_sysctl_init();
         goto out;
 
@@ -405,7 +405,7 @@ static int __init init_coda(void)
         return 0;
 out:
         for (i = 0; i < MAX_CODADEVS; i++)
-                class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i));
+                device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i));
         class_destroy(coda_psdev_class);
         unregister_chrdev(CODA_PSDEV_MAJOR, "coda");
         coda_sysctl_clean();
@@ -424,7 +424,7 @@ static void __exit exit_coda(void)
                 printk("coda: failed to unregister filesystem\n");
         }
         for (i = 0; i < MAX_CODADEVS; i++)
-                class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i));
+                device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i));
         class_destroy(coda_psdev_class);
         unregister_chrdev(CODA_PSDEV_MAJOR, "coda");
         coda_sysctl_clean();
diff --git a/fs/compat.c b/fs/compat.c
index 15078ce4c04a..5216c3fd7517 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1104,10 +1104,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
         if (ret < 0)
                 goto out;
 
-        ret = security_file_permission(file, type == READ ? MAY_READ:MAY_WRITE);
-        if (ret)
-                goto out;
-
         fnv = NULL;
         if (type == READ) {
                 fn = file->f_op->read;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index e8b7c3a98a54..da8cb3b3592c 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -10,6 +10,8 @@
  * ioctls.
  */
 
+#include <linux/joystick.h>
+
 #include <linux/types.h>
 #include <linux/compat.h>
 #include <linux/kernel.h>
@@ -2642,6 +2644,12 @@ COMPATIBLE_IOCTL(VIDEO_SET_ATTRIBUTES)
 COMPATIBLE_IOCTL(VIDEO_GET_SIZE)
 COMPATIBLE_IOCTL(VIDEO_GET_FRAME_RATE)
 
+/* joystick */
+COMPATIBLE_IOCTL(JSIOCGVERSION)
+COMPATIBLE_IOCTL(JSIOCGAXES)
+COMPATIBLE_IOCTL(JSIOCGBUTTONS)
+COMPATIBLE_IOCTL(JSIOCGNAME(0))
+
 /* now things that need handlers */
 HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob)
 HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob)
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 3bf0278ea843..de3b31d0a37d 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -128,7 +128,7 @@ void configfs_release_fs(void)
 }
 
 
-static decl_subsys(config, NULL, NULL);
+static struct kobject *config_kobj;
 
 static int __init configfs_init(void)
 {
@@ -140,9 +140,8 @@ static int __init configfs_init(void)
         if (!configfs_dir_cachep)
                 goto out;
 
-        kobj_set_kset_s(&config_subsys, kernel_subsys);
-        err = subsystem_register(&config_subsys);
-        if (err) {
+        config_kobj = kobject_create_and_add("config", kernel_kobj);
+        if (!config_kobj) {
                 kmem_cache_destroy(configfs_dir_cachep);
                 configfs_dir_cachep = NULL;
                 goto out;
@@ -151,7 +150,7 @@ static int __init configfs_init(void)
         err = register_filesystem(&configfs_fs_type);
         if (err) {
                 printk(KERN_ERR "configfs: Unable to register filesystem!\n");
-                subsystem_unregister(&config_subsys);
+                kobject_put(config_kobj);
                 kmem_cache_destroy(configfs_dir_cachep);
                 configfs_dir_cachep = NULL;
                 goto out;
@@ -160,7 +159,7 @@ static int __init configfs_init(void)
         err = configfs_inode_init();
         if (err) {
                 unregister_filesystem(&configfs_fs_type);
-                subsystem_unregister(&config_subsys);
+                kobject_put(config_kobj);
                 kmem_cache_destroy(configfs_dir_cachep);
                 configfs_dir_cachep = NULL;
         }
@@ -171,7 +170,7 @@ out:
 static void __exit configfs_exit(void)
 {
         unregister_filesystem(&configfs_fs_type);
-        subsystem_unregister(&config_subsys);
+        kobject_put(config_kobj);
         kmem_cache_destroy(configfs_dir_cachep);
         configfs_dir_cachep = NULL;
         configfs_inode_exit();
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 6a713b33992f..d26e2826ba5b 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -426,20 +426,19 @@ exit:
 }
 EXPORT_SYMBOL_GPL(debugfs_rename);
 
-static decl_subsys(debug, NULL, NULL);
+static struct kobject *debug_kobj;
 
 static int __init debugfs_init(void)
 {
         int retval;
 
-        kobj_set_kset_s(&debug_subsys, kernel_subsys);
-        retval = subsystem_register(&debug_subsys);
-        if (retval)
-                return retval;
+        debug_kobj = kobject_create_and_add("debug", kernel_kobj);
+        if (!debug_kobj)
+                return -EINVAL;
 
         retval = register_filesystem(&debug_fs_type);
         if (retval)
-                subsystem_unregister(&debug_subsys);
+                kobject_put(debug_kobj);
         return retval;
 }
 
@@ -447,7 +446,7 @@ static void __exit debugfs_exit(void)
 {
         simple_release_fs(&debugfs_mount, &debugfs_mount_count);
         unregister_filesystem(&debug_fs_type);
-        subsystem_unregister(&debug_subsys);
+        kobject_put(debug_kobj);
 }
 
 core_initcall(debugfs_init);
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 6353a8384520..5c108c49cb8c 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -166,26 +166,7 @@ static struct kobj_type dlm_ktype = {
         .release = lockspace_kobj_release,
 };
 
-static struct kset dlm_kset = {
-        .ktype = &dlm_ktype,
-};
-
-static int kobject_setup(struct dlm_ls *ls)
-{
-        char lsname[DLM_LOCKSPACE_LEN];
-        int error;
-
-        memset(lsname, 0, DLM_LOCKSPACE_LEN);
-        snprintf(lsname, DLM_LOCKSPACE_LEN, "%s", ls->ls_name);
-
-        error = kobject_set_name(&ls->ls_kobj, "%s", lsname);
-        if (error)
-                return error;
-
-        ls->ls_kobj.kset = &dlm_kset;
-        ls->ls_kobj.ktype = &dlm_ktype;
-        return 0;
-}
+static struct kset *dlm_kset;
 
 static int do_uevent(struct dlm_ls *ls, int in)
 {
@@ -220,24 +201,22 @@ static int do_uevent(struct dlm_ls *ls, int in)
 
 int dlm_lockspace_init(void)
 {
-        int error;
-
         ls_count = 0;
         mutex_init(&ls_lock);
         INIT_LIST_HEAD(&lslist);
         spin_lock_init(&lslist_lock);
 
-        kobject_set_name(&dlm_kset.kobj, "dlm");
-        kobj_set_kset_s(&dlm_kset, kernel_subsys);
-        error = kset_register(&dlm_kset);
-        if (error)
-                printk("dlm_lockspace_init: cannot register kset %d\n", error);
-        return error;
+        dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj);
+        if (!dlm_kset) {
+                printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__);
+                return -ENOMEM;
+        }
+        return 0;
 }
 
 void dlm_lockspace_exit(void)
 {
-        kset_unregister(&dlm_kset);
+        kset_unregister(dlm_kset);
 }
 
 static int dlm_scand(void *data)
@@ -549,13 +528,12 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
                 goto out_delist;
         }
 
-        error = kobject_setup(ls);
-        if (error)
-                goto out_stop;
-
-        error = kobject_register(&ls->ls_kobj);
+        ls->ls_kobj.kset = dlm_kset;
+        error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
+                                     "%s", ls->ls_name);
         if (error)
                 goto out_stop;
+        kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
 
         /* let kobject handle freeing of ls if there's an error */
         do_unreg = 1;
@@ -601,7 +579,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
         kfree(ls->ls_rsbtbl);
  out_lsfree:
         if (do_unreg)
-                kobject_unregister(&ls->ls_kobj);
+                kobject_put(&ls->ls_kobj);
         else
                 kfree(ls);
  out:
@@ -750,7 +728,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
         dlm_clear_members(ls);
         dlm_clear_members_gone(ls);
         kfree(ls->ls_node_array);
-        kobject_unregister(&ls->ls_kobj);
+        kobject_put(&ls->ls_kobj);
         /* The ls structure will be freed when the kobject is done with */
 
         mutex_lock(&ls_lock);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 0b1ab016fa2e..5a719180983c 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -120,22 +120,9 @@ ecryptfs_do_create(struct inode *directory_inode,
         rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode,
                                              ecryptfs_dentry, mode, nd);
         if (rc) {
-                struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode;
-                struct ecryptfs_inode_info *inode_info =
-                        ecryptfs_inode_to_private(ecryptfs_inode);
-
-                printk(KERN_WARNING "%s: Error creating underlying file; "
-                       "rc = [%d]; checking for existing\n", __FUNCTION__, rc);
-                if (inode_info) {
-                        mutex_lock(&inode_info->lower_file_mutex);
-                        if (!inode_info->lower_file) {
-                                mutex_unlock(&inode_info->lower_file_mutex);
-                                printk(KERN_ERR "%s: Failure to set underlying "
-                                       "file; rc = [%d]\n", __FUNCTION__, rc);
-                                goto out_lock;
-                        }
-                        mutex_unlock(&inode_info->lower_file_mutex);
-                }
+                printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
+                       "rc = [%d]\n", __FUNCTION__, rc);
+                goto out_lock;
         }
         rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
                                 directory_inode->i_sb, 0);
@@ -451,6 +438,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
         dentry->d_inode->i_nlink =
                 ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink;
         dentry->d_inode->i_ctime = dir->i_ctime;
+        d_drop(dentry);
 out_unlock:
         unlock_parent(lower_dentry);
         return rc;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index e5580bcb923a..0249aa4ae181 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -734,127 +734,40 @@ static int ecryptfs_init_kmem_caches(void)
         return 0;
 }
 
-struct ecryptfs_obj {
-        char *name;
-        struct list_head slot_list;
-        struct kobject kobj;
-};
-
-struct ecryptfs_attribute {
-        struct attribute attr;
-        ssize_t(*show) (struct ecryptfs_obj *, char *);
-        ssize_t(*store) (struct ecryptfs_obj *, const char *, size_t);
-};
+static struct kobject *ecryptfs_kobj;
 
-static ssize_t
-ecryptfs_attr_store(struct kobject *kobj,
-                    struct attribute *attr, const char *buf, size_t len)
+static ssize_t version_show(struct kobject *kobj,
+                            struct kobj_attribute *attr, char *buff)
 {
-        struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj,
-                                                kobj);
-        struct ecryptfs_attribute *attribute =
-                container_of(attr, struct ecryptfs_attribute, attr);
-
-        return (attribute->store ? attribute->store(obj, buf, len) : 0);
+        return snprintf(buff, PAGE_SIZE, "%d\n", ECRYPTFS_VERSIONING_MASK);
 }
 
-static ssize_t
-ecryptfs_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
-{
-        struct ecryptfs_obj *obj = container_of(kobj, struct ecryptfs_obj,
-                                                kobj);
-        struct ecryptfs_attribute *attribute =
-                container_of(attr, struct ecryptfs_attribute, attr);
-
-        return (attribute->show ? attribute->show(obj, buf) : 0);
-}
+static struct kobj_attribute version_attr = __ATTR_RO(version);
 
-static struct sysfs_ops ecryptfs_sysfs_ops = {
-        .show = ecryptfs_attr_show,
-        .store = ecryptfs_attr_store
+static struct attribute *attributes[] = {
+        &version_attr.attr,
+        NULL,
 };
 
-static struct kobj_type ecryptfs_ktype = {
-        .sysfs_ops = &ecryptfs_sysfs_ops
+static struct attribute_group attr_group = {
+        .attrs = attributes,
 };
 
-static decl_subsys(ecryptfs, &ecryptfs_ktype, NULL);
-
-static ssize_t version_show(struct ecryptfs_obj *obj, char *buff)
-{
-        return snprintf(buff, PAGE_SIZE, "%d\n", ECRYPTFS_VERSIONING_MASK);
-}
-
-static struct ecryptfs_attribute sysfs_attr_version = __ATTR_RO(version);
-
-static struct ecryptfs_version_str_map_elem {
-        u32 flag;
-        char *str;
-} ecryptfs_version_str_map[] = {
-        {ECRYPTFS_VERSIONING_PASSPHRASE, "passphrase"},
-        {ECRYPTFS_VERSIONING_PUBKEY, "pubkey"},
-        {ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH, "plaintext passthrough"},
-        {ECRYPTFS_VERSIONING_POLICY, "policy"},
-        {ECRYPTFS_VERSIONING_XATTR, "metadata in extended attribute"},
-        {ECRYPTFS_VERSIONING_MULTKEY, "multiple keys per file"}
-};
-
-static ssize_t version_str_show(struct ecryptfs_obj *obj, char *buff)
-{
-        int i;
-        int remaining = PAGE_SIZE;
-        int total_written = 0;
-
-        buff[0] = '\0';
-        for (i = 0; i < ARRAY_SIZE(ecryptfs_version_str_map); i++) {
-                int entry_size;
-
-                if (!(ECRYPTFS_VERSIONING_MASK
-                      & ecryptfs_version_str_map[i].flag))
-                        continue;
-                entry_size = strlen(ecryptfs_version_str_map[i].str);
-                if ((entry_size + 2) > remaining)
-                        goto out;
-                memcpy(buff, ecryptfs_version_str_map[i].str, entry_size);
-                buff[entry_size++] = '\n';
-                buff[entry_size] = '\0';
-                buff += entry_size;
-                total_written += entry_size;
-                remaining -= entry_size;
-        }
-out:
-        return total_written;
-}
-
-static struct ecryptfs_attribute sysfs_attr_version_str = __ATTR_RO(version_str);
-
 static int do_sysfs_registration(void)
 {
         int rc;
 
-        rc = subsystem_register(&ecryptfs_subsys);
-        if (rc) {
-                printk(KERN_ERR
-                       "Unable to register ecryptfs sysfs subsystem\n");
-                goto out;
-        }
-        rc = sysfs_create_file(&ecryptfs_subsys.kobj,
-                               &sysfs_attr_version.attr);
-        if (rc) {
-                printk(KERN_ERR
-                       "Unable to create ecryptfs version attribute\n");
-                subsystem_unregister(&ecryptfs_subsys);
+        ecryptfs_kobj = kobject_create_and_add("ecryptfs", fs_kobj);
+        if (!ecryptfs_kobj) {
+                printk(KERN_ERR "Unable to create ecryptfs kset\n");
+                rc = -ENOMEM;
                 goto out;
         }
-        rc = sysfs_create_file(&ecryptfs_subsys.kobj,
-                               &sysfs_attr_version_str.attr);
+        rc = sysfs_create_group(ecryptfs_kobj, &attr_group);
         if (rc) {
                 printk(KERN_ERR
-                       "Unable to create ecryptfs version_str attribute\n");
-                sysfs_remove_file(&ecryptfs_subsys.kobj,
-                                  &sysfs_attr_version.attr);
-                subsystem_unregister(&ecryptfs_subsys);
-                goto out;
+                       "Unable to create ecryptfs version attributes\n");
+                kobject_put(ecryptfs_kobj);
         }
 out:
         return rc;
@@ -862,11 +775,8 @@ out:
 
 static void do_sysfs_unregistration(void)
 {
-        sysfs_remove_file(&ecryptfs_subsys.kobj,
-                          &sysfs_attr_version.attr);
-        sysfs_remove_file(&ecryptfs_subsys.kobj,
-                          &sysfs_attr_version_str.attr);
-        subsystem_unregister(&ecryptfs_subsys);
+        sysfs_remove_group(ecryptfs_kobj, &attr_group);
+        kobject_put(ecryptfs_kobj);
 }
 
 static int __init ecryptfs_init(void)
@@ -894,7 +804,6 @@ static int __init ecryptfs_init(void)
                 printk(KERN_ERR "Failed to register filesystem\n");
                 goto out_free_kmem_caches;
         }
-        kobj_set_kset_s(&ecryptfs_subsys, fs_subsys);
         rc = do_sysfs_registration();
         if (rc) {
                 printk(KERN_ERR "sysfs registration failed\n");
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index f8cdab2bee3d..4859c4eecd65 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -86,7 +86,6 @@ static void ecryptfs_destroy_inode(struct inode *inode)
                         fput(inode_info->lower_file);
                         inode_info->lower_file = NULL;
                         d_drop(lower_dentry);
-                        d_delete(lower_dentry);
                 }
         }
         mutex_unlock(&inode_info->lower_file_mutex);
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 2c1b73fb82ae..5fb366992b73 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -590,21 +590,49 @@ error:
 
 EXPORT_SYMBOL_GPL(fat_free_clusters);
 
+/* 128kb is the whole sectors for FAT12 and FAT16 */
+#define FAT_READA_SIZE (128 * 1024)
+
+static void fat_ent_reada(struct super_block *sb, struct fat_entry *fatent,
+                          unsigned long reada_blocks)
+{
+        struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops;
+        sector_t blocknr;
+        int i, offset;
+
+        ops->ent_blocknr(sb, fatent->entry, &offset, &blocknr);
+
+        for (i = 0; i < reada_blocks; i++)
+                sb_breadahead(sb, blocknr + i);
+}
+
 int fat_count_free_clusters(struct super_block *sb)
 {
         struct msdos_sb_info *sbi = MSDOS_SB(sb);
         struct fatent_operations *ops = sbi->fatent_ops;
         struct fat_entry fatent;
+        unsigned long reada_blocks, reada_mask, cur_block;
         int err = 0, free;
 
         lock_fat(sbi);
         if (sbi->free_clusters != -1)
                 goto out;
 
+        reada_blocks = FAT_READA_SIZE >> sb->s_blocksize_bits;
+        reada_mask = reada_blocks - 1;
+        cur_block = 0;
+
         free = 0;
         fatent_init(&fatent);
         fatent_set_entry(&fatent, FAT_START_ENT);
         while (fatent.entry < sbi->max_cluster) {
+                /* readahead of fat blocks */
+                if ((cur_block & reada_mask) == 0) {
+                        unsigned long rest = sbi->fat_length - cur_block;
+                        fat_ent_reada(sb, &fatent, min(reada_blocks, rest));
+                }
+                cur_block++;
+
                 err = fat_ent_read_block(sb, &fatent);
                 if (err)
                         goto out;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 0fca82021d76..300324bd563c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -482,8 +482,6 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
                 if (wbc->nr_to_write <= 0)
                         break;
         }
-        if (!list_empty(&sb->s_more_io))
-                wbc->more_io = 1;
         return;         /* Leave any unwritten inodes on s_io */
 }
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 84f9f7dfdf5b..e5e80d1a4687 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -744,9 +744,6 @@ static inline void unregister_fuseblk(void)
 }
 #endif
 
-static decl_subsys(fuse, NULL, NULL);
-static decl_subsys(connections, NULL, NULL);
-
 static void fuse_inode_init_once(struct kmem_cache *cachep, void *foo)
 {
         struct inode * inode = foo;
@@ -791,32 +788,37 @@ static void fuse_fs_cleanup(void)
         kmem_cache_destroy(fuse_inode_cachep);
 }
 
+static struct kobject *fuse_kobj;
+static struct kobject *connections_kobj;
+
 static int fuse_sysfs_init(void)
 {
         int err;
 
-        kobj_set_kset_s(&fuse_subsys, fs_subsys);
-        err = subsystem_register(&fuse_subsys);
-        if (err)
-                goto out_err;
+        fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
+        if (!fuse_kobj) {
+                err = -ENOMEM;
+                goto out_err;
+        }
 
-        kobj_set_kset_s(&connections_subsys, fuse_subsys);
-        err = subsystem_register(&connections_subsys);
-        if (err)
-                goto out_fuse_unregister;
+        connections_kobj = kobject_create_and_add("connections", fuse_kobj);
+        if (!connections_kobj) {
+                err = -ENOMEM;
+                goto out_fuse_unregister;
+        }
 
         return 0;
 
  out_fuse_unregister:
-        subsystem_unregister(&fuse_subsys);
+        kobject_put(fuse_kobj);
  out_err:
         return err;
 }
 
 static void fuse_sysfs_cleanup(void)
 {
-        subsystem_unregister(&connections_subsys);
-        subsystem_unregister(&fuse_subsys);
+        kobject_put(connections_kobj);
+        kobject_put(fuse_kobj);
 }
 
 static int __init fuse_init(void)
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index 04ad0caebedb..8fff11058cee 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -2,7 +2,7 @@ obj-$(CONFIG_GFS2_FS) += gfs2.o
 gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
         glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \
         mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
-        ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \
+        ops_fstype.o ops_inode.o ops_super.o quota.o \
         recovery.o rgrp.o super.o sys.o trans.o util.o
 
 obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 93fa427bb5f5..e4effc47abfc 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -59,7 +59,6 @@ struct strip_mine {
 static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
                                u64 block, struct page *page)
 {
-        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
         struct inode *inode = &ip->i_inode;
         struct buffer_head *bh;
         int release = 0;
@@ -95,7 +94,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
         set_buffer_uptodate(bh);
         if (!gfs2_is_jdata(ip))
                 mark_buffer_dirty(bh);
-        if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
+        if (!gfs2_is_writeback(ip))
                 gfs2_trans_add_bh(ip->i_gl, bh, 0);
 
         if (release) {
@@ -453,8 +452,8 @@ static inline void bmap_unlock(struct inode *inode, int create)
  * Returns: errno
  */
 
-int gfs2_block_map(struct inode *inode, u64 lblock, int create,
-                   struct buffer_head *bh_map)
+int gfs2_block_map(struct inode *inode, sector_t lblock,
+                   struct buffer_head *bh_map, int create)
 {
         struct gfs2_inode *ip = GFS2_I(inode);
         struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -470,6 +469,7 @@ int gfs2_block_map(struct inode *inode, u64 lblock, int create,
         unsigned int maxlen = bh_map->b_size >> inode->i_blkbits;
         struct metapath mp;
         u64 size;
+        struct buffer_head *dibh = NULL;
 
         BUG_ON(maxlen == 0);
 
@@ -500,6 +500,8 @@ int gfs2_block_map(struct inode *inode, u64 lblock, int create,
         error = gfs2_meta_inode_buffer(ip, &bh);
         if (error)
                 goto out_fail;
+        dibh = bh;
+        get_bh(dibh);
 
         for (x = 0; x < end_of_metadata; x++) {
                 lookup_block(ip, bh, x, &mp, create, &new, &dblock);
@@ -518,13 +520,8 @@ int gfs2_block_map(struct inode *inode, u64 lblock, int create,
         if (boundary)
                 set_buffer_boundary(bh_map);
         if (new) {
-                struct buffer_head *dibh;
-                error = gfs2_meta_inode_buffer(ip, &dibh);
-                if (!error) {
-                        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-                        gfs2_dinode_out(ip, dibh->b_data);
-                        brelse(dibh);
-                }
+                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+                gfs2_dinode_out(ip, dibh->b_data);
                 set_buffer_new(bh_map);
                 goto out_brelse;
         }
@@ -545,6 +542,8 @@ out_brelse:
 out_ok:
         error = 0;
 out_fail:
+        if (dibh)
+                brelse(dibh);
         bmap_unlock(inode, create);
         return error;
 }
@@ -560,7 +559,7 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
         BUG_ON(!new);
 
         bh.b_size = 1 << (inode->i_blkbits + 5);
-        ret = gfs2_block_map(inode, lblock, create, &bh);
+        ret = gfs2_block_map(inode, lblock, &bh, create);
         *extlen = bh.b_size >> inode->i_blkbits;
         *dblock = bh.b_blocknr;
         if (buffer_new(&bh))
@@ -684,7 +683,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
         if (metadata)
                 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
 
-        error = gfs2_rindex_hold(sdp, &ip->i_alloc.al_ri_gh);
+        error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
         if (error)
                 return error;
 
@@ -786,7 +785,7 @@ out_rg_gunlock:
 out_rlist:
         gfs2_rlist_free(&rlist);
 out:
-        gfs2_glock_dq_uninit(&ip->i_alloc.al_ri_gh);
+        gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
         return error;
 }
 
@@ -879,7 +878,6 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
 {
         struct inode *inode = mapping->host;
         struct gfs2_inode *ip = GFS2_I(inode);
-        struct gfs2_sbd *sdp = GFS2_SB(inode);
         loff_t from = inode->i_size;
         unsigned long index = from >> PAGE_CACHE_SHIFT;
         unsigned offset = from & (PAGE_CACHE_SIZE-1);
@@ -911,7 +909,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
         err = 0;
 
         if (!buffer_mapped(bh)) {
-                gfs2_get_block(inode, iblock, bh, 0);
+                gfs2_block_map(inode, iblock, bh, 0);
                 /* unmapped? It's a hole - nothing to do */
                 if (!buffer_mapped(bh))
                         goto unlock;
@@ -931,7 +929,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
                 err = 0;
         }
 
-        if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
+        if (!gfs2_is_writeback(ip))
                 gfs2_trans_add_bh(ip->i_gl, bh, 0);
 
         zero_user_page(page, offset, length, KM_USER0);
@@ -1224,8 +1222,13 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
                 do_div(lblock_stop, bsize);
         } else {
                 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
+                u64 end_of_file = (ip->i_di.di_size + sdp->sd_sb.sb_bsize - 1) >> shift;
                 lblock = offset >> shift;
                 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
+                if (lblock_stop > end_of_file) {
+                        *alloc_required = 1;
+                        return 0;
+                }
         }
 
         for (; lblock < lblock_stop; lblock += extlen) {
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index ac2fd04370dc..4e6cde2943bd 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -15,7 +15,7 @@ struct gfs2_inode;
 struct page;
 
 int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
-int gfs2_block_map(struct inode *inode, u64 lblock, int create, struct buffer_head *bh);
+int gfs2_block_map(struct inode *inode, sector_t lblock, struct buffer_head *bh, int create);
 int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen);
 
 int gfs2_truncatei(struct gfs2_inode *ip, u64 size);
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
index 3731ab0771d5..e51991947d2c 100644
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@@ -83,56 +83,6 @@ int gfs2_recoverd(void *data)
 }
 
 /**
- * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
- * @sdp: Pointer to GFS2 superblock
- *
- * Also, periodically check to make sure that we're using the most recent
- * journal index.
- */
-
-int gfs2_logd(void *data)
-{
-        struct gfs2_sbd *sdp = data;
-        struct gfs2_holder ji_gh;
-        unsigned long t;
-        int need_flush;
-
-        while (!kthread_should_stop()) {
-                /* Advance the log tail */
-
-                t = sdp->sd_log_flush_time +
-                    gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
-
-                gfs2_ail1_empty(sdp, DIO_ALL);
-                gfs2_log_lock(sdp);
-                need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks);
-                gfs2_log_unlock(sdp);
-                if (need_flush || time_after_eq(jiffies, t)) {
-                        gfs2_log_flush(sdp, NULL);
-                        sdp->sd_log_flush_time = jiffies;
-                }
-
-                /* Check for latest journal index */
-
-                t = sdp->sd_jindex_refresh_time +
-                    gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ;
-
-                if (time_after_eq(jiffies, t)) {
-                        if (!gfs2_jindex_hold(sdp, &ji_gh))
-                                gfs2_glock_dq_uninit(&ji_gh);
-                        sdp->sd_jindex_refresh_time = jiffies;
-                }
-
-                t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
-                if (freezing(current))
-                        refrigerator();
-                schedule_timeout_interruptible(t);
-        }
-
-        return 0;
-}
-
-/**
  * gfs2_quotad - Write cached quota changes into the quota file
  * @sdp: Pointer to GFS2 superblock
  *
diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h
index 0de9b3557955..4be084fb6a62 100644
--- a/fs/gfs2/daemon.h
+++ b/fs/gfs2/daemon.h
@@ -12,7 +12,6 @@
 
 int gfs2_glockd(void *data);
 int gfs2_recoverd(void *data);
-int gfs2_logd(void *data);
 int gfs2_quotad(void *data);
 
 #endif /* __DAEMON_DOT_H__ */
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 9949bb746a52..57e2ed932adc 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1876,7 +1876,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
         if (error)
                 goto out;
 
-        error = gfs2_rindex_hold(sdp, &dip->i_alloc.al_ri_gh);
+        error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh);
         if (error)
                 goto out_qs;
 
@@ -1949,7 +1949,7 @@ out_rg_gunlock:
         gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
 out_rlist:
         gfs2_rlist_free(&rlist);
-        gfs2_glock_dq_uninit(&dip->i_alloc.al_ri_gh);
+        gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh);
 out_qs:
         gfs2_quota_unhold(dip);
 out:
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
index aa8dbf303f6d..f114ba2b3557 100644
--- a/fs/gfs2/eaops.c
+++ b/fs/gfs2/eaops.c
@@ -56,46 +56,6 @@ unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name)
         return type;
 }
 
-static int user_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
-{
-        struct inode *inode = &ip->i_inode;
-        int error = permission(inode, MAY_READ, NULL);
-        if (error)
-                return error;
-
-        return gfs2_ea_get_i(ip, er);
-}
-
-static int user_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
-{
-        struct inode *inode = &ip->i_inode;
-
-        if (S_ISREG(inode->i_mode) ||
-            (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
-                int error = permission(inode, MAY_WRITE, NULL);
-                if (error)
-                        return error;
-        } else
-                return -EPERM;
-
-        return gfs2_ea_set_i(ip, er);
-}
-
-static int user_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
-{
-        struct inode *inode = &ip->i_inode;
-
-        if (S_ISREG(inode->i_mode) ||
-            (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
-                int error = permission(inode, MAY_WRITE, NULL);
-                if (error)
-                        return error;
-        } else
-                return -EPERM;
-
-        return gfs2_ea_remove_i(ip, er);
-}
-
 static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
 {
         if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) &&
@@ -108,8 +68,6 @@ static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
             GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)))
                 return -EOPNOTSUPP;
 
-
-
         return gfs2_ea_get_i(ip, er);
 }
 
@@ -170,40 +128,10 @@ static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
         return gfs2_ea_remove_i(ip, er);
 }
 
-static int security_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
-{
-        struct inode *inode = &ip->i_inode;
-        int error = permission(inode, MAY_READ, NULL);
-        if (error)
-                return error;
-
-        return gfs2_ea_get_i(ip, er);
-}
-
-static int security_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
-{
-        struct inode *inode = &ip->i_inode;
-        int error = permission(inode, MAY_WRITE, NULL);
-        if (error)
-                return error;
-
-        return gfs2_ea_set_i(ip, er);
-}
-
-static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
-{
-        struct inode *inode = &ip->i_inode;
-        int error = permission(inode, MAY_WRITE, NULL);
-        if (error)
-                return error;
-
-        return gfs2_ea_remove_i(ip, er);
-}
-
 static const struct gfs2_eattr_operations gfs2_user_eaops = {
-        .eo_get = user_eo_get,
-        .eo_set = user_eo_set,
-        .eo_remove = user_eo_remove,
+        .eo_get = gfs2_ea_get_i,
+        .eo_set = gfs2_ea_set_i,
+        .eo_remove = gfs2_ea_remove_i,
         .eo_name = "user",
 };
 
@@ -215,9 +143,9 @@ const struct gfs2_eattr_operations gfs2_system_eaops = {
 };
 
 static const struct gfs2_eattr_operations gfs2_security_eaops = {
-        .eo_get = security_eo_get,
-        .eo_set = security_eo_set,
-        .eo_remove = security_eo_remove,
+        .eo_get = gfs2_ea_get_i,
+        .eo_set = gfs2_ea_set_i,
+        .eo_remove = gfs2_ea_remove_i,
         .eo_name = "security",
 };
 
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 2a7435b5c4dc..bee99704ea10 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -1418,7 +1418,7 @@ out:
 static int ea_dealloc_block(struct gfs2_inode *ip)
 {
         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-        struct gfs2_alloc *al = &ip->i_alloc;
+        struct gfs2_alloc *al = ip->i_alloc;
         struct gfs2_rgrpd *rgd;
         struct buffer_head *dibh;
         int error;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index a37efe4aae6f..80e09c50590a 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
@@ -217,7 +217,6 @@ int gfs2_glock_put(struct gfs2_glock *gl)
         if (atomic_dec_and_test(&gl->gl_ref)) {
                 hlist_del(&gl->gl_list);
                 write_unlock(gl_lock_addr(gl->gl_hash));
-                BUG_ON(spin_is_locked(&gl->gl_spin));
                 gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED);
                 gfs2_assert(sdp, list_empty(&gl->gl_reclaim));
                 gfs2_assert(sdp, list_empty(&gl->gl_holders));
@@ -346,7 +345,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
         gl->gl_object = NULL;
         gl->gl_sbd = sdp;
         gl->gl_aspace = NULL;
-        lops_init_le(&gl->gl_le, &gfs2_glock_lops);
         INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
 
         /* If this glock protects actual on-disk data or metadata blocks,
@@ -461,7 +459,6 @@ static void wait_on_holder(struct gfs2_holder *gh)
 
 static void gfs2_demote_wake(struct gfs2_glock *gl)
 {
-        BUG_ON(!spin_is_locked(&gl->gl_spin));
         gl->gl_demote_state = LM_ST_EXCLUSIVE;
         clear_bit(GLF_DEMOTE, &gl->gl_flags);
         smp_mb__after_clear_bit();
@@ -507,21 +504,12 @@ static int rq_mutex(struct gfs2_holder *gh)
 static int rq_promote(struct gfs2_holder *gh)
 {
         struct gfs2_glock *gl = gh->gh_gl;
-        struct gfs2_sbd *sdp = gl->gl_sbd;
 
         if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
                 if (list_empty(&gl->gl_holders)) {
                         gl->gl_req_gh = gh;
                         set_bit(GLF_LOCK, &gl->gl_flags);
                         spin_unlock(&gl->gl_spin);
-
-                        if (atomic_read(&sdp->sd_reclaim_count) >
-                            gfs2_tune_get(sdp, gt_reclaim_limit) &&
-                            !(gh->gh_flags & LM_FLAG_PRIORITY)) {
-                                gfs2_reclaim_glock(sdp);
-                                gfs2_reclaim_glock(sdp);
-                        }
-
                         gfs2_glock_xmote_th(gh->gh_gl, gh);
                         spin_lock(&gl->gl_spin);
                 }
@@ -567,7 +555,10 @@ static int rq_demote(struct gfs2_glock *gl)
                 gfs2_demote_wake(gl);
                 return 0;
         }
+
         set_bit(GLF_LOCK, &gl->gl_flags);
+        set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
+
         if (gl->gl_demote_state == LM_ST_UNLOCKED ||
             gl->gl_state != LM_ST_EXCLUSIVE) {
                 spin_unlock(&gl->gl_spin);
@@ -576,7 +567,9 @@ static int rq_demote(struct gfs2_glock *gl)
                 spin_unlock(&gl->gl_spin);
                 gfs2_glock_xmote_th(gl, NULL);
         }
+
         spin_lock(&gl->gl_spin);
+        clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
 
         return 0;
 }
@@ -598,23 +591,18 @@ static void run_queue(struct gfs2_glock *gl)
                 if (!list_empty(&gl->gl_waiters1)) {
                         gh = list_entry(gl->gl_waiters1.next,
                                         struct gfs2_holder, gh_list);
-
-                        if (test_bit(HIF_MUTEX, &gh->gh_iflags))
-                                blocked = rq_mutex(gh);
-                        else
-                                gfs2_assert_warn(gl->gl_sbd, 0);
-
+                        blocked = rq_mutex(gh);
                 } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
                         blocked = rq_demote(gl);
+                        if (gl->gl_waiters2 && !blocked) {
+                                set_bit(GLF_DEMOTE, &gl->gl_flags);
+                                gl->gl_demote_state = LM_ST_UNLOCKED;
+                        }
+                        gl->gl_waiters2 = 0;
                 } else if (!list_empty(&gl->gl_waiters3)) {
                         gh = list_entry(gl->gl_waiters3.next,
                                         struct gfs2_holder, gh_list);
-
-                        if (test_bit(HIF_PROMOTE, &gh->gh_iflags))
-                                blocked = rq_promote(gh);
-                        else
-                                gfs2_assert_warn(gl->gl_sbd, 0);
-
+                        blocked = rq_promote(gh);
                 } else
                         break;
 
@@ -632,27 +620,21 @@ static void run_queue(struct gfs2_glock *gl)
 
 static void gfs2_glmutex_lock(struct gfs2_glock *gl)
 {
-        struct gfs2_holder gh;
-
-        gfs2_holder_init(gl, 0, 0, &gh);
-        set_bit(HIF_MUTEX, &gh.gh_iflags);
-        if (test_and_set_bit(HIF_WAIT, &gh.gh_iflags))
-                BUG();
-
         spin_lock(&gl->gl_spin);
         if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
+                struct gfs2_holder gh;
+
+                gfs2_holder_init(gl, 0, 0, &gh);
+                set_bit(HIF_WAIT, &gh.gh_iflags);
                 list_add_tail(&gh.gh_list, &gl->gl_waiters1);
+                spin_unlock(&gl->gl_spin);
+                wait_on_holder(&gh);
+                gfs2_holder_uninit(&gh);
         } else {
                 gl->gl_owner_pid = current->pid;
                 gl->gl_ip = (unsigned long)__builtin_return_address(0);
-                clear_bit(HIF_WAIT, &gh.gh_iflags);
-                smp_mb();
-                wake_up_bit(&gh.gh_iflags, HIF_WAIT);
+                spin_unlock(&gl->gl_spin);
         }
-        spin_unlock(&gl->gl_spin);
-
-        wait_on_holder(&gh);
-        gfs2_holder_uninit(&gh);
 }
 
 /**
@@ -691,7 +673,6 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
         gl->gl_owner_pid = 0;
         gl->gl_ip = 0;
         run_queue(gl);
-        BUG_ON(!spin_is_locked(&gl->gl_spin));
         spin_unlock(&gl->gl_spin);
 }
 
@@ -722,7 +703,10 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
                 }
         } else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
                    gl->gl_demote_state != state) {
-                gl->gl_demote_state = LM_ST_UNLOCKED;
+                if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags))
+                        gl->gl_waiters2 = 1;
+                else
+                        gl->gl_demote_state = LM_ST_UNLOCKED;
         }
         spin_unlock(&gl->gl_spin);
 }
@@ -943,8 +927,8 @@ static void gfs2_glock_drop_th(struct gfs2_glock *gl)
         const struct gfs2_glock_operations *glops = gl->gl_ops;
         unsigned int ret;
 
-        if (glops->go_drop_th)
-                glops->go_drop_th(gl);
+        if (glops->go_xmote_th)
+                glops->go_xmote_th(gl);
 
         gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
         gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
@@ -1156,8 +1140,6 @@ restart:
                 return -EIO;
         }
 
-        set_bit(HIF_PROMOTE, &gh->gh_iflags);
-
         spin_lock(&gl->gl_spin);
         add_to_queue(gh);
         run_queue(gl);
@@ -1248,12 +1230,11 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
         list_del_init(&gh->gh_list);
 
         if (list_empty(&gl->gl_holders)) {
-                spin_unlock(&gl->gl_spin);
-
-                if (glops->go_unlock)
+                if (glops->go_unlock) {
+                        spin_unlock(&gl->gl_spin);
                         glops->go_unlock(gh);
-
-                spin_lock(&gl->gl_spin);
+                        spin_lock(&gl->gl_spin);
+                }
                 gl->gl_stamp = jiffies;
         }
 
@@ -1910,8 +1891,6 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
         print_dbg(gi, "  req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no");
         print_dbg(gi, "  lvb_count = %d\n", atomic_read(&gl->gl_lvb_count));
         print_dbg(gi, "  object = %s\n", (gl->gl_object) ? "yes" : "no");
-        print_dbg(gi, "  le = %s\n",
-                (list_empty(&gl->gl_le.le_list)) ? "no" : "yes");
         print_dbg(gi, "  reclaim = %s\n",
                 (list_empty(&gl->gl_reclaim)) ? "no" : "yes");
         if (gl->gl_aspace)
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 4670dcb2a877..c663b7a0f410 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -56,7 +56,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
56 bd = list_entry(head->next, struct gfs2_bufdata, 56 bd = list_entry(head->next, struct gfs2_bufdata,
57 bd_ail_gl_list); 57 bd_ail_gl_list);
58 bh = bd->bd_bh; 58 bh = bd->bd_bh;
59 gfs2_remove_from_ail(NULL, bd); 59 gfs2_remove_from_ail(bd);
60 bd->bd_bh = NULL; 60 bd->bd_bh = NULL;
61 bh->b_private = NULL; 61 bh->b_private = NULL;
62 bd->bd_blkno = bh->b_blocknr; 62 bd->bd_blkno = bh->b_blocknr;
@@ -86,15 +86,10 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
86 if (!ip || !S_ISREG(inode->i_mode)) 86 if (!ip || !S_ISREG(inode->i_mode))
87 return; 87 return;
88 88
89 if (!test_bit(GIF_PAGED, &ip->i_flags))
90 return;
91
92 unmap_shared_mapping_range(inode->i_mapping, 0, 0); 89 unmap_shared_mapping_range(inode->i_mapping, 0, 0);
93
94 if (test_bit(GIF_SW_PAGED, &ip->i_flags)) 90 if (test_bit(GIF_SW_PAGED, &ip->i_flags))
95 set_bit(GLF_DIRTY, &gl->gl_flags); 91 set_bit(GLF_DIRTY, &gl->gl_flags);
96 92
97 clear_bit(GIF_SW_PAGED, &ip->i_flags);
98} 93}
99 94
100/** 95/**
@@ -143,44 +138,34 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags)
143static void inode_go_sync(struct gfs2_glock *gl) 138static void inode_go_sync(struct gfs2_glock *gl)
144{ 139{
145 struct gfs2_inode *ip = gl->gl_object; 140 struct gfs2_inode *ip = gl->gl_object;
141 struct address_space *metamapping = gl->gl_aspace->i_mapping;
142 int error;
143
144 if (gl->gl_state != LM_ST_UNLOCKED)
145 gfs2_pte_inval(gl);
146 if (gl->gl_state != LM_ST_EXCLUSIVE)
147 return;
146 148
147 if (ip && !S_ISREG(ip->i_inode.i_mode)) 149 if (ip && !S_ISREG(ip->i_inode.i_mode))
148 ip = NULL; 150 ip = NULL;
149 151
150 if (test_bit(GLF_DIRTY, &gl->gl_flags)) { 152 if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
151 if (ip && !gfs2_is_jdata(ip))
152 filemap_fdatawrite(ip->i_inode.i_mapping);
153 gfs2_log_flush(gl->gl_sbd, gl); 153 gfs2_log_flush(gl->gl_sbd, gl);
154 if (ip && gfs2_is_jdata(ip)) 154 filemap_fdatawrite(metamapping);
155 filemap_fdatawrite(ip->i_inode.i_mapping);
156 gfs2_meta_sync(gl);
157 if (ip) { 155 if (ip) {
158 struct address_space *mapping = ip->i_inode.i_mapping; 156 struct address_space *mapping = ip->i_inode.i_mapping;
159 int error = filemap_fdatawait(mapping); 157 filemap_fdatawrite(mapping);
158 error = filemap_fdatawait(mapping);
160 mapping_set_error(mapping, error); 159 mapping_set_error(mapping, error);
161 } 160 }
161 error = filemap_fdatawait(metamapping);
162 mapping_set_error(metamapping, error);
162 clear_bit(GLF_DIRTY, &gl->gl_flags); 163 clear_bit(GLF_DIRTY, &gl->gl_flags);
163 gfs2_ail_empty_gl(gl); 164 gfs2_ail_empty_gl(gl);
164 } 165 }
165} 166}
166 167
167/** 168/**
168 * inode_go_xmote_th - promote/demote a glock
169 * @gl: the glock
170 * @state: the requested state
171 * @flags:
172 *
173 */
174
175static void inode_go_xmote_th(struct gfs2_glock *gl)
176{
177 if (gl->gl_state != LM_ST_UNLOCKED)
178 gfs2_pte_inval(gl);
179 if (gl->gl_state == LM_ST_EXCLUSIVE)
180 inode_go_sync(gl);
181}
182
183/**
184 * inode_go_xmote_bh - After promoting/demoting a glock 169 * inode_go_xmote_bh - After promoting/demoting a glock
185 * @gl: the glock 170 * @gl: the glock
186 * 171 *
@@ -201,22 +186,6 @@ static void inode_go_xmote_bh(struct gfs2_glock *gl)
201} 186}
202 187
203/** 188/**
204 * inode_go_drop_th - unlock a glock
205 * @gl: the glock
206 *
207 * Invoked from rq_demote().
208 * Another node needs the lock in EXCLUSIVE mode, or lock (unused for too long)
209 * is being purged from our node's glock cache; we're dropping lock.
210 */
211
212static void inode_go_drop_th(struct gfs2_glock *gl)
213{
214 gfs2_pte_inval(gl);
215 if (gl->gl_state == LM_ST_EXCLUSIVE)
216 inode_go_sync(gl);
217}
218
219/**
220 * inode_go_inval - prepare a inode glock to be released 189 * inode_go_inval - prepare a inode glock to be released
221 * @gl: the glock 190 * @gl: the glock
222 * @flags: 191 * @flags:
@@ -234,10 +203,8 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
234 set_bit(GIF_INVALID, &ip->i_flags); 203 set_bit(GIF_INVALID, &ip->i_flags);
235 } 204 }
236 205
237 if (ip && S_ISREG(ip->i_inode.i_mode)) { 206 if (ip && S_ISREG(ip->i_inode.i_mode))
238 truncate_inode_pages(ip->i_inode.i_mapping, 0); 207 truncate_inode_pages(ip->i_inode.i_mapping, 0);
239 clear_bit(GIF_PAGED, &ip->i_flags);
240 }
241} 208}
242 209
243/** 210/**
@@ -294,23 +261,6 @@ static int inode_go_lock(struct gfs2_holder *gh)
294} 261}
295 262
296/** 263/**
297 * inode_go_unlock - operation done before an inode lock is unlocked by a
298 * process
299 * @gl: the glock
300 * @flags:
301 *
302 */
303
304static void inode_go_unlock(struct gfs2_holder *gh)
305{
306 struct gfs2_glock *gl = gh->gh_gl;
307 struct gfs2_inode *ip = gl->gl_object;
308
309 if (ip)
310 gfs2_meta_cache_flush(ip);
311}
312
313/**
314 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock 264 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
315 * @gl: the glock 265 * @gl: the glock
316 * 266 *
@@ -350,14 +300,14 @@ static void rgrp_go_unlock(struct gfs2_holder *gh)
350} 300}
351 301
352/** 302/**
353 * trans_go_xmote_th - promote/demote the transaction glock 303 * trans_go_sync - promote/demote the transaction glock
354 * @gl: the glock 304 * @gl: the glock
355 * @state: the requested state 305 * @state: the requested state
356 * @flags: 306 * @flags:
357 * 307 *
358 */ 308 */
359 309
360static void trans_go_xmote_th(struct gfs2_glock *gl) 310static void trans_go_sync(struct gfs2_glock *gl)
361{ 311{
362 struct gfs2_sbd *sdp = gl->gl_sbd; 312 struct gfs2_sbd *sdp = gl->gl_sbd;
363 313
@@ -384,7 +334,6 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl)
384 334
385 if (gl->gl_state != LM_ST_UNLOCKED && 335 if (gl->gl_state != LM_ST_UNLOCKED &&
386 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { 336 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
387 gfs2_meta_cache_flush(GFS2_I(sdp->sd_jdesc->jd_inode));
388 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); 337 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
389 338
390 error = gfs2_find_jhead(sdp->sd_jdesc, &head); 339 error = gfs2_find_jhead(sdp->sd_jdesc, &head);
@@ -402,24 +351,6 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl)
402} 351}
403 352
404/** 353/**
405 * trans_go_drop_th - unlock the transaction glock
406 * @gl: the glock
407 *
408 * We want to sync the device even with localcaching. Remember
409 * that localcaching journal replay only marks buffers dirty.
410 */
411
412static void trans_go_drop_th(struct gfs2_glock *gl)
413{
414 struct gfs2_sbd *sdp = gl->gl_sbd;
415
416 if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
417 gfs2_meta_syncfs(sdp);
418 gfs2_log_shutdown(sdp);
419 }
420}
421
422/**
423 * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock 354 * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock
424 * @gl: the glock 355 * @gl: the glock
425 * 356 *
@@ -433,25 +364,21 @@ static int quota_go_demote_ok(struct gfs2_glock *gl)
433 364
434const struct gfs2_glock_operations gfs2_meta_glops = { 365const struct gfs2_glock_operations gfs2_meta_glops = {
435 .go_xmote_th = meta_go_sync, 366 .go_xmote_th = meta_go_sync,
436 .go_drop_th = meta_go_sync,
437 .go_type = LM_TYPE_META, 367 .go_type = LM_TYPE_META,
438}; 368};
439 369
440const struct gfs2_glock_operations gfs2_inode_glops = { 370const struct gfs2_glock_operations gfs2_inode_glops = {
441 .go_xmote_th = inode_go_xmote_th, 371 .go_xmote_th = inode_go_sync,
442 .go_xmote_bh = inode_go_xmote_bh, 372 .go_xmote_bh = inode_go_xmote_bh,
443 .go_drop_th = inode_go_drop_th,
444 .go_inval = inode_go_inval, 373 .go_inval = inode_go_inval,
445 .go_demote_ok = inode_go_demote_ok, 374 .go_demote_ok = inode_go_demote_ok,
446 .go_lock = inode_go_lock, 375 .go_lock = inode_go_lock,
447 .go_unlock = inode_go_unlock,
448 .go_type = LM_TYPE_INODE, 376 .go_type = LM_TYPE_INODE,
449 .go_min_hold_time = HZ / 10, 377 .go_min_hold_time = HZ / 10,
450}; 378};
451 379
452const struct gfs2_glock_operations gfs2_rgrp_glops = { 380const struct gfs2_glock_operations gfs2_rgrp_glops = {
453 .go_xmote_th = meta_go_sync, 381 .go_xmote_th = meta_go_sync,
454 .go_drop_th = meta_go_sync,
455 .go_inval = meta_go_inval, 382 .go_inval = meta_go_inval,
456 .go_demote_ok = rgrp_go_demote_ok, 383 .go_demote_ok = rgrp_go_demote_ok,
457 .go_lock = rgrp_go_lock, 384 .go_lock = rgrp_go_lock,
@@ -461,9 +388,8 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
461}; 388};
462 389
463const struct gfs2_glock_operations gfs2_trans_glops = { 390const struct gfs2_glock_operations gfs2_trans_glops = {
464 .go_xmote_th = trans_go_xmote_th, 391 .go_xmote_th = trans_go_sync,
465 .go_xmote_bh = trans_go_xmote_bh, 392 .go_xmote_bh = trans_go_xmote_bh,
466 .go_drop_th = trans_go_drop_th,
467 .go_type = LM_TYPE_NONDISK, 393 .go_type = LM_TYPE_NONDISK,
468}; 394};
469 395
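
The glops.c changes above fold the separate go_drop_th callback into go_xmote_th: a single hook now runs before every state transition, and inode_go_sync() itself decides how much work to do by looking at gl->gl_state. A minimal userspace sketch of that dispatch pattern follows; the names mirror the kernel ones but this is a toy model, not GFS2 code.

#include <stdio.h>

/* Illustrative glock states and a cut-down operations table. */
enum state { ST_UNLOCKED, ST_SHARED, ST_EXCLUSIVE };

struct glock;

struct glock_ops {
	void (*go_xmote_th)(struct glock *gl);	/* runs before every state change */
	/* go_drop_th is gone: the same hook serves promote and demote */
};

struct glock {
	enum state state;
	const struct glock_ops *ops;
	const char *name;
};

/* Single pre-transition hook: the current state decides whether page
 * invalidation and/or a full sync is needed. */
static void inode_go_sync(struct glock *gl)
{
	if (gl->state != ST_UNLOCKED)
		printf("%s: invalidate mapped pages\n", gl->name);
	if (gl->state != ST_EXCLUSIVE)
		return;
	printf("%s: write back dirty data and metadata\n", gl->name);
}

static const struct glock_ops inode_glops = { .go_xmote_th = inode_go_sync };

static void transition(struct glock *gl, enum state new_state)
{
	if (gl->ops->go_xmote_th)
		gl->ops->go_xmote_th(gl);	/* one hook for promote and demote */
	gl->state = new_state;
}

int main(void)
{
	struct glock gl = { ST_EXCLUSIVE, &inode_glops, "inode glock" };

	transition(&gl, ST_UNLOCKED);	/* demotion path, formerly go_drop_th */
	return 0;
}
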
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index eaddfb5a8e6f..513aaf0dc0ab 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -131,7 +131,6 @@ struct gfs2_bufdata {
131struct gfs2_glock_operations { 131struct gfs2_glock_operations {
132 void (*go_xmote_th) (struct gfs2_glock *gl); 132 void (*go_xmote_th) (struct gfs2_glock *gl);
133 void (*go_xmote_bh) (struct gfs2_glock *gl); 133 void (*go_xmote_bh) (struct gfs2_glock *gl);
134 void (*go_drop_th) (struct gfs2_glock *gl);
135 void (*go_inval) (struct gfs2_glock *gl, int flags); 134 void (*go_inval) (struct gfs2_glock *gl, int flags);
136 int (*go_demote_ok) (struct gfs2_glock *gl); 135 int (*go_demote_ok) (struct gfs2_glock *gl);
137 int (*go_lock) (struct gfs2_holder *gh); 136 int (*go_lock) (struct gfs2_holder *gh);
@@ -141,10 +140,6 @@ struct gfs2_glock_operations {
141}; 140};
142 141
143enum { 142enum {
144 /* Actions */
145 HIF_MUTEX = 0,
146 HIF_PROMOTE = 1,
147
148 /* States */ 143 /* States */
149 HIF_HOLDER = 6, 144 HIF_HOLDER = 6,
150 HIF_FIRST = 7, 145 HIF_FIRST = 7,
@@ -171,6 +166,8 @@ enum {
171 GLF_DEMOTE = 3, 166 GLF_DEMOTE = 3,
172 GLF_PENDING_DEMOTE = 4, 167 GLF_PENDING_DEMOTE = 4,
173 GLF_DIRTY = 5, 168 GLF_DIRTY = 5,
169 GLF_DEMOTE_IN_PROGRESS = 6,
170 GLF_LFLUSH = 7,
174}; 171};
175 172
176struct gfs2_glock { 173struct gfs2_glock {
@@ -190,6 +187,7 @@ struct gfs2_glock {
190 struct list_head gl_holders; 187 struct list_head gl_holders;
191 struct list_head gl_waiters1; /* HIF_MUTEX */ 188 struct list_head gl_waiters1; /* HIF_MUTEX */
192 struct list_head gl_waiters3; /* HIF_PROMOTE */ 189 struct list_head gl_waiters3; /* HIF_PROMOTE */
190 int gl_waiters2; /* GIF_DEMOTE */
193 191
194 const struct gfs2_glock_operations *gl_ops; 192 const struct gfs2_glock_operations *gl_ops;
195 193
@@ -210,7 +208,6 @@ struct gfs2_glock {
210 struct gfs2_sbd *gl_sbd; 208 struct gfs2_sbd *gl_sbd;
211 209
212 struct inode *gl_aspace; 210 struct inode *gl_aspace;
213 struct gfs2_log_element gl_le;
214 struct list_head gl_ail_list; 211 struct list_head gl_ail_list;
215 atomic_t gl_ail_count; 212 atomic_t gl_ail_count;
216 struct delayed_work gl_work; 213 struct delayed_work gl_work;
@@ -239,7 +236,6 @@ struct gfs2_alloc {
239enum { 236enum {
240 GIF_INVALID = 0, 237 GIF_INVALID = 0,
241 GIF_QD_LOCKED = 1, 238 GIF_QD_LOCKED = 1,
242 GIF_PAGED = 2,
243 GIF_SW_PAGED = 3, 239 GIF_SW_PAGED = 3,
244}; 240};
245 241
@@ -268,14 +264,10 @@ struct gfs2_inode {
268 struct gfs2_glock *i_gl; /* Move into i_gh? */ 264 struct gfs2_glock *i_gl; /* Move into i_gh? */
269 struct gfs2_holder i_iopen_gh; 265 struct gfs2_holder i_iopen_gh;
270 struct gfs2_holder i_gh; /* for prepare/commit_write only */ 266 struct gfs2_holder i_gh; /* for prepare/commit_write only */
271 struct gfs2_alloc i_alloc; 267 struct gfs2_alloc *i_alloc;
272 u64 i_last_rg_alloc; 268 u64 i_last_rg_alloc;
273 269
274 spinlock_t i_spin;
275 struct rw_semaphore i_rw_mutex; 270 struct rw_semaphore i_rw_mutex;
276 unsigned long i_last_pfault;
277
278 struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT];
279}; 271};
280 272
281/* 273/*
@@ -287,19 +279,12 @@ static inline struct gfs2_inode *GFS2_I(struct inode *inode)
287 return container_of(inode, struct gfs2_inode, i_inode); 279 return container_of(inode, struct gfs2_inode, i_inode);
288} 280}
289 281
290/* To be removed? */ 282static inline struct gfs2_sbd *GFS2_SB(const struct inode *inode)
291static inline struct gfs2_sbd *GFS2_SB(struct inode *inode)
292{ 283{
293 return inode->i_sb->s_fs_info; 284 return inode->i_sb->s_fs_info;
294} 285}
295 286
296enum {
297 GFF_DID_DIRECT_ALLOC = 0,
298 GFF_EXLOCK = 1,
299};
300
301struct gfs2_file { 287struct gfs2_file {
302 unsigned long f_flags; /* GFF_... */
303 struct mutex f_fl_mutex; 288 struct mutex f_fl_mutex;
304 struct gfs2_holder f_fl_gh; 289 struct gfs2_holder f_fl_gh;
305}; 290};
@@ -373,8 +358,17 @@ struct gfs2_ail {
373 u64 ai_sync_gen; 358 u64 ai_sync_gen;
374}; 359};
375 360
361struct gfs2_journal_extent {
362 struct list_head extent_list;
363
364 unsigned int lblock; /* First logical block */
365 u64 dblock; /* First disk block */
366 u64 blocks;
367};
368
376struct gfs2_jdesc { 369struct gfs2_jdesc {
377 struct list_head jd_list; 370 struct list_head jd_list;
371 struct list_head extent_list;
378 372
379 struct inode *jd_inode; 373 struct inode *jd_inode;
380 unsigned int jd_jid; 374 unsigned int jd_jid;
@@ -421,13 +415,9 @@ struct gfs2_args {
421struct gfs2_tune { 415struct gfs2_tune {
422 spinlock_t gt_spin; 416 spinlock_t gt_spin;
423 417
424 unsigned int gt_ilimit;
425 unsigned int gt_ilimit_tries;
426 unsigned int gt_ilimit_min;
427 unsigned int gt_demote_secs; /* Cache retention for unheld glock */ 418 unsigned int gt_demote_secs; /* Cache retention for unheld glock */
428 unsigned int gt_incore_log_blocks; 419 unsigned int gt_incore_log_blocks;
429 unsigned int gt_log_flush_secs; 420 unsigned int gt_log_flush_secs;
430 unsigned int gt_jindex_refresh_secs; /* Check for new journal index */
431 421
432 unsigned int gt_recoverd_secs; 422 unsigned int gt_recoverd_secs;
433 unsigned int gt_logd_secs; 423 unsigned int gt_logd_secs;
@@ -443,10 +433,8 @@ struct gfs2_tune {
443 unsigned int gt_new_files_jdata; 433 unsigned int gt_new_files_jdata;
444 unsigned int gt_new_files_directio; 434 unsigned int gt_new_files_directio;
445 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ 435 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
446 unsigned int gt_lockdump_size;
447 unsigned int gt_stall_secs; /* Detects trouble! */ 436 unsigned int gt_stall_secs; /* Detects trouble! */
448 unsigned int gt_complain_secs; 437 unsigned int gt_complain_secs;
449 unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */
450 unsigned int gt_statfs_quantum; 438 unsigned int gt_statfs_quantum;
451 unsigned int gt_statfs_slow; 439 unsigned int gt_statfs_slow;
452}; 440};
@@ -539,7 +527,6 @@ struct gfs2_sbd {
539 /* StatFS stuff */ 527 /* StatFS stuff */
540 528
541 spinlock_t sd_statfs_spin; 529 spinlock_t sd_statfs_spin;
542 struct mutex sd_statfs_mutex;
543 struct gfs2_statfs_change_host sd_statfs_master; 530 struct gfs2_statfs_change_host sd_statfs_master;
544 struct gfs2_statfs_change_host sd_statfs_local; 531 struct gfs2_statfs_change_host sd_statfs_local;
545 unsigned long sd_statfs_sync_time; 532 unsigned long sd_statfs_sync_time;
@@ -602,20 +589,18 @@ struct gfs2_sbd {
602 unsigned int sd_log_commited_databuf; 589 unsigned int sd_log_commited_databuf;
603 unsigned int sd_log_commited_revoke; 590 unsigned int sd_log_commited_revoke;
604 591
605 unsigned int sd_log_num_gl;
606 unsigned int sd_log_num_buf; 592 unsigned int sd_log_num_buf;
607 unsigned int sd_log_num_revoke; 593 unsigned int sd_log_num_revoke;
608 unsigned int sd_log_num_rg; 594 unsigned int sd_log_num_rg;
609 unsigned int sd_log_num_databuf; 595 unsigned int sd_log_num_databuf;
610 596
611 struct list_head sd_log_le_gl;
612 struct list_head sd_log_le_buf; 597 struct list_head sd_log_le_buf;
613 struct list_head sd_log_le_revoke; 598 struct list_head sd_log_le_revoke;
614 struct list_head sd_log_le_rg; 599 struct list_head sd_log_le_rg;
615 struct list_head sd_log_le_databuf; 600 struct list_head sd_log_le_databuf;
616 struct list_head sd_log_le_ordered; 601 struct list_head sd_log_le_ordered;
617 602
618 unsigned int sd_log_blks_free; 603 atomic_t sd_log_blks_free;
619 struct mutex sd_log_reserve_mutex; 604 struct mutex sd_log_reserve_mutex;
620 605
621 u64 sd_log_sequence; 606 u64 sd_log_sequence;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 5f6dc32946cd..728d3169e7bd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -31,7 +31,6 @@
31#include "log.h" 31#include "log.h"
32#include "meta_io.h" 32#include "meta_io.h"
33#include "ops_address.h" 33#include "ops_address.h"
34#include "ops_file.h"
35#include "ops_inode.h" 34#include "ops_inode.h"
36#include "quota.h" 35#include "quota.h"
37#include "rgrp.h" 36#include "rgrp.h"
@@ -132,15 +131,21 @@ static struct inode *gfs2_iget_skip(struct super_block *sb,
132 131
133void gfs2_set_iop(struct inode *inode) 132void gfs2_set_iop(struct inode *inode)
134{ 133{
134 struct gfs2_sbd *sdp = GFS2_SB(inode);
135 umode_t mode = inode->i_mode; 135 umode_t mode = inode->i_mode;
136 136
137 if (S_ISREG(mode)) { 137 if (S_ISREG(mode)) {
138 inode->i_op = &gfs2_file_iops; 138 inode->i_op = &gfs2_file_iops;
139 inode->i_fop = &gfs2_file_fops; 139 if (sdp->sd_args.ar_localflocks)
140 inode->i_mapping->a_ops = &gfs2_file_aops; 140 inode->i_fop = &gfs2_file_fops_nolock;
141 else
142 inode->i_fop = &gfs2_file_fops;
141 } else if (S_ISDIR(mode)) { 143 } else if (S_ISDIR(mode)) {
142 inode->i_op = &gfs2_dir_iops; 144 inode->i_op = &gfs2_dir_iops;
143 inode->i_fop = &gfs2_dir_fops; 145 if (sdp->sd_args.ar_localflocks)
146 inode->i_fop = &gfs2_dir_fops_nolock;
147 else
148 inode->i_fop = &gfs2_dir_fops;
144 } else if (S_ISLNK(mode)) { 149 } else if (S_ISLNK(mode)) {
145 inode->i_op = &gfs2_symlink_iops; 150 inode->i_op = &gfs2_symlink_iops;
146 } else { 151 } else {
@@ -291,12 +296,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
291 di->di_entries = be32_to_cpu(str->di_entries); 296 di->di_entries = be32_to_cpu(str->di_entries);
292 297
293 di->di_eattr = be64_to_cpu(str->di_eattr); 298 di->di_eattr = be64_to_cpu(str->di_eattr);
294 return 0; 299 if (S_ISREG(ip->i_inode.i_mode))
295} 300 gfs2_set_aops(&ip->i_inode);
296 301
297static void gfs2_inode_bh(struct gfs2_inode *ip, struct buffer_head *bh) 302 return 0;
298{
299 ip->i_cache[0] = bh;
300} 303}
301 304
302/** 305/**
@@ -366,7 +369,8 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip)
366 if (error) 369 if (error)
367 goto out_rg_gunlock; 370 goto out_rg_gunlock;
368 371
369 gfs2_trans_add_gl(ip->i_gl); 372 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
373 set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags);
370 374
371 gfs2_free_di(rgd, ip); 375 gfs2_free_di(rgd, ip);
372 376
@@ -707,9 +711,10 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
707 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 711 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
708 int error; 712 int error;
709 713
710 gfs2_alloc_get(dip); 714 if (gfs2_alloc_get(dip) == NULL)
715 return -ENOMEM;
711 716
712 dip->i_alloc.al_requested = RES_DINODE; 717 dip->i_alloc->al_requested = RES_DINODE;
713 error = gfs2_inplace_reserve(dip); 718 error = gfs2_inplace_reserve(dip);
714 if (error) 719 if (error)
715 goto out; 720 goto out;
@@ -855,7 +860,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
855 860
856 error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name); 861 error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name);
857 if (alloc_required < 0) 862 if (alloc_required < 0)
858 goto fail; 863 goto fail_quota_locks;
859 if (alloc_required) { 864 if (alloc_required) {
860 error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); 865 error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid);
861 if (error) 866 if (error)
@@ -896,7 +901,7 @@ fail_end_trans:
896 gfs2_trans_end(sdp); 901 gfs2_trans_end(sdp);
897 902
898fail_ipreserv: 903fail_ipreserv:
899 if (dip->i_alloc.al_rgd) 904 if (dip->i_alloc->al_rgd)
900 gfs2_inplace_release(dip); 905 gfs2_inplace_release(dip);
901 906
902fail_quota_locks: 907fail_quota_locks:
@@ -966,7 +971,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
966 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 971 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
967 int error; 972 int error;
968 u64 generation; 973 u64 generation;
969 struct buffer_head *bh=NULL; 974 struct buffer_head *bh = NULL;
970 975
971 if (!name->len || name->len > GFS2_FNAMESIZE) 976 if (!name->len || name->len > GFS2_FNAMESIZE)
972 return ERR_PTR(-ENAMETOOLONG); 977 return ERR_PTR(-ENAMETOOLONG);
@@ -1003,8 +1008,6 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
1003 if (IS_ERR(inode)) 1008 if (IS_ERR(inode))
1004 goto fail_gunlock2; 1009 goto fail_gunlock2;
1005 1010
1006 gfs2_inode_bh(GFS2_I(inode), bh);
1007
1008 error = gfs2_inode_refresh(GFS2_I(inode)); 1011 error = gfs2_inode_refresh(GFS2_I(inode));
1009 if (error) 1012 if (error)
1010 goto fail_gunlock2; 1013 goto fail_gunlock2;
@@ -1021,6 +1024,8 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
1021 if (error) 1024 if (error)
1022 goto fail_gunlock2; 1025 goto fail_gunlock2;
1023 1026
1027 if (bh)
1028 brelse(bh);
1024 if (!inode) 1029 if (!inode)
1025 return ERR_PTR(-ENOMEM); 1030 return ERR_PTR(-ENOMEM);
1026 return inode; 1031 return inode;
@@ -1032,6 +1037,8 @@ fail_gunlock2:
1032fail_gunlock: 1037fail_gunlock:
1033 gfs2_glock_dq(ghs); 1038 gfs2_glock_dq(ghs);
1034fail: 1039fail:
1040 if (bh)
1041 brelse(bh);
1035 return ERR_PTR(error); 1042 return ERR_PTR(error);
1036} 1043}
1037 1044
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 351ac87ab384..d44650662615 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -20,6 +20,18 @@ static inline int gfs2_is_jdata(const struct gfs2_inode *ip)
20 return ip->i_di.di_flags & GFS2_DIF_JDATA; 20 return ip->i_di.di_flags & GFS2_DIF_JDATA;
21} 21}
22 22
23static inline int gfs2_is_writeback(const struct gfs2_inode *ip)
24{
25 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
26 return (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK) && !gfs2_is_jdata(ip);
27}
28
29static inline int gfs2_is_ordered(const struct gfs2_inode *ip)
30{
31 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
32 return (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) && !gfs2_is_jdata(ip);
33}
34
23static inline int gfs2_is_dir(const struct gfs2_inode *ip) 35static inline int gfs2_is_dir(const struct gfs2_inode *ip)
24{ 36{
25 return S_ISDIR(ip->i_inode.i_mode); 37 return S_ISDIR(ip->i_inode.i_mode);
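
The new gfs2_is_writeback() and gfs2_is_ordered() helpers classify an inode into one of three data-journaling modes from the mount-wide ar_data setting plus the per-inode jdata flag, with jdata taking precedence. A self-contained sketch of that classification (toy types, not the kernel structures):

#include <stdbool.h>
#include <stdio.h>

/* Toy model: the mount-wide data mode plus the per-inode jdata flag
 * yield exactly one of three modes. */
enum data_mode { DATA_WRITEBACK, DATA_ORDERED };

struct sb    { enum data_mode ar_data; };
struct inode { const struct sb *sb; bool jdata; };

static bool is_jdata(const struct inode *ip)     { return ip->jdata; }
static bool is_writeback(const struct inode *ip) { return ip->sb->ar_data == DATA_WRITEBACK && !ip->jdata; }
static bool is_ordered(const struct inode *ip)   { return ip->sb->ar_data == DATA_ORDERED && !ip->jdata; }

int main(void)
{
	struct sb sb = { DATA_ORDERED };
	struct inode file = { &sb, false }, journaled = { &sb, true };

	/* ordered-mode file: data is written out before the log commits */
	printf("file:       jdata=%d writeback=%d ordered=%d\n",
	       is_jdata(&file), is_writeback(&file), is_ordered(&file));
	/* jdata overrides the mount mode: neither writeback nor ordered */
	printf("jdata file: jdata=%d writeback=%d ordered=%d\n",
	       is_jdata(&journaled), is_writeback(&journaled), is_ordered(&journaled));
	return 0;
}
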
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index 41c5b04caaba..f2efff424224 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -67,6 +67,11 @@ static int make_args(struct gdlm_ls *ls, char *data_arg, int *nodir)
67 memset(data, 0, 256); 67 memset(data, 0, 256);
68 strncpy(data, data_arg, 255); 68 strncpy(data, data_arg, 255);
69 69
70 if (!strlen(data)) {
71 log_error("no mount options, (u)mount helpers not installed");
72 return -EINVAL;
73 }
74
70 for (options = data; (x = strsep(&options, ":")); ) { 75 for (options = data; (x = strsep(&options, ":")); ) {
71 if (!*x) 76 if (!*x)
72 continue; 77 continue;
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
index 1f7b038530b4..2ebd374b3143 100644
--- a/fs/gfs2/locking/dlm/plock.c
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -89,15 +89,19 @@ int gdlm_plock(void *lockspace, struct lm_lockname *name,
89 op->info.number = name->ln_number; 89 op->info.number = name->ln_number;
90 op->info.start = fl->fl_start; 90 op->info.start = fl->fl_start;
91 op->info.end = fl->fl_end; 91 op->info.end = fl->fl_end;
92 op->info.owner = (__u64)(long) fl->fl_owner;
93 if (fl->fl_lmops && fl->fl_lmops->fl_grant) { 92 if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
93 /* fl_owner is lockd which doesn't distinguish
94 processes on the nfs client */
95 op->info.owner = (__u64) fl->fl_pid;
94 xop->callback = fl->fl_lmops->fl_grant; 96 xop->callback = fl->fl_lmops->fl_grant;
95 locks_init_lock(&xop->flc); 97 locks_init_lock(&xop->flc);
96 locks_copy_lock(&xop->flc, fl); 98 locks_copy_lock(&xop->flc, fl);
97 xop->fl = fl; 99 xop->fl = fl;
98 xop->file = file; 100 xop->file = file;
99 } else 101 } else {
102 op->info.owner = (__u64)(long) fl->fl_owner;
100 xop->callback = NULL; 103 xop->callback = NULL;
104 }
101 105
102 send_op(op); 106 send_op(op);
103 107
@@ -203,7 +207,10 @@ int gdlm_punlock(void *lockspace, struct lm_lockname *name,
203 op->info.number = name->ln_number; 207 op->info.number = name->ln_number;
204 op->info.start = fl->fl_start; 208 op->info.start = fl->fl_start;
205 op->info.end = fl->fl_end; 209 op->info.end = fl->fl_end;
206 op->info.owner = (__u64)(long) fl->fl_owner; 210 if (fl->fl_lmops && fl->fl_lmops->fl_grant)
211 op->info.owner = (__u64) fl->fl_pid;
212 else
213 op->info.owner = (__u64)(long) fl->fl_owner;
207 214
208 send_op(op); 215 send_op(op);
209 wait_event(recv_wq, (op->done != 0)); 216 wait_event(recv_wq, (op->done != 0));
@@ -242,7 +249,10 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
242 op->info.number = name->ln_number; 249 op->info.number = name->ln_number;
243 op->info.start = fl->fl_start; 250 op->info.start = fl->fl_start;
244 op->info.end = fl->fl_end; 251 op->info.end = fl->fl_end;
245 op->info.owner = (__u64)(long) fl->fl_owner; 252 if (fl->fl_lmops && fl->fl_lmops->fl_grant)
253 op->info.owner = (__u64) fl->fl_pid;
254 else
255 op->info.owner = (__u64)(long) fl->fl_owner;
246 256
247 send_op(op); 257 send_op(op);
248 wait_event(recv_wq, (op->done != 0)); 258 wait_event(recv_wq, (op->done != 0));
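
All three plock paths above now pick the owner identifier the same way: locks arriving via lockd (fl_lmops->fl_grant set) are keyed by fl_pid, because lockd presents a single fl_owner for every process on an NFS client, while locally originated locks keep using the fl_owner pointer. A small sketch of that selection, using illustrative stand-in types rather than the kernel's struct file_lock:

#include <stdint.h>
#include <stdio.h>

struct lock_manager_ops { void (*fl_grant)(void); };

struct file_lock {
	const struct lock_manager_ops *fl_lmops;
	int   fl_pid;     /* process id reported by the NFS client via lockd */
	void *fl_owner;   /* per-process owner cookie for local POSIX locks  */
};

/* Same decision as the gdlm_plock/punlock/plock_get paths: lockd locks
 * cannot be told apart by fl_owner, so fall back to the client pid. */
static uint64_t plock_owner(const struct file_lock *fl)
{
	if (fl->fl_lmops && fl->fl_lmops->fl_grant)
		return (uint64_t)fl->fl_pid;
	return (uint64_t)(long)fl->fl_owner;
}

static void dummy_grant(void) { }

int main(void)
{
	static const struct lock_manager_ops lockd_ops = { .fl_grant = dummy_grant };
	int cookie;
	struct file_lock nfs_lock   = { &lockd_ops, 1234, &cookie };
	struct file_lock local_lock = { NULL, 5678, &cookie };

	printf("lockd lock owner: %llu\n", (unsigned long long)plock_owner(&nfs_lock));
	printf("local lock owner: %llu\n", (unsigned long long)plock_owner(&local_lock));
	return 0;
}
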
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index ae9e6a25fe2b..a87b09839761 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -189,51 +189,39 @@ static struct kobj_type gdlm_ktype = {
189 .sysfs_ops = &gdlm_attr_ops, 189 .sysfs_ops = &gdlm_attr_ops,
190}; 190};
191 191
192static struct kset gdlm_kset = { 192static struct kset *gdlm_kset;
193 .ktype = &gdlm_ktype,
194};
195 193
196int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj) 194int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj)
197{ 195{
198 int error; 196 int error;
199 197
200 error = kobject_set_name(&ls->kobj, "%s", "lock_module"); 198 ls->kobj.kset = gdlm_kset;
201 if (error) { 199 error = kobject_init_and_add(&ls->kobj, &gdlm_ktype, fskobj,
202 log_error("can't set kobj name %d", error); 200 "lock_module");
203 return error;
204 }
205
206 ls->kobj.kset = &gdlm_kset;
207 ls->kobj.ktype = &gdlm_ktype;
208 ls->kobj.parent = fskobj;
209
210 error = kobject_register(&ls->kobj);
211 if (error) 201 if (error)
212 log_error("can't register kobj %d", error); 202 log_error("can't register kobj %d", error);
203 kobject_uevent(&ls->kobj, KOBJ_ADD);
213 204
214 return error; 205 return error;
215} 206}
216 207
217void gdlm_kobject_release(struct gdlm_ls *ls) 208void gdlm_kobject_release(struct gdlm_ls *ls)
218{ 209{
219 kobject_unregister(&ls->kobj); 210 kobject_put(&ls->kobj);
220} 211}
221 212
222int gdlm_sysfs_init(void) 213int gdlm_sysfs_init(void)
223{ 214{
224 int error; 215 gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj);
225 216 if (!gdlm_kset) {
226 kobject_set_name(&gdlm_kset.kobj, "lock_dlm"); 217 printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__);
227 kobj_set_kset_s(&gdlm_kset, kernel_subsys); 218 return -ENOMEM;
228 error = kset_register(&gdlm_kset); 219 }
229 if (error) 220 return 0;
230 printk("lock_dlm: cannot register kset %d\n", error);
231
232 return error;
233} 221}
234 222
235void gdlm_sysfs_exit(void) 223void gdlm_sysfs_exit(void)
236{ 224{
237 kset_unregister(&gdlm_kset); 225 kset_unregister(gdlm_kset);
238} 226}
239 227
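
The sysfs.c hunk migrates from the old static-kset/kobject_register() interface to a dynamically created kset plus kobject_init_and_add(), followed by an explicit KOBJ_ADD uevent. A condensed, module-style sketch of that registration sequence is below; the example_* names are hypothetical and the parent/attribute handling is trimmed to the bare pattern.

#include <linux/kobject.h>
#include <linux/module.h>
#include <linux/slab.h>

struct example_obj {
	struct kobject kobj;
};

static struct kset *example_kset;
static struct example_obj *example;

static void example_release(struct kobject *kobj)
{
	kfree(container_of(kobj, struct example_obj, kobj));
}

static struct kobj_type example_ktype = {
	.release = example_release,
};

static int __init example_init(void)
{
	int error;

	/* Dynamically created kset, replacing a statically declared one. */
	example_kset = kset_create_and_add("example", NULL, kernel_kobj);
	if (!example_kset)
		return -ENOMEM;

	example = kzalloc(sizeof(*example), GFP_KERNEL);
	if (!example) {
		kset_unregister(example_kset);
		return -ENOMEM;
	}

	/* Initialise and add in one call, then announce the object to udev. */
	example->kobj.kset = example_kset;
	error = kobject_init_and_add(&example->kobj, &example_ktype, NULL,
				     "lock_module");
	if (error) {
		kobject_put(&example->kobj);
		kset_unregister(example_kset);
		return error;
	}
	kobject_uevent(&example->kobj, KOBJ_ADD);
	return 0;
}

static void __exit example_exit(void)
{
	kobject_put(&example->kobj);
	kset_unregister(example_kset);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
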
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
index bd938f06481d..521694fc19d6 100644
--- a/fs/gfs2/locking/dlm/thread.c
+++ b/fs/gfs2/locking/dlm/thread.c
@@ -273,18 +273,13 @@ static int gdlm_thread(void *data, int blist)
273 struct gdlm_ls *ls = (struct gdlm_ls *) data; 273 struct gdlm_ls *ls = (struct gdlm_ls *) data;
274 struct gdlm_lock *lp = NULL; 274 struct gdlm_lock *lp = NULL;
275 uint8_t complete, blocking, submit, drop; 275 uint8_t complete, blocking, submit, drop;
276 DECLARE_WAITQUEUE(wait, current);
277 276
278 /* Only thread1 is allowed to do blocking callbacks since gfs 277 /* Only thread1 is allowed to do blocking callbacks since gfs
279 may wait for a completion callback within a blocking cb. */ 278 may wait for a completion callback within a blocking cb. */
280 279
281 while (!kthread_should_stop()) { 280 while (!kthread_should_stop()) {
282 set_current_state(TASK_INTERRUPTIBLE); 281 wait_event_interruptible(ls->thread_wait,
283 add_wait_queue(&ls->thread_wait, &wait); 282 !no_work(ls, blist) || kthread_should_stop());
284 if (no_work(ls, blist))
285 schedule();
286 remove_wait_queue(&ls->thread_wait, &wait);
287 set_current_state(TASK_RUNNING);
288 283
289 complete = blocking = submit = drop = 0; 284 complete = blocking = submit = drop = 0;
290 285
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 7df702473252..161ab6f2058e 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -16,6 +16,8 @@
16#include <linux/crc32.h> 16#include <linux/crc32.h>
17#include <linux/lm_interface.h> 17#include <linux/lm_interface.h>
18#include <linux/delay.h> 18#include <linux/delay.h>
19#include <linux/kthread.h>
20#include <linux/freezer.h>
19 21
20#include "gfs2.h" 22#include "gfs2.h"
21#include "incore.h" 23#include "incore.h"
@@ -68,14 +70,12 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
68 * 70 *
69 */ 71 */
70 72
71void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd) 73void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
72{ 74{
73 bd->bd_ail = NULL; 75 bd->bd_ail = NULL;
74 list_del_init(&bd->bd_ail_st_list); 76 list_del_init(&bd->bd_ail_st_list);
75 list_del_init(&bd->bd_ail_gl_list); 77 list_del_init(&bd->bd_ail_gl_list);
76 atomic_dec(&bd->bd_gl->gl_ail_count); 78 atomic_dec(&bd->bd_gl->gl_ail_count);
77 if (mapping)
78 gfs2_meta_cache_flush(GFS2_I(mapping->host));
79 brelse(bd->bd_bh); 79 brelse(bd->bd_bh);
80} 80}
81 81
@@ -92,8 +92,6 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
92 struct buffer_head *bh; 92 struct buffer_head *bh;
93 int retry; 93 int retry;
94 94
95 BUG_ON(!spin_is_locked(&sdp->sd_log_lock));
96
97 do { 95 do {
98 retry = 0; 96 retry = 0;
99 97
@@ -210,7 +208,7 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
210 gfs2_log_unlock(sdp); 208 gfs2_log_unlock(sdp);
211} 209}
212 210
213int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) 211static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
214{ 212{
215 struct gfs2_ail *ai, *s; 213 struct gfs2_ail *ai, *s;
216 int ret; 214 int ret;
@@ -248,7 +246,7 @@ static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
248 bd = list_entry(head->prev, struct gfs2_bufdata, 246 bd = list_entry(head->prev, struct gfs2_bufdata,
249 bd_ail_st_list); 247 bd_ail_st_list);
250 gfs2_assert(sdp, bd->bd_ail == ai); 248 gfs2_assert(sdp, bd->bd_ail == ai);
251 gfs2_remove_from_ail(bd->bd_bh->b_page->mapping, bd); 249 gfs2_remove_from_ail(bd);
252 } 250 }
253} 251}
254 252
@@ -303,7 +301,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
303 301
304 mutex_lock(&sdp->sd_log_reserve_mutex); 302 mutex_lock(&sdp->sd_log_reserve_mutex);
305 gfs2_log_lock(sdp); 303 gfs2_log_lock(sdp);
306 while(sdp->sd_log_blks_free <= (blks + reserved_blks)) { 304 while(atomic_read(&sdp->sd_log_blks_free) <= (blks + reserved_blks)) {
307 gfs2_log_unlock(sdp); 305 gfs2_log_unlock(sdp);
308 gfs2_ail1_empty(sdp, 0); 306 gfs2_ail1_empty(sdp, 0);
309 gfs2_log_flush(sdp, NULL); 307 gfs2_log_flush(sdp, NULL);
@@ -312,7 +310,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
312 gfs2_ail1_start(sdp, 0); 310 gfs2_ail1_start(sdp, 0);
313 gfs2_log_lock(sdp); 311 gfs2_log_lock(sdp);
314 } 312 }
315 sdp->sd_log_blks_free -= blks; 313 atomic_sub(blks, &sdp->sd_log_blks_free);
316 gfs2_log_unlock(sdp); 314 gfs2_log_unlock(sdp);
317 mutex_unlock(&sdp->sd_log_reserve_mutex); 315 mutex_unlock(&sdp->sd_log_reserve_mutex);
318 316
@@ -332,27 +330,23 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
332{ 330{
333 331
334 gfs2_log_lock(sdp); 332 gfs2_log_lock(sdp);
335 sdp->sd_log_blks_free += blks; 333 atomic_add(blks, &sdp->sd_log_blks_free);
336 gfs2_assert_withdraw(sdp, 334 gfs2_assert_withdraw(sdp,
337 sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks); 335 atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
338 gfs2_log_unlock(sdp); 336 gfs2_log_unlock(sdp);
339 up_read(&sdp->sd_log_flush_lock); 337 up_read(&sdp->sd_log_flush_lock);
340} 338}
341 339
342static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) 340static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
343{ 341{
344 struct inode *inode = sdp->sd_jdesc->jd_inode; 342 struct gfs2_journal_extent *je;
345 int error; 343
346 struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; 344 list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) {
347 345 if (lbn >= je->lblock && lbn < je->lblock + je->blocks)
348 bh_map.b_size = 1 << inode->i_blkbits; 346 return je->dblock + lbn - je->lblock;
349 error = gfs2_block_map(inode, lbn, 0, &bh_map); 347 }
350 if (error || !bh_map.b_blocknr) 348
351 printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, 349 return -1;
352 (unsigned long long)bh_map.b_blocknr, lbn);
353 gfs2_assert_withdraw(sdp, !error && bh_map.b_blocknr);
354
355 return bh_map.b_blocknr;
356} 350}
357 351
358/** 352/**
@@ -561,8 +555,8 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
561 ail2_empty(sdp, new_tail); 555 ail2_empty(sdp, new_tail);
562 556
563 gfs2_log_lock(sdp); 557 gfs2_log_lock(sdp);
564 sdp->sd_log_blks_free += dist; 558 atomic_add(dist, &sdp->sd_log_blks_free);
565 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks); 559 gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
566 gfs2_log_unlock(sdp); 560 gfs2_log_unlock(sdp);
567 561
568 sdp->sd_log_tail = new_tail; 562 sdp->sd_log_tail = new_tail;
@@ -652,7 +646,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
652 get_bh(bh); 646 get_bh(bh);
653 gfs2_log_unlock(sdp); 647 gfs2_log_unlock(sdp);
654 lock_buffer(bh); 648 lock_buffer(bh);
655 if (test_clear_buffer_dirty(bh)) { 649 if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
656 bh->b_end_io = end_buffer_write_sync; 650 bh->b_end_io = end_buffer_write_sync;
657 submit_bh(WRITE, bh); 651 submit_bh(WRITE, bh);
658 } else { 652 } else {
@@ -694,20 +688,16 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
694 * 688 *
695 */ 689 */
696 690
697void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) 691void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
698{ 692{
699 struct gfs2_ail *ai; 693 struct gfs2_ail *ai;
700 694
701 down_write(&sdp->sd_log_flush_lock); 695 down_write(&sdp->sd_log_flush_lock);
702 696
703 if (gl) { 697 /* Log might have been flushed while we waited for the flush lock */
704 gfs2_log_lock(sdp); 698 if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) {
705 if (list_empty(&gl->gl_le.le_list)) { 699 up_write(&sdp->sd_log_flush_lock);
706 gfs2_log_unlock(sdp); 700 return;
707 up_write(&sdp->sd_log_flush_lock);
708 return;
709 }
710 gfs2_log_unlock(sdp);
711 } 701 }
712 702
713 ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL); 703 ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
@@ -739,7 +729,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
739 log_flush_commit(sdp); 729 log_flush_commit(sdp);
740 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ 730 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
741 gfs2_log_lock(sdp); 731 gfs2_log_lock(sdp);
742 sdp->sd_log_blks_free--; /* Adjust for unreserved buffer */ 732 atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
743 gfs2_log_unlock(sdp); 733 gfs2_log_unlock(sdp);
744 log_write_header(sdp, 0, PULL); 734 log_write_header(sdp, 0, PULL);
745 } 735 }
@@ -767,7 +757,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
767static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) 757static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
768{ 758{
769 unsigned int reserved; 759 unsigned int reserved;
770 unsigned int old; 760 unsigned int unused;
771 761
772 gfs2_log_lock(sdp); 762 gfs2_log_lock(sdp);
773 763
@@ -779,14 +769,11 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
779 sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; 769 sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
780 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0); 770 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
781 reserved = calc_reserved(sdp); 771 reserved = calc_reserved(sdp);
782 old = sdp->sd_log_blks_free; 772 unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved;
783 sdp->sd_log_blks_free += tr->tr_reserved - 773 gfs2_assert_withdraw(sdp, unused >= 0);
784 (reserved - sdp->sd_log_blks_reserved); 774 atomic_add(unused, &sdp->sd_log_blks_free);
785 775 gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
786 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old);
787 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <=
788 sdp->sd_jdesc->jd_blocks); 776 sdp->sd_jdesc->jd_blocks);
789
790 sdp->sd_log_blks_reserved = reserved; 777 sdp->sd_log_blks_reserved = reserved;
791 778
792 gfs2_log_unlock(sdp); 779 gfs2_log_unlock(sdp);
@@ -825,7 +812,6 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
825 down_write(&sdp->sd_log_flush_lock); 812 down_write(&sdp->sd_log_flush_lock);
826 813
827 gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); 814 gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
828 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
829 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); 815 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
830 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 816 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
831 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); 817 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
@@ -838,7 +824,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
838 log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 824 log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT,
839 (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL); 825 (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL);
840 826
841 gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks); 827 gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks);
842 gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); 828 gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
843 gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); 829 gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
844 830
@@ -866,3 +852,42 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
866 } 852 }
867} 853}
868 854
855
856/**
857 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
858 * @sdp: Pointer to GFS2 superblock
859 *
860 * Also, periodically check to make sure that we're using the most recent
861 * journal index.
862 */
863
864int gfs2_logd(void *data)
865{
866 struct gfs2_sbd *sdp = data;
867 unsigned long t;
868 int need_flush;
869
870 while (!kthread_should_stop()) {
871 /* Advance the log tail */
872
873 t = sdp->sd_log_flush_time +
874 gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
875
876 gfs2_ail1_empty(sdp, DIO_ALL);
877 gfs2_log_lock(sdp);
878 need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks);
879 gfs2_log_unlock(sdp);
880 if (need_flush || time_after_eq(jiffies, t)) {
881 gfs2_log_flush(sdp, NULL);
882 sdp->sd_log_flush_time = jiffies;
883 }
884
885 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
886 if (freezing(current))
887 refrigerator();
888 schedule_timeout_interruptible(t);
889 }
890
891 return 0;
892}
893
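
With the extent list added to struct gfs2_jdesc, log_bmap() no longer calls the block mapper for every journal write: the journal's on-disk layout is captured once as a list of contiguous runs, and each lookup is a range check plus an offset. A self-contained sketch of that translation, with an illustrative extent array standing in for the kernel's linked list:

#include <stdint.h>
#include <stdio.h>

/* Toy version of struct gfs2_journal_extent: a run of logical journal
 * blocks that is contiguous on disk. */
struct journal_extent {
	unsigned lblock;  /* first logical block of the run */
	uint64_t dblock;  /* first disk block of the run    */
	uint64_t blocks;  /* length of the run in blocks    */
};

/* Same logic as the new log_bmap(): find the run covering lbn and
 * translate by the offset into it; (uint64_t)-1 means "not mapped". */
static uint64_t log_bmap(const struct journal_extent *ext, size_t n, unsigned lbn)
{
	for (size_t i = 0; i < n; i++) {
		if (lbn >= ext[i].lblock && lbn < ext[i].lblock + ext[i].blocks)
			return ext[i].dblock + (lbn - ext[i].lblock);
	}
	return (uint64_t)-1;
}

int main(void)
{
	/* A journal split across two on-disk runs (made-up numbers). */
	const struct journal_extent journal[] = {
		{ .lblock = 0,   .dblock = 10000, .blocks = 128 },
		{ .lblock = 128, .dblock = 20480, .blocks = 128 },
	};

	printf("lbn 5   -> dbn %llu\n", (unsigned long long)log_bmap(journal, 2, 5));
	printf("lbn 130 -> dbn %llu\n", (unsigned long long)log_bmap(journal, 2, 130));
	return 0;
}
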
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index dae282400627..771152816508 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -48,8 +48,6 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
48unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, 48unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
49 unsigned int ssize); 49 unsigned int ssize);
50 50
51int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);
52
53int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); 51int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
54void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); 52void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
55void gfs2_log_incr_head(struct gfs2_sbd *sdp); 53void gfs2_log_incr_head(struct gfs2_sbd *sdp);
@@ -57,11 +55,19 @@ void gfs2_log_incr_head(struct gfs2_sbd *sdp);
57struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp); 55struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
58struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, 56struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
59 struct buffer_head *real); 57 struct buffer_head *real);
60void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); 58void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
59
60static inline void gfs2_log_flush(struct gfs2_sbd *sbd, struct gfs2_glock *gl)
61{
62 if (!gl || test_bit(GLF_LFLUSH, &gl->gl_flags))
63 __gfs2_log_flush(sbd, gl);
64}
65
61void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); 66void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
62void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd); 67void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
63 68
64void gfs2_log_shutdown(struct gfs2_sbd *sdp); 69void gfs2_log_shutdown(struct gfs2_sbd *sdp);
65void gfs2_meta_syncfs(struct gfs2_sbd *sdp); 70void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
71int gfs2_logd(void *data);
66 72
67#endif /* __LOG_DOT_H__ */ 73#endif /* __LOG_DOT_H__ */
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 6c27cea761c6..fae59d69d01a 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -87,6 +87,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
87 } 87 }
88 bd->bd_ail = ai; 88 bd->bd_ail = ai;
89 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); 89 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
90 clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
90 gfs2_log_unlock(sdp); 91 gfs2_log_unlock(sdp);
91 unlock_buffer(bh); 92 unlock_buffer(bh);
92} 93}
@@ -124,49 +125,6 @@ static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
124 return bh; 125 return bh;
125} 126}
126 127
127static void __glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
128{
129 struct gfs2_glock *gl;
130 struct gfs2_trans *tr = current->journal_info;
131
132 tr->tr_touched = 1;
133
134 gl = container_of(le, struct gfs2_glock, gl_le);
135 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
136 return;
137
138 if (!list_empty(&le->le_list))
139 return;
140
141 gfs2_glock_hold(gl);
142 set_bit(GLF_DIRTY, &gl->gl_flags);
143 sdp->sd_log_num_gl++;
144 list_add(&le->le_list, &sdp->sd_log_le_gl);
145}
146
147static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
148{
149 gfs2_log_lock(sdp);
150 __glock_lo_add(sdp, le);
151 gfs2_log_unlock(sdp);
152}
153
154static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
155{
156 struct list_head *head = &sdp->sd_log_le_gl;
157 struct gfs2_glock *gl;
158
159 while (!list_empty(head)) {
160 gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list);
161 list_del_init(&gl->gl_le.le_list);
162 sdp->sd_log_num_gl--;
163
164 gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl));
165 gfs2_glock_put(gl);
166 }
167 gfs2_assert_warn(sdp, !sdp->sd_log_num_gl);
168}
169
170static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 128static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
171{ 129{
172 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); 130 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
@@ -182,7 +140,8 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
182 list_add(&bd->bd_list_tr, &tr->tr_list_buf); 140 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
183 if (!list_empty(&le->le_list)) 141 if (!list_empty(&le->le_list))
184 goto out; 142 goto out;
185 __glock_lo_add(sdp, &bd->bd_gl->gl_le); 143 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
144 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
186 gfs2_meta_check(sdp, bd->bd_bh); 145 gfs2_meta_check(sdp, bd->bd_bh);
187 gfs2_pin(sdp, bd->bd_bh); 146 gfs2_pin(sdp, bd->bd_bh);
188 sdp->sd_log_num_buf++; 147 sdp->sd_log_num_buf++;
@@ -556,17 +515,20 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
556 515
557 lock_buffer(bd->bd_bh); 516 lock_buffer(bd->bd_bh);
558 gfs2_log_lock(sdp); 517 gfs2_log_lock(sdp);
559 if (!list_empty(&bd->bd_list_tr)) 518 if (tr) {
560 goto out; 519 if (!list_empty(&bd->bd_list_tr))
561 tr->tr_touched = 1; 520 goto out;
562 if (gfs2_is_jdata(ip)) { 521 tr->tr_touched = 1;
563 tr->tr_num_buf++; 522 if (gfs2_is_jdata(ip)) {
564 list_add(&bd->bd_list_tr, &tr->tr_list_buf); 523 tr->tr_num_buf++;
524 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
525 }
565 } 526 }
566 if (!list_empty(&le->le_list)) 527 if (!list_empty(&le->le_list))
567 goto out; 528 goto out;
568 529
569 __glock_lo_add(sdp, &bd->bd_gl->gl_le); 530 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
531 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
570 if (gfs2_is_jdata(ip)) { 532 if (gfs2_is_jdata(ip)) {
571 gfs2_pin(sdp, bd->bd_bh); 533 gfs2_pin(sdp, bd->bd_bh);
572 tr->tr_num_databuf_new++; 534 tr->tr_num_databuf_new++;
@@ -773,12 +735,6 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
773} 735}
774 736
775 737
776const struct gfs2_log_operations gfs2_glock_lops = {
777 .lo_add = glock_lo_add,
778 .lo_after_commit = glock_lo_after_commit,
779 .lo_name = "glock",
780};
781
782const struct gfs2_log_operations gfs2_buf_lops = { 738const struct gfs2_log_operations gfs2_buf_lops = {
783 .lo_add = buf_lo_add, 739 .lo_add = buf_lo_add,
784 .lo_incore_commit = buf_lo_incore_commit, 740 .lo_incore_commit = buf_lo_incore_commit,
@@ -816,7 +772,6 @@ const struct gfs2_log_operations gfs2_databuf_lops = {
816}; 772};
817 773
818const struct gfs2_log_operations *gfs2_log_ops[] = { 774const struct gfs2_log_operations *gfs2_log_ops[] = {
819 &gfs2_glock_lops,
820 &gfs2_databuf_lops, 775 &gfs2_databuf_lops,
821 &gfs2_buf_lops, 776 &gfs2_buf_lops,
822 &gfs2_rg_lops, 777 &gfs2_rg_lops,
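
With the glock log-element type removed, "this glock has buffers in the active log" is tracked by the GLF_LFLUSH bit instead of membership on sd_log_le_gl: buf_lo_add() and databuf_lo_add() set the bit, gfs2_unpin() clears it, and the inline gfs2_log_flush() wrapper added in log.h above skips the flush entirely when the bit is clear. A tiny userspace sketch of that flag-based protocol (deliberately simplified: the real bit is cleared per buffer as it moves to the AIL):

#include <stdio.h>

enum { GLF_DIRTY = 1 << 0, GLF_LFLUSH = 1 << 1 };

struct glock { unsigned flags; const char *name; };

/* lo_add path: mark the glock as having pinned buffers in the log. */
static void add_to_log(struct glock *gl)
{
	gl->flags |= GLF_DIRTY | GLF_LFLUSH;
}

/* unpin path: the buffers have moved on, nothing left in the log. */
static void unpin(struct glock *gl)
{
	gl->flags &= ~GLF_LFLUSH;
}

/* Equivalent of the inline gfs2_log_flush(): only do the expensive
 * flush when the glock still has something in the active log. */
static void log_flush(struct glock *gl)
{
	if (gl && !(gl->flags & GLF_LFLUSH)) {
		printf("%s: nothing in the log, skip flush\n", gl->name);
		return;
	}
	printf("%s: flushing log\n", gl ? gl->name : "whole fs");
	if (gl)
		unpin(gl);
}

int main(void)
{
	struct glock gl = { 0, "inode glock" };

	add_to_log(&gl);
	log_flush(&gl);   /* flushes: GLF_LFLUSH was set       */
	log_flush(&gl);   /* skipped: log already clean for gl */
	return 0;
}
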
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 7ecfe0d3a491..9c7765c12d62 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -29,9 +29,8 @@ static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo)
29 struct gfs2_inode *ip = foo; 29 struct gfs2_inode *ip = foo;
30 30
31 inode_init_once(&ip->i_inode); 31 inode_init_once(&ip->i_inode);
32 spin_lock_init(&ip->i_spin);
33 init_rwsem(&ip->i_rw_mutex); 32 init_rwsem(&ip->i_rw_mutex);
34 memset(ip->i_cache, 0, sizeof(ip->i_cache)); 33 ip->i_alloc = NULL;
35} 34}
36 35
37static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo) 36static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo)
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 4da423985e4f..85aea27b4a86 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -50,6 +50,7 @@ static int gfs2_aspace_writepage(struct page *page,
50static const struct address_space_operations aspace_aops = { 50static const struct address_space_operations aspace_aops = {
51 .writepage = gfs2_aspace_writepage, 51 .writepage = gfs2_aspace_writepage,
52 .releasepage = gfs2_releasepage, 52 .releasepage = gfs2_releasepage,
53 .sync_page = block_sync_page,
53}; 54};
54 55
55/** 56/**
@@ -221,13 +222,14 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
221 struct buffer_head **bhp) 222 struct buffer_head **bhp)
222{ 223{
223 *bhp = getbuf(gl, blkno, CREATE); 224 *bhp = getbuf(gl, blkno, CREATE);
224 if (!buffer_uptodate(*bhp)) 225 if (!buffer_uptodate(*bhp)) {
225 ll_rw_block(READ_META, 1, bhp); 226 ll_rw_block(READ_META, 1, bhp);
226 if (flags & DIO_WAIT) { 227 if (flags & DIO_WAIT) {
227 int error = gfs2_meta_wait(gl->gl_sbd, *bhp); 228 int error = gfs2_meta_wait(gl->gl_sbd, *bhp);
228 if (error) { 229 if (error) {
229 brelse(*bhp); 230 brelse(*bhp);
230 return error; 231 return error;
232 }
231 } 233 }
232 } 234 }
233 235
@@ -282,7 +284,7 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
282 return; 284 return;
283 } 285 }
284 286
285 bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL), 287 bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
286 bd->bd_bh = bh; 288 bd->bd_bh = bh;
287 bd->bd_gl = gl; 289 bd->bd_gl = gl;
288 290
@@ -317,7 +319,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
317 } 319 }
318 if (bd) { 320 if (bd) {
319 if (bd->bd_ail) { 321 if (bd->bd_ail) {
320 gfs2_remove_from_ail(NULL, bd); 322 gfs2_remove_from_ail(bd);
321 bh->b_private = NULL; 323 bh->b_private = NULL;
322 bd->bd_bh = NULL; 324 bd->bd_bh = NULL;
323 bd->bd_blkno = bh->b_blocknr; 325 bd->bd_blkno = bh->b_blocknr;
@@ -358,32 +360,6 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
358} 360}
359 361
360/** 362/**
361 * gfs2_meta_cache_flush - get rid of any references on buffers for this inode
362 * @ip: The GFS2 inode
363 *
364 * This releases buffers that are in the most-recently-used array of
365 * blocks used for indirect block addressing for this inode.
366 */
367
368void gfs2_meta_cache_flush(struct gfs2_inode *ip)
369{
370 struct buffer_head **bh_slot;
371 unsigned int x;
372
373 spin_lock(&ip->i_spin);
374
375 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) {
376 bh_slot = &ip->i_cache[x];
377 if (*bh_slot) {
378 brelse(*bh_slot);
379 *bh_slot = NULL;
380 }
381 }
382
383 spin_unlock(&ip->i_spin);
384}
385
386/**
387 * gfs2_meta_indirect_buffer - Get a metadata buffer 363 * gfs2_meta_indirect_buffer - Get a metadata buffer
388 * @ip: The GFS2 inode 364 * @ip: The GFS2 inode
389 * @height: The level of this buf in the metadata (indir addr) tree (if any) 365 * @height: The level of this buf in the metadata (indir addr) tree (if any)
@@ -391,8 +367,6 @@ void gfs2_meta_cache_flush(struct gfs2_inode *ip)
391 * @new: Non-zero if we may create a new buffer 367 * @new: Non-zero if we may create a new buffer
392 * @bhp: the buffer is returned here 368 * @bhp: the buffer is returned here
393 * 369 *
394 * Try to use the gfs2_inode's MRU metadata tree cache.
395 *
396 * Returns: errno 370 * Returns: errno
397 */ 371 */
398 372
@@ -401,58 +375,25 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
401{ 375{
402 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 376 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
403 struct gfs2_glock *gl = ip->i_gl; 377 struct gfs2_glock *gl = ip->i_gl;
404 struct buffer_head *bh = NULL, **bh_slot = ip->i_cache + height; 378 struct buffer_head *bh;
405 int in_cache = 0; 379 int ret = 0;
406
407 BUG_ON(!gl);
408 BUG_ON(!sdp);
409
410 spin_lock(&ip->i_spin);
411 if (*bh_slot && (*bh_slot)->b_blocknr == num) {
412 bh = *bh_slot;
413 get_bh(bh);
414 in_cache = 1;
415 }
416 spin_unlock(&ip->i_spin);
417
418 if (!bh)
419 bh = getbuf(gl, num, CREATE);
420
421 if (!bh)
422 return -ENOBUFS;
423 380
424 if (new) { 381 if (new) {
425 if (gfs2_assert_warn(sdp, height)) 382 BUG_ON(height == 0);
426 goto err; 383 bh = gfs2_meta_new(gl, num);
427 meta_prep_new(bh);
428 gfs2_trans_add_bh(ip->i_gl, bh, 1); 384 gfs2_trans_add_bh(ip->i_gl, bh, 1);
429 gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); 385 gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
430 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); 386 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
431 } else { 387 } else {
432 u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI; 388 u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI;
433 if (!buffer_uptodate(bh)) { 389 ret = gfs2_meta_read(gl, num, DIO_WAIT, &bh);
434 ll_rw_block(READ_META, 1, &bh); 390 if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) {
435 if (gfs2_meta_wait(sdp, bh)) 391 brelse(bh);
436 goto err; 392 ret = -EIO;
437 } 393 }
438 if (gfs2_metatype_check(sdp, bh, mtype))
439 goto err;
440 }
441
442 if (!in_cache) {
443 spin_lock(&ip->i_spin);
444 if (*bh_slot)
445 brelse(*bh_slot);
446 *bh_slot = bh;
447 get_bh(bh);
448 spin_unlock(&ip->i_spin);
449 } 394 }
450
451 *bhp = bh; 395 *bhp = bh;
452 return 0; 396 return ret;
453err:
454 brelse(bh);
455 return -EIO;
456} 397}
457 398
458/** 399/**
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index b7048222ebb4..73e3b1c76fe1 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -56,7 +56,6 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr,
56 56
57void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); 57void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
58 58
59void gfs2_meta_cache_flush(struct gfs2_inode *ip);
60int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, 59int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
61 int new, struct buffer_head **bhp); 60 int new, struct buffer_head **bhp);
62 61
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 9679f8b9870d..38dbe99a30ed 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -20,6 +20,8 @@
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/gfs2_ondisk.h> 21#include <linux/gfs2_ondisk.h>
22#include <linux/lm_interface.h> 22#include <linux/lm_interface.h>
23#include <linux/backing-dev.h>
24#include <linux/pagevec.h>
23 25
24#include "gfs2.h" 26#include "gfs2.h"
25#include "incore.h" 27#include "incore.h"
@@ -32,7 +34,6 @@
32#include "quota.h" 34#include "quota.h"
33#include "trans.h" 35#include "trans.h"
34#include "rgrp.h" 36#include "rgrp.h"
35#include "ops_file.h"
36#include "super.h" 37#include "super.h"
37#include "util.h" 38#include "util.h"
38#include "glops.h" 39#include "glops.h"
@@ -58,22 +59,6 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
58} 59}
59 60
60/** 61/**
61 * gfs2_get_block - Fills in a buffer head with details about a block
62 * @inode: The inode
63 * @lblock: The block number to look up
64 * @bh_result: The buffer head to return the result in
65 * @create: Non-zero if we may add block to the file
66 *
67 * Returns: errno
68 */
69
70int gfs2_get_block(struct inode *inode, sector_t lblock,
71 struct buffer_head *bh_result, int create)
72{
73 return gfs2_block_map(inode, lblock, create, bh_result);
74}
75
76/**
77 * gfs2_get_block_noalloc - Fills in a buffer head with details about a block 62 * gfs2_get_block_noalloc - Fills in a buffer head with details about a block
78 * @inode: The inode 63 * @inode: The inode
79 * @lblock: The block number to look up 64 * @lblock: The block number to look up
@@ -88,7 +73,7 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
88{ 73{
89 int error; 74 int error;
90 75
91 error = gfs2_block_map(inode, lblock, 0, bh_result); 76 error = gfs2_block_map(inode, lblock, bh_result, 0);
92 if (error) 77 if (error)
93 return error; 78 return error;
94 if (!buffer_mapped(bh_result)) 79 if (!buffer_mapped(bh_result))
@@ -99,20 +84,19 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
99static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, 84static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
100 struct buffer_head *bh_result, int create) 85 struct buffer_head *bh_result, int create)
101{ 86{
102 return gfs2_block_map(inode, lblock, 0, bh_result); 87 return gfs2_block_map(inode, lblock, bh_result, 0);
103} 88}
104 89
105/** 90/**
106 * gfs2_writepage - Write complete page 91 * gfs2_writepage_common - Common bits of writepage
107 * @page: Page to write 92 * @page: The page to be written
93 * @wbc: The writeback control
108 * 94 *
109 * Returns: errno 95 * Returns: 1 if writepage is ok, otherwise an error code or zero if no error.
110 *
111 * Some of this is copied from block_write_full_page() although we still
112 * call it to do most of the work.
113 */ 96 */
114 97
115static int gfs2_writepage(struct page *page, struct writeback_control *wbc) 98static int gfs2_writepage_common(struct page *page,
99 struct writeback_control *wbc)
116{ 100{
117 struct inode *inode = page->mapping->host; 101 struct inode *inode = page->mapping->host;
118 struct gfs2_inode *ip = GFS2_I(inode); 102 struct gfs2_inode *ip = GFS2_I(inode);
@@ -120,41 +104,133 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
120 loff_t i_size = i_size_read(inode); 104 loff_t i_size = i_size_read(inode);
121 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 105 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
122 unsigned offset; 106 unsigned offset;
123 int error; 107 int ret = -EIO;
124 int done_trans = 0;
125 108
126 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) { 109 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
127 unlock_page(page); 110 goto out;
128 return -EIO; 111 ret = 0;
129 }
130 if (current->journal_info) 112 if (current->journal_info)
131 goto out_ignore; 113 goto redirty;
132
133 /* Is the page fully outside i_size? (truncate in progress) */ 114 /* Is the page fully outside i_size? (truncate in progress) */
134 offset = i_size & (PAGE_CACHE_SIZE-1); 115 offset = i_size & (PAGE_CACHE_SIZE-1);
135 if (page->index > end_index || (page->index == end_index && !offset)) { 116 if (page->index > end_index || (page->index == end_index && !offset)) {
136 page->mapping->a_ops->invalidatepage(page, 0); 117 page->mapping->a_ops->invalidatepage(page, 0);
137 unlock_page(page); 118 goto out;
138 return 0; /* don't care */ 119 }
120 return 1;
121redirty:
122 redirty_page_for_writepage(wbc, page);
123out:
124 unlock_page(page);
125 return 0;
126}
127
128/**
129 * gfs2_writeback_writepage - Write page for writeback mappings
130 * @page: The page
131 * @wbc: The writeback control
132 *
133 */
134
135static int gfs2_writeback_writepage(struct page *page,
136 struct writeback_control *wbc)
137{
138 int ret;
139
140 ret = gfs2_writepage_common(page, wbc);
141 if (ret <= 0)
142 return ret;
143
144 ret = mpage_writepage(page, gfs2_get_block_noalloc, wbc);
145 if (ret == -EAGAIN)
146 ret = block_write_full_page(page, gfs2_get_block_noalloc, wbc);
147 return ret;
148}
149
150/**
151 * gfs2_ordered_writepage - Write page for ordered data files
152 * @page: The page to write
153 * @wbc: The writeback control
154 *
155 */
156
157static int gfs2_ordered_writepage(struct page *page,
158 struct writeback_control *wbc)
159{
160 struct inode *inode = page->mapping->host;
161 struct gfs2_inode *ip = GFS2_I(inode);
162 int ret;
163
164 ret = gfs2_writepage_common(page, wbc);
165 if (ret <= 0)
166 return ret;
167
168 if (!page_has_buffers(page)) {
169 create_empty_buffers(page, inode->i_sb->s_blocksize,
170 (1 << BH_Dirty)|(1 << BH_Uptodate));
139 } 171 }
172 gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1);
173 return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
174}
140 175
141 if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) && 176/**
142 PageChecked(page)) { 177 * __gfs2_jdata_writepage - The core of jdata writepage
178 * @page: The page to write
179 * @wbc: The writeback control
180 *
181 * This is shared between writepage and writepages and implements the
182 * core of the writepage operation. If a transaction is required then
183 * PageChecked will have been set and the transaction will have
184 * already been started before this is called.
185 */
186
187static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
188{
189 struct inode *inode = page->mapping->host;
190 struct gfs2_inode *ip = GFS2_I(inode);
191 struct gfs2_sbd *sdp = GFS2_SB(inode);
192
193 if (PageChecked(page)) {
143 ClearPageChecked(page); 194 ClearPageChecked(page);
144 error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
145 if (error)
146 goto out_ignore;
147 if (!page_has_buffers(page)) { 195 if (!page_has_buffers(page)) {
148 create_empty_buffers(page, inode->i_sb->s_blocksize, 196 create_empty_buffers(page, inode->i_sb->s_blocksize,
149 (1 << BH_Dirty)|(1 << BH_Uptodate)); 197 (1 << BH_Dirty)|(1 << BH_Uptodate));
150 } 198 }
151 gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); 199 gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
200 }
201 return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
202}
203
204/**
205 * gfs2_jdata_writepage - Write complete page
206 * @page: Page to write
207 *
208 * Returns: errno
209 *
210 */
211
212static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
213{
214 struct inode *inode = page->mapping->host;
215 struct gfs2_sbd *sdp = GFS2_SB(inode);
216 int error;
217 int done_trans = 0;
218
219 error = gfs2_writepage_common(page, wbc);
220 if (error <= 0)
221 return error;
222
223 if (PageChecked(page)) {
224 if (wbc->sync_mode != WB_SYNC_ALL)
225 goto out_ignore;
226 error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
227 if (error)
228 goto out_ignore;
152 done_trans = 1; 229 done_trans = 1;
153 } 230 }
154 error = block_write_full_page(page, gfs2_get_block_noalloc, wbc); 231 error = __gfs2_jdata_writepage(page, wbc);
155 if (done_trans) 232 if (done_trans)
156 gfs2_trans_end(sdp); 233 gfs2_trans_end(sdp);
157 gfs2_meta_cache_flush(ip);
158 return error; 234 return error;
159 235
160out_ignore: 236out_ignore:
@@ -164,29 +240,190 @@ out_ignore:
164} 240}
165 241
166/** 242/**
167 * gfs2_writepages - Write a bunch of dirty pages back to disk 243 * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk
168 * @mapping: The mapping to write 244 * @mapping: The mapping to write
169 * @wbc: Write-back control 245 * @wbc: Write-back control
170 * 246 *
171 * For journaled files and/or ordered writes this just falls back to the 247 * For the data=writeback case we can already ignore buffer heads
172 * kernel's default writepages path for now. We will probably want to change
173 * that eventually (i.e. when we look at allocate on flush).
174 *
175 * For the data=writeback case though we can already ignore buffer heads
176 * and write whole extents at once. This is a big reduction in the 248 * and write whole extents at once. This is a big reduction in the
177 * number of I/O requests we send and the bmap calls we make in this case. 249 * number of I/O requests we send and the bmap calls we make in this case.
178 */ 250 */
179static int gfs2_writepages(struct address_space *mapping, 251static int gfs2_writeback_writepages(struct address_space *mapping,
180 struct writeback_control *wbc) 252 struct writeback_control *wbc)
253{
254 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
255}
256
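
The comment on gfs2_writeback_writepages() makes the point that once buffer heads can be ignored, contiguous dirty pages go out as whole extents, cutting the number of I/O requests and bmap calls. A minimal standalone C sketch of that saving, counting one request per dirty page versus one per contiguous run; the dirty-page map and all names are invented for illustration and are not GFS2 code:

#include <stdio.h>

/* Hypothetical dirty-page map: 1 = dirty, 0 = clean. */
static const int dirty[] = { 1, 1, 1, 0, 1, 1, 0, 0, 1 };
#define NPAGES (sizeof(dirty) / sizeof(dirty[0]))

int main(void)
{
	unsigned per_page = 0, merged = 0;
	size_t i;
	int in_run = 0;

	for (i = 0; i < NPAGES; i++) {
		if (dirty[i]) {
			per_page++;		/* one request per dirty page */
			if (!in_run)
				merged++;	/* one request per contiguous run */
			in_run = 1;
		} else {
			in_run = 0;
		}
	}
	printf("per-page requests: %u, merged extent requests: %u\n",
	       per_page, merged);
	return 0;
}
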
257/**
258 * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages
259 * @mapping: The mapping
260 * @wbc: The writeback control
261 * @writepage: The writepage function to call for each page
262 * @pvec: The vector of pages
263 * @nr_pages: The number of pages to write
264 *
265 * Returns: non-zero if loop should terminate, zero otherwise
266 */
267
268static int gfs2_write_jdata_pagevec(struct address_space *mapping,
269 struct writeback_control *wbc,
270 struct pagevec *pvec,
271 int nr_pages, pgoff_t end)
181{ 272{
182 struct inode *inode = mapping->host; 273 struct inode *inode = mapping->host;
183 struct gfs2_inode *ip = GFS2_I(inode);
184 struct gfs2_sbd *sdp = GFS2_SB(inode); 274 struct gfs2_sbd *sdp = GFS2_SB(inode);
275 loff_t i_size = i_size_read(inode);
276 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
277 unsigned offset = i_size & (PAGE_CACHE_SIZE-1);
278 unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
279 struct backing_dev_info *bdi = mapping->backing_dev_info;
280 int i;
281 int ret;
282
283 ret = gfs2_trans_begin(sdp, nrblocks, 0);
284 if (ret < 0)
285 return ret;
286
287 for(i = 0; i < nr_pages; i++) {
288 struct page *page = pvec->pages[i];
289
290 lock_page(page);
291
292 if (unlikely(page->mapping != mapping)) {
293 unlock_page(page);
294 continue;
295 }
296
297 if (!wbc->range_cyclic && page->index > end) {
298 ret = 1;
299 unlock_page(page);
300 continue;
301 }
302
303 if (wbc->sync_mode != WB_SYNC_NONE)
304 wait_on_page_writeback(page);
305
306 if (PageWriteback(page) ||
307 !clear_page_dirty_for_io(page)) {
308 unlock_page(page);
309 continue;
310 }
311
312 /* Is the page fully outside i_size? (truncate in progress) */
313 if (page->index > end_index || (page->index == end_index && !offset)) {
314 page->mapping->a_ops->invalidatepage(page, 0);
315 unlock_page(page);
316 continue;
317 }
318
319 ret = __gfs2_jdata_writepage(page, wbc);
320
321 if (ret || (--(wbc->nr_to_write) <= 0))
322 ret = 1;
323 if (wbc->nonblocking && bdi_write_congested(bdi)) {
324 wbc->encountered_congestion = 1;
325 ret = 1;
326 }
327
328 }
329 gfs2_trans_end(sdp);
330 return ret;
331}
332
333/**
334 * gfs2_write_cache_jdata - Like write_cache_pages but different
335 * @mapping: The mapping to write
336 * @wbc: The writeback control
337 * @writepage: The writepage function to call
338 * @data: The data to pass to writepage
339 *
340 * The reason that we use our own function here is that we need to
341 * start transactions before we grab page locks. This allows us
342 * to get the ordering right.
343 */
344
345static int gfs2_write_cache_jdata(struct address_space *mapping,
346 struct writeback_control *wbc)
347{
348 struct backing_dev_info *bdi = mapping->backing_dev_info;
349 int ret = 0;
350 int done = 0;
351 struct pagevec pvec;
352 int nr_pages;
353 pgoff_t index;
354 pgoff_t end;
355 int scanned = 0;
356 int range_whole = 0;
357
358 if (wbc->nonblocking && bdi_write_congested(bdi)) {
359 wbc->encountered_congestion = 1;
360 return 0;
361 }
362
363 pagevec_init(&pvec, 0);
364 if (wbc->range_cyclic) {
365 index = mapping->writeback_index; /* Start from prev offset */
366 end = -1;
367 } else {
368 index = wbc->range_start >> PAGE_CACHE_SHIFT;
369 end = wbc->range_end >> PAGE_CACHE_SHIFT;
370 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
371 range_whole = 1;
372 scanned = 1;
373 }
185 374
186 if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip)) 375retry:
187 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); 376 while (!done && (index <= end) &&
377 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
378 PAGECACHE_TAG_DIRTY,
379 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
380 scanned = 1;
381 ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end);
382 if (ret)
383 done = 1;
384 if (ret > 0)
385 ret = 0;
386
387 pagevec_release(&pvec);
388 cond_resched();
389 }
390
391 if (!scanned && !done) {
392 /*
393 * We hit the last page and there is more work to be done: wrap
394 * back to the start of the file
395 */
396 scanned = 1;
397 index = 0;
398 goto retry;
399 }
400
401 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
402 mapping->writeback_index = index;
403 return ret;
404}
405
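
The comment above gfs2_write_cache_jdata() states why it cannot simply call write_cache_pages(): the log transaction has to be started before any page lock is taken. A rough userspace model of that batching order, with invented helpers standing in for gfs2_trans_begin()/gfs2_trans_end() and the per-page write; it sketches only the ordering, not the real locking:

#include <stdio.h>

#define BATCH 4

/* Invented stand-ins for the log reservation and the per-page work. */
static void reserve_log_space(unsigned nblocks) { printf("reserve %u log blocks\n", nblocks); }
static void release_log_space(void) { printf("release reservation\n"); }
static void write_one_page(unsigned idx) { printf("  lock, write, unlock page %u\n", idx); }

int main(void)
{
	unsigned dirty[] = { 2, 3, 4, 9, 10, 11, 12, 13 };
	size_t n = sizeof(dirty) / sizeof(dirty[0]);
	size_t i = 0;

	while (i < n) {
		size_t batch = (n - i < BATCH) ? n - i : BATCH;
		size_t j;

		/* The ordering that matters: reserve before any page lock. */
		reserve_log_space((unsigned)batch);
		for (j = 0; j < batch; j++)
			write_one_page(dirty[i + j]);
		release_log_space();
		i += batch;
	}
	return 0;
}
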
406
407/**
408 * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk
409 * @mapping: The mapping to write
410 * @wbc: The writeback control
411 *
412 */
188 413
189 return generic_writepages(mapping, wbc); 414static int gfs2_jdata_writepages(struct address_space *mapping,
415 struct writeback_control *wbc)
416{
417 struct gfs2_inode *ip = GFS2_I(mapping->host);
418 struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
419 int ret;
420
421 ret = gfs2_write_cache_jdata(mapping, wbc);
422 if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) {
423 gfs2_log_flush(sdp, ip->i_gl);
424 ret = gfs2_write_cache_jdata(mapping, wbc);
425 }
426 return ret;
190} 427}
191 428
192/** 429/**
@@ -231,62 +468,107 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
231 468
232 469
233/** 470/**
234 * gfs2_readpage - readpage with locking 471 * __gfs2_readpage - readpage
235 * @file: The file to read a page for. N.B. This may be NULL if we are 472 * @file: The file to read a page for
236 * reading an internal file.
237 * @page: The page to read 473 * @page: The page to read
238 * 474 *
239 * Returns: errno 475 * This is the core of gfs2's readpage. It's used by the internal file
476 * reading code, as in that case we already hold the glock. Also it's
477 * called by gfs2_readpage() once the required lock has been granted.
478 *
240 */ 479 */
241 480
242static int gfs2_readpage(struct file *file, struct page *page) 481static int __gfs2_readpage(void *file, struct page *page)
243{ 482{
244 struct gfs2_inode *ip = GFS2_I(page->mapping->host); 483 struct gfs2_inode *ip = GFS2_I(page->mapping->host);
245 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); 484 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
246 struct gfs2_file *gf = NULL;
247 struct gfs2_holder gh;
248 int error; 485 int error;
249 int do_unlock = 0;
250
251 if (likely(file != &gfs2_internal_file_sentinel)) {
252 if (file) {
253 gf = file->private_data;
254 if (test_bit(GFF_EXLOCK, &gf->f_flags))
255 /* gfs2_sharewrite_fault has grabbed the ip->i_gl already */
256 goto skip_lock;
257 }
258 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
259 do_unlock = 1;
260 error = gfs2_glock_nq_atime(&gh);
261 if (unlikely(error))
262 goto out_unlock;
263 }
264 486
265skip_lock:
266 if (gfs2_is_stuffed(ip)) { 487 if (gfs2_is_stuffed(ip)) {
267 error = stuffed_readpage(ip, page); 488 error = stuffed_readpage(ip, page);
268 unlock_page(page); 489 unlock_page(page);
269 } else 490 } else {
270 error = mpage_readpage(page, gfs2_get_block); 491 error = mpage_readpage(page, gfs2_block_map);
492 }
271 493
272 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 494 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
273 error = -EIO; 495 return -EIO;
496
497 return error;
498}
499
500/**
501 * gfs2_readpage - read a page of a file
502 * @file: The file to read
503 * @page: The page of the file
504 *
505 * This deals with the locking required. We use a trylock in order to
506 * avoid the page lock / glock ordering problems, returning AOP_TRUNCATED_PAGE
507 * in the event that we are unable to get the lock.
508 */
509
510static int gfs2_readpage(struct file *file, struct page *page)
511{
512 struct gfs2_inode *ip = GFS2_I(page->mapping->host);
513 struct gfs2_holder gh;
514 int error;
274 515
275 if (do_unlock) { 516 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
276 gfs2_glock_dq_m(1, &gh); 517 error = gfs2_glock_nq_atime(&gh);
277 gfs2_holder_uninit(&gh); 518 if (unlikely(error)) {
519 unlock_page(page);
520 goto out;
278 } 521 }
522 error = __gfs2_readpage(file, page);
523 gfs2_glock_dq(&gh);
279out: 524out:
280 return error; 525 gfs2_holder_uninit(&gh);
281out_unlock:
282 unlock_page(page);
283 if (error == GLR_TRYFAILED) { 526 if (error == GLR_TRYFAILED) {
284 error = AOP_TRUNCATED_PAGE;
285 yield(); 527 yield();
528 return AOP_TRUNCATED_PAGE;
286 } 529 }
287 if (do_unlock) 530 return error;
288 gfs2_holder_uninit(&gh); 531}
289 goto out; 532
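
The new gfs2_readpage() takes the glock with a trylock and returns AOP_TRUNCATED_PAGE when the lock cannot be had, so the caller drops the page lock and retries instead of risking a page lock / glock inversion. A toy, self-contained retry loop with that shape; TOY_TRUNCATED_PAGE, try_glock() and toy_readpage() are made-up stand-ins:

#include <stdio.h>

#define TOY_TRUNCATED_PAGE (-1001)	/* stand-in for AOP_TRUNCATED_PAGE */

static int attempts;

/* Pretend the glock is contended on the first two tries. */
static int try_glock(void) { return ++attempts > 2; }

static int toy_readpage(void)
{
	if (!try_glock())
		return TOY_TRUNCATED_PAGE;	/* caller must drop the page lock and retry */
	printf("glock granted on attempt %d, reading page\n", attempts);
	return 0;
}

int main(void)
{
	int ret;

	do {
		/* caller side: (re)lock page, call readpage, back off on retry */
		ret = toy_readpage();
		if (ret == TOY_TRUNCATED_PAGE)
			printf("attempt %d: glock busy, unlock page and retry\n", attempts);
	} while (ret == TOY_TRUNCATED_PAGE);
	return ret;
}
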
533/**
534 * gfs2_internal_read - read an internal file
535 * @ip: The gfs2 inode
536 * @ra_state: The readahead state (or NULL for no readahead)
537 * @buf: The buffer to fill
538 * @pos: The file position
539 * @size: The amount to read
540 *
541 */
542
543int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
544 char *buf, loff_t *pos, unsigned size)
545{
546 struct address_space *mapping = ip->i_inode.i_mapping;
547 unsigned long index = *pos / PAGE_CACHE_SIZE;
548 unsigned offset = *pos & (PAGE_CACHE_SIZE - 1);
549 unsigned copied = 0;
550 unsigned amt;
551 struct page *page;
552 void *p;
553
554 do {
555 amt = size - copied;
556 if (offset + size > PAGE_CACHE_SIZE)
557 amt = PAGE_CACHE_SIZE - offset;
558 page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
559 if (IS_ERR(page))
560 return PTR_ERR(page);
561 p = kmap_atomic(page, KM_USER0);
562 memcpy(buf + copied, p + offset, amt);
563 kunmap_atomic(p, KM_USER0);
564 mark_page_accessed(page);
565 page_cache_release(page);
566 copied += amt;
567 index++;
568 offset = 0;
569 } while(copied < size);
570 (*pos) += size;
571 return size;
290} 572}
291 573
292/** 574/**
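
gfs2_internal_read() above walks the page cache one page at a time, clamping each copy to the end of the current page. A small standalone program that prints the (page index, offset, length) chunks such a loop produces, assuming a hypothetical 4096-byte page size and made-up position/length values:

#include <stdio.h>

#define TOY_PAGE_SIZE 4096u

int main(void)
{
	unsigned long long pos = 12000;	/* hypothetical file position */
	unsigned size = 10000;		/* hypothetical read length */
	unsigned long index = (unsigned long)(pos / TOY_PAGE_SIZE);
	unsigned offset = (unsigned)(pos % TOY_PAGE_SIZE);
	unsigned copied = 0;

	while (copied < size) {
		unsigned amt = size - copied;

		if (offset + amt > TOY_PAGE_SIZE)
			amt = TOY_PAGE_SIZE - offset;	/* clamp to the end of this page */
		printf("page %lu: copy %u bytes from offset %u\n", index, amt, offset);
		copied += amt;
		index++;
		offset = 0;
	}
	return 0;
}
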
@@ -300,10 +582,9 @@ out_unlock:
300 * Any I/O we ignore at this time will be done via readpage later. 582 * Any I/O we ignore at this time will be done via readpage later.
301 * 2. We don't handle stuffed files here; we let readpage do the honours. 583
302 * 3. mpage_readpages() does most of the heavy lifting in the common case. 584 * 3. mpage_readpages() does most of the heavy lifting in the common case.
303 * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. 585 * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places.
304 * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as
305 * well as read-ahead.
306 */ 586 */
587
307static int gfs2_readpages(struct file *file, struct address_space *mapping, 588static int gfs2_readpages(struct file *file, struct address_space *mapping,
308 struct list_head *pages, unsigned nr_pages) 589 struct list_head *pages, unsigned nr_pages)
309{ 590{
@@ -311,42 +592,20 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
311 struct gfs2_inode *ip = GFS2_I(inode); 592 struct gfs2_inode *ip = GFS2_I(inode);
312 struct gfs2_sbd *sdp = GFS2_SB(inode); 593 struct gfs2_sbd *sdp = GFS2_SB(inode);
313 struct gfs2_holder gh; 594 struct gfs2_holder gh;
314 int ret = 0; 595 int ret;
315 int do_unlock = 0;
316 596
317 if (likely(file != &gfs2_internal_file_sentinel)) { 597 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
318 if (file) { 598 ret = gfs2_glock_nq_atime(&gh);
319 struct gfs2_file *gf = file->private_data; 599 if (unlikely(ret))
320 if (test_bit(GFF_EXLOCK, &gf->f_flags)) 600 goto out_uninit;
321 goto skip_lock;
322 }
323 gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
324 LM_FLAG_TRY_1CB|GL_ATIME, &gh);
325 do_unlock = 1;
326 ret = gfs2_glock_nq_atime(&gh);
327 if (ret == GLR_TRYFAILED)
328 goto out_noerror;
329 if (unlikely(ret))
330 goto out_unlock;
331 }
332skip_lock:
333 if (!gfs2_is_stuffed(ip)) 601 if (!gfs2_is_stuffed(ip))
334 ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); 602 ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map);
335 603 gfs2_glock_dq(&gh);
336 if (do_unlock) { 604out_uninit:
337 gfs2_glock_dq_m(1, &gh); 605 gfs2_holder_uninit(&gh);
338 gfs2_holder_uninit(&gh);
339 }
340out:
341 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 606 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
342 ret = -EIO; 607 ret = -EIO;
343 return ret; 608 return ret;
344out_noerror:
345 ret = 0;
346out_unlock:
347 if (do_unlock)
348 gfs2_holder_uninit(&gh);
349 goto out;
350} 609}
351 610
352/** 611/**
@@ -382,20 +641,11 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
382 if (unlikely(error)) 641 if (unlikely(error))
383 goto out_uninit; 642 goto out_uninit;
384 643
385 error = -ENOMEM;
386 page = __grab_cache_page(mapping, index);
387 *pagep = page;
388 if (!page)
389 goto out_unlock;
390
391 gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); 644 gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
392
393 error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); 645 error = gfs2_write_alloc_required(ip, pos, len, &alloc_required);
394 if (error) 646 if (error)
395 goto out_putpage; 647 goto out_unlock;
396
397 648
398 ip->i_alloc.al_requested = 0;
399 if (alloc_required) { 649 if (alloc_required) {
400 al = gfs2_alloc_get(ip); 650 al = gfs2_alloc_get(ip);
401 651
@@ -424,40 +674,47 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
424 if (error) 674 if (error)
425 goto out_trans_fail; 675 goto out_trans_fail;
426 676
677 error = -ENOMEM;
678 page = __grab_cache_page(mapping, index);
679 *pagep = page;
680 if (unlikely(!page))
681 goto out_endtrans;
682
427 if (gfs2_is_stuffed(ip)) { 683 if (gfs2_is_stuffed(ip)) {
684 error = 0;
428 if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { 685 if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
429 error = gfs2_unstuff_dinode(ip, page); 686 error = gfs2_unstuff_dinode(ip, page);
430 if (error == 0) 687 if (error == 0)
431 goto prepare_write; 688 goto prepare_write;
432 } else if (!PageUptodate(page)) 689 } else if (!PageUptodate(page)) {
433 error = stuffed_readpage(ip, page); 690 error = stuffed_readpage(ip, page);
691 }
434 goto out; 692 goto out;
435 } 693 }
436 694
437prepare_write: 695prepare_write:
438 error = block_prepare_write(page, from, to, gfs2_get_block); 696 error = block_prepare_write(page, from, to, gfs2_block_map);
439
440out: 697out:
441 if (error) { 698 if (error == 0)
442 gfs2_trans_end(sdp); 699 return 0;
700
701 page_cache_release(page);
702 if (pos + len > ip->i_inode.i_size)
703 vmtruncate(&ip->i_inode, ip->i_inode.i_size);
704out_endtrans:
705 gfs2_trans_end(sdp);
443out_trans_fail: 706out_trans_fail:
444 if (alloc_required) { 707 if (alloc_required) {
445 gfs2_inplace_release(ip); 708 gfs2_inplace_release(ip);
446out_qunlock: 709out_qunlock:
447 gfs2_quota_unlock(ip); 710 gfs2_quota_unlock(ip);
448out_alloc_put: 711out_alloc_put:
449 gfs2_alloc_put(ip); 712 gfs2_alloc_put(ip);
450 } 713 }
451out_putpage:
452 page_cache_release(page);
453 if (pos + len > ip->i_inode.i_size)
454 vmtruncate(&ip->i_inode, ip->i_inode.i_size);
455out_unlock: 714out_unlock:
456 gfs2_glock_dq_m(1, &ip->i_gh); 715 gfs2_glock_dq(&ip->i_gh);
457out_uninit: 716out_uninit:
458 gfs2_holder_uninit(&ip->i_gh); 717 gfs2_holder_uninit(&ip->i_gh);
459 }
460
461 return error; 718 return error;
462} 719}
463 720
@@ -565,7 +822,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
565 struct gfs2_inode *ip = GFS2_I(inode); 822 struct gfs2_inode *ip = GFS2_I(inode);
566 struct gfs2_sbd *sdp = GFS2_SB(inode); 823 struct gfs2_sbd *sdp = GFS2_SB(inode);
567 struct buffer_head *dibh; 824 struct buffer_head *dibh;
568 struct gfs2_alloc *al = &ip->i_alloc; 825 struct gfs2_alloc *al = ip->i_alloc;
569 struct gfs2_dinode *di; 826 struct gfs2_dinode *di;
570 unsigned int from = pos & (PAGE_CACHE_SIZE - 1); 827 unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
571 unsigned int to = from + len; 828 unsigned int to = from + len;
@@ -585,19 +842,16 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
585 if (gfs2_is_stuffed(ip)) 842 if (gfs2_is_stuffed(ip))
586 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); 843 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
587 844
588 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) 845 if (!gfs2_is_writeback(ip))
589 gfs2_page_add_databufs(ip, page, from, to); 846 gfs2_page_add_databufs(ip, page, from, to);
590 847
591 ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); 848 ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
592 849
593 if (likely(ret >= 0)) { 850 if (likely(ret >= 0) && (inode->i_size > ip->i_di.di_size)) {
594 copied = ret; 851 di = (struct gfs2_dinode *)dibh->b_data;
595 if ((pos + copied) > inode->i_size) { 852 ip->i_di.di_size = inode->i_size;
596 di = (struct gfs2_dinode *)dibh->b_data; 853 di->di_size = cpu_to_be64(inode->i_size);
597 ip->i_di.di_size = inode->i_size; 854 mark_inode_dirty(inode);
598 di->di_size = cpu_to_be64(inode->i_size);
599 mark_inode_dirty(inode);
600 }
601 } 855 }
602 856
603 if (inode == sdp->sd_rindex) 857 if (inode == sdp->sd_rindex)
@@ -606,7 +860,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
606 brelse(dibh); 860 brelse(dibh);
607 gfs2_trans_end(sdp); 861 gfs2_trans_end(sdp);
608failed: 862failed:
609 if (al->al_requested) { 863 if (al) {
610 gfs2_inplace_release(ip); 864 gfs2_inplace_release(ip);
611 gfs2_quota_unlock(ip); 865 gfs2_quota_unlock(ip);
612 gfs2_alloc_put(ip); 866 gfs2_alloc_put(ip);
@@ -625,11 +879,7 @@ failed:
625 879
626static int gfs2_set_page_dirty(struct page *page) 880static int gfs2_set_page_dirty(struct page *page)
627{ 881{
628 struct gfs2_inode *ip = GFS2_I(page->mapping->host); 882 SetPageChecked(page);
629 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
630
631 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
632 SetPageChecked(page);
633 return __set_page_dirty_buffers(page); 883 return __set_page_dirty_buffers(page);
634} 884}
635 885
@@ -653,7 +903,7 @@ static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
653 return 0; 903 return 0;
654 904
655 if (!gfs2_is_stuffed(ip)) 905 if (!gfs2_is_stuffed(ip))
656 dblock = generic_block_bmap(mapping, lblock, gfs2_get_block); 906 dblock = generic_block_bmap(mapping, lblock, gfs2_block_map);
657 907
658 gfs2_glock_dq_uninit(&i_gh); 908 gfs2_glock_dq_uninit(&i_gh);
659 909
@@ -719,13 +969,9 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
719{ 969{
720 /* 970 /*
721 * Should we return an error here? I can't see that O_DIRECT for 971 * Should we return an error here? I can't see that O_DIRECT for
722 * a journaled file makes any sense. For now we'll silently fall 972 * a stuffed file makes any sense. For now we'll silently fall
723 * back to buffered I/O, likewise we do the same for stuffed 973 * back to buffered I/O
724 * files since they are (a) small and (b) unaligned.
725 */ 974 */
726 if (gfs2_is_jdata(ip))
727 return 0;
728
729 if (gfs2_is_stuffed(ip)) 975 if (gfs2_is_stuffed(ip))
730 return 0; 976 return 0;
731 977
@@ -836,9 +1082,23 @@ cannot_release:
836 return 0; 1082 return 0;
837} 1083}
838 1084
839const struct address_space_operations gfs2_file_aops = { 1085static const struct address_space_operations gfs2_writeback_aops = {
840 .writepage = gfs2_writepage, 1086 .writepage = gfs2_writeback_writepage,
841 .writepages = gfs2_writepages, 1087 .writepages = gfs2_writeback_writepages,
1088 .readpage = gfs2_readpage,
1089 .readpages = gfs2_readpages,
1090 .sync_page = block_sync_page,
1091 .write_begin = gfs2_write_begin,
1092 .write_end = gfs2_write_end,
1093 .bmap = gfs2_bmap,
1094 .invalidatepage = gfs2_invalidatepage,
1095 .releasepage = gfs2_releasepage,
1096 .direct_IO = gfs2_direct_IO,
1097 .migratepage = buffer_migrate_page,
1098};
1099
1100static const struct address_space_operations gfs2_ordered_aops = {
1101 .writepage = gfs2_ordered_writepage,
842 .readpage = gfs2_readpage, 1102 .readpage = gfs2_readpage,
843 .readpages = gfs2_readpages, 1103 .readpages = gfs2_readpages,
844 .sync_page = block_sync_page, 1104 .sync_page = block_sync_page,
@@ -849,5 +1109,34 @@ const struct address_space_operations gfs2_file_aops = {
849 .invalidatepage = gfs2_invalidatepage, 1109 .invalidatepage = gfs2_invalidatepage,
850 .releasepage = gfs2_releasepage, 1110 .releasepage = gfs2_releasepage,
851 .direct_IO = gfs2_direct_IO, 1111 .direct_IO = gfs2_direct_IO,
1112 .migratepage = buffer_migrate_page,
852}; 1113};
853 1114
1115static const struct address_space_operations gfs2_jdata_aops = {
1116 .writepage = gfs2_jdata_writepage,
1117 .writepages = gfs2_jdata_writepages,
1118 .readpage = gfs2_readpage,
1119 .readpages = gfs2_readpages,
1120 .sync_page = block_sync_page,
1121 .write_begin = gfs2_write_begin,
1122 .write_end = gfs2_write_end,
1123 .set_page_dirty = gfs2_set_page_dirty,
1124 .bmap = gfs2_bmap,
1125 .invalidatepage = gfs2_invalidatepage,
1126 .releasepage = gfs2_releasepage,
1127};
1128
1129void gfs2_set_aops(struct inode *inode)
1130{
1131 struct gfs2_inode *ip = GFS2_I(inode);
1132
1133 if (gfs2_is_writeback(ip))
1134 inode->i_mapping->a_ops = &gfs2_writeback_aops;
1135 else if (gfs2_is_ordered(ip))
1136 inode->i_mapping->a_ops = &gfs2_ordered_aops;
1137 else if (gfs2_is_jdata(ip))
1138 inode->i_mapping->a_ops = &gfs2_jdata_aops;
1139 else
1140 BUG();
1141}
1142
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h
index fa1b5b3d28b9..5da21285bba4 100644
--- a/fs/gfs2/ops_address.h
+++ b/fs/gfs2/ops_address.h
@@ -14,9 +14,10 @@
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/mm.h> 15#include <linux/mm.h>
16 16
17extern const struct address_space_operations gfs2_file_aops;
18extern int gfs2_get_block(struct inode *inode, sector_t lblock,
19 struct buffer_head *bh_result, int create);
20extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); 17extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask);
18extern int gfs2_internal_read(struct gfs2_inode *ip,
19 struct file_ra_state *ra_state,
20 char *buf, loff_t *pos, unsigned size);
21extern void gfs2_set_aops(struct inode *inode);
21 22
22#endif /* __OPS_ADDRESS_DOT_H__ */ 23#endif /* __OPS_ADDRESS_DOT_H__ */
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index bb11fd6752d3..f4842f2548cd 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -33,57 +33,12 @@
33#include "lm.h" 33#include "lm.h"
34#include "log.h" 34#include "log.h"
35#include "meta_io.h" 35#include "meta_io.h"
36#include "ops_file.h"
37#include "ops_vm.h"
38#include "quota.h" 36#include "quota.h"
39#include "rgrp.h" 37#include "rgrp.h"
40#include "trans.h" 38#include "trans.h"
41#include "util.h" 39#include "util.h"
42#include "eaops.h" 40#include "eaops.h"
43 41#include "ops_address.h"
44/*
45 * Most fields left uninitialised to catch anybody who tries to
46 * use them. f_flags set to prevent file_accessed() from touching
47 * any other part of this. Its use is purely as a flag so that we
48 * know (in readpage()) whether or not do to locking.
49 */
50struct file gfs2_internal_file_sentinel = {
51 .f_flags = O_NOATIME|O_RDONLY,
52};
53
54static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
55 unsigned long offset, unsigned long size)
56{
57 char *kaddr;
58 unsigned long count = desc->count;
59
60 if (size > count)
61 size = count;
62
63 kaddr = kmap(page);
64 memcpy(desc->arg.data, kaddr + offset, size);
65 kunmap(page);
66
67 desc->count = count - size;
68 desc->written += size;
69 desc->arg.buf += size;
70 return size;
71}
72
73int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
74 char *buf, loff_t *pos, unsigned size)
75{
76 struct inode *inode = &ip->i_inode;
77 read_descriptor_t desc;
78 desc.written = 0;
79 desc.arg.data = buf;
80 desc.count = size;
81 desc.error = 0;
82 do_generic_mapping_read(inode->i_mapping, ra_state,
83 &gfs2_internal_file_sentinel, pos, &desc,
84 gfs2_read_actor);
85 return desc.written ? desc.written : desc.error;
86}
87 42
88/** 43/**
89 * gfs2_llseek - seek to a location in a file 44 * gfs2_llseek - seek to a location in a file
@@ -214,7 +169,7 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
214 if (put_user(fsflags, ptr)) 169 if (put_user(fsflags, ptr))
215 error = -EFAULT; 170 error = -EFAULT;
216 171
217 gfs2_glock_dq_m(1, &gh); 172 gfs2_glock_dq(&gh);
218 gfs2_holder_uninit(&gh); 173 gfs2_holder_uninit(&gh);
219 return error; 174 return error;
220} 175}
@@ -291,7 +246,16 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
291 if (error) 246 if (error)
292 goto out; 247 goto out;
293 } 248 }
294 249 if ((flags ^ new_flags) & GFS2_DIF_JDATA) {
250 if (flags & GFS2_DIF_JDATA)
251 gfs2_log_flush(sdp, ip->i_gl);
252 error = filemap_fdatawrite(inode->i_mapping);
253 if (error)
254 goto out;
255 error = filemap_fdatawait(inode->i_mapping);
256 if (error)
257 goto out;
258 }
295 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 259 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
296 if (error) 260 if (error)
297 goto out; 261 goto out;
@@ -303,6 +267,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
303 gfs2_dinode_out(ip, bh->b_data); 267 gfs2_dinode_out(ip, bh->b_data);
304 brelse(bh); 268 brelse(bh);
305 gfs2_set_inode_flags(inode); 269 gfs2_set_inode_flags(inode);
270 gfs2_set_aops(inode);
306out_trans_end: 271out_trans_end:
307 gfs2_trans_end(sdp); 272 gfs2_trans_end(sdp);
308out: 273out:
@@ -338,6 +303,128 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
338 return -ENOTTY; 303 return -ENOTTY;
339} 304}
340 305
306/**
307 * gfs2_allocate_page_backing - Use bmap to allocate blocks
308 * @page: The (locked) page to allocate backing for
309 *
310 * We try to allocate all the blocks required for the page in
311 * one go. This might fail for various reasons, so we keep
312 * trying until all the blocks to back this page are allocated.
313 * If some of the blocks are already allocated, that's ok too.
314 */
315
316static int gfs2_allocate_page_backing(struct page *page)
317{
318 struct inode *inode = page->mapping->host;
319 struct buffer_head bh;
320 unsigned long size = PAGE_CACHE_SIZE;
321 u64 lblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
322
323 do {
324 bh.b_state = 0;
325 bh.b_size = size;
326 gfs2_block_map(inode, lblock, &bh, 1);
327 if (!buffer_mapped(&bh))
328 return -EIO;
329 size -= bh.b_size;
330 lblock += (bh.b_size >> inode->i_blkbits);
331 } while(size > 0);
332 return 0;
333}
334
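
gfs2_allocate_page_backing() keeps calling the block mapper with the remaining size until the whole page is backed, advancing by however much each call managed to map. A userspace model of that loop; toy_block_map() is an invented stand-in that maps at most two blocks per call:

#include <stdio.h>

#define TOY_BLOCK_SIZE 1024u
#define TOY_PAGE_SIZE  4096u

/*
 * Invented stand-in for the block mapper: pretend each call can map at
 * most two blocks' worth of the request and report that back in *mapped.
 */
static int toy_block_map(unsigned long lblock, unsigned want, unsigned *mapped)
{
	*mapped = (want > 2 * TOY_BLOCK_SIZE) ? 2 * TOY_BLOCK_SIZE : want;
	printf("mapped %u bytes at logical block %lu\n", *mapped, lblock);
	return 0;
}

int main(void)
{
	unsigned long lblock = 0;
	unsigned size = TOY_PAGE_SIZE;

	/* Keep asking for the remainder until the whole page is backed. */
	do {
		unsigned got = 0;

		if (toy_block_map(lblock, size, &got) || got == 0)
			return 1;
		size -= got;
		lblock += got / TOY_BLOCK_SIZE;
	} while (size > 0);
	return 0;
}
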
335/**
336 * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable
337 * @vma: The virtual memory area
338 * @page: The page which is about to become writable
339 *
340 * When the page becomes writable, we need to ensure that we have
341 * blocks allocated on disk to back that page.
342 */
343
344static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
345{
346 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
347 struct gfs2_inode *ip = GFS2_I(inode);
348 struct gfs2_sbd *sdp = GFS2_SB(inode);
349 unsigned long last_index;
350 u64 pos = page->index << (PAGE_CACHE_SIZE - inode->i_blkbits);
351 unsigned int data_blocks, ind_blocks, rblocks;
352 int alloc_required = 0;
353 struct gfs2_holder gh;
354 struct gfs2_alloc *al;
355 int ret;
356
357 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh);
358 ret = gfs2_glock_nq_atime(&gh);
359 if (ret)
360 goto out;
361
362 set_bit(GIF_SW_PAGED, &ip->i_flags);
363 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
364 ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required);
365 if (ret || !alloc_required)
366 goto out_unlock;
367 ret = -ENOMEM;
368 al = gfs2_alloc_get(ip);
369 if (al == NULL)
370 goto out_unlock;
371
372 ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
373 if (ret)
374 goto out_alloc_put;
375 ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
376 if (ret)
377 goto out_quota_unlock;
378 al->al_requested = data_blocks + ind_blocks;
379 ret = gfs2_inplace_reserve(ip);
380 if (ret)
381 goto out_quota_unlock;
382
383 rblocks = RES_DINODE + ind_blocks;
384 if (gfs2_is_jdata(ip))
385 rblocks += data_blocks ? data_blocks : 1;
386 if (ind_blocks || data_blocks)
387 rblocks += RES_STATFS + RES_QUOTA;
388 ret = gfs2_trans_begin(sdp, rblocks, 0);
389 if (ret)
390 goto out_trans_fail;
391
392 lock_page(page);
393 ret = -EINVAL;
394 last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT;
395 if (page->index > last_index)
396 goto out_unlock_page;
397 ret = 0;
398 if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping)
399 goto out_unlock_page;
400 if (gfs2_is_stuffed(ip)) {
401 ret = gfs2_unstuff_dinode(ip, page);
402 if (ret)
403 goto out_unlock_page;
404 }
405 ret = gfs2_allocate_page_backing(page);
406
407out_unlock_page:
408 unlock_page(page);
409 gfs2_trans_end(sdp);
410out_trans_fail:
411 gfs2_inplace_release(ip);
412out_quota_unlock:
413 gfs2_quota_unlock(ip);
414out_alloc_put:
415 gfs2_alloc_put(ip);
416out_unlock:
417 gfs2_glock_dq(&gh);
418out:
419 gfs2_holder_uninit(&gh);
420 return ret;
421}
422
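
gfs2_page_mkwrite() sizes its transaction from the reservation it just made: RES_DINODE plus the indirect blocks, plus the data blocks when the inode journals its data, plus statfs and quota blocks whenever anything is being allocated. A hedged arithmetic sketch of that calculation; the TOY_RES_* values are invented and the real constants live in the GFS2 headers:

#include <stdio.h>

/* Invented values; the real RES_* constants come from the GFS2 headers. */
#define TOY_RES_DINODE 1u
#define TOY_RES_STATFS 1u
#define TOY_RES_QUOTA  2u

static unsigned rblocks_for_fault(unsigned data_blocks, unsigned ind_blocks, int jdata)
{
	unsigned rblocks = TOY_RES_DINODE + ind_blocks;

	if (jdata)			/* journaled data: the data blocks hit the log too */
		rblocks += data_blocks ? data_blocks : 1;
	if (ind_blocks || data_blocks)	/* any allocation also touches statfs and quota */
		rblocks += TOY_RES_STATFS + TOY_RES_QUOTA;
	return rblocks;
}

int main(void)
{
	printf("ordered page fault: %u journal blocks\n", rblocks_for_fault(1, 1, 0));
	printf("jdata page fault:   %u journal blocks\n", rblocks_for_fault(1, 1, 1));
	return 0;
}
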
423static struct vm_operations_struct gfs2_vm_ops = {
424 .fault = filemap_fault,
425 .page_mkwrite = gfs2_page_mkwrite,
426};
427
341 428
342/** 429/**
343 * gfs2_mmap - 430 * gfs2_mmap -
@@ -360,14 +447,7 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
360 return error; 447 return error;
361 } 448 }
362 449
363 /* This is VM_MAYWRITE instead of VM_WRITE because a call 450 vma->vm_ops = &gfs2_vm_ops;
364 to mprotect() can turn on VM_WRITE later. */
365
366 if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
367 (VM_MAYSHARE | VM_MAYWRITE))
368 vma->vm_ops = &gfs2_vm_ops_sharewrite;
369 else
370 vma->vm_ops = &gfs2_vm_ops_private;
371 451
372 gfs2_glock_dq_uninit(&i_gh); 452 gfs2_glock_dq_uninit(&i_gh);
373 453
@@ -538,15 +618,6 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
538 if (__mandatory_lock(&ip->i_inode)) 618 if (__mandatory_lock(&ip->i_inode))
539 return -ENOLCK; 619 return -ENOLCK;
540 620
541 if (sdp->sd_args.ar_localflocks) {
542 if (IS_GETLK(cmd)) {
543 posix_test_lock(file, fl);
544 return 0;
545 } else {
546 return posix_lock_file_wait(file, fl);
547 }
548 }
549
550 if (cmd == F_CANCELLK) { 621 if (cmd == F_CANCELLK) {
551 /* Hack: */ 622 /* Hack: */
552 cmd = F_SETLK; 623 cmd = F_SETLK;
@@ -632,16 +703,12 @@ static void do_unflock(struct file *file, struct file_lock *fl)
632static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) 703static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
633{ 704{
634 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 705 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
635 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
636 706
637 if (!(fl->fl_flags & FL_FLOCK)) 707 if (!(fl->fl_flags & FL_FLOCK))
638 return -ENOLCK; 708 return -ENOLCK;
639 if (__mandatory_lock(&ip->i_inode)) 709 if (__mandatory_lock(&ip->i_inode))
640 return -ENOLCK; 710 return -ENOLCK;
641 711
642 if (sdp->sd_args.ar_localflocks)
643 return flock_lock_file_wait(file, fl);
644
645 if (fl->fl_type == F_UNLCK) { 712 if (fl->fl_type == F_UNLCK) {
646 do_unflock(file, fl); 713 do_unflock(file, fl);
647 return 0; 714 return 0;
@@ -678,3 +745,27 @@ const struct file_operations gfs2_dir_fops = {
678 .flock = gfs2_flock, 745 .flock = gfs2_flock,
679}; 746};
680 747
748const struct file_operations gfs2_file_fops_nolock = {
749 .llseek = gfs2_llseek,
750 .read = do_sync_read,
751 .aio_read = generic_file_aio_read,
752 .write = do_sync_write,
753 .aio_write = generic_file_aio_write,
754 .unlocked_ioctl = gfs2_ioctl,
755 .mmap = gfs2_mmap,
756 .open = gfs2_open,
757 .release = gfs2_close,
758 .fsync = gfs2_fsync,
759 .splice_read = generic_file_splice_read,
760 .splice_write = generic_file_splice_write,
761 .setlease = gfs2_setlease,
762};
763
764const struct file_operations gfs2_dir_fops_nolock = {
765 .readdir = gfs2_readdir,
766 .unlocked_ioctl = gfs2_ioctl,
767 .open = gfs2_open,
768 .release = gfs2_close,
769 .fsync = gfs2_fsync,
770};
771
diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h
deleted file mode 100644
index 7e5d8ec9c846..000000000000
--- a/fs/gfs2/ops_file.h
+++ /dev/null
@@ -1,24 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_FILE_DOT_H__
11#define __OPS_FILE_DOT_H__
12
13#include <linux/fs.h>
14struct gfs2_inode;
15
16extern struct file gfs2_internal_file_sentinel;
17extern int gfs2_internal_read(struct gfs2_inode *ip,
18 struct file_ra_state *ra_state,
19 char *buf, loff_t *pos, unsigned size);
20extern void gfs2_set_inode_flags(struct inode *inode);
21extern const struct file_operations gfs2_file_fops;
22extern const struct file_operations gfs2_dir_fops;
23
24#endif /* __OPS_FILE_DOT_H__ */
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 17de58e83d92..43d511bba52d 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -21,6 +21,7 @@
21 21
22#include "gfs2.h" 22#include "gfs2.h"
23#include "incore.h" 23#include "incore.h"
24#include "bmap.h"
24#include "daemon.h" 25#include "daemon.h"
25#include "glock.h" 26#include "glock.h"
26#include "glops.h" 27#include "glops.h"
@@ -59,7 +60,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
59 60
60 mutex_init(&sdp->sd_inum_mutex); 61 mutex_init(&sdp->sd_inum_mutex);
61 spin_lock_init(&sdp->sd_statfs_spin); 62 spin_lock_init(&sdp->sd_statfs_spin);
62 mutex_init(&sdp->sd_statfs_mutex);
63 63
64 spin_lock_init(&sdp->sd_rindex_spin); 64 spin_lock_init(&sdp->sd_rindex_spin);
65 mutex_init(&sdp->sd_rindex_mutex); 65 mutex_init(&sdp->sd_rindex_mutex);
@@ -77,7 +77,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
77 77
78 spin_lock_init(&sdp->sd_log_lock); 78 spin_lock_init(&sdp->sd_log_lock);
79 79
80 INIT_LIST_HEAD(&sdp->sd_log_le_gl);
81 INIT_LIST_HEAD(&sdp->sd_log_le_buf); 80 INIT_LIST_HEAD(&sdp->sd_log_le_buf);
82 INIT_LIST_HEAD(&sdp->sd_log_le_revoke); 81 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
83 INIT_LIST_HEAD(&sdp->sd_log_le_rg); 82 INIT_LIST_HEAD(&sdp->sd_log_le_rg);
@@ -303,6 +302,67 @@ out:
303 return error; 302 return error;
304} 303}
305 304
305/**
306 * map_journal_extents - create a reusable "extent" mapping from all logical
307 * blocks to all physical blocks for the given journal. This will save
308 * us time when writing journal blocks. Most journals will have only one
309 * extent that maps all their logical blocks. That's because gfs2.mkfs
310 * arranges the journal blocks sequentially to maximize performance.
311 * So the extent would map the first block for the entire file length.
312 * However, gfs2_jadd can happen while file activity is happening, so
313 * those journals may not be sequential. Less likely is the case where
314 * the users created their own journals by mounting the metafs and
315 * laying it out. But it's still possible. These journals might have
316 * several extents.
317 *
318 * TODO: This should be done in bigger chunks rather than one block at a time,
319 * but since it's only done at mount time, I'm not worried about the
320 * time it takes.
321 */
322static int map_journal_extents(struct gfs2_sbd *sdp)
323{
324 struct gfs2_jdesc *jd = sdp->sd_jdesc;
325 unsigned int lb;
326 u64 db, prev_db; /* logical block, disk block, prev disk block */
327 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
328 struct gfs2_journal_extent *jext = NULL;
329 struct buffer_head bh;
330 int rc = 0;
331
332 prev_db = 0;
333
334 for (lb = 0; lb < ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift; lb++) {
335 bh.b_state = 0;
336 bh.b_blocknr = 0;
337 bh.b_size = 1 << ip->i_inode.i_blkbits;
338 rc = gfs2_block_map(jd->jd_inode, lb, &bh, 0);
339 db = bh.b_blocknr;
340 if (rc || !db) {
341 printk(KERN_INFO "GFS2 journal mapping error %d: lb="
342 "%u db=%llu\n", rc, lb, (unsigned long long)db);
343 break;
344 }
345 if (!prev_db || db != prev_db + 1) {
346 jext = kzalloc(sizeof(struct gfs2_journal_extent),
347 GFP_KERNEL);
348 if (!jext) {
349 printk(KERN_INFO "GFS2 error: out of memory "
350 "mapping journal extents.\n");
351 rc = -ENOMEM;
352 break;
353 }
354 jext->dblock = db;
355 jext->lblock = lb;
356 jext->blocks = 1;
357 list_add_tail(&jext->extent_list, &jd->extent_list);
358 } else {
359 jext->blocks++;
360 }
361 prev_db = db;
362 }
363 return rc;
364}
365
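
map_journal_extents() above coalesces the journal's per-block mapping into runs of consecutive disk blocks, exactly as its comment describes. The same merging logic as a standalone program over a hypothetical logical-to-disk table:

#include <stdio.h>

struct toy_extent {
	unsigned long long dblock;	/* first disk block of the run */
	unsigned lblock;		/* first logical block of the run */
	unsigned blocks;		/* length of the run */
};

int main(void)
{
	/* Hypothetical logical-to-disk mapping for a small journal. */
	unsigned long long map[] = { 500, 501, 502, 503, 900, 901, 1200 };
	unsigned nblocks = sizeof(map) / sizeof(map[0]);
	struct toy_extent ext[8];
	unsigned next = 0, lb;
	unsigned long long prev_db = 0;

	for (lb = 0; lb < nblocks; lb++) {
		unsigned long long db = map[lb];

		if (!prev_db || db != prev_db + 1) {
			ext[next].dblock = db;		/* start a new extent */
			ext[next].lblock = lb;
			ext[next].blocks = 1;
			next++;
		} else {
			ext[next - 1].blocks++;		/* extend the current run */
		}
		prev_db = db;
	}

	for (lb = 0; lb < next; lb++)
		printf("extent: lblock %u -> dblock %llu, %u blocks\n",
		       ext[lb].lblock, ext[lb].dblock, ext[lb].blocks);
	return 0;
}
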
306static int init_journal(struct gfs2_sbd *sdp, int undo) 366static int init_journal(struct gfs2_sbd *sdp, int undo)
307{ 367{
308 struct gfs2_holder ji_gh; 368 struct gfs2_holder ji_gh;
@@ -340,7 +400,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
340 400
341 if (sdp->sd_args.ar_spectator) { 401 if (sdp->sd_args.ar_spectator) {
342 sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0); 402 sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0);
343 sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks; 403 atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks);
344 } else { 404 } else {
345 if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) { 405 if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) {
346 fs_err(sdp, "can't mount journal #%u\n", 406 fs_err(sdp, "can't mount journal #%u\n",
@@ -377,7 +437,10 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
377 sdp->sd_jdesc->jd_jid, error); 437 sdp->sd_jdesc->jd_jid, error);
378 goto fail_jinode_gh; 438 goto fail_jinode_gh;
379 } 439 }
380 sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks; 440 atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks);
441
442 /* Map the extents for this journal's blocks */
443 map_journal_extents(sdp);
381 } 444 }
382 445
383 if (sdp->sd_lockstruct.ls_first) { 446 if (sdp->sd_lockstruct.ls_first) {
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 291f0c7eaa3b..9f71372c1757 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -61,7 +61,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
61 inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0); 61 inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0);
62 if (!IS_ERR(inode)) { 62 if (!IS_ERR(inode)) {
63 gfs2_trans_end(sdp); 63 gfs2_trans_end(sdp);
64 if (dip->i_alloc.al_rgd) 64 if (dip->i_alloc->al_rgd)
65 gfs2_inplace_release(dip); 65 gfs2_inplace_release(dip);
66 gfs2_quota_unlock(dip); 66 gfs2_quota_unlock(dip);
67 gfs2_alloc_put(dip); 67 gfs2_alloc_put(dip);
@@ -113,8 +113,18 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
113 if (inode && IS_ERR(inode)) 113 if (inode && IS_ERR(inode))
114 return ERR_PTR(PTR_ERR(inode)); 114 return ERR_PTR(PTR_ERR(inode));
115 115
116 if (inode) 116 if (inode) {
117 struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
118 struct gfs2_holder gh;
119 int error;
120 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
121 if (error) {
122 iput(inode);
123 return ERR_PTR(error);
124 }
125 gfs2_glock_dq_uninit(&gh);
117 return d_splice_alias(inode, dentry); 126 return d_splice_alias(inode, dentry);
127 }
118 d_add(dentry, inode); 128 d_add(dentry, inode);
119 129
120 return NULL; 130 return NULL;
@@ -366,7 +376,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
366 } 376 }
367 377
368 gfs2_trans_end(sdp); 378 gfs2_trans_end(sdp);
369 if (dip->i_alloc.al_rgd) 379 if (dip->i_alloc->al_rgd)
370 gfs2_inplace_release(dip); 380 gfs2_inplace_release(dip);
371 gfs2_quota_unlock(dip); 381 gfs2_quota_unlock(dip);
372 gfs2_alloc_put(dip); 382 gfs2_alloc_put(dip);
@@ -442,7 +452,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
442 gfs2_assert_withdraw(sdp, !error); /* dip already pinned */ 452 gfs2_assert_withdraw(sdp, !error); /* dip already pinned */
443 453
444 gfs2_trans_end(sdp); 454 gfs2_trans_end(sdp);
445 if (dip->i_alloc.al_rgd) 455 if (dip->i_alloc->al_rgd)
446 gfs2_inplace_release(dip); 456 gfs2_inplace_release(dip);
447 gfs2_quota_unlock(dip); 457 gfs2_quota_unlock(dip);
448 gfs2_alloc_put(dip); 458 gfs2_alloc_put(dip);
@@ -548,7 +558,7 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
548 } 558 }
549 559
550 gfs2_trans_end(sdp); 560 gfs2_trans_end(sdp);
551 if (dip->i_alloc.al_rgd) 561 if (dip->i_alloc->al_rgd)
552 gfs2_inplace_release(dip); 562 gfs2_inplace_release(dip);
553 gfs2_quota_unlock(dip); 563 gfs2_quota_unlock(dip);
554 gfs2_alloc_put(dip); 564 gfs2_alloc_put(dip);
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h
index 34f0caac1a03..fd8cee231e1d 100644
--- a/fs/gfs2/ops_inode.h
+++ b/fs/gfs2/ops_inode.h
@@ -16,5 +16,11 @@ extern const struct inode_operations gfs2_file_iops;
16extern const struct inode_operations gfs2_dir_iops; 16extern const struct inode_operations gfs2_dir_iops;
17extern const struct inode_operations gfs2_symlink_iops; 17extern const struct inode_operations gfs2_symlink_iops;
18extern const struct inode_operations gfs2_dev_iops; 18extern const struct inode_operations gfs2_dev_iops;
19extern const struct file_operations gfs2_file_fops;
20extern const struct file_operations gfs2_dir_fops;
21extern const struct file_operations gfs2_file_fops_nolock;
22extern const struct file_operations gfs2_dir_fops_nolock;
23
24extern void gfs2_set_inode_flags(struct inode *inode);
19 25
20#endif /* __OPS_INODE_DOT_H__ */ 26#endif /* __OPS_INODE_DOT_H__ */
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 950f31460e8b..5e524217944a 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -487,7 +487,6 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
487 if (ip) { 487 if (ip) {
488 ip->i_flags = 0; 488 ip->i_flags = 0;
489 ip->i_gl = NULL; 489 ip->i_gl = NULL;
490 ip->i_last_pfault = jiffies;
491 } 490 }
492 return &ip->i_inode; 491 return &ip->i_inode;
493} 492}
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
deleted file mode 100644
index 927d739d4685..000000000000
--- a/fs/gfs2/ops_vm.c
+++ /dev/null
@@ -1,169 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/slab.h>
11#include <linux/spinlock.h>
12#include <linux/completion.h>
13#include <linux/buffer_head.h>
14#include <linux/mm.h>
15#include <linux/pagemap.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "bmap.h"
22#include "glock.h"
23#include "inode.h"
24#include "ops_vm.h"
25#include "quota.h"
26#include "rgrp.h"
27#include "trans.h"
28#include "util.h"
29
30static int gfs2_private_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
31{
32 struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host);
33
34 set_bit(GIF_PAGED, &ip->i_flags);
35 return filemap_fault(vma, vmf);
36}
37
38static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
39{
40 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
41 unsigned long index = page->index;
42 u64 lblock = index << (PAGE_CACHE_SHIFT -
43 sdp->sd_sb.sb_bsize_shift);
44 unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift;
45 struct gfs2_alloc *al;
46 unsigned int data_blocks, ind_blocks;
47 unsigned int x;
48 int error;
49
50 al = gfs2_alloc_get(ip);
51
52 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
53 if (error)
54 goto out;
55
56 error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
57 if (error)
58 goto out_gunlock_q;
59
60 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
61
62 al->al_requested = data_blocks + ind_blocks;
63
64 error = gfs2_inplace_reserve(ip);
65 if (error)
66 goto out_gunlock_q;
67
68 error = gfs2_trans_begin(sdp, al->al_rgd->rd_length +
69 ind_blocks + RES_DINODE +
70 RES_STATFS + RES_QUOTA, 0);
71 if (error)
72 goto out_ipres;
73
74 if (gfs2_is_stuffed(ip)) {
75 error = gfs2_unstuff_dinode(ip, NULL);
76 if (error)
77 goto out_trans;
78 }
79
80 for (x = 0; x < blocks; ) {
81 u64 dblock;
82 unsigned int extlen;
83 int new = 1;
84
85 error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen);
86 if (error)
87 goto out_trans;
88
89 lblock += extlen;
90 x += extlen;
91 }
92
93 gfs2_assert_warn(sdp, al->al_alloced);
94
95out_trans:
96 gfs2_trans_end(sdp);
97out_ipres:
98 gfs2_inplace_release(ip);
99out_gunlock_q:
100 gfs2_quota_unlock(ip);
101out:
102 gfs2_alloc_put(ip);
103 return error;
104}
105
106static int gfs2_sharewrite_fault(struct vm_area_struct *vma,
107 struct vm_fault *vmf)
108{
109 struct file *file = vma->vm_file;
110 struct gfs2_file *gf = file->private_data;
111 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
112 struct gfs2_holder i_gh;
113 int alloc_required;
114 int error;
115 int ret = 0;
116
117 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
118 if (error)
119 goto out;
120
121 set_bit(GIF_PAGED, &ip->i_flags);
122 set_bit(GIF_SW_PAGED, &ip->i_flags);
123
124 error = gfs2_write_alloc_required(ip,
125 (u64)vmf->pgoff << PAGE_CACHE_SHIFT,
126 PAGE_CACHE_SIZE, &alloc_required);
127 if (error) {
128 ret = VM_FAULT_OOM; /* XXX: are these right? */
129 goto out_unlock;
130 }
131
132 set_bit(GFF_EXLOCK, &gf->f_flags);
133 ret = filemap_fault(vma, vmf);
134 clear_bit(GFF_EXLOCK, &gf->f_flags);
135 if (ret & VM_FAULT_ERROR)
136 goto out_unlock;
137
138 if (alloc_required) {
139 /* XXX: do we need to drop page lock around alloc_page_backing?*/
140 error = alloc_page_backing(ip, vmf->page);
141 if (error) {
142 /*
143 * VM_FAULT_LOCKED should always be the case for
144 * filemap_fault, but it may not be in a future
145 * implementation.
146 */
147 if (ret & VM_FAULT_LOCKED)
148 unlock_page(vmf->page);
149 page_cache_release(vmf->page);
150 ret = VM_FAULT_OOM;
151 goto out_unlock;
152 }
153 set_page_dirty(vmf->page);
154 }
155
156out_unlock:
157 gfs2_glock_dq_uninit(&i_gh);
158out:
159 return ret;
160}
161
162struct vm_operations_struct gfs2_vm_ops_private = {
163 .fault = gfs2_private_fault,
164};
165
166struct vm_operations_struct gfs2_vm_ops_sharewrite = {
167 .fault = gfs2_sharewrite_fault,
168};
169
diff --git a/fs/gfs2/ops_vm.h b/fs/gfs2/ops_vm.h
deleted file mode 100644
index 4ae8f43ed5e3..000000000000
--- a/fs/gfs2/ops_vm.h
+++ /dev/null
@@ -1,18 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_VM_DOT_H__
11#define __OPS_VM_DOT_H__
12
13#include <linux/mm.h>
14
15extern struct vm_operations_struct gfs2_vm_ops_private;
16extern struct vm_operations_struct gfs2_vm_ops_sharewrite;
17
18#endif /* __OPS_VM_DOT_H__ */
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index addb51e0f135..a08dabd6ce90 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -59,7 +59,6 @@
59#include "super.h" 59#include "super.h"
60#include "trans.h" 60#include "trans.h"
61#include "inode.h" 61#include "inode.h"
62#include "ops_file.h"
63#include "ops_address.h" 62#include "ops_address.h"
64#include "util.h" 63#include "util.h"
65 64
@@ -274,10 +273,10 @@ static int bh_get(struct gfs2_quota_data *qd)
274 } 273 }
275 274
276 block = qd->qd_slot / sdp->sd_qc_per_block; 275 block = qd->qd_slot / sdp->sd_qc_per_block;
277 offset = qd->qd_slot % sdp->sd_qc_per_block;; 276 offset = qd->qd_slot % sdp->sd_qc_per_block;
278 277
279 bh_map.b_size = 1 << ip->i_inode.i_blkbits; 278 bh_map.b_size = 1 << ip->i_inode.i_blkbits;
280 error = gfs2_block_map(&ip->i_inode, block, 0, &bh_map); 279 error = gfs2_block_map(&ip->i_inode, block, &bh_map, 0);
281 if (error) 280 if (error)
282 goto fail; 281 goto fail;
283 error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh); 282 error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh);
@@ -454,7 +453,7 @@ static void qdsb_put(struct gfs2_quota_data *qd)
454int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) 453int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
455{ 454{
456 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 455 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
457 struct gfs2_alloc *al = &ip->i_alloc; 456 struct gfs2_alloc *al = ip->i_alloc;
458 struct gfs2_quota_data **qd = al->al_qd; 457 struct gfs2_quota_data **qd = al->al_qd;
459 int error; 458 int error;
460 459
@@ -502,7 +501,7 @@ out:
502void gfs2_quota_unhold(struct gfs2_inode *ip) 501void gfs2_quota_unhold(struct gfs2_inode *ip)
503{ 502{
504 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 503 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
505 struct gfs2_alloc *al = &ip->i_alloc; 504 struct gfs2_alloc *al = ip->i_alloc;
506 unsigned int x; 505 unsigned int x;
507 506
508 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); 507 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
@@ -646,7 +645,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
646 } 645 }
647 646
648 if (!buffer_mapped(bh)) { 647 if (!buffer_mapped(bh)) {
649 gfs2_get_block(inode, iblock, bh, 1); 648 gfs2_block_map(inode, iblock, bh, 1);
650 if (!buffer_mapped(bh)) 649 if (!buffer_mapped(bh))
651 goto unlock; 650 goto unlock;
652 } 651 }
@@ -793,11 +792,9 @@ static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
793 struct gfs2_holder i_gh; 792 struct gfs2_holder i_gh;
794 struct gfs2_quota_host q; 793 struct gfs2_quota_host q;
795 char buf[sizeof(struct gfs2_quota)]; 794 char buf[sizeof(struct gfs2_quota)];
796 struct file_ra_state ra_state;
797 int error; 795 int error;
798 struct gfs2_quota_lvb *qlvb; 796 struct gfs2_quota_lvb *qlvb;
799 797
800 file_ra_state_init(&ra_state, sdp->sd_quota_inode->i_mapping);
801restart: 798restart:
802 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh); 799 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
803 if (error) 800 if (error)
@@ -820,8 +817,8 @@ restart:
820 817
821 memset(buf, 0, sizeof(struct gfs2_quota)); 818 memset(buf, 0, sizeof(struct gfs2_quota));
822 pos = qd2offset(qd); 819 pos = qd2offset(qd);
823 error = gfs2_internal_read(ip, &ra_state, buf, 820 error = gfs2_internal_read(ip, NULL, buf, &pos,
824 &pos, sizeof(struct gfs2_quota)); 821 sizeof(struct gfs2_quota));
825 if (error < 0) 822 if (error < 0)
826 goto fail_gunlock; 823 goto fail_gunlock;
827 824
@@ -856,7 +853,7 @@ fail:
856int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) 853int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
857{ 854{
858 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 855 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
859 struct gfs2_alloc *al = &ip->i_alloc; 856 struct gfs2_alloc *al = ip->i_alloc;
860 unsigned int x; 857 unsigned int x;
861 int error = 0; 858 int error = 0;
862 859
@@ -924,7 +921,7 @@ static int need_sync(struct gfs2_quota_data *qd)
924 921
925void gfs2_quota_unlock(struct gfs2_inode *ip) 922void gfs2_quota_unlock(struct gfs2_inode *ip)
926{ 923{
927 struct gfs2_alloc *al = &ip->i_alloc; 924 struct gfs2_alloc *al = ip->i_alloc;
928 struct gfs2_quota_data *qda[4]; 925 struct gfs2_quota_data *qda[4];
929 unsigned int count = 0; 926 unsigned int count = 0;
930 unsigned int x; 927 unsigned int x;
@@ -972,7 +969,7 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
972int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) 969int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
973{ 970{
974 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 971 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
975 struct gfs2_alloc *al = &ip->i_alloc; 972 struct gfs2_alloc *al = ip->i_alloc;
976 struct gfs2_quota_data *qd; 973 struct gfs2_quota_data *qd;
977 s64 value; 974 s64 value;
978 unsigned int x; 975 unsigned int x;
@@ -1016,10 +1013,9 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1016void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 1013void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1017 u32 uid, u32 gid) 1014 u32 uid, u32 gid)
1018{ 1015{
1019 struct gfs2_alloc *al = &ip->i_alloc; 1016 struct gfs2_alloc *al = ip->i_alloc;
1020 struct gfs2_quota_data *qd; 1017 struct gfs2_quota_data *qd;
1021 unsigned int x; 1018 unsigned int x;
1022 unsigned int found = 0;
1023 1019
1024 if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change)) 1020 if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change))
1025 return; 1021 return;
@@ -1032,7 +1028,6 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1032 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || 1028 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
1033 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { 1029 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
1034 do_qc(qd, change); 1030 do_qc(qd, change);
1035 found++;
1036 } 1031 }
1037 } 1032 }
1038} 1033}
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index beb6c7ac0086..b249e294a95b 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -391,7 +391,7 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea
391 lblock = head->lh_blkno; 391 lblock = head->lh_blkno;
392 gfs2_replay_incr_blk(sdp, &lblock); 392 gfs2_replay_incr_blk(sdp, &lblock);
393 bh_map.b_size = 1 << ip->i_inode.i_blkbits; 393 bh_map.b_size = 1 << ip->i_inode.i_blkbits;
394 error = gfs2_block_map(&ip->i_inode, lblock, 0, &bh_map); 394 error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0);
395 if (error) 395 if (error)
396 return error; 396 return error;
397 if (!bh_map.b_blocknr) { 397 if (!bh_map.b_blocknr) {
@@ -504,13 +504,21 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
504 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) 504 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
505 ro = 1; 505 ro = 1;
506 } else { 506 } else {
507 if (sdp->sd_vfs->s_flags & MS_RDONLY) 507 if (sdp->sd_vfs->s_flags & MS_RDONLY) {
508 ro = 1; 508 /* check if device itself is read-only */
509 ro = bdev_read_only(sdp->sd_vfs->s_bdev);
510 if (!ro) {
511 fs_info(sdp, "recovery required on "
512 "read-only filesystem.\n");
513 fs_info(sdp, "write access will be "
514 "enabled during recovery.\n");
515 }
516 }
509 } 517 }
510 518
511 if (ro) { 519 if (ro) {
512 fs_warn(sdp, "jid=%u: Can't replay: read-only FS\n", 520 fs_warn(sdp, "jid=%u: Can't replay: read-only block "
513 jd->jd_jid); 521 "device\n", jd->jd_jid);
514 error = -EROFS; 522 error = -EROFS;
515 goto fail_gunlock_tr; 523 goto fail_gunlock_tr;
516 } 524 }
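The hunk above changes what blocks journal replay: a read-only mount no longer refuses recovery by itself; only a read-only block device does, since replay can write to the device directly even while the filesystem is mounted read-only. A rough standalone sketch of the visible part of that decision (plain C, hypothetical helper name; the surrounding branch structure is not fully shown in this hunk):

        #include <stdio.h>
        #include <stdbool.h>

        /* Returns true when journal replay must be refused.  Previously a
         * read-only mount always refused; now it refuses only if the backing
         * device itself rejects writes (bdev_read_only() in the patch).
         */
        static bool replay_blocked(bool fs_readonly, bool bdev_readonly)
        {
                if (!fs_readonly)
                        return false;
                /* read-only mount: write access is enabled during recovery
                 * as long as the device accepts writes */
                return bdev_readonly;
        }

        int main(void)
        {
                printf("ro mount, rw device: blocked=%d\n", replay_blocked(true, false));
                printf("ro mount, ro device: blocked=%d\n", replay_blocked(true, true));
                return 0;
        }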
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 708c287e1d0e..3552110b2e5f 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -25,10 +25,10 @@
25#include "rgrp.h" 25#include "rgrp.h"
26#include "super.h" 26#include "super.h"
27#include "trans.h" 27#include "trans.h"
28#include "ops_file.h"
29#include "util.h" 28#include "util.h"
30#include "log.h" 29#include "log.h"
31#include "inode.h" 30#include "inode.h"
31#include "ops_address.h"
32 32
33#define BFITNOENT ((u32)~0) 33#define BFITNOENT ((u32)~0)
34#define NO_BLOCK ((u64)~0) 34#define NO_BLOCK ((u64)~0)
@@ -126,41 +126,43 @@ static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
126 * Return: the block number (bitmap buffer scope) that was found 126 * Return: the block number (bitmap buffer scope) that was found
127 */ 127 */
128 128
129static u32 gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer, 129static u32 gfs2_bitfit(unsigned char *buffer, unsigned int buflen, u32 goal,
130 unsigned int buflen, u32 goal, 130 unsigned char old_state)
131 unsigned char old_state)
132{ 131{
133 unsigned char *byte, *end, alloc; 132 unsigned char *byte;
134 u32 blk = goal; 133 u32 blk = goal;
135 unsigned int bit; 134 unsigned int bit, bitlong;
135 unsigned long *plong, plong55;
136 136
137 byte = buffer + (goal / GFS2_NBBY); 137 byte = buffer + (goal / GFS2_NBBY);
138 plong = (unsigned long *)(buffer + (goal / GFS2_NBBY));
138 bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; 139 bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
139 end = buffer + buflen; 140 bitlong = bit;
140 alloc = (old_state == GFS2_BLKST_FREE) ? 0x55 : 0; 141#if BITS_PER_LONG == 32
141 142 plong55 = 0x55555555;
142 while (byte < end) { 143#else
143 /* If we're looking for a free block we can eliminate all 144 plong55 = 0x5555555555555555;
144 bitmap settings with 0x55, which represents four data 145#endif
145 blocks in a row. If we're looking for a data block, we can 146 while (byte < buffer + buflen) {
146 eliminate 0x00 which corresponds to four free blocks. */ 147
147 if ((*byte & 0x55) == alloc) { 148 if (bitlong == 0 && old_state == 0 && *plong == plong55) {
148 blk += (8 - bit) >> 1; 149 plong++;
149 150 byte += sizeof(unsigned long);
150 bit = 0; 151 blk += sizeof(unsigned long) * GFS2_NBBY;
151 byte++;
152
153 continue; 152 continue;
154 } 153 }
155
156 if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) 154 if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
157 return blk; 155 return blk;
158
159 bit += GFS2_BIT_SIZE; 156 bit += GFS2_BIT_SIZE;
160 if (bit >= 8) { 157 if (bit >= 8) {
161 bit = 0; 158 bit = 0;
162 byte++; 159 byte++;
163 } 160 }
161 bitlong += GFS2_BIT_SIZE;
162 if (bitlong >= sizeof(unsigned long) * 8) {
163 bitlong = 0;
164 plong++;
165 }
164 166
165 blk++; 167 blk++;
166 } 168 }
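gfs2_bitfit() scans a bitmap that stores two bits of state per block (GFS2_NBBY blocks per byte, state 0 meaning free). The rewrite adds a fast path: when searching for a free block from a long-word boundary, a word equal to 0x5555... holds only allocated data blocks and can be skipped in a single step. A standalone userspace sketch of the same search, using memcpy for the word load to sidestep alignment concerns (names are illustrative, not the GFS2 ones):

        #include <stdio.h>
        #include <string.h>

        #define NBBY      4u                  /* blocks per byte, 2 bits each */
        #define BIT_MASK  0x3u
        #define NOENT     ((unsigned int)~0)

        static unsigned int bitfit(const unsigned char *buf, unsigned int buflen,
                                   unsigned int goal, unsigned char old_state)
        {
                unsigned int blk = goal;
                const unsigned char *byte = buf + goal / NBBY;
                unsigned int bit = (goal % NBBY) * 2;
                const unsigned long skip = (unsigned long)-1 / 3;  /* 0x5555... at any width */

                while (byte < buf + buflen) {
                        /* fast path: a whole aligned word of used-data blocks */
                        if (bit == 0 && old_state == 0 &&
                            ((byte - buf) % sizeof(unsigned long)) == 0 &&
                            (size_t)(buf + buflen - byte) >= sizeof(unsigned long)) {
                                unsigned long word;
                                memcpy(&word, byte, sizeof(word));
                                if (word == skip) {
                                        byte += sizeof(word);
                                        blk += sizeof(word) * NBBY;
                                        continue;
                                }
                        }
                        if (((*byte >> bit) & BIT_MASK) == old_state)
                                return blk;
                        bit += 2;
                        if (bit >= 8) {
                                bit = 0;
                                byte++;
                        }
                        blk++;
                }
                return NOENT;
        }

        int main(void)
        {
                unsigned char map[16];
                memset(map, 0x55, sizeof(map));   /* every block used for data */
                map[12] = 0x54;                   /* one free block at position 48 */
                printf("first free block: %u\n", bitfit(map, sizeof(map), 0, 0));
                return 0;
        }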
@@ -817,11 +819,9 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
817 819
818struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) 820struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
819{ 821{
820 struct gfs2_alloc *al = &ip->i_alloc; 822 BUG_ON(ip->i_alloc != NULL);
821 823 ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_KERNEL);
822 /* FIXME: Should assert that the correct locks are held here... */ 824 return ip->i_alloc;
823 memset(al, 0, sizeof(*al));
824 return al;
825} 825}
826 826
827/** 827/**
@@ -1059,26 +1059,34 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1059 struct inode *inode = NULL; 1059 struct inode *inode = NULL;
1060 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1060 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1061 struct gfs2_rgrpd *rgd, *begin = NULL; 1061 struct gfs2_rgrpd *rgd, *begin = NULL;
1062 struct gfs2_alloc *al = &ip->i_alloc; 1062 struct gfs2_alloc *al = ip->i_alloc;
1063 int flags = LM_FLAG_TRY; 1063 int flags = LM_FLAG_TRY;
1064 int skipped = 0; 1064 int skipped = 0;
1065 int loops = 0; 1065 int loops = 0;
1066 int error; 1066 int error, rg_locked;
1067 1067
1068 /* Try recently successful rgrps */ 1068 /* Try recently successful rgrps */
1069 1069
1070 rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc); 1070 rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc);
1071 1071
1072 while (rgd) { 1072 while (rgd) {
1073 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 1073 rg_locked = 0;
1074 LM_FLAG_TRY, &al->al_rgd_gh); 1074
1075 if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
1076 rg_locked = 1;
1077 error = 0;
1078 } else {
1079 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1080 LM_FLAG_TRY, &al->al_rgd_gh);
1081 }
1075 switch (error) { 1082 switch (error) {
1076 case 0: 1083 case 0:
1077 if (try_rgrp_fit(rgd, al)) 1084 if (try_rgrp_fit(rgd, al))
1078 goto out; 1085 goto out;
1079 if (rgd->rd_flags & GFS2_RDF_CHECK) 1086 if (rgd->rd_flags & GFS2_RDF_CHECK)
1080 inode = try_rgrp_unlink(rgd, last_unlinked); 1087 inode = try_rgrp_unlink(rgd, last_unlinked);
1081 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1088 if (!rg_locked)
1089 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1082 if (inode) 1090 if (inode)
1083 return inode; 1091 return inode;
1084 rgd = recent_rgrp_next(rgd, 1); 1092 rgd = recent_rgrp_next(rgd, 1);
@@ -1098,15 +1106,23 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1098 begin = rgd = forward_rgrp_get(sdp); 1106 begin = rgd = forward_rgrp_get(sdp);
1099 1107
1100 for (;;) { 1108 for (;;) {
1101 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags, 1109 rg_locked = 0;
1102 &al->al_rgd_gh); 1110
1111 if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
1112 rg_locked = 1;
1113 error = 0;
1114 } else {
1115 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags,
1116 &al->al_rgd_gh);
1117 }
1103 switch (error) { 1118 switch (error) {
1104 case 0: 1119 case 0:
1105 if (try_rgrp_fit(rgd, al)) 1120 if (try_rgrp_fit(rgd, al))
1106 goto out; 1121 goto out;
1107 if (rgd->rd_flags & GFS2_RDF_CHECK) 1122 if (rgd->rd_flags & GFS2_RDF_CHECK)
1108 inode = try_rgrp_unlink(rgd, last_unlinked); 1123 inode = try_rgrp_unlink(rgd, last_unlinked);
1109 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1124 if (!rg_locked)
1125 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1110 if (inode) 1126 if (inode)
1111 return inode; 1127 return inode;
1112 break; 1128 break;
@@ -1158,7 +1174,7 @@ out:
1158int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) 1174int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
1159{ 1175{
1160 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1176 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1161 struct gfs2_alloc *al = &ip->i_alloc; 1177 struct gfs2_alloc *al = ip->i_alloc;
1162 struct inode *inode; 1178 struct inode *inode;
1163 int error = 0; 1179 int error = 0;
1164 u64 last_unlinked = NO_BLOCK; 1180 u64 last_unlinked = NO_BLOCK;
@@ -1204,7 +1220,7 @@ try_again:
1204void gfs2_inplace_release(struct gfs2_inode *ip) 1220void gfs2_inplace_release(struct gfs2_inode *ip)
1205{ 1221{
1206 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1222 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1207 struct gfs2_alloc *al = &ip->i_alloc; 1223 struct gfs2_alloc *al = ip->i_alloc;
1208 1224
1209 if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1) 1225 if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1)
1210 fs_warn(sdp, "al_alloced = %u, al_requested = %u " 1226 fs_warn(sdp, "al_alloced = %u, al_requested = %u "
@@ -1213,7 +1229,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
1213 al->al_line); 1229 al->al_line);
1214 1230
1215 al->al_rgd = NULL; 1231 al->al_rgd = NULL;
1216 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1232 if (al->al_rgd_gh.gh_gl)
1233 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1217 if (ip != GFS2_I(sdp->sd_rindex)) 1234 if (ip != GFS2_I(sdp->sd_rindex))
1218 gfs2_glock_dq_uninit(&al->al_ri_gh); 1235 gfs2_glock_dq_uninit(&al->al_ri_gh);
1219} 1236}
@@ -1301,11 +1318,10 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1301 /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone 1318 /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
1302 bitmaps, so we must search the originals for that. */ 1319 bitmaps, so we must search the originals for that. */
1303 if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) 1320 if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1304 blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset, 1321 blk = gfs2_bitfit(bi->bi_clone + bi->bi_offset,
1305 bi->bi_len, goal, old_state); 1322 bi->bi_len, goal, old_state);
1306 else 1323 else
1307 blk = gfs2_bitfit(rgd, 1324 blk = gfs2_bitfit(bi->bi_bh->b_data + bi->bi_offset,
1308 bi->bi_bh->b_data + bi->bi_offset,
1309 bi->bi_len, goal, old_state); 1325 bi->bi_len, goal, old_state);
1310 if (blk != BFITNOENT) 1326 if (blk != BFITNOENT)
1311 break; 1327 break;
@@ -1394,7 +1410,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1394u64 gfs2_alloc_data(struct gfs2_inode *ip) 1410u64 gfs2_alloc_data(struct gfs2_inode *ip)
1395{ 1411{
1396 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1412 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1397 struct gfs2_alloc *al = &ip->i_alloc; 1413 struct gfs2_alloc *al = ip->i_alloc;
1398 struct gfs2_rgrpd *rgd = al->al_rgd; 1414 struct gfs2_rgrpd *rgd = al->al_rgd;
1399 u32 goal, blk; 1415 u32 goal, blk;
1400 u64 block; 1416 u64 block;
@@ -1439,7 +1455,7 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip)
1439u64 gfs2_alloc_meta(struct gfs2_inode *ip) 1455u64 gfs2_alloc_meta(struct gfs2_inode *ip)
1440{ 1456{
1441 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1457 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1442 struct gfs2_alloc *al = &ip->i_alloc; 1458 struct gfs2_alloc *al = ip->i_alloc;
1443 struct gfs2_rgrpd *rgd = al->al_rgd; 1459 struct gfs2_rgrpd *rgd = al->al_rgd;
1444 u32 goal, blk; 1460 u32 goal, blk;
1445 u64 block; 1461 u64 block;
@@ -1485,7 +1501,7 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip)
1485u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) 1501u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
1486{ 1502{
1487 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1503 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1488 struct gfs2_alloc *al = &dip->i_alloc; 1504 struct gfs2_alloc *al = dip->i_alloc;
1489 struct gfs2_rgrpd *rgd = al->al_rgd; 1505 struct gfs2_rgrpd *rgd = al->al_rgd;
1490 u32 blk; 1506 u32 blk;
1491 u64 block; 1507 u64 block;
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b4c6adfc6f2e..149bb161f4b6 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -32,7 +32,9 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);
32struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); 32struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
33static inline void gfs2_alloc_put(struct gfs2_inode *ip) 33static inline void gfs2_alloc_put(struct gfs2_inode *ip)
34{ 34{
35 return; /* So we can see where ip->i_alloc is used */ 35 BUG_ON(ip->i_alloc == NULL);
36 kfree(ip->i_alloc);
37 ip->i_alloc = NULL;
36} 38}
37 39
38int gfs2_inplace_reserve_i(struct gfs2_inode *ip, 40int gfs2_inplace_reserve_i(struct gfs2_inode *ip,
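gfs2_alloc_get()/gfs2_alloc_put() now allocate the per-inode struct gfs2_alloc on demand (ip->i_alloc becomes a pointer) and free it again, with BUG_ON guards against a double get or put. A standalone sketch of that lazily-allocated-member pattern in plain C, with assert() standing in for BUG_ON and calloc/free for kzalloc/kfree (the types are hypothetical stand-ins):

        #include <assert.h>
        #include <stdio.h>
        #include <stdlib.h>

        struct alloc_ctx { unsigned requested, alloced; };  /* stand-in for struct gfs2_alloc */
        struct inode_like { struct alloc_ctx *alloc; };     /* stand-in for gfs2_inode.i_alloc */

        static struct alloc_ctx *alloc_get(struct inode_like *ip)
        {
                assert(ip->alloc == NULL);                  /* BUG_ON(ip->i_alloc != NULL) */
                ip->alloc = calloc(1, sizeof(*ip->alloc));  /* kzalloc(..., GFP_KERNEL) */
                return ip->alloc;
        }

        static void alloc_put(struct inode_like *ip)
        {
                assert(ip->alloc != NULL);                  /* BUG_ON(ip->i_alloc == NULL) */
                free(ip->alloc);                            /* kfree() */
                ip->alloc = NULL;
        }

        int main(void)
        {
                struct inode_like ip = { NULL };
                struct alloc_ctx *al = alloc_get(&ip);

                if (!al)
                        return 1;
                al->requested = 4;
                printf("requested %u, alloced %u\n", al->requested, al->alloced);
                alloc_put(&ip);
                return 0;
        }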
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index dd3e737f528e..ef0562c3bc71 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -51,13 +51,9 @@ void gfs2_tune_init(struct gfs2_tune *gt)
51{ 51{
52 spin_lock_init(&gt->gt_spin); 52 spin_lock_init(&gt->gt_spin);
53 53
54 gt->gt_ilimit = 100;
55 gt->gt_ilimit_tries = 3;
56 gt->gt_ilimit_min = 1;
57 gt->gt_demote_secs = 300; 54 gt->gt_demote_secs = 300;
58 gt->gt_incore_log_blocks = 1024; 55 gt->gt_incore_log_blocks = 1024;
59 gt->gt_log_flush_secs = 60; 56 gt->gt_log_flush_secs = 60;
60 gt->gt_jindex_refresh_secs = 60;
61 gt->gt_recoverd_secs = 60; 57 gt->gt_recoverd_secs = 60;
62 gt->gt_logd_secs = 1; 58 gt->gt_logd_secs = 1;
63 gt->gt_quotad_secs = 5; 59 gt->gt_quotad_secs = 5;
@@ -71,10 +67,8 @@ void gfs2_tune_init(struct gfs2_tune *gt)
71 gt->gt_new_files_jdata = 0; 67 gt->gt_new_files_jdata = 0;
72 gt->gt_new_files_directio = 0; 68 gt->gt_new_files_directio = 0;
73 gt->gt_max_readahead = 1 << 18; 69 gt->gt_max_readahead = 1 << 18;
74 gt->gt_lockdump_size = 131072;
75 gt->gt_stall_secs = 600; 70 gt->gt_stall_secs = 600;
76 gt->gt_complain_secs = 10; 71 gt->gt_complain_secs = 10;
77 gt->gt_reclaim_limit = 5000;
78 gt->gt_statfs_quantum = 30; 72 gt->gt_statfs_quantum = 30;
79 gt->gt_statfs_slow = 0; 73 gt->gt_statfs_slow = 0;
80} 74}
@@ -393,6 +387,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
393 if (!jd) 387 if (!jd)
394 break; 388 break;
395 389
390 INIT_LIST_HEAD(&jd->extent_list);
396 jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL); 391 jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL);
397 if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { 392 if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
398 if (!jd->jd_inode) 393 if (!jd->jd_inode)
@@ -422,8 +417,9 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
422 417
423void gfs2_jindex_free(struct gfs2_sbd *sdp) 418void gfs2_jindex_free(struct gfs2_sbd *sdp)
424{ 419{
425 struct list_head list; 420 struct list_head list, *head;
426 struct gfs2_jdesc *jd; 421 struct gfs2_jdesc *jd;
422 struct gfs2_journal_extent *jext;
427 423
428 spin_lock(&sdp->sd_jindex_spin); 424 spin_lock(&sdp->sd_jindex_spin);
429 list_add(&list, &sdp->sd_jindex_list); 425 list_add(&list, &sdp->sd_jindex_list);
@@ -433,6 +429,14 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp)
433 429
434 while (!list_empty(&list)) { 430 while (!list_empty(&list)) {
435 jd = list_entry(list.next, struct gfs2_jdesc, jd_list); 431 jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
432 head = &jd->extent_list;
433 while (!list_empty(head)) {
434 jext = list_entry(head->next,
435 struct gfs2_journal_extent,
436 extent_list);
437 list_del(&jext->extent_list);
438 kfree(jext);
439 }
436 list_del(&jd->jd_list); 440 list_del(&jd->jd_list);
437 iput(jd->jd_inode); 441 iput(jd->jd_inode);
438 kfree(jd); 442 kfree(jd);
@@ -543,7 +547,6 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
543 if (error) 547 if (error)
544 return error; 548 return error;
545 549
546 gfs2_meta_cache_flush(ip);
547 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); 550 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
548 551
549 error = gfs2_find_jhead(sdp->sd_jdesc, &head); 552 error = gfs2_find_jhead(sdp->sd_jdesc, &head);
@@ -686,9 +689,7 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
686 if (error) 689 if (error)
687 return; 690 return;
688 691
689 mutex_lock(&sdp->sd_statfs_mutex);
690 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); 692 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
691 mutex_unlock(&sdp->sd_statfs_mutex);
692 693
693 spin_lock(&sdp->sd_statfs_spin); 694 spin_lock(&sdp->sd_statfs_spin);
694 l_sc->sc_total += total; 695 l_sc->sc_total += total;
@@ -736,9 +737,7 @@ int gfs2_statfs_sync(struct gfs2_sbd *sdp)
736 if (error) 737 if (error)
737 goto out_bh2; 738 goto out_bh2;
738 739
739 mutex_lock(&sdp->sd_statfs_mutex);
740 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); 740 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
741 mutex_unlock(&sdp->sd_statfs_mutex);
742 741
743 spin_lock(&sdp->sd_statfs_spin); 742 spin_lock(&sdp->sd_statfs_spin);
744 m_sc->sc_total += l_sc->sc_total; 743 m_sc->sc_total += l_sc->sc_total;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 06e0b7768d97..eaa3b7b2f99e 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -32,7 +32,8 @@ spinlock_t gfs2_sys_margs_lock;
32 32
33static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) 33static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
34{ 34{
35 return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_vfs->s_id); 35 return snprintf(buf, PAGE_SIZE, "%u:%u\n",
36 MAJOR(sdp->sd_vfs->s_dev), MINOR(sdp->sd_vfs->s_dev));
36} 37}
37 38
38static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf) 39static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
@@ -221,9 +222,7 @@ static struct kobj_type gfs2_ktype = {
221 .sysfs_ops = &gfs2_attr_ops, 222 .sysfs_ops = &gfs2_attr_ops,
222}; 223};
223 224
224static struct kset gfs2_kset = { 225static struct kset *gfs2_kset;
225 .ktype = &gfs2_ktype,
226};
227 226
228/* 227/*
229 * display struct lm_lockstruct fields 228 * display struct lm_lockstruct fields
@@ -427,13 +426,11 @@ TUNE_ATTR_2(name, name##_store)
427TUNE_ATTR(demote_secs, 0); 426TUNE_ATTR(demote_secs, 0);
428TUNE_ATTR(incore_log_blocks, 0); 427TUNE_ATTR(incore_log_blocks, 0);
429TUNE_ATTR(log_flush_secs, 0); 428TUNE_ATTR(log_flush_secs, 0);
430TUNE_ATTR(jindex_refresh_secs, 0);
431TUNE_ATTR(quota_warn_period, 0); 429TUNE_ATTR(quota_warn_period, 0);
432TUNE_ATTR(quota_quantum, 0); 430TUNE_ATTR(quota_quantum, 0);
433TUNE_ATTR(atime_quantum, 0); 431TUNE_ATTR(atime_quantum, 0);
434TUNE_ATTR(max_readahead, 0); 432TUNE_ATTR(max_readahead, 0);
435TUNE_ATTR(complain_secs, 0); 433TUNE_ATTR(complain_secs, 0);
436TUNE_ATTR(reclaim_limit, 0);
437TUNE_ATTR(statfs_slow, 0); 434TUNE_ATTR(statfs_slow, 0);
438TUNE_ATTR(new_files_jdata, 0); 435TUNE_ATTR(new_files_jdata, 0);
439TUNE_ATTR(new_files_directio, 0); 436TUNE_ATTR(new_files_directio, 0);
@@ -450,13 +447,11 @@ static struct attribute *tune_attrs[] = {
450 &tune_attr_demote_secs.attr, 447 &tune_attr_demote_secs.attr,
451 &tune_attr_incore_log_blocks.attr, 448 &tune_attr_incore_log_blocks.attr,
452 &tune_attr_log_flush_secs.attr, 449 &tune_attr_log_flush_secs.attr,
453 &tune_attr_jindex_refresh_secs.attr,
454 &tune_attr_quota_warn_period.attr, 450 &tune_attr_quota_warn_period.attr,
455 &tune_attr_quota_quantum.attr, 451 &tune_attr_quota_quantum.attr,
456 &tune_attr_atime_quantum.attr, 452 &tune_attr_atime_quantum.attr,
457 &tune_attr_max_readahead.attr, 453 &tune_attr_max_readahead.attr,
458 &tune_attr_complain_secs.attr, 454 &tune_attr_complain_secs.attr,
459 &tune_attr_reclaim_limit.attr,
460 &tune_attr_statfs_slow.attr, 455 &tune_attr_statfs_slow.attr,
461 &tune_attr_quota_simul_sync.attr, 456 &tune_attr_quota_simul_sync.attr,
462 &tune_attr_quota_cache_secs.attr, 457 &tune_attr_quota_cache_secs.attr,
@@ -495,14 +490,9 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
495{ 490{
496 int error; 491 int error;
497 492
498 sdp->sd_kobj.kset = &gfs2_kset; 493 sdp->sd_kobj.kset = gfs2_kset;
499 sdp->sd_kobj.ktype = &gfs2_ktype; 494 error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL,
500 495 "%s", sdp->sd_table_name);
501 error = kobject_set_name(&sdp->sd_kobj, "%s", sdp->sd_table_name);
502 if (error)
503 goto fail;
504
505 error = kobject_register(&sdp->sd_kobj);
506 if (error) 496 if (error)
507 goto fail; 497 goto fail;
508 498
@@ -522,6 +512,7 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
522 if (error) 512 if (error)
523 goto fail_args; 513 goto fail_args;
524 514
515 kobject_uevent(&sdp->sd_kobj, KOBJ_ADD);
525 return 0; 516 return 0;
526 517
527fail_args: 518fail_args:
@@ -531,7 +522,7 @@ fail_counters:
531fail_lockstruct: 522fail_lockstruct:
532 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); 523 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
533fail_reg: 524fail_reg:
534 kobject_unregister(&sdp->sd_kobj); 525 kobject_put(&sdp->sd_kobj);
535fail: 526fail:
536 fs_err(sdp, "error %d adding sysfs files", error); 527 fs_err(sdp, "error %d adding sysfs files", error);
537 return error; 528 return error;
@@ -543,21 +534,22 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
543 sysfs_remove_group(&sdp->sd_kobj, &args_group); 534 sysfs_remove_group(&sdp->sd_kobj, &args_group);
544 sysfs_remove_group(&sdp->sd_kobj, &counters_group); 535 sysfs_remove_group(&sdp->sd_kobj, &counters_group);
545 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); 536 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
546 kobject_unregister(&sdp->sd_kobj); 537 kobject_put(&sdp->sd_kobj);
547} 538}
548 539
549int gfs2_sys_init(void) 540int gfs2_sys_init(void)
550{ 541{
551 gfs2_sys_margs = NULL; 542 gfs2_sys_margs = NULL;
552 spin_lock_init(&gfs2_sys_margs_lock); 543 spin_lock_init(&gfs2_sys_margs_lock);
553 kobject_set_name(&gfs2_kset.kobj, "gfs2"); 544 gfs2_kset = kset_create_and_add("gfs2", NULL, fs_kobj);
554 kobj_set_kset_s(&gfs2_kset, fs_subsys); 545 if (!gfs2_kset)
555 return kset_register(&gfs2_kset); 546 return -ENOMEM;
547 return 0;
556} 548}
557 549
558void gfs2_sys_uninit(void) 550void gfs2_sys_uninit(void)
559{ 551{
560 kfree(gfs2_sys_margs); 552 kfree(gfs2_sys_margs);
561 kset_unregister(&gfs2_kset); 553 kset_unregister(gfs2_kset);
562} 554}
563 555
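The sysfs glue follows the kobject API changes of this period: the static struct kset gives way to kset_create_and_add("gfs2", NULL, fs_kobj), each superblock's embedded kobject is registered with kobject_init_and_add() followed by an explicit KOBJ_ADD uevent, and teardown uses kobject_put() plus kset_unregister(). A minimal kernel-module sketch of that registration flow; it must be built against a kernel tree, and every name other than the kobject/kset calls themselves is hypothetical:

        /* Hypothetical module mirroring the kset/kobject pattern the patch
         * switches gfs2 to: a kset under /sys/fs and one member kobject.
         */
        #include <linux/module.h>
        #include <linux/kobject.h>
        #include <linux/fs.h>            /* fs_kobj */

        static struct kset *demo_kset;
        static struct kobject demo_kobj;

        static void demo_release(struct kobject *kobj) { }

        static struct kobj_type demo_ktype = {
                .release = demo_release,
        };

        static int __init demo_init(void)
        {
                int err;

                demo_kset = kset_create_and_add("demo", NULL, fs_kobj);
                if (!demo_kset)
                        return -ENOMEM;

                demo_kobj.kset = demo_kset;
                err = kobject_init_and_add(&demo_kobj, &demo_ktype, NULL, "%s", "node0");
                if (err) {
                        kobject_put(&demo_kobj);
                        kset_unregister(demo_kset);
                        return err;
                }
                kobject_uevent(&demo_kobj, KOBJ_ADD);
                return 0;
        }

        static void __exit demo_exit(void)
        {
                kobject_put(&demo_kobj);
                kset_unregister(demo_kset);
        }

        module_init(demo_init);
        module_exit(demo_exit);
        MODULE_LICENSE("GPL");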
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 717983e2c2ae..73e5d92a657c 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -114,11 +114,6 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
114 gfs2_log_flush(sdp, NULL); 114 gfs2_log_flush(sdp, NULL);
115} 115}
116 116
117void gfs2_trans_add_gl(struct gfs2_glock *gl)
118{
119 lops_add(gl->gl_sbd, &gl->gl_le);
120}
121
122/** 117/**
123 * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction 118 * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction
124 * @gl: the glock the buffer belongs to 119 * @gl: the glock the buffer belongs to
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 043d5f4b9c4c..e826f0dab80a 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -30,7 +30,6 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
30 30
31void gfs2_trans_end(struct gfs2_sbd *sdp); 31void gfs2_trans_end(struct gfs2_sbd *sdp);
32 32
33void gfs2_trans_add_gl(struct gfs2_glock *gl);
34void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); 33void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
35void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); 34void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
36void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno); 35void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno);
diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c
index f13f1494d4fe..f8452a0eab56 100644
--- a/fs/hfs/bfind.c
+++ b/fs/hfs/bfind.c
@@ -52,6 +52,10 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd)
52 rec = (e + b) / 2; 52 rec = (e + b) / 2;
53 len = hfs_brec_lenoff(bnode, rec, &off); 53 len = hfs_brec_lenoff(bnode, rec, &off);
54 keylen = hfs_brec_keylen(bnode, rec); 54 keylen = hfs_brec_keylen(bnode, rec);
55 if (keylen == HFS_BAD_KEYLEN) {
56 res = -EINVAL;
57 goto done;
58 }
55 hfs_bnode_read(bnode, fd->key, off, keylen); 59 hfs_bnode_read(bnode, fd->key, off, keylen);
56 cmpval = bnode->tree->keycmp(fd->key, fd->search_key); 60 cmpval = bnode->tree->keycmp(fd->key, fd->search_key);
57 if (!cmpval) { 61 if (!cmpval) {
@@ -67,6 +71,10 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd)
67 if (rec != e && e >= 0) { 71 if (rec != e && e >= 0) {
68 len = hfs_brec_lenoff(bnode, e, &off); 72 len = hfs_brec_lenoff(bnode, e, &off);
69 keylen = hfs_brec_keylen(bnode, e); 73 keylen = hfs_brec_keylen(bnode, e);
74 if (keylen == HFS_BAD_KEYLEN) {
75 res = -EINVAL;
76 goto done;
77 }
70 hfs_bnode_read(bnode, fd->key, off, keylen); 78 hfs_bnode_read(bnode, fd->key, off, keylen);
71 } 79 }
72done: 80done:
@@ -198,6 +206,10 @@ int hfs_brec_goto(struct hfs_find_data *fd, int cnt)
198 206
199 len = hfs_brec_lenoff(bnode, fd->record, &off); 207 len = hfs_brec_lenoff(bnode, fd->record, &off);
200 keylen = hfs_brec_keylen(bnode, fd->record); 208 keylen = hfs_brec_keylen(bnode, fd->record);
209 if (keylen == HFS_BAD_KEYLEN) {
210 res = -EINVAL;
211 goto out;
212 }
201 fd->keyoffset = off; 213 fd->keyoffset = off;
202 fd->keylength = keylen; 214 fd->keylength = keylen;
203 fd->entryoffset = off + keylen; 215 fd->entryoffset = off + keylen;
diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c
index 5c87cf4801fc..8626ee375ea8 100644
--- a/fs/hfs/brec.c
+++ b/fs/hfs/brec.c
@@ -44,10 +44,21 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec)
44 recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2); 44 recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2);
45 if (!recoff) 45 if (!recoff)
46 return 0; 46 return 0;
47 if (node->tree->attributes & HFS_TREE_BIGKEYS) 47 if (node->tree->attributes & HFS_TREE_BIGKEYS) {
48 retval = hfs_bnode_read_u16(node, recoff) + 2; 48 retval = hfs_bnode_read_u16(node, recoff) + 2;
49 else 49 if (retval > node->tree->max_key_len + 2) {
50 printk(KERN_ERR "hfs: keylen %d too large\n",
51 retval);
52 retval = HFS_BAD_KEYLEN;
53 }
54 } else {
50 retval = (hfs_bnode_read_u8(node, recoff) | 1) + 1; 55 retval = (hfs_bnode_read_u8(node, recoff) | 1) + 1;
56 if (retval > node->tree->max_key_len + 1) {
57 printk(KERN_ERR "hfs: keylen %d too large\n",
58 retval);
59 retval = HFS_BAD_KEYLEN;
60 }
61 }
51 } 62 }
52 return retval; 63 return retval;
53} 64}
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 8a3a650abc87..110dd3515dc8 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -61,7 +61,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
61 mapping = tree->inode->i_mapping; 61 mapping = tree->inode->i_mapping;
62 page = read_mapping_page(mapping, 0, NULL); 62 page = read_mapping_page(mapping, 0, NULL);
63 if (IS_ERR(page)) 63 if (IS_ERR(page))
64 goto free_tree; 64 goto free_inode;
65 65
66 /* Load the header */ 66 /* Load the header */
67 head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); 67 head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc));
@@ -81,6 +81,17 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
81 goto fail_page; 81 goto fail_page;
82 if (!tree->node_count) 82 if (!tree->node_count)
83 goto fail_page; 83 goto fail_page;
84 if ((id == HFS_EXT_CNID) && (tree->max_key_len != HFS_MAX_EXT_KEYLEN)) {
85 printk(KERN_ERR "hfs: invalid extent max_key_len %d\n",
86 tree->max_key_len);
87 goto fail_page;
88 }
89 if ((id == HFS_CAT_CNID) && (tree->max_key_len != HFS_MAX_CAT_KEYLEN)) {
90 printk(KERN_ERR "hfs: invalid catalog max_key_len %d\n",
91 tree->max_key_len);
92 goto fail_page;
93 }
94
84 tree->node_size_shift = ffs(size) - 1; 95 tree->node_size_shift = ffs(size) - 1;
85 tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 96 tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
86 97
@@ -88,11 +99,12 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
88 page_cache_release(page); 99 page_cache_release(page);
89 return tree; 100 return tree;
90 101
91 fail_page: 102fail_page:
92 tree->inode->i_mapping->a_ops = &hfs_aops;
93 page_cache_release(page); 103 page_cache_release(page);
94 free_tree: 104free_inode:
105 tree->inode->i_mapping->a_ops = &hfs_aops;
95 iput(tree->inode); 106 iput(tree->inode);
107free_tree:
96 kfree(tree); 108 kfree(tree);
97 return NULL; 109 return NULL;
98} 110}
diff --git a/fs/hfs/hfs.h b/fs/hfs/hfs.h
index 1445e3a56ed4..c6aae61adfe6 100644
--- a/fs/hfs/hfs.h
+++ b/fs/hfs/hfs.h
@@ -28,6 +28,8 @@
28#define HFS_MAX_NAMELEN 128 28#define HFS_MAX_NAMELEN 128
29#define HFS_MAX_VALENCE 32767U 29#define HFS_MAX_VALENCE 32767U
30 30
31#define HFS_BAD_KEYLEN 0xFF
32
31/* Meanings of the drAtrb field of the MDB, 33/* Meanings of the drAtrb field of the MDB,
32 * Reference: _Inside Macintosh: Files_ p. 2-61 34 * Reference: _Inside Macintosh: Files_ p. 2-61
33 */ 35 */
@@ -167,6 +169,9 @@ typedef union hfs_btree_key {
167 struct hfs_ext_key ext; 169 struct hfs_ext_key ext;
168} hfs_btree_key; 170} hfs_btree_key;
169 171
172#define HFS_MAX_CAT_KEYLEN (sizeof(struct hfs_cat_key) - sizeof(u8))
173#define HFS_MAX_EXT_KEYLEN (sizeof(struct hfs_ext_key) - sizeof(u8))
174
170typedef union hfs_btree_key btree_key; 175typedef union hfs_btree_key btree_key;
171 176
172struct hfs_extent { 177struct hfs_extent {
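The HFS changes harden b-tree key handling against corrupt media: hfs_brec_keylen() now rejects on-disk key lengths above the tree's max_key_len and returns the HFS_BAD_KEYLEN sentinel, the lookup paths in bfind.c turn that into -EINVAL, and hfs_btree_open() refuses extent or catalog trees whose max_key_len differs from HFS_MAX_EXT_KEYLEN/HFS_MAX_CAT_KEYLEN. A standalone sketch of the length check (plain C, hypothetical names; the example limit is only illustrative):

        #include <stdio.h>

        #define BAD_KEYLEN 0xFF   /* mirrors HFS_BAD_KEYLEN */

        /* "bigkeys" trees store a 16-bit length plus 2 bytes for the field
         * itself; classic trees store an 8-bit length rounded up to even
         * plus 1, exactly as in hfs_brec_keylen() above.
         */
        static unsigned int checked_keylen(unsigned int raw, int bigkeys,
                                           unsigned int max_key_len)
        {
                unsigned int len = bigkeys ? raw + 2 : (raw | 1) + 1;
                unsigned int limit = bigkeys ? max_key_len + 2 : max_key_len + 1;

                if (len > limit) {
                        fprintf(stderr, "keylen %u too large\n", len);
                        return BAD_KEYLEN;
                }
                return len;
        }

        int main(void)
        {
                printf("ok:  %u\n", checked_keylen(6, 1, 37));    /* within bounds */
                printf("bad: %u\n", checked_keylen(200, 1, 37));  /* corrupt on-disk value */
                return 0;
        }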
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 08ff6c7028cc..038ed7436199 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -288,10 +288,12 @@ handle_t *journal_start(journal_t *journal, int nblocks)
288 jbd_free_handle(handle); 288 jbd_free_handle(handle);
289 current->journal_info = NULL; 289 current->journal_info = NULL;
290 handle = ERR_PTR(err); 290 handle = ERR_PTR(err);
291 goto out;
291 } 292 }
292 293
293 lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); 294 lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
294 295
296out:
295 return handle; 297 return handle;
296} 298}
297 299
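The journal_start() fix above is purely control flow: when transaction start fails, the handle becomes an error pointer and the function now jumps past the lockdep annotation instead of falling through and touching it. A small userspace sketch of the fixed flow, with stand-ins for the kernel's error-pointer helpers (all names hypothetical):

        #include <stdio.h>

        /* userspace stand-ins for ERR_PTR()/IS_ERR() */
        #define ERR_PTR(err)  ((void *)(long)(err))
        #define IS_ERR(ptr)   ((unsigned long)(ptr) >= (unsigned long)-4095)

        static void *start_handle(int err)
        {
                void *handle = "live handle";

                if (err) {
                        handle = ERR_PTR(err);
                        goto out;   /* the added jump: skip the annotation below */
                }
                printf("lock_acquire() on %p\n", handle);  /* must never see an error pointer */
        out:
                return handle;
        }

        int main(void)
        {
                printf("ok:    IS_ERR=%d\n", IS_ERR(start_handle(0)));
                printf("error: IS_ERR=%d\n", IS_ERR(start_handle(-12)));
                return 0;
        }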
diff --git a/fs/namei.c b/fs/namei.c
index 3b993db26cee..73e2e665817a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1605,7 +1605,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1605 if (S_ISLNK(inode->i_mode)) 1605 if (S_ISLNK(inode->i_mode))
1606 return -ELOOP; 1606 return -ELOOP;
1607 1607
1608 if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) 1608 if (S_ISDIR(inode->i_mode) && (acc_mode & MAY_WRITE))
1609 return -EISDIR; 1609 return -EISDIR;
1610 1610
1611 /* 1611 /*
@@ -1620,7 +1620,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1620 return -EACCES; 1620 return -EACCES;
1621 1621
1622 flag &= ~O_TRUNC; 1622 flag &= ~O_TRUNC;
1623 } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE)) 1623 } else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE))
1624 return -EROFS; 1624 return -EROFS;
1625 1625
1626 error = vfs_permission(nd, acc_mode); 1626 error = vfs_permission(nd, acc_mode);
diff --git a/fs/namespace.c b/fs/namespace.c
index 06083885b21e..61bf376e29e8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -41,8 +41,8 @@ static struct kmem_cache *mnt_cache __read_mostly;
41static struct rw_semaphore namespace_sem; 41static struct rw_semaphore namespace_sem;
42 42
43/* /sys/fs */ 43/* /sys/fs */
44decl_subsys(fs, NULL, NULL); 44struct kobject *fs_kobj;
45EXPORT_SYMBOL_GPL(fs_subsys); 45EXPORT_SYMBOL_GPL(fs_kobj);
46 46
47static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) 47static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
48{ 48{
@@ -1861,10 +1861,9 @@ void __init mnt_init(void)
1861 if (err) 1861 if (err)
1862 printk(KERN_WARNING "%s: sysfs_init error: %d\n", 1862 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
1863 __FUNCTION__, err); 1863 __FUNCTION__, err);
1864 err = subsystem_register(&fs_subsys); 1864 fs_kobj = kobject_create_and_add("fs", NULL);
1865 if (err) 1865 if (!fs_kobj)
1866 printk(KERN_WARNING "%s: subsystem_register error: %d\n", 1866 printk(KERN_WARNING "%s: kobj create error\n", __FUNCTION__);
1867 __FUNCTION__, err);
1868 init_rootfs(); 1867 init_rootfs();
1869 init_mount_tree(); 1868 init_mount_tree();
1870} 1869}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index b35069a2aa9e..bd1b9d663fb9 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -115,6 +115,7 @@ struct nfs4_lock_state {
115#define NFS_LOCK_INITIALIZED 1 115#define NFS_LOCK_INITIALIZED 1
116 int ls_flags; 116 int ls_flags;
117 struct nfs_seqid_counter ls_seqid; 117 struct nfs_seqid_counter ls_seqid;
118 struct rpc_sequence ls_sequence;
118 struct nfs_unique_id ls_id; 119 struct nfs_unique_id ls_id;
119 nfs4_stateid ls_stateid; 120 nfs4_stateid ls_stateid;
120 atomic_t ls_count; 121 atomic_t ls_count;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f03d9d5f5ba4..9e2e1c7291db 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -741,10 +741,10 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
741 if (data->rpc_status == 0) { 741 if (data->rpc_status == 0) {
742 memcpy(data->o_res.stateid.data, data->c_res.stateid.data, 742 memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
743 sizeof(data->o_res.stateid.data)); 743 sizeof(data->o_res.stateid.data));
744 nfs_confirm_seqid(&data->owner->so_seqid, 0);
744 renew_lease(data->o_res.server, data->timestamp); 745 renew_lease(data->o_res.server, data->timestamp);
745 data->rpc_done = 1; 746 data->rpc_done = 1;
746 } 747 }
747 nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status);
748 nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid); 748 nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
749} 749}
750 750
@@ -759,7 +759,6 @@ static void nfs4_open_confirm_release(void *calldata)
759 /* In case of error, no cleanup! */ 759 /* In case of error, no cleanup! */
760 if (!data->rpc_done) 760 if (!data->rpc_done)
761 goto out_free; 761 goto out_free;
762 nfs_confirm_seqid(&data->owner->so_seqid, 0);
763 state = nfs4_opendata_to_nfs4_state(data); 762 state = nfs4_opendata_to_nfs4_state(data);
764 if (!IS_ERR(state)) 763 if (!IS_ERR(state))
765 nfs4_close_state(&data->path, state, data->o_arg.open_flags); 764 nfs4_close_state(&data->path, state, data->o_arg.open_flags);
@@ -886,7 +885,6 @@ static void nfs4_open_release(void *calldata)
886 /* In case we need an open_confirm, no cleanup! */ 885 /* In case we need an open_confirm, no cleanup! */
887 if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) 886 if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
888 goto out_free; 887 goto out_free;
889 nfs_confirm_seqid(&data->owner->so_seqid, 0);
890 state = nfs4_opendata_to_nfs4_state(data); 888 state = nfs4_opendata_to_nfs4_state(data);
891 if (!IS_ERR(state)) 889 if (!IS_ERR(state))
892 nfs4_close_state(&data->path, state, data->o_arg.open_flags); 890 nfs4_close_state(&data->path, state, data->o_arg.open_flags);
@@ -3333,6 +3331,12 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
3333 3331
3334 p->arg.fh = NFS_FH(inode); 3332 p->arg.fh = NFS_FH(inode);
3335 p->arg.fl = &p->fl; 3333 p->arg.fl = &p->fl;
3334 if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) {
3335 p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid);
3336 if (p->arg.open_seqid == NULL)
3337 goto out_free;
3338
3339 }
3336 p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); 3340 p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
3337 if (p->arg.lock_seqid == NULL) 3341 if (p->arg.lock_seqid == NULL)
3338 goto out_free; 3342 goto out_free;
@@ -3345,6 +3349,8 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
3345 memcpy(&p->fl, fl, sizeof(p->fl)); 3349 memcpy(&p->fl, fl, sizeof(p->fl));
3346 return p; 3350 return p;
3347out_free: 3351out_free:
3352 if (p->arg.open_seqid != NULL)
3353 nfs_free_seqid(p->arg.open_seqid);
3348 kfree(p); 3354 kfree(p);
3349 return NULL; 3355 return NULL;
3350} 3356}
@@ -3361,23 +3367,23 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
3361 .rpc_cred = sp->so_cred, 3367 .rpc_cred = sp->so_cred,
3362 }; 3368 };
3363 3369
3364 if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
3365 return;
3366 dprintk("%s: begin!\n", __FUNCTION__); 3370 dprintk("%s: begin!\n", __FUNCTION__);
3367 /* Do we need to do an open_to_lock_owner? */ 3371 /* Do we need to do an open_to_lock_owner? */
3368 if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { 3372 if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
3369 data->arg.open_seqid = nfs_alloc_seqid(&sp->so_seqid); 3373 if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0)
3370 if (data->arg.open_seqid == NULL) { 3374 return;
3371 data->rpc_status = -ENOMEM;
3372 task->tk_action = NULL;
3373 goto out;
3374 }
3375 data->arg.open_stateid = &state->stateid; 3375 data->arg.open_stateid = &state->stateid;
3376 data->arg.new_lock_owner = 1; 3376 data->arg.new_lock_owner = 1;
3377 /* Retest in case we raced... */
3378 if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED))
3379 goto do_rpc;
3377 } 3380 }
3381 if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
3382 return;
3383 data->arg.new_lock_owner = 0;
3384do_rpc:
3378 data->timestamp = jiffies; 3385 data->timestamp = jiffies;
3379 rpc_call_setup(task, &msg, 0); 3386 rpc_call_setup(task, &msg, 0);
3380out:
3381 dprintk("%s: done!, ret = %d\n", __FUNCTION__, data->rpc_status); 3387 dprintk("%s: done!, ret = %d\n", __FUNCTION__, data->rpc_status);
3382} 3388}
3383 3389
@@ -3413,8 +3419,6 @@ static void nfs4_lock_release(void *calldata)
3413 struct nfs4_lockdata *data = calldata; 3419 struct nfs4_lockdata *data = calldata;
3414 3420
3415 dprintk("%s: begin!\n", __FUNCTION__); 3421 dprintk("%s: begin!\n", __FUNCTION__);
3416 if (data->arg.open_seqid != NULL)
3417 nfs_free_seqid(data->arg.open_seqid);
3418 if (data->cancelled != 0) { 3422 if (data->cancelled != 0) {
3419 struct rpc_task *task; 3423 struct rpc_task *task;
3420 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, 3424 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
@@ -3424,6 +3428,8 @@ static void nfs4_lock_release(void *calldata)
3424 dprintk("%s: cancelling lock!\n", __FUNCTION__); 3428 dprintk("%s: cancelling lock!\n", __FUNCTION__);
3425 } else 3429 } else
3426 nfs_free_seqid(data->arg.lock_seqid); 3430 nfs_free_seqid(data->arg.lock_seqid);
3431 if (data->arg.open_seqid != NULL)
3432 nfs_free_seqid(data->arg.open_seqid);
3427 nfs4_put_lock_state(data->lsp); 3433 nfs4_put_lock_state(data->lsp);
3428 put_nfs_open_context(data->ctx); 3434 put_nfs_open_context(data->ctx);
3429 kfree(data); 3435 kfree(data);
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 3ea352d82eba..5e2e4af1a0e6 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -133,9 +133,7 @@ nfs4_renewd_prepare_shutdown(struct nfs_server *server)
133void 133void
134nfs4_kill_renewd(struct nfs_client *clp) 134nfs4_kill_renewd(struct nfs_client *clp)
135{ 135{
136 down_read(&clp->cl_sem);
137 cancel_delayed_work_sync(&clp->cl_renewd); 136 cancel_delayed_work_sync(&clp->cl_renewd);
138 up_read(&clp->cl_sem);
139} 137}
140 138
141/* 139/*
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 23a9a36556bf..5a39c6f78acf 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -509,7 +509,10 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
509 lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); 509 lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
510 if (lsp == NULL) 510 if (lsp == NULL)
511 return NULL; 511 return NULL;
512 lsp->ls_seqid.sequence = &state->owner->so_sequence; 512 rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue");
513 spin_lock_init(&lsp->ls_sequence.lock);
514 INIT_LIST_HEAD(&lsp->ls_sequence.list);
515 lsp->ls_seqid.sequence = &lsp->ls_sequence;
513 atomic_set(&lsp->ls_count, 1); 516 atomic_set(&lsp->ls_count, 1);
514 lsp->ls_owner = fl_owner; 517 lsp->ls_owner = fl_owner;
515 spin_lock(&clp->cl_lock); 518 spin_lock(&clp->cl_lock);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ea929207f274..0b0c72a072ff 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1475,7 +1475,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
1475 error = PTR_ERR(mntroot); 1475 error = PTR_ERR(mntroot);
1476 goto error_splat_super; 1476 goto error_splat_super;
1477 } 1477 }
1478 if (mntroot->d_inode->i_op != server->nfs_client->rpc_ops->dir_inode_ops) { 1478 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
1479 dput(mntroot); 1479 dput(mntroot);
1480 error = -ESTALE; 1480 error = -ESTALE;
1481 goto error_splat_super; 1481 goto error_splat_super;
@@ -1826,6 +1826,11 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
1826 error = PTR_ERR(mntroot); 1826 error = PTR_ERR(mntroot);
1827 goto error_splat_super; 1827 goto error_splat_super;
1828 } 1828 }
1829 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
1830 dput(mntroot);
1831 error = -ESTALE;
1832 goto error_splat_super;
1833 }
1829 1834
1830 s->s_flags |= MS_ACTIVE; 1835 s->s_flags |= MS_ACTIVE;
1831 mnt->mnt_sb = s; 1836 mnt->mnt_sb = s;
@@ -1900,6 +1905,11 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
1900 error = PTR_ERR(mntroot); 1905 error = PTR_ERR(mntroot);
1901 goto error_splat_super; 1906 goto error_splat_super;
1902 } 1907 }
1908 if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
1909 dput(mntroot);
1910 error = -ESTALE;
1911 goto error_splat_super;
1912 }
1903 1913
1904 s->s_flags |= MS_ACTIVE; 1914 s->s_flags |= MS_ACTIVE;
1905 mnt->mnt_sb = s; 1915 mnt->mnt_sb = s;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 2d116d2298f8..f917fd25858a 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -388,8 +388,11 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
388 * Round the length of the data which was specified up to 388 * Round the length of the data which was specified up to
389 * the next multiple of XDR units and then compare that 389 * the next multiple of XDR units and then compare that
390 * against the length which was actually received. 390 * against the length which was actually received.
391 * Note that when RPCSEC/GSS (for example) is used, the
392 * data buffer can be padded so dlen might be larger
393 * than required. It must never be smaller.
391 */ 394 */
392 if (dlen != XDR_QUADLEN(len)*4) 395 if (dlen < XDR_QUADLEN(len)*4)
393 return 0; 396 return 0;
394 397
395 if (args->count > max_blocksize) { 398 if (args->count > max_blocksize) {
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 986f9b32083c..b86e3658a0af 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -313,8 +313,11 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
313 * Round the length of the data which was specified up to 313 * Round the length of the data which was specified up to
314 * the next multiple of XDR units and then compare that 314 * the next multiple of XDR units and then compare that
315 * against the length which was actually received. 315 * against the length which was actually received.
316 * Note that when RPCSEC/GSS (for example) is used, the
317 * data buffer can be padded so dlen might be larger
318 * than required. It must never be smaller.
316 */ 319 */
317 if (dlen != XDR_QUADLEN(len)*4) 320 if (dlen < XDR_QUADLEN(len)*4)
318 return 0; 321 return 0;
319 322
320 rqstp->rq_vec[0].iov_base = (void*)p; 323 rqstp->rq_vec[0].iov_base = (void*)p;
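Both the NFSv2 and NFSv3 WRITE decoders relax their length sanity check: XDR pads the payload to 4-byte quads, and with RPCSEC_GSS the receive buffer may carry additional padding, so the received length only has to be at least the quad-rounded byte count rather than exactly equal to it. A standalone sketch of the check; XDR_QUADLEN follows the kernel's round-up-to-quads definition:

        #include <stdio.h>
        #include <stdbool.h>

        #define XDR_QUADLEN(l) (((l) + 3) >> 2)   /* bytes -> 4-byte XDR units, rounded up */

        /* dlen: payload bytes actually received after the fixed header
         * len:  byte count claimed in the WRITE arguments
         */
        static bool write_args_sane(unsigned int dlen, unsigned int len)
        {
                return dlen >= XDR_QUADLEN(len) * 4;   /* was "==" before this patch */
        }

        int main(void)
        {
                printf("%d\n", write_args_sane(4096, 4096));  /* exact length: accepted */
                printf("%d\n", write_args_sane(4104, 4096));  /* GSS padding: now accepted */
                printf("%d\n", write_args_sane(4092, 4096));  /* too short:   rejected */
                return 0;
        }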
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index a4882c8df945..23c732f27529 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -146,7 +146,7 @@ static struct kset mlog_kset = {
146 .kobj = {.ktype = &mlog_ktype}, 146 .kobj = {.ktype = &mlog_ktype},
147}; 147};
148 148
149int mlog_sys_init(struct kset *o2cb_subsys) 149int mlog_sys_init(struct kset *o2cb_kset)
150{ 150{
151 int i = 0; 151 int i = 0;
152 152
@@ -157,7 +157,7 @@ int mlog_sys_init(struct kset *o2cb_subsys)
157 mlog_attr_ptrs[i] = NULL; 157 mlog_attr_ptrs[i] = NULL;
158 158
159 kobject_set_name(&mlog_kset.kobj, "logmask"); 159 kobject_set_name(&mlog_kset.kobj, "logmask");
160 kobj_set_kset_s(&mlog_kset, *o2cb_subsys); 160 mlog_kset.kobj.kset = o2cb_kset;
161 return kset_register(&mlog_kset); 161 return kset_register(&mlog_kset);
162} 162}
163 163
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c
index 64f6f378fd09..a4b07730b2e1 100644
--- a/fs/ocfs2/cluster/sys.c
+++ b/fs/ocfs2/cluster/sys.c
@@ -28,96 +28,55 @@
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/kobject.h> 29#include <linux/kobject.h>
30#include <linux/sysfs.h> 30#include <linux/sysfs.h>
31#include <linux/fs.h>
31 32
32#include "ocfs2_nodemanager.h" 33#include "ocfs2_nodemanager.h"
33#include "masklog.h" 34#include "masklog.h"
34#include "sys.h" 35#include "sys.h"
35 36
36struct o2cb_attribute {
37 struct attribute attr;
38 ssize_t (*show)(char *buf);
39 ssize_t (*store)(const char *buf, size_t count);
40};
41
42#define O2CB_ATTR(_name, _mode, _show, _store) \
43struct o2cb_attribute o2cb_attr_##_name = __ATTR(_name, _mode, _show, _store)
44
45#define to_o2cb_attr(_attr) container_of(_attr, struct o2cb_attribute, attr)
46 37
47static ssize_t o2cb_interface_revision_show(char *buf) 38static ssize_t version_show(struct kobject *kobj, struct kobj_attribute *attr,
39 char *buf)
48{ 40{
49 return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION); 41 return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION);
50} 42}
51 43static struct kobj_attribute attr_version =
52static O2CB_ATTR(interface_revision, S_IFREG | S_IRUGO, o2cb_interface_revision_show, NULL); 44 __ATTR(interface_revision, S_IFREG | S_IRUGO, version_show, NULL);
53 45
54static struct attribute *o2cb_attrs[] = { 46static struct attribute *o2cb_attrs[] = {
55 &o2cb_attr_interface_revision.attr, 47 &attr_version.attr,
56 NULL, 48 NULL,
57}; 49};
58 50
59static ssize_t 51static struct attribute_group o2cb_attr_group = {
60o2cb_show(struct kobject * kobj, struct attribute * attr, char * buffer); 52 .attrs = o2cb_attrs,
61static ssize_t
62o2cb_store(struct kobject * kobj, struct attribute * attr,
63 const char * buffer, size_t count);
64static struct sysfs_ops o2cb_sysfs_ops = {
65 .show = o2cb_show,
66 .store = o2cb_store,
67}; 53};
68 54
69static struct kobj_type o2cb_subsys_type = { 55static struct kset *o2cb_kset;
70 .default_attrs = o2cb_attrs,
71 .sysfs_ops = &o2cb_sysfs_ops,
72};
73
74/* gives us o2cb_subsys */
75static decl_subsys(o2cb, NULL, NULL);
76
77static ssize_t
78o2cb_show(struct kobject * kobj, struct attribute * attr, char * buffer)
79{
80 struct o2cb_attribute *o2cb_attr = to_o2cb_attr(attr);
81 struct kset *sbs = to_kset(kobj);
82
83 BUG_ON(sbs != &o2cb_subsys);
84
85 if (o2cb_attr->show)
86 return o2cb_attr->show(buffer);
87 return -EIO;
88}
89
90static ssize_t
91o2cb_store(struct kobject * kobj, struct attribute * attr,
92 const char * buffer, size_t count)
93{
94 struct o2cb_attribute *o2cb_attr = to_o2cb_attr(attr);
95 struct kset *sbs = to_kset(kobj);
96
97 BUG_ON(sbs != &o2cb_subsys);
98
99 if (o2cb_attr->store)
100 return o2cb_attr->store(buffer, count);
101 return -EIO;
102}
103 56
104void o2cb_sys_shutdown(void) 57void o2cb_sys_shutdown(void)
105{ 58{
106 mlog_sys_shutdown(); 59 mlog_sys_shutdown();
107 subsystem_unregister(&o2cb_subsys); 60 kset_unregister(o2cb_kset);
108} 61}
109 62
110int o2cb_sys_init(void) 63int o2cb_sys_init(void)
111{ 64{
112 int ret; 65 int ret;
113 66
114 o2cb_subsys.kobj.ktype = &o2cb_subsys_type; 67 o2cb_kset = kset_create_and_add("o2cb", NULL, fs_kobj);
115 ret = subsystem_register(&o2cb_subsys); 68 if (!o2cb_kset)
69 return -ENOMEM;
70
71 ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group);
116 if (ret) 72 if (ret)
117 return ret; 73 goto error;
118 74
119 ret = mlog_sys_init(&o2cb_subsys); 75 ret = mlog_sys_init(o2cb_kset);
120 if (ret) 76 if (ret)
121 subsystem_unregister(&o2cb_subsys); 77 goto error;
78 return 0;
79error:
80 kset_unregister(o2cb_kset);
122 return ret; 81 return ret;
123} 82}
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index d88173840082..6b7ff1618945 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -131,7 +131,7 @@ static void property_stop(struct seq_file *f, void *v)
131 /* Nothing to do */ 131 /* Nothing to do */
132} 132}
133 133
134static struct seq_operations property_op = { 134static const struct seq_operations property_op = {
135 .start = property_start, 135 .start = property_start,
136 .next = property_next, 136 .next = property_next,
137 .stop = property_stop, 137 .stop = property_stop,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 722e12e5acc7..739da701ae7b 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -195,96 +195,45 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
195 return ERR_PTR(res); 195 return ERR_PTR(res);
196} 196}
197 197
198/* 198static ssize_t part_start_show(struct device *dev,
199 * sysfs bindings for partitions 199 struct device_attribute *attr, char *buf)
200 */
201
202struct part_attribute {
203 struct attribute attr;
204 ssize_t (*show)(struct hd_struct *,char *);
205 ssize_t (*store)(struct hd_struct *,const char *, size_t);
206};
207
208static ssize_t
209part_attr_show(struct kobject * kobj, struct attribute * attr, char * page)
210{ 200{
211 struct hd_struct * p = container_of(kobj,struct hd_struct,kobj); 201 struct hd_struct *p = dev_to_part(dev);
212 struct part_attribute * part_attr = container_of(attr,struct part_attribute,attr);
213 ssize_t ret = 0;
214 if (part_attr->show)
215 ret = part_attr->show(p, page);
216 return ret;
217}
218static ssize_t
219part_attr_store(struct kobject * kobj, struct attribute * attr,
220 const char *page, size_t count)
221{
222 struct hd_struct * p = container_of(kobj,struct hd_struct,kobj);
223 struct part_attribute * part_attr = container_of(attr,struct part_attribute,attr);
224 ssize_t ret = 0;
225 202
226 if (part_attr->store) 203 return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
227 ret = part_attr->store(p, page, count);
228 return ret;
229} 204}
230 205
231static struct sysfs_ops part_sysfs_ops = { 206static ssize_t part_size_show(struct device *dev,
232 .show = part_attr_show, 207 struct device_attribute *attr, char *buf)
233 .store = part_attr_store,
234};
235
236static ssize_t part_uevent_store(struct hd_struct * p,
237 const char *page, size_t count)
238{ 208{
239 kobject_uevent(&p->kobj, KOBJ_ADD); 209 struct hd_struct *p = dev_to_part(dev);
240 return count; 210 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
241} 211}
242static ssize_t part_dev_read(struct hd_struct * p, char *page) 212
243{ 213static ssize_t part_stat_show(struct device *dev,
244 struct gendisk *disk = container_of(p->kobj.parent,struct gendisk,kobj); 214 struct device_attribute *attr, char *buf)
245 dev_t dev = MKDEV(disk->major, disk->first_minor + p->partno);
246 return print_dev_t(page, dev);
247}
248static ssize_t part_start_read(struct hd_struct * p, char *page)
249{
250 return sprintf(page, "%llu\n",(unsigned long long)p->start_sect);
251}
252static ssize_t part_size_read(struct hd_struct * p, char *page)
253{
254 return sprintf(page, "%llu\n",(unsigned long long)p->nr_sects);
255}
256static ssize_t part_stat_read(struct hd_struct * p, char *page)
257{ 215{
258 return sprintf(page, "%8u %8llu %8u %8llu\n", 216 struct hd_struct *p = dev_to_part(dev);
217
218 return sprintf(buf, "%8u %8llu %8u %8llu\n",
259 p->ios[0], (unsigned long long)p->sectors[0], 219 p->ios[0], (unsigned long long)p->sectors[0],
260 p->ios[1], (unsigned long long)p->sectors[1]); 220 p->ios[1], (unsigned long long)p->sectors[1]);
261} 221}
262static struct part_attribute part_attr_uevent = {
263 .attr = {.name = "uevent", .mode = S_IWUSR },
264 .store = part_uevent_store
265};
266static struct part_attribute part_attr_dev = {
267 .attr = {.name = "dev", .mode = S_IRUGO },
268 .show = part_dev_read
269};
270static struct part_attribute part_attr_start = {
271 .attr = {.name = "start", .mode = S_IRUGO },
272 .show = part_start_read
273};
274static struct part_attribute part_attr_size = {
275 .attr = {.name = "size", .mode = S_IRUGO },
276 .show = part_size_read
277};
278static struct part_attribute part_attr_stat = {
279 .attr = {.name = "stat", .mode = S_IRUGO },
280 .show = part_stat_read
281};
282 222
283#ifdef CONFIG_FAIL_MAKE_REQUEST 223#ifdef CONFIG_FAIL_MAKE_REQUEST
224static ssize_t part_fail_show(struct device *dev,
225 struct device_attribute *attr, char *buf)
226{
227 struct hd_struct *p = dev_to_part(dev);
284 228
285static ssize_t part_fail_store(struct hd_struct * p, 229 return sprintf(buf, "%d\n", p->make_it_fail);
230}
231
232static ssize_t part_fail_store(struct device *dev,
233 struct device_attribute *attr,
286 const char *buf, size_t count) 234 const char *buf, size_t count)
287{ 235{
236 struct hd_struct *p = dev_to_part(dev);
288 int i; 237 int i;
289 238
290 if (count > 0 && sscanf(buf, "%d", &i) > 0) 239 if (count > 0 && sscanf(buf, "%d", &i) > 0)
@@ -292,50 +241,53 @@ static ssize_t part_fail_store(struct hd_struct * p,
292 241
293 return count; 242 return count;
294} 243}
295static ssize_t part_fail_read(struct hd_struct * p, char *page) 244#endif
296{
297 return sprintf(page, "%d\n", p->make_it_fail);
298}
299static struct part_attribute part_attr_fail = {
300 .attr = {.name = "make-it-fail", .mode = S_IRUGO | S_IWUSR },
301 .store = part_fail_store,
302 .show = part_fail_read
303};
304 245
246static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
247static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
248static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
249#ifdef CONFIG_FAIL_MAKE_REQUEST
250static struct device_attribute dev_attr_fail =
251 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
305#endif 252#endif
306 253
307static struct attribute * default_attrs[] = { 254static struct attribute *part_attrs[] = {
308 &part_attr_uevent.attr, 255 &dev_attr_start.attr,
309 &part_attr_dev.attr, 256 &dev_attr_size.attr,
310 &part_attr_start.attr, 257 &dev_attr_stat.attr,
311 &part_attr_size.attr,
312 &part_attr_stat.attr,
313#ifdef CONFIG_FAIL_MAKE_REQUEST 258#ifdef CONFIG_FAIL_MAKE_REQUEST
314 &part_attr_fail.attr, 259 &dev_attr_fail.attr,
315#endif 260#endif
316 NULL, 261 NULL
317}; 262};
318 263
319extern struct kset block_subsys; 264static struct attribute_group part_attr_group = {
265 .attrs = part_attrs,
266};
320 267
321static void part_release(struct kobject *kobj) 268static struct attribute_group *part_attr_groups[] = {
269 &part_attr_group,
270 NULL
271};
272
273static void part_release(struct device *dev)
322{ 274{
323 struct hd_struct * p = container_of(kobj,struct hd_struct,kobj); 275 struct hd_struct *p = dev_to_part(dev);
324 kfree(p); 276 kfree(p);
325} 277}
326 278
327struct kobj_type ktype_part = { 279struct device_type part_type = {
280 .name = "partition",
281 .groups = part_attr_groups,
328 .release = part_release, 282 .release = part_release,
329 .default_attrs = default_attrs,
330 .sysfs_ops = &part_sysfs_ops,
331}; 283};
332 284
333static inline void partition_sysfs_add_subdir(struct hd_struct *p) 285static inline void partition_sysfs_add_subdir(struct hd_struct *p)
334{ 286{
335 struct kobject *k; 287 struct kobject *k;
336 288
337 k = kobject_get(&p->kobj); 289 k = kobject_get(&p->dev.kobj);
338 p->holder_dir = kobject_add_dir(k, "holders"); 290 p->holder_dir = kobject_create_and_add("holders", k);
339 kobject_put(k); 291 kobject_put(k);
340} 292}
341 293
@@ -343,15 +295,16 @@ static inline void disk_sysfs_add_subdirs(struct gendisk *disk)
343{ 295{
344 struct kobject *k; 296 struct kobject *k;
345 297
346 k = kobject_get(&disk->kobj); 298 k = kobject_get(&disk->dev.kobj);
347 disk->holder_dir = kobject_add_dir(k, "holders"); 299 disk->holder_dir = kobject_create_and_add("holders", k);
348 disk->slave_dir = kobject_add_dir(k, "slaves"); 300 disk->slave_dir = kobject_create_and_add("slaves", k);
349 kobject_put(k); 301 kobject_put(k);
350} 302}
351 303
352void delete_partition(struct gendisk *disk, int part) 304void delete_partition(struct gendisk *disk, int part)
353{ 305{
354 struct hd_struct *p = disk->part[part-1]; 306 struct hd_struct *p = disk->part[part-1];
307
355 if (!p) 308 if (!p)
356 return; 309 return;
357 if (!p->nr_sects) 310 if (!p->nr_sects)
@@ -361,113 +314,55 @@ void delete_partition(struct gendisk *disk, int part)
361 p->nr_sects = 0; 314 p->nr_sects = 0;
362 p->ios[0] = p->ios[1] = 0; 315 p->ios[0] = p->ios[1] = 0;
363 p->sectors[0] = p->sectors[1] = 0; 316 p->sectors[0] = p->sectors[1] = 0;
364 sysfs_remove_link(&p->kobj, "subsystem"); 317 kobject_put(p->holder_dir);
365 kobject_unregister(p->holder_dir); 318 device_del(&p->dev);
366 kobject_uevent(&p->kobj, KOBJ_REMOVE); 319 put_device(&p->dev);
367 kobject_del(&p->kobj);
368 kobject_put(&p->kobj);
369} 320}
370 321
371void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) 322void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
372{ 323{
373 struct hd_struct *p; 324 struct hd_struct *p;
325 int err;
374 326
375 p = kzalloc(sizeof(*p), GFP_KERNEL); 327 p = kzalloc(sizeof(*p), GFP_KERNEL);
376 if (!p) 328 if (!p)
377 return; 329 return;
378 330
379 p->start_sect = start; 331 p->start_sect = start;
380 p->nr_sects = len; 332 p->nr_sects = len;
381 p->partno = part; 333 p->partno = part;
382 p->policy = disk->policy; 334 p->policy = disk->policy;
383 335
384 if (isdigit(disk->kobj.k_name[strlen(disk->kobj.k_name)-1])) 336 if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1]))
385 kobject_set_name(&p->kobj, "%sp%d", 337 snprintf(p->dev.bus_id, BUS_ID_SIZE,
386 kobject_name(&disk->kobj), part); 338 "%sp%d", disk->dev.bus_id, part);
387 else 339 else
388 kobject_set_name(&p->kobj, "%s%d", 340 snprintf(p->dev.bus_id, BUS_ID_SIZE,
389 kobject_name(&disk->kobj),part); 341 "%s%d", disk->dev.bus_id, part);
390 p->kobj.parent = &disk->kobj; 342
391 p->kobj.ktype = &ktype_part; 343 device_initialize(&p->dev);
392 kobject_init(&p->kobj); 344 p->dev.devt = MKDEV(disk->major, disk->first_minor + part);
393 kobject_add(&p->kobj); 345 p->dev.class = &block_class;
394 if (!disk->part_uevent_suppress) 346 p->dev.type = &part_type;
395 kobject_uevent(&p->kobj, KOBJ_ADD); 347 p->dev.parent = &disk->dev;
396 sysfs_create_link(&p->kobj, &block_subsys.kobj, "subsystem"); 348 disk->part[part-1] = p;
349
350 /* delay uevent until 'holders' subdir is created */
351 p->dev.uevent_suppress = 1;
352 device_add(&p->dev);
353 partition_sysfs_add_subdir(p);
354 p->dev.uevent_suppress = 0;
397 if (flags & ADDPART_FLAG_WHOLEDISK) { 355 if (flags & ADDPART_FLAG_WHOLEDISK) {
398 static struct attribute addpartattr = { 356 static struct attribute addpartattr = {
399 .name = "whole_disk", 357 .name = "whole_disk",
400 .mode = S_IRUSR | S_IRGRP | S_IROTH, 358 .mode = S_IRUSR | S_IRGRP | S_IROTH,
401 }; 359 };
402 360 err = sysfs_create_file(&p->dev.kobj, &addpartattr);
403 sysfs_create_file(&p->kobj, &addpartattr);
404 } 361 }
405 partition_sysfs_add_subdir(p);
406 disk->part[part-1] = p;
407}
408 362
 409static char *make_block_name(struct gendisk *disk) 363 /* suppress uevent if the disk suppresses it */
410{ 364 if (!disk->dev.uevent_suppress)
411 char *name; 365 kobject_uevent(&p->dev.kobj, KOBJ_ADD);
412 static char *block_str = "block:";
413 int size;
414 char *s;
415
416 size = strlen(block_str) + strlen(disk->disk_name) + 1;
417 name = kmalloc(size, GFP_KERNEL);
418 if (!name)
419 return NULL;
420 strcpy(name, block_str);
421 strcat(name, disk->disk_name);
422 /* ewww... some of these buggers have / in name... */
423 s = strchr(name, '/');
424 if (s)
425 *s = '!';
426 return name;
427}
428
429static int disk_sysfs_symlinks(struct gendisk *disk)
430{
431 struct device *target = get_device(disk->driverfs_dev);
432 int err;
433 char *disk_name = NULL;
434
435 if (target) {
436 disk_name = make_block_name(disk);
437 if (!disk_name) {
438 err = -ENOMEM;
439 goto err_out;
440 }
441
442 err = sysfs_create_link(&disk->kobj, &target->kobj, "device");
443 if (err)
444 goto err_out_disk_name;
445
446 err = sysfs_create_link(&target->kobj, &disk->kobj, disk_name);
447 if (err)
448 goto err_out_dev_link;
449 }
450
451 err = sysfs_create_link(&disk->kobj, &block_subsys.kobj,
452 "subsystem");
453 if (err)
454 goto err_out_disk_name_lnk;
455
456 kfree(disk_name);
457
458 return 0;
459
460err_out_disk_name_lnk:
461 if (target) {
462 sysfs_remove_link(&target->kobj, disk_name);
463err_out_dev_link:
464 sysfs_remove_link(&disk->kobj, "device");
465err_out_disk_name:
466 kfree(disk_name);
467err_out:
468 put_device(target);
469 }
470 return err;
471} 366}
472 367
473/* Not exported, helper to add_disk(). */ 368/* Not exported, helper to add_disk(). */
@@ -479,19 +374,29 @@ void register_disk(struct gendisk *disk)
479 struct hd_struct *p; 374 struct hd_struct *p;
480 int err; 375 int err;
481 376
482 kobject_set_name(&disk->kobj, "%s", disk->disk_name); 377 disk->dev.parent = disk->driverfs_dev;
483 /* ewww... some of these buggers have / in name... */ 378 disk->dev.devt = MKDEV(disk->major, disk->first_minor);
484 s = strchr(disk->kobj.k_name, '/'); 379
380 strlcpy(disk->dev.bus_id, disk->disk_name, KOBJ_NAME_LEN);
381 /* ewww... some of these buggers have / in the name... */
382 s = strchr(disk->dev.bus_id, '/');
485 if (s) 383 if (s)
486 *s = '!'; 384 *s = '!';
487 if ((err = kobject_add(&disk->kobj))) 385
386 /* delay uevents, until we scanned partition table */
387 disk->dev.uevent_suppress = 1;
388
389 if (device_add(&disk->dev))
488 return; 390 return;
489 err = disk_sysfs_symlinks(disk); 391#ifndef CONFIG_SYSFS_DEPRECATED
392 err = sysfs_create_link(block_depr, &disk->dev.kobj,
393 kobject_name(&disk->dev.kobj));
490 if (err) { 394 if (err) {
491 kobject_del(&disk->kobj); 395 device_del(&disk->dev);
492 return; 396 return;
493 } 397 }
494 disk_sysfs_add_subdirs(disk); 398#endif
399 disk_sysfs_add_subdirs(disk);
495 400
496 /* No minors to use for partitions */ 401 /* No minors to use for partitions */
497 if (disk->minors == 1) 402 if (disk->minors == 1)
@@ -505,25 +410,23 @@ void register_disk(struct gendisk *disk)
505 if (!bdev) 410 if (!bdev)
506 goto exit; 411 goto exit;
507 412
508 /* scan partition table, but suppress uevents */
509 bdev->bd_invalidated = 1; 413 bdev->bd_invalidated = 1;
510 disk->part_uevent_suppress = 1;
511 err = blkdev_get(bdev, FMODE_READ, 0); 414 err = blkdev_get(bdev, FMODE_READ, 0);
512 disk->part_uevent_suppress = 0;
513 if (err < 0) 415 if (err < 0)
514 goto exit; 416 goto exit;
515 blkdev_put(bdev); 417 blkdev_put(bdev);
516 418
517exit: 419exit:
518 /* announce disk after possible partitions are already created */ 420 /* announce disk after possible partitions are created */
519 kobject_uevent(&disk->kobj, KOBJ_ADD); 421 disk->dev.uevent_suppress = 0;
422 kobject_uevent(&disk->dev.kobj, KOBJ_ADD);
520 423
521 /* announce possible partitions */ 424 /* announce possible partitions */
522 for (i = 1; i < disk->minors; i++) { 425 for (i = 1; i < disk->minors; i++) {
523 p = disk->part[i-1]; 426 p = disk->part[i-1];
524 if (!p || !p->nr_sects) 427 if (!p || !p->nr_sects)
525 continue; 428 continue;
526 kobject_uevent(&p->kobj, KOBJ_ADD); 429 kobject_uevent(&p->dev.kobj, KOBJ_ADD);
527 } 430 }
528} 431}
529 432
@@ -602,19 +505,11 @@ void del_gendisk(struct gendisk *disk)
602 disk_stat_set_all(disk, 0); 505 disk_stat_set_all(disk, 0);
603 disk->stamp = 0; 506 disk->stamp = 0;
604 507
605 kobject_uevent(&disk->kobj, KOBJ_REMOVE); 508 kobject_put(disk->holder_dir);
606 kobject_unregister(disk->holder_dir); 509 kobject_put(disk->slave_dir);
607 kobject_unregister(disk->slave_dir); 510 disk->driverfs_dev = NULL;
608 if (disk->driverfs_dev) { 511#ifndef CONFIG_SYSFS_DEPRECATED
609 char *disk_name = make_block_name(disk); 512 sysfs_remove_link(block_depr, disk->dev.bus_id);
610 sysfs_remove_link(&disk->kobj, "device"); 513#endif
611 if (disk_name) { 514 device_del(&disk->dev);
612 sysfs_remove_link(&disk->driverfs_dev->kobj, disk_name);
613 kfree(disk_name);
614 }
615 put_device(disk->driverfs_dev);
616 disk->driverfs_dev = NULL;
617 }
618 sysfs_remove_link(&disk->kobj, "subsystem");
619 kobject_del(&disk->kobj);
620} 515}
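
The partition code above drops the hand-rolled kobject/part_attribute layer in favour of struct device, DEVICE_ATTR() and a default attribute group hung off a device_type. A sketch of that idiom with invented names (example_dev, value_show, example_type); it is not the exact partition code, only the same pattern:

#include <linux/device.h>
#include <linux/slab.h>
#include <linux/stat.h>

/* hypothetical container, analogous to hd_struct embedding struct device */
struct example_dev {
	struct device dev;
	unsigned long value;
};
#define to_example(d) container_of(d, struct example_dev, dev)

/* show callbacks now take the generic struct device, not a private type */
static ssize_t value_show(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", to_example(dev)->value);
}
static DEVICE_ATTR(value, S_IRUGO, value_show, NULL);

static struct attribute *example_attrs[] = {
	&dev_attr_value.attr,
	NULL
};

static struct attribute_group example_attr_group = {
	.attrs = example_attrs,
};

static struct attribute_group *example_attr_groups[] = {
	&example_attr_group,
	NULL
};

static void example_release(struct device *dev)
{
	kfree(to_example(dev));
}

/* the default attributes and the release hook travel with the device_type */
struct device_type example_type = {
	.name    = "example",
	.groups  = example_attr_groups,
	.release = example_release,
};

Registration would then follow the device_initialize()/device_add() sequence that add_partition() uses above, with dev->type pointing at the device_type so the attribute files appear automatically.
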
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 65c62e1bfd6f..eb97f2897e2b 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -169,7 +169,7 @@ static inline char *task_state(struct task_struct *p, char *buffer)
169 ppid = pid_alive(p) ? 169 ppid = pid_alive(p) ?
170 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; 170 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
171 tpid = pid_alive(p) && p->ptrace ? 171 tpid = pid_alive(p) && p->ptrace ?
172 task_ppid_nr_ns(rcu_dereference(p->parent), ns) : 0; 172 task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0;
173 buffer += sprintf(buffer, 173 buffer += sprintf(buffer,
174 "State:\t%s\n" 174 "State:\t%s\n"
175 "Tgid:\t%d\n" 175 "Tgid:\t%d\n"
@@ -464,8 +464,8 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
464 } 464 }
465 465
466 sid = task_session_nr_ns(task, ns); 466 sid = task_session_nr_ns(task, ns);
467 ppid = task_tgid_nr_ns(task->real_parent, ns);
467 pgid = task_pgrp_nr_ns(task, ns); 468 pgid = task_pgrp_nr_ns(task, ns);
468 ppid = task_ppid_nr_ns(task, ns);
469 469
470 unlock_task_sighand(task, &flags); 470 unlock_task_sighand(task, &flags);
471 } 471 }
diff --git a/fs/read_write.c b/fs/read_write.c
index ea1f94cc722e..c4d3d17923f1 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -197,25 +197,27 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
197{ 197{
198 struct inode *inode; 198 struct inode *inode;
199 loff_t pos; 199 loff_t pos;
200 int retval = -EINVAL;
200 201
201 inode = file->f_path.dentry->d_inode; 202 inode = file->f_path.dentry->d_inode;
202 if (unlikely((ssize_t) count < 0)) 203 if (unlikely((ssize_t) count < 0))
203 goto Einval; 204 return retval;
204 pos = *ppos; 205 pos = *ppos;
205 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) 206 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
206 goto Einval; 207 return retval;
207 208
208 if (unlikely(inode->i_flock && mandatory_lock(inode))) { 209 if (unlikely(inode->i_flock && mandatory_lock(inode))) {
209 int retval = locks_mandatory_area( 210 retval = locks_mandatory_area(
210 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 211 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
211 inode, file, pos, count); 212 inode, file, pos, count);
212 if (retval < 0) 213 if (retval < 0)
213 return retval; 214 return retval;
214 } 215 }
216 retval = security_file_permission(file,
217 read_write == READ ? MAY_READ : MAY_WRITE);
218 if (retval)
219 return retval;
215 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 220 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
216
217Einval:
218 return -EINVAL;
219} 221}
220 222
221static void wait_on_retry_sync_kiocb(struct kiocb *iocb) 223static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
@@ -267,18 +269,15 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
267 ret = rw_verify_area(READ, file, pos, count); 269 ret = rw_verify_area(READ, file, pos, count);
268 if (ret >= 0) { 270 if (ret >= 0) {
269 count = ret; 271 count = ret;
270 ret = security_file_permission (file, MAY_READ); 272 if (file->f_op->read)
271 if (!ret) { 273 ret = file->f_op->read(file, buf, count, pos);
272 if (file->f_op->read) 274 else
273 ret = file->f_op->read(file, buf, count, pos); 275 ret = do_sync_read(file, buf, count, pos);
274 else 276 if (ret > 0) {
275 ret = do_sync_read(file, buf, count, pos); 277 fsnotify_access(file->f_path.dentry);
276 if (ret > 0) { 278 add_rchar(current, ret);
277 fsnotify_access(file->f_path.dentry);
278 add_rchar(current, ret);
279 }
280 inc_syscr(current);
281 } 279 }
280 inc_syscr(current);
282 } 281 }
283 282
284 return ret; 283 return ret;
@@ -325,18 +324,15 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
325 ret = rw_verify_area(WRITE, file, pos, count); 324 ret = rw_verify_area(WRITE, file, pos, count);
326 if (ret >= 0) { 325 if (ret >= 0) {
327 count = ret; 326 count = ret;
328 ret = security_file_permission (file, MAY_WRITE); 327 if (file->f_op->write)
329 if (!ret) { 328 ret = file->f_op->write(file, buf, count, pos);
330 if (file->f_op->write) 329 else
331 ret = file->f_op->write(file, buf, count, pos); 330 ret = do_sync_write(file, buf, count, pos);
332 else 331 if (ret > 0) {
333 ret = do_sync_write(file, buf, count, pos); 332 fsnotify_modify(file->f_path.dentry);
334 if (ret > 0) { 333 add_wchar(current, ret);
335 fsnotify_modify(file->f_path.dentry);
336 add_wchar(current, ret);
337 }
338 inc_syscw(current);
339 } 334 }
335 inc_syscw(current);
340 } 336 }
341 337
342 return ret; 338 return ret;
@@ -603,9 +599,6 @@ static ssize_t do_readv_writev(int type, struct file *file,
603 ret = rw_verify_area(type, file, pos, tot_len); 599 ret = rw_verify_area(type, file, pos, tot_len);
604 if (ret < 0) 600 if (ret < 0)
605 goto out; 601 goto out;
606 ret = security_file_permission(file, type == READ ? MAY_READ : MAY_WRITE);
607 if (ret)
608 goto out;
609 602
610 fnv = NULL; 603 fnv = NULL;
611 if (type == READ) { 604 if (type == READ) {
@@ -737,10 +730,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
737 goto fput_in; 730 goto fput_in;
738 count = retval; 731 count = retval;
739 732
740 retval = security_file_permission (in_file, MAY_READ);
741 if (retval)
742 goto fput_in;
743
744 /* 733 /*
745 * Get output file, and verify that it is ok.. 734 * Get output file, and verify that it is ok..
746 */ 735 */
@@ -759,10 +748,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
759 goto fput_out; 748 goto fput_out;
760 count = retval; 749 count = retval;
761 750
762 retval = security_file_permission (out_file, MAY_WRITE);
763 if (retval)
764 goto fput_out;
765
766 if (!max) 751 if (!max)
767 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 752 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
768 753
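
Together with the splice.c hunks below, the read_write.c changes fold the security_file_permission() call into rw_verify_area(), so each read/write/sendfile/splice path performs the LSM check exactly once. A hedged sketch of what a caller now looks like; do_example_read() is a hypothetical stand-in for vfs_read(), not code from the patch:

#include <linux/fs.h>

static ssize_t do_example_read(struct file *file, char __user *buf,
			       size_t count, loff_t *pos)
{
	ssize_t ret;

	/* range, mandatory-lock and security checks all live here now */
	ret = rw_verify_area(READ, file, pos, count);
	if (ret < 0)
		return ret;
	count = ret;		/* possibly clamped to MAX_RW_COUNT */

	/* no separate security_file_permission() call in the caller */
	if (file->f_op->read)
		return file->f_op->read(file, buf, count, pos);
	return do_sync_read(file, buf, count, pos);
}
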
diff --git a/fs/splice.c b/fs/splice.c
index 6bdcb6107bc3..56b802bfbfa4 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -908,10 +908,6 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
908 if (unlikely(ret < 0)) 908 if (unlikely(ret < 0))
909 return ret; 909 return ret;
910 910
911 ret = security_file_permission(out, MAY_WRITE);
912 if (unlikely(ret < 0))
913 return ret;
914
915 return out->f_op->splice_write(pipe, out, ppos, len, flags); 911 return out->f_op->splice_write(pipe, out, ppos, len, flags);
916} 912}
917 913
@@ -934,10 +930,6 @@ static long do_splice_to(struct file *in, loff_t *ppos,
934 if (unlikely(ret < 0)) 930 if (unlikely(ret < 0))
935 return ret; 931 return ret;
936 932
937 ret = security_file_permission(in, MAY_READ);
938 if (unlikely(ret < 0))
939 return ret;
940
941 return in->f_op->splice_read(in, ppos, pipe, len, flags); 933 return in->f_op->splice_read(in, ppos, pipe, len, flags);
942} 934}
943 935
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 337162935d21..4948d9bc405d 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -440,7 +440,7 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
440/** 440/**
441 * sysfs_remove_one - remove sysfs_dirent from parent 441 * sysfs_remove_one - remove sysfs_dirent from parent
442 * @acxt: addrm context to use 442 * @acxt: addrm context to use
443 * @sd: sysfs_dirent to be added 443 * @sd: sysfs_dirent to be removed
444 * 444 *
445 * Mark @sd removed and drop nlink of parent inode if @sd is a 445 * Mark @sd removed and drop nlink of parent inode if @sd is a
446 * directory. @sd is unlinked from the children list. 446 * directory. @sd is unlinked from the children list.
@@ -678,8 +678,10 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
678 sd = sysfs_find_dirent(parent_sd, dentry->d_name.name); 678 sd = sysfs_find_dirent(parent_sd, dentry->d_name.name);
679 679
680 /* no such entry */ 680 /* no such entry */
681 if (!sd) 681 if (!sd) {
682 ret = ERR_PTR(-ENOENT);
682 goto out_unlock; 683 goto out_unlock;
684 }
683 685
684 /* attach dentry and inode */ 686 /* attach dentry and inode */
685 inode = sysfs_get_inode(sd); 687 inode = sysfs_get_inode(sd);
@@ -781,6 +783,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
781 old_dentry = sysfs_get_dentry(sd); 783 old_dentry = sysfs_get_dentry(sd);
782 if (IS_ERR(old_dentry)) { 784 if (IS_ERR(old_dentry)) {
783 error = PTR_ERR(old_dentry); 785 error = PTR_ERR(old_dentry);
786 old_dentry = NULL;
784 goto out; 787 goto out;
785 } 788 }
786 789
@@ -848,6 +851,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
848 old_dentry = sysfs_get_dentry(sd); 851 old_dentry = sysfs_get_dentry(sd);
849 if (IS_ERR(old_dentry)) { 852 if (IS_ERR(old_dentry)) {
850 error = PTR_ERR(old_dentry); 853 error = PTR_ERR(old_dentry);
854 old_dentry = NULL;
851 goto out; 855 goto out;
852 } 856 }
853 old_parent = old_dentry->d_parent; 857 old_parent = old_dentry->d_parent;
@@ -855,6 +859,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
855 new_parent = sysfs_get_dentry(new_parent_sd); 859 new_parent = sysfs_get_dentry(new_parent_sd);
856 if (IS_ERR(new_parent)) { 860 if (IS_ERR(new_parent)) {
857 error = PTR_ERR(new_parent); 861 error = PTR_ERR(new_parent);
862 new_parent = NULL;
858 goto out; 863 goto out;
859 } 864 }
860 865
@@ -878,7 +883,6 @@ again:
878 error = 0; 883 error = 0;
879 d_add(new_dentry, NULL); 884 d_add(new_dentry, NULL);
880 d_move(old_dentry, new_dentry); 885 d_move(old_dentry, new_dentry);
881 dput(new_dentry);
882 886
883 /* Remove from old parent's list and insert into new parent's list. */ 887 /* Remove from old parent's list and insert into new parent's list. */
884 sysfs_unlink_sibling(sd); 888 sysfs_unlink_sibling(sd);
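
The sysfs_rename_dir() and sysfs_move_dir() hunks above set old_dentry (and new_parent) to NULL when sysfs_get_dentry() returns an ERR_PTR, so the shared cleanup path can dput() them unconditionally: dput(NULL) is a no-op, while dput() on an ERR_PTR would dereference garbage. A hedged sketch of that error-path idiom, with example_get_first()/example_get_second() standing in for the real lookups:

#include <linux/dcache.h>
#include <linux/err.h>

/* hypothetical lookups standing in for sysfs_get_dentry() */
extern struct dentry *example_get_first(void);
extern struct dentry *example_get_second(void);

static int example_rename_like(void)
{
	struct dentry *first = NULL, *second = NULL;
	int error;

	first = example_get_first();
	if (IS_ERR(first)) {
		error = PTR_ERR(first);
		first = NULL;		/* never hand an ERR_PTR to dput() */
		goto out;
	}

	second = example_get_second();
	if (IS_ERR(second)) {
		error = PTR_ERR(second);
		second = NULL;
		goto out;
	}

	error = 0;
	/* ... the actual rename/move work would go here ... */
out:
	dput(second);			/* dput(NULL) is a no-op */
	dput(first);
	return error;
}
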
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 4045bdcc4b33..8acf82bba44c 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -20,43 +20,6 @@
20 20
21#include "sysfs.h" 21#include "sysfs.h"
22 22
23#define to_sattr(a) container_of(a,struct subsys_attribute, attr)
24
25/*
26 * Subsystem file operations.
27 * These operations allow subsystems to have files that can be
28 * read/written.
29 */
30static ssize_t
31subsys_attr_show(struct kobject * kobj, struct attribute * attr, char * page)
32{
33 struct kset *kset = to_kset(kobj);
34 struct subsys_attribute * sattr = to_sattr(attr);
35 ssize_t ret = -EIO;
36
37 if (sattr->show)
38 ret = sattr->show(kset, page);
39 return ret;
40}
41
42static ssize_t
43subsys_attr_store(struct kobject * kobj, struct attribute * attr,
44 const char * page, size_t count)
45{
46 struct kset *kset = to_kset(kobj);
47 struct subsys_attribute * sattr = to_sattr(attr);
48 ssize_t ret = -EIO;
49
50 if (sattr->store)
51 ret = sattr->store(kset, page, count);
52 return ret;
53}
54
55static struct sysfs_ops subsys_sysfs_ops = {
56 .show = subsys_attr_show,
57 .store = subsys_attr_store,
58};
59
60/* 23/*
61 * There's one sysfs_buffer for each open file and one 24 * There's one sysfs_buffer for each open file and one
62 * sysfs_open_dirent for each sysfs_dirent with one or more open 25 * sysfs_open_dirent for each sysfs_dirent with one or more open
@@ -66,7 +29,7 @@ static struct sysfs_ops subsys_sysfs_ops = {
66 * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open 29 * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open
67 * is protected by sysfs_open_dirent_lock. 30 * is protected by sysfs_open_dirent_lock.
68 */ 31 */
69static spinlock_t sysfs_open_dirent_lock = SPIN_LOCK_UNLOCKED; 32static DEFINE_SPINLOCK(sysfs_open_dirent_lock);
70 33
71struct sysfs_open_dirent { 34struct sysfs_open_dirent {
72 atomic_t refcnt; 35 atomic_t refcnt;
@@ -354,31 +317,23 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
354{ 317{
355 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; 318 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
356 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; 319 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
357 struct sysfs_buffer * buffer; 320 struct sysfs_buffer *buffer;
358 struct sysfs_ops * ops = NULL; 321 struct sysfs_ops *ops;
359 int error; 322 int error = -EACCES;
360 323
361 /* need attr_sd for attr and ops, its parent for kobj */ 324 /* need attr_sd for attr and ops, its parent for kobj */
362 if (!sysfs_get_active_two(attr_sd)) 325 if (!sysfs_get_active_two(attr_sd))
363 return -ENODEV; 326 return -ENODEV;
364 327
365 /* if the kobject has no ktype, then we assume that it is a subsystem 328 /* every kobject with an attribute needs a ktype assigned */
366 * itself, and use ops for it. 329 if (kobj->ktype && kobj->ktype->sysfs_ops)
367 */
368 if (kobj->kset && kobj->kset->ktype)
369 ops = kobj->kset->ktype->sysfs_ops;
370 else if (kobj->ktype)
371 ops = kobj->ktype->sysfs_ops; 330 ops = kobj->ktype->sysfs_ops;
372 else 331 else {
373 ops = &subsys_sysfs_ops; 332 printk(KERN_ERR "missing sysfs attribute operations for "
374 333 "kobject: %s\n", kobject_name(kobj));
375 error = -EACCES; 334 WARN_ON(1);
376
377 /* No sysfs operations, either from having no subsystem,
378 * or the subsystem have no operations.
379 */
380 if (!ops)
381 goto err_out; 335 goto err_out;
336 }
382 337
383 /* File needs write support. 338 /* File needs write support.
384 * The inode's perms must say it's ok, 339 * The inode's perms must say it's ok,
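
With the subsys_attr_show()/subsys_attr_store() fallback removed, sysfs_open_file() above insists that every kobject exposing attributes carries a ktype with sysfs_ops, and warns otherwise. A minimal sketch of such a ktype, with invented example_* names:

#include <linux/kobject.h>
#include <linux/sysfs.h>

/* translate the generic attribute callbacks into object-specific ones;
 * a real implementation would container_of() kobj and attr and dispatch */
static ssize_t example_attr_show(struct kobject *kobj, struct attribute *attr,
				 char *buf)
{
	return -EIO;
}

static ssize_t example_attr_store(struct kobject *kobj, struct attribute *attr,
				  const char *buf, size_t count)
{
	return -EIO;
}

static struct sysfs_ops example_sysfs_ops = {
	.show  = example_attr_show,
	.store = example_attr_store,
};

/* a kobject without a ktype (or without sysfs_ops) now fails open with
 * the "missing sysfs attribute operations" warning instead of silently
 * falling back to the removed subsystem operations */
static struct kobj_type example_ktype = {
	.sysfs_ops = &example_sysfs_ops,
};
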
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 3eac20c63c41..5f66c4466151 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -19,39 +19,6 @@
19 19
20#include "sysfs.h" 20#include "sysfs.h"
21 21
22static int object_depth(struct sysfs_dirent *sd)
23{
24 int depth = 0;
25
26 for (; sd->s_parent; sd = sd->s_parent)
27 depth++;
28
29 return depth;
30}
31
32static int object_path_length(struct sysfs_dirent * sd)
33{
34 int length = 1;
35
36 for (; sd->s_parent; sd = sd->s_parent)
37 length += strlen(sd->s_name) + 1;
38
39 return length;
40}
41
42static void fill_object_path(struct sysfs_dirent *sd, char *buffer, int length)
43{
44 --length;
45 for (; sd->s_parent; sd = sd->s_parent) {
46 int cur = strlen(sd->s_name);
47
48 /* back up enough to print this bus id with '/' */
49 length -= cur;
50 strncpy(buffer + length, sd->s_name, cur);
51 *(buffer + --length) = '/';
52 }
53}
54
55/** 22/**
56 * sysfs_create_link - create symlink between two objects. 23 * sysfs_create_link - create symlink between two objects.
57 * @kobj: object whose directory we're creating the link in. 24 * @kobj: object whose directory we're creating the link in.
@@ -112,7 +79,6 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
112 return error; 79 return error;
113} 80}
114 81
115
116/** 82/**
117 * sysfs_remove_link - remove symlink in object's directory. 83 * sysfs_remove_link - remove symlink in object's directory.
118 * @kobj: object we're acting for. 84 * @kobj: object we're acting for.
@@ -124,24 +90,54 @@ void sysfs_remove_link(struct kobject * kobj, const char * name)
124 sysfs_hash_and_remove(kobj->sd, name); 90 sysfs_hash_and_remove(kobj->sd, name);
125} 91}
126 92
127static int sysfs_get_target_path(struct sysfs_dirent * parent_sd, 93static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
128 struct sysfs_dirent * target_sd, char *path) 94 struct sysfs_dirent *target_sd, char *path)
129{ 95{
130 char * s; 96 struct sysfs_dirent *base, *sd;
131 int depth, size; 97 char *s = path;
98 int len = 0;
99
100 /* go up to the root, stop at the base */
101 base = parent_sd;
102 while (base->s_parent) {
103 sd = target_sd->s_parent;
104 while (sd->s_parent && base != sd)
105 sd = sd->s_parent;
106
107 if (base == sd)
108 break;
109
110 strcpy(s, "../");
111 s += 3;
112 base = base->s_parent;
113 }
114
115 /* determine end of target string for reverse fillup */
116 sd = target_sd;
117 while (sd->s_parent && sd != base) {
118 len += strlen(sd->s_name) + 1;
119 sd = sd->s_parent;
120 }
132 121
133 depth = object_depth(parent_sd); 122 /* check limits */
134 size = object_path_length(target_sd) + depth * 3 - 1; 123 if (len < 2)
135 if (size > PATH_MAX) 124 return -EINVAL;
125 len--;
126 if ((s - path) + len > PATH_MAX)
136 return -ENAMETOOLONG; 127 return -ENAMETOOLONG;
137 128
138 pr_debug("%s: depth = %d, size = %d\n", __FUNCTION__, depth, size); 129 /* reverse fillup of target string from target to base */
130 sd = target_sd;
131 while (sd->s_parent && sd != base) {
132 int slen = strlen(sd->s_name);
139 133
140 for (s = path; depth--; s += 3) 134 len -= slen;
141 strcpy(s,"../"); 135 strncpy(s + len, sd->s_name, slen);
136 if (len)
137 s[--len] = '/';
142 138
143 fill_object_path(target_sd, path, size); 139 sd = sd->s_parent;
144 pr_debug("%s: path = '%s'\n", __FUNCTION__, path); 140 }
145 141
146 return 0; 142 return 0;
147} 143}
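
The rewritten sysfs_get_target_path() stops climbing at the first ancestor the link's directory shares with the target instead of always walking to the sysfs root, then emits one "../" per remaining level and reverse-fills the target's tail. A worked example on a hypothetical layout (the directory names are invented):

/*
 * parent_sd : devices/platform/foo/subdir   (directory holding the link)
 * target_sd : devices/platform/foo/other    (what the link points to)
 *
 * The first loop walks parent_sd upwards and stops at "foo", the first
 * ancestor shared with the target, emitting one "../" for the single
 * level climbed.  The remaining loops measure and reverse-fill the part
 * of the target path below "foo", giving:
 *
 *	path = "../other"
 *
 * The removed object_depth()/fill_object_path() helpers always climbed
 * to the sysfs root, which would have produced the longer but equivalent
 * "../../../../devices/platform/foo/other".
 */
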
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 4847eb83fc18..21a1c2b1c5fc 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -261,9 +261,9 @@ xfs_file_readdir(
261#else 261#else
262 262
263struct hack_dirent { 263struct hack_dirent {
264 int namlen;
265 loff_t offset;
266 u64 ino; 264 u64 ino;
265 loff_t offset;
266 int namlen;
267 unsigned int d_type; 267 unsigned int d_type;
268 char name[]; 268 char name[];
269}; 269};
@@ -285,8 +285,10 @@ xfs_hack_filldir(
285{ 285{
286 struct hack_callback *buf = __buf; 286 struct hack_callback *buf = __buf;
287 struct hack_dirent *de = (struct hack_dirent *)(buf->dirent + buf->used); 287 struct hack_dirent *de = (struct hack_dirent *)(buf->dirent + buf->used);
288 unsigned int reclen;
288 289
289 if (buf->used + sizeof(struct hack_dirent) + namlen > buf->len) 290 reclen = ALIGN(sizeof(struct hack_dirent) + namlen, sizeof(u64));
291 if (buf->used + reclen > buf->len)
290 return -EINVAL; 292 return -EINVAL;
291 293
292 de->namlen = namlen; 294 de->namlen = namlen;
@@ -294,7 +296,7 @@ xfs_hack_filldir(
294 de->ino = ino; 296 de->ino = ino;
295 de->d_type = d_type; 297 de->d_type = d_type;
296 memcpy(de->name, name, namlen); 298 memcpy(de->name, name, namlen);
297 buf->used += sizeof(struct hack_dirent) + namlen; 299 buf->used += reclen;
298 return 0; 300 return 0;
299} 301}
300 302
@@ -334,7 +336,8 @@ xfs_file_readdir(
334 offset = filp->f_pos; 336 offset = filp->f_pos;
335 337
336 while (!eof) { 338 while (!eof) {
337 int reclen; 339 unsigned int reclen;
340
338 start_offset = offset; 341 start_offset = offset;
339 342
340 buf.used = 0; 343 buf.used = 0;
@@ -355,7 +358,8 @@ xfs_file_readdir(
355 goto done; 358 goto done;
356 } 359 }
357 360
358 reclen = sizeof(struct hack_dirent) + de->namlen; 361 reclen = ALIGN(sizeof(struct hack_dirent) + de->namlen,
362 sizeof(u64));
359 size -= reclen; 363 size -= reclen;
360 de = (struct hack_dirent *)((char *)de + reclen); 364 de = (struct hack_dirent *)((char *)de + reclen);
361 curr_offset = de->offset /* & 0x7fffffff */; 365 curr_offset = de->offset /* & 0x7fffffff */;