author	Russell King <rmk@dyn-67.arm.linux.org.uk>	2009-04-02 18:22:11 -0400
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2009-04-02 18:22:11 -0400
commit	cd02938a828f4b2098a074afb7454f106f2e8df5 (patch)
tree	7b543fd6aa82a62dc3a9614c26f89daca83e77d5 /fs
parent	9d681f3a1b27fdfc17ea251cf8d5f627dab34670 (diff)
parent	172ef275444efa12d834fb9d1b1acdac92db47f7 (diff)
Merge branch 'smsc911x-armplatforms' of git://github.com/steveglen/linux-2.6
Diffstat (limited to 'fs')
-rw-r--r--	fs/afs/proc.c	1
-rw-r--r--	fs/autofs4/autofs_i.h	2
-rw-r--r--	fs/autofs4/dev-ioctl.c	29
-rw-r--r--	fs/autofs4/expire.c	27
-rw-r--r--	fs/autofs4/root.c	41
-rw-r--r--	fs/btrfs/Makefile	2
-rw-r--r--	fs/btrfs/btrfs_inode.h	31
-rw-r--r--	fs/btrfs/ctree.c	588
-rw-r--r--	fs/btrfs/ctree.h	71
-rw-r--r--	fs/btrfs/delayed-ref.c	669
-rw-r--r--	fs/btrfs/delayed-ref.h	193
-rw-r--r--	fs/btrfs/dir-item.c	3
-rw-r--r--	fs/btrfs/disk-io.c	81
-rw-r--r--	fs/btrfs/disk-io.h	1
-rw-r--r--	fs/btrfs/extent-tree.c	1674
-rw-r--r--	fs/btrfs/extent_io.c	51
-rw-r--r--	fs/btrfs/extent_io.h	3
-rw-r--r--	fs/btrfs/file-item.c	7
-rw-r--r--	fs/btrfs/file.c	50
-rw-r--r--	fs/btrfs/inode-item.c	3
-rw-r--r--	fs/btrfs/inode.c	206
-rw-r--r--	fs/btrfs/locking.c	21
-rw-r--r--	fs/btrfs/ordered-data.c	118
-rw-r--r--	fs/btrfs/ordered-data.h	4
-rw-r--r--	fs/btrfs/transaction.c	151
-rw-r--r--	fs/btrfs/transaction.h	8
-rw-r--r--	fs/btrfs/tree-defrag.c	2
-rw-r--r--	fs/btrfs/tree-log.c	444
-rw-r--r--	fs/btrfs/tree-log.h	17
-rw-r--r--	fs/buffer.c	56
-rw-r--r--	fs/cifs/cifs_debug.c	1
-rw-r--r--	fs/ecryptfs/keystore.c	3
-rw-r--r--	fs/ecryptfs/messaging.c	3
-rw-r--r--	fs/eventfd.c	26
-rw-r--r--	fs/eventpoll.c	614
-rw-r--r--	fs/ext4/balloc.c	14
-rw-r--r--	fs/ext4/dir.c	16
-rw-r--r--	fs/ext4/ext4.h	93
-rw-r--r--	fs/ext4/ext4_extents.h	1
-rw-r--r--	fs/ext4/ext4_i.h	6
-rw-r--r--	fs/ext4/ext4_sb.h	14
-rw-r--r--	fs/ext4/extents.c	127
-rw-r--r--	fs/ext4/file.c	7
-rw-r--r--	fs/ext4/ialloc.c	273
-rw-r--r--	fs/ext4/inode.c	429
-rw-r--r--	fs/ext4/ioctl.c	17
-rw-r--r--	fs/ext4/mballoc.c	158
-rw-r--r--	fs/ext4/mballoc.h	8
-rw-r--r--	fs/ext4/namei.c	164
-rw-r--r--	fs/ext4/resize.c	8
-rw-r--r--	fs/ext4/super.c	327
-rw-r--r--	fs/fcntl.c	10
-rw-r--r--	fs/fuse/file.c	6
-rw-r--r--	fs/gfs2/ops_file.c	5
-rw-r--r--	fs/hugetlbfs/inode.c	21
-rw-r--r--	fs/jbd2/commit.c	5
-rw-r--r--	fs/jbd2/revoke.c	24
-rw-r--r--	fs/jbd2/transaction.c	2
-rw-r--r--	fs/jfs/Kconfig	1
-rw-r--r--	fs/jfs/jfs_debug.c	1
-rw-r--r--	fs/jfs/jfs_extent.c	63
-rw-r--r--	fs/jfs/jfs_imap.c	10
-rw-r--r--	fs/jfs/jfs_metapage.c	18
-rw-r--r--	fs/jfs/jfs_types.h	29
-rw-r--r--	fs/jfs/jfs_xtree.c	263
-rw-r--r--	fs/jfs/jfs_xtree.h	2
-rw-r--r--	fs/jfs/super.c	4
-rw-r--r--	fs/lockd/clntlock.c	51
-rw-r--r--	fs/lockd/mon.c	8
-rw-r--r--	fs/lockd/svc.c	42
-rw-r--r--	fs/nfs/callback.c	31
-rw-r--r--	fs/nfs/callback.h	1
-rw-r--r--	fs/nfs/client.c	118
-rw-r--r--	fs/nfs/dir.c	9
-rw-r--r--	fs/nfs/file.c	37
-rw-r--r--	fs/nfs/getroot.c	4
-rw-r--r--	fs/nfs/inode.c	309
-rw-r--r--	fs/nfs/internal.h	4
-rw-r--r--	fs/nfs/nfs2xdr.c	9
-rw-r--r--	fs/nfs/nfs3proc.c	1
-rw-r--r--	fs/nfs/nfs3xdr.c	37
-rw-r--r--	fs/nfs/nfs4proc.c	47
-rw-r--r--	fs/nfs/nfs4state.c	10
-rw-r--r--	fs/nfs/nfs4xdr.c	213
-rw-r--r--	fs/nfs/pagelist.c	11
-rw-r--r--	fs/nfs/proc.c	1
-rw-r--r--	fs/nfs/super.c	4
-rw-r--r--	fs/nfs/write.c	53
-rw-r--r--	fs/nfsd/nfsctl.c	6
-rw-r--r--	fs/nfsd/nfssvc.c	5
-rw-r--r--	fs/ntfs/dir.c	4
-rw-r--r--	fs/ntfs/inode.c	3
-rw-r--r--	fs/ntfs/layout.h	329
-rw-r--r--	fs/ntfs/logfile.h	6
-rw-r--r--	fs/ntfs/mft.c	2
-rw-r--r--	fs/ntfs/super.c	50
-rw-r--r--	fs/ntfs/usnjrnl.h	48
-rw-r--r--	fs/ocfs2/mmap.c	6
-rw-r--r--	fs/proc/generic.c	63
-rw-r--r--	fs/proc/inode-alloc.txt	14
-rw-r--r--	fs/proc/inode.c	21
-rw-r--r--	fs/proc/internal.h	1
-rw-r--r--	fs/proc/proc_tty.c	13
-rw-r--r--	fs/proc/task_mmu.c	8
-rw-r--r--	fs/proc/uptime.c	38
-rw-r--r--	fs/ramfs/file-nommu.c	15
-rw-r--r--	fs/ramfs/inode.c	94
-rw-r--r--	fs/reiserfs/Makefile	4
-rw-r--r--	fs/reiserfs/README	4
-rw-r--r--	fs/reiserfs/bitmap.c	72
-rw-r--r--	fs/reiserfs/dir.c	28
-rw-r--r--	fs/reiserfs/do_balan.c	313
-rw-r--r--	fs/reiserfs/file.c	34
-rw-r--r--	fs/reiserfs/fix_node.c	1021
-rw-r--r--	fs/reiserfs/hashes.c	2
-rw-r--r--	fs/reiserfs/ibalance.c	22
-rw-r--r--	fs/reiserfs/inode.c	203
-rw-r--r--	fs/reiserfs/ioctl.c	2
-rw-r--r--	fs/reiserfs/item_ops.c	68
-rw-r--r--	fs/reiserfs/journal.c	1077
-rw-r--r--	fs/reiserfs/lbalance.c	66
-rw-r--r--	fs/reiserfs/namei.c	180
-rw-r--r--	fs/reiserfs/objectid.c	12
-rw-r--r--	fs/reiserfs/prints.c	134
-rw-r--r--	fs/reiserfs/procfs.c	16
-rw-r--r--	fs/reiserfs/resize.c	6
-rw-r--r--	fs/reiserfs/stree.c	1168
-rw-r--r--	fs/reiserfs/super.c	303
-rw-r--r--	fs/reiserfs/tail_conversion.c	96
-rw-r--r--	fs/reiserfs/xattr.c	1375
-rw-r--r--	fs/reiserfs/xattr_acl.c	257
-rw-r--r--	fs/reiserfs/xattr_security.c	80
-rw-r--r--	fs/reiserfs/xattr_trusted.c	45
-rw-r--r--	fs/reiserfs/xattr_user.c	31
-rw-r--r--	fs/seq_file.c	2
-rw-r--r--	fs/sysfs/bin.c	8
-rw-r--r--	fs/ubifs/file.c	9
-rw-r--r--	fs/xfs/linux-2.6/xfs_file.c	4
138 files changed, 8568 insertions(+), 7127 deletions(-)
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 7578c1ab9e0b..8630615e57fe 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -146,7 +146,6 @@ int afs_proc_init(void)
 	proc_afs = proc_mkdir("fs/afs", NULL);
 	if (!proc_afs)
 		goto error_dir;
-	proc_afs->owner = THIS_MODULE;
 
 	p = proc_create("cells", 0, proc_afs, &afs_proc_cells_fops);
 	if (!p)
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index a76803108d06..b7ff33c63101 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -186,6 +186,8 @@ int autofs4_expire_wait(struct dentry *dentry);
 int autofs4_expire_run(struct super_block *, struct vfsmount *,
 			struct autofs_sb_info *,
 			struct autofs_packet_expire __user *);
+int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
+			    struct autofs_sb_info *sbi, int when);
 int autofs4_expire_multi(struct super_block *, struct vfsmount *,
 			struct autofs_sb_info *, int __user *);
 struct dentry *autofs4_expire_direct(struct super_block *sb,
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 025e105bffea..9e5ae8a4f5c8 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -525,40 +525,13 @@ static int autofs_dev_ioctl_expire(struct file *fp,
 				      struct autofs_sb_info *sbi,
 				      struct autofs_dev_ioctl *param)
 {
-	struct dentry *dentry;
 	struct vfsmount *mnt;
-	int err = -EAGAIN;
 	int how;
 
 	how = param->expire.how;
 	mnt = fp->f_path.mnt;
 
-	if (autofs_type_trigger(sbi->type))
-		dentry = autofs4_expire_direct(sbi->sb, mnt, sbi, how);
-	else
-		dentry = autofs4_expire_indirect(sbi->sb, mnt, sbi, how);
-
-	if (dentry) {
-		struct autofs_info *ino = autofs4_dentry_ino(dentry);
-
-		/*
-		 * This is synchronous because it makes the daemon a
-		 * little easier
-		 */
-		err = autofs4_wait(sbi, dentry, NFY_EXPIRE);
-
-		spin_lock(&sbi->fs_lock);
-		if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
-			ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
-			sbi->sb->s_root->d_mounted++;
-		}
-		ino->flags &= ~AUTOFS_INF_EXPIRING;
-		complete_all(&ino->expire_complete);
-		spin_unlock(&sbi->fs_lock);
-		dput(dentry);
-	}
-
-	return err;
+	return autofs4_do_expire_multi(sbi->sb, mnt, sbi, how);
 }
 
 /* Check if autofs mount point is in use */
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index e3bd50776f9e..75f7ddacf7d6 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -478,22 +478,16 @@ int autofs4_expire_run(struct super_block *sb,
 	return ret;
 }
 
-/* Call repeatedly until it returns -EAGAIN, meaning there's nothing
-   more to be done */
-int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
-			struct autofs_sb_info *sbi, int __user *arg)
+int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
+			    struct autofs_sb_info *sbi, int when)
 {
 	struct dentry *dentry;
 	int ret = -EAGAIN;
-	int do_now = 0;
-
-	if (arg && get_user(do_now, arg))
-		return -EFAULT;
 
 	if (autofs_type_trigger(sbi->type))
-		dentry = autofs4_expire_direct(sb, mnt, sbi, do_now);
+		dentry = autofs4_expire_direct(sb, mnt, sbi, when);
 	else
-		dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now);
+		dentry = autofs4_expire_indirect(sb, mnt, sbi, when);
 
 	if (dentry) {
 		struct autofs_info *ino = autofs4_dentry_ino(dentry);
@@ -516,3 +510,16 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 	return ret;
 }
 
+/* Call repeatedly until it returns -EAGAIN, meaning there's nothing
+   more to be done */
+int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
+			struct autofs_sb_info *sbi, int __user *arg)
+{
+	int do_now = 0;
+
+	if (arg && get_user(do_now, arg))
+		return -EFAULT;
+
+	return autofs4_do_expire_multi(sb, mnt, sbi, do_now);
+}
+
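The comment above spells out the userspace contract: keep calling the expire ioctl until it returns -EAGAIN. Below is a minimal daemon-side sketch of that loop, assuming the stock AUTOFS_IOC_EXPIRE_MULTI ioctl is the entry point that lands in autofs4_expire_multi(); the helper name expire_all() is invented for illustration.

/* hedged sketch (userspace, not kernel code): drive the expire path
 * described above until the kernel reports nothing more to be done */
#include <errno.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/auto_fs4.h>

static void expire_all(int ioctlfd, int when)
{
	/* the ioctl copies 'when' in with get_user() and then calls
	 * autofs4_do_expire_multi(); failure with EAGAIN means done */
	while (ioctl(ioctlfd, AUTOFS_IOC_EXPIRE_MULTI, &when) == 0)
		;
	if (errno != EAGAIN)
		perror("expire");
}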
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 74b1469a9504..e383bf0334f1 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -485,22 +485,6 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 	DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
 		 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
 
-	expiring = autofs4_lookup_expiring(sbi, dentry->d_parent, &dentry->d_name);
-	if (expiring) {
-		/*
-		 * If we are racing with expire the request might not
-		 * be quite complete but the directory has been removed
-		 * so it must have been successful, so just wait for it.
-		 */
-		ino = autofs4_dentry_ino(expiring);
-		autofs4_expire_wait(expiring);
-		spin_lock(&sbi->lookup_lock);
-		if (!list_empty(&ino->expiring))
-			list_del_init(&ino->expiring);
-		spin_unlock(&sbi->lookup_lock);
-		dput(expiring);
-	}
-
 	unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name);
 	if (unhashed)
 		dentry = unhashed;
@@ -538,14 +522,31 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 	}
 
 	if (!oz_mode) {
+		mutex_unlock(&dir->i_mutex);
+		expiring = autofs4_lookup_expiring(sbi,
+						   dentry->d_parent,
+						   &dentry->d_name);
+		if (expiring) {
+			/*
+			 * If we are racing with expire the request might not
+			 * be quite complete but the directory has been removed
+			 * so it must have been successful, so just wait for it.
+			 */
+			ino = autofs4_dentry_ino(expiring);
+			autofs4_expire_wait(expiring);
+			spin_lock(&sbi->lookup_lock);
+			if (!list_empty(&ino->expiring))
+				list_del_init(&ino->expiring);
+			spin_unlock(&sbi->lookup_lock);
+			dput(expiring);
+		}
+
 		spin_lock(&dentry->d_lock);
 		dentry->d_flags |= DCACHE_AUTOFS_PENDING;
 		spin_unlock(&dentry->d_lock);
-		if (dentry->d_op && dentry->d_op->d_revalidate) {
-			mutex_unlock(&dir->i_mutex);
+		if (dentry->d_op && dentry->d_op->d_revalidate)
 			(dentry->d_op->d_revalidate)(dentry, nd);
 			mutex_lock(&dir->i_mutex);
-		}
 	}
 
 	/*
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index d2cf5a54a4b8..9adf5e4f7e96 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -8,7 +8,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
 	   ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
-	   compression.o
+	   compression.o delayed-ref.o
 else
 
 # Normal Makefile
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 72677ce2b74f..b30986f00b9d 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -66,6 +66,12 @@ struct btrfs_inode {
 	 */
 	struct list_head delalloc_inodes;
 
+	/*
+	 * list for tracking inodes that must be sent to disk before a
+	 * rename or truncate commit
+	 */
+	struct list_head ordered_operations;
+
 	/* the space_info for where this inode's data allocations are done */
 	struct btrfs_space_info *space_info;
 
@@ -86,12 +92,6 @@ struct btrfs_inode {
 	 */
 	u64 logged_trans;
 
-	/*
-	 * trans that last made a change that should be fully fsync'd.  This
-	 * gets reset to zero each time the inode is logged
-	 */
-	u64 log_dirty_trans;
-
 	/* total number of bytes pending delalloc, used by stat to calc the
 	 * real block usage of the file
 	 */
@@ -121,6 +121,25 @@ struct btrfs_inode {
 	/* the start of block group preferred for allocations. */
 	u64 block_group;
 
+	/* the fsync log has some corner cases that mean we have to check
+	 * directories to see if any unlinks have been done before
+	 * the directory was logged.  See tree-log.c for all the
+	 * details
+	 */
+	u64 last_unlink_trans;
+
+	/*
+	 * ordered_data_close is set by truncate when a file that used
+	 * to have good data has been truncated to zero.  When it is set
+	 * the btrfs file release call will add this inode to the
+	 * ordered operations list so that we make sure to flush out any
+	 * new data the application may have written before commit.
+	 *
+	 * yes, its silly to have a single bitflag, but we might grow more
+	 * of these.
+	 */
+	unsigned ordered_data_close:1;
+
 	struct inode vfs_inode;
 };
 
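The two comment blocks above describe a protocol more than a data structure: truncate-to-zero latches ordered_data_close, and the release path consumes the latch to decide whether the inode still needs a pre-commit flush. A toy, userspace-only model of that intent follows, with every name fabricated; the real consumers land in fs/btrfs/file.c and fs/btrfs/inode.c in this series.

/* toy model (not kernel code) of the ordered_data_close latch */
#include <stdio.h>

struct toy_inode {
	long bytes_since_truncate;
	unsigned ordered_data_close:1;	/* single bitflag, as in the diff */
};

static void toy_truncate_to_zero(struct toy_inode *i)
{
	i->bytes_since_truncate = 0;
	i->ordered_data_close = 1;	/* file used to have good data */
}

static void toy_release(struct toy_inode *i)
{
	if (i->ordered_data_close) {
		i->ordered_data_close = 0;
		printf("queue on ordered operations list, flush %ld bytes\n",
		       i->bytes_since_truncate);
	}
}

int main(void)
{
	struct toy_inode i = { 0, 0 };

	toy_truncate_to_zero(&i);
	i.bytes_since_truncate = 4096;	/* app rewrites the file */
	toy_release(&i);		/* close() flushes before commit */
	return 0;
}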
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 37f31b5529aa..dbb724124633 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -254,18 +254,13 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
  * empty_size -- a hint that you plan on doing more cow.  This is the size in
  * bytes the allocator should try to find free next to the block it returns.
  * This is just a hint and may be ignored by the allocator.
- *
- * prealloc_dest -- if you have already reserved a destination for the cow,
- * this uses that block instead of allocating a new one.
- * btrfs_alloc_reserved_extent is used to finish the allocation.
  */
 static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     struct extent_buffer *buf,
 			     struct extent_buffer *parent, int parent_slot,
 			     struct extent_buffer **cow_ret,
-			     u64 search_start, u64 empty_size,
-			     u64 prealloc_dest)
+			     u64 search_start, u64 empty_size)
 {
 	u64 parent_start;
 	struct extent_buffer *cow;
@@ -291,26 +286,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	level = btrfs_header_level(buf);
 	nritems = btrfs_header_nritems(buf);
 
-	if (prealloc_dest) {
-		struct btrfs_key ins;
-
-		ins.objectid = prealloc_dest;
-		ins.offset = buf->len;
-		ins.type = BTRFS_EXTENT_ITEM_KEY;
-
-		ret = btrfs_alloc_reserved_extent(trans, root, parent_start,
-						  root->root_key.objectid,
-						  trans->transid, level, &ins);
-		BUG_ON(ret);
-		cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
-					    buf->len, level);
-	} else {
-		cow = btrfs_alloc_free_block(trans, root, buf->len,
-					     parent_start,
-					     root->root_key.objectid,
-					     trans->transid, level,
-					     search_start, empty_size);
-	}
+	cow = btrfs_alloc_free_block(trans, root, buf->len,
+				     parent_start, root->root_key.objectid,
+				     trans->transid, level,
+				     search_start, empty_size);
 	if (IS_ERR(cow))
 		return PTR_ERR(cow);
 
@@ -413,7 +392,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 		    struct btrfs_root *root, struct extent_buffer *buf,
 		    struct extent_buffer *parent, int parent_slot,
-		    struct extent_buffer **cow_ret, u64 prealloc_dest)
+		    struct extent_buffer **cow_ret)
 {
 	u64 search_start;
 	int ret;
@@ -436,7 +415,6 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 	    btrfs_header_owner(buf) == root->root_key.objectid &&
 	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
 		*cow_ret = buf;
-		WARN_ON(prealloc_dest);
 		return 0;
 	}
 
@@ -447,8 +425,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 	btrfs_set_lock_blocking(buf);
 
 	ret = __btrfs_cow_block(trans, root, buf, parent,
-				 parent_slot, cow_ret, search_start, 0,
-				 prealloc_dest);
+				 parent_slot, cow_ret, search_start, 0);
 	return ret;
 }
 
@@ -617,7 +594,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		err = __btrfs_cow_block(trans, root, cur, parent, i,
 					&cur, search_start,
 					min(16 * blocksize,
-					    (end_slot - i) * blocksize), 0);
+					    (end_slot - i) * blocksize));
 		if (err) {
 			btrfs_tree_unlock(cur);
 			free_extent_buffer(cur);
@@ -937,7 +914,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		BUG_ON(!child);
 		btrfs_tree_lock(child);
 		btrfs_set_lock_blocking(child);
-		ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
+		ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
 		BUG_ON(ret);
 
 		spin_lock(&root->node_lock);
@@ -945,6 +922,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		spin_unlock(&root->node_lock);
 
 		ret = btrfs_update_extent_ref(trans, root, child->start,
+					      child->len,
 					      mid->start, child->start,
 					      root->root_key.objectid,
 					      trans->transid, level - 1);
@@ -971,6 +949,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
 		return 0;
 
+	if (trans->transaction->delayed_refs.flushing &&
+	    btrfs_header_nritems(mid) > 2)
+		return 0;
+
 	if (btrfs_header_nritems(mid) < 2)
 		err_on_enospc = 1;
 
@@ -979,7 +961,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		btrfs_tree_lock(left);
 		btrfs_set_lock_blocking(left);
 		wret = btrfs_cow_block(trans, root, left,
-				       parent, pslot - 1, &left, 0);
+				       parent, pslot - 1, &left);
 		if (wret) {
 			ret = wret;
 			goto enospc;
@@ -990,7 +972,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		btrfs_tree_lock(right);
 		btrfs_set_lock_blocking(right);
 		wret = btrfs_cow_block(trans, root, right,
-				       parent, pslot + 1, &right, 0);
+				       parent, pslot + 1, &right);
 		if (wret) {
 			ret = wret;
 			goto enospc;
@@ -1171,7 +1153,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 			wret = 1;
 		} else {
 			ret = btrfs_cow_block(trans, root, left, parent,
-					      pslot - 1, &left, 0);
+					      pslot - 1, &left);
 			if (ret)
 				wret = 1;
 			else {
@@ -1222,7 +1204,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 		} else {
 			ret = btrfs_cow_block(trans, root, right,
 					      parent, pslot + 1,
-					      &right, 0);
+					      &right);
 			if (ret)
 				wret = 1;
 			else {
@@ -1492,7 +1474,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	u8 lowest_level = 0;
 	u64 blocknr;
 	u64 gen;
-	struct btrfs_key prealloc_block;
 
 	lowest_level = p->lowest_level;
 	WARN_ON(lowest_level && ins_len > 0);
@@ -1501,8 +1482,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	if (ins_len < 0)
 		lowest_unlock = 2;
 
-	prealloc_block.objectid = 0;
-
 again:
 	if (p->skip_locking)
 		b = btrfs_root_node(root);
@@ -1529,44 +1508,11 @@ again:
 		    !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
 			goto cow_done;
 		}
-
-		/* ok, we have to cow, is our old prealloc the right
-		 * size?
-		 */
-		if (prealloc_block.objectid &&
-		    prealloc_block.offset != b->len) {
-			btrfs_release_path(root, p);
-			btrfs_free_reserved_extent(root,
-					   prealloc_block.objectid,
-					   prealloc_block.offset);
-			prealloc_block.objectid = 0;
-			goto again;
-		}
-
-		/*
-		 * for higher level blocks, try not to allocate blocks
-		 * with the block and the parent locks held.
-		 */
-		if (level > 0 && !prealloc_block.objectid) {
-			u32 size = b->len;
-			u64 hint = b->start;
-
-			btrfs_release_path(root, p);
-			ret = btrfs_reserve_extent(trans, root,
-						   size, size, 0,
-						   hint, (u64)-1,
-						   &prealloc_block, 0);
-			BUG_ON(ret);
-			goto again;
-		}
-
 		btrfs_set_path_blocking(p);
 
 		wret = btrfs_cow_block(trans, root, b,
 				       p->nodes[level + 1],
-				       p->slots[level + 1],
-				       &b, prealloc_block.objectid);
-		prealloc_block.objectid = 0;
+				       p->slots[level + 1], &b);
 		if (wret) {
 			free_extent_buffer(b);
 			ret = wret;
@@ -1742,12 +1688,8 @@ done:
 	 * we don't really know what they plan on doing with the path
 	 * from here on, so for now just mark it as blocking
 	 */
-	btrfs_set_path_blocking(p);
-	if (prealloc_block.objectid) {
-		btrfs_free_reserved_extent(root,
-					   prealloc_block.objectid,
-					   prealloc_block.offset);
-	}
+	if (!p->leave_spinning)
+		btrfs_set_path_blocking(p);
 	return ret;
 }
 
@@ -1768,7 +1710,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans,
 	int ret;
 
 	eb = btrfs_lock_root_node(root);
-	ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0);
+	ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb);
 	BUG_ON(ret);
 
 	btrfs_set_lock_blocking(eb);
@@ -1826,7 +1768,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans,
 		}
 
 		ret = btrfs_cow_block(trans, root, eb, parent, slot,
-				      &eb, 0);
+				      &eb);
 		BUG_ON(ret);
 
 		if (root->root_key.objectid ==
@@ -2139,7 +2081,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	spin_unlock(&root->node_lock);
 
 	ret = btrfs_update_extent_ref(trans, root, lower->start,
-				      lower->start, c->start,
+				      lower->len, lower->start, c->start,
 				      root->root_key.objectid,
 				      trans->transid, level - 1);
 	BUG_ON(ret);
@@ -2221,7 +2163,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 		ret = insert_new_root(trans, root, path, level + 1);
 		if (ret)
 			return ret;
-	} else {
+	} else if (!trans->transaction->delayed_refs.flushing) {
 		ret = push_nodes_for_insert(trans, root, path, level);
 		c = path->nodes[level];
 		if (!ret && btrfs_header_nritems(c) <
@@ -2329,66 +2271,27 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
 	return ret;
 }
 
-/*
- * push some data in the path leaf to the right, trying to free up at
- * least data_size bytes.  returns zero if the push worked, nonzero otherwise
- *
- * returns 1 if the push failed because the other node didn't have enough
- * room, 0 if everything worked out and < 0 if there were major errors.
- */
-static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
-			   *root, struct btrfs_path *path, int data_size,
-			   int empty)
+static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct btrfs_path *path,
+				      int data_size, int empty,
+				      struct extent_buffer *right,
+				      int free_space, u32 left_nritems)
 {
 	struct extent_buffer *left = path->nodes[0];
-	struct extent_buffer *right;
-	struct extent_buffer *upper;
+	struct extent_buffer *upper = path->nodes[1];
 	struct btrfs_disk_key disk_key;
 	int slot;
 	u32 i;
-	int free_space;
 	int push_space = 0;
 	int push_items = 0;
 	struct btrfs_item *item;
-	u32 left_nritems;
 	u32 nr;
 	u32 right_nritems;
 	u32 data_end;
 	u32 this_item_size;
 	int ret;
 
-	slot = path->slots[1];
-	if (!path->nodes[1])
-		return 1;
-
-	upper = path->nodes[1];
-	if (slot >= btrfs_header_nritems(upper) - 1)
-		return 1;
-
-	btrfs_assert_tree_locked(path->nodes[1]);
-
-	right = read_node_slot(root, upper, slot + 1);
-	btrfs_tree_lock(right);
-	btrfs_set_lock_blocking(right);
-
-	free_space = btrfs_leaf_free_space(root, right);
-	if (free_space < data_size)
-		goto out_unlock;
-
-	/* cow and double check */
-	ret = btrfs_cow_block(trans, root, right, upper,
-			      slot + 1, &right, 0);
-	if (ret)
-		goto out_unlock;
-
-	free_space = btrfs_leaf_free_space(root, right);
-	if (free_space < data_size)
-		goto out_unlock;
-
-	left_nritems = btrfs_header_nritems(left);
-	if (left_nritems == 0)
-		goto out_unlock;
-
 	if (empty)
 		nr = 0;
 	else
@@ -2397,6 +2300,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
 	if (path->slots[0] >= left_nritems)
 		push_space += data_size;
 
+	slot = path->slots[1];
 	i = left_nritems - 1;
 	while (i >= nr) {
 		item = btrfs_item_nr(left, i);
@@ -2528,24 +2432,82 @@ out_unlock:
 }
 
 /*
+ * push some data in the path leaf to the right, trying to free up at
+ * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * returns 1 if the push failed because the other node didn't have enough
+ * room, 0 if everything worked out and < 0 if there were major errors.
+ */
+static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
+			   *root, struct btrfs_path *path, int data_size,
+			   int empty)
+{
+	struct extent_buffer *left = path->nodes[0];
+	struct extent_buffer *right;
+	struct extent_buffer *upper;
+	int slot;
+	int free_space;
+	u32 left_nritems;
+	int ret;
+
+	if (!path->nodes[1])
+		return 1;
+
+	slot = path->slots[1];
+	upper = path->nodes[1];
+	if (slot >= btrfs_header_nritems(upper) - 1)
+		return 1;
+
+	btrfs_assert_tree_locked(path->nodes[1]);
+
+	right = read_node_slot(root, upper, slot + 1);
+	btrfs_tree_lock(right);
+	btrfs_set_lock_blocking(right);
+
+	free_space = btrfs_leaf_free_space(root, right);
+	if (free_space < data_size)
+		goto out_unlock;
+
+	/* cow and double check */
+	ret = btrfs_cow_block(trans, root, right, upper,
+			      slot + 1, &right);
+	if (ret)
+		goto out_unlock;
+
+	free_space = btrfs_leaf_free_space(root, right);
+	if (free_space < data_size)
+		goto out_unlock;
+
+	left_nritems = btrfs_header_nritems(left);
+	if (left_nritems == 0)
+		goto out_unlock;
+
+	return __push_leaf_right(trans, root, path, data_size, empty,
+				 right, free_space, left_nritems);
+out_unlock:
+	btrfs_tree_unlock(right);
+	free_extent_buffer(right);
+	return 1;
+}
+
+/*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
  */
-static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
-			  *root, struct btrfs_path *path, int data_size,
-			  int empty)
+static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root,
+				     struct btrfs_path *path, int data_size,
+				     int empty, struct extent_buffer *left,
+				     int free_space, int right_nritems)
 {
 	struct btrfs_disk_key disk_key;
 	struct extent_buffer *right = path->nodes[0];
-	struct extent_buffer *left;
 	int slot;
 	int i;
-	int free_space;
 	int push_space = 0;
 	int push_items = 0;
 	struct btrfs_item *item;
 	u32 old_left_nritems;
-	u32 right_nritems;
 	u32 nr;
 	int ret = 0;
 	int wret;
@@ -2553,41 +2515,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
 	u32 old_left_item_size;
 
 	slot = path->slots[1];
-	if (slot == 0)
-		return 1;
-	if (!path->nodes[1])
-		return 1;
-
-	right_nritems = btrfs_header_nritems(right);
-	if (right_nritems == 0)
-		return 1;
-
-	btrfs_assert_tree_locked(path->nodes[1]);
-
-	left = read_node_slot(root, path->nodes[1], slot - 1);
-	btrfs_tree_lock(left);
-	btrfs_set_lock_blocking(left);
-
-	free_space = btrfs_leaf_free_space(root, left);
-	if (free_space < data_size) {
-		ret = 1;
-		goto out;
-	}
-
-	/* cow and double check */
-	ret = btrfs_cow_block(trans, root, left,
-			      path->nodes[1], slot - 1, &left, 0);
-	if (ret) {
-		/* we hit -ENOSPC, but it isn't fatal here */
-		ret = 1;
-		goto out;
-	}
-
-	free_space = btrfs_leaf_free_space(root, left);
-	if (free_space < data_size) {
-		ret = 1;
-		goto out;
-	}
 
 	if (empty)
 		nr = right_nritems;
@@ -2755,6 +2682,154 @@ out:
 }
 
 /*
+ * push some data in the path leaf to the left, trying to free up at
+ * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ */
+static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
+			  *root, struct btrfs_path *path, int data_size,
+			  int empty)
+{
+	struct extent_buffer *right = path->nodes[0];
+	struct extent_buffer *left;
+	int slot;
+	int free_space;
+	u32 right_nritems;
+	int ret = 0;
+
+	slot = path->slots[1];
+	if (slot == 0)
+		return 1;
+	if (!path->nodes[1])
+		return 1;
+
+	right_nritems = btrfs_header_nritems(right);
+	if (right_nritems == 0)
+		return 1;
+
+	btrfs_assert_tree_locked(path->nodes[1]);
+
+	left = read_node_slot(root, path->nodes[1], slot - 1);
+	btrfs_tree_lock(left);
+	btrfs_set_lock_blocking(left);
+
+	free_space = btrfs_leaf_free_space(root, left);
+	if (free_space < data_size) {
+		ret = 1;
+		goto out;
+	}
+
+	/* cow and double check */
+	ret = btrfs_cow_block(trans, root, left,
+			      path->nodes[1], slot - 1, &left);
+	if (ret) {
+		/* we hit -ENOSPC, but it isn't fatal here */
+		ret = 1;
+		goto out;
+	}
+
+	free_space = btrfs_leaf_free_space(root, left);
+	if (free_space < data_size) {
+		ret = 1;
+		goto out;
+	}
+
+	return __push_leaf_left(trans, root, path, data_size,
+				empty, left, free_space, right_nritems);
+out:
+	btrfs_tree_unlock(left);
+	free_extent_buffer(left);
+	return ret;
+}
+
+/*
+ * split the path's leaf in two, making sure there is at least data_size
+ * available for the resulting leaf level of the path.
+ *
+ * returns 0 if all went well and < 0 on failure.
+ */
+static noinline int copy_for_split(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct btrfs_path *path,
+			       struct extent_buffer *l,
+			       struct extent_buffer *right,
+			       int slot, int mid, int nritems)
+{
+	int data_copy_size;
+	int rt_data_off;
+	int i;
+	int ret = 0;
+	int wret;
+	struct btrfs_disk_key disk_key;
+
+	nritems = nritems - mid;
+	btrfs_set_header_nritems(right, nritems);
+	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
+
+	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
+			   btrfs_item_nr_offset(mid),
+			   nritems * sizeof(struct btrfs_item));
+
+	copy_extent_buffer(right, l,
+		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
+		     data_copy_size, btrfs_leaf_data(l) +
+		     leaf_data_end(root, l), data_copy_size);
+
+	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
+		      btrfs_item_end_nr(l, mid);
+
+	for (i = 0; i < nritems; i++) {
+		struct btrfs_item *item = btrfs_item_nr(right, i);
+		u32 ioff;
+
+		if (!right->map_token) {
+			map_extent_buffer(right, (unsigned long)item,
+					sizeof(struct btrfs_item),
+					&right->map_token, &right->kaddr,
+					&right->map_start, &right->map_len,
+					KM_USER1);
+		}
+
+		ioff = btrfs_item_offset(right, item);
+		btrfs_set_item_offset(right, item, ioff + rt_data_off);
+	}
+
+	if (right->map_token) {
+		unmap_extent_buffer(right, right->map_token, KM_USER1);
+		right->map_token = NULL;
+	}
+
+	btrfs_set_header_nritems(l, mid);
+	ret = 0;
+	btrfs_item_key(right, &disk_key, 0);
+	wret = insert_ptr(trans, root, path, &disk_key, right->start,
+			  path->slots[1] + 1, 1);
+	if (wret)
+		ret = wret;
+
+	btrfs_mark_buffer_dirty(right);
+	btrfs_mark_buffer_dirty(l);
+	BUG_ON(path->slots[0] != slot);
+
+	ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
+	BUG_ON(ret);
+
+	if (mid <= slot) {
+		btrfs_tree_unlock(path->nodes[0]);
+		free_extent_buffer(path->nodes[0]);
+		path->nodes[0] = right;
+		path->slots[0] -= mid;
+		path->slots[1] += 1;
+	} else {
+		btrfs_tree_unlock(right);
+		free_extent_buffer(right);
+	}
+
+	BUG_ON(path->slots[0] < 0);
+
+	return ret;
+}
+
+/*
  * split the path's leaf in two, making sure there is at least data_size
  * available for the resulting leaf level of the path.
  *
@@ -2771,17 +2846,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 	int mid;
 	int slot;
 	struct extent_buffer *right;
-	int data_copy_size;
-	int rt_data_off;
-	int i;
 	int ret = 0;
 	int wret;
 	int double_split;
 	int num_doubles = 0;
-	struct btrfs_disk_key disk_key;
 
 	/* first try to make some room by pushing left and right */
-	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
+	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY &&
+	    !trans->transaction->delayed_refs.flushing) {
 		wret = push_leaf_right(trans, root, path, data_size, 0);
 		if (wret < 0)
 			return wret;
@@ -2830,11 +2902,14 @@ again:
 	write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
 			    (unsigned long)btrfs_header_chunk_tree_uuid(right),
 			    BTRFS_UUID_SIZE);
+
 	if (mid <= slot) {
 		if (nritems == 1 ||
 		    leaf_space_used(l, mid, nritems - mid) + data_size >
 		    BTRFS_LEAF_DATA_SIZE(root)) {
 			if (slot >= nritems) {
+				struct btrfs_disk_key disk_key;
+
 				btrfs_cpu_key_to_disk(&disk_key, ins_key);
 				btrfs_set_header_nritems(right, 0);
 				wret = insert_ptr(trans, root, path,
@@ -2862,6 +2937,8 @@ again:
 		if (leaf_space_used(l, 0, mid) + data_size >
 		    BTRFS_LEAF_DATA_SIZE(root)) {
 			if (!extend && data_size && slot == 0) {
+				struct btrfs_disk_key disk_key;
+
 				btrfs_cpu_key_to_disk(&disk_key, ins_key);
 				btrfs_set_header_nritems(right, 0);
 				wret = insert_ptr(trans, root, path,
@@ -2894,76 +2971,16 @@ again:
 			}
 		}
 	}
-	nritems = nritems - mid;
-	btrfs_set_header_nritems(right, nritems);
-	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
-
-	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
-			   btrfs_item_nr_offset(mid),
-			   nritems * sizeof(struct btrfs_item));
-
-	copy_extent_buffer(right, l,
-		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
-		     data_copy_size, btrfs_leaf_data(l) +
-		     leaf_data_end(root, l), data_copy_size);
-
-	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
-		      btrfs_item_end_nr(l, mid);
-
-	for (i = 0; i < nritems; i++) {
-		struct btrfs_item *item = btrfs_item_nr(right, i);
-		u32 ioff;
-
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
-
-		ioff = btrfs_item_offset(right, item);
-		btrfs_set_item_offset(right, item, ioff + rt_data_off);
-	}
-
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
-
-	btrfs_set_header_nritems(l, mid);
-	ret = 0;
-	btrfs_item_key(right, &disk_key, 0);
-	wret = insert_ptr(trans, root, path, &disk_key, right->start,
-			  path->slots[1] + 1, 1);
-	if (wret)
-		ret = wret;
-
-	btrfs_mark_buffer_dirty(right);
-	btrfs_mark_buffer_dirty(l);
-	BUG_ON(path->slots[0] != slot);
 
-	ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
+	ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
 	BUG_ON(ret);
 
-	if (mid <= slot) {
-		btrfs_tree_unlock(path->nodes[0]);
-		free_extent_buffer(path->nodes[0]);
-		path->nodes[0] = right;
-		path->slots[0] -= mid;
-		path->slots[1] += 1;
-	} else {
-		btrfs_tree_unlock(right);
-		free_extent_buffer(right);
-	}
-
-	BUG_ON(path->slots[0] < 0);
-
 	if (double_split) {
 		BUG_ON(num_doubles != 0);
 		num_doubles++;
 		goto again;
 	}
+
 	return ret;
 }
 
@@ -3021,26 +3038,27 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
 		return -EAGAIN;
 	}
 
+	btrfs_set_path_blocking(path);
 	ret = split_leaf(trans, root, &orig_key, path,
 			 sizeof(struct btrfs_item), 1);
 	path->keep_locks = 0;
 	BUG_ON(ret);
 
+	btrfs_unlock_up_safe(path, 1);
+	leaf = path->nodes[0];
+	BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
+
+split:
 	/*
 	 * make sure any changes to the path from split_leaf leave it
 	 * in a blocking state
 	 */
 	btrfs_set_path_blocking(path);
 
-	leaf = path->nodes[0];
-	BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
-
-split:
 	item = btrfs_item_nr(leaf, path->slots[0]);
 	orig_offset = btrfs_item_offset(leaf, item);
 	item_size = btrfs_item_size(leaf, item);
 
-
 	buf = kmalloc(item_size, GFP_NOFS);
 	read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
 			   path->slots[0]), item_size);
@@ -3445,39 +3463,27 @@ out:
 }
 
 /*
- * Given a key and some data, insert items into the tree.
- * This does all the path init required, making room in the tree if needed.
+ * this is a helper for btrfs_insert_empty_items, the main goal here is
+ * to save stack depth by doing the bulk of the work in a function
+ * that doesn't call btrfs_search_slot
  */
-int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root,
-			    struct btrfs_path *path,
-			    struct btrfs_key *cpu_key, u32 *data_size,
-			    int nr)
+static noinline_for_stack int
+setup_items_for_insert(struct btrfs_trans_handle *trans,
+			struct btrfs_root *root, struct btrfs_path *path,
+			struct btrfs_key *cpu_key, u32 *data_size,
+			u32 total_data, u32 total_size, int nr)
 {
-	struct extent_buffer *leaf;
 	struct btrfs_item *item;
-	int ret = 0;
-	int slot;
-	int slot_orig;
 	int i;
 	u32 nritems;
-	u32 total_size = 0;
-	u32 total_data = 0;
 	unsigned int data_end;
 	struct btrfs_disk_key disk_key;
+	int ret;
+	struct extent_buffer *leaf;
+	int slot;
 
-	for (i = 0; i < nr; i++)
-		total_data += data_size[i];
-
-	total_size = total_data + (nr * sizeof(struct btrfs_item));
-	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
-	if (ret == 0)
-		return -EEXIST;
-	if (ret < 0)
-		goto out;
-
-	slot_orig = path->slots[0];
 	leaf = path->nodes[0];
+	slot = path->slots[0];
 
 	nritems = btrfs_header_nritems(leaf);
 	data_end = leaf_data_end(root, leaf);
@@ -3489,9 +3495,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
 		BUG();
 	}
 
-	slot = path->slots[0];
-	BUG_ON(slot < 0);
-
 	if (slot != nritems) {
 		unsigned int old_data = btrfs_item_end_nr(leaf, slot);
 
@@ -3547,21 +3550,60 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
 		data_end -= data_size[i];
 		btrfs_set_item_size(leaf, item, data_size[i]);
 	}
+
 	btrfs_set_header_nritems(leaf, nritems + nr);
-	btrfs_mark_buffer_dirty(leaf);
 
 	ret = 0;
 	if (slot == 0) {
+		struct btrfs_disk_key disk_key;
 		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
 		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
 	}
+	btrfs_unlock_up_safe(path, 1);
+	btrfs_mark_buffer_dirty(leaf);
 
 	if (btrfs_leaf_free_space(root, leaf) < 0) {
 		btrfs_print_leaf(root, leaf);
 		BUG();
 	}
+	return ret;
+}
+
+/*
+ * Given a key and some data, insert items into the tree.
+ * This does all the path init required, making room in the tree if needed.
+ */
+int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    struct btrfs_path *path,
+			    struct btrfs_key *cpu_key, u32 *data_size,
+			    int nr)
+{
+	struct extent_buffer *leaf;
+	int ret = 0;
+	int slot;
+	int i;
+	u32 total_size = 0;
+	u32 total_data = 0;
+
+	for (i = 0; i < nr; i++)
+		total_data += data_size[i];
+
+	total_size = total_data + (nr * sizeof(struct btrfs_item));
+	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
+	if (ret == 0)
+		return -EEXIST;
+	if (ret < 0)
+		goto out;
+
+	leaf = path->nodes[0];
+	slot = path->slots[0];
+	BUG_ON(slot < 0);
+
+	ret = setup_items_for_insert(trans, root, path, cpu_key, data_size,
+				     total_data, total_size, nr);
+
 out:
-	btrfs_unlock_up_safe(path, 1);
 	return ret;
 }
 
@@ -3749,7 +3791,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	}
 
 	/* delete the leaf if it is mostly empty */
-	if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
+	if (used < BTRFS_LEAF_DATA_SIZE(root) / 4 &&
+	    !trans->transaction->delayed_refs.flushing) {
 		/* push_leaf_left fixes the path.
 		 * make sure the path still points to our leaf
 		 * for possible call to del_ptr below
@@ -3757,6 +3800,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		slot = path->slots[1];
 		extent_buffer_get(leaf);
 
+		btrfs_set_path_blocking(path);
 		wret = push_leaf_left(trans, root, path, 1, 1);
 		if (wret < 0 && wret != -ENOSPC)
 			ret = wret;
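Several of the ctree.c hunks above share one refactoring pattern: btrfs_insert_empty_items() and the two leaf-push routines become thin wrappers, and the bulky locals move into noinline helpers (setup_items_for_insert(), __push_leaf_left(), __push_leaf_right()) so those frames are never live across btrfs_search_slot()'s deep call chain. Here is a freestanding illustration of the trick, with every name invented; it is a sketch of the pattern, not btrfs code.

/* stack-depth pattern: the searching entry point keeps a small frame;
 * the noinline helper owns the big locals and runs only after the
 * deep search has unwound */
#include <stdio.h>
#include <string.h>

#ifdef __GNUC__
#define noinline __attribute__((noinline))
#else
#define noinline
#endif

struct item { int key; char payload[16]; };

static noinline int setup_items(struct item *dst, int key)
{
	char scratch[512];		/* bulky local, lives in this frame only */

	memset(scratch, 0, sizeof(scratch));
	snprintf(dst->payload, sizeof(dst->payload), "k%d", key);
	dst->key = key;
	return 0;
}

static int search_slot(int key)		/* stands in for the deep search */
{
	return key < 0 ? -1 : 0;
}

int insert_item(struct item *dst, int key)
{
	int ret = search_slot(key);	/* small frame across this call */

	if (ret)
		return ret;
	return setup_items(dst, key);	/* big frame only after the search */
}

int main(void)
{
	struct item it;

	if (insert_item(&it, 42) == 0)
		printf("%d %s\n", it.key, it.payload);
	return 0;
}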
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5e1d4e30e9d8..9417713542a2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -45,6 +45,13 @@ struct btrfs_ordered_sum;
 
 #define BTRFS_MAX_LEVEL 8
 
+/*
+ * files bigger than this get some pre-flushing when they are added
+ * to the ordered operations list.  That way we limit the total
+ * work done by the commit
+ */
+#define BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT (8 * 1024 * 1024)
+
 /* holds pointers to all of the tree roots */
 #define BTRFS_ROOT_TREE_OBJECTID 1ULL
 
@@ -401,15 +408,16 @@ struct btrfs_path {
 	int locks[BTRFS_MAX_LEVEL];
 	int reada;
 	/* keep some upper locks as we walk down */
-	int keep_locks;
-	int skip_locking;
 	int lowest_level;
 
 	/*
 	 * set by btrfs_split_item, tells search_slot to keep all locks
 	 * and to force calls to keep space in the nodes
 	 */
-	int search_for_split;
+	unsigned int search_for_split:1;
+	unsigned int keep_locks:1;
+	unsigned int skip_locking:1;
+	unsigned int leave_spinning:1;
 };
 
 /*
@@ -688,15 +696,18 @@ struct btrfs_fs_info {
 	struct rb_root block_group_cache_tree;
 
 	struct extent_io_tree pinned_extents;
-	struct extent_io_tree pending_del;
-	struct extent_io_tree extent_ins;
 
 	/* logical->physical extent mapping */
 	struct btrfs_mapping_tree mapping_tree;
 
 	u64 generation;
 	u64 last_trans_committed;
-	u64 last_trans_new_blockgroup;
+
+	/*
+	 * this is updated to the current trans every time a full commit
+	 * is required instead of the faster short fsync log commits
+	 */
+	u64 last_trans_log_full_commit;
 	u64 open_ioctl_trans;
 	unsigned long mount_opt;
 	u64 max_extent;
@@ -717,12 +728,21 @@ struct btrfs_fs_info {
 	struct mutex tree_log_mutex;
 	struct mutex transaction_kthread_mutex;
 	struct mutex cleaner_mutex;
-	struct mutex extent_ins_mutex;
 	struct mutex pinned_mutex;
 	struct mutex chunk_mutex;
 	struct mutex drop_mutex;
 	struct mutex volume_mutex;
 	struct mutex tree_reloc_mutex;
+
+	/*
+	 * this protects the ordered operations list only while we are
+	 * processing all of the entries on it.  This way we make
+	 * sure the commit code doesn't find the list temporarily empty
+	 * because another function happens to be doing non-waiting preflush
+	 * before jumping into the main commit.
+	 */
+	struct mutex ordered_operations_mutex;
+
 	struct list_head trans_list;
 	struct list_head hashers;
 	struct list_head dead_roots;
@@ -737,10 +757,29 @@ struct btrfs_fs_info {
 	 * ordered extents
 	 */
 	spinlock_t ordered_extent_lock;
+
+	/*
+	 * all of the data=ordered extents pending writeback
+	 * these can span multiple transactions and basically include
+	 * every dirty data page that isn't from nodatacow
+	 */
 	struct list_head ordered_extents;
+
+	/*
+	 * all of the inodes that have delalloc bytes.  It is possible for
+	 * this list to be empty even when there is still dirty data=ordered
+	 * extents waiting to finish IO.
+	 */
 	struct list_head delalloc_inodes;
 
 	/*
+	 * special rename and truncate targets that must be on disk before
+	 * we're allowed to commit.  This is basically the ext3 style
+	 * data=ordered list.
+	 */
+	struct list_head ordered_operations;
+
+	/*
 	 * there is a pool of worker threads for checksumming during writes
 	 * and a pool for checksumming after reads.  This is because readers
 	 * can run with FS locks held, and the writers may be waiting for
@@ -781,6 +820,11 @@ struct btrfs_fs_info {
 	atomic_t throttle_gen;
 
 	u64 total_pinned;
+
+	/* protected by the delalloc lock, used to keep from writing
+	 * metadata until there is a nice batch
+	 */
+	u64 dirty_metadata_bytes;
 	struct list_head dirty_cowonly_roots;
 
 	struct btrfs_fs_devices *fs_devices;
@@ -1704,18 +1748,15 @@ static inline struct dentry *fdentry(struct file *file)
 }
 
 /* extent-tree.c */
+int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, unsigned long count);
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
-int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, u64 bytenr,
-			    u64 num_bytes, u32 *refs);
 int btrfs_update_pinned_extents(struct btrfs_root *root,
 				u64 bytenr, u64 num, int pin);
 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct extent_buffer *leaf);
 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, u64 objectid, u64 bytenr);
-int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
-			 struct btrfs_root *root);
 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
 struct btrfs_block_group_cache *btrfs_lookup_block_group(
 						 struct btrfs_fs_info *info,
@@ -1777,7 +1818,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 			 u64 root_objectid, u64 ref_generation,
 			 u64 owner_objectid);
 int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, u64 bytenr,
+			    struct btrfs_root *root, u64 bytenr, u64 num_bytes,
 			    u64 orig_parent, u64 parent,
 			    u64 root_objectid, u64 ref_generation,
 			    u64 owner_objectid);
@@ -1838,7 +1879,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
 int btrfs_cow_block(struct btrfs_trans_handle *trans,
 		    struct btrfs_root *root, struct extent_buffer *buf,
 		    struct extent_buffer *parent, int parent_slot,
-		    struct extent_buffer **cow_ret, u64 prealloc_dest);
+		    struct extent_buffer **cow_ret);
 int btrfs_copy_root(struct btrfs_trans_handle *trans,
 		    struct btrfs_root *root,
 		    struct extent_buffer *buf,
@@ -2060,7 +2101,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
 unsigned long btrfs_force_ra(struct address_space *mapping,
 			     struct file_ra_state *ra, struct file *file,
 			     pgoff_t offset, pgoff_t last_index);
-int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page);
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_delete_inode(struct inode *inode);
 void btrfs_put_inode(struct inode *inode);
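
The prototype churn above is the visible edge of the new delayed-refs machinery: btrfs_extent_post_op() and the extent-tree.c version of btrfs_lookup_extent_ref() go away, and btrfs_run_delayed_refs() becomes the entry point for flushing queued reference updates. As a minimal sketch of the intended call pattern only; the helper name below is hypothetical, and the meaning of the count argument should be checked against btrfs_run_delayed_refs() itself:

    /* hypothetical helper: flush queued ref updates before commit.
     * count == 0 is assumed here to mean "run everything queued so
     * far"; see btrfs_run_delayed_refs() for the real convention */
    static int flush_all_delayed_refs(struct btrfs_trans_handle *trans,
                                      struct btrfs_root *root)
    {
            return btrfs_run_delayed_refs(trans, root, 0);
    }
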
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
new file mode 100644
index 000000000000..cbf7dc8ae3ec
--- /dev/null
+++ b/fs/btrfs/delayed-ref.c
@@ -0,0 +1,669 @@
1/*
2 * Copyright (C) 2009 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/sort.h>
21#include <linux/ftrace.h>
22#include "ctree.h"
23#include "delayed-ref.h"
24#include "transaction.h"
25
26/*
27 * delayed back reference update tracking. For subvolume trees
28 * we queue up extent allocations and backref maintenance for
29 * delayed processing. This avoids deep call chains where we
30 * add extents in the middle of btrfs_search_slot, and it allows
31 * us to buffer up frequently modified backrefs in an rb tree instead
32 * of hammering updates on the extent allocation tree.
33 *
34 * Right now this code is only used for reference counted trees, but
35 * the long term goal is to get rid of the similar code for delayed
36 * extent tree modifications.
37 */
38
39/*
40 * entries in the rb tree are ordered by the byte number of the extent
41 * and by the byte number of the parent block.
42 */
43static int comp_entry(struct btrfs_delayed_ref_node *ref,
44 u64 bytenr, u64 parent)
45{
46 if (bytenr < ref->bytenr)
47 return -1;
48 if (bytenr > ref->bytenr)
49 return 1;
50 if (parent < ref->parent)
51 return -1;
52 if (parent > ref->parent)
53 return 1;
54 return 0;
55}
56
57/*
58 * insert a new ref into the rbtree. This returns the existing ref
59 * for the same (bytenr,parent) tuple if there is one, or NULL if the
60 * new node was properly inserted.
61 */
62static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
63 u64 bytenr, u64 parent,
64 struct rb_node *node)
65{
66 struct rb_node **p = &root->rb_node;
67 struct rb_node *parent_node = NULL;
68 struct btrfs_delayed_ref_node *entry;
69 int cmp;
70
71 while (*p) {
72 parent_node = *p;
73 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
74 rb_node);
75
76 cmp = comp_entry(entry, bytenr, parent);
77 if (cmp < 0)
78 p = &(*p)->rb_left;
79 else if (cmp > 0)
80 p = &(*p)->rb_right;
81 else
82 return entry;
83 }
84
85 entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
86 rb_link_node(node, parent_node, p);
87 rb_insert_color(node, root);
88 return NULL;
89}
90
91/*
92 * find an entry based on (bytenr,parent). This returns the delayed
93 * ref if it was able to find one, or NULL if nothing was in that spot
94 */
95static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
96 u64 bytenr, u64 parent,
97 struct btrfs_delayed_ref_node **last)
98{
99 struct rb_node *n = root->rb_node;
100 struct btrfs_delayed_ref_node *entry;
101 int cmp;
102
103 while (n) {
104 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
105 WARN_ON(!entry->in_tree);
106 if (last)
107 *last = entry;
108
109 cmp = comp_entry(entry, bytenr, parent);
110 if (cmp < 0)
111 n = n->rb_left;
112 else if (cmp > 0)
113 n = n->rb_right;
114 else
115 return entry;
116 }
117 return NULL;
118}
119
120int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
121 struct btrfs_delayed_ref_head *head)
122{
123 struct btrfs_delayed_ref_root *delayed_refs;
124
125 delayed_refs = &trans->transaction->delayed_refs;
126 assert_spin_locked(&delayed_refs->lock);
127 if (mutex_trylock(&head->mutex))
128 return 0;
129
130 atomic_inc(&head->node.refs);
131 spin_unlock(&delayed_refs->lock);
132
133 mutex_lock(&head->mutex);
134 spin_lock(&delayed_refs->lock);
135 if (!head->node.in_tree) {
136 mutex_unlock(&head->mutex);
137 btrfs_put_delayed_ref(&head->node);
138 return -EAGAIN;
139 }
140 btrfs_put_delayed_ref(&head->node);
141 return 0;
142}
143
144int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
145 struct list_head *cluster, u64 start)
146{
147 int count = 0;
148 struct btrfs_delayed_ref_root *delayed_refs;
149 struct rb_node *node;
150 struct btrfs_delayed_ref_node *ref;
151 struct btrfs_delayed_ref_head *head;
152
153 delayed_refs = &trans->transaction->delayed_refs;
154 if (start == 0) {
155 node = rb_first(&delayed_refs->root);
156 } else {
157 ref = NULL;
158 tree_search(&delayed_refs->root, start, (u64)-1, &ref);
159 if (ref) {
160 struct btrfs_delayed_ref_node *tmp;
161
162 node = rb_prev(&ref->rb_node);
163 while (node) {
164 tmp = rb_entry(node,
165 struct btrfs_delayed_ref_node,
166 rb_node);
167 if (tmp->bytenr < start)
168 break;
169 ref = tmp;
170 node = rb_prev(&ref->rb_node);
171 }
172 node = &ref->rb_node;
173 } else
174 node = rb_first(&delayed_refs->root);
175 }
176again:
177 while (node && count < 32) {
178 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
179 if (btrfs_delayed_ref_is_head(ref)) {
180 head = btrfs_delayed_node_to_head(ref);
181 if (list_empty(&head->cluster)) {
182 list_add_tail(&head->cluster, cluster);
183 delayed_refs->run_delayed_start =
184 head->node.bytenr;
185 count++;
186
187 WARN_ON(delayed_refs->num_heads_ready == 0);
188 delayed_refs->num_heads_ready--;
189 } else if (count) {
190 /* the goal of the clustering is to find extents
191 * that are likely to end up in the same extent
192 * leaf on disk. So, we don't want them spread
193 * all over the tree. Stop now if we've hit
194 * a head that was already in use
195 */
196 break;
197 }
198 }
199 node = rb_next(node);
200 }
201 if (count) {
202 return 0;
203 } else if (start) {
204 /*
205 * we've gone to the end of the rbtree without finding any
206 * clusters. start from the beginning and try again
207 */
208 start = 0;
209 node = rb_first(&delayed_refs->root);
210 goto again;
211 }
212 return 1;
213}
214
215/*
216 * This checks to see if there are any delayed refs in the
217 * rbtree for a given bytenr. It returns one if it finds any
218 * and zero otherwise.
219 *
220 * If it only finds a head node, it returns 0.
221 *
222 * The idea is to use this when deciding if you can safely delete an
223 * extent from the extent allocation tree. There may be a pending
224 * ref in the rbtree that adds or removes references, so as long as this
225 * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent
226 * allocation tree.
227 */
228int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
229{
230 struct btrfs_delayed_ref_node *ref;
231 struct btrfs_delayed_ref_root *delayed_refs;
232 struct rb_node *prev_node;
233 int ret = 0;
234
235 delayed_refs = &trans->transaction->delayed_refs;
236 spin_lock(&delayed_refs->lock);
237
238 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
239 if (ref) {
240 prev_node = rb_prev(&ref->rb_node);
241 if (!prev_node)
242 goto out;
243 ref = rb_entry(prev_node, struct btrfs_delayed_ref_node,
244 rb_node);
245 if (ref->bytenr == bytenr)
246 ret = 1;
247 }
248out:
249 spin_unlock(&delayed_refs->lock);
250 return ret;
251}
252
253/*
254 * helper function to look up the reference count
255 *
256 * the head node for a delayed ref is used to store the sum of all the
257 * reference count modifications queued up in the rbtree. This way you
258 * can check to see what the reference count would be if all of the
259 * delayed refs are processed.
260 */
261int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
262 struct btrfs_root *root, u64 bytenr,
263 u64 num_bytes, u32 *refs)
264{
265 struct btrfs_delayed_ref_node *ref;
266 struct btrfs_delayed_ref_head *head;
267 struct btrfs_delayed_ref_root *delayed_refs;
268 struct btrfs_path *path;
269 struct extent_buffer *leaf;
270 struct btrfs_extent_item *ei;
271 struct btrfs_key key;
272 u32 num_refs;
273 int ret;
274
275 path = btrfs_alloc_path();
276 if (!path)
277 return -ENOMEM;
278
279 key.objectid = bytenr;
280 key.type = BTRFS_EXTENT_ITEM_KEY;
281 key.offset = num_bytes;
282 delayed_refs = &trans->transaction->delayed_refs;
283again:
284 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
285 &key, path, 0, 0);
286 if (ret < 0)
287 goto out;
288
289 if (ret == 0) {
290 leaf = path->nodes[0];
291 ei = btrfs_item_ptr(leaf, path->slots[0],
292 struct btrfs_extent_item);
293 num_refs = btrfs_extent_refs(leaf, ei);
294 } else {
295 num_refs = 0;
296 ret = 0;
297 }
298
299 spin_lock(&delayed_refs->lock);
300 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
301 if (ref) {
302 head = btrfs_delayed_node_to_head(ref);
303 if (mutex_trylock(&head->mutex)) {
304 num_refs += ref->ref_mod;
305 mutex_unlock(&head->mutex);
306 *refs = num_refs;
307 goto out;
308 }
309
310 atomic_inc(&ref->refs);
311 spin_unlock(&delayed_refs->lock);
312
313 btrfs_release_path(root->fs_info->extent_root, path);
314
315 mutex_lock(&head->mutex);
316 mutex_unlock(&head->mutex);
317 btrfs_put_delayed_ref(ref);
318 goto again;
319 } else {
320 *refs = num_refs;
321 }
322out:
323 spin_unlock(&delayed_refs->lock);
324 btrfs_free_path(path);
325 return ret;
326}
327
328/*
329 * helper function to update an extent delayed ref in the
330 * rbtree. existing and update must both have the same
331 * bytenr and parent
332 *
333 * This may free existing if the update cancels out whatever
334 * operation it was doing.
335 */
336static noinline void
337update_existing_ref(struct btrfs_trans_handle *trans,
338 struct btrfs_delayed_ref_root *delayed_refs,
339 struct btrfs_delayed_ref_node *existing,
340 struct btrfs_delayed_ref_node *update)
341{
342 struct btrfs_delayed_ref *existing_ref;
343 struct btrfs_delayed_ref *ref;
344
345 existing_ref = btrfs_delayed_node_to_ref(existing);
346 ref = btrfs_delayed_node_to_ref(update);
347
348 if (ref->pin)
349 existing_ref->pin = 1;
350
351 if (ref->action != existing_ref->action) {
352 /*
353 * this is effectively undoing either an add or a
354 * drop. We decrement the ref_mod, and if it goes
355 * down to zero we just delete the entry without
356 * ever changing the extent allocation tree.
357 */
358 existing->ref_mod--;
359 if (existing->ref_mod == 0) {
360 rb_erase(&existing->rb_node,
361 &delayed_refs->root);
362 existing->in_tree = 0;
363 btrfs_put_delayed_ref(existing);
364 delayed_refs->num_entries--;
365 if (trans->delayed_ref_updates)
366 trans->delayed_ref_updates--;
367 }
368 } else {
369 if (existing_ref->action == BTRFS_ADD_DELAYED_REF) {
370 /* if we're adding refs, make sure all the
371 * details match up. The extent could
372 * have been totally freed and reallocated
373 * by a different owner before the delayed
374 * ref entries were removed.
375 */
376 existing_ref->owner_objectid = ref->owner_objectid;
377 existing_ref->generation = ref->generation;
378 existing_ref->root = ref->root;
379 existing->num_bytes = update->num_bytes;
380 }
381 /*
382 * the action on the existing ref matches
383 * the action on the ref we're trying to add.
384 * Bump the ref_mod by one so the backref that
385 * is eventually added/removed has the correct
386 * reference count
387 */
388 existing->ref_mod += update->ref_mod;
389 }
390}
391
392/*
393 * helper function to update the accounting in the head ref
394 * existing and update must have the same bytenr
395 */
396static noinline void
397update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
398 struct btrfs_delayed_ref_node *update)
399{
400 struct btrfs_delayed_ref_head *existing_ref;
401 struct btrfs_delayed_ref_head *ref;
402
403 existing_ref = btrfs_delayed_node_to_head(existing);
404 ref = btrfs_delayed_node_to_head(update);
405
406 if (ref->must_insert_reserved) {
407 /* if the extent was freed and then
408 * reallocated before the delayed ref
409 * entries were processed, we can end up
410 * with an existing head ref without
411 * the must_insert_reserved flag set.
412 * Set it again here
413 */
414 existing_ref->must_insert_reserved = ref->must_insert_reserved;
415
416 /*
417 * update the num_bytes so we make sure the accounting
418 * is done correctly
419 */
420 existing->num_bytes = update->num_bytes;
421
422 }
423
424 /*
425 * update the reference mod on the head to reflect this new operation
426 */
427 existing->ref_mod += update->ref_mod;
428}
429
430/*
431 * helper function to actually insert a delayed ref into the rbtree.
432 * this does all the dirty work in terms of maintaining the correct
433 * overall modification count in the head node and properly dealing
434 * with updating existing nodes as new modifications are queued.
435 */
436static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
437 struct btrfs_delayed_ref_node *ref,
438 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
439 u64 ref_generation, u64 owner_objectid, int action,
440 int pin)
441{
442 struct btrfs_delayed_ref_node *existing;
443 struct btrfs_delayed_ref *full_ref;
444 struct btrfs_delayed_ref_head *head_ref = NULL;
445 struct btrfs_delayed_ref_root *delayed_refs;
446 int count_mod = 1;
447 int must_insert_reserved = 0;
448
449 /*
450 * the head node stores the sum of all the mods, so dropping a ref
451 * should drop the sum in the head node by one.
452 */
453 if (parent == (u64)-1) {
454 if (action == BTRFS_DROP_DELAYED_REF)
455 count_mod = -1;
456 else if (action == BTRFS_UPDATE_DELAYED_HEAD)
457 count_mod = 0;
458 }
459
460 /*
461 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
462 * the reserved accounting when the extent is finally added, or
463 * if a later modification deletes the delayed ref without ever
464 * inserting the extent into the extent allocation tree.
465 * ref->must_insert_reserved is the flag used to record
466 * that accounting mods are required.
467 *
468 * Once we record must_insert_reserved, switch the action to
469 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
470 */
471 if (action == BTRFS_ADD_DELAYED_EXTENT) {
472 must_insert_reserved = 1;
473 action = BTRFS_ADD_DELAYED_REF;
474 } else {
475 must_insert_reserved = 0;
476 }
477
478
479 delayed_refs = &trans->transaction->delayed_refs;
480
481 /* first set the basic ref node struct up */
482 atomic_set(&ref->refs, 1);
483 ref->bytenr = bytenr;
484 ref->parent = parent;
485 ref->ref_mod = count_mod;
486 ref->in_tree = 1;
487 ref->num_bytes = num_bytes;
488
489 if (btrfs_delayed_ref_is_head(ref)) {
490 head_ref = btrfs_delayed_node_to_head(ref);
491 head_ref->must_insert_reserved = must_insert_reserved;
492 INIT_LIST_HEAD(&head_ref->cluster);
493 mutex_init(&head_ref->mutex);
494 } else {
495 full_ref = btrfs_delayed_node_to_ref(ref);
496 full_ref->root = ref_root;
497 full_ref->generation = ref_generation;
498 full_ref->owner_objectid = owner_objectid;
499 full_ref->pin = pin;
500 full_ref->action = action;
501 }
502
503 existing = tree_insert(&delayed_refs->root, bytenr,
504 parent, &ref->rb_node);
505
506 if (existing) {
507 if (btrfs_delayed_ref_is_head(ref))
508 update_existing_head_ref(existing, ref);
509 else
510 update_existing_ref(trans, delayed_refs, existing, ref);
511
512 /*
513 * we've updated the existing ref, free the newly
514 * allocated ref
515 */
516 kfree(ref);
517 } else {
518 if (btrfs_delayed_ref_is_head(ref)) {
519 delayed_refs->num_heads++;
520 delayed_refs->num_heads_ready++;
521 }
522 delayed_refs->num_entries++;
523 trans->delayed_ref_updates++;
524 }
525 return 0;
526}
527
528/*
529 * add a delayed ref to the tree. This does all of the accounting required
530 * to make sure the delayed ref is eventually processed before this
531 * transaction commits.
532 */
533int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
534 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
535 u64 ref_generation, u64 owner_objectid, int action,
536 int pin)
537{
538 struct btrfs_delayed_ref *ref;
539 struct btrfs_delayed_ref_head *head_ref;
540 struct btrfs_delayed_ref_root *delayed_refs;
541 int ret;
542
543 ref = kmalloc(sizeof(*ref), GFP_NOFS);
544 if (!ref)
545 return -ENOMEM;
546
547 /*
548 * the parent = 0 case comes from cases where we don't actually
549 * know the parent yet. It will get updated later via an add/drop
550 * pair.
551 */
552 if (parent == 0)
553 parent = bytenr;
554
555 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
556 if (!head_ref) {
557 kfree(ref);
558 return -ENOMEM;
559 }
560 delayed_refs = &trans->transaction->delayed_refs;
561 spin_lock(&delayed_refs->lock);
562
563 /*
564 * insert both the head node and the new ref without dropping
565 * the spin lock
566 */
567 ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
568 (u64)-1, 0, 0, 0, action, pin);
569 BUG_ON(ret);
570
571 ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
572 parent, ref_root, ref_generation,
573 owner_objectid, action, pin);
574 BUG_ON(ret);
575 spin_unlock(&delayed_refs->lock);
576 return 0;
577}
578
579/*
580 * this does a simple search for the head node for a given extent.
581 * It must be called with the delayed ref spinlock held, and it returns
582 * the head node if one was found, or NULL if not.
583 */
584struct btrfs_delayed_ref_head *
585btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
586{
587 struct btrfs_delayed_ref_node *ref;
588 struct btrfs_delayed_ref_root *delayed_refs;
589
590 delayed_refs = &trans->transaction->delayed_refs;
591 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
592 if (ref)
593 return btrfs_delayed_node_to_head(ref);
594 return NULL;
595}
596
597/*
598 * add a delayed ref to the tree. This does all of the accounting required
599 * to make sure the delayed ref is eventually processed before this
600 * transaction commits.
601 *
602 * The main point of this call is to add and remove a backreference in a single
603 * shot, taking the lock only once, and only searching for the head node once.
604 *
605 * It is the same as doing a ref add and delete in two separate calls.
606 */
607int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
608 u64 bytenr, u64 num_bytes, u64 orig_parent,
609 u64 parent, u64 orig_ref_root, u64 ref_root,
610 u64 orig_ref_generation, u64 ref_generation,
611 u64 owner_objectid, int pin)
612{
613 struct btrfs_delayed_ref *ref;
614 struct btrfs_delayed_ref *old_ref;
615 struct btrfs_delayed_ref_head *head_ref;
616 struct btrfs_delayed_ref_root *delayed_refs;
617 int ret;
618
619 ref = kmalloc(sizeof(*ref), GFP_NOFS);
620 if (!ref)
621 return -ENOMEM;
622
623 old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS);
624 if (!old_ref) {
625 kfree(ref);
626 return -ENOMEM;
627 }
628
629 /*
630 * the parent = 0 case comes from cases where we don't actually
631 * know the parent yet. It will get updated later via an add/drop
632 * pair.
633 */
634 if (parent == 0)
635 parent = bytenr;
636 if (orig_parent == 0)
637 orig_parent = bytenr;
638
639 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
640 if (!head_ref) {
641 kfree(ref);
642 kfree(old_ref);
643 return -ENOMEM;
644 }
645 delayed_refs = &trans->transaction->delayed_refs;
646 spin_lock(&delayed_refs->lock);
647
648 /*
649 * insert both the head node and the new ref without dropping
650 * the spin lock
651 */
652 ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
653 (u64)-1, 0, 0, 0,
654 BTRFS_UPDATE_DELAYED_HEAD, 0);
655 BUG_ON(ret);
656
657 ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
658 parent, ref_root, ref_generation,
659 owner_objectid, BTRFS_ADD_DELAYED_REF, 0);
660 BUG_ON(ret);
661
662 ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes,
663 orig_parent, orig_ref_root,
664 orig_ref_generation, owner_objectid,
665 BTRFS_DROP_DELAYED_REF, pin);
666 BUG_ON(ret);
667 spin_unlock(&delayed_refs->lock);
668 return 0;
669}
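
To make the lock/retry protocol of btrfs_delayed_ref_lock() concrete, here is a hedged sketch of a caller taking one head pulled from a cluster; the actual ref processing is elided to a comment and lives in extent-tree.c:

    /* sketch only: take one head's mutex, tolerating the -EAGAIN race */
    static int process_one_head(struct btrfs_trans_handle *trans,
                                struct btrfs_delayed_ref_head *head)
    {
            struct btrfs_delayed_ref_root *delayed_refs =
                    &trans->transaction->delayed_refs;

            spin_lock(&delayed_refs->lock);
            if (btrfs_delayed_ref_lock(trans, head) == -EAGAIN) {
                    /* someone ran this head while we slept on the mutex */
                    spin_unlock(&delayed_refs->lock);
                    return 0;
            }
            /* head->mutex is held and the head is still in the rbtree */
            spin_unlock(&delayed_refs->lock);
            /* ... run the queued refs for this bytenr here ... */
            mutex_unlock(&head->mutex);
            return 0;
    }

Note that btrfs_delayed_ref_lock() re-takes delayed_refs->lock before returning, so the caller's lock/unlock pairing stays balanced on both paths.
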
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
new file mode 100644
index 000000000000..3bec2ff0b15c
--- /dev/null
+++ b/fs/btrfs/delayed-ref.h
@@ -0,0 +1,193 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18#ifndef __DELAYED_REF__
19#define __DELAYED_REF__
20
21/* these are the possible values of struct btrfs_delayed_ref->action */
22#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */
23#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */
24#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
25#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
26
27struct btrfs_delayed_ref_node {
28 struct rb_node rb_node;
29
30 /* the starting bytenr of the extent */
31 u64 bytenr;
32
33 /* the parent our backref will point to */
34 u64 parent;
35
36 /* the size of the extent */
37 u64 num_bytes;
38
39 /* ref count on this data structure */
40 atomic_t refs;
41
42 /*
43 * how many refs this entry is adding or deleting. For
44 * head refs, this may be a negative number because it is keeping
45 * track of the total mods done to the reference count.
46 * For individual refs, this will always be a positive number
47 *
48 * It may be more than one, since it is possible for a single
49 * parent to have more than one ref on an extent
50 */
51 int ref_mod;
52
53 /* is this node still in the rbtree? */
54 unsigned int in_tree:1;
55};
56
57/*
58 * the head refs are used to hold a lock on a given extent, which allows us
59 * to make sure that only one process is running the delayed refs
60 * at a time for a single extent. They also store the sum of all the
61 * reference count modifications we've queued up.
62 */
63struct btrfs_delayed_ref_head {
64 struct btrfs_delayed_ref_node node;
65
66 /*
67 * the mutex is held while running the refs, and it is also
68 * held when checking the sum of reference modifications.
69 */
70 struct mutex mutex;
71
72 struct list_head cluster;
73
74 /*
75 * when a new extent is allocated, it is just reserved in memory.
76 * The actual extent isn't inserted into the extent allocation tree
77 * until the delayed ref is processed. must_insert_reserved is
78 * used to flag a delayed ref so the accounting can be updated
79 * when a full insert is done.
80 *
81 * It is possible the extent will be freed before it is ever
82 * inserted into the extent allocation tree. In this case
83 * we need to update the in-ram accounting to properly reflect
84 * that the free has happened.
85 */
86 unsigned int must_insert_reserved:1;
87};
88
89struct btrfs_delayed_ref {
90 struct btrfs_delayed_ref_node node;
91
92 /* the root objectid our ref will point to */
93 u64 root;
94
95 /* the generation for the backref */
96 u64 generation;
97
98 /* owner_objectid of the backref */
99 u64 owner_objectid;
100
101 /* operation done by this entry in the rbtree */
102 u8 action;
103
104 /* if pin == 1, when the extent is freed it will be pinned until
105 * transaction commit
106 */
107 unsigned int pin:1;
108};
109
110struct btrfs_delayed_ref_root {
111 struct rb_root root;
112
113 /* this spin lock protects the rbtree and the entries inside */
114 spinlock_t lock;
115
116 /* how many delayed ref updates we've queued, used by the
117 * throttling code
118 */
119 unsigned long num_entries;
120
121 /* total number of head nodes in tree */
122 unsigned long num_heads;
123
124 /* total number of head nodes ready for processing */
125 unsigned long num_heads_ready;
126
127 /*
128 * set when the tree is flushing before a transaction commit,
129 * used by the throttling code to decide if new updates need
130 * to be run right away
131 */
132 int flushing;
133
134 u64 run_delayed_start;
135};
136
137static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
138{
139 WARN_ON(atomic_read(&ref->refs) == 0);
140 if (atomic_dec_and_test(&ref->refs)) {
141 WARN_ON(ref->in_tree);
142 kfree(ref);
143 }
144}
145
146int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
147 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
148 u64 ref_generation, u64 owner_objectid, int action,
149 int pin);
150
151struct btrfs_delayed_ref_head *
152btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
153int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
154int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
155 struct btrfs_root *root, u64 bytenr,
156 u64 num_bytes, u32 *refs);
157int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
158 u64 bytenr, u64 num_bytes, u64 orig_parent,
159 u64 parent, u64 orig_ref_root, u64 ref_root,
160 u64 orig_ref_generation, u64 ref_generation,
161 u64 owner_objectid, int pin);
162int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
163 struct btrfs_delayed_ref_head *head);
164int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
165 struct list_head *cluster, u64 search_start);
166/*
167 * a node might live in a head or a regular ref; this lets you
168 * test for the proper type to use.
169 */
170static inline int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node)
171{
172 return node->parent == (u64)-1;
173}
174
175/*
176 * helper functions to cast a node into its container
177 */
178static inline struct btrfs_delayed_ref *
179btrfs_delayed_node_to_ref(struct btrfs_delayed_ref_node *node)
180{
181 WARN_ON(btrfs_delayed_ref_is_head(node));
182 return container_of(node, struct btrfs_delayed_ref, node);
183
184}
185
186static inline struct btrfs_delayed_ref_head *
187btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node)
188{
189 WARN_ON(!btrfs_delayed_ref_is_head(node));
190 return container_of(node, struct btrfs_delayed_ref_head, node);
191
192}
193#endif
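
As a quick illustration of the head/ref split, a walker over the rbtree would dispatch on btrfs_delayed_ref_is_head() before casting; this fragment is an editorial sketch, not part of the patch:

    /* editorial sketch: type dispatch while walking delayed_refs->root */
    static void inspect_node(struct btrfs_delayed_ref_node *node)
    {
            if (btrfs_delayed_ref_is_head(node)) {
                    struct btrfs_delayed_ref_head *head;

                    head = btrfs_delayed_node_to_head(node);
                    /* head->node.ref_mod is the net count change for
                     * this bytenr across all queued modifications */
            } else {
                    struct btrfs_delayed_ref *ref;

                    ref = btrfs_delayed_node_to_ref(node);
                    /* ref->action is one of the BTRFS_*_DELAYED_* values */
            }
    }
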
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 926a0b287a7d..1d70236ba00c 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -145,7 +145,10 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
 	key.objectid = dir;
 	btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
 	key.offset = btrfs_name_hash(name, name_len);
+
 	path = btrfs_alloc_path();
+	path->leave_spinning = 1;
+
 	data_size = sizeof(*dir_item) + name_len;
 	dir_item = insert_with_overflow(trans, root, path, &key, data_size,
 					name, name_len);
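
The leave_spinning flag set here is understood to ask btrfs_search_slot() to hand the leaf back under a spinning lock rather than converting it to the blocking variant, which is cheaper when the caller only does short in-memory item updates. A hedged sketch of the pattern, assuming the btrfs_path API used throughout this patch:

    struct btrfs_path *path;

    path = btrfs_alloc_path();      /* NULL check omitted, as in the hunk */
    path->leave_spinning = 1;       /* keep the leaf lock spinning */
    /* ... btrfs_search_slot() plus small item edits go here ... */
    btrfs_free_path(path);
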
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6ec80c0fc869..92d73929d381 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -668,14 +668,31 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 static int btree_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct extent_io_tree *tree;
+	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+	struct extent_buffer *eb;
+	int was_dirty;
+
 	tree = &BTRFS_I(page->mapping->host)->io_tree;
+	if (!(current->flags & PF_MEMALLOC)) {
+		return extent_write_full_page(tree, page,
+					      btree_get_extent, wbc);
+	}
 
-	if (current->flags & PF_MEMALLOC) {
-		redirty_page_for_writepage(wbc, page);
-		unlock_page(page);
-		return 0;
+	redirty_page_for_writepage(wbc, page);
+	eb = btrfs_find_tree_block(root, page_offset(page),
+				   PAGE_CACHE_SIZE);
+	WARN_ON(!eb);
+
+	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
+	if (!was_dirty) {
+		spin_lock(&root->fs_info->delalloc_lock);
+		root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
+		spin_unlock(&root->fs_info->delalloc_lock);
 	}
-	return extent_write_full_page(tree, page, btree_get_extent, wbc);
+	free_extent_buffer(eb);
+
+	unlock_page(page);
+	return 0;
 }
 
 static int btree_writepages(struct address_space *mapping,
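
The rewritten btree_writepage() is one half of the new dirty_metadata_bytes accounting: only a clean-to-dirty transition of the extent buffer (test_and_set_bit() returning 0) adds to the counter, so redirtying an already-dirty buffer is never double counted. Condensed into an illustrative helper (the helper name is not from the patch):

    /* illustrative sketch of the dirty-side accounting in this patch */
    static void account_buffer_dirty(struct btrfs_fs_info *info,
                                     struct extent_buffer *eb)
    {
            if (!test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
                    spin_lock(&info->delalloc_lock);
                    info->dirty_metadata_bytes += eb->len;
                    spin_unlock(&info->delalloc_lock);
            }
    }
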
@@ -684,15 +701,15 @@ static int btree_writepages(struct address_space *mapping,
 	struct extent_io_tree *tree;
 	tree = &BTRFS_I(mapping->host)->io_tree;
 	if (wbc->sync_mode == WB_SYNC_NONE) {
+		struct btrfs_root *root = BTRFS_I(mapping->host)->root;
 		u64 num_dirty;
-		u64 start = 0;
 		unsigned long thresh = 32 * 1024 * 1024;
 
 		if (wbc->for_kupdate)
 			return 0;
 
-		num_dirty = count_range_bits(tree, &start, (u64)-1,
-					     thresh, EXTENT_DIRTY);
+		/* this is a bit racy, but that's ok */
+		num_dirty = root->fs_info->dirty_metadata_bytes;
 		if (num_dirty < thresh)
 			return 0;
 	}
@@ -859,9 +876,17 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	    root->fs_info->running_transaction->transid) {
 		btrfs_assert_tree_locked(buf);
 
-		/* ugh, clear_extent_buffer_dirty can be expensive */
-		btrfs_set_lock_blocking(buf);
+		if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
+			spin_lock(&root->fs_info->delalloc_lock);
+			if (root->fs_info->dirty_metadata_bytes >= buf->len)
+				root->fs_info->dirty_metadata_bytes -= buf->len;
+			else
+				WARN_ON(1);
+			spin_unlock(&root->fs_info->delalloc_lock);
+		}
 
+		/* ugh, clear_extent_buffer_dirty needs to lock the page */
+		btrfs_set_lock_blocking(buf);
 		clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
 					  buf);
 	}
@@ -1471,12 +1496,6 @@ static int transaction_kthread(void *arg)
 		vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
 		mutex_lock(&root->fs_info->transaction_kthread_mutex);
 
-		if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) {
-			printk(KERN_INFO "btrfs: total reference cache "
-			       "size %llu\n",
-			       root->fs_info->total_ref_cache_size);
-		}
-
 		mutex_lock(&root->fs_info->trans_mutex);
 		cur = root->fs_info->running_transaction;
 		if (!cur) {
@@ -1493,6 +1512,7 @@ static int transaction_kthread(void *arg)
 		mutex_unlock(&root->fs_info->trans_mutex);
 		trans = btrfs_start_transaction(root, 1);
 		ret = btrfs_commit_transaction(trans, root);
+
 sleep:
 		wake_up_process(root->fs_info->cleaner_kthread);
 		mutex_unlock(&root->fs_info->transaction_kthread_mutex);
@@ -1552,6 +1572,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	INIT_LIST_HEAD(&fs_info->dead_roots);
 	INIT_LIST_HEAD(&fs_info->hashers);
 	INIT_LIST_HEAD(&fs_info->delalloc_inodes);
+	INIT_LIST_HEAD(&fs_info->ordered_operations);
 	spin_lock_init(&fs_info->delalloc_lock);
 	spin_lock_init(&fs_info->new_trans_lock);
 	spin_lock_init(&fs_info->ref_cache_lock);
@@ -1611,10 +1632,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
 	extent_io_tree_init(&fs_info->pinned_extents,
 			    fs_info->btree_inode->i_mapping, GFP_NOFS);
-	extent_io_tree_init(&fs_info->pending_del,
-			    fs_info->btree_inode->i_mapping, GFP_NOFS);
-	extent_io_tree_init(&fs_info->extent_ins,
-			    fs_info->btree_inode->i_mapping, GFP_NOFS);
 	fs_info->do_barriers = 1;
 
 	INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
@@ -1627,9 +1644,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	insert_inode_hash(fs_info->btree_inode);
 
 	mutex_init(&fs_info->trans_mutex);
+	mutex_init(&fs_info->ordered_operations_mutex);
 	mutex_init(&fs_info->tree_log_mutex);
 	mutex_init(&fs_info->drop_mutex);
-	mutex_init(&fs_info->extent_ins_mutex);
 	mutex_init(&fs_info->pinned_mutex);
 	mutex_init(&fs_info->chunk_mutex);
 	mutex_init(&fs_info->transaction_kthread_mutex);
@@ -2358,8 +2375,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
 	u64 transid = btrfs_header_generation(buf);
 	struct inode *btree_inode = root->fs_info->btree_inode;
-
-	btrfs_set_lock_blocking(buf);
+	int was_dirty;
 
 	btrfs_assert_tree_locked(buf);
 	if (transid != root->fs_info->generation) {
@@ -2370,7 +2386,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 			(unsigned long long)root->fs_info->generation);
 		WARN_ON(1);
 	}
-	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
+	was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
+					    buf);
+	if (!was_dirty) {
+		spin_lock(&root->fs_info->delalloc_lock);
+		root->fs_info->dirty_metadata_bytes += buf->len;
+		spin_unlock(&root->fs_info->delalloc_lock);
+	}
 }
 
 void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
@@ -2410,6 +2432,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
 int btree_lock_page_hook(struct page *page)
 {
 	struct inode *inode = page->mapping->host;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct extent_buffer *eb;
 	unsigned long len;
@@ -2425,6 +2448,16 @@ int btree_lock_page_hook(struct page *page)
 
 	btrfs_tree_lock(eb);
 	btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
+
+	if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+		spin_lock(&root->fs_info->delalloc_lock);
+		if (root->fs_info->dirty_metadata_bytes >= eb->len)
+			root->fs_info->dirty_metadata_bytes -= eb->len;
+		else
+			WARN_ON(1);
+		spin_unlock(&root->fs_info->delalloc_lock);
+	}
+
 	btrfs_tree_unlock(eb);
 	free_extent_buffer(eb);
 out:
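
clean_tree_block() and btree_lock_page_hook() above form the clean side of the same accounting: the counter is decremented only when the EXTENT_BUFFER_DIRTY bit was actually set, and a would-be underflow of the u64 is caught with WARN_ON(1) instead of wrapping. An illustrative mirror of the dirty-side helper (again, the helper name is editorial):

    /* illustrative sketch: clean-side bookkeeping with underflow guard */
    static void account_buffer_clean(struct btrfs_fs_info *info,
                                     struct extent_buffer *eb)
    {
            if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
                    spin_lock(&info->delalloc_lock);
                    if (info->dirty_metadata_bytes >= eb->len)
                            info->dirty_metadata_bytes -= eb->len;
                    else
                            WARN_ON(1);     /* flag and counter out of sync */
                    spin_unlock(&info->delalloc_lock);
            }
    }
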
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 95029db227be..c958ecbc1916 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -72,6 +72,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root,
 void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
 int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
 void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
+void btrfs_mark_buffer_dirty_nonblocking(struct extent_buffer *buf);
 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
 int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
 int wait_on_tree_block_writeback(struct btrfs_root *root,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fefe83ad2059..f5e7cae63d80 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -49,17 +49,23 @@ struct pending_extent_op {
 	int del;
 };
 
-static int finish_current_insert(struct btrfs_trans_handle *trans,
-				 struct btrfs_root *extent_root, int all);
-static int del_pending_extents(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *extent_root, int all);
-static int pin_down_bytes(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root,
-			  u64 bytenr, u64 num_bytes, int is_data);
+static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
+					 struct btrfs_root *root, u64 parent,
+					 u64 root_objectid, u64 ref_generation,
+					 u64 owner, struct btrfs_key *ins,
+					 int ref_mod);
+static int update_reserved_extents(struct btrfs_root *root,
+				   u64 bytenr, u64 num, int reserve);
 static int update_block_group(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc,
 			      int mark_free);
+static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans,
+					struct btrfs_root *root,
+					u64 bytenr, u64 num_bytes, u64 parent,
+					u64 root_objectid, u64 ref_generation,
+					u64 owner_objectid, int pin,
+					int ref_to_drop);
 
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
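
The new ref_mod and ref_to_drop parameters reflect the batching theme of the series: duplicate updates to one (bytenr, parent) pair are merged in the delayed ref rbtree before the extent tree is ever touched. A worked example of the arithmetic, mirroring update_existing_ref() in the new delayed-ref.c: queueing ADD, ADD, DROP for the same backref leaves ref_mod at 1 + 1 - 1 == 1, so the extent tree sees a single update with a count of 1 instead of three separate searches; an ADD followed immediately by a DROP cancels to ref_mod == 0 and the node is freed without any extent tree modification at all.
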
@@ -554,262 +560,13 @@ out:
 	return ret;
 }
 
557/*
558 * updates all the backrefs that are pending on update_list for the
559 * extent_root
560 */
561static noinline int update_backrefs(struct btrfs_trans_handle *trans,
562 struct btrfs_root *extent_root,
563 struct btrfs_path *path,
564 struct list_head *update_list)
565{
566 struct btrfs_key key;
567 struct btrfs_extent_ref *ref;
568 struct btrfs_fs_info *info = extent_root->fs_info;
569 struct pending_extent_op *op;
570 struct extent_buffer *leaf;
571 int ret = 0;
572 struct list_head *cur = update_list->next;
573 u64 ref_objectid;
574 u64 ref_root = extent_root->root_key.objectid;
575
576 op = list_entry(cur, struct pending_extent_op, list);
577
578search:
579 key.objectid = op->bytenr;
580 key.type = BTRFS_EXTENT_REF_KEY;
581 key.offset = op->orig_parent;
582
583 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
584 BUG_ON(ret);
585
586 leaf = path->nodes[0];
587
588loop:
589 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
590
591 ref_objectid = btrfs_ref_objectid(leaf, ref);
592
593 if (btrfs_ref_root(leaf, ref) != ref_root ||
594 btrfs_ref_generation(leaf, ref) != op->orig_generation ||
595 (ref_objectid != op->level &&
596 ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
597 printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, "
598 "root %llu, owner %u\n",
599 (unsigned long long)op->bytenr,
600 (unsigned long long)op->orig_parent,
601 (unsigned long long)ref_root, op->level);
602 btrfs_print_leaf(extent_root, leaf);
603 BUG();
604 }
605
606 key.objectid = op->bytenr;
607 key.offset = op->parent;
608 key.type = BTRFS_EXTENT_REF_KEY;
609 ret = btrfs_set_item_key_safe(trans, extent_root, path, &key);
610 BUG_ON(ret);
611 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
612 btrfs_set_ref_generation(leaf, ref, op->generation);
613
614 cur = cur->next;
615
616 list_del_init(&op->list);
617 unlock_extent(&info->extent_ins, op->bytenr,
618 op->bytenr + op->num_bytes - 1, GFP_NOFS);
619 kfree(op);
620
621 if (cur == update_list) {
622 btrfs_mark_buffer_dirty(path->nodes[0]);
623 btrfs_release_path(extent_root, path);
624 goto out;
625 }
626
627 op = list_entry(cur, struct pending_extent_op, list);
628
629 path->slots[0]++;
630 while (path->slots[0] < btrfs_header_nritems(leaf)) {
631 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
632 if (key.objectid == op->bytenr &&
633 key.type == BTRFS_EXTENT_REF_KEY)
634 goto loop;
635 path->slots[0]++;
636 }
637
638 btrfs_mark_buffer_dirty(path->nodes[0]);
639 btrfs_release_path(extent_root, path);
640 goto search;
641
642out:
643 return 0;
644}
645
646static noinline int insert_extents(struct btrfs_trans_handle *trans,
647 struct btrfs_root *extent_root,
648 struct btrfs_path *path,
649 struct list_head *insert_list, int nr)
650{
651 struct btrfs_key *keys;
652 u32 *data_size;
653 struct pending_extent_op *op;
654 struct extent_buffer *leaf;
655 struct list_head *cur = insert_list->next;
656 struct btrfs_fs_info *info = extent_root->fs_info;
657 u64 ref_root = extent_root->root_key.objectid;
658 int i = 0, last = 0, ret;
659 int total = nr * 2;
660
661 if (!nr)
662 return 0;
663
664 keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS);
665 if (!keys)
666 return -ENOMEM;
667
668 data_size = kzalloc(total * sizeof(u32), GFP_NOFS);
669 if (!data_size) {
670 kfree(keys);
671 return -ENOMEM;
672 }
673
674 list_for_each_entry(op, insert_list, list) {
675 keys[i].objectid = op->bytenr;
676 keys[i].offset = op->num_bytes;
677 keys[i].type = BTRFS_EXTENT_ITEM_KEY;
678 data_size[i] = sizeof(struct btrfs_extent_item);
679 i++;
680
681 keys[i].objectid = op->bytenr;
682 keys[i].offset = op->parent;
683 keys[i].type = BTRFS_EXTENT_REF_KEY;
684 data_size[i] = sizeof(struct btrfs_extent_ref);
685 i++;
686 }
687
688 op = list_entry(cur, struct pending_extent_op, list);
689 i = 0;
690 while (i < total) {
691 int c;
692 ret = btrfs_insert_some_items(trans, extent_root, path,
693 keys+i, data_size+i, total-i);
694 BUG_ON(ret < 0);
695
696 if (last && ret > 1)
697 BUG();
698
699 leaf = path->nodes[0];
700 for (c = 0; c < ret; c++) {
701 int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY;
702
703 /*
704 * if the first item we inserted was a backref, then
705 * the EXTENT_ITEM will be the odd c's, else it will
706 * be the even c's
707 */
708 if ((ref_first && (c % 2)) ||
709 (!ref_first && !(c % 2))) {
710 struct btrfs_extent_item *itm;
711
712 itm = btrfs_item_ptr(leaf, path->slots[0] + c,
713 struct btrfs_extent_item);
714 btrfs_set_extent_refs(path->nodes[0], itm, 1);
715 op->del++;
716 } else {
717 struct btrfs_extent_ref *ref;
718
719 ref = btrfs_item_ptr(leaf, path->slots[0] + c,
720 struct btrfs_extent_ref);
721 btrfs_set_ref_root(leaf, ref, ref_root);
722 btrfs_set_ref_generation(leaf, ref,
723 op->generation);
724 btrfs_set_ref_objectid(leaf, ref, op->level);
725 btrfs_set_ref_num_refs(leaf, ref, 1);
726 op->del++;
727 }
728
729 /*
730 * using del to see when its ok to free up the
731 * pending_extent_op. In the case where we insert the
732 * last item on the list in order to help do batching
733 * we need to not free the extent op until we actually
734 * insert the extent_item
735 */
736 if (op->del == 2) {
737 unlock_extent(&info->extent_ins, op->bytenr,
738 op->bytenr + op->num_bytes - 1,
739 GFP_NOFS);
740 cur = cur->next;
741 list_del_init(&op->list);
742 kfree(op);
743 if (cur != insert_list)
744 op = list_entry(cur,
745 struct pending_extent_op,
746 list);
747 }
748 }
749 btrfs_mark_buffer_dirty(leaf);
750 btrfs_release_path(extent_root, path);
751
752 /*
753 * Ok backref's and items usually go right next to eachother,
754 * but if we could only insert 1 item that means that we
755 * inserted on the end of a leaf, and we have no idea what may
756 * be on the next leaf so we just play it safe. In order to
757 * try and help this case we insert the last thing on our
758 * insert list so hopefully it will end up being the last
759 * thing on the leaf and everything else will be before it,
760 * which will let us insert a whole bunch of items at the same
761 * time.
762 */
763 if (ret == 1 && !last && (i + ret < total)) {
764 /*
765 * last: where we will pick up the next time around
766 * i: our current key to insert, will be total - 1
767 * cur: the current op we are screwing with
768 * op: duh
769 */
770 last = i + ret;
771 i = total - 1;
772 cur = insert_list->prev;
773 op = list_entry(cur, struct pending_extent_op, list);
774 } else if (last) {
775 /*
776 * ok we successfully inserted the last item on the
777 * list, lets reset everything
778 *
779 * i: our current key to insert, so where we left off
780 * last time
781 * last: done with this
782 * cur: the op we are messing with
783 * op: duh
784 * total: since we inserted the last key, we need to
785 * decrement total so we dont overflow
786 */
787 i = last;
788 last = 0;
789 total--;
790 if (i < total) {
791 cur = insert_list->next;
792 op = list_entry(cur, struct pending_extent_op,
793 list);
794 }
795 } else {
796 i += ret;
797 }
798
799 cond_resched();
800 }
801 ret = 0;
802 kfree(keys);
803 kfree(data_size);
804 return ret;
805}
806
 static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
 					  struct btrfs_root *root,
 					  struct btrfs_path *path,
 					  u64 bytenr, u64 parent,
 					  u64 ref_root, u64 ref_generation,
-					  u64 owner_objectid)
+					  u64 owner_objectid,
+					  int refs_to_add)
 {
 	struct btrfs_key key;
 	struct extent_buffer *leaf;
@@ -829,9 +586,10 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
 		btrfs_set_ref_root(leaf, ref, ref_root);
 		btrfs_set_ref_generation(leaf, ref, ref_generation);
 		btrfs_set_ref_objectid(leaf, ref, owner_objectid);
-		btrfs_set_ref_num_refs(leaf, ref, 1);
+		btrfs_set_ref_num_refs(leaf, ref, refs_to_add);
 	} else if (ret == -EEXIST) {
 		u64 existing_owner;
+
 		BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
 		leaf = path->nodes[0];
 		ref = btrfs_item_ptr(leaf, path->slots[0],
@@ -845,7 +603,7 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
 
 		num_refs = btrfs_ref_num_refs(leaf, ref);
 		BUG_ON(num_refs == 0);
-		btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);
+		btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add);
 
 		existing_owner = btrfs_ref_objectid(leaf, ref);
 		if (existing_owner != owner_objectid &&
@@ -857,6 +615,7 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
 	} else {
 		goto out;
 	}
+	btrfs_unlock_up_safe(path, 1);
 	btrfs_mark_buffer_dirty(path->nodes[0]);
 out:
 	btrfs_release_path(root, path);
@@ -865,7 +624,8 @@ out:
 
 static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
 					  struct btrfs_root *root,
-					  struct btrfs_path *path)
+					  struct btrfs_path *path,
+					  int refs_to_drop)
 {
 	struct extent_buffer *leaf;
 	struct btrfs_extent_ref *ref;
@@ -875,8 +635,8 @@ static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
 	leaf = path->nodes[0];
 	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
 	num_refs = btrfs_ref_num_refs(leaf, ref);
-	BUG_ON(num_refs == 0);
-	num_refs -= 1;
+	BUG_ON(num_refs < refs_to_drop);
+	num_refs -= refs_to_drop;
 	if (num_refs == 0) {
 		ret = btrfs_del_item(trans, root, path);
 	} else {
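
With refs_to_drop in hand, remove_extent_backref() can retire several queued drops in one pass: for example, a backref item recording num_refs == 3 that is asked to drop 2 is rewritten in place with num_refs == 1, and only a later drop of the final reference deletes the item outright. The BUG_ON(num_refs < refs_to_drop) guards against dropping more references than the item actually records.
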
@@ -927,332 +687,28 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 #endif
 }
 
930static noinline int free_extents(struct btrfs_trans_handle *trans,
931 struct btrfs_root *extent_root,
932 struct list_head *del_list)
933{
934 struct btrfs_fs_info *info = extent_root->fs_info;
935 struct btrfs_path *path;
936 struct btrfs_key key, found_key;
937 struct extent_buffer *leaf;
938 struct list_head *cur;
939 struct pending_extent_op *op;
940 struct btrfs_extent_item *ei;
941 int ret, num_to_del, extent_slot = 0, found_extent = 0;
942 u32 refs;
943 u64 bytes_freed = 0;
944
945 path = btrfs_alloc_path();
946 if (!path)
947 return -ENOMEM;
948 path->reada = 1;
949
950search:
951 /* search for the backref for the current ref we want to delete */
952 cur = del_list->next;
953 op = list_entry(cur, struct pending_extent_op, list);
954 ret = lookup_extent_backref(trans, extent_root, path, op->bytenr,
955 op->orig_parent,
956 extent_root->root_key.objectid,
957 op->orig_generation, op->level, 1);
958 if (ret) {
959 printk(KERN_ERR "btrfs unable to find backref byte nr %llu "
960 "root %llu gen %llu owner %u\n",
961 (unsigned long long)op->bytenr,
962 (unsigned long long)extent_root->root_key.objectid,
963 (unsigned long long)op->orig_generation, op->level);
964 btrfs_print_leaf(extent_root, path->nodes[0]);
965 WARN_ON(1);
966 goto out;
967 }
968
969 extent_slot = path->slots[0];
970 num_to_del = 1;
971 found_extent = 0;
972
973 /*
974 * if we aren't the first item on the leaf we can move back one and see
975 * if our ref is right next to our extent item
976 */
977 if (likely(extent_slot)) {
978 extent_slot--;
979 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
980 extent_slot);
981 if (found_key.objectid == op->bytenr &&
982 found_key.type == BTRFS_EXTENT_ITEM_KEY &&
983 found_key.offset == op->num_bytes) {
984 num_to_del++;
985 found_extent = 1;
986 }
987 }
988
989 /*
990 * if we didn't find the extent we need to delete the backref and then
991 * search for the extent item key so we can update its ref count
992 */
993 if (!found_extent) {
994 key.objectid = op->bytenr;
995 key.type = BTRFS_EXTENT_ITEM_KEY;
996 key.offset = op->num_bytes;
997
998 ret = remove_extent_backref(trans, extent_root, path);
999 BUG_ON(ret);
1000 btrfs_release_path(extent_root, path);
1001 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1002 BUG_ON(ret);
1003 extent_slot = path->slots[0];
1004 }
1005
1006 /* this is where we update the ref count for the extent */
1007 leaf = path->nodes[0];
1008 ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item);
1009 refs = btrfs_extent_refs(leaf, ei);
1010 BUG_ON(refs == 0);
1011 refs--;
1012 btrfs_set_extent_refs(leaf, ei, refs);
1013
1014 btrfs_mark_buffer_dirty(leaf);
1015
1016 /*
1017 * This extent needs deleting. The reason cur_slot is extent_slot +
1018 * num_to_del is because extent_slot points to the slot where the extent
1019 * is, and if the backref was not right next to the extent we will be
1020 * deleting at least 1 item, and will want to start searching at the
1021 * slot directly next to extent_slot. However if we did find the
1022 * backref next to the extent item then we will be deleting at least 2
1023 * items and will want to start searching directly after the ref slot
1024 */
1025 if (!refs) {
1026 struct list_head *pos, *n, *end;
1027 int cur_slot = extent_slot+num_to_del;
1028 u64 super_used;
1029 u64 root_used;
1030
1031 path->slots[0] = extent_slot;
1032 bytes_freed = op->num_bytes;
1033
1034 mutex_lock(&info->pinned_mutex);
1035 ret = pin_down_bytes(trans, extent_root, op->bytenr,
1036 op->num_bytes, op->level >=
1037 BTRFS_FIRST_FREE_OBJECTID);
1038 mutex_unlock(&info->pinned_mutex);
1039 BUG_ON(ret < 0);
1040 op->del = ret;
1041
1042 /*
1043 * we need to see if we can delete multiple things at once, so
1044 * start looping through the list of extents we are wanting to
1045 * delete and see if their extent/backrefs are right next to
1046 * each other and the extents only have 1 ref
1047 */
1048 for (pos = cur->next; pos != del_list; pos = pos->next) {
1049 struct pending_extent_op *tmp;
1050
1051 tmp = list_entry(pos, struct pending_extent_op, list);
1052
1053 /* we only want to delete extent+ref at this stage */
1054 if (cur_slot >= btrfs_header_nritems(leaf) - 1)
1055 break;
1056
1057 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot);
1058 if (found_key.objectid != tmp->bytenr ||
1059 found_key.type != BTRFS_EXTENT_ITEM_KEY ||
1060 found_key.offset != tmp->num_bytes)
1061 break;
1062
1063 /* check to make sure this extent only has one ref */
1064 ei = btrfs_item_ptr(leaf, cur_slot,
1065 struct btrfs_extent_item);
1066 if (btrfs_extent_refs(leaf, ei) != 1)
1067 break;
1068
1069 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1);
1070 if (found_key.objectid != tmp->bytenr ||
1071 found_key.type != BTRFS_EXTENT_REF_KEY ||
1072 found_key.offset != tmp->orig_parent)
1073 break;
1074
1075 /*
1076 * the ref is right next to the extent, we can set the
1077 * ref count to 0 since we will delete them both now
1078 */
1079 btrfs_set_extent_refs(leaf, ei, 0);
1080
1081 /* pin down the bytes for this extent */
1082 mutex_lock(&info->pinned_mutex);
1083 ret = pin_down_bytes(trans, extent_root, tmp->bytenr,
1084 tmp->num_bytes, tmp->level >=
1085 BTRFS_FIRST_FREE_OBJECTID);
1086 mutex_unlock(&info->pinned_mutex);
1087 BUG_ON(ret < 0);
1088
1089 /*
1090 * use the del field to tell if we need to go ahead and
1091 * free up the extent when we delete the item or not.
1092 */
1093 tmp->del = ret;
1094 bytes_freed += tmp->num_bytes;
1095
1096 num_to_del += 2;
1097 cur_slot += 2;
1098 }
1099 end = pos;
1100
1101 /* update the free space counters */
1102 spin_lock(&info->delalloc_lock);
1103 super_used = btrfs_super_bytes_used(&info->super_copy);
1104 btrfs_set_super_bytes_used(&info->super_copy,
1105 super_used - bytes_freed);
1106
1107 root_used = btrfs_root_used(&extent_root->root_item);
1108 btrfs_set_root_used(&extent_root->root_item,
1109 root_used - bytes_freed);
1110 spin_unlock(&info->delalloc_lock);
1111
1112 /* delete the items */
1113 ret = btrfs_del_items(trans, extent_root, path,
1114 path->slots[0], num_to_del);
1115 BUG_ON(ret);
1116
1117 /*
1118 * loop through the extents we deleted and do the cleanup work
1119 * on them
1120 */
1121 for (pos = cur, n = pos->next; pos != end;
1122 pos = n, n = pos->next) {
1123 struct pending_extent_op *tmp;
1124 tmp = list_entry(pos, struct pending_extent_op, list);
1125
1126 /*
1127 * remember tmp->del tells us whether or not we pinned
1128 * down the extent
1129 */
1130 ret = update_block_group(trans, extent_root,
1131 tmp->bytenr, tmp->num_bytes, 0,
1132 tmp->del);
1133 BUG_ON(ret);
1134
1135 list_del_init(&tmp->list);
1136 unlock_extent(&info->extent_ins, tmp->bytenr,
1137 tmp->bytenr + tmp->num_bytes - 1,
1138 GFP_NOFS);
1139 kfree(tmp);
1140 }
1141 } else if (refs && found_extent) {
1142 /*
1143 * the ref and extent were right next to each other, but the
1144 * extent still has a ref, so just free the backref and keep
1145 * going
1146 */
1147 ret = remove_extent_backref(trans, extent_root, path);
1148 BUG_ON(ret);
1149
1150 list_del_init(&op->list);
1151 unlock_extent(&info->extent_ins, op->bytenr,
1152 op->bytenr + op->num_bytes - 1, GFP_NOFS);
1153 kfree(op);
1154 } else {
1155 /*
1156 * the extent has multiple refs and the backref we were looking
1157 * for was not right next to it, so just unlock it and move on;
1158 * we're good to go
1159 */
1160 list_del_init(&op->list);
1161 unlock_extent(&info->extent_ins, op->bytenr,
1162 op->bytenr + op->num_bytes - 1, GFP_NOFS);
1163 kfree(op);
1164 }
1165
1166 btrfs_release_path(extent_root, path);
1167 if (!list_empty(del_list))
1168 goto search;
1169
1170out:
1171 btrfs_free_path(path);
1172 return ret;
1173}
1174
1175static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 690static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1176 struct btrfs_root *root, u64 bytenr, 691 struct btrfs_root *root, u64 bytenr,
692 u64 num_bytes,
1177 u64 orig_parent, u64 parent, 693 u64 orig_parent, u64 parent,
1178 u64 orig_root, u64 ref_root, 694 u64 orig_root, u64 ref_root,
1179 u64 orig_generation, u64 ref_generation, 695 u64 orig_generation, u64 ref_generation,
1180 u64 owner_objectid) 696 u64 owner_objectid)
1181{ 697{
1182 int ret; 698 int ret;
1183 struct btrfs_root *extent_root = root->fs_info->extent_root; 699 int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID;
1184 struct btrfs_path *path;
1185
1186 if (root == root->fs_info->extent_root) {
1187 struct pending_extent_op *extent_op;
1188 u64 num_bytes;
1189
1190 BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
1191 num_bytes = btrfs_level_size(root, (int)owner_objectid);
1192 mutex_lock(&root->fs_info->extent_ins_mutex);
1193 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
1194 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
1195 u64 priv;
1196 ret = get_state_private(&root->fs_info->extent_ins,
1197 bytenr, &priv);
1198 BUG_ON(ret);
1199 extent_op = (struct pending_extent_op *)
1200 (unsigned long)priv;
1201 BUG_ON(extent_op->parent != orig_parent);
1202 BUG_ON(extent_op->generation != orig_generation);
1203 700
1204 extent_op->parent = parent; 701 ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes,
1205 extent_op->generation = ref_generation; 702 orig_parent, parent, orig_root,
1206 } else { 703 ref_root, orig_generation,
1207 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 704 ref_generation, owner_objectid, pin);
1208 BUG_ON(!extent_op);
1209
1210 extent_op->type = PENDING_BACKREF_UPDATE;
1211 extent_op->bytenr = bytenr;
1212 extent_op->num_bytes = num_bytes;
1213 extent_op->parent = parent;
1214 extent_op->orig_parent = orig_parent;
1215 extent_op->generation = ref_generation;
1216 extent_op->orig_generation = orig_generation;
1217 extent_op->level = (int)owner_objectid;
1218 INIT_LIST_HEAD(&extent_op->list);
1219 extent_op->del = 0;
1220
1221 set_extent_bits(&root->fs_info->extent_ins,
1222 bytenr, bytenr + num_bytes - 1,
1223 EXTENT_WRITEBACK, GFP_NOFS);
1224 set_state_private(&root->fs_info->extent_ins,
1225 bytenr, (unsigned long)extent_op);
1226 }
1227 mutex_unlock(&root->fs_info->extent_ins_mutex);
1228 return 0;
1229 }
1230
1231 path = btrfs_alloc_path();
1232 if (!path)
1233 return -ENOMEM;
1234 ret = lookup_extent_backref(trans, extent_root, path,
1235 bytenr, orig_parent, orig_root,
1236 orig_generation, owner_objectid, 1);
1237 if (ret)
1238 goto out;
1239 ret = remove_extent_backref(trans, extent_root, path);
1240 if (ret)
1241 goto out;
1242 ret = insert_extent_backref(trans, extent_root, path, bytenr,
1243 parent, ref_root, ref_generation,
1244 owner_objectid);
1245 BUG_ON(ret); 705 BUG_ON(ret);
1246 finish_current_insert(trans, extent_root, 0);
1247 del_pending_extents(trans, extent_root, 0);
1248out:
1249 btrfs_free_path(path);
1250 return ret; 706 return ret;
1251} 707}
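The old body did a synchronous lookup/remove/insert on the extent tree; the new one only records the change with btrfs_update_delayed_ref and lets a later btrfs_run_delayed_refs apply it. A hedged sketch of that deferral, with invented types (the real code keeps these in an rbtree keyed by bytenr, merged per extent):

    #include <stdint.h>
    #include <stdio.h>

    struct delayed_update {
            uint64_t bytenr, num_bytes;
            uint64_t old_parent, new_parent;
            int pin;                /* metadata updates request pinning */
    };

    /* stand-in for btrfs_update_delayed_ref(): queue now, touch the tree later */
    static void queue_update(struct delayed_update *q, int *n, struct delayed_update u)
    {
            q[(*n)++] = u;
    }

    int main(void)
    {
            struct delayed_update q[8];
            int n = 0;

            queue_update(q, &n, (struct delayed_update){ .bytenr = 4096,
                        .num_bytes = 4096, .old_parent = 1, .new_parent = 2, .pin = 1 });
            printf("%d update(s) queued, extent tree untouched so far\n", n);
            return 0;
    }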
1252 708
1253int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 709int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1254 struct btrfs_root *root, u64 bytenr, 710 struct btrfs_root *root, u64 bytenr,
1255 u64 orig_parent, u64 parent, 711 u64 num_bytes, u64 orig_parent, u64 parent,
1256 u64 ref_root, u64 ref_generation, 712 u64 ref_root, u64 ref_generation,
1257 u64 owner_objectid) 713 u64 owner_objectid)
1258{ 714{
@@ -1260,20 +716,36 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1260 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 716 if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
1261 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 717 owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
1262 return 0; 718 return 0;
1263 ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, 719
1264 parent, ref_root, ref_root, 720 ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
1265 ref_generation, ref_generation, 721 orig_parent, parent, ref_root,
1266 owner_objectid); 722 ref_root, ref_generation,
723 ref_generation, owner_objectid);
1267 return ret; 724 return ret;
1268} 725}
1269
1270static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 726static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1271 struct btrfs_root *root, u64 bytenr, 727 struct btrfs_root *root, u64 bytenr,
728 u64 num_bytes,
1272 u64 orig_parent, u64 parent, 729 u64 orig_parent, u64 parent,
1273 u64 orig_root, u64 ref_root, 730 u64 orig_root, u64 ref_root,
1274 u64 orig_generation, u64 ref_generation, 731 u64 orig_generation, u64 ref_generation,
1275 u64 owner_objectid) 732 u64 owner_objectid)
1276{ 733{
734 int ret;
735
736 ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root,
737 ref_generation, owner_objectid,
738 BTRFS_ADD_DELAYED_REF, 0);
739 BUG_ON(ret);
740 return ret;
741}
742
743static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
744 struct btrfs_root *root, u64 bytenr,
745 u64 num_bytes, u64 parent, u64 ref_root,
746 u64 ref_generation, u64 owner_objectid,
747 int refs_to_add)
748{
1277 struct btrfs_path *path; 749 struct btrfs_path *path;
1278 int ret; 750 int ret;
1279 struct btrfs_key key; 751 struct btrfs_key key;
@@ -1286,17 +758,24 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1286 return -ENOMEM; 758 return -ENOMEM;
1287 759
1288 path->reada = 1; 760 path->reada = 1;
761 path->leave_spinning = 1;
1289 key.objectid = bytenr; 762 key.objectid = bytenr;
1290 key.type = BTRFS_EXTENT_ITEM_KEY; 763 key.type = BTRFS_EXTENT_ITEM_KEY;
1291 key.offset = (u64)-1; 764 key.offset = num_bytes;
1292 765
1293 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 766 /* first find the extent item and update its reference count */
1294 0, 1); 767 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
1295 if (ret < 0) 768 path, 0, 1);
769 if (ret < 0) {
770 btrfs_set_path_blocking(path);
1296 return ret; 771 return ret;
1297 BUG_ON(ret == 0 || path->slots[0] == 0); 772 }
1298 773
1299 path->slots[0]--; 774 if (ret > 0) {
775 WARN_ON(1);
776 btrfs_free_path(path);
777 return -EIO;
778 }
1300 l = path->nodes[0]; 779 l = path->nodes[0];
1301 780
1302 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 781 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
@@ -1310,21 +789,24 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1310 BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); 789 BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
1311 790
1312 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 791 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
792
1313 refs = btrfs_extent_refs(l, item); 793 refs = btrfs_extent_refs(l, item);
1314 btrfs_set_extent_refs(l, item, refs + 1); 794 btrfs_set_extent_refs(l, item, refs + refs_to_add);
795 btrfs_unlock_up_safe(path, 1);
796
1315 btrfs_mark_buffer_dirty(path->nodes[0]); 797 btrfs_mark_buffer_dirty(path->nodes[0]);
1316 798
1317 btrfs_release_path(root->fs_info->extent_root, path); 799 btrfs_release_path(root->fs_info->extent_root, path);
1318 800
1319 path->reada = 1; 801 path->reada = 1;
802 path->leave_spinning = 1;
803
804 /* now insert the actual backref */
1320 ret = insert_extent_backref(trans, root->fs_info->extent_root, 805 ret = insert_extent_backref(trans, root->fs_info->extent_root,
1321 path, bytenr, parent, 806 path, bytenr, parent,
1322 ref_root, ref_generation, 807 ref_root, ref_generation,
1323 owner_objectid); 808 owner_objectid, refs_to_add);
1324 BUG_ON(ret); 809 BUG_ON(ret);
1325 finish_current_insert(trans, root->fs_info->extent_root, 0);
1326 del_pending_extents(trans, root->fs_info->extent_root, 0);
1327
1328 btrfs_free_path(path); 810 btrfs_free_path(path);
1329 return 0; 811 return 0;
1330} 812}
@@ -1339,68 +821,278 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1339 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 821 if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
1340 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 822 owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
1341 return 0; 823 return 0;
1342 ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, 824
825 ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent,
1343 0, ref_root, 0, ref_generation, 826 0, ref_root, 0, ref_generation,
1344 owner_objectid); 827 owner_objectid);
1345 return ret; 828 return ret;
1346} 829}
1347 830
1348int btrfs_extent_post_op(struct btrfs_trans_handle *trans, 831static int drop_delayed_ref(struct btrfs_trans_handle *trans,
1349 struct btrfs_root *root) 832 struct btrfs_root *root,
833 struct btrfs_delayed_ref_node *node)
834{
835 int ret = 0;
836 struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node);
837
838 BUG_ON(node->ref_mod == 0);
839 ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes,
840 node->parent, ref->root, ref->generation,
841 ref->owner_objectid, ref->pin, node->ref_mod);
842
843 return ret;
844}
845
846/* helper function to actually process a single delayed ref entry */
847static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans,
848 struct btrfs_root *root,
849 struct btrfs_delayed_ref_node *node,
850 int insert_reserved)
1350{ 851{
1351 u64 start;
1352 u64 end;
1353 int ret; 852 int ret;
853 struct btrfs_delayed_ref *ref;
854
855 if (node->parent == (u64)-1) {
856 struct btrfs_delayed_ref_head *head;
857 /*
858 * we've hit the end of the chain and we were supposed
859 * to insert this extent into the tree. But it got
860 * deleted before we ever needed to insert it, so all
861 * we have to do is clean up the accounting
862 */
863 if (insert_reserved) {
864 update_reserved_extents(root, node->bytenr,
865 node->num_bytes, 0);
866 }
867 head = btrfs_delayed_node_to_head(node);
868 mutex_unlock(&head->mutex);
869 return 0;
870 }
1354 871
1355 while (1) { 872 ref = btrfs_delayed_node_to_ref(node);
1356 finish_current_insert(trans, root->fs_info->extent_root, 1); 873 if (ref->action == BTRFS_ADD_DELAYED_REF) {
1357 del_pending_extents(trans, root->fs_info->extent_root, 1); 874 if (insert_reserved) {
875 struct btrfs_key ins;
1358 876
1359 /* is there more work to do? */ 877 ins.objectid = node->bytenr;
1360 ret = find_first_extent_bit(&root->fs_info->pending_del, 878 ins.offset = node->num_bytes;
1361 0, &start, &end, EXTENT_WRITEBACK); 879 ins.type = BTRFS_EXTENT_ITEM_KEY;
1362 if (!ret) 880
1363 continue; 881 /* record the full extent allocation */
1364 ret = find_first_extent_bit(&root->fs_info->extent_ins, 882 ret = __btrfs_alloc_reserved_extent(trans, root,
1365 0, &start, &end, EXTENT_WRITEBACK); 883 node->parent, ref->root,
1366 if (!ret) 884 ref->generation, ref->owner_objectid,
1367 continue; 885 &ins, node->ref_mod);
1368 break; 886 update_reserved_extents(root, node->bytenr,
887 node->num_bytes, 0);
888 } else {
889 /* just add one backref */
890 ret = add_extent_ref(trans, root, node->bytenr,
891 node->num_bytes,
892 node->parent, ref->root, ref->generation,
893 ref->owner_objectid, node->ref_mod);
894 }
895 BUG_ON(ret);
896 } else if (ref->action == BTRFS_DROP_DELAYED_REF) {
897 WARN_ON(insert_reserved);
898 ret = drop_delayed_ref(trans, root, node);
1369 } 899 }
1370 return 0; 900 return 0;
1371} 901}
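run_one_delayed_ref splits into three cases: a bare head node whose extent died before it was ever inserted (only reservation accounting to unwind), an add (either the full extent insert or one more backref, depending on insert_reserved), and a drop. A compact model of just that dispatch, using stand-in enums rather than the real node types:

    #include <stdio.h>

    enum action { HEAD_ONLY, ADD_REF, DROP_REF };

    static const char *run_one(enum action a, int insert_reserved)
    {
            if (a == HEAD_ONLY)     /* end of chain: extent was never inserted */
                    return insert_reserved ? "release reservation" : "nothing to do";
            if (a == ADD_REF)
                    return insert_reserved ? "insert full extent item"
                                           : "add one backref";
            return "drop refs via __btrfs_free_extent";
    }

    int main(void)
    {
            printf("%s\n", run_one(ADD_REF, 1));
            printf("%s\n", run_one(DROP_REF, 0));
            return 0;
    }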
1372 902
1373int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, 903static noinline struct btrfs_delayed_ref_node *
1374 struct btrfs_root *root, u64 bytenr, 904select_delayed_ref(struct btrfs_delayed_ref_head *head)
1375 u64 num_bytes, u32 *refs)
1376{ 905{
1377 struct btrfs_path *path; 906 struct rb_node *node;
907 struct btrfs_delayed_ref_node *ref;
908 int action = BTRFS_ADD_DELAYED_REF;
909again:
910 /*
911 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
912 * this prevents ref count from going down to zero when
913 * there are still pending delayed refs.
914 */
915 node = rb_prev(&head->node.rb_node);
916 while (1) {
917 if (!node)
918 break;
919 ref = rb_entry(node, struct btrfs_delayed_ref_node,
920 rb_node);
921 if (ref->bytenr != head->node.bytenr)
922 break;
923 if (btrfs_delayed_node_to_ref(ref)->action == action)
924 return ref;
925 node = rb_prev(node);
926 }
927 if (action == BTRFS_ADD_DELAYED_REF) {
928 action = BTRFS_DROP_DELAYED_REF;
929 goto again;
930 }
931 return NULL;
932}
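The two-pass walk above is the whole trick: adds are consumed before drops so the on-disk count can never transiently hit zero while more modifications are queued. The same selection over a plain array (illustrative only):

    #include <stdio.h>

    enum { ADD, DROP };

    static int select_ref(const int *actions, int n)
    {
            for (int want = ADD; want <= DROP; want++)      /* adds first */
                    for (int i = 0; i < n; i++)
                            if (actions[i] == want)
                                    return i;
            return -1;      /* nothing left but the head node */
    }

    int main(void)
    {
            int pending[] = { DROP, DROP, ADD };
            printf("run index %d first\n", select_ref(pending, 3)); /* 2: the ADD */
            return 0;
    }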
933
934static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
935 struct btrfs_root *root,
936 struct list_head *cluster)
937{
938 struct btrfs_delayed_ref_root *delayed_refs;
939 struct btrfs_delayed_ref_node *ref;
940 struct btrfs_delayed_ref_head *locked_ref = NULL;
1378 int ret; 941 int ret;
1379 struct btrfs_key key; 942 int count = 0;
1380 struct extent_buffer *l; 943 int must_insert_reserved = 0;
1381 struct btrfs_extent_item *item;
1382 944
1383 WARN_ON(num_bytes < root->sectorsize); 945 delayed_refs = &trans->transaction->delayed_refs;
1384 path = btrfs_alloc_path(); 946 while (1) {
1385 path->reada = 1; 947 if (!locked_ref) {
1386 key.objectid = bytenr; 948 /* pick a new head ref from the cluster list */
1387 key.offset = num_bytes; 949 if (list_empty(cluster))
1388 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 950 break;
1389 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 951
1390 0, 0); 952 locked_ref = list_entry(cluster->next,
1391 if (ret < 0) 953 struct btrfs_delayed_ref_head, cluster);
1392 goto out; 954
1393 if (ret != 0) { 955 /* grab the lock that says we are going to process
1394 btrfs_print_leaf(root, path->nodes[0]); 956 * all the refs for this head */
1395 printk(KERN_INFO "btrfs failed to find block number %llu\n", 957 ret = btrfs_delayed_ref_lock(trans, locked_ref);
1396 (unsigned long long)bytenr); 958
1397 BUG(); 959 /*
960 * we may have dropped the spin lock to get the head
961 * mutex lock, and that might have given someone else
962 * time to free the head. If that's true, it has been
963 * removed from our list and we can move on.
964 */
965 if (ret == -EAGAIN) {
966 locked_ref = NULL;
967 count++;
968 continue;
969 }
970 }
971
972 /*
973 * record the must insert reserved flag before we
974 * drop the spin lock.
975 */
976 must_insert_reserved = locked_ref->must_insert_reserved;
977 locked_ref->must_insert_reserved = 0;
978
979 /*
980 * locked_ref is the head node, so we have to go one
981 * node back for any delayed ref updates
982 */
983 ref = select_delayed_ref(locked_ref);
984 if (!ref) {
985 /* All delayed refs have been processed, so go ahead
986 * and send the head node to run_one_delayed_ref,
987 * so that any accounting fixes can happen
988 */
989 ref = &locked_ref->node;
990 list_del_init(&locked_ref->cluster);
991 locked_ref = NULL;
992 }
993
994 ref->in_tree = 0;
995 rb_erase(&ref->rb_node, &delayed_refs->root);
996 delayed_refs->num_entries--;
997 spin_unlock(&delayed_refs->lock);
998
999 ret = run_one_delayed_ref(trans, root, ref,
1000 must_insert_reserved);
1001 BUG_ON(ret);
1002 btrfs_put_delayed_ref(ref);
1003
1004 count++;
1005 cond_resched();
1006 spin_lock(&delayed_refs->lock);
1007 }
1008 return count;
1009}
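Two details here are easy to miss: btrfs_delayed_ref_lock can come back with -EAGAIN because the head was freed while the spinlock was dropped, and must_insert_reserved is read-and-cleared under the lock so exactly one ref performs the reserved insert. The latch, in miniature (hypothetical struct, not the kernel's):

    #include <stdio.h>

    struct head { int must_insert_reserved; };

    /* read-and-clear while (notionally) holding the spinlock */
    static int latch_reserved(struct head *h)
    {
            int v = h->must_insert_reserved;
            h->must_insert_reserved = 0;
            return v;
    }

    int main(void)
    {
            struct head h = { .must_insert_reserved = 1 };
            /* only the first caller sees 1; prints "1 0" */
            printf("%d %d\n", latch_reserved(&h), latch_reserved(&h));
            return 0;
    }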
1010
1011/*
1012 * this starts processing the delayed reference count updates and
1013 * extent insertions we have queued up so far. count can be
1014 * 0, which means to process everything in the tree at the start
1015 * of the run (but not newly added entries), or it can be some target
1016 * number you'd like to process.
1017 */
1018int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1019 struct btrfs_root *root, unsigned long count)
1020{
1021 struct rb_node *node;
1022 struct btrfs_delayed_ref_root *delayed_refs;
1023 struct btrfs_delayed_ref_node *ref;
1024 struct list_head cluster;
1025 int ret;
1026 int run_all = count == (unsigned long)-1;
1027 int run_most = 0;
1028
1029 if (root == root->fs_info->extent_root)
1030 root = root->fs_info->tree_root;
1031
1032 delayed_refs = &trans->transaction->delayed_refs;
1033 INIT_LIST_HEAD(&cluster);
1034again:
1035 spin_lock(&delayed_refs->lock);
1036 if (count == 0) {
1037 count = delayed_refs->num_entries * 2;
1038 run_most = 1;
1039 }
1040 while (1) {
1041 if (!(run_all || run_most) &&
1042 delayed_refs->num_heads_ready < 64)
1043 break;
1044
1045 /*
1046 * go find something we can process in the rbtree. We start at
1047 * the beginning of the tree, and then build a cluster
1048 * of refs to process starting at the first one we are able to
1049 * lock
1050 */
1051 ret = btrfs_find_ref_cluster(trans, &cluster,
1052 delayed_refs->run_delayed_start);
1053 if (ret)
1054 break;
1055
1056 ret = run_clustered_refs(trans, root, &cluster);
1057 BUG_ON(ret < 0);
1058
1059 count -= min_t(unsigned long, ret, count);
1060
1061 if (count == 0)
1062 break;
1063 }
1064
1065 if (run_all) {
1066 node = rb_first(&delayed_refs->root);
1067 if (!node)
1068 goto out;
1069 count = (unsigned long)-1;
1070
1071 while (node) {
1072 ref = rb_entry(node, struct btrfs_delayed_ref_node,
1073 rb_node);
1074 if (btrfs_delayed_ref_is_head(ref)) {
1075 struct btrfs_delayed_ref_head *head;
1076
1077 head = btrfs_delayed_node_to_head(ref);
1078 atomic_inc(&ref->refs);
1079
1080 spin_unlock(&delayed_refs->lock);
1081 mutex_lock(&head->mutex);
1082 mutex_unlock(&head->mutex);
1083
1084 btrfs_put_delayed_ref(ref);
1085 cond_resched();
1086 goto again;
1087 }
1088 node = rb_next(node);
1089 }
1090 spin_unlock(&delayed_refs->lock);
1091 schedule_timeout(1);
1092 goto again;
1398 } 1093 }
1399 l = path->nodes[0];
1400 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1401 *refs = btrfs_extent_refs(l, item);
1402out: 1094out:
1403 btrfs_free_path(path); 1095 spin_unlock(&delayed_refs->lock);
1404 return 0; 1096 return 0;
1405} 1097}
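So count has three shapes: (unsigned long)-1 drains the whole tree including heads that need the mutex dance, 0 means roughly the work that existed when the call started (num_entries * 2, via run_most), and any other value is a literal target. A small model of that normalisation, assuming the same convention:

    #include <stdio.h>

    static unsigned long normalize(unsigned long count, unsigned long num_entries,
                                   int *run_all, int *run_most)
    {
            *run_all = (count == (unsigned long)-1);
            *run_most = 0;
            if (count == 0) {               /* snapshot of the current backlog */
                    count = num_entries * 2;
                    *run_most = 1;
            }
            return count;
    }

    int main(void)
    {
            int all, most;
            printf("%lu\n", normalize(0, 100, &all, &most)); /* 200, run_most set */
            printf("%d\n", all);                             /* 0 */
            return 0;
    }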
1406 1098
@@ -1624,7 +1316,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1624 int refi = 0; 1316 int refi = 0;
1625 int slot; 1317 int slot;
1626 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 1318 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
1627 u64, u64, u64, u64, u64, u64, u64, u64); 1319 u64, u64, u64, u64, u64, u64, u64, u64, u64);
1628 1320
1629 ref_root = btrfs_header_owner(buf); 1321 ref_root = btrfs_header_owner(buf);
1630 ref_generation = btrfs_header_generation(buf); 1322 ref_generation = btrfs_header_generation(buf);
@@ -1696,12 +1388,19 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1696 1388
1697 if (level == 0) { 1389 if (level == 0) {
1698 btrfs_item_key_to_cpu(buf, &key, slot); 1390 btrfs_item_key_to_cpu(buf, &key, slot);
1391 fi = btrfs_item_ptr(buf, slot,
1392 struct btrfs_file_extent_item);
1393
1394 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1395 if (bytenr == 0)
1396 continue;
1699 1397
1700 ret = process_func(trans, root, bytenr, 1398 ret = process_func(trans, root, bytenr,
1701 orig_buf->start, buf->start, 1399 btrfs_file_extent_disk_num_bytes(buf, fi),
1702 orig_root, ref_root, 1400 orig_buf->start, buf->start,
1703 orig_generation, ref_generation, 1401 orig_root, ref_root,
1704 key.objectid); 1402 orig_generation, ref_generation,
1403 key.objectid);
1705 1404
1706 if (ret) { 1405 if (ret) {
1707 faili = slot; 1406 faili = slot;
@@ -1709,7 +1408,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1709 goto fail; 1408 goto fail;
1710 } 1409 }
1711 } else { 1410 } else {
1712 ret = process_func(trans, root, bytenr, 1411 ret = process_func(trans, root, bytenr, buf->len,
1713 orig_buf->start, buf->start, 1412 orig_buf->start, buf->start,
1714 orig_root, ref_root, 1413 orig_root, ref_root,
1715 orig_generation, ref_generation, 1414 orig_generation, ref_generation,
@@ -1786,17 +1485,17 @@ int btrfs_update_ref(struct btrfs_trans_handle *trans,
1786 if (bytenr == 0) 1485 if (bytenr == 0)
1787 continue; 1486 continue;
1788 ret = __btrfs_update_extent_ref(trans, root, bytenr, 1487 ret = __btrfs_update_extent_ref(trans, root, bytenr,
1789 orig_buf->start, buf->start, 1488 btrfs_file_extent_disk_num_bytes(buf, fi),
1790 orig_root, ref_root, 1489 orig_buf->start, buf->start,
1791 orig_generation, ref_generation, 1490 orig_root, ref_root, orig_generation,
1792 key.objectid); 1491 ref_generation, key.objectid);
1793 if (ret) 1492 if (ret)
1794 goto fail; 1493 goto fail;
1795 } else { 1494 } else {
1796 bytenr = btrfs_node_blockptr(buf, slot); 1495 bytenr = btrfs_node_blockptr(buf, slot);
1797 ret = __btrfs_update_extent_ref(trans, root, bytenr, 1496 ret = __btrfs_update_extent_ref(trans, root, bytenr,
1798 orig_buf->start, buf->start, 1497 buf->len, orig_buf->start,
1799 orig_root, ref_root, 1498 buf->start, orig_root, ref_root,
1800 orig_generation, ref_generation, 1499 orig_generation, ref_generation,
1801 level - 1); 1500 level - 1);
1802 if (ret) 1501 if (ret)
@@ -1815,7 +1514,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
1815 struct btrfs_block_group_cache *cache) 1514 struct btrfs_block_group_cache *cache)
1816{ 1515{
1817 int ret; 1516 int ret;
1818 int pending_ret;
1819 struct btrfs_root *extent_root = root->fs_info->extent_root; 1517 struct btrfs_root *extent_root = root->fs_info->extent_root;
1820 unsigned long bi; 1518 unsigned long bi;
1821 struct extent_buffer *leaf; 1519 struct extent_buffer *leaf;
@@ -1831,12 +1529,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
1831 btrfs_mark_buffer_dirty(leaf); 1529 btrfs_mark_buffer_dirty(leaf);
1832 btrfs_release_path(extent_root, path); 1530 btrfs_release_path(extent_root, path);
1833fail: 1531fail:
1834 finish_current_insert(trans, extent_root, 0);
1835 pending_ret = del_pending_extents(trans, extent_root, 0);
1836 if (ret) 1532 if (ret)
1837 return ret; 1533 return ret;
1838 if (pending_ret)
1839 return pending_ret;
1840 return 0; 1534 return 0;
1841 1535
1842} 1536}
@@ -2361,6 +2055,8 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
2361 clear_extent_dirty(&fs_info->pinned_extents, 2055 clear_extent_dirty(&fs_info->pinned_extents,
2362 bytenr, bytenr + num - 1, GFP_NOFS); 2056 bytenr, bytenr + num - 1, GFP_NOFS);
2363 } 2057 }
2058 mutex_unlock(&root->fs_info->pinned_mutex);
2059
2364 while (num > 0) { 2060 while (num > 0) {
2365 cache = btrfs_lookup_block_group(fs_info, bytenr); 2061 cache = btrfs_lookup_block_group(fs_info, bytenr);
2366 BUG_ON(!cache); 2062 BUG_ON(!cache);
@@ -2452,8 +2148,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
2452 u64 end; 2148 u64 end;
2453 int ret; 2149 int ret;
2454 2150
2455 mutex_lock(&root->fs_info->pinned_mutex);
2456 while (1) { 2151 while (1) {
2152 mutex_lock(&root->fs_info->pinned_mutex);
2457 ret = find_first_extent_bit(unpin, 0, &start, &end, 2153 ret = find_first_extent_bit(unpin, 0, &start, &end,
2458 EXTENT_DIRTY); 2154 EXTENT_DIRTY);
2459 if (ret) 2155 if (ret)
@@ -2461,209 +2157,21 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
2461 2157
2462 ret = btrfs_discard_extent(root, start, end + 1 - start); 2158 ret = btrfs_discard_extent(root, start, end + 1 - start);
2463 2159
2160 /* unlocks the pinned mutex */
2464 btrfs_update_pinned_extents(root, start, end + 1 - start, 0); 2161 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
2465 clear_extent_dirty(unpin, start, end, GFP_NOFS); 2162 clear_extent_dirty(unpin, start, end, GFP_NOFS);
2466 2163
2467 if (need_resched()) { 2164 cond_resched();
2468 mutex_unlock(&root->fs_info->pinned_mutex);
2469 cond_resched();
2470 mutex_lock(&root->fs_info->pinned_mutex);
2471 }
2472 } 2165 }
2473 mutex_unlock(&root->fs_info->pinned_mutex); 2166 mutex_unlock(&root->fs_info->pinned_mutex);
2474 return ret; 2167 return ret;
2475} 2168}
2476 2169
2477static int finish_current_insert(struct btrfs_trans_handle *trans,
2478 struct btrfs_root *extent_root, int all)
2479{
2480 u64 start;
2481 u64 end;
2482 u64 priv;
2483 u64 search = 0;
2484 struct btrfs_fs_info *info = extent_root->fs_info;
2485 struct btrfs_path *path;
2486 struct pending_extent_op *extent_op, *tmp;
2487 struct list_head insert_list, update_list;
2488 int ret;
2489 int num_inserts = 0, max_inserts, restart = 0;
2490
2491 path = btrfs_alloc_path();
2492 INIT_LIST_HEAD(&insert_list);
2493 INIT_LIST_HEAD(&update_list);
2494
2495 max_inserts = extent_root->leafsize /
2496 (2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) +
2497 sizeof(struct btrfs_extent_ref) +
2498 sizeof(struct btrfs_extent_item));
2499again:
2500 mutex_lock(&info->extent_ins_mutex);
2501 while (1) {
2502 ret = find_first_extent_bit(&info->extent_ins, search, &start,
2503 &end, EXTENT_WRITEBACK);
2504 if (ret) {
2505 if (restart && !num_inserts &&
2506 list_empty(&update_list)) {
2507 restart = 0;
2508 search = 0;
2509 continue;
2510 }
2511 break;
2512 }
2513
2514 ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS);
2515 if (!ret) {
2516 if (all)
2517 restart = 1;
2518 search = end + 1;
2519 if (need_resched()) {
2520 mutex_unlock(&info->extent_ins_mutex);
2521 cond_resched();
2522 mutex_lock(&info->extent_ins_mutex);
2523 }
2524 continue;
2525 }
2526
2527 ret = get_state_private(&info->extent_ins, start, &priv);
2528 BUG_ON(ret);
2529 extent_op = (struct pending_extent_op *)(unsigned long) priv;
2530
2531 if (extent_op->type == PENDING_EXTENT_INSERT) {
2532 num_inserts++;
2533 list_add_tail(&extent_op->list, &insert_list);
2534 search = end + 1;
2535 if (num_inserts == max_inserts) {
2536 restart = 1;
2537 break;
2538 }
2539 } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
2540 list_add_tail(&extent_op->list, &update_list);
2541 search = end + 1;
2542 } else {
2543 BUG();
2544 }
2545 }
2546
2547 /*
2548 * process the update list, clear the writeback bit for it, and if
2549 * somebody marked this thing for deletion, then just unlock it and be
2550 * done; free_extents will handle it
2551 */
2552 list_for_each_entry_safe(extent_op, tmp, &update_list, list) {
2553 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
2554 extent_op->bytenr + extent_op->num_bytes - 1,
2555 EXTENT_WRITEBACK, GFP_NOFS);
2556 if (extent_op->del) {
2557 list_del_init(&extent_op->list);
2558 unlock_extent(&info->extent_ins, extent_op->bytenr,
2559 extent_op->bytenr + extent_op->num_bytes
2560 - 1, GFP_NOFS);
2561 kfree(extent_op);
2562 }
2563 }
2564 mutex_unlock(&info->extent_ins_mutex);
2565
2566 /*
2567 * still have things left on the update list, so go ahead and update
2568 * everything
2569 */
2570 if (!list_empty(&update_list)) {
2571 ret = update_backrefs(trans, extent_root, path, &update_list);
2572 BUG_ON(ret);
2573
2574 /* we may have COW'ed new blocks, so lets start over */
2575 if (all)
2576 restart = 1;
2577 }
2578
2579 /*
2580 * if no inserts need to be done, but we skipped some extents and we
2581 * need to make sure everything is cleaned then reset everything and
2582 * go back to the beginning
2583 */
2584 if (!num_inserts && restart) {
2585 search = 0;
2586 restart = 0;
2587 INIT_LIST_HEAD(&update_list);
2588 INIT_LIST_HEAD(&insert_list);
2589 goto again;
2590 } else if (!num_inserts) {
2591 goto out;
2592 }
2593
2594 /*
2595 * process the insert extents list. Again if we are deleting this
2596 * extent, then just unlock it, pin down the bytes if need be, and be
2597 * done with it. Saves us from having to actually insert the extent
2598 * into the tree and then subsequently come along and delete it
2599 */
2600 mutex_lock(&info->extent_ins_mutex);
2601 list_for_each_entry_safe(extent_op, tmp, &insert_list, list) {
2602 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
2603 extent_op->bytenr + extent_op->num_bytes - 1,
2604 EXTENT_WRITEBACK, GFP_NOFS);
2605 if (extent_op->del) {
2606 u64 used;
2607 list_del_init(&extent_op->list);
2608 unlock_extent(&info->extent_ins, extent_op->bytenr,
2609 extent_op->bytenr + extent_op->num_bytes
2610 - 1, GFP_NOFS);
2611
2612 mutex_lock(&extent_root->fs_info->pinned_mutex);
2613 ret = pin_down_bytes(trans, extent_root,
2614 extent_op->bytenr,
2615 extent_op->num_bytes, 0);
2616 mutex_unlock(&extent_root->fs_info->pinned_mutex);
2617
2618 spin_lock(&info->delalloc_lock);
2619 used = btrfs_super_bytes_used(&info->super_copy);
2620 btrfs_set_super_bytes_used(&info->super_copy,
2621 used - extent_op->num_bytes);
2622 used = btrfs_root_used(&extent_root->root_item);
2623 btrfs_set_root_used(&extent_root->root_item,
2624 used - extent_op->num_bytes);
2625 spin_unlock(&info->delalloc_lock);
2626
2627 ret = update_block_group(trans, extent_root,
2628 extent_op->bytenr,
2629 extent_op->num_bytes,
2630 0, ret > 0);
2631 BUG_ON(ret);
2632 kfree(extent_op);
2633 num_inserts--;
2634 }
2635 }
2636 mutex_unlock(&info->extent_ins_mutex);
2637
2638 ret = insert_extents(trans, extent_root, path, &insert_list,
2639 num_inserts);
2640 BUG_ON(ret);
2641
2642 /*
2643 * if restart is set for whatever reason we need to go back and start
2644 * searching through the pending list again.
2645 *
2646 * We just inserted some extents, which could have resulted in new
2647 * blocks being allocated, which would result in new blocks needing
2648 * updates, so if all is set we _must_ restart to get the updated
2649 * blocks.
2650 */
2651 if (restart || all) {
2652 INIT_LIST_HEAD(&insert_list);
2653 INIT_LIST_HEAD(&update_list);
2654 search = 0;
2655 restart = 0;
2656 num_inserts = 0;
2657 goto again;
2658 }
2659out:
2660 btrfs_free_path(path);
2661 return 0;
2662}
2663
2664static int pin_down_bytes(struct btrfs_trans_handle *trans, 2170static int pin_down_bytes(struct btrfs_trans_handle *trans,
2665 struct btrfs_root *root, 2171 struct btrfs_root *root,
2666 u64 bytenr, u64 num_bytes, int is_data) 2172 struct btrfs_path *path,
2173 u64 bytenr, u64 num_bytes, int is_data,
2174 struct extent_buffer **must_clean)
2667{ 2175{
2668 int err = 0; 2176 int err = 0;
2669 struct extent_buffer *buf; 2177 struct extent_buffer *buf;
@@ -2686,17 +2194,19 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
2686 u64 header_transid = btrfs_header_generation(buf); 2194 u64 header_transid = btrfs_header_generation(buf);
2687 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 2195 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
2688 header_owner != BTRFS_TREE_RELOC_OBJECTID && 2196 header_owner != BTRFS_TREE_RELOC_OBJECTID &&
2197 header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID &&
2689 header_transid == trans->transid && 2198 header_transid == trans->transid &&
2690 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 2199 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
2691 clean_tree_block(NULL, root, buf); 2200 *must_clean = buf;
2692 btrfs_tree_unlock(buf);
2693 free_extent_buffer(buf);
2694 return 1; 2201 return 1;
2695 } 2202 }
2696 btrfs_tree_unlock(buf); 2203 btrfs_tree_unlock(buf);
2697 } 2204 }
2698 free_extent_buffer(buf); 2205 free_extent_buffer(buf);
2699pinit: 2206pinit:
2207 btrfs_set_path_blocking(path);
2208 mutex_lock(&root->fs_info->pinned_mutex);
2209 /* unlocks the pinned mutex */
2700 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 2210 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
2701 2211
2702 BUG_ON(err < 0); 2212 BUG_ON(err < 0);
@@ -2710,7 +2220,8 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2710 struct btrfs_root *root, 2220 struct btrfs_root *root,
2711 u64 bytenr, u64 num_bytes, u64 parent, 2221 u64 bytenr, u64 num_bytes, u64 parent,
2712 u64 root_objectid, u64 ref_generation, 2222 u64 root_objectid, u64 ref_generation,
2713 u64 owner_objectid, int pin, int mark_free) 2223 u64 owner_objectid, int pin, int mark_free,
2224 int refs_to_drop)
2714{ 2225{
2715 struct btrfs_path *path; 2226 struct btrfs_path *path;
2716 struct btrfs_key key; 2227 struct btrfs_key key;
@@ -2732,6 +2243,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2732 return -ENOMEM; 2243 return -ENOMEM;
2733 2244
2734 path->reada = 1; 2245 path->reada = 1;
2246 path->leave_spinning = 1;
2735 ret = lookup_extent_backref(trans, extent_root, path, 2247 ret = lookup_extent_backref(trans, extent_root, path,
2736 bytenr, parent, root_objectid, 2248 bytenr, parent, root_objectid,
2737 ref_generation, owner_objectid, 1); 2249 ref_generation, owner_objectid, 1);
@@ -2753,9 +2265,11 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2753 break; 2265 break;
2754 } 2266 }
2755 if (!found_extent) { 2267 if (!found_extent) {
2756 ret = remove_extent_backref(trans, extent_root, path); 2268 ret = remove_extent_backref(trans, extent_root, path,
2269 refs_to_drop);
2757 BUG_ON(ret); 2270 BUG_ON(ret);
2758 btrfs_release_path(extent_root, path); 2271 btrfs_release_path(extent_root, path);
2272 path->leave_spinning = 1;
2759 ret = btrfs_search_slot(trans, extent_root, 2273 ret = btrfs_search_slot(trans, extent_root,
2760 &key, path, -1, 1); 2274 &key, path, -1, 1);
2761 if (ret) { 2275 if (ret) {
@@ -2771,8 +2285,9 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2771 btrfs_print_leaf(extent_root, path->nodes[0]); 2285 btrfs_print_leaf(extent_root, path->nodes[0]);
2772 WARN_ON(1); 2286 WARN_ON(1);
2773 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 2287 printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
2774 "root %llu gen %llu owner %llu\n", 2288 "parent %llu root %llu gen %llu owner %llu\n",
2775 (unsigned long long)bytenr, 2289 (unsigned long long)bytenr,
2290 (unsigned long long)parent,
2776 (unsigned long long)root_objectid, 2291 (unsigned long long)root_objectid,
2777 (unsigned long long)ref_generation, 2292 (unsigned long long)ref_generation,
2778 (unsigned long long)owner_objectid); 2293 (unsigned long long)owner_objectid);
@@ -2782,17 +2297,23 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2782 ei = btrfs_item_ptr(leaf, extent_slot, 2297 ei = btrfs_item_ptr(leaf, extent_slot,
2783 struct btrfs_extent_item); 2298 struct btrfs_extent_item);
2784 refs = btrfs_extent_refs(leaf, ei); 2299 refs = btrfs_extent_refs(leaf, ei);
2785 BUG_ON(refs == 0);
2786 refs -= 1;
2787 btrfs_set_extent_refs(leaf, ei, refs);
2788 2300
2301 /*
2302 * we're not allowed to delete the extent item if there
2303 * are other delayed ref updates pending
2304 */
2305
2306 BUG_ON(refs < refs_to_drop);
2307 refs -= refs_to_drop;
2308 btrfs_set_extent_refs(leaf, ei, refs);
2789 btrfs_mark_buffer_dirty(leaf); 2309 btrfs_mark_buffer_dirty(leaf);
2790 2310
2791 if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { 2311 if (refs == 0 && found_extent &&
2312 path->slots[0] == extent_slot + 1) {
2792 struct btrfs_extent_ref *ref; 2313 struct btrfs_extent_ref *ref;
2793 ref = btrfs_item_ptr(leaf, path->slots[0], 2314 ref = btrfs_item_ptr(leaf, path->slots[0],
2794 struct btrfs_extent_ref); 2315 struct btrfs_extent_ref);
2795 BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1); 2316 BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop);
2796 /* if the back ref and the extent are next to each other 2317 /* if the back ref and the extent are next to each other
2797 * they get deleted below in one shot 2318 * they get deleted below in one shot
2798 */ 2319 */
@@ -2800,11 +2321,13 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2800 num_to_del = 2; 2321 num_to_del = 2;
2801 } else if (found_extent) { 2322 } else if (found_extent) {
2802 /* otherwise delete the extent back ref */ 2323 /* otherwise delete the extent back ref */
2803 ret = remove_extent_backref(trans, extent_root, path); 2324 ret = remove_extent_backref(trans, extent_root, path,
2325 refs_to_drop);
2804 BUG_ON(ret); 2326 BUG_ON(ret);
2805 /* if refs are 0, we need to setup the path for deletion */ 2327 /* if refs are 0, we need to setup the path for deletion */
2806 if (refs == 0) { 2328 if (refs == 0) {
2807 btrfs_release_path(extent_root, path); 2329 btrfs_release_path(extent_root, path);
2330 path->leave_spinning = 1;
2808 ret = btrfs_search_slot(trans, extent_root, &key, path, 2331 ret = btrfs_search_slot(trans, extent_root, &key, path,
2809 -1, 1); 2332 -1, 1);
2810 BUG_ON(ret); 2333 BUG_ON(ret);
@@ -2814,16 +2337,18 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2814 if (refs == 0) { 2337 if (refs == 0) {
2815 u64 super_used; 2338 u64 super_used;
2816 u64 root_used; 2339 u64 root_used;
2340 struct extent_buffer *must_clean = NULL;
2817 2341
2818 if (pin) { 2342 if (pin) {
2819 mutex_lock(&root->fs_info->pinned_mutex); 2343 ret = pin_down_bytes(trans, root, path,
2820 ret = pin_down_bytes(trans, root, bytenr, num_bytes, 2344 bytenr, num_bytes,
2821 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID); 2345 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID,
2822 mutex_unlock(&root->fs_info->pinned_mutex); 2346 &must_clean);
2823 if (ret > 0) 2347 if (ret > 0)
2824 mark_free = 1; 2348 mark_free = 1;
2825 BUG_ON(ret < 0); 2349 BUG_ON(ret < 0);
2826 } 2350 }
2351
2827 /* block accounting for super block */ 2352 /* block accounting for super block */
2828 spin_lock(&info->delalloc_lock); 2353 spin_lock(&info->delalloc_lock);
2829 super_used = btrfs_super_bytes_used(&info->super_copy); 2354 super_used = btrfs_super_bytes_used(&info->super_copy);
@@ -2835,14 +2360,34 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2835 btrfs_set_root_used(&root->root_item, 2360 btrfs_set_root_used(&root->root_item,
2836 root_used - num_bytes); 2361 root_used - num_bytes);
2837 spin_unlock(&info->delalloc_lock); 2362 spin_unlock(&info->delalloc_lock);
2363
2364 /*
2365 * it is going to be very rare for someone to be waiting
2366 * on the block we're freeing. del_items might need to
2367 * schedule, so rather than get fancy, just force it
2368 * to blocking here
2369 */
2370 if (must_clean)
2371 btrfs_set_lock_blocking(must_clean);
2372
2838 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 2373 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
2839 num_to_del); 2374 num_to_del);
2840 BUG_ON(ret); 2375 BUG_ON(ret);
2841 btrfs_release_path(extent_root, path); 2376 btrfs_release_path(extent_root, path);
2842 2377
2378 if (must_clean) {
2379 clean_tree_block(NULL, root, must_clean);
2380 btrfs_tree_unlock(must_clean);
2381 free_extent_buffer(must_clean);
2382 }
2383
2843 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 2384 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2844 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 2385 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
2845 BUG_ON(ret); 2386 BUG_ON(ret);
2387 } else {
2388 invalidate_mapping_pages(info->btree_inode->i_mapping,
2389 bytenr >> PAGE_CACHE_SHIFT,
2390 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
2846 } 2391 }
2847 2392
2848 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 2393 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
@@ -2850,218 +2395,103 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2850 BUG_ON(ret); 2395 BUG_ON(ret);
2851 } 2396 }
2852 btrfs_free_path(path); 2397 btrfs_free_path(path);
2853 finish_current_insert(trans, extent_root, 0);
2854 return ret; 2398 return ret;
2855} 2399}
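The must_clean handoff above enforces a strict order: flip the buffer's lock to blocking (btrfs_del_items may schedule), finish the tree deletions, and only then clean and release the block. Reduced to a toy trace (printf stand-ins for the real calls):

    #include <stdio.h>

    struct buffer { const char *name; };

    static void free_one(struct buffer *must_clean)
    {
            if (must_clean)
                    printf("set %s lock blocking\n", must_clean->name);
            printf("btrfs_del_items() (may sleep)\n");      /* tree surgery first */
            if (must_clean)                                 /* cleanup strictly after */
                    printf("clean_tree_block(%s); unlock; free\n", must_clean->name);
    }

    int main(void)
    {
            struct buffer eb = { "eb" };
            free_one(&eb);
            return 0;
    }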
2856 2400
2857/* 2401/*
2858 * find all the blocks marked as pending in the radix tree and remove 2402 * remove an extent from the root, returns 0 on success
2859 * them from the extent map
2860 */ 2403 */
2861static int del_pending_extents(struct btrfs_trans_handle *trans, 2404static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2862 struct btrfs_root *extent_root, int all) 2405 struct btrfs_root *root,
2406 u64 bytenr, u64 num_bytes, u64 parent,
2407 u64 root_objectid, u64 ref_generation,
2408 u64 owner_objectid, int pin,
2409 int refs_to_drop)
2863{ 2410{
2864 int ret; 2411 WARN_ON(num_bytes < root->sectorsize);
2865 int err = 0;
2866 u64 start;
2867 u64 end;
2868 u64 priv;
2869 u64 search = 0;
2870 int nr = 0, skipped = 0;
2871 struct extent_io_tree *pending_del;
2872 struct extent_io_tree *extent_ins;
2873 struct pending_extent_op *extent_op;
2874 struct btrfs_fs_info *info = extent_root->fs_info;
2875 struct list_head delete_list;
2876
2877 INIT_LIST_HEAD(&delete_list);
2878 extent_ins = &extent_root->fs_info->extent_ins;
2879 pending_del = &extent_root->fs_info->pending_del;
2880
2881again:
2882 mutex_lock(&info->extent_ins_mutex);
2883 while (1) {
2884 ret = find_first_extent_bit(pending_del, search, &start, &end,
2885 EXTENT_WRITEBACK);
2886 if (ret) {
2887 if (all && skipped && !nr) {
2888 search = 0;
2889 skipped = 0;
2890 continue;
2891 }
2892 mutex_unlock(&info->extent_ins_mutex);
2893 break;
2894 }
2895
2896 ret = try_lock_extent(extent_ins, start, end, GFP_NOFS);
2897 if (!ret) {
2898 search = end+1;
2899 skipped = 1;
2900
2901 if (need_resched()) {
2902 mutex_unlock(&info->extent_ins_mutex);
2903 cond_resched();
2904 mutex_lock(&info->extent_ins_mutex);
2905 }
2906
2907 continue;
2908 }
2909 BUG_ON(ret < 0);
2910
2911 ret = get_state_private(pending_del, start, &priv);
2912 BUG_ON(ret);
2913 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2914
2915 clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK,
2916 GFP_NOFS);
2917 if (!test_range_bit(extent_ins, start, end,
2918 EXTENT_WRITEBACK, 0)) {
2919 list_add_tail(&extent_op->list, &delete_list);
2920 nr++;
2921 } else {
2922 kfree(extent_op);
2923
2924 ret = get_state_private(&info->extent_ins, start,
2925 &priv);
2926 BUG_ON(ret);
2927 extent_op = (struct pending_extent_op *)
2928 (unsigned long)priv;
2929
2930 clear_extent_bits(&info->extent_ins, start, end,
2931 EXTENT_WRITEBACK, GFP_NOFS);
2932
2933 if (extent_op->type == PENDING_BACKREF_UPDATE) {
2934 list_add_tail(&extent_op->list, &delete_list);
2935 search = end + 1;
2936 nr++;
2937 continue;
2938 }
2939
2940 mutex_lock(&extent_root->fs_info->pinned_mutex);
2941 ret = pin_down_bytes(trans, extent_root, start,
2942 end + 1 - start, 0);
2943 mutex_unlock(&extent_root->fs_info->pinned_mutex);
2944
2945 ret = update_block_group(trans, extent_root, start,
2946 end + 1 - start, 0, ret > 0);
2947
2948 unlock_extent(extent_ins, start, end, GFP_NOFS);
2949 BUG_ON(ret);
2950 kfree(extent_op);
2951 }
2952 if (ret)
2953 err = ret;
2954
2955 search = end + 1;
2956
2957 if (need_resched()) {
2958 mutex_unlock(&info->extent_ins_mutex);
2959 cond_resched();
2960 mutex_lock(&info->extent_ins_mutex);
2961 }
2962 }
2963 2412
2964 if (nr) { 2413 /*
2965 ret = free_extents(trans, extent_root, &delete_list); 2414 * if metadata, always pin
2966 BUG_ON(ret); 2415 * if data, pin when any transaction has committed this extent
2967 } 2416 */
2417 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID ||
2418 ref_generation != trans->transid)
2419 pin = 1;
2968 2420
2969 if (all && skipped) { 2421 if (ref_generation != trans->transid)
2970 INIT_LIST_HEAD(&delete_list); 2422 pin = 1;
2971 search = 0;
2972 nr = 0;
2973 goto again;
2974 }
2975 2423
2976 if (!err) 2424 return __free_extent(trans, root, bytenr, num_bytes, parent,
2977 finish_current_insert(trans, extent_root, 0); 2425 root_objectid, ref_generation,
2978 return err; 2426 owner_objectid, pin, pin == 0, refs_to_drop);
2979} 2427}
2980 2428
2981/* 2429/*
2982 * remove an extent from the root, returns 0 on success 2430 * when we free an extent, it is possible (and likely) that we free the last
2431 * delayed ref for that extent as well. This searches the delayed ref tree for
2432 * a given extent, and if there are no other delayed refs to be processed, it
2433 * removes it from the tree.
2983 */ 2434 */
2984static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 2435static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
2985 struct btrfs_root *root, 2436 struct btrfs_root *root, u64 bytenr)
2986 u64 bytenr, u64 num_bytes, u64 parent,
2987 u64 root_objectid, u64 ref_generation,
2988 u64 owner_objectid, int pin)
2989{ 2437{
2990 struct btrfs_root *extent_root = root->fs_info->extent_root; 2438 struct btrfs_delayed_ref_head *head;
2991 int pending_ret; 2439 struct btrfs_delayed_ref_root *delayed_refs;
2440 struct btrfs_delayed_ref_node *ref;
2441 struct rb_node *node;
2992 int ret; 2442 int ret;
2993 2443
2994 WARN_ON(num_bytes < root->sectorsize); 2444 delayed_refs = &trans->transaction->delayed_refs;
2995 if (root == extent_root) { 2445 spin_lock(&delayed_refs->lock);
2996 struct pending_extent_op *extent_op = NULL; 2446 head = btrfs_find_delayed_ref_head(trans, bytenr);
2997 2447 if (!head)
2998 mutex_lock(&root->fs_info->extent_ins_mutex); 2448 goto out;
2999 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
3000 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
3001 u64 priv;
3002 ret = get_state_private(&root->fs_info->extent_ins,
3003 bytenr, &priv);
3004 BUG_ON(ret);
3005 extent_op = (struct pending_extent_op *)
3006 (unsigned long)priv;
3007 2449
3008 extent_op->del = 1; 2450 node = rb_prev(&head->node.rb_node);
3009 if (extent_op->type == PENDING_EXTENT_INSERT) { 2451 if (!node)
3010 mutex_unlock(&root->fs_info->extent_ins_mutex); 2452 goto out;
3011 return 0;
3012 }
3013 }
3014 2453
3015 if (extent_op) { 2454 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
3016 ref_generation = extent_op->orig_generation;
3017 parent = extent_op->orig_parent;
3018 }
3019 2455
3020 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2456 /* there are still entries for this ref, we can't drop it */
3021 BUG_ON(!extent_op); 2457 if (ref->bytenr == bytenr)
3022 2458 goto out;
3023 extent_op->type = PENDING_EXTENT_DELETE;
3024 extent_op->bytenr = bytenr;
3025 extent_op->num_bytes = num_bytes;
3026 extent_op->parent = parent;
3027 extent_op->orig_parent = parent;
3028 extent_op->generation = ref_generation;
3029 extent_op->orig_generation = ref_generation;
3030 extent_op->level = (int)owner_objectid;
3031 INIT_LIST_HEAD(&extent_op->list);
3032 extent_op->del = 0;
3033
3034 set_extent_bits(&root->fs_info->pending_del,
3035 bytenr, bytenr + num_bytes - 1,
3036 EXTENT_WRITEBACK, GFP_NOFS);
3037 set_state_private(&root->fs_info->pending_del,
3038 bytenr, (unsigned long)extent_op);
3039 mutex_unlock(&root->fs_info->extent_ins_mutex);
3040 return 0;
3041 }
3042 /* if metadata always pin */
3043 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
3044 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
3045 mutex_lock(&root->fs_info->pinned_mutex);
3046 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
3047 mutex_unlock(&root->fs_info->pinned_mutex);
3048 update_reserved_extents(root, bytenr, num_bytes, 0);
3049 return 0;
3050 }
3051 pin = 1;
3052 }
3053 2459
3054 /* if data pin when any transaction has committed this */ 2460 /*
3055 if (ref_generation != trans->transid) 2461 * waiting for the lock here would deadlock. If someone else has it
3056 pin = 1; 2462 * locked they are already in the process of dropping it anyway
2463 */
2464 if (!mutex_trylock(&head->mutex))
2465 goto out;
3057 2466
3058 ret = __free_extent(trans, root, bytenr, num_bytes, parent, 2467 /*
3059 root_objectid, ref_generation, 2468 * at this point we have a head with no other entries. Go
3060 owner_objectid, pin, pin == 0); 2469 * ahead and process it.
2470 */
2471 head->node.in_tree = 0;
2472 rb_erase(&head->node.rb_node, &delayed_refs->root);
3061 2473
3062 finish_current_insert(trans, root->fs_info->extent_root, 0); 2474 delayed_refs->num_entries--;
3063 pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0); 2475
3064 return ret ? ret : pending_ret; 2476 /*
2477 * we don't take a ref on the node because we're removing it from the
2478 * tree, so we just steal the ref the tree was holding.
2479 */
2480 delayed_refs->num_heads--;
2481 if (list_empty(&head->cluster))
2482 delayed_refs->num_heads_ready--;
2483
2484 list_del_init(&head->cluster);
2485 spin_unlock(&delayed_refs->lock);
2486
2487 ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
2488 &head->node, head->must_insert_reserved);
2489 BUG_ON(ret);
2490 btrfs_put_delayed_ref(&head->node);
2491 return 0;
2492out:
2493 spin_unlock(&delayed_refs->lock);
2494 return 0;
3065} 2495}
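check_ref_cleanup only fires when the head is the sole node left for that bytenr and its mutex is free; the mutex_trylock is deliberate, since blocking would deadlock against whoever is already dropping the ref. The control flow, stripped to pthread stand-ins for the kernel primitives:

    #include <pthread.h>
    #include <stdio.h>

    static int try_cleanup(pthread_mutex_t *head_mutex, int other_entries)
    {
            if (other_entries)
                    return 0;       /* more refs pending: leave the head queued */
            if (pthread_mutex_trylock(head_mutex))
                    return 0;       /* contended: the owner is handling it */
            /* ...unlink the head and run it immediately... */
            pthread_mutex_unlock(head_mutex);
            return 1;
    }

    int main(void)
    {
            pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
            printf("cleaned: %d\n", try_cleanup(&m, 0));    /* 1 */
            return 0;
    }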
3066 2496
3067int btrfs_free_extent(struct btrfs_trans_handle *trans, 2497int btrfs_free_extent(struct btrfs_trans_handle *trans,
@@ -3072,9 +2502,30 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
3072{ 2502{
3073 int ret; 2503 int ret;
3074 2504
3075 ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, 2505 /*
3076 root_objectid, ref_generation, 2506 * tree log blocks never actually go into the extent allocation
3077 owner_objectid, pin); 2507 * tree, just update pinning info and exit early.
2508 *
2509 * data extents referenced by the tree log do need to have
2510 * their reference counts bumped.
2511 */
2512 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID &&
2513 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
2514 mutex_lock(&root->fs_info->pinned_mutex);
2515
2516 /* unlocks the pinned mutex */
2517 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
2518 update_reserved_extents(root, bytenr, num_bytes, 0);
2519 ret = 0;
2520 } else {
2521 ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent,
2522 root_objectid, ref_generation,
2523 owner_objectid,
2524 BTRFS_DROP_DELAYED_REF, 1);
2525 BUG_ON(ret);
2526 ret = check_ref_cleanup(trans, root, bytenr);
2527 BUG_ON(ret);
2528 }
3078 return ret; 2529 return ret;
3079} 2530}
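The free path is now a two-way branch: log-tree metadata is pinned on the spot, everything else becomes a queued drop plus an opportunistic cleanup. A sketch of the predicate, with made-up constant values standing in for the real objectids:

    #include <stdint.h>
    #include <stdio.h>

    #define LOG_ROOT_ID     100ULL  /* invented values, illustration only */
    #define FIRST_FREE_ID   256ULL

    /* 1: pin immediately (log-tree metadata), 0: queue a delayed drop */
    static int pin_immediately(uint64_t root_objectid, uint64_t owner_objectid)
    {
            return root_objectid == LOG_ROOT_ID &&
                   owner_objectid < FIRST_FREE_ID;
    }

    int main(void)
    {
            printf("%d\n", pin_immediately(LOG_ROOT_ID, 1)); /* 1: pin now */
            printf("%d\n", pin_immediately(7, 1));           /* 0: delayed */
            return 0;
    }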
3080 2531
@@ -3475,10 +2926,10 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3475static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 2926static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3476 struct btrfs_root *root, u64 parent, 2927 struct btrfs_root *root, u64 parent,
3477 u64 root_objectid, u64 ref_generation, 2928 u64 root_objectid, u64 ref_generation,
3478 u64 owner, struct btrfs_key *ins) 2929 u64 owner, struct btrfs_key *ins,
2930 int ref_mod)
3479{ 2931{
3480 int ret; 2932 int ret;
3481 int pending_ret;
3482 u64 super_used; 2933 u64 super_used;
3483 u64 root_used; 2934 u64 root_used;
3484 u64 num_bytes = ins->offset; 2935 u64 num_bytes = ins->offset;
@@ -3503,33 +2954,6 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3503 btrfs_set_root_used(&root->root_item, root_used + num_bytes); 2954 btrfs_set_root_used(&root->root_item, root_used + num_bytes);
3504 spin_unlock(&info->delalloc_lock); 2955 spin_unlock(&info->delalloc_lock);
3505 2956
3506 if (root == extent_root) {
3507 struct pending_extent_op *extent_op;
3508
3509 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
3510 BUG_ON(!extent_op);
3511
3512 extent_op->type = PENDING_EXTENT_INSERT;
3513 extent_op->bytenr = ins->objectid;
3514 extent_op->num_bytes = ins->offset;
3515 extent_op->parent = parent;
3516 extent_op->orig_parent = 0;
3517 extent_op->generation = ref_generation;
3518 extent_op->orig_generation = 0;
3519 extent_op->level = (int)owner;
3520 INIT_LIST_HEAD(&extent_op->list);
3521 extent_op->del = 0;
3522
3523 mutex_lock(&root->fs_info->extent_ins_mutex);
3524 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
3525 ins->objectid + ins->offset - 1,
3526 EXTENT_WRITEBACK, GFP_NOFS);
3527 set_state_private(&root->fs_info->extent_ins,
3528 ins->objectid, (unsigned long)extent_op);
3529 mutex_unlock(&root->fs_info->extent_ins_mutex);
3530 goto update_block;
3531 }
3532
3533 memcpy(&keys[0], ins, sizeof(*ins)); 2957 memcpy(&keys[0], ins, sizeof(*ins));
3534 keys[1].objectid = ins->objectid; 2958 keys[1].objectid = ins->objectid;
3535 keys[1].type = BTRFS_EXTENT_REF_KEY; 2959 keys[1].type = BTRFS_EXTENT_REF_KEY;
@@ -3540,37 +2964,31 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3540 path = btrfs_alloc_path(); 2964 path = btrfs_alloc_path();
3541 BUG_ON(!path); 2965 BUG_ON(!path);
3542 2966
2967 path->leave_spinning = 1;
3543 ret = btrfs_insert_empty_items(trans, extent_root, path, keys, 2968 ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
3544 sizes, 2); 2969 sizes, 2);
3545 BUG_ON(ret); 2970 BUG_ON(ret);
3546 2971
3547 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 2972 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3548 struct btrfs_extent_item); 2973 struct btrfs_extent_item);
3549 btrfs_set_extent_refs(path->nodes[0], extent_item, 1); 2974 btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod);
3550 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, 2975 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
3551 struct btrfs_extent_ref); 2976 struct btrfs_extent_ref);
3552 2977
3553 btrfs_set_ref_root(path->nodes[0], ref, root_objectid); 2978 btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
3554 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); 2979 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
3555 btrfs_set_ref_objectid(path->nodes[0], ref, owner); 2980 btrfs_set_ref_objectid(path->nodes[0], ref, owner);
3556 btrfs_set_ref_num_refs(path->nodes[0], ref, 1); 2981 btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod);
3557 2982
3558 btrfs_mark_buffer_dirty(path->nodes[0]); 2983 btrfs_mark_buffer_dirty(path->nodes[0]);
3559 2984
3560 trans->alloc_exclude_start = 0; 2985 trans->alloc_exclude_start = 0;
3561 trans->alloc_exclude_nr = 0; 2986 trans->alloc_exclude_nr = 0;
3562 btrfs_free_path(path); 2987 btrfs_free_path(path);
3563 finish_current_insert(trans, extent_root, 0);
3564 pending_ret = del_pending_extents(trans, extent_root, 0);
3565 2988
3566 if (ret) 2989 if (ret)
3567 goto out; 2990 goto out;
3568 if (pending_ret) {
3569 ret = pending_ret;
3570 goto out;
3571 }
3572 2991
3573update_block:
3574 ret = update_block_group(trans, root, ins->objectid, 2992 ret = update_block_group(trans, root, ins->objectid,
3575 ins->offset, 1, 0); 2993 ins->offset, 1, 0);
3576 if (ret) { 2994 if (ret) {
@@ -3592,9 +3010,12 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3592 3010
3593 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) 3011 if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
3594 return 0; 3012 return 0;
3595 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 3013
3596 ref_generation, owner, ins); 3014 ret = btrfs_add_delayed_ref(trans, ins->objectid,
3597 update_reserved_extents(root, ins->objectid, ins->offset, 0); 3015 ins->offset, parent, root_objectid,
3016 ref_generation, owner,
3017 BTRFS_ADD_DELAYED_EXTENT, 0);
3018 BUG_ON(ret);
3598 return ret; 3019 return ret;
3599} 3020}
3600 3021
@@ -3621,7 +3042,7 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
3621 BUG_ON(ret); 3042 BUG_ON(ret);
3622 put_block_group(block_group); 3043 put_block_group(block_group);
3623 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 3044 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
3624 ref_generation, owner, ins); 3045 ref_generation, owner, ins, 1);
3625 return ret; 3046 return ret;
3626} 3047}
3627 3048
@@ -3640,20 +3061,18 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
3640 u64 search_end, struct btrfs_key *ins, u64 data) 3061 u64 search_end, struct btrfs_key *ins, u64 data)
3641{ 3062{
3642 int ret; 3063 int ret;
3643
3644 ret = __btrfs_reserve_extent(trans, root, num_bytes, 3064 ret = __btrfs_reserve_extent(trans, root, num_bytes,
3645 min_alloc_size, empty_size, hint_byte, 3065 min_alloc_size, empty_size, hint_byte,
3646 search_end, ins, data); 3066 search_end, ins, data);
3647 BUG_ON(ret); 3067 BUG_ON(ret);
3648 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 3068 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
3649 ret = __btrfs_alloc_reserved_extent(trans, root, parent, 3069 ret = btrfs_add_delayed_ref(trans, ins->objectid,
3650 root_objectid, ref_generation, 3070 ins->offset, parent, root_objectid,
3651 owner_objectid, ins); 3071 ref_generation, owner_objectid,
3072 BTRFS_ADD_DELAYED_EXTENT, 0);
3652 BUG_ON(ret); 3073 BUG_ON(ret);
3653
3654 } else {
3655 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3656 } 3074 }
3075 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3657 return ret; 3076 return ret;
3658} 3077}
3659 3078
@@ -3789,7 +3208,7 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
3789 3208
3790 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 3209 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
3791 3210
3792 ret = __btrfs_free_extent(trans, root, disk_bytenr, 3211 ret = btrfs_free_extent(trans, root, disk_bytenr,
3793 btrfs_file_extent_disk_num_bytes(leaf, fi), 3212 btrfs_file_extent_disk_num_bytes(leaf, fi),
3794 leaf->start, leaf_owner, leaf_generation, 3213 leaf->start, leaf_owner, leaf_generation,
3795 key.objectid, 0); 3214 key.objectid, 0);
@@ -3829,7 +3248,7 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3829 */ 3248 */
3830 for (i = 0; i < ref->nritems; i++) { 3249 for (i = 0; i < ref->nritems; i++) {
3831 info = ref->extents + sorted[i].slot; 3250 info = ref->extents + sorted[i].slot;
3832 ret = __btrfs_free_extent(trans, root, info->bytenr, 3251 ret = btrfs_free_extent(trans, root, info->bytenr,
3833 info->num_bytes, ref->bytenr, 3252 info->num_bytes, ref->bytenr,
3834 ref->owner, ref->generation, 3253 ref->owner, ref->generation,
3835 info->objectid, 0); 3254 info->objectid, 0);
@@ -3846,12 +3265,13 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3846 return 0; 3265 return 0;
3847} 3266}
3848 3267
3849static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, 3268static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
3269 struct btrfs_root *root, u64 start,
3850 u64 len, u32 *refs) 3270 u64 len, u32 *refs)
3851{ 3271{
3852 int ret; 3272 int ret;
3853 3273
3854 ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); 3274 ret = btrfs_lookup_extent_ref(trans, root, start, len, refs);
3855 BUG_ON(ret); 3275 BUG_ON(ret);
3856 3276
3857#if 0 /* some debugging code in case we see problems here */ 3277#if 0 /* some debugging code in case we see problems here */
@@ -3959,7 +3379,8 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
3959 * we just decrement it below and don't update any 3379 * we just decrement it below and don't update any
3960 * of the refs the leaf points to. 3380 * of the refs the leaf points to.
3961 */ 3381 */
3962 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); 3382 ret = drop_snap_lookup_refcount(trans, root, bytenr,
3383 blocksize, &refs);
3963 BUG_ON(ret); 3384 BUG_ON(ret);
3964 if (refs != 1) 3385 if (refs != 1)
3965 continue; 3386 continue;
@@ -4010,7 +3431,7 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
4010 */ 3431 */
4011 for (i = 0; i < refi; i++) { 3432 for (i = 0; i < refi; i++) {
4012 bytenr = sorted[i].bytenr; 3433 bytenr = sorted[i].bytenr;
4013 ret = __btrfs_free_extent(trans, root, bytenr, 3434 ret = btrfs_free_extent(trans, root, bytenr,
4014 blocksize, eb->start, 3435 blocksize, eb->start,
4015 root_owner, root_gen, 0, 1); 3436 root_owner, root_gen, 0, 1);
4016 BUG_ON(ret); 3437 BUG_ON(ret);
@@ -4053,7 +3474,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4053 3474
4054 WARN_ON(*level < 0); 3475 WARN_ON(*level < 0);
4055 WARN_ON(*level >= BTRFS_MAX_LEVEL); 3476 WARN_ON(*level >= BTRFS_MAX_LEVEL);
4056 ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, 3477 ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start,
4057 path->nodes[*level]->len, &refs); 3478 path->nodes[*level]->len, &refs);
4058 BUG_ON(ret); 3479 BUG_ON(ret);
4059 if (refs > 1) 3480 if (refs > 1)
@@ -4104,7 +3525,8 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4104 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 3525 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
4105 blocksize = btrfs_level_size(root, *level - 1); 3526 blocksize = btrfs_level_size(root, *level - 1);
4106 3527
4107 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); 3528 ret = drop_snap_lookup_refcount(trans, root, bytenr,
3529 blocksize, &refs);
4108 BUG_ON(ret); 3530 BUG_ON(ret);
4109 3531
4110 /* 3532 /*
@@ -4119,7 +3541,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4119 root_gen = btrfs_header_generation(parent); 3541 root_gen = btrfs_header_generation(parent);
4120 path->slots[*level]++; 3542 path->slots[*level]++;
4121 3543
4122 ret = __btrfs_free_extent(trans, root, bytenr, 3544 ret = btrfs_free_extent(trans, root, bytenr,
4123 blocksize, parent->start, 3545 blocksize, parent->start,
4124 root_owner, root_gen, 3546 root_owner, root_gen,
4125 *level - 1, 1); 3547 *level - 1, 1);
@@ -4165,7 +3587,7 @@ out:
4165 * cleanup and free the reference on the last node 3587 * cleanup and free the reference on the last node
4166 * we processed 3588 * we processed
4167 */ 3589 */
4168 ret = __btrfs_free_extent(trans, root, bytenr, blocksize, 3590 ret = btrfs_free_extent(trans, root, bytenr, blocksize,
4169 parent->start, root_owner, root_gen, 3591 parent->start, root_owner, root_gen,
4170 *level, 1); 3592 *level, 1);
4171 free_extent_buffer(path->nodes[*level]); 3593 free_extent_buffer(path->nodes[*level]);
@@ -4354,6 +3776,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
4354 struct btrfs_path *path; 3776 struct btrfs_path *path;
4355 int i; 3777 int i;
4356 int orig_level; 3778 int orig_level;
3779 int update_count;
4357 struct btrfs_root_item *root_item = &root->root_item; 3780 struct btrfs_root_item *root_item = &root->root_item;
4358 3781
4359 WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); 3782 WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
@@ -4395,6 +3818,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
4395 } 3818 }
4396 } 3819 }
4397 while (1) { 3820 while (1) {
3821 unsigned long update;
4398 wret = walk_down_tree(trans, root, path, &level); 3822 wret = walk_down_tree(trans, root, path, &level);
4399 if (wret > 0) 3823 if (wret > 0)
4400 break; 3824 break;
@@ -4407,12 +3831,21 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
4407 break; 3831 break;
4408 if (wret < 0) 3832 if (wret < 0)
4409 ret = wret; 3833 ret = wret;
4410 if (trans->transaction->in_commit) { 3834 if (trans->transaction->in_commit ||
3835 trans->transaction->delayed_refs.flushing) {
4411 ret = -EAGAIN; 3836 ret = -EAGAIN;
4412 break; 3837 break;
4413 } 3838 }
4414 atomic_inc(&root->fs_info->throttle_gen); 3839 atomic_inc(&root->fs_info->throttle_gen);
4415 wake_up(&root->fs_info->transaction_throttle); 3840 wake_up(&root->fs_info->transaction_throttle);
3841 for (update_count = 0; update_count < 16; update_count++) {
3842 update = trans->delayed_ref_updates;
3843 trans->delayed_ref_updates = 0;
3844 if (update)
3845 btrfs_run_delayed_refs(trans, root, update);
3846 else
3847 break;
3848 }
4416 } 3849 }
4417 for (i = 0; i <= orig_level; i++) { 3850 for (i = 0; i <= orig_level; i++) {
4418 if (path->nodes[i]) { 3851 if (path->nodes[i]) {
@@ -5457,6 +4890,7 @@ static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
5457 root->root_key.objectid, 4890 root->root_key.objectid,
5458 trans->transid, key.objectid); 4891 trans->transid, key.objectid);
5459 BUG_ON(ret); 4892 BUG_ON(ret);
4893
5460 ret = btrfs_free_extent(trans, root, 4894 ret = btrfs_free_extent(trans, root,
5461 bytenr, num_bytes, leaf->start, 4895 bytenr, num_bytes, leaf->start,
5462 btrfs_header_owner(leaf), 4896 btrfs_header_owner(leaf),
@@ -5768,9 +5202,6 @@ static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
5768 ref_path, NULL, NULL); 5202 ref_path, NULL, NULL);
5769 BUG_ON(ret); 5203 BUG_ON(ret);
5770 5204
5771 if (root == root->fs_info->extent_root)
5772 btrfs_extent_post_op(trans, root);
5773
5774 return 0; 5205 return 0;
5775} 5206}
5776 5207
@@ -6038,6 +5469,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
6038 if (!path) 5469 if (!path)
6039 return -ENOMEM; 5470 return -ENOMEM;
6040 5471
5472 path->leave_spinning = 1;
6041 ret = btrfs_insert_empty_inode(trans, root, path, objectid); 5473 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
6042 if (ret) 5474 if (ret)
6043 goto out; 5475 goto out;
@@ -6208,6 +5640,9 @@ again:
6208 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); 5640 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
6209 mutex_unlock(&root->fs_info->cleaner_mutex); 5641 mutex_unlock(&root->fs_info->cleaner_mutex);
6210 5642
5643 trans = btrfs_start_transaction(info->tree_root, 1);
5644 btrfs_commit_transaction(trans, info->tree_root);
5645
6211 while (1) { 5646 while (1) {
6212 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5647 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6213 if (ret < 0) 5648 if (ret < 0)
@@ -6466,7 +5901,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
6466 5901
6467 extent_root = root->fs_info->extent_root; 5902 extent_root = root->fs_info->extent_root;
6468 5903
6469 root->fs_info->last_trans_new_blockgroup = trans->transid; 5904 root->fs_info->last_trans_log_full_commit = trans->transid;
6470 5905
6471 cache = kzalloc(sizeof(*cache), GFP_NOFS); 5906 cache = kzalloc(sizeof(*cache), GFP_NOFS);
6472 if (!cache) 5907 if (!cache)
@@ -6500,9 +5935,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
6500 sizeof(cache->item)); 5935 sizeof(cache->item));
6501 BUG_ON(ret); 5936 BUG_ON(ret);
6502 5937
6503 finish_current_insert(trans, extent_root, 0);
6504 ret = del_pending_extents(trans, extent_root, 0);
6505 BUG_ON(ret);
6506 set_avail_alloc_bits(extent_root->fs_info, type); 5938 set_avail_alloc_bits(extent_root->fs_info, type);
6507 5939
6508 return 0; 5940 return 0;
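
The extent-tree.c hunks above replace the old pending-extent bookkeeping with btrfs_add_delayed_ref(), which queues reference-count changes so btrfs_run_delayed_refs() can apply them in batches later. The standalone C sketch below illustrates that accumulate-then-apply pattern only; every name in it is illustrative, not the kernel's API (the real code keys a per-transaction rbtree by bytenr so repeated updates to one extent collapse into a single tree modification).

/* Standalone sketch of batched reference-count updates, in the spirit
 * of the delayed-ref queue introduced above. Illustrative names only. */
#include <stdio.h>
#include <stdlib.h>

struct delayed_ref {
	unsigned long long bytenr;	/* which extent */
	int delta;			/* +1 to add a ref, -1 to drop one */
	struct delayed_ref *next;
};

static struct delayed_ref *pending;

/* queue a ref-count change instead of touching the extent tree now */
static void add_delayed_ref(unsigned long long bytenr, int delta)
{
	struct delayed_ref *ref = malloc(sizeof(*ref));

	ref->bytenr = bytenr;
	ref->delta = delta;
	ref->next = pending;
	pending = ref;
}

/* run all queued updates in one pass, as a commit would */
static void run_delayed_refs(void)
{
	while (pending) {
		struct delayed_ref *ref = pending;

		pending = ref->next;
		printf("extent %llu: ref count %+d\n", ref->bytenr, ref->delta);
		free(ref);
	}
}

int main(void)
{
	add_delayed_ref(4096, 1);
	add_delayed_ref(8192, 1);
	add_delayed_ref(4096, -1);
	run_delayed_refs();
	return 0;
}
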
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ebe6b29e6069..08085af089e2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3124,20 +3124,15 @@ void free_extent_buffer(struct extent_buffer *eb)
 int clear_extent_buffer_dirty(struct extent_io_tree *tree,
 			      struct extent_buffer *eb)
 {
-	int set;
 	unsigned long i;
 	unsigned long num_pages;
 	struct page *page;
 
-	u64 start = eb->start;
-	u64 end = start + eb->len - 1;
-
-	set = clear_extent_dirty(tree, start, end, GFP_NOFS);
 	num_pages = num_extent_pages(eb->start, eb->len);
 
 	for (i = 0; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
-		if (!set && !PageDirty(page))
+		if (!PageDirty(page))
 			continue;
 
 		lock_page(page);
@@ -3146,22 +3141,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
 		else
 			set_page_private(page, EXTENT_PAGE_PRIVATE);
 
-		/*
-		 * if we're on the last page or the first page and the
-		 * block isn't aligned on a page boundary, do extra checks
-		 * to make sure we don't clean page that is partially dirty
-		 */
-		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
-		    ((i == num_pages - 1) &&
-		     ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
-			start = (u64)page->index << PAGE_CACHE_SHIFT;
-			end = start + PAGE_CACHE_SIZE - 1;
-			if (test_range_bit(tree, start, end,
-					   EXTENT_DIRTY, 0)) {
-				unlock_page(page);
-				continue;
-			}
-		}
 		clear_page_dirty_for_io(page);
 		spin_lock_irq(&page->mapping->tree_lock);
 		if (!PageDirty(page)) {
@@ -3187,29 +3166,13 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
 {
 	unsigned long i;
 	unsigned long num_pages;
+	int was_dirty = 0;
 
+	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
 	num_pages = num_extent_pages(eb->start, eb->len);
-	for (i = 0; i < num_pages; i++) {
-		struct page *page = extent_buffer_page(eb, i);
-		/* writepage may need to do something special for the
-		 * first page, we have to make sure page->private is
-		 * properly set.  releasepage may drop page->private
-		 * on us if the page isn't already dirty.
-		 */
-		lock_page(page);
-		if (i == 0) {
-			set_page_extent_head(page, eb->len);
-		} else if (PagePrivate(page) &&
-			   page->private != EXTENT_PAGE_PRIVATE) {
-			set_page_extent_mapped(page);
-		}
+	for (i = 0; i < num_pages; i++)
 		__set_page_dirty_nobuffers(extent_buffer_page(eb, i));
-		set_extent_dirty(tree, page_offset(page),
-				 page_offset(page) + PAGE_CACHE_SIZE - 1,
-				 GFP_NOFS);
-		unlock_page(page);
-	}
-	return 0;
+	return was_dirty;
 }
 
 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
@@ -3789,6 +3752,10 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
 		ret = 0;
 		goto out;
 	}
+	if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+		ret = 0;
+		goto out;
+	}
 	/* at this point we can safely release the extent buffer */
 	num_pages = num_extent_pages(eb->start, eb->len);
 	for (i = 0; i < num_pages; i++)
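
set_extent_buffer_dirty() above now tracks dirtiness with a single EXTENT_BUFFER_DIRTY bit in eb->bflags instead of ranges in the extent_io tree, and test_and_set_bit() both marks the buffer dirty and reports whether it already was, in one atomic step. A userspace sketch of that idiom follows; the struct and helper are illustrative, and the atomic is modeled with a GCC builtin rather than the kernel's bitops.

/* Sketch of flag-based dirty tracking as used above; not kernel code. */
#include <stdio.h>

#define EXTENT_BUFFER_DIRTY 2

struct extent_buffer_sketch {
	unsigned long bflags;
};

/* returns nonzero if the buffer was already dirty */
static int mark_dirty(struct extent_buffer_sketch *eb)
{
	unsigned long mask = 1UL << EXTENT_BUFFER_DIRTY;
	unsigned long old = __atomic_fetch_or(&eb->bflags, mask,
					      __ATOMIC_SEQ_CST);

	return (old & mask) != 0;
}

int main(void)
{
	struct extent_buffer_sketch eb = { 0 };

	printf("was dirty: %d\n", mark_dirty(&eb));	/* prints 0 */
	printf("was dirty: %d\n", mark_dirty(&eb));	/* prints 1 */
	return 0;
}
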
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 1f9df88afbf6..5bc20abf3f3d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -25,6 +25,7 @@
 /* these are bit numbers for test/set bit */
 #define EXTENT_BUFFER_UPTODATE 0
 #define EXTENT_BUFFER_BLOCKING 1
+#define EXTENT_BUFFER_DIRTY 2
 
 /*
  * page->private values.  Every page that is controlled by the extent
@@ -254,6 +255,8 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
 			      struct extent_buffer *eb);
 int set_extent_buffer_dirty(struct extent_io_tree *tree,
 			     struct extent_buffer *eb);
+int test_extent_buffer_dirty(struct extent_io_tree *tree,
+			     struct extent_buffer *eb);
 int set_extent_buffer_uptodate(struct extent_io_tree *tree,
 			       struct extent_buffer *eb);
 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 964652435fd1..9b99886562d0 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -52,6 +52,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
 	file_key.offset = pos;
 	btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
 
+	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
 				      sizeof(*item));
 	if (ret < 0)
@@ -523,6 +524,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 		key.offset = end_byte - 1;
 		key.type = BTRFS_EXTENT_CSUM_KEY;
 
+		path->leave_spinning = 1;
 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 		if (ret > 0) {
 			if (path->slots[0] == 0)
@@ -757,8 +759,10 @@ insert:
 	} else {
 		ins_size = csum_size;
 	}
+	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
 				      ins_size);
+	path->leave_spinning = 0;
 	if (ret < 0)
 		goto fail_unlock;
 	if (ret != 0) {
@@ -776,7 +780,6 @@ found:
 	item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
 				      btrfs_item_size_nr(leaf, path->slots[0]));
 	eb_token = NULL;
-	cond_resched();
 next_sector:
 
 	if (!eb_token ||
@@ -817,9 +820,9 @@ next_sector:
 		eb_token = NULL;
 	}
 	btrfs_mark_buffer_dirty(path->nodes[0]);
-	cond_resched();
 	if (total_bytes < sums->len) {
 		btrfs_release_path(root, path);
+		cond_resched();
 		goto again;
 	}
 out:
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index dc78954861b3..9c9fb46ccd08 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -606,6 +606,7 @@ next_slot:
 		btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
 
 		btrfs_release_path(root, path);
+		path->leave_spinning = 1;
 		ret = btrfs_insert_empty_item(trans, root, path, &ins,
 					      sizeof(*extent));
 		BUG_ON(ret);
@@ -639,17 +640,22 @@ next_slot:
 					       ram_bytes);
 		btrfs_set_file_extent_type(leaf, extent, found_type);
 
+		btrfs_unlock_up_safe(path, 1);
 		btrfs_mark_buffer_dirty(path->nodes[0]);
+		btrfs_set_lock_blocking(path->nodes[0]);
 
 		if (disk_bytenr != 0) {
 			ret = btrfs_update_extent_ref(trans, root,
-				    disk_bytenr, orig_parent,
+				    disk_bytenr,
+				    le64_to_cpu(old.disk_num_bytes),
+				    orig_parent,
 				    leaf->start,
 				    root->root_key.objectid,
 				    trans->transid, ins.objectid);
 
 			BUG_ON(ret);
 		}
+		path->leave_spinning = 0;
 		btrfs_release_path(root, path);
 		if (disk_bytenr != 0)
 			inode_add_bytes(inode, extent_end - end);
@@ -912,7 +918,7 @@ again:
 		btrfs_set_file_extent_other_encoding(leaf, fi, 0);
 
 		if (orig_parent != leaf->start) {
-			ret = btrfs_update_extent_ref(trans, root, bytenr,
+			ret = btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
 						      orig_parent, leaf->start,
 						      root->root_key.objectid,
 						      trans->transid, inode->i_ino);
@@ -1155,6 +1161,20 @@ out_nolock:
 		page_cache_release(pinned[1]);
 	*ppos = pos;
 
+	/*
+	 * we want to make sure fsync finds this change
+	 * but we haven't joined a transaction running right now.
+	 *
+	 * Later on, someone is sure to update the inode and get the
+	 * real transid recorded.
+	 *
+	 * We set last_trans now to the fs_info generation + 1,
+	 * this will either be one more than the running transaction
+	 * or the generation used for the next transaction if there isn't
+	 * one running right now.
+	 */
+	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
+
 	if (num_written > 0 && will_write) {
 		struct btrfs_trans_handle *trans;
 
@@ -1167,8 +1187,11 @@ out_nolock:
 			ret = btrfs_log_dentry_safe(trans, root,
 						    file->f_dentry);
 			if (ret == 0) {
-				btrfs_sync_log(trans, root);
-				btrfs_end_transaction(trans, root);
+				ret = btrfs_sync_log(trans, root);
+				if (ret == 0)
+					btrfs_end_transaction(trans, root);
+				else
+					btrfs_commit_transaction(trans, root);
 			} else {
 				btrfs_commit_transaction(trans, root);
 			}
@@ -1185,6 +1208,18 @@ out_nolock:
 
 int btrfs_release_file(struct inode *inode, struct file *filp)
 {
+	/*
+	 * ordered_data_close is set by settattr when we are about to truncate
+	 * a file from a non-zero size to a zero size.  This tries to
+	 * flush down new bytes that may have been written if the
+	 * application were using truncate to replace a file in place.
+	 */
+	if (BTRFS_I(inode)->ordered_data_close) {
+		BTRFS_I(inode)->ordered_data_close = 0;
+		btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
+		if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
+			filemap_flush(inode->i_mapping);
+	}
 	if (filp->private_data)
 		btrfs_ioctl_trans_end(filp);
 	return 0;
@@ -1260,8 +1295,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
 	if (ret > 0) {
 		ret = btrfs_commit_transaction(trans, root);
 	} else {
-		btrfs_sync_log(trans, root);
-		ret = btrfs_end_transaction(trans, root);
+		ret = btrfs_sync_log(trans, root);
+		if (ret == 0)
+			ret = btrfs_end_transaction(trans, root);
+		else
+			ret = btrfs_commit_transaction(trans, root);
 	}
 	mutex_lock(&dentry->d_inode->i_mutex);
 out:
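
Both fsync paths above now check the return value of btrfs_sync_log() and fall back to a full btrfs_commit_transaction() when the tree-log commit cannot be used. A small sketch of that try-the-cheap-path-first pattern, with stand-in functions that are not btrfs API:

/* Sketch of the fallback pattern above: try the fast log commit, and
 * fall back to the expensive-but-safe full commit if it fails. */
#include <stdio.h>

static int try_log_commit(void)
{
	return -1;	/* pretend the log is unusable this time */
}

static int full_commit(void)
{
	printf("falling back to full transaction commit\n");
	return 0;
}

static int sync_file(void)
{
	int ret = try_log_commit();

	if (ret == 0)
		return 0;	/* cheap path worked */
	return full_commit();	/* safe path always works */
}

int main(void)
{
	return sync_file();
}
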
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 3d46fa1f29a4..6b627c611808 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -73,6 +73,8 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
+	path->leave_spinning = 1;
+
 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 	if (ret > 0) {
 		ret = -ENOENT;
@@ -127,6 +129,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
+	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_item(trans, root, path, &key,
 				      ins_len);
 	if (ret == -EEXIST) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7d4f948bc22a..06d8db5afb08 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -134,6 +134,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
+	path->leave_spinning = 1;
 	btrfs_set_trans_block_group(trans, inode);
 
 	key.objectid = inode->i_ino;
@@ -167,9 +168,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 			cur_size = min_t(unsigned long, compressed_size,
 				       PAGE_CACHE_SIZE);
 
-			kaddr = kmap(cpage);
+			kaddr = kmap_atomic(cpage, KM_USER0);
 			write_extent_buffer(leaf, kaddr, ptr, cur_size);
-			kunmap(cpage);
+			kunmap_atomic(kaddr, KM_USER0);
 
 			i++;
 			ptr += cur_size;
@@ -204,7 +205,7 @@ fail:
  * does the checks required to make sure the data is small enough
  * to fit as an inline extent.
  */
-static int cow_file_range_inline(struct btrfs_trans_handle *trans,
+static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct inode *inode, u64 start, u64 end,
 				 size_t compressed_size,
@@ -854,11 +855,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
 	u64 cur_end;
 	int limit = 10 * 1024 * 1042;
 
-	if (!btrfs_test_opt(root, COMPRESS)) {
-		return cow_file_range(inode, locked_page, start, end,
-				      page_started, nr_written, 1);
-	}
-
 	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED |
 			 EXTENT_DELALLOC, 1, 0, GFP_NOFS);
 	while (start < end) {
@@ -935,7 +931,8 @@ static noinline int csum_exist_in_range(struct btrfs_root *root,
  * If no cow copies or snapshots exist, we write directly to the existing
  * blocks on disk
  */
-static int run_delalloc_nocow(struct inode *inode, struct page *locked_page,
+static noinline int run_delalloc_nocow(struct inode *inode,
+				       struct page *locked_page,
 			      u64 start, u64 end, int *page_started, int force,
 			      unsigned long *nr_written)
 {
@@ -1133,6 +1130,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
 			      unsigned long *nr_written)
 {
 	int ret;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
 
 	if (btrfs_test_flag(inode, NODATACOW))
 		ret = run_delalloc_nocow(inode, locked_page, start, end,
@@ -1140,10 +1138,12 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
 	else if (btrfs_test_flag(inode, PREALLOC))
 		ret = run_delalloc_nocow(inode, locked_page, start, end,
 					 page_started, 0, nr_written);
+	else if (!btrfs_test_opt(root, COMPRESS))
+		ret = cow_file_range(inode, locked_page, start, end,
+				     page_started, nr_written, 1);
 	else
 		ret = cow_file_range_async(inode, locked_page, start, end,
 					   page_started, nr_written);
-
 	return ret;
 }
 
@@ -1453,6 +1453,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
+	path->leave_spinning = 1;
 	ret = btrfs_drop_extents(trans, root, inode, file_pos,
 				 file_pos + num_bytes, file_pos, &hint);
 	BUG_ON(ret);
@@ -1475,6 +1476,10 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 	btrfs_set_file_extent_compression(leaf, fi, compression);
 	btrfs_set_file_extent_encryption(leaf, fi, encryption);
 	btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
+
+	btrfs_unlock_up_safe(path, 1);
+	btrfs_set_lock_blocking(leaf);
+
 	btrfs_mark_buffer_dirty(leaf);
 
 	inode_add_bytes(inode, num_bytes);
@@ -1487,11 +1492,35 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 					  root->root_key.objectid,
 					  trans->transid, inode->i_ino, &ins);
 	BUG_ON(ret);
-
 	btrfs_free_path(path);
+
 	return 0;
 }
 
+/*
+ * helper function for btrfs_finish_ordered_io, this
+ * just reads in some of the csum leaves to prime them into ram
+ * before we start the transaction.  It limits the amount of btree
+ * reads required while inside the transaction.
+ */
+static noinline void reada_csum(struct btrfs_root *root,
+				struct btrfs_path *path,
+				struct btrfs_ordered_extent *ordered_extent)
+{
+	struct btrfs_ordered_sum *sum;
+	u64 bytenr;
+
+	sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum,
+			 list);
+	bytenr = sum->sums[0].bytenr;
+
+	/*
+	 * we don't care about the results, the point of this search is
+	 * just to get the btree leaves into ram
+	 */
+	btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0);
+}
+
 /* as ordered data IO finishes, this gets called so we can finish
  * an ordered extent if the range of bytes in the file it covers are
  * fully written.
@@ -1500,8 +1529,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
-	struct btrfs_ordered_extent *ordered_extent;
+	struct btrfs_ordered_extent *ordered_extent = NULL;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	struct btrfs_path *path;
 	int compressed = 0;
 	int ret;
 
@@ -1509,9 +1539,33 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	if (!ret)
 		return 0;
 
+	/*
+	 * before we join the transaction, try to do some of our IO.
+	 * This will limit the amount of IO that we have to do with
+	 * the transaction running.  We're unlikely to need to do any
+	 * IO if the file extents are new, the disk_i_size checks
+	 * covers the most common case.
+	 */
+	if (start < BTRFS_I(inode)->disk_i_size) {
+		path = btrfs_alloc_path();
+		if (path) {
+			ret = btrfs_lookup_file_extent(NULL, root, path,
+						       inode->i_ino,
+						       start, 0);
+			ordered_extent = btrfs_lookup_ordered_extent(inode,
								     start);
+			if (!list_empty(&ordered_extent->list)) {
+				btrfs_release_path(root, path);
+				reada_csum(root, path, ordered_extent);
+			}
+			btrfs_free_path(path);
+		}
+	}
+
 	trans = btrfs_join_transaction(root, 1);
 
-	ordered_extent = btrfs_lookup_ordered_extent(inode, start);
+	if (!ordered_extent)
+		ordered_extent = btrfs_lookup_ordered_extent(inode, start);
 	BUG_ON(!ordered_extent);
 	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
 		goto nocow;
@@ -2101,6 +2155,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
 
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
+	path->leave_spinning = 1;
 	ret = btrfs_lookup_inode(trans, root, path,
 				 &BTRFS_I(inode)->location, 1);
 	if (ret) {
@@ -2147,6 +2202,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 		goto err;
 	}
 
+	path->leave_spinning = 1;
 	di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
 				    name, name_len, -1);
 	if (IS_ERR(di)) {
@@ -2190,8 +2246,6 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 	ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
 					 inode, dir->i_ino);
 	BUG_ON(ret != 0 && ret != -ENOENT);
-	if (ret != -ENOENT)
-		BTRFS_I(dir)->log_dirty_trans = trans->transid;
 
 	ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
 					   dir, index);
@@ -2224,6 +2278,9 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
 	trans = btrfs_start_transaction(root, 1);
 
 	btrfs_set_trans_block_group(trans, dir);
+
+	btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
+
 	ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
 				 dentry->d_name.name, dentry->d_name.len);
 
@@ -2498,6 +2555,7 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 	key.type = (u8)-1;
 
 search_again:
+	path->leave_spinning = 1;
 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 	if (ret < 0)
 		goto error;
@@ -2644,6 +2702,7 @@ delete:
 			break;
 		}
 		if (found_extent) {
+			btrfs_set_path_blocking(path);
 			ret = btrfs_free_extent(trans, root, extent_start,
 						extent_num_bytes,
 						leaf->start, root_owner,
@@ -2848,11 +2907,21 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (err)
 		return err;
 
-	if (S_ISREG(inode->i_mode) &&
-	    attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
-		err = btrfs_cont_expand(inode, attr->ia_size);
-		if (err)
-			return err;
+	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
+		if (attr->ia_size > inode->i_size) {
+			err = btrfs_cont_expand(inode, attr->ia_size);
+			if (err)
+				return err;
+		} else if (inode->i_size > 0 &&
+			   attr->ia_size == 0) {
+
+			/* we're truncating a file that used to have good
+			 * data down to zero.  Make sure it gets into
+			 * the ordered flush list so that any new writes
+			 * get down to disk quickly.
+			 */
+			BTRFS_I(inode)->ordered_data_close = 1;
+		}
 	}
 
 	err = inode_setattr(inode, attr);
@@ -2984,13 +3053,14 @@ static noinline void init_btrfs_i(struct inode *inode)
 	bi->disk_i_size = 0;
 	bi->flags = 0;
 	bi->index_cnt = (u64)-1;
-	bi->log_dirty_trans = 0;
+	bi->last_unlink_trans = 0;
 	extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
 	extent_io_tree_init(&BTRFS_I(inode)->io_tree,
 			     inode->i_mapping, GFP_NOFS);
 	extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
 			     inode->i_mapping, GFP_NOFS);
 	INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
+	INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
 	btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
 	mutex_init(&BTRFS_I(inode)->extent_mutex);
 	mutex_init(&BTRFS_I(inode)->log_mutex);
@@ -3449,6 +3519,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	sizes[0] = sizeof(struct btrfs_inode_item);
 	sizes[1] = name_len + sizeof(*ref);
 
+	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
 	if (ret != 0)
 		goto fail;
@@ -3727,6 +3798,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 		drop_inode = 1;
 
 	nr = trans->blocks_used;
+
+	btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
 	btrfs_end_transaction_throttle(trans, root);
 fail:
 	if (drop_inode) {
@@ -4292,8 +4365,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
  * beyond EOF, then the page is guaranteed safe against truncation until we
  * unlock the page.
  */
-int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
+	struct page *page = vmf->page;
 	struct inode *inode = fdentry(vma->vm_file)->d_inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
@@ -4306,10 +4380,15 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	u64 page_end;
 
 	ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
-	if (ret)
+	if (ret) {
+		if (ret == -ENOMEM)
+			ret = VM_FAULT_OOM;
+		else /* -ENOSPC, -EIO, etc */
+			ret = VM_FAULT_SIGBUS;
 		goto out;
+	}
 
-	ret = -EINVAL;
+	ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
 again:
 	lock_page(page);
 	size = i_size_read(inode);
@@ -4357,6 +4436,8 @@ again:
 	}
 	ClearPageChecked(page);
 	set_page_dirty(page);
+
+	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
 	unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
 
 out_unlock:
@@ -4382,6 +4463,27 @@ static void btrfs_truncate(struct inode *inode)
 	btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
 
 	trans = btrfs_start_transaction(root, 1);
+
+	/*
+	 * setattr is responsible for setting the ordered_data_close flag,
+	 * but that is only tested during the last file release.  That
+	 * could happen well after the next commit, leaving a great big
+	 * window where new writes may get lost if someone chooses to write
+	 * to this file after truncating to zero
+	 *
+	 * The inode doesn't have any dirty data here, and so if we commit
+	 * this is a noop.  If someone immediately starts writing to the inode
+	 * it is very likely we'll catch some of their writes in this
+	 * transaction, and the commit will find this file on the ordered
+	 * data list with good things to send down.
+	 *
+	 * This is a best effort solution, there is still a window where
+	 * using truncate to replace the contents of the file will
+	 * end up with a zero length file after a crash.
+	 */
+	if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
+		btrfs_add_ordered_operation(trans, root, inode);
+
 	btrfs_set_trans_block_group(trans, inode);
 	btrfs_i_size_write(inode, inode->i_size);
 
@@ -4458,12 +4560,15 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 	ei->i_acl = BTRFS_ACL_NOT_CACHED;
 	ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
 	INIT_LIST_HEAD(&ei->i_orphan);
+	INIT_LIST_HEAD(&ei->ordered_operations);
 	return &ei->vfs_inode;
 }
 
 void btrfs_destroy_inode(struct inode *inode)
 {
 	struct btrfs_ordered_extent *ordered;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+
 	WARN_ON(!list_empty(&inode->i_dentry));
 	WARN_ON(inode->i_data.nrpages);
 
@@ -4474,13 +4579,24 @@ void btrfs_destroy_inode(struct inode *inode)
 	    BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
 		posix_acl_release(BTRFS_I(inode)->i_default_acl);
 
-	spin_lock(&BTRFS_I(inode)->root->list_lock);
+	/*
+	 * Make sure we're properly removed from the ordered operation
+	 * lists.
+	 */
+	smp_mb();
+	if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
+		spin_lock(&root->fs_info->ordered_extent_lock);
+		list_del_init(&BTRFS_I(inode)->ordered_operations);
+		spin_unlock(&root->fs_info->ordered_extent_lock);
+	}
+
+	spin_lock(&root->list_lock);
 	if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
 		printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan"
 		       " list\n", inode->i_ino);
 		dump_stack();
 	}
-	spin_unlock(&BTRFS_I(inode)->root->list_lock);
+	spin_unlock(&root->list_lock);
 
 	while (1) {
 		ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
@@ -4605,8 +4721,36 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (ret)
 		goto out_unlock;
 
+	/*
+	 * we're using rename to replace one file with another.
+	 * and the replacement file is large.  Start IO on it now so
+	 * we don't add too much work to the end of the transaction
+	 */
+	if (new_inode && old_inode && S_ISREG(old_inode->i_mode) &&
+	    new_inode->i_size &&
+	    old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
+		filemap_flush(old_inode->i_mapping);
+
 	trans = btrfs_start_transaction(root, 1);
 
+	/*
+	 * make sure the inode gets flushed if it is replacing
+	 * something.
+	 */
+	if (new_inode && new_inode->i_size &&
+	    old_inode && S_ISREG(old_inode->i_mode)) {
+		btrfs_add_ordered_operation(trans, root, old_inode);
+	}
+
+	/*
+	 * this is an ugly little race, but the rename is required to make
+	 * sure that if we crash, the inode is either at the old name
+	 * or the new one.  pinning the log transaction lets us make sure
+	 * we don't allow a log commit to come in after we unlink the
+	 * name but before we add the new name back in.
+	 */
+	btrfs_pin_log_trans(root);
+
 	btrfs_set_trans_block_group(trans, new_dir);
 
 	btrfs_inc_nlink(old_dentry->d_inode);
@@ -4614,6 +4758,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	new_dir->i_ctime = new_dir->i_mtime = ctime;
 	old_inode->i_ctime = ctime;
 
+	if (old_dentry->d_parent != new_dentry->d_parent)
+		btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
+
 	ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode,
 				 old_dentry->d_name.name,
 				 old_dentry->d_name.len);
@@ -4645,7 +4792,14 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (ret)
 		goto out_fail;
 
+	btrfs_log_new_name(trans, old_inode, old_dir,
+			   new_dentry->d_parent);
out_fail:
+
+	/* this btrfs_end_log_trans just allows the current
+	 * log-sub transaction to complete
+	 */
+	btrfs_end_log_trans(root);
 	btrfs_end_transaction_throttle(trans, root);
out_unlock:
 	return ret;
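
btrfs_page_mkwrite() above switches from returning raw errnos to the VM_FAULT_* codes the fault path expects: -ENOMEM becomes VM_FAULT_OOM, other errors become VM_FAULT_SIGBUS, and VM_FAULT_NOPAGE asks the VM to retry the fault. A small sketch of that translation; the constants here are stand-ins, not the kernel's definitions:

/* Sketch of the errno -> VM_FAULT_* mapping done above. */
#include <errno.h>
#include <stdio.h>

#define VM_FAULT_OOM    0x0001
#define VM_FAULT_SIGBUS 0x0002
#define VM_FAULT_NOPAGE 0x0100

static unsigned int fault_code(int err)
{
	if (err == 0)
		return VM_FAULT_NOPAGE;	/* make the VM retry the fault */
	if (err == -ENOMEM)
		return VM_FAULT_OOM;
	return VM_FAULT_SIGBUS;		/* -ENOSPC, -EIO, etc */
}

int main(void)
{
	printf("%#x %#x %#x\n", fault_code(0), fault_code(-ENOMEM),
	       fault_code(-ENOSPC));
	return 0;
}
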
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 47b0a88c12a2..a5310c0f41e2 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -71,12 +71,13 @@ void btrfs_clear_lock_blocking(struct extent_buffer *eb)
 static int btrfs_spin_on_block(struct extent_buffer *eb)
 {
 	int i;
+
 	for (i = 0; i < 512; i++) {
-		cpu_relax();
 		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
 			return 1;
 		if (need_resched())
 			break;
+		cpu_relax();
 	}
 	return 0;
 }
@@ -95,13 +96,15 @@ int btrfs_try_spin_lock(struct extent_buffer *eb)
 {
 	int i;
 
-	spin_nested(eb);
-	if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-		return 1;
-	spin_unlock(&eb->lock);
-
+	if (btrfs_spin_on_block(eb)) {
+		spin_nested(eb);
+		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
+			return 1;
+		spin_unlock(&eb->lock);
+	}
 	/* spin for a bit on the BLOCKING flag */
 	for (i = 0; i < 2; i++) {
+		cpu_relax();
 		if (!btrfs_spin_on_block(eb))
 			break;
 
@@ -148,6 +151,9 @@ int btrfs_tree_lock(struct extent_buffer *eb)
 	DEFINE_WAIT(wait);
 	wait.func = btrfs_wake_function;
 
+	if (!btrfs_spin_on_block(eb))
+		goto sleep;
+
 	while(1) {
 		spin_nested(eb);
 
@@ -165,9 +171,10 @@ int btrfs_tree_lock(struct extent_buffer *eb)
 		 * spin for a bit, and if the blocking flag goes away,
 		 * loop around
 		 */
+		cpu_relax();
 		if (btrfs_spin_on_block(eb))
 			continue;
-
+sleep:
 		prepare_to_wait_exclusive(&eb->lock_wq, &wait,
 					  TASK_UNINTERRUPTIBLE);
 
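
btrfs_spin_on_block() above bounds the busy-wait: poll the blocking flag a few hundred iterations, then give up and sleep on the wait queue instead of burning CPU. A userspace sketch of that bounded spin, illustrative only (the kernel would also call cpu_relax() and check need_resched() in the loop):

/* Sketch of the bounded spin-before-sleep pattern used above. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int blocking = 1;

/* returns 1 if the flag cleared while we spun, 0 if we should sleep */
static int spin_on_block(void)
{
	int i;

	for (i = 0; i < 512; i++) {
		if (!atomic_load(&blocking))
			return 1;
	}
	return 0;
}

int main(void)
{
	if (!spin_on_block())
		printf("would sleep on the wait queue now\n");
	return 0;
}
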
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 77c2411a5f0f..53c87b197d70 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -310,6 +310,16 @@ int btrfs_remove_ordered_extent(struct inode *inode,
310 310
311 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 311 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
312 list_del_init(&entry->root_extent_list); 312 list_del_init(&entry->root_extent_list);
313
314 /*
315 * we have no more ordered extents for this inode and
316 * no dirty pages. We can safely remove it from the
317 * list of ordered extents
318 */
319 if (RB_EMPTY_ROOT(&tree->tree) &&
320 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
321 list_del_init(&BTRFS_I(inode)->ordered_operations);
322 }
313 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 323 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
314 324
315 mutex_unlock(&tree->mutex); 325 mutex_unlock(&tree->mutex);
@@ -370,6 +380,68 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
370} 380}
371 381
372/* 382/*
383 * this is used during transaction commit to write all the inodes
384 * added to the ordered operation list. These files must be fully on
385 * disk before the transaction commits.
386 *
387 * we have two modes here, one is to just start the IO via filemap_flush
388 * and the other is to wait for all the io. When we wait, we have an
389 * extra check to make sure the ordered operation list really is empty
390 * before we return
391 */
392int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
393{
394 struct btrfs_inode *btrfs_inode;
395 struct inode *inode;
396 struct list_head splice;
397
398 INIT_LIST_HEAD(&splice);
399
400 mutex_lock(&root->fs_info->ordered_operations_mutex);
401 spin_lock(&root->fs_info->ordered_extent_lock);
402again:
403 list_splice_init(&root->fs_info->ordered_operations, &splice);
404
405 while (!list_empty(&splice)) {
406 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
407 ordered_operations);
408
409 inode = &btrfs_inode->vfs_inode;
410
411 list_del_init(&btrfs_inode->ordered_operations);
412
413 /*
414 * the inode may be getting freed (in sys_unlink path).
415 */
416 inode = igrab(inode);
417
418 if (!wait && inode) {
419 list_add_tail(&BTRFS_I(inode)->ordered_operations,
420 &root->fs_info->ordered_operations);
421 }
422 spin_unlock(&root->fs_info->ordered_extent_lock);
423
424 if (inode) {
425 if (wait)
426 btrfs_wait_ordered_range(inode, 0, (u64)-1);
427 else
428 filemap_flush(inode->i_mapping);
429 iput(inode);
430 }
431
432 cond_resched();
433 spin_lock(&root->fs_info->ordered_extent_lock);
434 }
435 if (wait && !list_empty(&root->fs_info->ordered_operations))
436 goto again;
437
438 spin_unlock(&root->fs_info->ordered_extent_lock);
439 mutex_unlock(&root->fs_info->ordered_operations_mutex);
440
441 return 0;
442}
443
444/*
373 * Used to start IO or wait for a given ordered extent to finish. 445 * Used to start IO or wait for a given ordered extent to finish.
374 * 446 *
375 * If wait is one, this effectively waits on page writeback for all the pages 447 * If wait is one, this effectively waits on page writeback for all the pages
@@ -726,3 +798,49 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
726 798
727 return ret; 799 return ret;
728} 800}
801
802/*
803 * add a given inode to the list of inodes that must be fully on
804 * disk before a transaction commit finishes.
805 *
806 * This basically gives us the ext3 style data=ordered mode, and it is mostly
807 * used to make sure renamed files are fully on disk.
808 *
809 * It is a noop if the inode is already fully on disk.
810 *
811 * If trans is not null, we'll do a friendly check for a transaction that
812 * is already flushing things and force the IO down ourselves.
813 */
814int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
815 struct btrfs_root *root,
816 struct inode *inode)
817{
818 u64 last_mod;
819
820 last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
821
822 /*
823 * if this file hasn't been changed since the last transaction
824 * commit, we can safely return without doing anything
825 */
826 if (last_mod < root->fs_info->last_trans_committed)
827 return 0;
828
829 /*
830 * the transaction is already committing. Just start the IO and
831 * don't bother with all of this list nonsense
832 */
833 if (trans && root->fs_info->running_transaction->blocked) {
834 btrfs_wait_ordered_range(inode, 0, (u64)-1);
835 return 0;
836 }
837
838 spin_lock(&root->fs_info->ordered_extent_lock);
839 if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
840 list_add_tail(&BTRFS_I(inode)->ordered_operations,
841 &root->fs_info->ordered_operations);
842 }
843 spin_unlock(&root->fs_info->ordered_extent_lock);
844
845 return 0;
846}
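
A hedged usage sketch: in this series the rename path (in fs/btrfs/inode.c, outside this section) records the inode right after changing its name so the commit-time flush above picks it up; the wrapper below is illustrative only:

static int sketch_record_renamed_inode(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root,
				       struct inode *inode)
{
	/* no-op if the inode is already fully on disk; forces the IO
	 * itself if the running transaction is already flushing */
	return btrfs_add_ordered_operation(trans, root, inode);
}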
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index ab66d5e8d6d6..3d31c8827b01 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -155,4 +155,8 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
155int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, 155int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
156 loff_t end, int sync_mode); 156 loff_t end, int sync_mode);
157int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); 157int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
158int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
159int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
160 struct btrfs_root *root,
161 struct inode *inode);
158#endif 162#endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 4112d53d4f4d..664782c6a2df 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -65,6 +65,15 @@ static noinline int join_transaction(struct btrfs_root *root)
65 cur_trans->use_count = 1; 65 cur_trans->use_count = 1;
66 cur_trans->commit_done = 0; 66 cur_trans->commit_done = 0;
67 cur_trans->start_time = get_seconds(); 67 cur_trans->start_time = get_seconds();
68
69 cur_trans->delayed_refs.root.rb_node = NULL;
70 cur_trans->delayed_refs.num_entries = 0;
71 cur_trans->delayed_refs.num_heads_ready = 0;
72 cur_trans->delayed_refs.num_heads = 0;
73 cur_trans->delayed_refs.flushing = 0;
74 cur_trans->delayed_refs.run_delayed_start = 0;
75 spin_lock_init(&cur_trans->delayed_refs.lock);
76
68 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 77 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
69 list_add_tail(&cur_trans->list, &root->fs_info->trans_list); 78 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
70 extent_io_tree_init(&cur_trans->dirty_pages, 79 extent_io_tree_init(&cur_trans->dirty_pages,
@@ -182,6 +191,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
182 h->block_group = 0; 191 h->block_group = 0;
183 h->alloc_exclude_nr = 0; 192 h->alloc_exclude_nr = 0;
184 h->alloc_exclude_start = 0; 193 h->alloc_exclude_start = 0;
194 h->delayed_ref_updates = 0;
195
185 root->fs_info->running_transaction->use_count++; 196 root->fs_info->running_transaction->use_count++;
186 mutex_unlock(&root->fs_info->trans_mutex); 197 mutex_unlock(&root->fs_info->trans_mutex);
187 return h; 198 return h;
@@ -271,7 +282,6 @@ void btrfs_throttle(struct btrfs_root *root)
271 if (!root->fs_info->open_ioctl_trans) 282 if (!root->fs_info->open_ioctl_trans)
272 wait_current_trans(root); 283 wait_current_trans(root);
273 mutex_unlock(&root->fs_info->trans_mutex); 284 mutex_unlock(&root->fs_info->trans_mutex);
274
275 throttle_on_drops(root); 285 throttle_on_drops(root);
276} 286}
277 287
@@ -280,6 +290,27 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
280{ 290{
281 struct btrfs_transaction *cur_trans; 291 struct btrfs_transaction *cur_trans;
282 struct btrfs_fs_info *info = root->fs_info; 292 struct btrfs_fs_info *info = root->fs_info;
293 int count = 0;
294
295 while (count < 4) {
296 unsigned long cur = trans->delayed_ref_updates;
297 trans->delayed_ref_updates = 0;
298 if (cur &&
299 trans->transaction->delayed_refs.num_heads_ready > 64) {
300 trans->delayed_ref_updates = 0;
301
302 /*
303 * do a full flush if the transaction is trying
304 * to close
305 */
306 if (trans->transaction->delayed_refs.flushing)
307 cur = 0;
308 btrfs_run_delayed_refs(trans, root, cur);
309 } else {
310 break;
311 }
312 count++;
313 }
283 314
284 mutex_lock(&info->trans_mutex); 315 mutex_lock(&info->trans_mutex);
285 cur_trans = info->running_transaction; 316 cur_trans = info->running_transaction;
@@ -424,9 +455,10 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
424 u64 old_root_bytenr; 455 u64 old_root_bytenr;
425 struct btrfs_root *tree_root = root->fs_info->tree_root; 456 struct btrfs_root *tree_root = root->fs_info->tree_root;
426 457
427 btrfs_extent_post_op(trans, root);
428 btrfs_write_dirty_block_groups(trans, root); 458 btrfs_write_dirty_block_groups(trans, root);
429 btrfs_extent_post_op(trans, root); 459
460 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
461 BUG_ON(ret);
430 462
431 while (1) { 463 while (1) {
432 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 464 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
@@ -438,14 +470,14 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
438 btrfs_header_level(root->node)); 470 btrfs_header_level(root->node));
439 btrfs_set_root_generation(&root->root_item, trans->transid); 471 btrfs_set_root_generation(&root->root_item, trans->transid);
440 472
441 btrfs_extent_post_op(trans, root);
442
443 ret = btrfs_update_root(trans, tree_root, 473 ret = btrfs_update_root(trans, tree_root,
444 &root->root_key, 474 &root->root_key,
445 &root->root_item); 475 &root->root_item);
446 BUG_ON(ret); 476 BUG_ON(ret);
447 btrfs_write_dirty_block_groups(trans, root); 477 btrfs_write_dirty_block_groups(trans, root);
448 btrfs_extent_post_op(trans, root); 478
479 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
480 BUG_ON(ret);
449 } 481 }
450 return 0; 482 return 0;
451} 483}
@@ -459,15 +491,18 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
459 struct btrfs_fs_info *fs_info = root->fs_info; 491 struct btrfs_fs_info *fs_info = root->fs_info;
460 struct list_head *next; 492 struct list_head *next;
461 struct extent_buffer *eb; 493 struct extent_buffer *eb;
494 int ret;
462 495
463 btrfs_extent_post_op(trans, fs_info->tree_root); 496 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
497 BUG_ON(ret);
464 498
465 eb = btrfs_lock_root_node(fs_info->tree_root); 499 eb = btrfs_lock_root_node(fs_info->tree_root);
466 btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb, 0); 500 btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
467 btrfs_tree_unlock(eb); 501 btrfs_tree_unlock(eb);
468 free_extent_buffer(eb); 502 free_extent_buffer(eb);
469 503
470 btrfs_extent_post_op(trans, fs_info->tree_root); 504 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
505 BUG_ON(ret);
471 506
472 while (!list_empty(&fs_info->dirty_cowonly_roots)) { 507 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
473 next = fs_info->dirty_cowonly_roots.next; 508 next = fs_info->dirty_cowonly_roots.next;
@@ -475,6 +510,9 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
475 root = list_entry(next, struct btrfs_root, dirty_list); 510 root = list_entry(next, struct btrfs_root, dirty_list);
476 511
477 update_cowonly_root(trans, root); 512 update_cowonly_root(trans, root);
513
514 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
515 BUG_ON(ret);
478 } 516 }
479 return 0; 517 return 0;
480} 518}
@@ -635,6 +673,31 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
635} 673}
636 674
637/* 675/*
676 * when dropping snapshots, we generate a ton of delayed refs, and it makes
677 * sense not to join the transaction while it is trying to flush the current
678 * queue of delayed refs out.
679 *
680 * This is used by the drop snapshot code only
681 */
682static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
683{
684 DEFINE_WAIT(wait);
685
686 mutex_lock(&info->trans_mutex);
687 while (info->running_transaction &&
688 info->running_transaction->delayed_refs.flushing) {
689 prepare_to_wait(&info->transaction_wait, &wait,
690 TASK_UNINTERRUPTIBLE);
691 mutex_unlock(&info->trans_mutex);
692 schedule();
693 mutex_lock(&info->trans_mutex);
694 finish_wait(&info->transaction_wait, &wait);
695 }
696 mutex_unlock(&info->trans_mutex);
697 return 0;
698}
699
700/*
638 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on 701 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
639 * all of them 702 * all of them
640 */ 703 */
@@ -661,7 +724,22 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
661 atomic_inc(&root->fs_info->throttles); 724 atomic_inc(&root->fs_info->throttles);
662 725
663 while (1) { 726 while (1) {
727 /*
728 * we don't want to jump in and create a bunch of
729 * delayed refs if the transaction is starting to close
730 */
731 wait_transaction_pre_flush(tree_root->fs_info);
664 trans = btrfs_start_transaction(tree_root, 1); 732 trans = btrfs_start_transaction(tree_root, 1);
733
734 /*
735 * we've joined a transaction, make sure it isn't
736 * closing right now
737 */
738 if (trans->transaction->delayed_refs.flushing) {
739 btrfs_end_transaction(trans, tree_root);
740 continue;
741 }
742
665 mutex_lock(&root->fs_info->drop_mutex); 743 mutex_lock(&root->fs_info->drop_mutex);
666 ret = btrfs_drop_snapshot(trans, dirty->root); 744 ret = btrfs_drop_snapshot(trans, dirty->root);
667 if (ret != -EAGAIN) 745 if (ret != -EAGAIN)
@@ -766,7 +844,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
766 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 844 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
767 845
768 old = btrfs_lock_root_node(root); 846 old = btrfs_lock_root_node(root);
769 btrfs_cow_block(trans, root, old, NULL, 0, &old, 0); 847 btrfs_cow_block(trans, root, old, NULL, 0, &old);
770 848
771 btrfs_copy_root(trans, root, old, &tmp, objectid); 849 btrfs_copy_root(trans, root, old, &tmp, objectid);
772 btrfs_tree_unlock(old); 850 btrfs_tree_unlock(old);
@@ -894,12 +972,31 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
894 struct extent_io_tree *pinned_copy; 972 struct extent_io_tree *pinned_copy;
895 DEFINE_WAIT(wait); 973 DEFINE_WAIT(wait);
896 int ret; 974 int ret;
975 int should_grow = 0;
976 unsigned long now = get_seconds();
977
978 btrfs_run_ordered_operations(root, 0);
979
980 /* make a pass through all the delayed refs we have so far
 981 * any running procs may add more while we are here
982 */
983 ret = btrfs_run_delayed_refs(trans, root, 0);
984 BUG_ON(ret);
985
986 cur_trans = trans->transaction;
987 /*
988 * set the flushing flag so procs in this transaction have to
989 * start sending their work down.
990 */
991 cur_trans->delayed_refs.flushing = 1;
992
993 ret = btrfs_run_delayed_refs(trans, root, 0);
994 BUG_ON(ret);
897 995
898 INIT_LIST_HEAD(&dirty_fs_roots);
899 mutex_lock(&root->fs_info->trans_mutex); 996 mutex_lock(&root->fs_info->trans_mutex);
900 if (trans->transaction->in_commit) { 997 INIT_LIST_HEAD(&dirty_fs_roots);
901 cur_trans = trans->transaction; 998 if (cur_trans->in_commit) {
902 trans->transaction->use_count++; 999 cur_trans->use_count++;
903 mutex_unlock(&root->fs_info->trans_mutex); 1000 mutex_unlock(&root->fs_info->trans_mutex);
904 btrfs_end_transaction(trans, root); 1001 btrfs_end_transaction(trans, root);
905 1002
@@ -922,7 +1019,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
922 1019
923 trans->transaction->in_commit = 1; 1020 trans->transaction->in_commit = 1;
924 trans->transaction->blocked = 1; 1021 trans->transaction->blocked = 1;
925 cur_trans = trans->transaction;
926 if (cur_trans->list.prev != &root->fs_info->trans_list) { 1022 if (cur_trans->list.prev != &root->fs_info->trans_list) {
927 prev_trans = list_entry(cur_trans->list.prev, 1023 prev_trans = list_entry(cur_trans->list.prev,
928 struct btrfs_transaction, list); 1024 struct btrfs_transaction, list);
@@ -937,6 +1033,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
937 } 1033 }
938 } 1034 }
939 1035
1036 if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
1037 should_grow = 1;
1038
940 do { 1039 do {
941 int snap_pending = 0; 1040 int snap_pending = 0;
942 joined = cur_trans->num_joined; 1041 joined = cur_trans->num_joined;
@@ -949,7 +1048,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
949 1048
950 if (cur_trans->num_writers > 1) 1049 if (cur_trans->num_writers > 1)
951 timeout = MAX_SCHEDULE_TIMEOUT; 1050 timeout = MAX_SCHEDULE_TIMEOUT;
952 else 1051 else if (should_grow)
953 timeout = 1; 1052 timeout = 1;
954 1053
955 mutex_unlock(&root->fs_info->trans_mutex); 1054 mutex_unlock(&root->fs_info->trans_mutex);
@@ -959,16 +1058,30 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
959 BUG_ON(ret); 1058 BUG_ON(ret);
960 } 1059 }
961 1060
962 schedule_timeout(timeout); 1061 /*
1062 * rename doesn't use btrfs_join_transaction, so once we
1063 * set the transaction to blocked above, we aren't going
1064 * to get any new ordered operations. We can safely run
1065 * it here and know for sure that nothing new will be added
1066 * to the list
1067 */
1068 btrfs_run_ordered_operations(root, 1);
1069
1070 smp_mb();
1071 if (cur_trans->num_writers > 1 || should_grow)
1072 schedule_timeout(timeout);
963 1073
964 mutex_lock(&root->fs_info->trans_mutex); 1074 mutex_lock(&root->fs_info->trans_mutex);
965 finish_wait(&cur_trans->writer_wait, &wait); 1075 finish_wait(&cur_trans->writer_wait, &wait);
966 } while (cur_trans->num_writers > 1 || 1076 } while (cur_trans->num_writers > 1 ||
967 (cur_trans->num_joined != joined)); 1077 (should_grow && cur_trans->num_joined != joined));
968 1078
969 ret = create_pending_snapshots(trans, root->fs_info); 1079 ret = create_pending_snapshots(trans, root->fs_info);
970 BUG_ON(ret); 1080 BUG_ON(ret);
971 1081
1082 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1083 BUG_ON(ret);
1084
972 WARN_ON(cur_trans != trans->transaction); 1085 WARN_ON(cur_trans != trans->transaction);
973 1086
974 /* btrfs_commit_tree_roots is responsible for getting the 1087 /* btrfs_commit_tree_roots is responsible for getting the
@@ -1032,6 +1145,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1032 btrfs_copy_pinned(root, pinned_copy); 1145 btrfs_copy_pinned(root, pinned_copy);
1033 1146
1034 trans->transaction->blocked = 0; 1147 trans->transaction->blocked = 0;
1148
1035 wake_up(&root->fs_info->transaction_throttle); 1149 wake_up(&root->fs_info->transaction_throttle);
1036 wake_up(&root->fs_info->transaction_wait); 1150 wake_up(&root->fs_info->transaction_wait);
1037 1151
@@ -1058,6 +1172,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1058 mutex_lock(&root->fs_info->trans_mutex); 1172 mutex_lock(&root->fs_info->trans_mutex);
1059 1173
1060 cur_trans->commit_done = 1; 1174 cur_trans->commit_done = 1;
1175
1061 root->fs_info->last_trans_committed = cur_trans->transid; 1176 root->fs_info->last_trans_committed = cur_trans->transid;
1062 wake_up(&cur_trans->commit_wait); 1177 wake_up(&cur_trans->commit_wait);
1063 1178
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index ea292117f882..94f5bde2b58d 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -19,10 +19,16 @@
19#ifndef __BTRFS_TRANSACTION__ 19#ifndef __BTRFS_TRANSACTION__
20#define __BTRFS_TRANSACTION__ 20#define __BTRFS_TRANSACTION__
21#include "btrfs_inode.h" 21#include "btrfs_inode.h"
22#include "delayed-ref.h"
22 23
23struct btrfs_transaction { 24struct btrfs_transaction {
24 u64 transid; 25 u64 transid;
26 /*
 27 * total writers in this transaction; it must be zero before the
28 * transaction can end
29 */
25 unsigned long num_writers; 30 unsigned long num_writers;
31
26 unsigned long num_joined; 32 unsigned long num_joined;
27 int in_commit; 33 int in_commit;
28 int use_count; 34 int use_count;
@@ -34,6 +40,7 @@ struct btrfs_transaction {
34 wait_queue_head_t writer_wait; 40 wait_queue_head_t writer_wait;
35 wait_queue_head_t commit_wait; 41 wait_queue_head_t commit_wait;
36 struct list_head pending_snapshots; 42 struct list_head pending_snapshots;
43 struct btrfs_delayed_ref_root delayed_refs;
37}; 44};
38 45
39struct btrfs_trans_handle { 46struct btrfs_trans_handle {
@@ -44,6 +51,7 @@ struct btrfs_trans_handle {
44 u64 block_group; 51 u64 block_group;
45 u64 alloc_exclude_start; 52 u64 alloc_exclude_start;
46 u64 alloc_exclude_nr; 53 u64 alloc_exclude_nr;
54 unsigned long delayed_ref_updates;
47}; 55};
48 56
49struct btrfs_pending_snapshot { 57struct btrfs_pending_snapshot {
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index 98d25fa4570e..b10eacdb1620 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -124,8 +124,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
124 } 124 }
125 125
126 btrfs_release_path(root, path); 126 btrfs_release_path(root, path);
127 if (is_extent)
128 btrfs_extent_post_op(trans, root);
129out: 127out:
130 if (path) 128 if (path)
131 btrfs_free_path(path); 129 btrfs_free_path(path);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9c462fbd60fa..fc9b87a7975b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -35,6 +35,49 @@
35#define LOG_INODE_EXISTS 1 35#define LOG_INODE_EXISTS 1
36 36
37/* 37/*
38 * directory trouble cases
39 *
40 * 1) on rename or unlink, if the inode being unlinked isn't in the fsync
41 * log, we must force a full commit before doing an fsync of the directory
42 * where the unlink was done.
43 * ---> record transid of last unlink/rename per directory
44 *
45 * mkdir foo/some_dir
46 * normal commit
47 * rename foo/some_dir foo2/some_dir
48 * mkdir foo/some_dir
49 * fsync foo/some_dir/some_file
50 *
51 * The fsync above will unlink the original some_dir without recording
52 * it in its new location (foo2). After a crash, some_dir will be gone
53 * unless the fsync of some_file forces a full commit
54 *
55 * 2) we must log any new names for any file or dir that is in the fsync
56 * log. ---> check inode while renaming/linking.
57 *
58 * 2a) we must log any new names for any file or dir during rename
59 * when the directory they are being removed from was logged.
60 * ---> check inode and old parent dir during rename
61 *
 62 * 2a is actually the more important variant. Without the extra logging
63 * a crash might unlink the old name without recreating the new one
64 *
65 * 3) after a crash, we must go through any directories with a link count
66 * of zero and redo the rm -rf
67 *
68 * mkdir f1/foo
69 * normal commit
70 * rm -rf f1/foo
71 * fsync(f1)
72 *
73 * The directory f1 was fully removed from the FS, but fsync was never
74 * called on f1, only its parent dir. After a crash the rm -rf must
75 * be replayed. This must be able to recurse down the entire
76 * directory tree. The inode link count fixup code takes care of the
77 * ugly details.
78 */
79
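To make case 1 concrete: the per-directory transid recording boils down to a comparison like the sketch below (editor's illustration; the helper name is hypothetical, and the real check lives in check_parent_dirs_for_sync later in this file):

static int sketch_dir_needs_full_commit(struct inode *dir)
{
	u64 last_committed = BTRFS_I(dir)->root->fs_info->last_trans_committed;

	/* an unlink or rename in this dir since the last commit means
	 * the tree log alone cannot make an fsync of it safe */
	return BTRFS_I(dir)->last_unlink_trans > last_committed;
}
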
80/*
38 * stages for the tree walking. The first 81 * stages for the tree walking. The first
39 * stage (0) is to only pin down the blocks we find 82 * stage (0) is to only pin down the blocks we find
40 * the second stage (1) is to make sure that all the inodes 83 * the second stage (1) is to make sure that all the inodes
@@ -47,12 +90,17 @@
47#define LOG_WALK_REPLAY_INODES 1 90#define LOG_WALK_REPLAY_INODES 1
48#define LOG_WALK_REPLAY_ALL 2 91#define LOG_WALK_REPLAY_ALL 2
49 92
50static int __btrfs_log_inode(struct btrfs_trans_handle *trans, 93static int btrfs_log_inode(struct btrfs_trans_handle *trans,
51 struct btrfs_root *root, struct inode *inode, 94 struct btrfs_root *root, struct inode *inode,
52 int inode_only); 95 int inode_only);
53static int link_to_fixup_dir(struct btrfs_trans_handle *trans, 96static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
54 struct btrfs_root *root, 97 struct btrfs_root *root,
55 struct btrfs_path *path, u64 objectid); 98 struct btrfs_path *path, u64 objectid);
99static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
100 struct btrfs_root *root,
101 struct btrfs_root *log,
102 struct btrfs_path *path,
103 u64 dirid, int del_all);
56 104
57/* 105/*
58 * tree logging is a special write ahead log used to make sure that 106 * tree logging is a special write ahead log used to make sure that
@@ -133,10 +181,25 @@ static int join_running_log_trans(struct btrfs_root *root)
133} 181}
134 182
135/* 183/*
184 * This either makes the current running log transaction wait
185 * until you call btrfs_end_log_trans() or it makes any future
186 * log transactions wait until you call btrfs_end_log_trans()
187 */
188int btrfs_pin_log_trans(struct btrfs_root *root)
189{
190 int ret = -ENOENT;
191
192 mutex_lock(&root->log_mutex);
193 atomic_inc(&root->log_writers);
194 mutex_unlock(&root->log_mutex);
195 return ret;
196}
197
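A short sketch of the intended pairing (editor's illustration; in this series the rename path pins the log while names are updated so a concurrent fsync cannot sync a half-updated log):

static void sketch_pinned_log_section(struct btrfs_root *root)
{
	btrfs_pin_log_trans(root);

	/* ... update names/backrefs; no log sync can complete here ... */

	btrfs_end_log_trans(root);
}
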
198/*
136 * indicate we're done making changes to the log tree 199 * indicate we're done making changes to the log tree
137 * and wake up anyone waiting to do a sync 200 * and wake up anyone waiting to do a sync
138 */ 201 */
139static int end_log_trans(struct btrfs_root *root) 202int btrfs_end_log_trans(struct btrfs_root *root)
140{ 203{
141 if (atomic_dec_and_test(&root->log_writers)) { 204 if (atomic_dec_and_test(&root->log_writers)) {
142 smp_mb(); 205 smp_mb();
@@ -203,7 +266,6 @@ static int process_one_buffer(struct btrfs_root *log,
203 mutex_lock(&log->fs_info->pinned_mutex); 266 mutex_lock(&log->fs_info->pinned_mutex);
204 btrfs_update_pinned_extents(log->fs_info->extent_root, 267 btrfs_update_pinned_extents(log->fs_info->extent_root,
205 eb->start, eb->len, 1); 268 eb->start, eb->len, 1);
206 mutex_unlock(&log->fs_info->pinned_mutex);
207 } 269 }
208 270
209 if (btrfs_buffer_uptodate(eb, gen)) { 271 if (btrfs_buffer_uptodate(eb, gen)) {
@@ -603,6 +665,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
603 665
604 ret = link_to_fixup_dir(trans, root, path, location.objectid); 666 ret = link_to_fixup_dir(trans, root, path, location.objectid);
605 BUG_ON(ret); 667 BUG_ON(ret);
668
606 ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); 669 ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
607 BUG_ON(ret); 670 BUG_ON(ret);
608 kfree(name); 671 kfree(name);
@@ -804,6 +867,7 @@ conflict_again:
804 victim_name_len)) { 867 victim_name_len)) {
805 btrfs_inc_nlink(inode); 868 btrfs_inc_nlink(inode);
806 btrfs_release_path(root, path); 869 btrfs_release_path(root, path);
870
807 ret = btrfs_unlink_inode(trans, root, dir, 871 ret = btrfs_unlink_inode(trans, root, dir,
808 inode, victim_name, 872 inode, victim_name,
809 victim_name_len); 873 victim_name_len);
@@ -922,13 +986,20 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
922 key.offset--; 986 key.offset--;
923 btrfs_release_path(root, path); 987 btrfs_release_path(root, path);
924 } 988 }
925 btrfs_free_path(path); 989 btrfs_release_path(root, path);
926 if (nlink != inode->i_nlink) { 990 if (nlink != inode->i_nlink) {
927 inode->i_nlink = nlink; 991 inode->i_nlink = nlink;
928 btrfs_update_inode(trans, root, inode); 992 btrfs_update_inode(trans, root, inode);
929 } 993 }
930 BTRFS_I(inode)->index_cnt = (u64)-1; 994 BTRFS_I(inode)->index_cnt = (u64)-1;
931 995
996 if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) {
997 ret = replay_dir_deletes(trans, root, NULL, path,
998 inode->i_ino, 1);
999 BUG_ON(ret);
1000 }
1001 btrfs_free_path(path);
1002
932 return 0; 1003 return 0;
933} 1004}
934 1005
@@ -971,9 +1042,12 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
971 1042
972 iput(inode); 1043 iput(inode);
973 1044
974 if (key.offset == 0) 1045 /*
975 break; 1046 * fixup on a directory may create new entries,
976 key.offset--; 1047 * make sure we always look for the highset possible
1048 * offset
1049 */
1050 key.offset = (u64)-1;
977 } 1051 }
978 btrfs_release_path(root, path); 1052 btrfs_release_path(root, path);
979 return 0; 1053 return 0;
@@ -1313,11 +1387,11 @@ again:
1313 read_extent_buffer(eb, name, (unsigned long)(di + 1), 1387 read_extent_buffer(eb, name, (unsigned long)(di + 1),
1314 name_len); 1388 name_len);
1315 log_di = NULL; 1389 log_di = NULL;
1316 if (dir_key->type == BTRFS_DIR_ITEM_KEY) { 1390 if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) {
1317 log_di = btrfs_lookup_dir_item(trans, log, log_path, 1391 log_di = btrfs_lookup_dir_item(trans, log, log_path,
1318 dir_key->objectid, 1392 dir_key->objectid,
1319 name, name_len, 0); 1393 name, name_len, 0);
1320 } else if (dir_key->type == BTRFS_DIR_INDEX_KEY) { 1394 } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) {
1321 log_di = btrfs_lookup_dir_index_item(trans, log, 1395 log_di = btrfs_lookup_dir_index_item(trans, log,
1322 log_path, 1396 log_path,
1323 dir_key->objectid, 1397 dir_key->objectid,
@@ -1378,7 +1452,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
1378 struct btrfs_root *root, 1452 struct btrfs_root *root,
1379 struct btrfs_root *log, 1453 struct btrfs_root *log,
1380 struct btrfs_path *path, 1454 struct btrfs_path *path,
1381 u64 dirid) 1455 u64 dirid, int del_all)
1382{ 1456{
1383 u64 range_start; 1457 u64 range_start;
1384 u64 range_end; 1458 u64 range_end;
@@ -1408,10 +1482,14 @@ again:
1408 range_start = 0; 1482 range_start = 0;
1409 range_end = 0; 1483 range_end = 0;
1410 while (1) { 1484 while (1) {
1411 ret = find_dir_range(log, path, dirid, key_type, 1485 if (del_all)
1412 &range_start, &range_end); 1486 range_end = (u64)-1;
1413 if (ret != 0) 1487 else {
1414 break; 1488 ret = find_dir_range(log, path, dirid, key_type,
1489 &range_start, &range_end);
1490 if (ret != 0)
1491 break;
1492 }
1415 1493
1416 dir_key.offset = range_start; 1494 dir_key.offset = range_start;
1417 while (1) { 1495 while (1) {
@@ -1437,7 +1515,8 @@ again:
1437 break; 1515 break;
1438 1516
1439 ret = check_item_in_log(trans, root, log, path, 1517 ret = check_item_in_log(trans, root, log, path,
1440 log_path, dir, &found_key); 1518 log_path, dir,
1519 &found_key);
1441 BUG_ON(ret); 1520 BUG_ON(ret);
1442 if (found_key.offset == (u64)-1) 1521 if (found_key.offset == (u64)-1)
1443 break; 1522 break;
@@ -1514,7 +1593,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1514 mode = btrfs_inode_mode(eb, inode_item); 1593 mode = btrfs_inode_mode(eb, inode_item);
1515 if (S_ISDIR(mode)) { 1594 if (S_ISDIR(mode)) {
1516 ret = replay_dir_deletes(wc->trans, 1595 ret = replay_dir_deletes(wc->trans,
1517 root, log, path, key.objectid); 1596 root, log, path, key.objectid, 0);
1518 BUG_ON(ret); 1597 BUG_ON(ret);
1519 } 1598 }
1520 ret = overwrite_item(wc->trans, root, path, 1599 ret = overwrite_item(wc->trans, root, path,
@@ -1533,6 +1612,17 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1533 root, inode, inode->i_size, 1612 root, inode, inode->i_size,
1534 BTRFS_EXTENT_DATA_KEY); 1613 BTRFS_EXTENT_DATA_KEY);
1535 BUG_ON(ret); 1614 BUG_ON(ret);
1615
1616 /* if the nlink count is zero here, the iput
1617 * will free the inode. We bump it to make
1618 * sure it doesn't get freed until the link
1619 * count fixup is done
1620 */
1621 if (inode->i_nlink == 0) {
1622 btrfs_inc_nlink(inode);
1623 btrfs_update_inode(wc->trans,
1624 root, inode);
1625 }
1536 iput(inode); 1626 iput(inode);
1537 } 1627 }
1538 ret = link_to_fixup_dir(wc->trans, root, 1628 ret = link_to_fixup_dir(wc->trans, root,
@@ -1840,7 +1930,8 @@ static int update_log_root(struct btrfs_trans_handle *trans,
1840 return ret; 1930 return ret;
1841} 1931}
1842 1932
1843static int wait_log_commit(struct btrfs_root *root, unsigned long transid) 1933static int wait_log_commit(struct btrfs_trans_handle *trans,
1934 struct btrfs_root *root, unsigned long transid)
1844{ 1935{
1845 DEFINE_WAIT(wait); 1936 DEFINE_WAIT(wait);
1846 int index = transid % 2; 1937 int index = transid % 2;
@@ -1854,9 +1945,12 @@ static int wait_log_commit(struct btrfs_root *root, unsigned long transid)
1854 prepare_to_wait(&root->log_commit_wait[index], 1945 prepare_to_wait(&root->log_commit_wait[index],
1855 &wait, TASK_UNINTERRUPTIBLE); 1946 &wait, TASK_UNINTERRUPTIBLE);
1856 mutex_unlock(&root->log_mutex); 1947 mutex_unlock(&root->log_mutex);
1857 if (root->log_transid < transid + 2 && 1948
1949 if (root->fs_info->last_trans_log_full_commit !=
1950 trans->transid && root->log_transid < transid + 2 &&
1858 atomic_read(&root->log_commit[index])) 1951 atomic_read(&root->log_commit[index]))
1859 schedule(); 1952 schedule();
1953
1860 finish_wait(&root->log_commit_wait[index], &wait); 1954 finish_wait(&root->log_commit_wait[index], &wait);
1861 mutex_lock(&root->log_mutex); 1955 mutex_lock(&root->log_mutex);
1862 } while (root->log_transid < transid + 2 && 1956 } while (root->log_transid < transid + 2 &&
@@ -1864,14 +1958,16 @@ static int wait_log_commit(struct btrfs_root *root, unsigned long transid)
1864 return 0; 1958 return 0;
1865} 1959}
1866 1960
1867static int wait_for_writer(struct btrfs_root *root) 1961static int wait_for_writer(struct btrfs_trans_handle *trans,
1962 struct btrfs_root *root)
1868{ 1963{
1869 DEFINE_WAIT(wait); 1964 DEFINE_WAIT(wait);
1870 while (atomic_read(&root->log_writers)) { 1965 while (atomic_read(&root->log_writers)) {
1871 prepare_to_wait(&root->log_writer_wait, 1966 prepare_to_wait(&root->log_writer_wait,
1872 &wait, TASK_UNINTERRUPTIBLE); 1967 &wait, TASK_UNINTERRUPTIBLE);
1873 mutex_unlock(&root->log_mutex); 1968 mutex_unlock(&root->log_mutex);
1874 if (atomic_read(&root->log_writers)) 1969 if (root->fs_info->last_trans_log_full_commit !=
1970 trans->transid && atomic_read(&root->log_writers))
1875 schedule(); 1971 schedule();
1876 mutex_lock(&root->log_mutex); 1972 mutex_lock(&root->log_mutex);
1877 finish_wait(&root->log_writer_wait, &wait); 1973 finish_wait(&root->log_writer_wait, &wait);
@@ -1882,7 +1978,14 @@ static int wait_for_writer(struct btrfs_root *root)
1882/* 1978/*
1883 * btrfs_sync_log sends a given tree log down to the disk and 1979 * btrfs_sync_log sends a given tree log down to the disk and
1884 * updates the super blocks to record it. When this call is done, 1980 * updates the super blocks to record it. When this call is done,
1885 * you know that any inodes previously logged are safely on disk 1981 * you know that any inodes previously logged are safely on disk only
1982 * if it returns 0.
1983 *
1984 * Any other return value means you need to call btrfs_commit_transaction.
1985 * Some of the edge cases for fsyncing directories that have had unlinks
1986 * or renames done in the past mean that sometimes the only safe
1987 * fsync is to commit the whole FS. When btrfs_sync_log returns -EAGAIN,
1988 * that has happened.
1886 */ 1989 */
1887int btrfs_sync_log(struct btrfs_trans_handle *trans, 1990int btrfs_sync_log(struct btrfs_trans_handle *trans,
1888 struct btrfs_root *root) 1991 struct btrfs_root *root)
@@ -1896,7 +1999,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1896 mutex_lock(&root->log_mutex); 1999 mutex_lock(&root->log_mutex);
1897 index1 = root->log_transid % 2; 2000 index1 = root->log_transid % 2;
1898 if (atomic_read(&root->log_commit[index1])) { 2001 if (atomic_read(&root->log_commit[index1])) {
1899 wait_log_commit(root, root->log_transid); 2002 wait_log_commit(trans, root, root->log_transid);
1900 mutex_unlock(&root->log_mutex); 2003 mutex_unlock(&root->log_mutex);
1901 return 0; 2004 return 0;
1902 } 2005 }
@@ -1904,18 +2007,26 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1904 2007
1905 /* wait for previous tree log sync to complete */ 2008 /* wait for previous tree log sync to complete */
1906 if (atomic_read(&root->log_commit[(index1 + 1) % 2])) 2009 if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
1907 wait_log_commit(root, root->log_transid - 1); 2010 wait_log_commit(trans, root, root->log_transid - 1);
1908 2011
1909 while (1) { 2012 while (1) {
1910 unsigned long batch = root->log_batch; 2013 unsigned long batch = root->log_batch;
1911 mutex_unlock(&root->log_mutex); 2014 mutex_unlock(&root->log_mutex);
1912 schedule_timeout_uninterruptible(1); 2015 schedule_timeout_uninterruptible(1);
1913 mutex_lock(&root->log_mutex); 2016 mutex_lock(&root->log_mutex);
1914 wait_for_writer(root); 2017
2018 wait_for_writer(trans, root);
1915 if (batch == root->log_batch) 2019 if (batch == root->log_batch)
1916 break; 2020 break;
1917 } 2021 }
1918 2022
2023 /* bail out if we need to do a full commit */
2024 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2025 ret = -EAGAIN;
2026 mutex_unlock(&root->log_mutex);
2027 goto out;
2028 }
2029
1919 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); 2030 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages);
1920 BUG_ON(ret); 2031 BUG_ON(ret);
1921 2032
@@ -1951,16 +2062,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1951 2062
1952 index2 = log_root_tree->log_transid % 2; 2063 index2 = log_root_tree->log_transid % 2;
1953 if (atomic_read(&log_root_tree->log_commit[index2])) { 2064 if (atomic_read(&log_root_tree->log_commit[index2])) {
1954 wait_log_commit(log_root_tree, log_root_tree->log_transid); 2065 wait_log_commit(trans, log_root_tree,
2066 log_root_tree->log_transid);
1955 mutex_unlock(&log_root_tree->log_mutex); 2067 mutex_unlock(&log_root_tree->log_mutex);
1956 goto out; 2068 goto out;
1957 } 2069 }
1958 atomic_set(&log_root_tree->log_commit[index2], 1); 2070 atomic_set(&log_root_tree->log_commit[index2], 1);
1959 2071
1960 if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) 2072 if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) {
1961 wait_log_commit(log_root_tree, log_root_tree->log_transid - 1); 2073 wait_log_commit(trans, log_root_tree,
2074 log_root_tree->log_transid - 1);
2075 }
2076
2077 wait_for_writer(trans, log_root_tree);
1962 2078
1963 wait_for_writer(log_root_tree); 2079 /*
2080 * now that we've moved on to the tree of log tree roots,
2081 * check the full commit flag again
2082 */
2083 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2084 mutex_unlock(&log_root_tree->log_mutex);
2085 ret = -EAGAIN;
2086 goto out_wake_log_root;
2087 }
1964 2088
1965 ret = btrfs_write_and_wait_marked_extents(log_root_tree, 2089 ret = btrfs_write_and_wait_marked_extents(log_root_tree,
1966 &log_root_tree->dirty_log_pages); 2090 &log_root_tree->dirty_log_pages);
@@ -1985,7 +2109,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1985 * in and cause problems either. 2109 * in and cause problems either.
1986 */ 2110 */
1987 write_ctree_super(trans, root->fs_info->tree_root, 2); 2111 write_ctree_super(trans, root->fs_info->tree_root, 2);
2112 ret = 0;
1988 2113
2114out_wake_log_root:
1989 atomic_set(&log_root_tree->log_commit[index2], 0); 2115 atomic_set(&log_root_tree->log_commit[index2], 0);
1990 smp_mb(); 2116 smp_mb();
1991 if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) 2117 if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
@@ -1998,7 +2124,8 @@ out:
1998 return 0; 2124 return 0;
1999} 2125}
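
A hedged sketch of the caller contract described in the comment above btrfs_sync_log: the fsync path treats any nonzero return as "fall back to a full transaction commit". The flow mirrors the btrfs_sync_file() changes elsewhere in this series (not shown in this section), so take the exact shape as illustrative:

static int sketch_fsync_tail(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, struct dentry *dentry)
{
	int ret = btrfs_log_dentry_safe(trans, root, dentry);

	if (ret == 0)
		ret = btrfs_sync_log(trans, root);
	if (ret == 0)
		return btrfs_end_transaction(trans, root);

	/* logging refused (1) or bailed with -EAGAIN: commit the world */
	return btrfs_commit_transaction(trans, root);
}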
2000 2126
2001/* * free all the extents used by the tree log. This should be called 2127/*
2128 * free all the extents used by the tree log. This should be called
2002 * at commit time of the full transaction 2129 * at commit time of the full transaction
2003 */ 2130 */
2004int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) 2131int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
@@ -2132,7 +2259,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2132 2259
2133 btrfs_free_path(path); 2260 btrfs_free_path(path);
2134 mutex_unlock(&BTRFS_I(dir)->log_mutex); 2261 mutex_unlock(&BTRFS_I(dir)->log_mutex);
2135 end_log_trans(root); 2262 btrfs_end_log_trans(root);
2136 2263
2137 return 0; 2264 return 0;
2138} 2265}
@@ -2159,7 +2286,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
2159 ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, 2286 ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino,
2160 dirid, &index); 2287 dirid, &index);
2161 mutex_unlock(&BTRFS_I(inode)->log_mutex); 2288 mutex_unlock(&BTRFS_I(inode)->log_mutex);
2162 end_log_trans(root); 2289 btrfs_end_log_trans(root);
2163 2290
2164 return ret; 2291 return ret;
2165} 2292}
@@ -2559,7 +2686,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2559 * 2686 *
2560 * This handles both files and directories. 2687 * This handles both files and directories.
2561 */ 2688 */
2562static int __btrfs_log_inode(struct btrfs_trans_handle *trans, 2689static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2563 struct btrfs_root *root, struct inode *inode, 2690 struct btrfs_root *root, struct inode *inode,
2564 int inode_only) 2691 int inode_only)
2565{ 2692{
@@ -2585,28 +2712,17 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans,
2585 min_key.offset = 0; 2712 min_key.offset = 0;
2586 2713
2587 max_key.objectid = inode->i_ino; 2714 max_key.objectid = inode->i_ino;
2715
2716 /* today the code can only do partial logging of directories */
2717 if (!S_ISDIR(inode->i_mode))
2718 inode_only = LOG_INODE_ALL;
2719
2588 if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) 2720 if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode))
2589 max_key.type = BTRFS_XATTR_ITEM_KEY; 2721 max_key.type = BTRFS_XATTR_ITEM_KEY;
2590 else 2722 else
2591 max_key.type = (u8)-1; 2723 max_key.type = (u8)-1;
2592 max_key.offset = (u64)-1; 2724 max_key.offset = (u64)-1;
2593 2725
2594 /*
2595 * if this inode has already been logged and we're in inode_only
2596 * mode, we don't want to delete the things that have already
2597 * been written to the log.
2598 *
2599 * But, if the inode has been through an inode_only log,
2600 * the logged_trans field is not set. This allows us to catch
2601 * any new names for this inode in the backrefs by logging it
2602 * again
2603 */
2604 if (inode_only == LOG_INODE_EXISTS &&
2605 BTRFS_I(inode)->logged_trans == trans->transid) {
2606 btrfs_free_path(path);
2607 btrfs_free_path(dst_path);
2608 goto out;
2609 }
2610 mutex_lock(&BTRFS_I(inode)->log_mutex); 2726 mutex_lock(&BTRFS_I(inode)->log_mutex);
2611 2727
2612 /* 2728 /*
@@ -2693,7 +2809,6 @@ next_slot:
2693 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 2809 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
2694 btrfs_release_path(root, path); 2810 btrfs_release_path(root, path);
2695 btrfs_release_path(log, dst_path); 2811 btrfs_release_path(log, dst_path);
2696 BTRFS_I(inode)->log_dirty_trans = 0;
2697 ret = log_directory_changes(trans, root, inode, path, dst_path); 2812 ret = log_directory_changes(trans, root, inode, path, dst_path);
2698 BUG_ON(ret); 2813 BUG_ON(ret);
2699 } 2814 }
@@ -2702,19 +2817,69 @@ next_slot:
2702 2817
2703 btrfs_free_path(path); 2818 btrfs_free_path(path);
2704 btrfs_free_path(dst_path); 2819 btrfs_free_path(dst_path);
2705out:
2706 return 0; 2820 return 0;
2707} 2821}
2708 2822
2709int btrfs_log_inode(struct btrfs_trans_handle *trans, 2823/*
2710 struct btrfs_root *root, struct inode *inode, 2824 * follow the dentry parent pointers up the chain and see if any
2711 int inode_only) 2825 * of the directories in it require a full commit before they can
2826 * be logged. Returns zero if nothing special needs to be done or 1 if
2827 * a full commit is required.
2828 */
2829static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
2830 struct inode *inode,
2831 struct dentry *parent,
2832 struct super_block *sb,
2833 u64 last_committed)
2712{ 2834{
2713 int ret; 2835 int ret = 0;
2836 struct btrfs_root *root;
2714 2837
2715 start_log_trans(trans, root); 2838 /*
2716 ret = __btrfs_log_inode(trans, root, inode, inode_only); 2839 * for regular files, if its inode is already on disk, we don't
2717 end_log_trans(root); 2840 * have to worry about the parents at all. This is because
2841 * we can use the last_unlink_trans field to record renames
2842 * and other fun in this file.
2843 */
2844 if (S_ISREG(inode->i_mode) &&
2845 BTRFS_I(inode)->generation <= last_committed &&
2846 BTRFS_I(inode)->last_unlink_trans <= last_committed)
2847 goto out;
2848
2849 if (!S_ISDIR(inode->i_mode)) {
2850 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
2851 goto out;
2852 inode = parent->d_inode;
2853 }
2854
2855 while (1) {
2856 BTRFS_I(inode)->logged_trans = trans->transid;
2857 smp_mb();
2858
2859 if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
2860 root = BTRFS_I(inode)->root;
2861
2862 /*
2863 * make sure any commits to the log are forced
2864 * to be full commits
2865 */
2866 root->fs_info->last_trans_log_full_commit =
2867 trans->transid;
2868 ret = 1;
2869 break;
2870 }
2871
2872 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
2873 break;
2874
2875 if (parent == sb->s_root)
2876 break;
2877
2878 parent = parent->d_parent;
2879 inode = parent->d_inode;
2880
2881 }
2882out:
2718 return ret; 2883 return ret;
2719} 2884}
2720 2885
@@ -2724,31 +2889,65 @@ int btrfs_log_inode(struct btrfs_trans_handle *trans,
2724 * only logging is done of any parent directories that are older than 2889 * only logging is done of any parent directories that are older than
2725 * the last committed transaction 2890 * the last committed transaction
2726 */ 2891 */
2727int btrfs_log_dentry(struct btrfs_trans_handle *trans, 2892int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
2728 struct btrfs_root *root, struct dentry *dentry) 2893 struct btrfs_root *root, struct inode *inode,
2894 struct dentry *parent, int exists_only)
2729{ 2895{
2730 int inode_only = LOG_INODE_ALL; 2896 int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
2731 struct super_block *sb; 2897 struct super_block *sb;
2732 int ret; 2898 int ret = 0;
2899 u64 last_committed = root->fs_info->last_trans_committed;
2900
2901 sb = inode->i_sb;
2902
2903 if (root->fs_info->last_trans_log_full_commit >
2904 root->fs_info->last_trans_committed) {
2905 ret = 1;
2906 goto end_no_trans;
2907 }
2908
2909 ret = check_parent_dirs_for_sync(trans, inode, parent,
2910 sb, last_committed);
2911 if (ret)
2912 goto end_no_trans;
2733 2913
2734 start_log_trans(trans, root); 2914 start_log_trans(trans, root);
2735 sb = dentry->d_inode->i_sb;
2736 while (1) {
2737 ret = __btrfs_log_inode(trans, root, dentry->d_inode,
2738 inode_only);
2739 BUG_ON(ret);
2740 inode_only = LOG_INODE_EXISTS;
2741 2915
2742 dentry = dentry->d_parent; 2916 ret = btrfs_log_inode(trans, root, inode, inode_only);
2743 if (!dentry || !dentry->d_inode || sb != dentry->d_inode->i_sb) 2917 BUG_ON(ret);
2918
2919 /*
2920 * for regular files, if its inode is already on disk, we don't
2921 * have to worry about the parents at all. This is because
2922 * we can use the last_unlink_trans field to record renames
2923 * and other fun in this file.
2924 */
2925 if (S_ISREG(inode->i_mode) &&
2926 BTRFS_I(inode)->generation <= last_committed &&
2927 BTRFS_I(inode)->last_unlink_trans <= last_committed)
2928 goto no_parent;
2929
2930 inode_only = LOG_INODE_EXISTS;
2931 while (1) {
2932 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
2744 break; 2933 break;
2745 2934
2746 if (BTRFS_I(dentry->d_inode)->generation <= 2935 inode = parent->d_inode;
2747 root->fs_info->last_trans_committed) 2936 if (BTRFS_I(inode)->generation >
2937 root->fs_info->last_trans_committed) {
2938 ret = btrfs_log_inode(trans, root, inode, inode_only);
2939 BUG_ON(ret);
2940 }
2941 if (parent == sb->s_root)
2748 break; 2942 break;
2943
2944 parent = parent->d_parent;
2749 } 2945 }
2750 end_log_trans(root); 2946no_parent:
2751 return 0; 2947 ret = 0;
2948 btrfs_end_log_trans(root);
2949end_no_trans:
2950 return ret;
2752} 2951}
2753 2952
2754/* 2953/*
@@ -2760,12 +2959,8 @@ int btrfs_log_dentry(struct btrfs_trans_handle *trans,
2760int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, 2959int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
2761 struct btrfs_root *root, struct dentry *dentry) 2960 struct btrfs_root *root, struct dentry *dentry)
2762{ 2961{
2763 u64 gen; 2962 return btrfs_log_inode_parent(trans, root, dentry->d_inode,
2764 gen = root->fs_info->last_trans_new_blockgroup; 2963 dentry->d_parent, 0);
2765 if (gen > root->fs_info->last_trans_committed)
2766 return 1;
2767 else
2768 return btrfs_log_dentry(trans, root, dentry);
2769} 2964}
2770 2965
2771/* 2966/*
@@ -2884,3 +3079,94 @@ again:
2884 kfree(log_root_tree); 3079 kfree(log_root_tree);
2885 return 0; 3080 return 0;
2886} 3081}
3082
3083/*
3084 * there are some corner cases where we want to force a full
3085 * commit instead of allowing a directory to be logged.
3086 *
3087 * They revolve around files that were unlinked from the directory, and
3088 * this function updates the parent directory so that a full commit is
3089 * properly done if it is fsync'd later after the unlinks are done.
3090 */
3091void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
3092 struct inode *dir, struct inode *inode,
3093 int for_rename)
3094{
3095 /*
3096 * when we're logging a file, if it hasn't been renamed
3097 * or unlinked, and its inode is fully committed on disk,
3098 * we don't have to worry about walking up the directory chain
3099 * to log its parents.
3100 *
3101 * So, we use the last_unlink_trans field to put this transid
3102 * into the file. When the file is logged we check it and
3103 * don't log the parents if the file is fully on disk.
3104 */
3105 if (S_ISREG(inode->i_mode))
3106 BTRFS_I(inode)->last_unlink_trans = trans->transid;
3107
3108 /*
3109 * if this directory was already logged any new
3110 * names for this file/dir will get recorded
3111 */
3112 smp_mb();
3113 if (BTRFS_I(dir)->logged_trans == trans->transid)
3114 return;
3115
3116 /*
3117 * if the inode we're about to unlink was logged,
3118 * the log will be properly updated for any new names
3119 */
3120 if (BTRFS_I(inode)->logged_trans == trans->transid)
3121 return;
3122
3123 /*
3124 * when renaming files across directories, if the directory
3125 * we're unlinking from gets fsync'd later on, there's
3126 * no way to find the destination directory later and fsync it
3127 * properly. So, we have to be conservative and force commits
3128 * so the new name gets discovered.
3129 */
3130 if (for_rename)
3131 goto record;
3132
3133 /* we can safely do the unlink without any special recording */
3134 return;
3135
3136record:
3137 BTRFS_I(dir)->last_unlink_trans = trans->transid;
3138}
3139
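A sketch of where this hook sits in an unlink, using only helpers visible in this patch (the actual call site is in fs/btrfs/inode.c, outside this section, so treat the placement as illustrative):

	/* record the directory state before the name goes away */
	btrfs_record_unlink_dir(trans, dir, dentry->d_inode,
				0 /* not a rename */);

	ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
				 dentry->d_name.name, dentry->d_name.len);
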
3140/*
3141 * Call this after adding a new name for a file and it will properly
3142 * update the log to reflect the new name.
3143 *
3144 * It will return zero if all goes well, and it will return 1 if a
3145 * full transaction commit is required.
3146 */
3147int btrfs_log_new_name(struct btrfs_trans_handle *trans,
3148 struct inode *inode, struct inode *old_dir,
3149 struct dentry *parent)
3150{
3151 struct btrfs_root *root = BTRFS_I(inode)->root;
3152
3153 /*
3154 * this will force the logging code to walk the dentry chain
3155 * up for the file
3156 */
3157 if (S_ISREG(inode->i_mode))
3158 BTRFS_I(inode)->last_unlink_trans = trans->transid;
3159
3160 /*
3161 * if this inode hasn't been logged and the directory we're renaming it
3162 * from hasn't been logged, we don't need to log it
3163 */
3164 if (BTRFS_I(inode)->logged_trans <=
3165 root->fs_info->last_trans_committed &&
3166 (!old_dir || BTRFS_I(old_dir)->logged_trans <=
3167 root->fs_info->last_trans_committed))
3168 return 0;
3169
3170 return btrfs_log_inode_parent(trans, root, inode, parent, 1);
3171}
3172
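Taken together with btrfs_record_unlink_dir() above, a rename would use the pair roughly like this (sketch only; the real sequence is in btrfs_rename() in fs/btrfs/inode.c, outside this section):

	/* before the old name is removed: force conservative logging */
	btrfs_record_unlink_dir(trans, old_dir, old_inode, 1 /* rename */);

	/* ... move the name in the tree ... */

	/* after the new name exists: update the log, or learn that a
	 * full transaction commit is required (return value 1) */
	ret = btrfs_log_new_name(trans, old_inode, old_dir,
				 new_dentry->d_parent);
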
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index b9409b32ed02..d09c7609e16b 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -22,14 +22,9 @@
22int btrfs_sync_log(struct btrfs_trans_handle *trans, 22int btrfs_sync_log(struct btrfs_trans_handle *trans,
23 struct btrfs_root *root); 23 struct btrfs_root *root);
24int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); 24int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
25int btrfs_log_dentry(struct btrfs_trans_handle *trans,
26 struct btrfs_root *root, struct dentry *dentry);
27int btrfs_recover_log_trees(struct btrfs_root *tree_root); 25int btrfs_recover_log_trees(struct btrfs_root *tree_root);
28int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, 26int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
29 struct btrfs_root *root, struct dentry *dentry); 27 struct btrfs_root *root, struct dentry *dentry);
30int btrfs_log_inode(struct btrfs_trans_handle *trans,
31 struct btrfs_root *root, struct inode *inode,
32 int inode_only);
33int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, 28int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
34 struct btrfs_root *root, 29 struct btrfs_root *root,
35 const char *name, int name_len, 30 const char *name, int name_len,
@@ -38,4 +33,16 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
38 struct btrfs_root *root, 33 struct btrfs_root *root,
39 const char *name, int name_len, 34 const char *name, int name_len,
40 struct inode *inode, u64 dirid); 35 struct inode *inode, u64 dirid);
36int btrfs_join_running_log_trans(struct btrfs_root *root);
37int btrfs_end_log_trans(struct btrfs_root *root);
38int btrfs_pin_log_trans(struct btrfs_root *root);
39int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
40 struct btrfs_root *root, struct inode *inode,
41 struct dentry *parent, int exists_only);
42void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
43 struct inode *dir, struct inode *inode,
44 int for_rename);
45int btrfs_log_new_name(struct btrfs_trans_handle *trans,
46 struct inode *inode, struct inode *old_dir,
47 struct dentry *parent);
41#endif 48#endif
diff --git a/fs/buffer.c b/fs/buffer.c
index a2fd743d97cb..f5f8b15a6e40 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -290,7 +290,7 @@ static void free_more_memory(void)
290 &zone); 290 &zone);
291 if (zone) 291 if (zone)
292 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0, 292 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
293 GFP_NOFS); 293 GFP_NOFS, NULL);
294 } 294 }
295} 295}
296 296
@@ -547,6 +547,39 @@ repeat:
547 return err; 547 return err;
548} 548}
549 549
550void do_thaw_all(unsigned long unused)
551{
552 struct super_block *sb;
553 char b[BDEVNAME_SIZE];
554
555 spin_lock(&sb_lock);
556restart:
557 list_for_each_entry(sb, &super_blocks, s_list) {
558 sb->s_count++;
559 spin_unlock(&sb_lock);
560 down_read(&sb->s_umount);
561 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
562 printk(KERN_WARNING "Emergency Thaw on %s\n",
563 bdevname(sb->s_bdev, b));
564 up_read(&sb->s_umount);
565 spin_lock(&sb_lock);
566 if (__put_super_and_need_restart(sb))
567 goto restart;
568 }
569 spin_unlock(&sb_lock);
570 printk(KERN_WARNING "Emergency Thaw complete\n");
571}
572
573/**
574 * emergency_thaw_all -- forcibly thaw every frozen filesystem
575 *
576 * Used for emergency unfreeze of all filesystems via SysRq
577 */
578void emergency_thaw_all(void)
579{
580 pdflush_operation(do_thaw_all, 0);
581}
582
550/** 583/**
551 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers 584 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
552 * @mapping: the mapping which wants those buffers written 585 * @mapping: the mapping which wants those buffers written
@@ -621,14 +654,7 @@ static void __set_page_dirty(struct page *page,
621 spin_lock_irq(&mapping->tree_lock); 654 spin_lock_irq(&mapping->tree_lock);
622 if (page->mapping) { /* Race with truncate? */ 655 if (page->mapping) { /* Race with truncate? */
623 WARN_ON_ONCE(warn && !PageUptodate(page)); 656 WARN_ON_ONCE(warn && !PageUptodate(page));
624 657 account_page_dirtied(page, mapping);
625 if (mapping_cap_account_dirty(mapping)) {
626 __inc_zone_page_state(page, NR_FILE_DIRTY);
627 __inc_bdi_stat(mapping->backing_dev_info,
628 BDI_RECLAIMABLE);
629 task_dirty_inc(current);
630 task_io_account_write(PAGE_CACHE_SIZE);
631 }
632 radix_tree_tag_set(&mapping->page_tree, 658 radix_tree_tag_set(&mapping->page_tree,
633 page_index(page), PAGECACHE_TAG_DIRTY); 659 page_index(page), PAGECACHE_TAG_DIRTY);
634 } 660 }
@@ -2320,13 +2346,14 @@ int block_commit_write(struct page *page, unsigned from, unsigned to)
2320 * unlock the page. 2346 * unlock the page.
2321 */ 2347 */
2322int 2348int
2323block_page_mkwrite(struct vm_area_struct *vma, struct page *page, 2349block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2324 get_block_t get_block) 2350 get_block_t get_block)
2325{ 2351{
2352 struct page *page = vmf->page;
2326 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 2353 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
2327 unsigned long end; 2354 unsigned long end;
2328 loff_t size; 2355 loff_t size;
2329 int ret = -EINVAL; 2356 int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
2330 2357
2331 lock_page(page); 2358 lock_page(page);
2332 size = i_size_read(inode); 2359 size = i_size_read(inode);
@@ -2346,6 +2373,13 @@ block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
2346 if (!ret) 2373 if (!ret)
2347 ret = block_commit_write(page, 0, end); 2374 ret = block_commit_write(page, 0, end);
2348 2375
2376 if (unlikely(ret)) {
2377 if (ret == -ENOMEM)
2378 ret = VM_FAULT_OOM;
2379 else /* -ENOSPC, -EIO, etc */
2380 ret = VM_FAULT_SIGBUS;
2381 }
2382
2349out_unlock: 2383out_unlock:
2350 unlock_page(page); 2384 unlock_page(page);
2351 return ret; 2385 return ret;
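
The prototype change above ripples into every ->page_mkwrite implementation (the gfs2 and ocfs2 hunks in this diffstat are real examples). A minimal hypothetical hook now looks like this, where myfs_page_mkwrite and myfs_get_block are stand-ins, not kernel symbols:

static int myfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	/* the helper takes the vm_fault itself and already maps errors
	 * to VM_FAULT_* codes, so the hook can just pass it through */
	return block_page_mkwrite(vma, vmf, myfs_get_block);
}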
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 877e4d9a1159..7f19fefd3d45 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -404,7 +404,6 @@ cifs_proc_init(void)
404 if (proc_fs_cifs == NULL) 404 if (proc_fs_cifs == NULL)
405 return; 405 return;
406 406
407 proc_fs_cifs->owner = THIS_MODULE;
408 proc_create("DebugData", 0, proc_fs_cifs, &cifs_debug_data_proc_fops); 407 proc_create("DebugData", 0, proc_fs_cifs, &cifs_debug_data_proc_fops);
409 408
410#ifdef CONFIG_CIFS_STATS 409#ifdef CONFIG_CIFS_STATS
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index e4a6223c3145..af737bb56cb7 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -740,8 +740,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
740out_release_free_unlock: 740out_release_free_unlock:
741 crypto_free_hash(s->hash_desc.tfm); 741 crypto_free_hash(s->hash_desc.tfm);
742out_free_unlock: 742out_free_unlock:
743 memset(s->block_aligned_filename, 0, s->block_aligned_filename_size); 743 kzfree(s->block_aligned_filename);
744 kfree(s->block_aligned_filename);
745out_unlock: 744out_unlock:
746 mutex_unlock(s->tfm_mutex); 745 mutex_unlock(s->tfm_mutex);
747out: 746out:
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 96ef51489e01..295e7fa56755 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -291,8 +291,7 @@ int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon)
291 if (daemon->user_ns) 291 if (daemon->user_ns)
292 put_user_ns(daemon->user_ns); 292 put_user_ns(daemon->user_ns);
293 mutex_unlock(&daemon->mux); 293 mutex_unlock(&daemon->mux);
294 memset(daemon, 0, sizeof(*daemon)); 294 kzfree(daemon);
295 kfree(daemon);
296out: 295out:
297 return rc; 296 return rc;
298} 297}
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 5de2c2db3aa2..2a701d593d35 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -28,6 +28,7 @@ struct eventfd_ctx {
28 * issue a wakeup. 28 * issue a wakeup.
29 */ 29 */
30 __u64 count; 30 __u64 count;
31 unsigned int flags;
31}; 32};
32 33
33/* 34/*
@@ -50,7 +51,7 @@ int eventfd_signal(struct file *file, int n)
50 n = (int) (ULLONG_MAX - ctx->count); 51 n = (int) (ULLONG_MAX - ctx->count);
51 ctx->count += n; 52 ctx->count += n;
52 if (waitqueue_active(&ctx->wqh)) 53 if (waitqueue_active(&ctx->wqh))
53 wake_up_locked(&ctx->wqh); 54 wake_up_locked_poll(&ctx->wqh, POLLIN);
54 spin_unlock_irqrestore(&ctx->wqh.lock, flags); 55 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
55 56
56 return n; 57 return n;
@@ -87,22 +88,20 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
87{ 88{
88 struct eventfd_ctx *ctx = file->private_data; 89 struct eventfd_ctx *ctx = file->private_data;
89 ssize_t res; 90 ssize_t res;
90 __u64 ucnt; 91 __u64 ucnt = 0;
91 DECLARE_WAITQUEUE(wait, current); 92 DECLARE_WAITQUEUE(wait, current);
92 93
93 if (count < sizeof(ucnt)) 94 if (count < sizeof(ucnt))
94 return -EINVAL; 95 return -EINVAL;
95 spin_lock_irq(&ctx->wqh.lock); 96 spin_lock_irq(&ctx->wqh.lock);
96 res = -EAGAIN; 97 res = -EAGAIN;
97 ucnt = ctx->count; 98 if (ctx->count > 0)
98 if (ucnt > 0)
99 res = sizeof(ucnt); 99 res = sizeof(ucnt);
100 else if (!(file->f_flags & O_NONBLOCK)) { 100 else if (!(file->f_flags & O_NONBLOCK)) {
101 __add_wait_queue(&ctx->wqh, &wait); 101 __add_wait_queue(&ctx->wqh, &wait);
102 for (res = 0;;) { 102 for (res = 0;;) {
103 set_current_state(TASK_INTERRUPTIBLE); 103 set_current_state(TASK_INTERRUPTIBLE);
104 if (ctx->count > 0) { 104 if (ctx->count > 0) {
105 ucnt = ctx->count;
106 res = sizeof(ucnt); 105 res = sizeof(ucnt);
107 break; 106 break;
108 } 107 }
@@ -117,10 +116,11 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
117 __remove_wait_queue(&ctx->wqh, &wait); 116 __remove_wait_queue(&ctx->wqh, &wait);
118 __set_current_state(TASK_RUNNING); 117 __set_current_state(TASK_RUNNING);
119 } 118 }
120 if (res > 0) { 119 if (likely(res > 0)) {
121 ctx->count = 0; 120 ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
121 ctx->count -= ucnt;
122 if (waitqueue_active(&ctx->wqh)) 122 if (waitqueue_active(&ctx->wqh))
123 wake_up_locked(&ctx->wqh); 123 wake_up_locked_poll(&ctx->wqh, POLLOUT);
124 } 124 }
125 spin_unlock_irq(&ctx->wqh.lock); 125 spin_unlock_irq(&ctx->wqh.lock);
126 if (res > 0 && put_user(ucnt, (__u64 __user *) buf)) 126 if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
@@ -166,10 +166,10 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
166 __remove_wait_queue(&ctx->wqh, &wait); 166 __remove_wait_queue(&ctx->wqh, &wait);
167 __set_current_state(TASK_RUNNING); 167 __set_current_state(TASK_RUNNING);
168 } 168 }
169 if (res > 0) { 169 if (likely(res > 0)) {
170 ctx->count += ucnt; 170 ctx->count += ucnt;
171 if (waitqueue_active(&ctx->wqh)) 171 if (waitqueue_active(&ctx->wqh))
172 wake_up_locked(&ctx->wqh); 172 wake_up_locked_poll(&ctx->wqh, POLLIN);
173 } 173 }
174 spin_unlock_irq(&ctx->wqh.lock); 174 spin_unlock_irq(&ctx->wqh.lock);
175 175
@@ -207,7 +207,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
207 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC); 207 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
208 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); 208 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
209 209
210 if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK)) 210 if (flags & ~EFD_FLAGS_SET)
211 return -EINVAL; 211 return -EINVAL;
212 212
213 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 213 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
@@ -216,13 +216,14 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
216 216
217 init_waitqueue_head(&ctx->wqh); 217 init_waitqueue_head(&ctx->wqh);
218 ctx->count = count; 218 ctx->count = count;
219 ctx->flags = flags;
219 220
220 /* 221 /*
221 * When we call this, the initialization must be complete, since 222 * When we call this, the initialization must be complete, since
222 * anon_inode_getfd() will install the fd. 223 * anon_inode_getfd() will install the fd.
223 */ 224 */
224 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, 225 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
225 flags & (O_CLOEXEC | O_NONBLOCK)); 226 flags & EFD_SHARED_FCNTL_FLAGS);
226 if (fd < 0) 227 if (fd < 0)
227 kfree(ctx); 228 kfree(ctx);
228 return fd; 229 return fd;
@@ -232,3 +233,4 @@ SYSCALL_DEFINE1(eventfd, unsigned int, count)
232{ 233{
233 return sys_eventfd2(count, 0); 234 return sys_eventfd2(count, 0);
234} 235}
236
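
The eventfd changes introduce EFD_SEMAPHORE semantics: each read() now returns 1 and decrements the counter by 1, instead of returning the whole counter and resetting it to 0. A small userspace sketch of the new behaviour (EFD_SEMAPHORE's value comes from the include/linux/eventfd.h side of this patch, which is not shown in this section):

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/eventfd.h>

#ifndef EFD_SEMAPHORE
#define EFD_SEMAPHORE 1
#endif

int main(void)
{
	uint64_t v;
	int fd = eventfd(3, EFD_SEMAPHORE);	/* counter starts at 3 */

	if (fd < 0) {
		perror("eventfd");
		return 1;
	}
	read(fd, &v, sizeof(v));	/* v == 1, counter is now 2 */
	printf("first read: %llu\n", (unsigned long long)v);
	read(fd, &v, sizeof(v));	/* v == 1, counter is now 1 */
	printf("second read: %llu\n", (unsigned long long)v);
	close(fd);
	return 0;
}

Without the flag, the first read() would have returned 3 and left the counter at zero, so the second read() would block (or fail with EAGAIN).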
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index c5c424f23fd5..a89f370fadb5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * fs/eventpoll.c (Efficent event polling implementation) 2 * fs/eventpoll.c (Efficient event retrieval implementation)
3 * Copyright (C) 2001,...,2007 Davide Libenzi 3 * Copyright (C) 2001,...,2009 Davide Libenzi
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -71,29 +71,11 @@
71 * a better scalability. 71 * a better scalability.
72 */ 72 */
73 73
74#define DEBUG_EPOLL 0
75
76#if DEBUG_EPOLL > 0
77#define DPRINTK(x) printk x
78#define DNPRINTK(n, x) do { if ((n) <= DEBUG_EPOLL) printk x; } while (0)
79#else /* #if DEBUG_EPOLL > 0 */
80#define DPRINTK(x) (void) 0
81#define DNPRINTK(n, x) (void) 0
82#endif /* #if DEBUG_EPOLL > 0 */
83
84#define DEBUG_EPI 0
85
86#if DEBUG_EPI != 0
87#define EPI_SLAB_DEBUG (SLAB_DEBUG_FREE | SLAB_RED_ZONE /* | SLAB_POISON */)
88#else /* #if DEBUG_EPI != 0 */
89#define EPI_SLAB_DEBUG 0
90#endif /* #if DEBUG_EPI != 0 */
91
92/* Epoll private bits inside the event mask */ 74/* Epoll private bits inside the event mask */
93#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET) 75#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET)
94 76
95/* Maximum number of poll wake up nests we are allowing */ 77/* Maximum nesting level allowed inside epoll sets */
96#define EP_MAX_POLLWAKE_NESTS 4 78#define EP_MAX_NESTS 4
97 79
98/* Maximum msec timeout value storable in a long int */ 80/* Maximum msec timeout value storable in a long int */
99#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) 81#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
@@ -110,24 +92,21 @@ struct epoll_filefd {
110}; 92};
111 93
112/* 94/*
113 * Node that is linked into the "wake_task_list" member of the "struct poll_safewake". 95 * Structure used to track possible nested calls, to catch recursions
114 * It is used to keep track on all tasks that are currently inside the wake_up() code 96 * that are too deep and loop cycles.
115 * to 1) short-circuit the one coming from the same task and same wait queue head
116 * (loop) 2) allow a maximum number of epoll descriptors inclusion nesting
117 * 3) let go the ones coming from other tasks.
118 */ 97 */
119struct wake_task_node { 98struct nested_call_node {
120 struct list_head llink; 99 struct list_head llink;
121 struct task_struct *task; 100 void *cookie;
122 wait_queue_head_t *wq; 101 int cpu;
123}; 102};
124 103
125/* 104/*
126 * This is used to implement the safe poll wake up avoiding to reenter 105 * This structure is used as a collector for nested calls, to check for
127 * the poll callback from inside wake_up(). 106 * the maximum recursion depth and loop cycles.
128 */ 107 */
129struct poll_safewake { 108struct nested_calls {
130 struct list_head wake_task_list; 109 struct list_head tasks_call_list;
131 spinlock_t lock; 110 spinlock_t lock;
132}; 111};
133 112
@@ -213,7 +192,7 @@ struct eppoll_entry {
213 struct list_head llink; 192 struct list_head llink;
214 193
215 /* The "base" pointer is set to the container "struct epitem" */ 194 /* The "base" pointer is set to the container "struct epitem" */
216 void *base; 195 struct epitem *base;
217 196
218 /* 197 /*
219 * Wait queue item that will be linked to the target file wait 198 * Wait queue item that will be linked to the target file wait
@@ -231,6 +210,12 @@ struct ep_pqueue {
231 struct epitem *epi; 210 struct epitem *epi;
232}; 211};
233 212
213/* Used by the ep_send_events() function as callback private data */
214struct ep_send_events_data {
215 int maxevents;
216 struct epoll_event __user *events;
217};
218
234/* 219/*
235 * Configuration options available inside /proc/sys/fs/epoll/ 220 * Configuration options available inside /proc/sys/fs/epoll/
236 */ 221 */
@@ -242,8 +227,11 @@ static int max_user_watches __read_mostly;
242 */ 227 */
243static DEFINE_MUTEX(epmutex); 228static DEFINE_MUTEX(epmutex);
244 229
245/* Safe wake up implementation */ 230/* Used for safe wake up implementation */
246static struct poll_safewake psw; 231static struct nested_calls poll_safewake_ncalls;
232
233/* Used to call file's f_op->poll() under the nested calls boundaries */
234static struct nested_calls poll_readywalk_ncalls;
247 235
248/* Slab cache used to allocate "struct epitem" */ 236/* Slab cache used to allocate "struct epitem" */
249static struct kmem_cache *epi_cache __read_mostly; 237static struct kmem_cache *epi_cache __read_mostly;
@@ -312,89 +300,230 @@ static inline int ep_op_has_event(int op)
312} 300}
313 301
314/* Initialize the poll safe wake up structure */ 302/* Initialize the poll safe wake up structure */
315static void ep_poll_safewake_init(struct poll_safewake *psw) 303static void ep_nested_calls_init(struct nested_calls *ncalls)
316{ 304{
317 305 INIT_LIST_HEAD(&ncalls->tasks_call_list);
318 INIT_LIST_HEAD(&psw->wake_task_list); 306 spin_lock_init(&ncalls->lock);
319 spin_lock_init(&psw->lock);
320} 307}
321 308
322/* 309/**
323 * Perform a safe wake up of the poll wait list. The problem is that 310 * ep_call_nested - Perform a bound (possibly) nested call, by checking
324 * with the new callback'd wake up system, it is possible that the 311 * that the recursion limit is not exceeded, and that
325 * poll callback is reentered from inside the call to wake_up() done 312 * the same nested call (identified by its cookie) is
326 * on the poll wait queue head. The rule is that we cannot reenter the 313 * not re-entered.
327 * wake up code from the same task more than EP_MAX_POLLWAKE_NESTS times, 314 *
328 * and we cannot reenter the same wait queue head at all. This will 315 * @ncalls: Pointer to the nested_calls structure to be used for this call.
329 * enable to have a hierarchy of epoll file descriptor of no more than 316 * @max_nests: Maximum number of allowed nesting calls.
330 * EP_MAX_POLLWAKE_NESTS deep. We need the irq version of the spin lock 317 * @nproc: Nested call core function pointer.
331 * because this one gets called by the poll callback, that in turn is called 318 * @priv: Opaque data to be passed to the @nproc callback.
332 * from inside a wake_up(), that might be called from irq context. 319 * @cookie: Cookie to be used to identify this nested call.
320 *
321 * Returns: The code returned by the @nproc callback, or -1 if
322 * the maximum recursion limit has been exceeded.
333 */ 323 */
334static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq) 324static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
325 int (*nproc)(void *, void *, int), void *priv,
326 void *cookie)
335{ 327{
336 int wake_nests = 0; 328 int error, call_nests = 0;
337 unsigned long flags; 329 unsigned long flags;
338 struct task_struct *this_task = current; 330 int this_cpu = get_cpu();
339 struct list_head *lsthead = &psw->wake_task_list; 331 struct list_head *lsthead = &ncalls->tasks_call_list;
340 struct wake_task_node *tncur; 332 struct nested_call_node *tncur;
341 struct wake_task_node tnode; 333 struct nested_call_node tnode;
342 334
343 spin_lock_irqsave(&psw->lock, flags); 335 spin_lock_irqsave(&ncalls->lock, flags);
344 336
345 /* Try to see if the current task is already inside this wakeup call */ 337 /*
338 * Try to see if the current task is already inside this wakeup call.
339 * We use a list here, since the number of entries in this set is
340 * always very small.
341 */
346 list_for_each_entry(tncur, lsthead, llink) { 342 list_for_each_entry(tncur, lsthead, llink) {
347 343 if (tncur->cpu == this_cpu &&
348 if (tncur->wq == wq || 344 (tncur->cookie == cookie || ++call_nests > max_nests)) {
349 (tncur->task == this_task && ++wake_nests > EP_MAX_POLLWAKE_NESTS)) {
350 /* 345 /*
351 * Oops ... loop detected or maximum nest level reached. 346 * Oops ... loop detected or maximum nest level reached.
352 * We abort this wake by breaking the cycle itself. 347 * We abort this wake by breaking the cycle itself.
353 */ 348 */
354 spin_unlock_irqrestore(&psw->lock, flags); 349 error = -1;
355 return; 350 goto out_unlock;
356 } 351 }
357 } 352 }
358 353
359 /* Add the current task to the list */ 354 /* Add the current task and cookie to the list */
360 tnode.task = this_task; 355 tnode.cpu = this_cpu;
361 tnode.wq = wq; 356 tnode.cookie = cookie;
362 list_add(&tnode.llink, lsthead); 357 list_add(&tnode.llink, lsthead);
363 358
364 spin_unlock_irqrestore(&psw->lock, flags); 359 spin_unlock_irqrestore(&ncalls->lock, flags);
365 360
366 /* Do really wake up now */ 361 /* Call the nested function */
367 wake_up_nested(wq, 1 + wake_nests); 362 error = (*nproc)(priv, cookie, call_nests);
368 363
369 /* Remove the current task from the list */ 364 /* Remove the current task from the list */
370 spin_lock_irqsave(&psw->lock, flags); 365 spin_lock_irqsave(&ncalls->lock, flags);
371 list_del(&tnode.llink); 366 list_del(&tnode.llink);
372 spin_unlock_irqrestore(&psw->lock, flags); 367 out_unlock:
368 spin_unlock_irqrestore(&ncalls->lock, flags);
369
370 put_cpu();
371 return error;
372}
373
374#ifdef CONFIG_DEBUG_LOCK_ALLOC
375static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
376 unsigned long events, int subclass)
377{
378 unsigned long flags;
379
380 spin_lock_irqsave_nested(&wqueue->lock, flags, subclass);
381 wake_up_locked_poll(wqueue, events);
382 spin_unlock_irqrestore(&wqueue->lock, flags);
383}
384#else
385static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
386 unsigned long events, int subclass)
387{
388 wake_up_poll(wqueue, events);
389}
390#endif
391
392static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
393{
394 ep_wake_up_nested((wait_queue_head_t *) cookie, POLLIN,
395 1 + call_nests);
396 return 0;
397}
398
399/*
400 * Perform a safe wake up of the poll wait list. The problem is that
401 * with the new callback'd wake up system, it is possible that the
402 * poll callback is reentered from inside the call to wake_up() done
403 * on the poll wait queue head. The rule is that we cannot reenter the
404 * wake up code from the same task more than EP_MAX_NESTS times,
405 * and we cannot reenter the same wait queue head at all. This makes
406 * it possible to have a hierarchy of epoll file descriptors no more
407 * than EP_MAX_NESTS deep.
408 */
409static void ep_poll_safewake(wait_queue_head_t *wq)
410{
411 ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
412 ep_poll_wakeup_proc, NULL, wq);
373} 413}
374 414
375/* 415/*
376 * This function unregister poll callbacks from the associated file descriptor. 416 * This function unregisters poll callbacks from the associated file
377 * Since this must be called without holding "ep->lock" the atomic exchange trick 417 * descriptor. Must be called with "mtx" held (or "epmutex" if called from
378 * will protect us from multiple unregister. 418 * ep_free).
379 */ 419 */
380static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) 420static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
381{ 421{
382 int nwait;
383 struct list_head *lsthead = &epi->pwqlist; 422 struct list_head *lsthead = &epi->pwqlist;
384 struct eppoll_entry *pwq; 423 struct eppoll_entry *pwq;
385 424
386 /* This is called without locks, so we need the atomic exchange */ 425 while (!list_empty(lsthead)) {
387 nwait = xchg(&epi->nwait, 0); 426 pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
388 427
389 if (nwait) { 428 list_del(&pwq->llink);
390 while (!list_empty(lsthead)) { 429 remove_wait_queue(pwq->whead, &pwq->wait);
391 pwq = list_first_entry(lsthead, struct eppoll_entry, llink); 430 kmem_cache_free(pwq_cache, pwq);
431 }
432}
392 433
393 list_del_init(&pwq->llink); 434/**
394 remove_wait_queue(pwq->whead, &pwq->wait); 435 * ep_scan_ready_list - Scans the ready list in a way that makes possible for
395 kmem_cache_free(pwq_cache, pwq); 436 * the scan code, to call f_op->poll(). Also allows for
396 } 437 * O(NumReady) performance.
438 *
439 * @ep: Pointer to the epoll private data structure.
440 * @sproc: Pointer to the scan callback.
441 * @priv: Private opaque data passed to the @sproc callback.
442 *
443 * Returns: The same integer error code returned by the @sproc callback.
444 */
445static int ep_scan_ready_list(struct eventpoll *ep,
446 int (*sproc)(struct eventpoll *,
447 struct list_head *, void *),
448 void *priv)
449{
450 int error, pwake = 0;
451 unsigned long flags;
452 struct epitem *epi, *nepi;
453 LIST_HEAD(txlist);
454
455 /*
456 * We need to lock this because we could be hit by
457 * eventpoll_release_file() and epoll_ctl().
458 */
459 mutex_lock(&ep->mtx);
460
461 /*
462 * Steal the ready list, and re-init the original one to the
463 * empty list. Also, set ep->ovflist to NULL so that events
464 * happening while looping without locks are not lost. We cannot
465 * let the poll callback queue directly on ep->rdllist,
466 * because we want the "sproc" callback to be able to do it
467 * in a lockless way.
468 */
469 spin_lock_irqsave(&ep->lock, flags);
470 list_splice_init(&ep->rdllist, &txlist);
471 ep->ovflist = NULL;
472 spin_unlock_irqrestore(&ep->lock, flags);
473
474 /*
475 * Now call the callback function.
476 */
477 error = (*sproc)(ep, &txlist, priv);
478
479 spin_lock_irqsave(&ep->lock, flags);
480 /*
481 * During the time we spent inside the "sproc" callback, some
482 * other events might have been queued by the poll callback.
483 * We re-insert them inside the main ready-list here.
484 */
485 for (nepi = ep->ovflist; (epi = nepi) != NULL;
486 nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
487 /*
488 * We need to check if the item is already in the list.
489 * During the "sproc" callback execution time, items are
490 * queued into ->ovflist but the "txlist" might already
491 * contain them, and the list_splice() below takes care of them.
492 */
493 if (!ep_is_linked(&epi->rdllink))
494 list_add_tail(&epi->rdllink, &ep->rdllist);
495 }
496 /*
497 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
498 * releasing the lock, events will be queued in the normal way inside
499 * ep->rdllist.
500 */
501 ep->ovflist = EP_UNACTIVE_PTR;
502
503 /*
504 * Quickly re-inject items left on "txlist".
505 */
506 list_splice(&txlist, &ep->rdllist);
507
508 if (!list_empty(&ep->rdllist)) {
509 /*
510 * Wake up (if active) both the eventpoll wait list and
511 * the ->poll() wait list (delayed after we release the lock).
512 */
513 if (waitqueue_active(&ep->wq))
514 wake_up_locked(&ep->wq);
515 if (waitqueue_active(&ep->poll_wait))
516 pwake++;
397 } 517 }
518 spin_unlock_irqrestore(&ep->lock, flags);
519
520 mutex_unlock(&ep->mtx);
521
522 /* We have to call this outside the lock */
523 if (pwake)
524 ep_poll_safewake(&ep->poll_wait);
525
526 return error;
398} 527}
399 528
400/* 529/*
@@ -434,9 +563,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
434 563
435 atomic_dec(&ep->user->epoll_watches); 564 atomic_dec(&ep->user->epoll_watches);
436 565
437 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n",
438 current, ep, file));
439
440 return 0; 566 return 0;
441} 567}
442 568
@@ -447,7 +573,7 @@ static void ep_free(struct eventpoll *ep)
447 573
448 /* We need to release all tasks waiting for these file */ 574 /* We need to release all tasks waiting for these file */
449 if (waitqueue_active(&ep->poll_wait)) 575 if (waitqueue_active(&ep->poll_wait))
450 ep_poll_safewake(&psw, &ep->poll_wait); 576 ep_poll_safewake(&ep->poll_wait);
451 577
452 /* 578 /*
453 * We need to lock this because we could be hit by 579 * We need to lock this because we could be hit by
@@ -492,26 +618,54 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file)
492 if (ep) 618 if (ep)
493 ep_free(ep); 619 ep_free(ep);
494 620
495 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep));
496 return 0; 621 return 0;
497} 622}
498 623
624static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
625 void *priv)
626{
627 struct epitem *epi, *tmp;
628
629 list_for_each_entry_safe(epi, tmp, head, rdllink) {
630 if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) &
631 epi->event.events)
632 return POLLIN | POLLRDNORM;
633 else {
634 /*
635 * Item has been dropped into the ready list by the poll
636 * callback, but it's not actually ready, as far as the
637 * caller-requested events go. We can remove it here.
638 */
639 list_del_init(&epi->rdllink);
640 }
641 }
642
643 return 0;
644}
645
646static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
647{
648 return ep_scan_ready_list(priv, ep_read_events_proc, NULL);
649}
650
499static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) 651static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
500{ 652{
501 unsigned int pollflags = 0; 653 int pollflags;
502 unsigned long flags;
503 struct eventpoll *ep = file->private_data; 654 struct eventpoll *ep = file->private_data;
504 655
505 /* Insert inside our poll wait queue */ 656 /* Insert inside our poll wait queue */
506 poll_wait(file, &ep->poll_wait, wait); 657 poll_wait(file, &ep->poll_wait, wait);
507 658
508 /* Check our condition */ 659 /*
509 spin_lock_irqsave(&ep->lock, flags); 660 * Proceed to find out if wanted events are really available inside
510 if (!list_empty(&ep->rdllist)) 661 * the ready list. This needs to be done under ep_call_nested()
511 pollflags = POLLIN | POLLRDNORM; 662 * supervision, since the call to f_op->poll() done on listed files
512 spin_unlock_irqrestore(&ep->lock, flags); 663 * could re-enter here.
664 */
665 pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
666 ep_poll_readyevents_proc, ep, ep);
513 667
514 return pollflags; 668 return pollflags != -1 ? pollflags : 0;
515} 669}
516 670
517/* File callbacks that implement the eventpoll file behaviour */ 671/* File callbacks that implement the eventpoll file behaviour */
@@ -541,7 +695,7 @@ void eventpoll_release_file(struct file *file)
541 * We don't want to get "file->f_lock" because it is not 695 * We don't want to get "file->f_lock" because it is not
542 * necessary. It is not necessary because we're in the "struct file" 696 * necessary. It is not necessary because we're in the "struct file"
543 * cleanup path, and this means that no one is using this file anymore. 697 * cleanup path, and this means that no one is using this file anymore.
544 * So, for example, epoll_ctl() cannot hit here sicne if we reach this 698 * So, for example, epoll_ctl() cannot hit here since if we reach this
545 * point, the file counter already went to zero and fget() would fail. 699 * point, the file counter already went to zero and fget() would fail.
546 * The only hit might come from ep_free() but by holding the mutex 700 * The only hit might come from ep_free() but by holding the mutex
547 * will correctly serialize the operation. We do need to acquire 701 * will correctly serialize the operation. We do need to acquire
@@ -588,8 +742,6 @@ static int ep_alloc(struct eventpoll **pep)
588 742
589 *pep = ep; 743 *pep = ep;
590 744
591 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n",
592 current, ep));
593 return 0; 745 return 0;
594 746
595free_uid: 747free_uid:
@@ -623,9 +775,6 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
623 } 775 }
624 } 776 }
625 777
626 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n",
627 current, file, epir));
628
629 return epir; 778 return epir;
630} 779}
631 780
@@ -641,9 +790,6 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
641 struct epitem *epi = ep_item_from_wait(wait); 790 struct epitem *epi = ep_item_from_wait(wait);
642 struct eventpoll *ep = epi->ep; 791 struct eventpoll *ep = epi->ep;
643 792
644 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
645 current, epi->ffd.file, epi, ep));
646
647 spin_lock_irqsave(&ep->lock, flags); 793 spin_lock_irqsave(&ep->lock, flags);
648 794
649 /* 795 /*
@@ -656,6 +802,15 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
656 goto out_unlock; 802 goto out_unlock;
657 803
658 /* 804 /*
805 * Check the events coming with the callback. At this stage, not
806 * every device reports the events in the "key" parameter of the
807 * callback. We need to be able to handle both cases here, hence the
808 * test for "key" != NULL before the event match test.
809 */
810 if (key && !((unsigned long) key & epi->event.events))
811 goto out_unlock;
812
813 /*
659 * If we are transferring events to userspace, we can hold no locks 814 * If we are transferring events to userspace, we can hold no locks
660 * (because we're accessing user memory, and because of linux f_op->poll() 815 * (because we're accessing user memory, and because of linux f_op->poll()
661 * semantics). All the events that happen during that period of time are 816 * semantics). All the events that happen during that period of time are
@@ -670,12 +825,9 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
670 } 825 }
671 826
672 /* If this file is already in the ready list we exit soon */ 827 /* If this file is already in the ready list we exit soon */
673 if (ep_is_linked(&epi->rdllink)) 828 if (!ep_is_linked(&epi->rdllink))
674 goto is_linked; 829 list_add_tail(&epi->rdllink, &ep->rdllist);
675
676 list_add_tail(&epi->rdllink, &ep->rdllist);
677 830
678is_linked:
679 /* 831 /*
680 * Wake up ( if active ) both the eventpoll wait list and the ->poll() 832 * Wake up ( if active ) both the eventpoll wait list and the ->poll()
681 * wait list. 833 * wait list.
@@ -690,7 +842,7 @@ out_unlock:
690 842
691 /* We have to call this outside the lock */ 843 /* We have to call this outside the lock */
692 if (pwake) 844 if (pwake)
693 ep_poll_safewake(&psw, &ep->poll_wait); 845 ep_poll_safewake(&ep->poll_wait);
694 846
695 return 1; 847 return 1;
696} 848}
@@ -817,10 +969,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
817 969
818 /* We have to call this outside the lock */ 970 /* We have to call this outside the lock */
819 if (pwake) 971 if (pwake)
820 ep_poll_safewake(&psw, &ep->poll_wait); 972 ep_poll_safewake(&ep->poll_wait);
821
822 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_insert(%p, %p, %d)\n",
823 current, ep, tfile, fd));
824 973
825 return 0; 974 return 0;
826 975
@@ -851,15 +1000,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
851{ 1000{
852 int pwake = 0; 1001 int pwake = 0;
853 unsigned int revents; 1002 unsigned int revents;
854 unsigned long flags;
855 1003
856 /* 1004 /*
857 * Set the new event interest mask before calling f_op->poll(), otherwise 1005 * Set the new event interest mask before calling f_op->poll();
858 * a potential race might occur. In fact if we do this operation inside 1006 * otherwise we might miss an event that happens between the
859 * the lock, an event might happen between the f_op->poll() call and the 1007 * f_op->poll() call and the new event set registering.
860 * new event set registering.
861 */ 1008 */
862 epi->event.events = event->events; 1009 epi->event.events = event->events;
1010 epi->event.data = event->data; /* protected by mtx */
863 1011
864 /* 1012 /*
865 * Get current event bits. We can safely use the file* here because 1013 * Get current event bits. We can safely use the file* here because
@@ -867,16 +1015,12 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
867 */ 1015 */
868 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); 1016 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL);
869 1017
870 spin_lock_irqsave(&ep->lock, flags);
871
872 /* Copy the data member from inside the lock */
873 epi->event.data = event->data;
874
875 /* 1018 /*
876 * If the item is "hot" and it is not registered inside the ready 1019 * If the item is "hot" and it is not registered inside the ready
877 * list, push it inside. 1020 * list, push it inside.
878 */ 1021 */
879 if (revents & event->events) { 1022 if (revents & event->events) {
1023 spin_lock_irq(&ep->lock);
880 if (!ep_is_linked(&epi->rdllink)) { 1024 if (!ep_is_linked(&epi->rdllink)) {
881 list_add_tail(&epi->rdllink, &ep->rdllist); 1025 list_add_tail(&epi->rdllink, &ep->rdllist);
882 1026
@@ -886,142 +1030,84 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
886 if (waitqueue_active(&ep->poll_wait)) 1030 if (waitqueue_active(&ep->poll_wait))
887 pwake++; 1031 pwake++;
888 } 1032 }
1033 spin_unlock_irq(&ep->lock);
889 } 1034 }
890 spin_unlock_irqrestore(&ep->lock, flags);
891 1035
892 /* We have to call this outside the lock */ 1036 /* We have to call this outside the lock */
893 if (pwake) 1037 if (pwake)
894 ep_poll_safewake(&psw, &ep->poll_wait); 1038 ep_poll_safewake(&ep->poll_wait);
895 1039
896 return 0; 1040 return 0;
897} 1041}
898 1042
899static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events, 1043static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
900 int maxevents) 1044 void *priv)
901{ 1045{
902 int eventcnt, error = -EFAULT, pwake = 0; 1046 struct ep_send_events_data *esed = priv;
1047 int eventcnt;
903 unsigned int revents; 1048 unsigned int revents;
904 unsigned long flags; 1049 struct epitem *epi;
905 struct epitem *epi, *nepi; 1050 struct epoll_event __user *uevent;
906 struct list_head txlist;
907
908 INIT_LIST_HEAD(&txlist);
909
910 /*
911 * We need to lock this because we could be hit by
912 * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL).
913 */
914 mutex_lock(&ep->mtx);
915
916 /*
917 * Steal the ready list, and re-init the original one to the
918 * empty list. Also, set ep->ovflist to NULL so that events
919 * happening while looping w/out locks, are not lost. We cannot
920 * have the poll callback to queue directly on ep->rdllist,
921 * because we are doing it in the loop below, in a lockless way.
922 */
923 spin_lock_irqsave(&ep->lock, flags);
924 list_splice(&ep->rdllist, &txlist);
925 INIT_LIST_HEAD(&ep->rdllist);
926 ep->ovflist = NULL;
927 spin_unlock_irqrestore(&ep->lock, flags);
928 1051
929 /* 1052 /*
930 * We can loop without lock because this is a task private list. 1053 * We can loop without lock because we are passed a task private list.
931 * We just splice'd out the ep->rdllist in ep_collect_ready_items(). 1054 * Items cannot vanish during the loop because ep_scan_ready_list() is
932 * Items cannot vanish during the loop because we are holding "mtx". 1055 * holding "mtx" during this call.
933 */ 1056 */
934 for (eventcnt = 0; !list_empty(&txlist) && eventcnt < maxevents;) { 1057 for (eventcnt = 0, uevent = esed->events;
935 epi = list_first_entry(&txlist, struct epitem, rdllink); 1058 !list_empty(head) && eventcnt < esed->maxevents;) {
1059 epi = list_first_entry(head, struct epitem, rdllink);
936 1060
937 list_del_init(&epi->rdllink); 1061 list_del_init(&epi->rdllink);
938 1062
939 /* 1063 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) &
940 * Get the ready file event set. We can safely use the file 1064 epi->event.events;
941 * because we are holding the "mtx" and this will guarantee
942 * that both the file and the item will not vanish.
943 */
944 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL);
945 revents &= epi->event.events;
946 1065
947 /* 1066 /*
948 * Is the event mask intersect the caller-requested one, 1067 * If the event mask intersects the caller-requested one,
949 * deliver the event to userspace. Again, we are holding 1068 * deliver the event to userspace. Again, ep_scan_ready_list()
950 * "mtx", so no operations coming from userspace can change 1069 * is holding "mtx", so no operations coming from userspace
951 * the item. 1070 * can change the item.
952 */ 1071 */
953 if (revents) { 1072 if (revents) {
954 if (__put_user(revents, 1073 if (__put_user(revents, &uevent->events) ||
955 &events[eventcnt].events) || 1074 __put_user(epi->event.data, &uevent->data)) {
956 __put_user(epi->event.data, 1075 list_add(&epi->rdllink, head);
957 &events[eventcnt].data)) 1076 return eventcnt ? eventcnt : -EFAULT;
958 goto errxit; 1077 }
1078 eventcnt++;
1079 uevent++;
959 if (epi->event.events & EPOLLONESHOT) 1080 if (epi->event.events & EPOLLONESHOT)
960 epi->event.events &= EP_PRIVATE_BITS; 1081 epi->event.events &= EP_PRIVATE_BITS;
961 eventcnt++; 1082 else if (!(epi->event.events & EPOLLET)) {
1083 /*
1084 * If this file has been added in Level-
1085 * Triggered mode, we need to insert it back
1086 * into the ready list, so that the next call to
1087 * epoll_wait() will check the events'
1088 * availability again. At this point, no one can
1089 * insert into ep->rdllist besides us. The epoll_ctl()
1090 * callers are locked out by
1091 * ep_scan_ready_list() holding "mtx" and the
1092 * poll callback will queue them in ep->ovflist.
1093 */
1094 list_add_tail(&epi->rdllink, &ep->rdllist);
1095 }
962 } 1096 }
963 /*
964 * At this point, noone can insert into ep->rdllist besides
965 * us. The epoll_ctl() callers are locked out by us holding
966 * "mtx" and the poll callback will queue them in ep->ovflist.
967 */
968 if (!(epi->event.events & EPOLLET) &&
969 (revents & epi->event.events))
970 list_add_tail(&epi->rdllink, &ep->rdllist);
971 }
972 error = 0;
973
974errxit:
975
976 spin_lock_irqsave(&ep->lock, flags);
977 /*
978 * During the time we spent in the loop above, some other events
979 * might have been queued by the poll callback. We re-insert them
980 * inside the main ready-list here.
981 */
982 for (nepi = ep->ovflist; (epi = nepi) != NULL;
983 nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
984 /*
985 * If the above loop quit with errors, the epoll item might still
986 * be linked to "txlist", and the list_splice() done below will
987 * take care of those cases.
988 */
989 if (!ep_is_linked(&epi->rdllink))
990 list_add_tail(&epi->rdllink, &ep->rdllist);
991 } 1097 }
992 /*
993 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
994 * releasing the lock, events will be queued in the normal way inside
995 * ep->rdllist.
996 */
997 ep->ovflist = EP_UNACTIVE_PTR;
998 1098
999 /* 1099 return eventcnt;
1000 * In case of error in the event-send loop, or in case the number of 1100}
1001 * ready events exceeds the userspace limit, we need to splice the
1002 * "txlist" back inside ep->rdllist.
1003 */
1004 list_splice(&txlist, &ep->rdllist);
1005
1006 if (!list_empty(&ep->rdllist)) {
1007 /*
1008 * Wake up (if active) both the eventpoll wait list and the ->poll()
1009 * wait list (delayed after we release the lock).
1010 */
1011 if (waitqueue_active(&ep->wq))
1012 wake_up_locked(&ep->wq);
1013 if (waitqueue_active(&ep->poll_wait))
1014 pwake++;
1015 }
1016 spin_unlock_irqrestore(&ep->lock, flags);
1017 1101
1018 mutex_unlock(&ep->mtx); 1102static int ep_send_events(struct eventpoll *ep,
1103 struct epoll_event __user *events, int maxevents)
1104{
1105 struct ep_send_events_data esed;
1019 1106
1020 /* We have to call this outside the lock */ 1107 esed.maxevents = maxevents;
1021 if (pwake) 1108 esed.events = events;
1022 ep_poll_safewake(&psw, &ep->poll_wait);
1023 1109
1024 return eventcnt == 0 ? error: eventcnt; 1110 return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
1025} 1111}
1026 1112
1027static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, 1113static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
@@ -1033,7 +1119,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1033 wait_queue_t wait; 1119 wait_queue_t wait;
1034 1120
1035 /* 1121 /*
1036 * Calculate the timeout by checking for the "infinite" value ( -1 ) 1122 * Calculate the timeout by checking for the "infinite" value (-1)
1037 * and the overflow condition. The passed timeout is in milliseconds, 1123 * and the overflow condition. The passed timeout is in milliseconds,
1038 * hence the (t * HZ) / 1000. 1124 * hence the (t * HZ) / 1000.
1039 */ 1125 */
@@ -1076,9 +1162,8 @@ retry:
1076 1162
1077 set_current_state(TASK_RUNNING); 1163 set_current_state(TASK_RUNNING);
1078 } 1164 }
1079
1080 /* Is it worth trying to dig for events? */ 1165 /* Is it worth trying to dig for events? */
1081 eavail = !list_empty(&ep->rdllist); 1166 eavail = !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
1082 1167
1083 spin_unlock_irqrestore(&ep->lock, flags); 1168 spin_unlock_irqrestore(&ep->lock, flags);
1084 1169
@@ -1099,41 +1184,30 @@ retry:
1099 */ 1184 */
1100SYSCALL_DEFINE1(epoll_create1, int, flags) 1185SYSCALL_DEFINE1(epoll_create1, int, flags)
1101{ 1186{
1102 int error, fd = -1; 1187 int error;
1103 struct eventpoll *ep; 1188 struct eventpoll *ep = NULL;
1104 1189
1105 /* Check the EPOLL_* constant for consistency. */ 1190 /* Check the EPOLL_* constant for consistency. */
1106 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); 1191 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
1107 1192
1108 if (flags & ~EPOLL_CLOEXEC) 1193 if (flags & ~EPOLL_CLOEXEC)
1109 return -EINVAL; 1194 return -EINVAL;
1110
1111 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
1112 current, flags));
1113
1114 /* 1195 /*
1115 * Create the internal data structure ( "struct eventpoll" ). 1196 * Create the internal data structure ("struct eventpoll").
1116 */ 1197 */
1117 error = ep_alloc(&ep); 1198 error = ep_alloc(&ep);
1118 if (error < 0) { 1199 if (error < 0)
1119 fd = error; 1200 return error;
1120 goto error_return;
1121 }
1122
1123 /* 1201 /*
1124 * Creates all the items needed to set up an eventpoll file. That is, 1202 * Creates all the items needed to set up an eventpoll file. That is,
1125 * a file structure and a free file descriptor. 1203 * a file structure and a free file descriptor.
1126 */ 1204 */
1127 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, 1205 error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
1128 flags & O_CLOEXEC); 1206 flags & O_CLOEXEC);
1129 if (fd < 0) 1207 if (error < 0)
1130 ep_free(ep); 1208 ep_free(ep);
1131 1209
1132error_return: 1210 return error;
1133 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1134 current, flags, fd));
1135
1136 return fd;
1137} 1211}
1138 1212
1139SYSCALL_DEFINE1(epoll_create, int, size) 1213SYSCALL_DEFINE1(epoll_create, int, size)
@@ -1158,9 +1232,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1158 struct epitem *epi; 1232 struct epitem *epi;
1159 struct epoll_event epds; 1233 struct epoll_event epds;
1160 1234
1161 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p)\n",
1162 current, epfd, op, fd, event));
1163
1164 error = -EFAULT; 1235 error = -EFAULT;
1165 if (ep_op_has_event(op) && 1236 if (ep_op_has_event(op) &&
1166 copy_from_user(&epds, event, sizeof(struct epoll_event))) 1237 copy_from_user(&epds, event, sizeof(struct epoll_event)))
@@ -1211,7 +1282,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1211 case EPOLL_CTL_ADD: 1282 case EPOLL_CTL_ADD:
1212 if (!epi) { 1283 if (!epi) {
1213 epds.events |= POLLERR | POLLHUP; 1284 epds.events |= POLLERR | POLLHUP;
1214
1215 error = ep_insert(ep, &epds, tfile, fd); 1285 error = ep_insert(ep, &epds, tfile, fd);
1216 } else 1286 } else
1217 error = -EEXIST; 1287 error = -EEXIST;
@@ -1237,8 +1307,6 @@ error_tgt_fput:
1237error_fput: 1307error_fput:
1238 fput(file); 1308 fput(file);
1239error_return: 1309error_return:
1240 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p) = %d\n",
1241 current, epfd, op, fd, event, error));
1242 1310
1243 return error; 1311 return error;
1244} 1312}
@@ -1254,9 +1322,6 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
1254 struct file *file; 1322 struct file *file;
1255 struct eventpoll *ep; 1323 struct eventpoll *ep;
1256 1324
1257 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d)\n",
1258 current, epfd, events, maxevents, timeout));
1259
1260 /* The maximum number of events must be greater than zero */ 1325 /* The maximum number of events must be greater than zero */
1261 if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) 1326 if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
1262 return -EINVAL; 1327 return -EINVAL;
@@ -1293,8 +1358,6 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
1293error_fput: 1358error_fput:
1294 fput(file); 1359 fput(file);
1295error_return: 1360error_return:
1296 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n",
1297 current, epfd, events, maxevents, timeout, error));
1298 1361
1299 return error; 1362 return error;
1300} 1363}
@@ -1359,17 +1422,18 @@ static int __init eventpoll_init(void)
1359 EP_ITEM_COST; 1422 EP_ITEM_COST;
1360 1423
1361 /* Initialize the structure used to perform safe poll wait head wake ups */ 1424 /* Initialize the structure used to perform safe poll wait head wake ups */
1362 ep_poll_safewake_init(&psw); 1425 ep_nested_calls_init(&poll_safewake_ncalls);
1426
1427 /* Initialize the structure used to perform file's f_op->poll() calls */
1428 ep_nested_calls_init(&poll_readywalk_ncalls);
1363 1429
1364 /* Allocates slab cache used to allocate "struct epitem" items */ 1430 /* Allocates slab cache used to allocate "struct epitem" items */
1365 epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), 1431 epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem),
1366 0, SLAB_HWCACHE_ALIGN|EPI_SLAB_DEBUG|SLAB_PANIC, 1432 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
1367 NULL);
1368 1433
1369 /* Allocates slab cache used to allocate "struct eppoll_entry" */ 1434 /* Allocates slab cache used to allocate "struct eppoll_entry" */
1370 pwq_cache = kmem_cache_create("eventpoll_pwq", 1435 pwq_cache = kmem_cache_create("eventpoll_pwq",
1371 sizeof(struct eppoll_entry), 0, 1436 sizeof(struct eppoll_entry), 0, SLAB_PANIC, NULL);
1372 EPI_SLAB_DEBUG|SLAB_PANIC, NULL);
1373 1437
1374 return 0; 1438 return 0;
1375} 1439}
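
One user-visible property of the rewritten send path is worth spelling out: ep_send_events_proc() re-inserts level-triggered items into the ready list after delivering them, so a still-ready descriptor fires on every epoll_wait() call, while an EPOLLET item fires once per edge. A small userspace sketch of that behaviour (nothing here is kernel API, it just exercises the code above):

#include <stdio.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>

int main(void)
{
	struct epoll_event ev = { .events = EPOLLIN }, out;
	int ep = epoll_create1(0);
	int efd = eventfd(1, 0);	/* counter non-zero: always readable */
	int i;

	epoll_ctl(ep, EPOLL_CTL_ADD, efd, &ev);
	for (i = 0; i < 2; i++) {
		int n = epoll_wait(ep, &out, 1, 0);
		/*
		 * Prints "ready" both times: the level-triggered item is
		 * put back on ep->rdllist after being handed to userspace.
		 */
		printf("pass %d: %s\n", i, n == 1 ? "ready" : "idle");
	}
	close(efd);
	close(ep);
	return 0;
}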
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 38f40d55899c..53c72ad85877 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -55,7 +55,8 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
55} 55}
56 56
57static int ext4_group_used_meta_blocks(struct super_block *sb, 57static int ext4_group_used_meta_blocks(struct super_block *sb,
58 ext4_group_t block_group) 58 ext4_group_t block_group,
59 struct ext4_group_desc *gdp)
59{ 60{
60 ext4_fsblk_t tmp; 61 ext4_fsblk_t tmp;
61 struct ext4_sb_info *sbi = EXT4_SB(sb); 62 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -63,10 +64,6 @@ static int ext4_group_used_meta_blocks(struct super_block *sb,
63 int used_blocks = sbi->s_itb_per_group + 2; 64 int used_blocks = sbi->s_itb_per_group + 2;
64 65
65 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { 66 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
66 struct ext4_group_desc *gdp;
67 struct buffer_head *bh;
68
69 gdp = ext4_get_group_desc(sb, block_group, &bh);
70 if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), 67 if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp),
71 block_group)) 68 block_group))
72 used_blocks--; 69 used_blocks--;
@@ -177,7 +174,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
177 */ 174 */
178 mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); 175 mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
179 } 176 }
180 return free_blocks - ext4_group_used_meta_blocks(sb, block_group); 177 return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
181} 178}
182 179
183 180
@@ -473,9 +470,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
473 470
474 if (sbi->s_log_groups_per_flex) { 471 if (sbi->s_log_groups_per_flex) {
475 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 472 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
476 spin_lock(sb_bgl_lock(sbi, flex_group)); 473 atomic_add(blocks_freed,
477 sbi->s_flex_groups[flex_group].free_blocks += blocks_freed; 474 &sbi->s_flex_groups[flex_group].free_blocks);
478 spin_unlock(sb_bgl_lock(sbi, flex_group));
479 } 475 }
480 /* 476 /*
481 * request to reload the buddy with the 477 * request to reload the buddy with the
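
The removed sb_bgl_lock() pair is possible because this patch turns the flex_groups counters into atomic_t (see the ext4.h hunk further down). Illustrative only, the reader side of the same counters:

/*
 * With atomic_t fields, readers snapshot each counter without taking
 * sb_bgl_lock(); the three values may be mutually stale, which is
 * acceptable for allocation heuristics.
 */
ext4_group_t g = ext4_flex_group(sbi, block_group);
int free_blocks = atomic_read(&sbi->s_flex_groups[g].free_blocks);
int free_inodes = atomic_read(&sbi->s_flex_groups[g].free_inodes);
int used_dirs   = atomic_read(&sbi->s_flex_groups[g].used_dirs);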
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 2df2e40b01af..b64789929a65 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -67,7 +67,8 @@ int ext4_check_dir_entry(const char *function, struct inode *dir,
67 unsigned int offset) 67 unsigned int offset)
68{ 68{
69 const char *error_msg = NULL; 69 const char *error_msg = NULL;
70 const int rlen = ext4_rec_len_from_disk(de->rec_len); 70 const int rlen = ext4_rec_len_from_disk(de->rec_len,
71 dir->i_sb->s_blocksize);
71 72
72 if (rlen < EXT4_DIR_REC_LEN(1)) 73 if (rlen < EXT4_DIR_REC_LEN(1))
73 error_msg = "rec_len is smaller than minimal"; 74 error_msg = "rec_len is smaller than minimal";
@@ -178,10 +179,11 @@ revalidate:
178 * least that it is non-zero. A 179 * least that it is non-zero. A
179 * failure will be detected in the 180 * failure will be detected in the
180 * dirent test below. */ 181 * dirent test below. */
181 if (ext4_rec_len_from_disk(de->rec_len) 182 if (ext4_rec_len_from_disk(de->rec_len,
182 < EXT4_DIR_REC_LEN(1)) 183 sb->s_blocksize) < EXT4_DIR_REC_LEN(1))
183 break; 184 break;
184 i += ext4_rec_len_from_disk(de->rec_len); 185 i += ext4_rec_len_from_disk(de->rec_len,
186 sb->s_blocksize);
185 } 187 }
186 offset = i; 188 offset = i;
187 filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) 189 filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
@@ -203,7 +205,8 @@ revalidate:
203 ret = stored; 205 ret = stored;
204 goto out; 206 goto out;
205 } 207 }
206 offset += ext4_rec_len_from_disk(de->rec_len); 208 offset += ext4_rec_len_from_disk(de->rec_len,
209 sb->s_blocksize);
207 if (le32_to_cpu(de->inode)) { 210 if (le32_to_cpu(de->inode)) {
208 /* We might block in the next section 211 /* We might block in the next section
209 * if the data destination is 212 * if the data destination is
@@ -225,7 +228,8 @@ revalidate:
225 goto revalidate; 228 goto revalidate;
226 stored++; 229 stored++;
227 } 230 }
228 filp->f_pos += ext4_rec_len_from_disk(de->rec_len); 231 filp->f_pos += ext4_rec_len_from_disk(de->rec_len,
232 sb->s_blocksize);
229 } 233 }
230 offset = 0; 234 offset = 0;
231 brelse(bh); 235 brelse(bh);
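
Each ext4_rec_len_from_disk() call above now passes the block size because a directory record spanning a whole 64KB block cannot be represented directly in the 16-bit rec_len field. A hedged sketch of the relocated helper (the namei.c side sits outside this section, and the real encoding may carry additional bits):

unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
{
	unsigned len = le16_to_cpu(dlen);

	/*
	 * On disk, 0 and EXT4_MAX_REC_LEN both stand for "the whole
	 * block", whatever the block size happens to be.
	 */
	if (len == EXT4_MAX_REC_LEN || len == 0)
		return blocksize;
	return len;
}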
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6083bb38057b..d0f15ef56de1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -33,14 +33,6 @@
33#undef EXT4FS_DEBUG 33#undef EXT4FS_DEBUG
34 34
35/* 35/*
36 * Define EXT4_RESERVATION to reserve data blocks for expanding files
37 */
38#define EXT4_DEFAULT_RESERVE_BLOCKS 8
39/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
40#define EXT4_MAX_RESERVE_BLOCKS 1027
41#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0
42
43/*
44 * Debug code 36 * Debug code
45 */ 37 */
46#ifdef EXT4FS_DEBUG 38#ifdef EXT4FS_DEBUG
@@ -54,8 +46,6 @@
54#define ext4_debug(f, a...) do {} while (0) 46#define ext4_debug(f, a...) do {} while (0)
55#endif 47#endif
56 48
57#define EXT4_MULTIBLOCK_ALLOCATOR 1
58
59/* prefer goal again. length */ 49/* prefer goal again. length */
60#define EXT4_MB_HINT_MERGE 1 50#define EXT4_MB_HINT_MERGE 1
61/* blocks already reserved */ 51/* blocks already reserved */
@@ -180,8 +170,9 @@ struct ext4_group_desc
180 */ 170 */
181 171
182struct flex_groups { 172struct flex_groups {
183 __u32 free_inodes; 173 atomic_t free_inodes;
184 __u32 free_blocks; 174 atomic_t free_blocks;
175 atomic_t used_dirs;
185}; 176};
186 177
187#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ 178#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
@@ -249,6 +240,30 @@ struct flex_groups {
249#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ 240#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
250#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */ 241#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */
251 242
243/* Flags that should be inherited by new inodes from their parent. */
244#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
245 EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
246 EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
247 EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
248 EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
249
250/* Flags that are appropriate for regular files (all but dir-specific ones). */
251#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
252
253/* Flags appropriate for inodes that are neither directories nor regular files. */
254#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
255
256/* Mask out flags that are inappropriate for the given type of inode. */
257static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
258{
259 if (S_ISDIR(mode))
260 return flags;
261 else if (S_ISREG(mode))
262 return flags & EXT4_REG_FLMASK;
263 else
264 return flags & EXT4_OTHER_FLMASK;
265}
266
252/* 267/*
253 * Inode dynamic state flags 268 * Inode dynamic state flags
254 */ 269 */
@@ -256,6 +271,7 @@ struct flex_groups {
256#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ 271#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
257#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ 272#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
258#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ 273#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
274#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */
259 275
260/* Used to pass group descriptor data when online resize is done */ 276/* Used to pass group descriptor data when online resize is done */
261struct ext4_new_group_input { 277struct ext4_new_group_input {
@@ -303,7 +319,9 @@ struct ext4_new_group_data {
303#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) 319#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
304#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input) 320#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input)
305#define EXT4_IOC_MIGRATE _IO('f', 9) 321#define EXT4_IOC_MIGRATE _IO('f', 9)
322 /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
306 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ 323 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
324#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
307 325
308/* 326/*
309 * ioctl commands in 32 bit emulation 327 * ioctl commands in 32 bit emulation
@@ -531,7 +549,7 @@ do { \
531#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ 549#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
532#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ 550#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */
533#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ 551#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
534#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */ 552#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
535#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ 553#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
536#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */ 554#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */
537#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ 555#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
@@ -666,7 +684,8 @@ struct ext4_super_block {
666 __u8 s_log_groups_per_flex; /* FLEX_BG group size */ 684 __u8 s_log_groups_per_flex; /* FLEX_BG group size */
667 __u8 s_reserved_char_pad2; 685 __u8 s_reserved_char_pad2;
668 __le16 s_reserved_pad; 686 __le16 s_reserved_pad;
669 __u32 s_reserved[162]; /* Padding to the end of the block */ 687 __le64 s_kbytes_written; /* nr of lifetime kilobytes written */
688 __u32 s_reserved[160]; /* Padding to the end of the block */
670}; 689};
671 690
672#ifdef __KERNEL__ 691#ifdef __KERNEL__
@@ -814,6 +833,12 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
814#define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */ 833#define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */
815 834
816/* 835/*
836 * Minimum number of groups in a flexgroup before we separate out
837 * directories into the first block group of a flexgroup
838 */
839#define EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 4
840
841/*
817 * Structure of a directory entry 842 * Structure of a directory entry
818 */ 843 */
819#define EXT4_NAME_LEN 255 844#define EXT4_NAME_LEN 255
@@ -865,24 +890,6 @@ struct ext4_dir_entry_2 {
865 ~EXT4_DIR_ROUND) 890 ~EXT4_DIR_ROUND)
866#define EXT4_MAX_REC_LEN ((1<<16)-1) 891#define EXT4_MAX_REC_LEN ((1<<16)-1)
867 892
868static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
869{
870 unsigned len = le16_to_cpu(dlen);
871
872 if (len == EXT4_MAX_REC_LEN || len == 0)
873 return 1 << 16;
874 return len;
875}
876
877static inline __le16 ext4_rec_len_to_disk(unsigned len)
878{
879 if (len == (1 << 16))
880 return cpu_to_le16(EXT4_MAX_REC_LEN);
881 else if (len > (1 << 16))
882 BUG();
883 return cpu_to_le16(len);
884}
885
886/* 893/*
887 * Hash Tree Directory indexing 894 * Hash Tree Directory indexing
888 * (c) Daniel Phillips, 2001 895 * (c) Daniel Phillips, 2001
@@ -970,22 +977,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
970 977
971extern struct proc_dir_entry *ext4_proc_root; 978extern struct proc_dir_entry *ext4_proc_root;
972 979
973#ifdef CONFIG_PROC_FS
974extern const struct file_operations ext4_ui_proc_fops;
975
976#define EXT4_PROC_HANDLER(name, var) \
977do { \
978 proc = proc_create_data(name, mode, sbi->s_proc, \
979 &ext4_ui_proc_fops, &sbi->s_##var); \
980 if (proc == NULL) { \
981 printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \
982 goto err_out; \
983 } \
984} while (0)
985#else
986#define EXT4_PROC_HANDLER(name, var)
987#endif
988
989/* 980/*
990 * Function prototypes 981 * Function prototypes
991 */ 982 */
@@ -1092,13 +1083,14 @@ extern int ext4_can_truncate(struct inode *inode);
1092extern void ext4_truncate(struct inode *); 1083extern void ext4_truncate(struct inode *);
1093extern void ext4_set_inode_flags(struct inode *); 1084extern void ext4_set_inode_flags(struct inode *);
1094extern void ext4_get_inode_flags(struct ext4_inode_info *); 1085extern void ext4_get_inode_flags(struct ext4_inode_info *);
1086extern int ext4_alloc_da_blocks(struct inode *inode);
1095extern void ext4_set_aops(struct inode *inode); 1087extern void ext4_set_aops(struct inode *inode);
1096extern int ext4_writepage_trans_blocks(struct inode *); 1088extern int ext4_writepage_trans_blocks(struct inode *);
1097extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); 1089extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
1098extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); 1090extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
1099extern int ext4_block_truncate_page(handle_t *handle, 1091extern int ext4_block_truncate_page(handle_t *handle,
1100 struct address_space *mapping, loff_t from); 1092 struct address_space *mapping, loff_t from);
1101extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); 1093extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1102extern qsize_t ext4_get_reserved_space(struct inode *inode); 1094extern qsize_t ext4_get_reserved_space(struct inode *inode);
1103 1095
1104/* ioctl.c */ 1096/* ioctl.c */
@@ -1107,7 +1099,10 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
1107 1099
1108/* migrate.c */ 1100/* migrate.c */
1109extern int ext4_ext_migrate(struct inode *); 1101extern int ext4_ext_migrate(struct inode *);
1102
1110/* namei.c */ 1103/* namei.c */
1104extern unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize);
1105extern __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize);
1111extern int ext4_orphan_add(handle_t *, struct inode *); 1106extern int ext4_orphan_add(handle_t *, struct inode *);
1112extern int ext4_orphan_del(handle_t *, struct inode *); 1107extern int ext4_orphan_del(handle_t *, struct inode *);
1113extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, 1108extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
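
Aside on the relocated rec_len helpers: the inlines removed above encode the one value a 16-bit on-disk field cannot hold, a directory entry spanning a whole 64KB block. Below is a minimal userspace sketch of that wraparound, mirroring the removed inlines; the __le16 conversions and the blocksize parameter of the new namei.c prototypes are elided, so treat the bodies as an assumption (only the prototypes appear in this diff).

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define EXT4_MAX_REC_LEN ((1 << 16) - 1)

/* mirrors the removed ext4_rec_len_from_disk() */
static unsigned rec_len_from_disk(uint16_t dlen)
{
	if (dlen == EXT4_MAX_REC_LEN || dlen == 0)
		return 1 << 16;		/* a directory entry filling a 64k block */
	return dlen;
}

/* mirrors the removed ext4_rec_len_to_disk() */
static uint16_t rec_len_to_disk(unsigned len)
{
	if (len == (1 << 16))
		return EXT4_MAX_REC_LEN; /* 65536 does not fit in 16 bits */
	assert(len < (1 << 16));	 /* stands in for BUG() */
	return (uint16_t)len;
}

int main(void)
{
	uint16_t on_disk = rec_len_to_disk(1 << 16);
	printf("in-core 65536 <-> on-disk 0x%x <-> in-core %u\n",
	       on_disk, rec_len_from_disk(on_disk));
	return 0;
}
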
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 18cb67b2cbbc..f0c3ec85bd48 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -241,5 +241,6 @@ extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
241extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, 241extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
242 ext4_lblk_t *, ext4_fsblk_t *); 242 ext4_lblk_t *, ext4_fsblk_t *);
243extern void ext4_ext_drop_refs(struct ext4_ext_path *); 243extern void ext4_ext_drop_refs(struct ext4_ext_path *);
244extern int ext4_ext_check_inode(struct inode *inode);
244#endif /* _EXT4_EXTENTS */ 245#endif /* _EXT4_EXTENTS */
245 246
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
index e69acc16f5c4..4ce2187123aa 100644
--- a/fs/ext4/ext4_i.h
+++ b/fs/ext4/ext4_i.h
@@ -33,9 +33,6 @@ typedef __u32 ext4_lblk_t;
33/* data type for block group number */ 33/* data type for block group number */
34typedef unsigned int ext4_group_t; 34typedef unsigned int ext4_group_t;
35 35
36#define rsv_start rsv_window._rsv_start
37#define rsv_end rsv_window._rsv_end
38
39/* 36/*
40 * storage for cached extent 37 * storage for cached extent
41 */ 38 */
@@ -125,6 +122,9 @@ struct ext4_inode_info {
125 struct list_head i_prealloc_list; 122 struct list_head i_prealloc_list;
126 spinlock_t i_prealloc_lock; 123 spinlock_t i_prealloc_lock;
127 124
125 /* ialloc */
126 ext4_group_t i_last_alloc_group;
127
128 /* allocation reservation info for delalloc */ 128 /* allocation reservation info for delalloc */
129 unsigned int i_reserved_data_blocks; 129 unsigned int i_reserved_data_blocks;
130 unsigned int i_reserved_meta_blocks; 130 unsigned int i_reserved_meta_blocks;
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 039b6ea1a042..57b71fefbccf 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -62,12 +62,10 @@ struct ext4_sb_info {
62 struct percpu_counter s_freeinodes_counter; 62 struct percpu_counter s_freeinodes_counter;
63 struct percpu_counter s_dirs_counter; 63 struct percpu_counter s_dirs_counter;
64 struct percpu_counter s_dirtyblocks_counter; 64 struct percpu_counter s_dirtyblocks_counter;
65 struct blockgroup_lock s_blockgroup_lock; 65 struct blockgroup_lock *s_blockgroup_lock;
66 struct proc_dir_entry *s_proc; 66 struct proc_dir_entry *s_proc;
67 67 struct kobject s_kobj;
68 /* root of the per fs reservation window tree */ 68 struct completion s_kobj_unregister;
69 spinlock_t s_rsv_window_lock;
70 struct rb_root s_rsv_window_root;
71 69
72 /* Journaling */ 70 /* Journaling */
73 struct inode *s_journal_inode; 71 struct inode *s_journal_inode;
@@ -146,6 +144,10 @@ struct ext4_sb_info {
146 /* locality groups */ 144 /* locality groups */
147 struct ext4_locality_group *s_locality_groups; 145 struct ext4_locality_group *s_locality_groups;
148 146
147 /* for write statistics */
148 unsigned long s_sectors_written_start;
149 u64 s_kbytes_written;
150
149 unsigned int s_log_groups_per_flex; 151 unsigned int s_log_groups_per_flex;
150 struct flex_groups *s_flex_groups; 152 struct flex_groups *s_flex_groups;
151}; 153};
@@ -153,7 +155,7 @@ struct ext4_sb_info {
153static inline spinlock_t * 155static inline spinlock_t *
154sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group) 156sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
155{ 157{
156 return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group); 158 return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
157} 159}
158 160
159#endif /* _EXT4_SB */ 161#endif /* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e0aa4fe4f596..ac77d8b8251d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -152,6 +152,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
152 ext4_fsblk_t bg_start; 152 ext4_fsblk_t bg_start;
153 ext4_fsblk_t last_block; 153 ext4_fsblk_t last_block;
154 ext4_grpblk_t colour; 154 ext4_grpblk_t colour;
155 ext4_group_t block_group;
156 int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
155 int depth; 157 int depth;
156 158
157 if (path) { 159 if (path) {
@@ -170,10 +172,31 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
170 } 172 }
171 173
172 /* OK. use inode's group */ 174 /* OK. use inode's group */
173 bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + 175 block_group = ei->i_block_group;
176 if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
177 /*
178 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
179 * block groups per flexgroup, reserve the first block
180 * group for directories and special files. Regular
181 * files will start at the second block group. This
182 * tends to speed up directory access and improves
183 * fsck times.
184 */
185 block_group &= ~(flex_size-1);
186 if (S_ISREG(inode->i_mode))
187 block_group++;
188 }
189 bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
174 le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block); 190 le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
175 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; 191 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
176 192
193 /*
194 * If we are doing delayed allocation, we don't need to take
195 * colour into account.
196 */
197 if (test_opt(inode->i_sb, DELALLOC))
198 return bg_start;
199
177 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) 200 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
178 colour = (current->pid % 16) * 201 colour = (current->pid % 16) *
179 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); 202 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
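
The flexgroup goal logic added in this hunk relies on flex_size being a power of two, so masking with ~(flex_size - 1) rounds a block group number down to the first group of its flexgroup; directories stay there and regular files are nudged to the next group. A small standalone illustration of just that arithmetic, with invented values:

#include <stdio.h>

int main(void)
{
	unsigned flex_size = 16;	/* block groups per flexgroup (power of 2) */
	unsigned groups[] = { 0, 5, 16, 37 };

	for (unsigned i = 0; i < sizeof(groups) / sizeof(groups[0]); i++) {
		/* round down to the first group of the flexgroup */
		unsigned start = groups[i] & ~(flex_size - 1);
		printf("group %2u -> dir goal %2u, regular-file goal %2u\n",
		       groups[i], start, start + 1);
	}
	return 0;
}
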
@@ -301,7 +324,64 @@ ext4_ext_max_entries(struct inode *inode, int depth)
301 return max; 324 return max;
302} 325}
303 326
304static int __ext4_ext_check_header(const char *function, struct inode *inode, 327static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
328{
329 ext4_fsblk_t block = ext_pblock(ext);
330 int len = ext4_ext_get_actual_len(ext);
331 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
332 if (unlikely(block < le32_to_cpu(es->s_first_data_block) ||
333 ((block + len) > ext4_blocks_count(es))))
334 return 0;
335 else
336 return 1;
337}
338
339static int ext4_valid_extent_idx(struct inode *inode,
340 struct ext4_extent_idx *ext_idx)
341{
342 ext4_fsblk_t block = idx_pblock(ext_idx);
343 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
344 if (unlikely(block < le32_to_cpu(es->s_first_data_block) ||
345 (block > ext4_blocks_count(es))))
346 return 0;
347 else
348 return 1;
349}
350
351static int ext4_valid_extent_entries(struct inode *inode,
352 struct ext4_extent_header *eh,
353 int depth)
354{
355 struct ext4_extent *ext;
356 struct ext4_extent_idx *ext_idx;
357 unsigned short entries;
358 if (eh->eh_entries == 0)
359 return 1;
360
361 entries = le16_to_cpu(eh->eh_entries);
362
363 if (depth == 0) {
364 /* leaf entries */
365 ext = EXT_FIRST_EXTENT(eh);
366 while (entries) {
367 if (!ext4_valid_extent(inode, ext))
368 return 0;
369 ext++;
370 entries--;
371 }
372 } else {
373 ext_idx = EXT_FIRST_INDEX(eh);
374 while (entries) {
375 if (!ext4_valid_extent_idx(inode, ext_idx))
376 return 0;
377 ext_idx++;
378 entries--;
379 }
380 }
381 return 1;
382}
383
384static int __ext4_ext_check(const char *function, struct inode *inode,
305 struct ext4_extent_header *eh, 385 struct ext4_extent_header *eh,
306 int depth) 386 int depth)
307{ 387{
@@ -329,11 +409,15 @@ static int __ext4_ext_check_header(const char *function, struct inode *inode,
329 error_msg = "invalid eh_entries"; 409 error_msg = "invalid eh_entries";
330 goto corrupted; 410 goto corrupted;
331 } 411 }
412 if (!ext4_valid_extent_entries(inode, eh, depth)) {
413 error_msg = "invalid extent entries";
414 goto corrupted;
415 }
332 return 0; 416 return 0;
333 417
334corrupted: 418corrupted:
335 ext4_error(inode->i_sb, function, 419 ext4_error(inode->i_sb, function,
336 "bad header in inode #%lu: %s - magic %x, " 420 "bad header/extent in inode #%lu: %s - magic %x, "
337 "entries %u, max %u(%u), depth %u(%u)", 421 "entries %u, max %u(%u), depth %u(%u)",
338 inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), 422 inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
339 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), 423 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
@@ -342,8 +426,13 @@ corrupted:
342 return -EIO; 426 return -EIO;
343} 427}
344 428
345#define ext4_ext_check_header(inode, eh, depth) \ 429#define ext4_ext_check(inode, eh, depth) \
346 __ext4_ext_check_header(__func__, inode, eh, depth) 430 __ext4_ext_check(__func__, inode, eh, depth)
431
432int ext4_ext_check_inode(struct inode *inode)
433{
434 return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode));
435}
347 436
348#ifdef EXT_DEBUG 437#ifdef EXT_DEBUG
349static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) 438static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -547,9 +636,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
547 636
548 eh = ext_inode_hdr(inode); 637 eh = ext_inode_hdr(inode);
549 depth = ext_depth(inode); 638 depth = ext_depth(inode);
550 if (ext4_ext_check_header(inode, eh, depth))
551 return ERR_PTR(-EIO);
552
553 639
554 /* account possible depth increase */ 640 /* account possible depth increase */
555 if (!path) { 641 if (!path) {
@@ -565,6 +651,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
565 i = depth; 651 i = depth;
566 /* walk through the tree */ 652 /* walk through the tree */
567 while (i) { 653 while (i) {
654 int need_to_validate = 0;
655
568 ext_debug("depth %d: num %d, max %d\n", 656 ext_debug("depth %d: num %d, max %d\n",
569 ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); 657 ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
570 658
@@ -573,10 +661,17 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
573 path[ppos].p_depth = i; 661 path[ppos].p_depth = i;
574 path[ppos].p_ext = NULL; 662 path[ppos].p_ext = NULL;
575 663
576 bh = sb_bread(inode->i_sb, path[ppos].p_block); 664 bh = sb_getblk(inode->i_sb, path[ppos].p_block);
577 if (!bh) 665 if (unlikely(!bh))
578 goto err; 666 goto err;
579 667 if (!bh_uptodate_or_lock(bh)) {
668 if (bh_submit_read(bh) < 0) {
669 put_bh(bh);
670 goto err;
671 }
672 /* validate the extent entries */
673 need_to_validate = 1;
674 }
580 eh = ext_block_hdr(bh); 675 eh = ext_block_hdr(bh);
581 ppos++; 676 ppos++;
582 BUG_ON(ppos > depth); 677 BUG_ON(ppos > depth);
@@ -584,7 +679,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
584 path[ppos].p_hdr = eh; 679 path[ppos].p_hdr = eh;
585 i--; 680 i--;
586 681
587 if (ext4_ext_check_header(inode, eh, i)) 682 if (need_to_validate && ext4_ext_check(inode, eh, i))
588 goto err; 683 goto err;
589 } 684 }
590 685
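
The sb_bread() to sb_getblk() switch above means a buffer already in the cache is trusted, and the comparatively expensive ext4_ext_check() pass runs only when bh_submit_read() actually pulled the block off disk. A toy userspace analogue of that read-then-validate-once pattern; the struct and names here are stand-ins, not kernel API:

#include <stdio.h>
#include <string.h>

struct block {
	int uptodate;		/* like buffer_uptodate() */
	char data[16];
};

/* pretend disk I/O, like bh_submit_read() */
static int submit_read(struct block *b)
{
	strcpy(b->data, "extent header");
	b->uptodate = 1;
	return 0;
}

/* cheap sanity check, like ext4_ext_check() */
static int validate(struct block *b)
{
	return strncmp(b->data, "extent", 6) == 0 ? 0 : -1;
}

int main(void)
{
	struct block blk = { 0 };

	for (int pass = 0; pass < 2; pass++) {
		int need_to_validate = 0;

		if (!blk.uptodate) {	/* cold cache: read and flag */
			if (submit_read(&blk) < 0)
				return 1;
			need_to_validate = 1;
		}
		/* warm cache hits skip the check entirely */
		if (need_to_validate && validate(&blk) < 0)
			return 1;
		printf("pass %d: \"%s\" (validated=%d)\n",
		       pass, blk.data, need_to_validate);
	}
	return 0;
}
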
@@ -1181,7 +1276,7 @@ got_index:
1181 return -EIO; 1276 return -EIO;
1182 eh = ext_block_hdr(bh); 1277 eh = ext_block_hdr(bh);
1183 /* subtract from p_depth to get proper eh_depth */ 1278 /* subtract from p_depth to get proper eh_depth */
1184 if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) { 1279 if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
1185 put_bh(bh); 1280 put_bh(bh);
1186 return -EIO; 1281 return -EIO;
1187 } 1282 }
@@ -1194,7 +1289,7 @@ got_index:
1194 if (bh == NULL) 1289 if (bh == NULL)
1195 return -EIO; 1290 return -EIO;
1196 eh = ext_block_hdr(bh); 1291 eh = ext_block_hdr(bh);
1197 if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) { 1292 if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
1198 put_bh(bh); 1293 put_bh(bh);
1199 return -EIO; 1294 return -EIO;
1200 } 1295 }
@@ -2137,7 +2232,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
2137 return -ENOMEM; 2232 return -ENOMEM;
2138 } 2233 }
2139 path[0].p_hdr = ext_inode_hdr(inode); 2234 path[0].p_hdr = ext_inode_hdr(inode);
2140 if (ext4_ext_check_header(inode, path[0].p_hdr, depth)) { 2235 if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
2141 err = -EIO; 2236 err = -EIO;
2142 goto out; 2237 goto out;
2143 } 2238 }
@@ -2191,7 +2286,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
2191 err = -EIO; 2286 err = -EIO;
2192 break; 2287 break;
2193 } 2288 }
2194 if (ext4_ext_check_header(inode, ext_block_hdr(bh), 2289 if (ext4_ext_check(inode, ext_block_hdr(bh),
2195 depth - i - 1)) { 2290 depth - i - 1)) {
2196 err = -EIO; 2291 err = -EIO;
2197 break; 2292 break;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index f731cb545a03..588af8c77246 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -33,9 +33,14 @@
33 */ 33 */
34static int ext4_release_file(struct inode *inode, struct file *filp) 34static int ext4_release_file(struct inode *inode, struct file *filp)
35{ 35{
36 if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) {
37 ext4_alloc_da_blocks(inode);
38 EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE;
39 }
36 /* if we are the last writer on the inode, drop the block reservation */ 40 /* if we are the last writer on the inode, drop the block reservation */
37 if ((filp->f_mode & FMODE_WRITE) && 41 if ((filp->f_mode & FMODE_WRITE) &&
38 (atomic_read(&inode->i_writecount) == 1)) 42 (atomic_read(&inode->i_writecount) == 1) &&
43 !EXT4_I(inode)->i_reserved_data_blocks)
39 { 44 {
40 down_write(&EXT4_I(inode)->i_data_sem); 45 down_write(&EXT4_I(inode)->i_data_sem);
41 ext4_discard_preallocations(inode); 46 ext4_discard_preallocations(inode);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index fb51b40e3e8f..47b84e8df568 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -189,7 +189,6 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
189 struct ext4_super_block *es; 189 struct ext4_super_block *es;
190 struct ext4_sb_info *sbi; 190 struct ext4_sb_info *sbi;
191 int fatal = 0, err, count, cleared; 191 int fatal = 0, err, count, cleared;
192 ext4_group_t flex_group;
193 192
194 if (atomic_read(&inode->i_count) > 1) { 193 if (atomic_read(&inode->i_count) > 1) {
195 printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", 194 printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
@@ -268,6 +267,13 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
268 if (is_directory) { 267 if (is_directory) {
269 count = ext4_used_dirs_count(sb, gdp) - 1; 268 count = ext4_used_dirs_count(sb, gdp) - 1;
270 ext4_used_dirs_set(sb, gdp, count); 269 ext4_used_dirs_set(sb, gdp, count);
270 if (sbi->s_log_groups_per_flex) {
271 ext4_group_t f;
272
273 f = ext4_flex_group(sbi, block_group);
274 atomic_dec(&sbi->s_flex_groups[f].used_dirs);
275 }
276
271 } 277 }
272 gdp->bg_checksum = ext4_group_desc_csum(sbi, 278 gdp->bg_checksum = ext4_group_desc_csum(sbi,
273 block_group, gdp); 279 block_group, gdp);
@@ -277,10 +283,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
277 percpu_counter_dec(&sbi->s_dirs_counter); 283 percpu_counter_dec(&sbi->s_dirs_counter);
278 284
279 if (sbi->s_log_groups_per_flex) { 285 if (sbi->s_log_groups_per_flex) {
280 flex_group = ext4_flex_group(sbi, block_group); 286 ext4_group_t f;
281 spin_lock(sb_bgl_lock(sbi, flex_group)); 287
282 sbi->s_flex_groups[flex_group].free_inodes++; 288 f = ext4_flex_group(sbi, block_group);
283 spin_unlock(sb_bgl_lock(sbi, flex_group)); 289 atomic_inc(&sbi->s_flex_groups[f].free_inodes);
284 } 290 }
285 } 291 }
286 BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); 292 BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
@@ -360,9 +366,9 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
360 sbi->s_log_groups_per_flex; 366 sbi->s_log_groups_per_flex;
361 367
362find_close_to_parent: 368find_close_to_parent:
363 flexbg_free_blocks = flex_group[best_flex].free_blocks; 369 flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks);
364 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; 370 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
365 if (flex_group[best_flex].free_inodes && 371 if (atomic_read(&flex_group[best_flex].free_inodes) &&
366 flex_freeb_ratio > free_block_ratio) 372 flex_freeb_ratio > free_block_ratio)
367 goto found_flexbg; 373 goto found_flexbg;
368 374
@@ -375,24 +381,24 @@ find_close_to_parent:
375 if (i == parent_fbg_group || i == parent_fbg_group - 1) 381 if (i == parent_fbg_group || i == parent_fbg_group - 1)
376 continue; 382 continue;
377 383
378 flexbg_free_blocks = flex_group[i].free_blocks; 384 flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks);
379 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; 385 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
380 386
381 if (flex_freeb_ratio > free_block_ratio && 387 if (flex_freeb_ratio > free_block_ratio &&
382 flex_group[i].free_inodes) { 388 (atomic_read(&flex_group[i].free_inodes))) {
383 best_flex = i; 389 best_flex = i;
384 goto found_flexbg; 390 goto found_flexbg;
385 } 391 }
386 392
387 if (flex_group[best_flex].free_inodes == 0 || 393 if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) ||
388 (flex_group[i].free_blocks > 394 ((atomic_read(&flex_group[i].free_blocks) >
389 flex_group[best_flex].free_blocks && 395 atomic_read(&flex_group[best_flex].free_blocks)) &&
390 flex_group[i].free_inodes)) 396 atomic_read(&flex_group[i].free_inodes)))
391 best_flex = i; 397 best_flex = i;
392 } 398 }
393 399
394 if (!flex_group[best_flex].free_inodes || 400 if (!atomic_read(&flex_group[best_flex].free_inodes) ||
395 !flex_group[best_flex].free_blocks) 401 !atomic_read(&flex_group[best_flex].free_blocks))
396 return -1; 402 return -1;
397 403
398found_flexbg: 404found_flexbg:
@@ -410,6 +416,42 @@ out:
410 return 0; 416 return 0;
411} 417}
412 418
419struct orlov_stats {
420 __u32 free_inodes;
421 __u32 free_blocks;
422 __u32 used_dirs;
423};
424
425/*
426 * Helper function for Orlov's allocator; returns critical information
427 * for a particular block group or flex_bg. If flex_size is 1, then g
428 * is a block group number; otherwise it is the flex_bg number.
429 */
430void get_orlov_stats(struct super_block *sb, ext4_group_t g,
431 int flex_size, struct orlov_stats *stats)
432{
433 struct ext4_group_desc *desc;
434 struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
435
436 if (flex_size > 1) {
437 stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
438 stats->free_blocks = atomic_read(&flex_group[g].free_blocks);
439 stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
440 return;
441 }
442
443 desc = ext4_get_group_desc(sb, g, NULL);
444 if (desc) {
445 stats->free_inodes = ext4_free_inodes_count(sb, desc);
446 stats->free_blocks = ext4_free_blks_count(sb, desc);
447 stats->used_dirs = ext4_used_dirs_count(sb, desc);
448 } else {
449 stats->free_inodes = 0;
450 stats->free_blocks = 0;
451 stats->used_dirs = 0;
452 }
453}
454
413/* 455/*
414 * Orlov's allocator for directories. 456 * Orlov's allocator for directories.
415 * 457 *
@@ -425,35 +467,34 @@ out:
425 * it has too many directories already (max_dirs) or 467 * it has too many directories already (max_dirs) or
426 * it has too few free inodes left (min_inodes) or 468 * it has too few free inodes left (min_inodes) or
427 * it has too few free blocks left (min_blocks) or 469 * it has too few free blocks left (min_blocks) or
428 * it's already running too large debt (max_debt).
429 * Parent's group is preferred, if it doesn't satisfy these 470 * Parent's group is preferred, if it doesn't satisfy these
430 * conditions we search cyclically through the rest. If none 471 * conditions we search cyclically through the rest. If none
431 * of the groups look good we just look for a group with more 472 * of the groups look good we just look for a group with more
432 * free inodes than average (starting at parent's group). 473 * free inodes than average (starting at parent's group).
433 *
434 * Debt is incremented each time we allocate a directory and decremented
435 * when we allocate an inode, within 0--255.
436 */ 474 */
437 475
438#define INODE_COST 64
439#define BLOCK_COST 256
440
441static int find_group_orlov(struct super_block *sb, struct inode *parent, 476static int find_group_orlov(struct super_block *sb, struct inode *parent,
442 ext4_group_t *group) 477 ext4_group_t *group, int mode)
443{ 478{
444 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 479 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
445 struct ext4_sb_info *sbi = EXT4_SB(sb); 480 struct ext4_sb_info *sbi = EXT4_SB(sb);
446 struct ext4_super_block *es = sbi->s_es;
447 ext4_group_t ngroups = sbi->s_groups_count; 481 ext4_group_t ngroups = sbi->s_groups_count;
448 int inodes_per_group = EXT4_INODES_PER_GROUP(sb); 482 int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
449 unsigned int freei, avefreei; 483 unsigned int freei, avefreei;
450 ext4_fsblk_t freeb, avefreeb; 484 ext4_fsblk_t freeb, avefreeb;
451 ext4_fsblk_t blocks_per_dir;
452 unsigned int ndirs; 485 unsigned int ndirs;
453 int max_debt, max_dirs, min_inodes; 486 int max_dirs, min_inodes;
454 ext4_grpblk_t min_blocks; 487 ext4_grpblk_t min_blocks;
455 ext4_group_t i; 488 ext4_group_t i, grp, g;
456 struct ext4_group_desc *desc; 489 struct ext4_group_desc *desc;
490 struct orlov_stats stats;
491 int flex_size = ext4_flex_bg_size(sbi);
492
493 if (flex_size > 1) {
494 ngroups = (ngroups + flex_size - 1) >>
495 sbi->s_log_groups_per_flex;
496 parent_group >>= sbi->s_log_groups_per_flex;
497 }
457 498
458 freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); 499 freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
459 avefreei = freei / ngroups; 500 avefreei = freei / ngroups;
@@ -462,71 +503,97 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
462 do_div(avefreeb, ngroups); 503 do_div(avefreeb, ngroups);
463 ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); 504 ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
464 505
465 if ((parent == sb->s_root->d_inode) || 506 if (S_ISDIR(mode) &&
466 (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) { 507 ((parent == sb->s_root->d_inode) ||
508 (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) {
467 int best_ndir = inodes_per_group; 509 int best_ndir = inodes_per_group;
468 ext4_group_t grp;
469 int ret = -1; 510 int ret = -1;
470 511
471 get_random_bytes(&grp, sizeof(grp)); 512 get_random_bytes(&grp, sizeof(grp));
472 parent_group = (unsigned)grp % ngroups; 513 parent_group = (unsigned)grp % ngroups;
473 for (i = 0; i < ngroups; i++) { 514 for (i = 0; i < ngroups; i++) {
474 grp = (parent_group + i) % ngroups; 515 g = (parent_group + i) % ngroups;
475 desc = ext4_get_group_desc(sb, grp, NULL); 516 get_orlov_stats(sb, g, flex_size, &stats);
476 if (!desc || !ext4_free_inodes_count(sb, desc)) 517 if (!stats.free_inodes)
477 continue; 518 continue;
478 if (ext4_used_dirs_count(sb, desc) >= best_ndir) 519 if (stats.used_dirs >= best_ndir)
479 continue; 520 continue;
480 if (ext4_free_inodes_count(sb, desc) < avefreei) 521 if (stats.free_inodes < avefreei)
481 continue; 522 continue;
482 if (ext4_free_blks_count(sb, desc) < avefreeb) 523 if (stats.free_blocks < avefreeb)
483 continue; 524 continue;
484 *group = grp; 525 grp = g;
485 ret = 0; 526 ret = 0;
486 best_ndir = ext4_used_dirs_count(sb, desc); 527 best_ndir = stats.used_dirs;
528 }
529 if (ret)
530 goto fallback;
531 found_flex_bg:
532 if (flex_size == 1) {
533 *group = grp;
534 return 0;
535 }
536
537 /*
538 * We pack inodes at the beginning of the flexgroup's
539 * inode tables. Block allocation decisions will do
540 * something similar, although regular files will
541 * start at the 2nd block group of the flexgroup. See
542 * ext4_ext_find_goal() and ext4_find_near().
543 */
544 grp *= flex_size;
545 for (i = 0; i < flex_size; i++) {
546 if (grp+i >= sbi->s_groups_count)
547 break;
548 desc = ext4_get_group_desc(sb, grp+i, NULL);
549 if (desc && ext4_free_inodes_count(sb, desc)) {
550 *group = grp+i;
551 return 0;
552 }
487 } 553 }
488 if (ret == 0)
489 return ret;
490 goto fallback; 554 goto fallback;
491 } 555 }
492 556
493 blocks_per_dir = ext4_blocks_count(es) - freeb;
494 do_div(blocks_per_dir, ndirs);
495
496 max_dirs = ndirs / ngroups + inodes_per_group / 16; 557 max_dirs = ndirs / ngroups + inodes_per_group / 16;
497 min_inodes = avefreei - inodes_per_group / 4; 558 min_inodes = avefreei - inodes_per_group*flex_size / 4;
498 min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4; 559 if (min_inodes < 1)
499 560 min_inodes = 1;
500 max_debt = EXT4_BLOCKS_PER_GROUP(sb); 561 min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4;
501 max_debt /= max_t(int, blocks_per_dir, BLOCK_COST); 562
502 if (max_debt * INODE_COST > inodes_per_group) 563 /*
503 max_debt = inodes_per_group / INODE_COST; 564 * Start looking in the flex group where we last allocated an
504 if (max_debt > 255) 565 * inode for this parent directory
505 max_debt = 255; 566 */
506 if (max_debt == 0) 567 if (EXT4_I(parent)->i_last_alloc_group != ~0) {
507 max_debt = 1; 568 parent_group = EXT4_I(parent)->i_last_alloc_group;
569 if (flex_size > 1)
570 parent_group >>= sbi->s_log_groups_per_flex;
571 }
508 572
509 for (i = 0; i < ngroups; i++) { 573 for (i = 0; i < ngroups; i++) {
510 *group = (parent_group + i) % ngroups; 574 grp = (parent_group + i) % ngroups;
511 desc = ext4_get_group_desc(sb, *group, NULL); 575 get_orlov_stats(sb, grp, flex_size, &stats);
512 if (!desc || !ext4_free_inodes_count(sb, desc)) 576 if (stats.used_dirs >= max_dirs)
513 continue;
514 if (ext4_used_dirs_count(sb, desc) >= max_dirs)
515 continue; 577 continue;
516 if (ext4_free_inodes_count(sb, desc) < min_inodes) 578 if (stats.free_inodes < min_inodes)
517 continue; 579 continue;
518 if (ext4_free_blks_count(sb, desc) < min_blocks) 580 if (stats.free_blocks < min_blocks)
519 continue; 581 continue;
520 return 0; 582 goto found_flex_bg;
521 } 583 }
522 584
523fallback: 585fallback:
586 ngroups = sbi->s_groups_count;
587 avefreei = freei / ngroups;
588 parent_group = EXT4_I(parent)->i_block_group;
524 for (i = 0; i < ngroups; i++) { 589 for (i = 0; i < ngroups; i++) {
525 *group = (parent_group + i) % ngroups; 590 grp = (parent_group + i) % ngroups;
526 desc = ext4_get_group_desc(sb, *group, NULL); 591 desc = ext4_get_group_desc(sb, grp, NULL);
527 if (desc && ext4_free_inodes_count(sb, desc) && 592 if (desc && ext4_free_inodes_count(sb, desc) &&
528 ext4_free_inodes_count(sb, desc) >= avefreei) 593 ext4_free_inodes_count(sb, desc) >= avefreei) {
594 *group = grp;
529 return 0; 595 return 0;
596 }
530 } 597 }
531 598
532 if (avefreei) { 599 if (avefreei) {
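
To see how the reworked Orlov loop consumes get_orlov_stats(), here is a self-contained sketch of the threshold filtering with invented numbers; the real code above applies the same three tests per (flex)group before taking the fallback path:

#include <stdio.h>

struct orlov_stats {
	unsigned free_inodes, free_blocks, used_dirs;
};

int main(void)
{
	struct orlov_stats g[] = {
		{ .free_inodes = 10, .free_blocks = 900, .used_dirs = 40 },
		{ .free_inodes = 80, .free_blocks =  50, .used_dirs =  5 },
		{ .free_inodes = 70, .free_blocks = 800, .used_dirs =  8 },
	};
	unsigned max_dirs = 20, min_inodes = 32, min_blocks = 100;

	for (unsigned i = 0; i < sizeof(g) / sizeof(g[0]); i++) {
		if (g[i].used_dirs >= max_dirs)		/* too many dirs */
			continue;
		if (g[i].free_inodes < min_inodes)	/* too few inodes */
			continue;
		if (g[i].free_blocks < min_blocks)	/* too few blocks */
			continue;
		printf("would allocate in (flex)group %u\n", i);
		return 0;
	}
	printf("no group passed; fall back\n");
	return 0;
}
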
@@ -542,12 +609,51 @@ fallback:
542} 609}
543 610
544static int find_group_other(struct super_block *sb, struct inode *parent, 611static int find_group_other(struct super_block *sb, struct inode *parent,
545 ext4_group_t *group) 612 ext4_group_t *group, int mode)
546{ 613{
547 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 614 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
548 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 615 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
549 struct ext4_group_desc *desc; 616 struct ext4_group_desc *desc;
550 ext4_group_t i; 617 ext4_group_t i, last;
618 int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
619
620 /*
621 * Try to place the inode in the same flex group as its
622 * parent. If we can't find space, use the Orlov algorithm to
623 * find another flex group, and store that information in the
624 * parent directory's inode information so that we use that flex
625 * group for future allocations.
626 */
627 if (flex_size > 1) {
628 int retry = 0;
629
630 try_again:
631 parent_group &= ~(flex_size-1);
632 last = parent_group + flex_size;
633 if (last > ngroups)
634 last = ngroups;
635 for (i = parent_group; i < last; i++) {
636 desc = ext4_get_group_desc(sb, i, NULL);
637 if (desc && ext4_free_inodes_count(sb, desc)) {
638 *group = i;
639 return 0;
640 }
641 }
642 if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) {
643 retry = 1;
644 parent_group = EXT4_I(parent)->i_last_alloc_group;
645 goto try_again;
646 }
647 /*
648 * If this didn't work, use the Orlov search algorithm
649 * to find a new flex group; we pass in the mode to
650 * avoid the topdir algorithms.
651 */
652 *group = parent_group + flex_size;
653 if (*group > ngroups)
654 *group = 0;
655 return find_group_orlov(sb, parent, group, mode);
656 }
551 657
552 /* 658 /*
553 * Try to place the inode in its parent directory 659 * Try to place the inode in its parent directory
@@ -665,6 +771,11 @@ static int ext4_claim_inode(struct super_block *sb,
665 if (S_ISDIR(mode)) { 771 if (S_ISDIR(mode)) {
666 count = ext4_used_dirs_count(sb, gdp) + 1; 772 count = ext4_used_dirs_count(sb, gdp) + 1;
667 ext4_used_dirs_set(sb, gdp, count); 773 ext4_used_dirs_set(sb, gdp, count);
774 if (sbi->s_log_groups_per_flex) {
775 ext4_group_t f = ext4_flex_group(sbi, group);
776
777 atomic_inc(&sbi->s_flex_groups[f].used_dirs);
778 }
668 } 779 }
669 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 780 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
670err_ret: 781err_ret:
@@ -716,10 +827,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
716 sbi = EXT4_SB(sb); 827 sbi = EXT4_SB(sb);
717 es = sbi->s_es; 828 es = sbi->s_es;
718 829
719 if (sbi->s_log_groups_per_flex) { 830 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
720 ret2 = find_group_flex(sb, dir, &group); 831 ret2 = find_group_flex(sb, dir, &group);
721 if (ret2 == -1) { 832 if (ret2 == -1) {
722 ret2 = find_group_other(sb, dir, &group); 833 ret2 = find_group_other(sb, dir, &group, mode);
723 if (ret2 == 0 && once) 834 if (ret2 == 0 && once)
724 once = 0; 835 once = 0;
725 printk(KERN_NOTICE "ext4: find_group_flex " 836 printk(KERN_NOTICE "ext4: find_group_flex "
@@ -733,11 +844,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
733 if (test_opt(sb, OLDALLOC)) 844 if (test_opt(sb, OLDALLOC))
734 ret2 = find_group_dir(sb, dir, &group); 845 ret2 = find_group_dir(sb, dir, &group);
735 else 846 else
736 ret2 = find_group_orlov(sb, dir, &group); 847 ret2 = find_group_orlov(sb, dir, &group, mode);
737 } else 848 } else
738 ret2 = find_group_other(sb, dir, &group); 849 ret2 = find_group_other(sb, dir, &group, mode);
739 850
740got_group: 851got_group:
852 EXT4_I(dir)->i_last_alloc_group = group;
741 err = -ENOSPC; 853 err = -ENOSPC;
742 if (ret2 == -1) 854 if (ret2 == -1)
743 goto out; 855 goto out;
@@ -858,9 +970,7 @@ got:
858 970
859 if (sbi->s_log_groups_per_flex) { 971 if (sbi->s_log_groups_per_flex) {
860 flex_group = ext4_flex_group(sbi, group); 972 flex_group = ext4_flex_group(sbi, group);
861 spin_lock(sb_bgl_lock(sbi, flex_group)); 973 atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
862 sbi->s_flex_groups[flex_group].free_inodes--;
863 spin_unlock(sb_bgl_lock(sbi, flex_group));
864 } 974 }
865 975
866 inode->i_uid = current_fsuid(); 976 inode->i_uid = current_fsuid();
@@ -885,19 +995,16 @@ got:
885 ei->i_disksize = 0; 995 ei->i_disksize = 0;
886 996
887 /* 997 /*
888 * Don't inherit extent flag from directory. We set extent flag on 998 * Don't inherit extent flag from directory, amongst others. We set
889 * newly created directory and file only if -o extent mount option is 999 * extent flag on newly created directory and file only if -o extent
890 * specified 1000 * mount option is specified
891 */ 1001 */
892 ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL); 1002 ei->i_flags =
893 if (S_ISLNK(mode)) 1003 ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
894 ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);
895 /* dirsync only applies to directories */
896 if (!S_ISDIR(mode))
897 ei->i_flags &= ~EXT4_DIRSYNC_FL;
898 ei->i_file_acl = 0; 1004 ei->i_file_acl = 0;
899 ei->i_dtime = 0; 1005 ei->i_dtime = 0;
900 ei->i_block_group = group; 1006 ei->i_block_group = group;
1007 ei->i_last_alloc_group = ~0;
901 1008
902 ext4_set_inode_flags(inode); 1009 ext4_set_inode_flags(inode);
903 if (IS_DIRSYNC(inode)) 1010 if (IS_DIRSYNC(inode))
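
The inheritance hunk above replaces the hand-rolled flag stripping with ext4_mask_flags() over EXT4_FL_INHERITED. A rough userspace sketch of the shape such a helper plausibly has; the mask values are placeholders, since the real EXT4_REG_FLMASK/EXT4_OTHER_FLMASK definitions are outside this diff:

#include <stdio.h>
#include <sys/stat.h>

/* placeholder masks, not the real ext4 definitions */
#define REG_FLMASK   0x0f
#define OTHER_FLMASK 0x03

static unsigned mask_flags(mode_t mode, unsigned flags)
{
	if (S_ISDIR(mode))
		return flags;		   /* directories keep everything */
	if (S_ISREG(mode))
		return flags & REG_FLMASK; /* e.g. drop DIRSYNC-style bits */
	return flags & OTHER_FLMASK;	   /* symlinks, devices, ... */
}

int main(void)
{
	unsigned inherited = 0xff;	/* pretend parent-directory flags */

	printf("dir=%#x reg=%#x other=%#x\n",
	       mask_flags(S_IFDIR, inherited),
	       mask_flags(S_IFREG, inherited),
	       mask_flags(S_IFLNK, inherited));
	return 0;
}
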
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 71d3ecd5db79..a2e7952bc5f9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -371,6 +371,34 @@ static int ext4_block_to_path(struct inode *inode,
371 return n; 371 return n;
372} 372}
373 373
374static int __ext4_check_blockref(const char *function, struct inode *inode,
375 __le32 *p, unsigned int max) {
376
377 unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es);
378 __le32 *bref = p;
379 while (bref < p+max) {
380 if (unlikely(le32_to_cpu(*bref) >= maxblocks)) {
381 ext4_error(inode->i_sb, function,
382 "block reference %u >= max (%u) "
383 "in inode #%lu, offset=%d",
384 le32_to_cpu(*bref), maxblocks,
385 inode->i_ino, (int)(bref-p));
386 return -EIO;
387 }
388 bref++;
389 }
390 return 0;
391}
392
393
394#define ext4_check_indirect_blockref(inode, bh) \
395 __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \
396 EXT4_ADDR_PER_BLOCK((inode)->i_sb))
397
398#define ext4_check_inode_blockref(inode) \
399 __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \
400 EXT4_NDIR_BLOCKS)
401
374/** 402/**
375 * ext4_get_branch - read the chain of indirect blocks leading to data 403 * ext4_get_branch - read the chain of indirect blocks leading to data
376 * @inode: inode in question 404 * @inode: inode in question
@@ -415,9 +443,22 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
415 if (!p->key) 443 if (!p->key)
416 goto no_block; 444 goto no_block;
417 while (--depth) { 445 while (--depth) {
418 bh = sb_bread(sb, le32_to_cpu(p->key)); 446 bh = sb_getblk(sb, le32_to_cpu(p->key));
419 if (!bh) 447 if (unlikely(!bh))
420 goto failure; 448 goto failure;
449
450 if (!bh_uptodate_or_lock(bh)) {
451 if (bh_submit_read(bh) < 0) {
452 put_bh(bh);
453 goto failure;
454 }
455 /* validate block references */
456 if (ext4_check_indirect_blockref(inode, bh)) {
457 put_bh(bh);
458 goto failure;
459 }
460 }
461
421 add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); 462 add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
422 /* Reader: end */ 463 /* Reader: end */
423 if (!p->key) 464 if (!p->key)
@@ -459,6 +500,8 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
459 ext4_fsblk_t bg_start; 500 ext4_fsblk_t bg_start;
460 ext4_fsblk_t last_block; 501 ext4_fsblk_t last_block;
461 ext4_grpblk_t colour; 502 ext4_grpblk_t colour;
503 ext4_group_t block_group;
504 int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
462 505
463 /* Try to find previous block */ 506 /* Try to find previous block */
464 for (p = ind->p - 1; p >= start; p--) { 507 for (p = ind->p - 1; p >= start; p--) {
@@ -474,9 +517,22 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
474 * It is going to be referred to from the inode itself? OK, just put it 517 * It is going to be referred to from the inode itself? OK, just put it
475 * into the same cylinder group then. 518 * into the same cylinder group then.
476 */ 519 */
477 bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group); 520 block_group = ei->i_block_group;
521 if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
522 block_group &= ~(flex_size-1);
523 if (S_ISREG(inode->i_mode))
524 block_group++;
525 }
526 bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
478 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; 527 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
479 528
529 /*
530 * If we are doing delayed allocation, we don't need to take
531 * colour into account.
532 */
533 if (test_opt(inode->i_sb, DELALLOC))
534 return bg_start;
535
480 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) 536 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
481 colour = (current->pid % 16) * 537 colour = (current->pid % 16) *
482 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); 538 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
@@ -1052,9 +1108,16 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1052 /* 1108 /*
1053 * free those over-booking quota for metadata blocks 1109 * free those over-booking quota for metadata blocks
1054 */ 1110 */
1055
1056 if (mdb_free) 1111 if (mdb_free)
1057 vfs_dq_release_reservation_block(inode, mdb_free); 1112 vfs_dq_release_reservation_block(inode, mdb_free);
1113
1114 /*
1115 * If we have done all the pending block allocations and if
1116 * there aren't any writers on the inode, we can discard the
1117 * inode's preallocations.
1118 */
1119 if (!total && (atomic_read(&inode->i_writecount) == 0))
1120 ext4_discard_preallocations(inode);
1058} 1121}
1059 1122
1060/* 1123/*
@@ -1688,9 +1751,10 @@ static void ext4_da_page_release_reservation(struct page *page,
1688 1751
1689struct mpage_da_data { 1752struct mpage_da_data {
1690 struct inode *inode; 1753 struct inode *inode;
1691 struct buffer_head lbh; /* extent of blocks */ 1754 sector_t b_blocknr; /* start block number of extent */
1755 size_t b_size; /* size of extent */
1756 unsigned long b_state; /* state of the extent */
1692 unsigned long first_page, next_page; /* extent of pages */ 1757 unsigned long first_page, next_page; /* extent of pages */
1693 get_block_t *get_block;
1694 struct writeback_control *wbc; 1758 struct writeback_control *wbc;
1695 int io_done; 1759 int io_done;
1696 int pages_written; 1760 int pages_written;
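
With the embedded struct buffer_head gone, mpage_da_data carries the extent state as a bare bitmask, and the merge test later in this file compares (b_state & BH_FLAGS) directly. A compact demo of that comparison; bit positions are illustrative, not the kernel's enum bh_state_bits:

#include <stdio.h>

enum { BH_Uptodate, BH_Dirty, BH_Mapped, BH_Delay };
#define BH_FLAGS ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_Delay))

int main(void)
{
	/* state accumulated so far in the in-flight extent */
	unsigned long extent_state = (1UL << BH_Mapped) | (1UL << BH_Delay);
	/* state of the next buffer head under consideration */
	unsigned long bh_state = (1UL << BH_Dirty) | (1UL << BH_Mapped) |
				 (1UL << BH_Delay);

	/* only the BH_FLAGS subset decides whether blocks can merge */
	if ((bh_state & BH_FLAGS) == extent_state)
		printf("merge block into the current extent\n");
	else
		printf("flush the extent and start a new one\n");
	return 0;
}
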
@@ -1704,7 +1768,6 @@ struct mpage_da_data {
1704 * @mpd->inode: inode 1768 * @mpd->inode: inode
1705 * @mpd->first_page: first page of the extent 1769 * @mpd->first_page: first page of the extent
1706 * @mpd->next_page: page after the last page of the extent 1770 * @mpd->next_page: page after the last page of the extent
1707 * @mpd->get_block: the filesystem's block mapper function
1708 * 1771 *
1709 * By the time mpage_da_submit_io() is called we expect all blocks 1772 * By the time mpage_da_submit_io() is called we expect all blocks
1710 * to be allocated. this may be wrong if allocation failed. 1773 * to be allocated. this may be wrong if allocation failed.
@@ -1724,7 +1787,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1724 /* 1787 /*
1725 * We need to start from the first_page to the next_page - 1 1788 * We need to start from the first_page to the next_page - 1
1726 * to make sure we also write the mapped dirty buffer_heads. 1789 * to make sure we also write the mapped dirty buffer_heads.
1727 * If we look at mpd->lbh.b_blocknr we would only be looking 1790 * If we look at mpd->b_blocknr we would only be looking
1728 * at the currently mapped buffer_heads. 1791 * at the currently mapped buffer_heads.
1729 */ 1792 */
1730 index = mpd->first_page; 1793 index = mpd->first_page;
@@ -1914,68 +1977,111 @@ static void ext4_print_free_blocks(struct inode *inode)
1914 return; 1977 return;
1915} 1978}
1916 1979
1980#define EXT4_DELALLOC_RSVED 1
1981static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
1982 struct buffer_head *bh_result, int create)
1983{
1984 int ret;
1985 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
1986 loff_t disksize = EXT4_I(inode)->i_disksize;
1987 handle_t *handle = NULL;
1988
1989 handle = ext4_journal_current_handle();
1990 BUG_ON(!handle);
1991 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
1992 bh_result, create, 0, EXT4_DELALLOC_RSVED);
1993 if (ret <= 0)
1994 return ret;
1995
1996 bh_result->b_size = (ret << inode->i_blkbits);
1997
1998 if (ext4_should_order_data(inode)) {
1999 int retval;
2000 retval = ext4_jbd2_file_inode(handle, inode);
2001 if (retval)
2002 /*
2003 * Failed to add inode for ordered mode. Don't
2004 * update file size
2005 */
2006 return retval;
2007 }
2008
2009 /*
2010 * Update on-disk size along with block allocation; we don't
2011 * use 'extend_disksize' as size may change within an already
2012 * allocated block -bzzz
2013 */
2014 disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
2015 if (disksize > i_size_read(inode))
2016 disksize = i_size_read(inode);
2017 if (disksize > EXT4_I(inode)->i_disksize) {
2018 ext4_update_i_disksize(inode, disksize);
2019 ret = ext4_mark_inode_dirty(handle, inode);
2020 return ret;
2021 }
2022 return 0;
2023}
2024
1917/* 2025/*
1918 * mpage_da_map_blocks - go through given space 2026 * mpage_da_map_blocks - go through given space
1919 * 2027 *
1920 * @mpd->lbh - bh describing space 2028 * @mpd - extent of blocks to map
1921 * @mpd->get_block - the filesystem's block mapper function
1922 * 2029 *
1923 * The function skips space we know is already mapped to disk blocks. 2030 * The function skips space we know is already mapped to disk blocks.
1924 * 2031 *
1925 */ 2032 */
1926static int mpage_da_map_blocks(struct mpage_da_data *mpd) 2033static int mpage_da_map_blocks(struct mpage_da_data *mpd)
1927{ 2034{
1928 int err = 0; 2035 int err = 0;
1929 struct buffer_head new; 2036 struct buffer_head new;
1930 struct buffer_head *lbh = &mpd->lbh;
1931 sector_t next; 2037 sector_t next;
1932 2038
1933 /* 2039 /*
1934 * We consider only non-mapped and non-allocated blocks 2040 * We consider only non-mapped and non-allocated blocks
1935 */ 2041 */
1936 if (buffer_mapped(lbh) && !buffer_delay(lbh)) 2042 if ((mpd->b_state & (1 << BH_Mapped)) &&
2043 !(mpd->b_state & (1 << BH_Delay)))
1937 return 0; 2044 return 0;
1938 new.b_state = lbh->b_state; 2045 new.b_state = mpd->b_state;
1939 new.b_blocknr = 0; 2046 new.b_blocknr = 0;
1940 new.b_size = lbh->b_size; 2047 new.b_size = mpd->b_size;
1941 next = lbh->b_blocknr; 2048 next = mpd->b_blocknr;
1942 /* 2049 /*
1943 * If we didn't accumulate anything 2050 * If we didn't accumulate anything
1944 * to write simply return 2051 * to write simply return
1945 */ 2052 */
1946 if (!new.b_size) 2053 if (!new.b_size)
1947 return 0; 2054 return 0;
1948 err = mpd->get_block(mpd->inode, next, &new, 1);
1949 if (err) {
1950 2055
1951 /* If get block returns with error 2056 err = ext4_da_get_block_write(mpd->inode, next, &new, 1);
1952 * we simply return. Later writepage 2057 if (err) {
1953 * will redirty the page and writepages 2058 /*
1954 * will find the dirty page again 2059 * If get block returns with error we simply
2060 * return. Later writepage will redirty the page and
2061 * writepages will find the dirty page again
1955 */ 2062 */
1956 if (err == -EAGAIN) 2063 if (err == -EAGAIN)
1957 return 0; 2064 return 0;
1958 2065
1959 if (err == -ENOSPC && 2066 if (err == -ENOSPC &&
1960 ext4_count_free_blocks(mpd->inode->i_sb)) { 2067 ext4_count_free_blocks(mpd->inode->i_sb)) {
1961 mpd->retval = err; 2068 mpd->retval = err;
1962 return 0; 2069 return 0;
1963 } 2070 }
1964 2071
1965 /* 2072 /*
1966 * get block failure will cause us 2073 * get block failure will cause us to loop in
1967 * to loop in writepages. Because 2074 * writepages, because a_ops->writepage won't be able
1968 * a_ops->writepage won't be able to 2075 * to make progress. The page will be redirtied by
1969 * make progress. The page will be redirtied 2076 * writepage and writepages will again try to write
1970 * by writepage and writepages will again 2077 * the same.
1971 * try to write the same.
1972 */ 2078 */
1973 printk(KERN_EMERG "%s block allocation failed for inode %lu " 2079 printk(KERN_EMERG "%s block allocation failed for inode %lu "
1974 "at logical offset %llu with max blocks " 2080 "at logical offset %llu with max blocks "
1975 "%zd with error %d\n", 2081 "%zd with error %d\n",
1976 __func__, mpd->inode->i_ino, 2082 __func__, mpd->inode->i_ino,
1977 (unsigned long long)next, 2083 (unsigned long long)next,
1978 lbh->b_size >> mpd->inode->i_blkbits, err); 2084 mpd->b_size >> mpd->inode->i_blkbits, err);
1979 printk(KERN_EMERG "This should not happen!! " 2085 printk(KERN_EMERG "This should not happen!! "
1980 "Data will be lost\n"); 2086 "Data will be lost\n");
1981 if (err == -ENOSPC) { 2087 if (err == -ENOSPC) {
@@ -1983,7 +2089,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
1983 } 2089 }
1984 /* invalidate all the pages */ 2090 /* invalidate all the pages */
1985 ext4_da_block_invalidatepages(mpd, next, 2091 ext4_da_block_invalidatepages(mpd, next,
1986 lbh->b_size >> mpd->inode->i_blkbits); 2092 mpd->b_size >> mpd->inode->i_blkbits);
1987 return err; 2093 return err;
1988 } 2094 }
1989 BUG_ON(new.b_size == 0); 2095 BUG_ON(new.b_size == 0);
@@ -1995,7 +2101,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
1995 * If blocks are delayed marked, we need to 2101 * If blocks are delayed marked, we need to
1996 * put actual blocknr and drop delayed bit 2102 * put actual blocknr and drop delayed bit
1997 */ 2103 */
1998 if (buffer_delay(lbh) || buffer_unwritten(lbh)) 2104 if ((mpd->b_state & (1 << BH_Delay)) ||
2105 (mpd->b_state & (1 << BH_Unwritten)))
1999 mpage_put_bnr_to_bhs(mpd, next, &new); 2106 mpage_put_bnr_to_bhs(mpd, next, &new);
2000 2107
2001 return 0; 2108 return 0;
@@ -2014,12 +2121,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2014 * the function is used to collect contig. blocks in same state 2121 * the function is used to collect contig. blocks in same state
2015 */ 2122 */
2016static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, 2123static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
2017 sector_t logical, struct buffer_head *bh) 2124 sector_t logical, size_t b_size,
2125 unsigned long b_state)
2018{ 2126{
2019 sector_t next; 2127 sector_t next;
2020 size_t b_size = bh->b_size; 2128 int nrblocks = mpd->b_size >> mpd->inode->i_blkbits;
2021 struct buffer_head *lbh = &mpd->lbh;
2022 int nrblocks = lbh->b_size >> mpd->inode->i_blkbits;
2023 2129
2024 /* check if the reserved journal credits might overflow */ 2130 /* check if the reserved journal credits might overflow */
2025 if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { 2131 if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
@@ -2046,19 +2152,19 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
2046 /* 2152 /*
2047 * First block in the extent 2153 * First block in the extent
2048 */ 2154 */
2049 if (lbh->b_size == 0) { 2155 if (mpd->b_size == 0) {
2050 lbh->b_blocknr = logical; 2156 mpd->b_blocknr = logical;
2051 lbh->b_size = b_size; 2157 mpd->b_size = b_size;
2052 lbh->b_state = bh->b_state & BH_FLAGS; 2158 mpd->b_state = b_state & BH_FLAGS;
2053 return; 2159 return;
2054 } 2160 }
2055 2161
2056 next = lbh->b_blocknr + nrblocks; 2162 next = mpd->b_blocknr + nrblocks;
2057 /* 2163 /*
2058 * Can we merge the block to our big extent? 2164 * Can we merge the block to our big extent?
2059 */ 2165 */
2060 if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { 2166 if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) {
2061 lbh->b_size += b_size; 2167 mpd->b_size += b_size;
2062 return; 2168 return;
2063 } 2169 }
2064 2170
@@ -2087,7 +2193,7 @@ static int __mpage_da_writepage(struct page *page,
2087{ 2193{
2088 struct mpage_da_data *mpd = data; 2194 struct mpage_da_data *mpd = data;
2089 struct inode *inode = mpd->inode; 2195 struct inode *inode = mpd->inode;
2090 struct buffer_head *bh, *head, fake; 2196 struct buffer_head *bh, *head;
2091 sector_t logical; 2197 sector_t logical;
2092 2198
2093 if (mpd->io_done) { 2199 if (mpd->io_done) {
@@ -2129,9 +2235,9 @@ static int __mpage_da_writepage(struct page *page,
2129 /* 2235 /*
2130 * ... and blocks 2236 * ... and blocks
2131 */ 2237 */
2132 mpd->lbh.b_size = 0; 2238 mpd->b_size = 0;
2133 mpd->lbh.b_state = 0; 2239 mpd->b_state = 0;
2134 mpd->lbh.b_blocknr = 0; 2240 mpd->b_blocknr = 0;
2135 } 2241 }
2136 2242
2137 mpd->next_page = page->index + 1; 2243 mpd->next_page = page->index + 1;
@@ -2139,16 +2245,8 @@ static int __mpage_da_writepage(struct page *page,
2139 (PAGE_CACHE_SHIFT - inode->i_blkbits); 2245 (PAGE_CACHE_SHIFT - inode->i_blkbits);
2140 2246
2141 if (!page_has_buffers(page)) { 2247 if (!page_has_buffers(page)) {
2142 /* 2248 mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE,
2143 * There are no attached buffer heads yet (mmap?)
2144 * we treat the page as full of dirty blocks
2145 */
2146 bh = &fake;
2147 bh->b_size = PAGE_CACHE_SIZE;
2148 bh->b_state = 0;
2149 set_buffer_dirty(bh);
2150 set_buffer_uptodate(bh);
2151 mpage_add_bh_to_extent(mpd, logical, bh);
2152 if (mpd->io_done) 2250 if (mpd->io_done)
2153 return MPAGE_DA_EXTENT_TAIL; 2251 return MPAGE_DA_EXTENT_TAIL;
2154 } else { 2252 } else {
@@ -2166,8 +2264,10 @@ static int __mpage_da_writepage(struct page *page,
2166 * with the page in ext4_da_writepage 2264 * with the page in ext4_da_writepage
2167 */ 2265 */
2168 if (buffer_dirty(bh) && 2266 if (buffer_dirty(bh) &&
2169 (!buffer_mapped(bh) || buffer_delay(bh))) { 2267 (!buffer_mapped(bh) || buffer_delay(bh))) {
2170 mpage_add_bh_to_extent(mpd, logical, bh); 2268 mpage_add_bh_to_extent(mpd, logical,
2269 bh->b_size,
2270 bh->b_state);
2171 if (mpd->io_done) 2271 if (mpd->io_done)
2172 return MPAGE_DA_EXTENT_TAIL; 2272 return MPAGE_DA_EXTENT_TAIL;
2173 } else if (buffer_dirty(bh) && (buffer_mapped(bh))) { 2273 } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
@@ -2179,9 +2279,8 @@ static int __mpage_da_writepage(struct page *page,
2179 * unmapped buffer_head later we need to 2279 * unmapped buffer_head later we need to
2180 * use the b_state flag of that buffer_head. 2280 * use the b_state flag of that buffer_head.
2181 */ 2281 */
2182 if (mpd->lbh.b_size == 0) 2282 if (mpd->b_size == 0)
2183 mpd->lbh.b_state = 2283 mpd->b_state = bh->b_state & BH_FLAGS;
2184 bh->b_state & BH_FLAGS;
2185 } 2284 }
2186 logical++; 2285 logical++;
2187 } while ((bh = bh->b_this_page) != head); 2286 } while ((bh = bh->b_this_page) != head);
@@ -2191,51 +2290,6 @@ static int __mpage_da_writepage(struct page *page,
2191} 2290}
2192 2291
2193/* 2292/*
2194 * mpage_da_writepages - walk the list of dirty pages of the given
2195 * address space, allocates non-allocated blocks, maps newly-allocated
2196 * blocks to existing bhs and issue IO them
2197 *
2198 * @mapping: address space structure to write
2199 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
2200 * @get_block: the filesystem's block mapper function.
2201 *
2202 * This is a library function, which implements the writepages()
2203 * address_space_operation.
2204 */
2205static int mpage_da_writepages(struct address_space *mapping,
2206 struct writeback_control *wbc,
2207 struct mpage_da_data *mpd)
2208{
2209 int ret;
2210
2211 if (!mpd->get_block)
2212 return generic_writepages(mapping, wbc);
2213
2214 mpd->lbh.b_size = 0;
2215 mpd->lbh.b_state = 0;
2216 mpd->lbh.b_blocknr = 0;
2217 mpd->first_page = 0;
2218 mpd->next_page = 0;
2219 mpd->io_done = 0;
2220 mpd->pages_written = 0;
2221 mpd->retval = 0;
2222
2223 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
2224 /*
2225 * Handle last extent of pages
2226 */
2227 if (!mpd->io_done && mpd->next_page != mpd->first_page) {
2228 if (mpage_da_map_blocks(mpd) == 0)
2229 mpage_da_submit_io(mpd);
2230
2231 mpd->io_done = 1;
2232 ret = MPAGE_DA_EXTENT_TAIL;
2233 }
2234 wbc->nr_to_write -= mpd->pages_written;
2235 return ret;
2236}
2237
2238/*
2239 * this is a special callback for ->write_begin() only 2293 * this is a special callback for ->write_begin() only
2240 * its intention is to return a mapped block or reserve space 2294 * its intention is to return a mapped block or reserve space
2241 */ 2295 */
@@ -2274,51 +2328,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2274 2328
2275 return ret; 2329 return ret;
2276} 2330}
2277#define EXT4_DELALLOC_RSVED 1
2278static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
2279 struct buffer_head *bh_result, int create)
2280{
2281 int ret;
2282 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
2283 loff_t disksize = EXT4_I(inode)->i_disksize;
2284 handle_t *handle = NULL;
2285
2286 handle = ext4_journal_current_handle();
2287 BUG_ON(!handle);
2288 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
2289 bh_result, create, 0, EXT4_DELALLOC_RSVED);
2290 if (ret > 0) {
2291
2292 bh_result->b_size = (ret << inode->i_blkbits);
2293
2294 if (ext4_should_order_data(inode)) {
2295 int retval;
2296 retval = ext4_jbd2_file_inode(handle, inode);
2297 if (retval)
2298 /*
2299 * Failed to add inode for ordered
2300 * mode. Don't update file size
2301 */
2302 return retval;
2303 }
2304
2305 /*
2306 * Update on-disk size along with block allocation
2307 * we don't use 'extend_disksize' as size may change
2308 * within already allocated block -bzzz
2309 */
2310 disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
2311 if (disksize > i_size_read(inode))
2312 disksize = i_size_read(inode);
2313 if (disksize > EXT4_I(inode)->i_disksize) {
2314 ext4_update_i_disksize(inode, disksize);
2315 ret = ext4_mark_inode_dirty(handle, inode);
2316 return ret;
2317 }
2318 ret = 0;
2319 }
2320 return ret;
2321}
2322 2331
2323static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) 2332static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
2324{ 2333{
@@ -2569,8 +2578,38 @@ retry:
2569 dump_stack(); 2578 dump_stack();
2570 goto out_writepages; 2579 goto out_writepages;
2571 } 2580 }
2572 mpd.get_block = ext4_da_get_block_write; 2581
2573 ret = mpage_da_writepages(mapping, wbc, &mpd); 2582 /*
2583 * Now call __mpage_da_writepage to find the next
2584 * contiguous region of logical blocks that need
2585 * blocks to be allocated by ext4. We don't actually
2586 * submit the blocks for I/O here, even though
2587 * write_cache_pages thinks it will, and will set the
2588 * pages as clean for write before calling
2589 * __mpage_da_writepage().
2590 */
2591 mpd.b_size = 0;
2592 mpd.b_state = 0;
2593 mpd.b_blocknr = 0;
2594 mpd.first_page = 0;
2595 mpd.next_page = 0;
2596 mpd.io_done = 0;
2597 mpd.pages_written = 0;
2598 mpd.retval = 0;
2599 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
2600 &mpd);
2601 /*
2602 * If we have a contiguous extent of pages and we
2603 * haven't done the I/O yet, map the blocks and submit
2604 * them for I/O.
2605 */
2606 if (!mpd.io_done && mpd.next_page != mpd.first_page) {
2607 if (mpage_da_map_blocks(&mpd) == 0)
2608 mpage_da_submit_io(&mpd);
2609 mpd.io_done = 1;
2610 ret = MPAGE_DA_EXTENT_TAIL;
2611 }
2612 wbc->nr_to_write -= mpd.pages_written;
2574 2613
2575 ext4_journal_stop(handle); 2614 ext4_journal_stop(handle);
2576 2615
@@ -2846,6 +2885,48 @@ out:
2846 return; 2885 return;
2847} 2886}
2848 2887
2888/*
2889 * Force all delayed allocation blocks to be allocated for a given inode.
2890 */
2891int ext4_alloc_da_blocks(struct inode *inode)
2892{
2893 if (!EXT4_I(inode)->i_reserved_data_blocks &&
2894 !EXT4_I(inode)->i_reserved_meta_blocks)
2895 return 0;
2896
2897 /*
2898 * We do something simple for now. The filemap_flush() will
2899 * also start triggering a write of the data blocks, which is
2900 * not strictly speaking necessary (and for users of
2901 * laptop_mode, not even desirable). However, to do otherwise
2902 * would require replicating code paths in:
2903 *
2904 * ext4_da_writepages() ->
2905 * write_cache_pages() ---> (via passed in callback function)
2906 * __mpage_da_writepage() -->
2907 * mpage_add_bh_to_extent()
2908 * mpage_da_map_blocks()
2909 *
2910 * The problem is that write_cache_pages(), located in
2911 * mm/page-writeback.c, marks pages clean in preparation for
2912 * doing I/O, which is not desirable if we're not planning on
2913 * doing I/O at all.
2914 *
2915 * We could call write_cache_pages(), and then redirty all of
2916 * the pages by calling redirty_page_for_writepage() but that
2917 * would be ugly in the extreme. So instead we would need to
2918 * replicate parts of the code in the above functions,
2919 * simplifying them because we wouldn't actually intend to
2920 * write out the pages, but rather only collect contiguous
2921 * logical block extents, call the multi-block allocator, and
2922 * then update the buffer heads with the block allocations.
2923 *
2924 * For now, though, we'll cheat by calling filemap_flush(),
2925 * which will map the blocks, and start the I/O, but not
2926 * actually wait for the I/O to complete.
2927 */
2928 return filemap_flush(inode->i_mapping);
2929}
2849 2930
2850/* 2931/*
2851 * bmap() is special. It gets used by applications such as lilo and by 2932 * bmap() is special. It gets used by applications such as lilo and by
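
The long comment in ext4_alloc_da_blocks() boils down to "start the I/O, but don't wait for it". The nearest userspace analogue of that filemap_flush() behaviour is sync_file_range(2) with SYNC_FILE_RANGE_WRITE alone, shown here as a hedged illustration of the same semantics (Linux-specific, and not the ioctl this series adds):

/* Start writeback of a file's dirty pages without waiting for it to
 * finish, the userspace analogue of the filemap_flush() call above. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	int fd = open(argv[1], O_RDONLY);
	if (fd < 0) { perror("open"); return 1; }

	/* offset 0, nbytes 0 => whole file; WRITE alone does not block
	 * on completion, matching "start the I/O, but not wait". */
	if (sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE) < 0)
		perror("sync_file_range");
	close(fd);
	return 0;
}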
@@ -3868,6 +3949,9 @@ void ext4_truncate(struct inode *inode)
3868 if (!ext4_can_truncate(inode)) 3949 if (!ext4_can_truncate(inode))
3869 return; 3950 return;
3870 3951
3952 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
3953 ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
3954
3871 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 3955 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
3872 ext4_ext_truncate(inode); 3956 ext4_ext_truncate(inode);
3873 return; 3957 return;
@@ -4110,12 +4194,7 @@ make_io:
4110 unsigned num; 4194 unsigned num;
4111 4195
4112 table = ext4_inode_table(sb, gdp); 4196 table = ext4_inode_table(sb, gdp);
4113 /* Make sure s_inode_readahead_blks is a power of 2 */ 4197 /* s_inode_readahead_blks is always a power of 2 */
4114 while (EXT4_SB(sb)->s_inode_readahead_blks &
4115 (EXT4_SB(sb)->s_inode_readahead_blks-1))
4116 EXT4_SB(sb)->s_inode_readahead_blks =
4117 (EXT4_SB(sb)->s_inode_readahead_blks &
4118 (EXT4_SB(sb)->s_inode_readahead_blks-1));
4119 b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1); 4198 b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
4120 if (table > b) 4199 if (table > b)
4121 b = table; 4200 b = table;
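
The deleted loop only existed to force s_inode_readahead_blks down to a power of two so that the surviving line can round `block` down with a plain mask; per the new comment, the invariant is now guaranteed elsewhere. The masking idiom on its own, as a standalone check:

/* b = block & ~(blks - 1) rounds block down to a multiple of blks,
 * but only when blks is a power of two. */
#include <stdio.h>

int main(void)
{
	unsigned long blks = 32;		/* must be a power of two */
	unsigned long blocks[] = { 100, 127, 128, 4097 };

	for (int i = 0; i < 4; i++) {
		unsigned long b = blocks[i] & ~(blks - 1);
		printf("%lu -> %lu\n", blocks[i], b);
	}
	/* x & (x - 1) == 0 is the power-of-two test the old loop relied on */
	printf("power of two? %s\n", (blks & (blks - 1)) == 0 ? "yes" : "no");
	return 0;
}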
@@ -4287,6 +4366,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4287 ei->i_disksize = inode->i_size; 4366 ei->i_disksize = inode->i_size;
4288 inode->i_generation = le32_to_cpu(raw_inode->i_generation); 4367 inode->i_generation = le32_to_cpu(raw_inode->i_generation);
4289 ei->i_block_group = iloc.block_group; 4368 ei->i_block_group = iloc.block_group;
4369 ei->i_last_alloc_group = ~0;
4290 /* 4370 /*
4291 * NOTE! The in-memory inode i_data array is in little-endian order 4371 * NOTE! The in-memory inode i_data array is in little-endian order
4292 * even on big-endian machines: we do NOT byteswap the block numbers! 4372 * even on big-endian machines: we do NOT byteswap the block numbers!
@@ -4329,6 +4409,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4329 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; 4409 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
4330 } 4410 }
4331 4411
4412 if (ei->i_flags & EXT4_EXTENTS_FL) {
4413 /* Validate extent which is part of inode */
4414 ret = ext4_ext_check_inode(inode);
4415 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
4416 (S_ISLNK(inode->i_mode) &&
4417 !ext4_inode_is_fast_symlink(inode))) {
4418 /* Validate block references which are part of inode */
4419 ret = ext4_check_inode_blockref(inode);
4420 }
4421 if (ret) {
4422 brelse(bh);
4423 goto bad_inode;
4424 }
4425
4332 if (S_ISREG(inode->i_mode)) { 4426 if (S_ISREG(inode->i_mode)) {
4333 inode->i_op = &ext4_file_inode_operations; 4427 inode->i_op = &ext4_file_inode_operations;
4334 inode->i_fop = &ext4_file_operations; 4428 inode->i_fop = &ext4_file_operations;
@@ -4345,7 +4439,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4345 inode->i_op = &ext4_symlink_inode_operations; 4439 inode->i_op = &ext4_symlink_inode_operations;
4346 ext4_set_aops(inode); 4440 ext4_set_aops(inode);
4347 } 4441 }
4348 } else { 4442 } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
4443 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
4349 inode->i_op = &ext4_special_inode_operations; 4444 inode->i_op = &ext4_special_inode_operations;
4350 if (raw_inode->i_block[0]) 4445 if (raw_inode->i_block[0])
4351 init_special_inode(inode, inode->i_mode, 4446 init_special_inode(inode, inode->i_mode,
@@ -4353,6 +4448,13 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4353 else 4448 else
4354 init_special_inode(inode, inode->i_mode, 4449 init_special_inode(inode, inode->i_mode,
4355 new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 4450 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
4451 } else {
4452 brelse(bh);
4453 ret = -EIO;
4454 ext4_error(inode->i_sb, __func__,
4455 "bogus i_mode (%o) for inode=%lu",
4456 inode->i_mode, inode->i_ino);
4457 goto bad_inode;
4356 } 4458 }
4357 brelse(iloc.bh); 4459 brelse(iloc.bh);
4358 ext4_set_inode_flags(inode); 4460 ext4_set_inode_flags(inode);
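
The previous hunk replaces a catch-all else with an explicit whitelist, so a corrupted i_mode is now rejected with -EIO instead of silently becoming a special file. The S_IS* predicates it relies on are the same ones <sys/stat.h> gives userspace; a small classifier in the same spirit:

/* Every valid mode matches exactly one predicate; anything else is
 * the "bogus i_mode" case the patch adds. */
#include <stdio.h>
#include <sys/stat.h>

static const char *mode_name(mode_t m)
{
	if (S_ISREG(m))  return "regular file";
	if (S_ISDIR(m))  return "directory";
	if (S_ISLNK(m))  return "symlink";
	if (S_ISCHR(m))  return "character device";
	if (S_ISBLK(m))  return "block device";
	if (S_ISFIFO(m)) return "fifo";
	if (S_ISSOCK(m)) return "socket";
	return "bogus i_mode";
}

int main(int argc, char **argv)
{
	struct stat st;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <path>\n", argv[0]);
		return 1;
	}
	if (lstat(argv[1], &st) < 0) {
		perror("lstat");
		return 1;
	}
	printf("%s: %s\n", argv[1], mode_name(st.st_mode));
	return 0;
}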
@@ -5146,8 +5248,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
5146 return !buffer_mapped(bh); 5248 return !buffer_mapped(bh);
5147} 5249}
5148 5250
5149int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) 5251int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5150{ 5252{
5253 struct page *page = vmf->page;
5151 loff_t size; 5254 loff_t size;
5152 unsigned long len; 5255 unsigned long len;
5153 int ret = -EINVAL; 5256 int ret = -EINVAL;
@@ -5199,6 +5302,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
5199 goto out_unlock; 5302 goto out_unlock;
5200 ret = 0; 5303 ret = 0;
5201out_unlock: 5304out_unlock:
5305 if (ret)
5306 ret = VM_FAULT_SIGBUS;
5202 up_read(&inode->i_alloc_sem); 5307 up_read(&inode->i_alloc_sem);
5203 return ret; 5308 return ret;
5204} 5309}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 42dc83fb247a..91e75f7a9e73 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -48,8 +48,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
48 if (err) 48 if (err)
49 return err; 49 return err;
50 50
51 if (!S_ISDIR(inode->i_mode)) 51 flags = ext4_mask_flags(inode->i_mode, flags);
52 flags &= ~EXT4_DIRSYNC_FL;
53 52
54 err = -EPERM; 53 err = -EPERM;
55 mutex_lock(&inode->i_mutex); 54 mutex_lock(&inode->i_mutex);
@@ -263,6 +262,20 @@ setversion_out:
263 return err; 262 return err;
264 } 263 }
265 264
265 case EXT4_IOC_ALLOC_DA_BLKS:
266 {
267 int err;
268 if (!is_owner_or_cap(inode))
269 return -EACCES;
270
271 err = mnt_want_write(filp->f_path.mnt);
272 if (err)
273 return err;
274 err = ext4_alloc_da_blocks(inode);
275 mnt_drop_write(filp->f_path.mnt);
276 return err;
277 }
278
266 default: 279 default:
267 return -ENOTTY; 280 return -ENOTTY;
268 } 281 }
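
A hedged usage sketch for the new ioctl. The request number below mirrors what the kernel's fs/ext4/ext4.h defines for EXT4_IOC_ALLOC_DA_BLKS (_IO('f', 12)); treat that value as an assumption here, since the header is not part of this hunk:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#ifndef EXT4_IOC_ALLOC_DA_BLKS
#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)	/* assumed, see lead-in */
#endif

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <file-on-ext4>\n", argv[0]);
		return 1;
	}
	int fd = open(argv[1], O_RDONLY);
	if (fd < 0) { perror("open"); return 1; }

	/* Forces delayed-allocation blocks to be allocated; the handler
	 * above requires the caller to be the owner or hold CAP_FOWNER. */
	if (ioctl(fd, EXT4_IOC_ALLOC_DA_BLKS) < 0)
		perror("ioctl(EXT4_IOC_ALLOC_DA_BLKS)");
	close(fd);
	return 0;
}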
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b038188bd039..f871677a7984 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -46,22 +46,23 @@
 46 * The allocation request involve request for multiple number of blocks 46 * An allocation request involves a request for multiple blocks
 47 * near to the goal(block) value specified. 47 * near the goal (block) value specified.
48 * 48 *
49 * During initialization phase of the allocator we decide to use the group 49 * During initialization phase of the allocator we decide to use the
50 * preallocation or inode preallocation depending on the size file. The 50 * group preallocation or inode preallocation depending on the size of
51 * size of the file could be the resulting file size we would have after 51 * the file. The size of the file could be the resulting file size we
 52 * allocation or the current file size which ever is larger. If the size is 52 * would have after allocation, or the current file size, whichever
53 * less that sbi->s_mb_stream_request we select the group 53 * is larger. If the size is less than sbi->s_mb_stream_request we
54 * preallocation. The default value of s_mb_stream_request is 16 54 * select to use the group preallocation. The default value of
55 * blocks. This can also be tuned via 55 * s_mb_stream_request is 16 blocks. This can also be tuned via
56 * /proc/fs/ext4/<partition>/stream_req. The value is represented in terms 56 * /sys/fs/ext4/<partition>/mb_stream_req. The value is represented in
57 * of number of blocks. 57 * terms of number of blocks.
58 * 58 *
 59 * The main motivation for having small file use group preallocation is to 59 * The main motivation for having small files use group preallocation is to
60 * ensure that we have small file closer in the disk. 60 * ensure that we have small files closer together on the disk.
61 * 61 *
62 * First stage the allocator looks at the inode prealloc list 62 * First stage the allocator looks at the inode prealloc list,
63 * ext4_inode_info->i_prealloc_list contain list of prealloc spaces for 63 * ext4_inode_info->i_prealloc_list, which contains list of prealloc
64 * this particular inode. The inode prealloc space is represented as: 64 * spaces for this particular inode. The inode prealloc space is
65 * represented as:
65 * 66 *
66 * pa_lstart -> the logical start block for this prealloc space 67 * pa_lstart -> the logical start block for this prealloc space
67 * pa_pstart -> the physical start block for this prealloc space 68 * pa_pstart -> the physical start block for this prealloc space
@@ -121,29 +122,29 @@
121 * list. In case of inode preallocation we follow a list of heuristics 122 * list. In case of inode preallocation we follow a list of heuristics
122 * based on file size. This can be found in ext4_mb_normalize_request. If 123 * based on file size. This can be found in ext4_mb_normalize_request. If
123 * we are doing a group prealloc we try to normalize the request to 124 * we are doing a group prealloc we try to normalize the request to
124 * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is set to 125 * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is
125 * 512 blocks. This can be tuned via 126 * 512 blocks. This can be tuned via
 126 * /proc/fs/ext4/<partition/group_prealloc. The value is represented in 127 * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
127 * terms of number of blocks. If we have mounted the file system with -O 128 * terms of number of blocks. If we have mounted the file system with -O
128 * stripe=<value> option the group prealloc request is normalized to the 129 * stripe=<value> option the group prealloc request is normalized to the
129 * stripe value (sbi->s_stripe) 130 * stripe value (sbi->s_stripe)
130 * 131 *
 131 * The regular allocator(using the buddy cache) support few tunables. 132 * The regular allocator (using the buddy cache) supports a few tunables.
132 * 133 *
133 * /proc/fs/ext4/<partition>/min_to_scan 134 * /sys/fs/ext4/<partition>/mb_min_to_scan
134 * /proc/fs/ext4/<partition>/max_to_scan 135 * /sys/fs/ext4/<partition>/mb_max_to_scan
135 * /proc/fs/ext4/<partition>/order2_req 136 * /sys/fs/ext4/<partition>/mb_order2_req
136 * 137 *
137 * The regular allocator use buddy scan only if the request len is power of 138 * The regular allocator uses buddy scan only if the request len is power of
138 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The 139 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
139 * value of s_mb_order2_reqs can be tuned via 140 * value of s_mb_order2_reqs can be tuned via
140 * /proc/fs/ext4/<partition>/order2_req. If the request len is equal to 141 * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to
 141 * stripe size (sbi->s_stripe), we try to search for contigous block in 142 * stripe size (sbi->s_stripe), we try to search for contiguous block in
142 * stripe size. This should result in better allocation on RAID setup. If 143 * stripe size. This should result in better allocation on RAID setups. If
143 * not we search in the specific group using bitmap for best extents. The 144 * not, we search in the specific group using bitmap for best extents. The
144 * tunable min_to_scan and max_to_scan controll the behaviour here. 145 * tunable min_to_scan and max_to_scan control the behaviour here.
 145 * min_to_scan indicate how long the mballoc __must__ look for a best 146 * min_to_scan indicates how long the mballoc __must__ look for a best
146 * extent and max_to_scanindicate how long the mballoc __can__ look for a 147 * extent and max_to_scan indicates how long the mballoc __can__ look for a
147 * best extent in the found extents. Searching for the blocks starts with 148 * best extent in the found extents. Searching for the blocks starts with
148 * the group specified as the goal value in allocation context via 149 * the group specified as the goal value in allocation context via
149 * ac_g_ex. Each group is first checked based on the criteria whether it 150 * ac_g_ex. Each group is first checked based on the criteria whether it
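
Each of the knobs named above is an ordinary sysfs attribute, so tuning is a one-line write. A minimal sketch, assuming the filesystem sits on sda1 (the device name and the value are placeholders):

#include <stdio.h>

int main(void)
{
	/* path layout follows the comment above; "sda1" is a placeholder */
	const char *path = "/sys/fs/ext4/sda1/mb_stream_req";
	FILE *f = fopen(path, "w");

	if (!f) { perror(path); return 1; }
	/* request group preallocation for files up to 32 blocks */
	fprintf(f, "32\n");
	fclose(f);
	return 0;
}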
@@ -337,8 +338,6 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
337 ext4_group_t group); 338 ext4_group_t group);
338static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 339static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
339 ext4_group_t group); 340 ext4_group_t group);
340static int ext4_mb_init_per_dev_proc(struct super_block *sb);
341static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
342static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); 341static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
343 342
344 343
@@ -1726,6 +1725,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1726{ 1725{
1727 unsigned free, fragments; 1726 unsigned free, fragments;
1728 unsigned i, bits; 1727 unsigned i, bits;
1728 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
1729 struct ext4_group_desc *desc; 1729 struct ext4_group_desc *desc;
1730 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); 1730 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1731 1731
@@ -1747,6 +1747,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1747 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) 1747 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1748 return 0; 1748 return 0;
1749 1749
1750 /* Avoid using the first bg of a flexgroup for data files */
1751 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
1752 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
1753 ((group % flex_size) == 0))
1754 return 0;
1755
1750 bits = ac->ac_sb->s_blocksize_bits + 1; 1756 bits = ac->ac_sb->s_blocksize_bits + 1;
1751 for (i = ac->ac_2order; i <= bits; i++) 1757 for (i = ac->ac_2order; i <= bits; i++)
1752 if (grp->bb_counters[i] > 0) 1758 if (grp->bb_counters[i] > 0)
@@ -1971,7 +1977,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1971 /* 1977 /*
1972 * We search using buddy data only if the order of the request 1978 * We search using buddy data only if the order of the request
 1973 * is greater than equal to the sbi_s_mb_order2_reqs 1979 * is greater than or equal to sbi->s_mb_order2_reqs
1974 * You can tune it via /proc/fs/ext4/<partition>/order2_req 1980 * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req
1975 */ 1981 */
1976 if (i >= sbi->s_mb_order2_reqs) { 1982 if (i >= sbi->s_mb_order2_reqs) {
1977 /* 1983 /*
@@ -2693,7 +2699,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2693 i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int); 2699 i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int);
2694 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); 2700 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2695 if (sbi->s_mb_maxs == NULL) { 2701 if (sbi->s_mb_maxs == NULL) {
2696 kfree(sbi->s_mb_maxs); 2702 kfree(sbi->s_mb_offsets);
2697 return -ENOMEM; 2703 return -ENOMEM;
2698 } 2704 }
2699 2705
@@ -2746,7 +2752,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2746 spin_lock_init(&lg->lg_prealloc_lock); 2752 spin_lock_init(&lg->lg_prealloc_lock);
2747 } 2753 }
2748 2754
2749 ext4_mb_init_per_dev_proc(sb);
2750 ext4_mb_history_init(sb); 2755 ext4_mb_history_init(sb);
2751 2756
2752 if (sbi->s_journal) 2757 if (sbi->s_journal)
@@ -2829,7 +2834,6 @@ int ext4_mb_release(struct super_block *sb)
2829 2834
2830 free_percpu(sbi->s_locality_groups); 2835 free_percpu(sbi->s_locality_groups);
2831 ext4_mb_history_release(sb); 2836 ext4_mb_history_release(sb);
2832 ext4_mb_destroy_per_dev_proc(sb);
2833 2837
2834 return 0; 2838 return 0;
2835} 2839}
@@ -2890,62 +2894,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2890 mb_debug("freed %u blocks in %u structures\n", count, count2); 2894 mb_debug("freed %u blocks in %u structures\n", count, count2);
2891} 2895}
2892 2896
2893#define EXT4_MB_STATS_NAME "stats"
2894#define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan"
2895#define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan"
2896#define EXT4_MB_ORDER2_REQ "order2_req"
2897#define EXT4_MB_STREAM_REQ "stream_req"
2898#define EXT4_MB_GROUP_PREALLOC "group_prealloc"
2899
2900static int ext4_mb_init_per_dev_proc(struct super_block *sb)
2901{
2902#ifdef CONFIG_PROC_FS
2903 mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
2904 struct ext4_sb_info *sbi = EXT4_SB(sb);
2905 struct proc_dir_entry *proc;
2906
2907 if (sbi->s_proc == NULL)
2908 return -EINVAL;
2909
2910 EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats);
2911 EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan);
2912 EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan);
2913 EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs);
2914 EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request);
2915 EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc);
2916 return 0;
2917
2918err_out:
2919 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
2920 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
2921 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
2922 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
2923 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
2924 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
2925 return -ENOMEM;
2926#else
2927 return 0;
2928#endif
2929}
2930
2931static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
2932{
2933#ifdef CONFIG_PROC_FS
2934 struct ext4_sb_info *sbi = EXT4_SB(sb);
2935
2936 if (sbi->s_proc == NULL)
2937 return -EINVAL;
2938
2939 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
2940 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
2941 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
2942 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
2943 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
2944 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
2945#endif
2946 return 0;
2947}
2948
2949int __init init_ext4_mballoc(void) 2897int __init init_ext4_mballoc(void)
2950{ 2898{
2951 ext4_pspace_cachep = 2899 ext4_pspace_cachep =
@@ -3096,9 +3044,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3096 if (sbi->s_log_groups_per_flex) { 3044 if (sbi->s_log_groups_per_flex) {
3097 ext4_group_t flex_group = ext4_flex_group(sbi, 3045 ext4_group_t flex_group = ext4_flex_group(sbi,
3098 ac->ac_b_ex.fe_group); 3046 ac->ac_b_ex.fe_group);
3099 spin_lock(sb_bgl_lock(sbi, flex_group)); 3047 atomic_sub(ac->ac_b_ex.fe_len,
3100 sbi->s_flex_groups[flex_group].free_blocks -= ac->ac_b_ex.fe_len; 3048 &sbi->s_flex_groups[flex_group].free_blocks);
3101 spin_unlock(sb_bgl_lock(sbi, flex_group));
3102 } 3049 }
3103 3050
3104 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 3051 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
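
Dropping sb_bgl_lock() is safe here because the critical section was a single add or sub; once free_blocks is an atomic counter, the read-modify-write happens atomically on its own. The same idea in portable C11 form (the kernel uses atomic_t with atomic_add/atomic_sub; this is shown only by analogy):

#include <stdatomic.h>
#include <stdio.h>

static atomic_long free_blocks = 4096;

/* was: lock; counter -= n; unlock */
static void alloc_blocks(long n)   { atomic_fetch_sub(&free_blocks, n); }
/* was: lock; counter += n; unlock */
static void release_blocks(long n) { atomic_fetch_add(&free_blocks, n); }

int main(void)
{
	alloc_blocks(128);
	release_blocks(32);
	printf("free_blocks = %ld\n", atomic_load(&free_blocks));
	return 0;
}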
@@ -3116,7 +3063,7 @@ out_err:
3116 * here we normalize request for locality group 3063 * here we normalize request for locality group
 3117 * Group request are normalized to s_strip size if we set the same via mount 3064 * Group requests are normalized to s_stripe size if we set the same via mount
3118 * option. If not we set it to s_mb_group_prealloc which can be configured via 3065 * option. If not we set it to s_mb_group_prealloc which can be configured via
3119 * /proc/fs/ext4/<partition>/group_prealloc 3066 * /sys/fs/ext4/<partition>/mb_group_prealloc
3120 * 3067 *
3121 * XXX: should we try to preallocate more than the group has now? 3068 * XXX: should we try to preallocate more than the group has now?
3122 */ 3069 */
@@ -3608,8 +3555,11 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3608 spin_unlock(&pa->pa_lock); 3555 spin_unlock(&pa->pa_lock);
3609 3556
3610 grp_blk = pa->pa_pstart; 3557 grp_blk = pa->pa_pstart;
3611 /* If linear, pa_pstart may be in the next group when pa is used up */ 3558 /*
3612 if (pa->pa_linear) 3559 * If doing group-based preallocation, pa_pstart may be in the
3560 * next group when pa is used up
3561 */
3562 if (pa->pa_type == MB_GROUP_PA)
3613 grp_blk--; 3563 grp_blk--;
3614 3564
3615 ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL); 3565 ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
@@ -3704,7 +3654,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3704 INIT_LIST_HEAD(&pa->pa_inode_list); 3654 INIT_LIST_HEAD(&pa->pa_inode_list);
3705 INIT_LIST_HEAD(&pa->pa_group_list); 3655 INIT_LIST_HEAD(&pa->pa_group_list);
3706 pa->pa_deleted = 0; 3656 pa->pa_deleted = 0;
3707 pa->pa_linear = 0; 3657 pa->pa_type = MB_INODE_PA;
3708 3658
3709 mb_debug("new inode pa %p: %llu/%u for %u\n", pa, 3659 mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
3710 pa->pa_pstart, pa->pa_len, pa->pa_lstart); 3660 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
@@ -3767,7 +3717,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3767 INIT_LIST_HEAD(&pa->pa_inode_list); 3717 INIT_LIST_HEAD(&pa->pa_inode_list);
3768 INIT_LIST_HEAD(&pa->pa_group_list); 3718 INIT_LIST_HEAD(&pa->pa_group_list);
3769 pa->pa_deleted = 0; 3719 pa->pa_deleted = 0;
3770 pa->pa_linear = 1; 3720 pa->pa_type = MB_GROUP_PA;
3771 3721
3772 mb_debug("new group pa %p: %llu/%u for %u\n", pa, 3722 mb_debug("new group pa %p: %llu/%u for %u\n", pa,
3773 pa->pa_pstart, pa->pa_len, pa->pa_lstart); 3723 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
@@ -4021,7 +3971,7 @@ repeat:
4021 list_del_rcu(&pa->pa_inode_list); 3971 list_del_rcu(&pa->pa_inode_list);
4022 spin_unlock(pa->pa_obj_lock); 3972 spin_unlock(pa->pa_obj_lock);
4023 3973
4024 if (pa->pa_linear) 3974 if (pa->pa_type == MB_GROUP_PA)
4025 ext4_mb_release_group_pa(&e4b, pa, ac); 3975 ext4_mb_release_group_pa(&e4b, pa, ac);
4026 else 3976 else
4027 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); 3977 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
@@ -4121,7 +4071,7 @@ repeat:
4121 spin_unlock(&ei->i_prealloc_lock); 4071 spin_unlock(&ei->i_prealloc_lock);
4122 4072
4123 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) { 4073 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
4124 BUG_ON(pa->pa_linear != 0); 4074 BUG_ON(pa->pa_type != MB_INODE_PA);
4125 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); 4075 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
4126 4076
4127 err = ext4_mb_load_buddy(sb, group, &e4b); 4077 err = ext4_mb_load_buddy(sb, group, &e4b);
@@ -4232,7 +4182,7 @@ static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4232 * file is determined by the current size or the resulting size after 4182 * file is determined by the current size or the resulting size after
 4233 * allocation which ever is larger 4183 * allocation, whichever is larger
4234 * 4184 *
4235 * One can tune this size via /proc/fs/ext4/<partition>/stream_req 4185 * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
4236 */ 4186 */
4237static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) 4187static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4238{ 4188{
@@ -4373,7 +4323,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4373 continue; 4323 continue;
4374 } 4324 }
4375 /* only lg prealloc space */ 4325 /* only lg prealloc space */
4376 BUG_ON(!pa->pa_linear); 4326 BUG_ON(pa->pa_type != MB_GROUP_PA);
4377 4327
4378 /* seems this one can be freed ... */ 4328 /* seems this one can be freed ... */
4379 pa->pa_deleted = 1; 4329 pa->pa_deleted = 1;
@@ -4442,7 +4392,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4442 pa_inode_list) { 4392 pa_inode_list) {
4443 spin_lock(&tmp_pa->pa_lock); 4393 spin_lock(&tmp_pa->pa_lock);
4444 if (tmp_pa->pa_deleted) { 4394 if (tmp_pa->pa_deleted) {
4445 spin_unlock(&pa->pa_lock); 4395 spin_unlock(&tmp_pa->pa_lock);
4446 continue; 4396 continue;
4447 } 4397 }
4448 if (!added && pa->pa_free < tmp_pa->pa_free) { 4398 if (!added && pa->pa_free < tmp_pa->pa_free) {
@@ -4479,7 +4429,7 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4479{ 4429{
4480 struct ext4_prealloc_space *pa = ac->ac_pa; 4430 struct ext4_prealloc_space *pa = ac->ac_pa;
4481 if (pa) { 4431 if (pa) {
4482 if (pa->pa_linear) { 4432 if (pa->pa_type == MB_GROUP_PA) {
4483 /* see comment in ext4_mb_use_group_pa() */ 4433 /* see comment in ext4_mb_use_group_pa() */
4484 spin_lock(&pa->pa_lock); 4434 spin_lock(&pa->pa_lock);
4485 pa->pa_pstart += ac->ac_b_ex.fe_len; 4435 pa->pa_pstart += ac->ac_b_ex.fe_len;
@@ -4499,7 +4449,7 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4499 * doesn't grow big. We need to release 4449 * doesn't grow big. We need to release
4500 * alloc_semp before calling ext4_mb_add_n_trim() 4450 * alloc_semp before calling ext4_mb_add_n_trim()
4501 */ 4451 */
4502 if (pa->pa_linear && likely(pa->pa_free)) { 4452 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
4503 spin_lock(pa->pa_obj_lock); 4453 spin_lock(pa->pa_obj_lock);
4504 list_del_rcu(&pa->pa_inode_list); 4454 list_del_rcu(&pa->pa_inode_list);
4505 spin_unlock(pa->pa_obj_lock); 4455 spin_unlock(pa->pa_obj_lock);
@@ -4936,9 +4886,7 @@ do_more:
4936 4886
4937 if (sbi->s_log_groups_per_flex) { 4887 if (sbi->s_log_groups_per_flex) {
4938 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 4888 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4939 spin_lock(sb_bgl_lock(sbi, flex_group)); 4889 atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
4940 sbi->s_flex_groups[flex_group].free_blocks += count;
4941 spin_unlock(sb_bgl_lock(sbi, flex_group));
4942 } 4890 }
4943 4891
4944 ext4_mb_release_desc(&e4b); 4892 ext4_mb_release_desc(&e4b);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 10a2921baf14..dd9e6cd5f6cf 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -132,12 +132,15 @@ struct ext4_prealloc_space {
132 ext4_lblk_t pa_lstart; /* log. block */ 132 ext4_lblk_t pa_lstart; /* log. block */
133 unsigned short pa_len; /* len of preallocated chunk */ 133 unsigned short pa_len; /* len of preallocated chunk */
134 unsigned short pa_free; /* how many blocks are free */ 134 unsigned short pa_free; /* how many blocks are free */
135 unsigned short pa_linear; /* consumed in one direction 135 unsigned short pa_type; /* pa type. inode or group */
136 * strictly, for grp prealloc */
137 spinlock_t *pa_obj_lock; 136 spinlock_t *pa_obj_lock;
138 struct inode *pa_inode; /* hack, for history only */ 137 struct inode *pa_inode; /* hack, for history only */
139}; 138};
140 139
140enum {
141 MB_INODE_PA = 0,
142 MB_GROUP_PA = 1
143};
141 144
142struct ext4_free_extent { 145struct ext4_free_extent {
143 ext4_lblk_t fe_logical; 146 ext4_lblk_t fe_logical;
@@ -247,7 +250,6 @@ static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
247 250
248#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 251#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
249 252
250struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
251static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, 253static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
252 struct ext4_free_extent *fex) 254 struct ext4_free_extent *fex)
253{ 255{
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 83410244d3ee..22098e1cd085 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -161,12 +161,12 @@ static struct dx_frame *dx_probe(const struct qstr *d_name,
161 struct dx_frame *frame, 161 struct dx_frame *frame,
162 int *err); 162 int *err);
163static void dx_release(struct dx_frame *frames); 163static void dx_release(struct dx_frame *frames);
164static int dx_make_map(struct ext4_dir_entry_2 *de, int size, 164static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
165 struct dx_hash_info *hinfo, struct dx_map_entry map[]); 165 struct dx_hash_info *hinfo, struct dx_map_entry map[]);
166static void dx_sort_map(struct dx_map_entry *map, unsigned count); 166static void dx_sort_map(struct dx_map_entry *map, unsigned count);
167static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, 167static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
168 struct dx_map_entry *offsets, int count); 168 struct dx_map_entry *offsets, int count, unsigned blocksize);
169static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size); 169static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
170static void dx_insert_block(struct dx_frame *frame, 170static void dx_insert_block(struct dx_frame *frame,
171 u32 hash, ext4_lblk_t block); 171 u32 hash, ext4_lblk_t block);
172static int ext4_htree_next_block(struct inode *dir, __u32 hash, 172static int ext4_htree_next_block(struct inode *dir, __u32 hash,
@@ -180,14 +180,38 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
180static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, 180static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
181 struct inode *inode); 181 struct inode *inode);
182 182
183unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
184{
185 unsigned len = le16_to_cpu(dlen);
186
187 if (len == EXT4_MAX_REC_LEN || len == 0)
188 return blocksize;
189 return (len & 65532) | ((len & 3) << 16);
190}
191
192__le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
193{
194 if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
195 BUG();
196 if (len < 65536)
197 return cpu_to_le16(len);
198 if (len == blocksize) {
199 if (blocksize == 65536)
200 return cpu_to_le16(EXT4_MAX_REC_LEN);
201 else
202 return cpu_to_le16(0);
203 }
204 return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
205}
206
183/* 207/*
184 * p is at least 6 bytes before the end of page 208 * p is at least 6 bytes before the end of page
185 */ 209 */
186static inline struct ext4_dir_entry_2 * 210static inline struct ext4_dir_entry_2 *
187ext4_next_entry(struct ext4_dir_entry_2 *p) 211ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
188{ 212{
189 return (struct ext4_dir_entry_2 *)((char *)p + 213 return (struct ext4_dir_entry_2 *)((char *)p +
190 ext4_rec_len_from_disk(p->rec_len)); 214 ext4_rec_len_from_disk(p->rec_len, blocksize));
191} 215}
192 216
193/* 217/*
@@ -294,7 +318,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent
294 space += EXT4_DIR_REC_LEN(de->name_len); 318 space += EXT4_DIR_REC_LEN(de->name_len);
295 names++; 319 names++;
296 } 320 }
297 de = ext4_next_entry(de); 321 de = ext4_next_entry(de, size);
298 } 322 }
299 printk("(%i)\n", names); 323 printk("(%i)\n", names);
300 return (struct stats) { names, space, 1 }; 324 return (struct stats) { names, space, 1 };
@@ -585,7 +609,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
585 top = (struct ext4_dir_entry_2 *) ((char *) de + 609 top = (struct ext4_dir_entry_2 *) ((char *) de +
586 dir->i_sb->s_blocksize - 610 dir->i_sb->s_blocksize -
587 EXT4_DIR_REC_LEN(0)); 611 EXT4_DIR_REC_LEN(0));
588 for (; de < top; de = ext4_next_entry(de)) { 612 for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
589 if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, 613 if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
590 (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) 614 (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
591 +((char *)de - bh->b_data))) { 615 +((char *)de - bh->b_data))) {
@@ -663,7 +687,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
663 } 687 }
664 if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) { 688 if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
665 de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; 689 de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
666 de = ext4_next_entry(de); 690 de = ext4_next_entry(de, dir->i_sb->s_blocksize);
667 if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0) 691 if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
668 goto errout; 692 goto errout;
669 count++; 693 count++;
@@ -713,15 +737,15 @@ errout:
713 * Create map of hash values, offsets, and sizes, stored at end of block. 737 * Create map of hash values, offsets, and sizes, stored at end of block.
714 * Returns number of entries mapped. 738 * Returns number of entries mapped.
715 */ 739 */
716static int dx_make_map (struct ext4_dir_entry_2 *de, int size, 740static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
717 struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) 741 struct dx_hash_info *hinfo,
742 struct dx_map_entry *map_tail)
718{ 743{
719 int count = 0; 744 int count = 0;
720 char *base = (char *) de; 745 char *base = (char *) de;
721 struct dx_hash_info h = *hinfo; 746 struct dx_hash_info h = *hinfo;
722 747
723 while ((char *) de < base + size) 748 while ((char *) de < base + blocksize) {
724 {
725 if (de->name_len && de->inode) { 749 if (de->name_len && de->inode) {
726 ext4fs_dirhash(de->name, de->name_len, &h); 750 ext4fs_dirhash(de->name, de->name_len, &h);
727 map_tail--; 751 map_tail--;
@@ -732,7 +756,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
732 cond_resched(); 756 cond_resched();
733 } 757 }
734 /* XXX: do we need to check rec_len == 0 case? -Chris */ 758 /* XXX: do we need to check rec_len == 0 case? -Chris */
735 de = ext4_next_entry(de); 759 de = ext4_next_entry(de, blocksize);
736 } 760 }
737 return count; 761 return count;
738} 762}
@@ -832,7 +856,8 @@ static inline int search_dirblock(struct buffer_head *bh,
832 return 1; 856 return 1;
833 } 857 }
834 /* prevent looping on a bad block */ 858 /* prevent looping on a bad block */
835 de_len = ext4_rec_len_from_disk(de->rec_len); 859 de_len = ext4_rec_len_from_disk(de->rec_len,
860 dir->i_sb->s_blocksize);
836 if (de_len <= 0) 861 if (de_len <= 0)
837 return -1; 862 return -1;
838 offset += de_len; 863 offset += de_len;
@@ -996,7 +1021,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
996 de = (struct ext4_dir_entry_2 *) bh->b_data; 1021 de = (struct ext4_dir_entry_2 *) bh->b_data;
997 top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize - 1022 top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
998 EXT4_DIR_REC_LEN(0)); 1023 EXT4_DIR_REC_LEN(0));
999 for (; de < top; de = ext4_next_entry(de)) { 1024 for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
1000 int off = (block << EXT4_BLOCK_SIZE_BITS(sb)) 1025 int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
1001 + ((char *) de - bh->b_data); 1026 + ((char *) de - bh->b_data);
1002 1027
@@ -1052,8 +1077,16 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
1052 return ERR_PTR(-EIO); 1077 return ERR_PTR(-EIO);
1053 } 1078 }
1054 inode = ext4_iget(dir->i_sb, ino); 1079 inode = ext4_iget(dir->i_sb, ino);
1055 if (IS_ERR(inode)) 1080 if (unlikely(IS_ERR(inode))) {
1056 return ERR_CAST(inode); 1081 if (PTR_ERR(inode) == -ESTALE) {
1082 ext4_error(dir->i_sb, __func__,
1083 "deleted inode referenced: %u",
1084 ino);
1085 return ERR_PTR(-EIO);
1086 } else {
1087 return ERR_CAST(inode);
1088 }
1089 }
1057 } 1090 }
1058 return d_splice_alias(inode, dentry); 1091 return d_splice_alias(inode, dentry);
1059} 1092}
@@ -1109,7 +1142,8 @@ static inline void ext4_set_de_type(struct super_block *sb,
1109 * Returns pointer to last entry moved. 1142 * Returns pointer to last entry moved.
1110 */ 1143 */
1111static struct ext4_dir_entry_2 * 1144static struct ext4_dir_entry_2 *
1112dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) 1145dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
1146 unsigned blocksize)
1113{ 1147{
1114 unsigned rec_len = 0; 1148 unsigned rec_len = 0;
1115 1149
@@ -1118,7 +1152,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
1118 rec_len = EXT4_DIR_REC_LEN(de->name_len); 1152 rec_len = EXT4_DIR_REC_LEN(de->name_len);
1119 memcpy (to, de, rec_len); 1153 memcpy (to, de, rec_len);
1120 ((struct ext4_dir_entry_2 *) to)->rec_len = 1154 ((struct ext4_dir_entry_2 *) to)->rec_len =
1121 ext4_rec_len_to_disk(rec_len); 1155 ext4_rec_len_to_disk(rec_len, blocksize);
1122 de->inode = 0; 1156 de->inode = 0;
1123 map++; 1157 map++;
1124 to += rec_len; 1158 to += rec_len;
@@ -1130,19 +1164,19 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
1130 * Compact each dir entry in the range to the minimal rec_len. 1164 * Compact each dir entry in the range to the minimal rec_len.
1131 * Returns pointer to last entry in range. 1165 * Returns pointer to last entry in range.
1132 */ 1166 */
1133static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size) 1167static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
1134{ 1168{
1135 struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base; 1169 struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
1136 unsigned rec_len = 0; 1170 unsigned rec_len = 0;
1137 1171
1138 prev = to = de; 1172 prev = to = de;
1139 while ((char*)de < base + size) { 1173 while ((char*)de < base + blocksize) {
1140 next = ext4_next_entry(de); 1174 next = ext4_next_entry(de, blocksize);
1141 if (de->inode && de->name_len) { 1175 if (de->inode && de->name_len) {
1142 rec_len = EXT4_DIR_REC_LEN(de->name_len); 1176 rec_len = EXT4_DIR_REC_LEN(de->name_len);
1143 if (de > to) 1177 if (de > to)
1144 memmove(to, de, rec_len); 1178 memmove(to, de, rec_len);
1145 to->rec_len = ext4_rec_len_to_disk(rec_len); 1179 to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
1146 prev = to; 1180 prev = to;
1147 to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len); 1181 to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
1148 } 1182 }
@@ -1215,10 +1249,12 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1215 hash2, split, count-split)); 1249 hash2, split, count-split));
1216 1250
1217 /* Fancy dance to stay within two buffers */ 1251 /* Fancy dance to stay within two buffers */
1218 de2 = dx_move_dirents(data1, data2, map + split, count - split); 1252 de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize);
1219 de = dx_pack_dirents(data1, blocksize); 1253 de = dx_pack_dirents(data1, blocksize);
1220 de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); 1254 de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de,
1221 de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); 1255 blocksize);
1256 de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2,
1257 blocksize);
1222 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); 1258 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
1223 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); 1259 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
1224 1260
@@ -1268,6 +1304,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1268 const char *name = dentry->d_name.name; 1304 const char *name = dentry->d_name.name;
1269 int namelen = dentry->d_name.len; 1305 int namelen = dentry->d_name.len;
1270 unsigned int offset = 0; 1306 unsigned int offset = 0;
1307 unsigned int blocksize = dir->i_sb->s_blocksize;
1271 unsigned short reclen; 1308 unsigned short reclen;
1272 int nlen, rlen, err; 1309 int nlen, rlen, err;
1273 char *top; 1310 char *top;
@@ -1275,7 +1312,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1275 reclen = EXT4_DIR_REC_LEN(namelen); 1312 reclen = EXT4_DIR_REC_LEN(namelen);
1276 if (!de) { 1313 if (!de) {
1277 de = (struct ext4_dir_entry_2 *)bh->b_data; 1314 de = (struct ext4_dir_entry_2 *)bh->b_data;
1278 top = bh->b_data + dir->i_sb->s_blocksize - reclen; 1315 top = bh->b_data + blocksize - reclen;
1279 while ((char *) de <= top) { 1316 while ((char *) de <= top) {
1280 if (!ext4_check_dir_entry("ext4_add_entry", dir, de, 1317 if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
1281 bh, offset)) { 1318 bh, offset)) {
@@ -1287,7 +1324,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1287 return -EEXIST; 1324 return -EEXIST;
1288 } 1325 }
1289 nlen = EXT4_DIR_REC_LEN(de->name_len); 1326 nlen = EXT4_DIR_REC_LEN(de->name_len);
1290 rlen = ext4_rec_len_from_disk(de->rec_len); 1327 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
1291 if ((de->inode? rlen - nlen: rlen) >= reclen) 1328 if ((de->inode? rlen - nlen: rlen) >= reclen)
1292 break; 1329 break;
1293 de = (struct ext4_dir_entry_2 *)((char *)de + rlen); 1330 de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
@@ -1306,11 +1343,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1306 1343
1307 /* By now the buffer is marked for journaling */ 1344 /* By now the buffer is marked for journaling */
1308 nlen = EXT4_DIR_REC_LEN(de->name_len); 1345 nlen = EXT4_DIR_REC_LEN(de->name_len);
1309 rlen = ext4_rec_len_from_disk(de->rec_len); 1346 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
1310 if (de->inode) { 1347 if (de->inode) {
1311 struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen); 1348 struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
1312 de1->rec_len = ext4_rec_len_to_disk(rlen - nlen); 1349 de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, blocksize);
1313 de->rec_len = ext4_rec_len_to_disk(nlen); 1350 de->rec_len = ext4_rec_len_to_disk(nlen, blocksize);
1314 de = de1; 1351 de = de1;
1315 } 1352 }
1316 de->file_type = EXT4_FT_UNKNOWN; 1353 de->file_type = EXT4_FT_UNKNOWN;
@@ -1380,7 +1417,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1380 /* The 0th block becomes the root, move the dirents out */ 1417 /* The 0th block becomes the root, move the dirents out */
1381 fde = &root->dotdot; 1418 fde = &root->dotdot;
1382 de = (struct ext4_dir_entry_2 *)((char *)fde + 1419 de = (struct ext4_dir_entry_2 *)((char *)fde +
1383 ext4_rec_len_from_disk(fde->rec_len)); 1420 ext4_rec_len_from_disk(fde->rec_len, blocksize));
1384 if ((char *) de >= (((char *) root) + blocksize)) { 1421 if ((char *) de >= (((char *) root) + blocksize)) {
1385 ext4_error(dir->i_sb, __func__, 1422 ext4_error(dir->i_sb, __func__,
1386 "invalid rec_len for '..' in inode %lu", 1423 "invalid rec_len for '..' in inode %lu",
@@ -1402,12 +1439,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1402 memcpy (data1, de, len); 1439 memcpy (data1, de, len);
1403 de = (struct ext4_dir_entry_2 *) data1; 1440 de = (struct ext4_dir_entry_2 *) data1;
1404 top = data1 + len; 1441 top = data1 + len;
1405 while ((char *)(de2 = ext4_next_entry(de)) < top) 1442 while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
1406 de = de2; 1443 de = de2;
1407 de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); 1444 de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de,
1445 blocksize);
1408 /* Initialize the root; the dot dirents already exist */ 1446 /* Initialize the root; the dot dirents already exist */
1409 de = (struct ext4_dir_entry_2 *) (&root->dotdot); 1447 de = (struct ext4_dir_entry_2 *) (&root->dotdot);
1410 de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2)); 1448 de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2),
1449 blocksize);
1411 memset (&root->info, 0, sizeof(root->info)); 1450 memset (&root->info, 0, sizeof(root->info));
1412 root->info.info_length = sizeof(root->info); 1451 root->info.info_length = sizeof(root->info);
1413 root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; 1452 root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
@@ -1488,7 +1527,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1488 return retval; 1527 return retval;
1489 de = (struct ext4_dir_entry_2 *) bh->b_data; 1528 de = (struct ext4_dir_entry_2 *) bh->b_data;
1490 de->inode = 0; 1529 de->inode = 0;
1491 de->rec_len = ext4_rec_len_to_disk(blocksize); 1530 de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
1492 return add_dirent_to_buf(handle, dentry, inode, de, bh); 1531 return add_dirent_to_buf(handle, dentry, inode, de, bh);
1493} 1532}
1494 1533
@@ -1551,7 +1590,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1551 goto cleanup; 1590 goto cleanup;
1552 node2 = (struct dx_node *)(bh2->b_data); 1591 node2 = (struct dx_node *)(bh2->b_data);
1553 entries2 = node2->entries; 1592 entries2 = node2->entries;
1554 node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize); 1593 node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
1594 sb->s_blocksize);
1555 node2->fake.inode = 0; 1595 node2->fake.inode = 0;
1556 BUFFER_TRACE(frame->bh, "get_write_access"); 1596 BUFFER_TRACE(frame->bh, "get_write_access");
1557 err = ext4_journal_get_write_access(handle, frame->bh); 1597 err = ext4_journal_get_write_access(handle, frame->bh);
@@ -1639,6 +1679,7 @@ static int ext4_delete_entry(handle_t *handle,
1639 struct buffer_head *bh) 1679 struct buffer_head *bh)
1640{ 1680{
1641 struct ext4_dir_entry_2 *de, *pde; 1681 struct ext4_dir_entry_2 *de, *pde;
1682 unsigned int blocksize = dir->i_sb->s_blocksize;
1642 int i; 1683 int i;
1643 1684
1644 i = 0; 1685 i = 0;
@@ -1652,8 +1693,11 @@ static int ext4_delete_entry(handle_t *handle,
1652 ext4_journal_get_write_access(handle, bh); 1693 ext4_journal_get_write_access(handle, bh);
1653 if (pde) 1694 if (pde)
1654 pde->rec_len = ext4_rec_len_to_disk( 1695 pde->rec_len = ext4_rec_len_to_disk(
1655 ext4_rec_len_from_disk(pde->rec_len) + 1696 ext4_rec_len_from_disk(pde->rec_len,
1656 ext4_rec_len_from_disk(de->rec_len)); 1697 blocksize) +
1698 ext4_rec_len_from_disk(de->rec_len,
1699 blocksize),
1700 blocksize);
1657 else 1701 else
1658 de->inode = 0; 1702 de->inode = 0;
1659 dir->i_version++; 1703 dir->i_version++;
@@ -1661,9 +1705,9 @@ static int ext4_delete_entry(handle_t *handle,
1661 ext4_handle_dirty_metadata(handle, dir, bh); 1705 ext4_handle_dirty_metadata(handle, dir, bh);
1662 return 0; 1706 return 0;
1663 } 1707 }
1664 i += ext4_rec_len_from_disk(de->rec_len); 1708 i += ext4_rec_len_from_disk(de->rec_len, blocksize);
1665 pde = de; 1709 pde = de;
1666 de = ext4_next_entry(de); 1710 de = ext4_next_entry(de, blocksize);
1667 } 1711 }
1668 return -ENOENT; 1712 return -ENOENT;
1669} 1713}
@@ -1793,6 +1837,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1793 struct inode *inode; 1837 struct inode *inode;
1794 struct buffer_head *dir_block; 1838 struct buffer_head *dir_block;
1795 struct ext4_dir_entry_2 *de; 1839 struct ext4_dir_entry_2 *de;
1840 unsigned int blocksize = dir->i_sb->s_blocksize;
1796 int err, retries = 0; 1841 int err, retries = 0;
1797 1842
1798 if (EXT4_DIR_LINK_MAX(dir)) 1843 if (EXT4_DIR_LINK_MAX(dir))
@@ -1824,13 +1869,14 @@ retry:
1824 de = (struct ext4_dir_entry_2 *) dir_block->b_data; 1869 de = (struct ext4_dir_entry_2 *) dir_block->b_data;
1825 de->inode = cpu_to_le32(inode->i_ino); 1870 de->inode = cpu_to_le32(inode->i_ino);
1826 de->name_len = 1; 1871 de->name_len = 1;
1827 de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); 1872 de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
1873 blocksize);
1828 strcpy(de->name, "."); 1874 strcpy(de->name, ".");
1829 ext4_set_de_type(dir->i_sb, de, S_IFDIR); 1875 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
1830 de = ext4_next_entry(de); 1876 de = ext4_next_entry(de, blocksize);
1831 de->inode = cpu_to_le32(dir->i_ino); 1877 de->inode = cpu_to_le32(dir->i_ino);
1832 de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - 1878 de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1),
1833 EXT4_DIR_REC_LEN(1)); 1879 blocksize);
1834 de->name_len = 2; 1880 de->name_len = 2;
1835 strcpy(de->name, ".."); 1881 strcpy(de->name, "..");
1836 ext4_set_de_type(dir->i_sb, de, S_IFDIR); 1882 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
@@ -1885,7 +1931,7 @@ static int empty_dir(struct inode *inode)
1885 return 1; 1931 return 1;
1886 } 1932 }
1887 de = (struct ext4_dir_entry_2 *) bh->b_data; 1933 de = (struct ext4_dir_entry_2 *) bh->b_data;
1888 de1 = ext4_next_entry(de); 1934 de1 = ext4_next_entry(de, sb->s_blocksize);
1889 if (le32_to_cpu(de->inode) != inode->i_ino || 1935 if (le32_to_cpu(de->inode) != inode->i_ino ||
1890 !le32_to_cpu(de1->inode) || 1936 !le32_to_cpu(de1->inode) ||
1891 strcmp(".", de->name) || 1937 strcmp(".", de->name) ||
@@ -1896,9 +1942,9 @@ static int empty_dir(struct inode *inode)
1896 brelse(bh); 1942 brelse(bh);
1897 return 1; 1943 return 1;
1898 } 1944 }
1899 offset = ext4_rec_len_from_disk(de->rec_len) + 1945 offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) +
1900 ext4_rec_len_from_disk(de1->rec_len); 1946 ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize);
1901 de = ext4_next_entry(de1); 1947 de = ext4_next_entry(de1, sb->s_blocksize);
1902 while (offset < inode->i_size) { 1948 while (offset < inode->i_size) {
1903 if (!bh || 1949 if (!bh ||
1904 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { 1950 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
@@ -1927,8 +1973,8 @@ static int empty_dir(struct inode *inode)
1927 brelse(bh); 1973 brelse(bh);
1928 return 0; 1974 return 0;
1929 } 1975 }
1930 offset += ext4_rec_len_from_disk(de->rec_len); 1976 offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
1931 de = ext4_next_entry(de); 1977 de = ext4_next_entry(de, sb->s_blocksize);
1932 } 1978 }
1933 brelse(bh); 1979 brelse(bh);
1934 return 1; 1980 return 1;
@@ -2297,8 +2343,8 @@ retry:
2297 return err; 2343 return err;
2298} 2344}
2299 2345
2300#define PARENT_INO(buffer) \ 2346#define PARENT_INO(buffer, size) \
2301 (ext4_next_entry((struct ext4_dir_entry_2 *)(buffer))->inode) 2347 (ext4_next_entry((struct ext4_dir_entry_2 *)(buffer), size)->inode)
2302 2348
2303/* 2349/*
2304 * Anybody can rename anything with this: the permission checks are left to the 2350 * Anybody can rename anything with this: the permission checks are left to the
@@ -2311,7 +2357,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2311 struct inode *old_inode, *new_inode; 2357 struct inode *old_inode, *new_inode;
2312 struct buffer_head *old_bh, *new_bh, *dir_bh; 2358 struct buffer_head *old_bh, *new_bh, *dir_bh;
2313 struct ext4_dir_entry_2 *old_de, *new_de; 2359 struct ext4_dir_entry_2 *old_de, *new_de;
2314 int retval; 2360 int retval, force_da_alloc = 0;
2315 2361
2316 old_bh = new_bh = dir_bh = NULL; 2362 old_bh = new_bh = dir_bh = NULL;
2317 2363
@@ -2358,7 +2404,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2358 dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); 2404 dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval);
2359 if (!dir_bh) 2405 if (!dir_bh)
2360 goto end_rename; 2406 goto end_rename;
2361 if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) 2407 if (le32_to_cpu(PARENT_INO(dir_bh->b_data,
2408 old_dir->i_sb->s_blocksize)) != old_dir->i_ino)
2362 goto end_rename; 2409 goto end_rename;
2363 retval = -EMLINK; 2410 retval = -EMLINK;
2364 if (!new_inode && new_dir != old_dir && 2411 if (!new_inode && new_dir != old_dir &&
@@ -2430,7 +2477,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2430 if (dir_bh) { 2477 if (dir_bh) {
2431 BUFFER_TRACE(dir_bh, "get_write_access"); 2478 BUFFER_TRACE(dir_bh, "get_write_access");
2432 ext4_journal_get_write_access(handle, dir_bh); 2479 ext4_journal_get_write_access(handle, dir_bh);
2433 PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); 2480 PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
2481 cpu_to_le32(new_dir->i_ino);
2434 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); 2482 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
2435 ext4_handle_dirty_metadata(handle, old_dir, dir_bh); 2483 ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
2436 ext4_dec_count(handle, old_dir); 2484 ext4_dec_count(handle, old_dir);
@@ -2449,6 +2497,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2449 ext4_mark_inode_dirty(handle, new_inode); 2497 ext4_mark_inode_dirty(handle, new_inode);
2450 if (!new_inode->i_nlink) 2498 if (!new_inode->i_nlink)
2451 ext4_orphan_add(handle, new_inode); 2499 ext4_orphan_add(handle, new_inode);
2500 if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
2501 force_da_alloc = 1;
2452 } 2502 }
2453 retval = 0; 2503 retval = 0;
2454 2504
@@ -2457,6 +2507,8 @@ end_rename:
2457 brelse(old_bh); 2507 brelse(old_bh);
2458 brelse(new_bh); 2508 brelse(new_bh);
2459 ext4_journal_stop(handle); 2509 ext4_journal_stop(handle);
2510 if (retval == 0 && force_da_alloc)
2511 ext4_alloc_da_blocks(old_inode);
2460 return retval; 2512 return retval;
2461} 2513}
2462 2514
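
The force_da_alloc path fires when a rename replaces an existing file, i.e. the classic update-by-rename idiom. Without it, delayed allocation could leave a zero-length file behind a crash when the application skips fsync(); the pattern being protected looks like this (file names are illustrative):

/* Replace a config file by writing a temp file and renaming it into
 * place. With auto_da_alloc the replaced file's blocks get flushed at
 * rename time even if the fsync() below is forgotten. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *tmp = "config.tmp", *dst = "config";
	const char *data = "setting=1\n";
	int fd = open(tmp, O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (fd < 0) { perror("open"); return 1; }
	if (write(fd, data, strlen(data)) < 0) { perror("write"); return 1; }
	fsync(fd);		/* the step applications tend to skip */
	close(fd);
	if (rename(tmp, dst) < 0) { perror("rename"); return 1; }
	return 0;
}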
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index c06886abd658..546c7dd869e1 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -938,10 +938,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
938 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { 938 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
939 ext4_group_t flex_group; 939 ext4_group_t flex_group;
940 flex_group = ext4_flex_group(sbi, input->group); 940 flex_group = ext4_flex_group(sbi, input->group);
941 sbi->s_flex_groups[flex_group].free_blocks += 941 atomic_add(input->free_blocks_count,
942 input->free_blocks_count; 942 &sbi->s_flex_groups[flex_group].free_blocks);
943 sbi->s_flex_groups[flex_group].free_inodes += 943 atomic_add(EXT4_INODES_PER_GROUP(sb),
944 EXT4_INODES_PER_GROUP(sb); 944 &sbi->s_flex_groups[flex_group].free_inodes);
945 } 945 }
946 946
947 ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); 947 ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f7371a6a923d..9987bba99db3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -35,6 +35,7 @@
35#include <linux/quotaops.h> 35#include <linux/quotaops.h>
36#include <linux/seq_file.h> 36#include <linux/seq_file.h>
37#include <linux/proc_fs.h> 37#include <linux/proc_fs.h>
38#include <linux/ctype.h>
38#include <linux/marker.h> 39#include <linux/marker.h>
39#include <linux/log2.h> 40#include <linux/log2.h>
40#include <linux/crc16.h> 41#include <linux/crc16.h>
@@ -48,6 +49,7 @@
48#include "group.h" 49#include "group.h"
49 50
50struct proc_dir_entry *ext4_proc_root; 51struct proc_dir_entry *ext4_proc_root;
52static struct kset *ext4_kset;
51 53
52static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 54static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
53 unsigned long journal_devnum); 55 unsigned long journal_devnum);
@@ -577,9 +579,9 @@ static void ext4_put_super(struct super_block *sb)
577 ext4_commit_super(sb, es, 1); 579 ext4_commit_super(sb, es, 1);
578 } 580 }
579 if (sbi->s_proc) { 581 if (sbi->s_proc) {
580 remove_proc_entry("inode_readahead_blks", sbi->s_proc);
581 remove_proc_entry(sb->s_id, ext4_proc_root); 582 remove_proc_entry(sb->s_id, ext4_proc_root);
582 } 583 }
584 kobject_del(&sbi->s_kobj);
583 585
584 for (i = 0; i < sbi->s_gdb_count; i++) 586 for (i = 0; i < sbi->s_gdb_count; i++)
585 brelse(sbi->s_group_desc[i]); 587 brelse(sbi->s_group_desc[i]);
@@ -615,6 +617,17 @@ static void ext4_put_super(struct super_block *sb)
615 ext4_blkdev_remove(sbi); 617 ext4_blkdev_remove(sbi);
616 } 618 }
617 sb->s_fs_info = NULL; 619 sb->s_fs_info = NULL;
620 /*
621 * Now that we are completely done shutting down the
622 * superblock, we need to actually destroy the kobject.
623 */
624 unlock_kernel();
625 unlock_super(sb);
626 kobject_put(&sbi->s_kobj);
627 wait_for_completion(&sbi->s_kobj_unregister);
628 lock_super(sb);
629 lock_kernel();
630 kfree(sbi->s_blockgroup_lock);
618 kfree(sbi); 631 kfree(sbi);
619 return; 632 return;
620} 633}
@@ -803,8 +816,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
803 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 816 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
804 seq_puts(seq, ",noacl"); 817 seq_puts(seq, ",noacl");
805#endif 818#endif
806 if (!test_opt(sb, RESERVATION))
807 seq_puts(seq, ",noreservation");
808 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { 819 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
809 seq_printf(seq, ",commit=%u", 820 seq_printf(seq, ",commit=%u",
810 (unsigned) (sbi->s_commit_interval / HZ)); 821 (unsigned) (sbi->s_commit_interval / HZ));
@@ -855,6 +866,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
855 if (test_opt(sb, DATA_ERR_ABORT)) 866 if (test_opt(sb, DATA_ERR_ABORT))
856 seq_puts(seq, ",data_err=abort"); 867 seq_puts(seq, ",data_err=abort");
857 868
869 if (test_opt(sb, NO_AUTO_DA_ALLOC))
870 seq_puts(seq, ",noauto_da_alloc");
871
858 ext4_show_quota_options(seq, sb); 872 ext4_show_quota_options(seq, sb);
859 return 0; 873 return 0;
860} 874}
@@ -1004,7 +1018,7 @@ enum {
1004 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 1018 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
1005 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, 1019 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
1006 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1020 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
1007 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, 1021 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh,
1008 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, 1022 Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
1009 Opt_journal_update, Opt_journal_dev, 1023 Opt_journal_update, Opt_journal_dev,
1010 Opt_journal_checksum, Opt_journal_async_commit, 1024 Opt_journal_checksum, Opt_journal_async_commit,
@@ -1012,8 +1026,8 @@ enum {
1012 Opt_data_err_abort, Opt_data_err_ignore, 1026 Opt_data_err_abort, Opt_data_err_ignore,
1013 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1027 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1014 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 1028 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
1015 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 1029 Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
1016 Opt_grpquota, Opt_i_version, 1030 Opt_usrquota, Opt_grpquota, Opt_i_version,
1017 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1031 Opt_stripe, Opt_delalloc, Opt_nodelalloc,
1018 Opt_inode_readahead_blks, Opt_journal_ioprio 1032 Opt_inode_readahead_blks, Opt_journal_ioprio
1019}; 1033};
@@ -1039,8 +1053,6 @@ static const match_table_t tokens = {
1039 {Opt_nouser_xattr, "nouser_xattr"}, 1053 {Opt_nouser_xattr, "nouser_xattr"},
1040 {Opt_acl, "acl"}, 1054 {Opt_acl, "acl"},
1041 {Opt_noacl, "noacl"}, 1055 {Opt_noacl, "noacl"},
1042 {Opt_reservation, "reservation"},
1043 {Opt_noreservation, "noreservation"},
1044 {Opt_noload, "noload"}, 1056 {Opt_noload, "noload"},
1045 {Opt_nobh, "nobh"}, 1057 {Opt_nobh, "nobh"},
1046 {Opt_bh, "bh"}, 1058 {Opt_bh, "bh"},
@@ -1068,6 +1080,8 @@ static const match_table_t tokens = {
1068 {Opt_quota, "quota"}, 1080 {Opt_quota, "quota"},
1069 {Opt_usrquota, "usrquota"}, 1081 {Opt_usrquota, "usrquota"},
1070 {Opt_barrier, "barrier=%u"}, 1082 {Opt_barrier, "barrier=%u"},
1083 {Opt_barrier, "barrier"},
1084 {Opt_nobarrier, "nobarrier"},
1071 {Opt_i_version, "i_version"}, 1085 {Opt_i_version, "i_version"},
1072 {Opt_stripe, "stripe=%u"}, 1086 {Opt_stripe, "stripe=%u"},
1073 {Opt_resize, "resize"}, 1087 {Opt_resize, "resize"},
@@ -1075,6 +1089,9 @@ static const match_table_t tokens = {
1075 {Opt_nodelalloc, "nodelalloc"}, 1089 {Opt_nodelalloc, "nodelalloc"},
1076 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1090 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1077 {Opt_journal_ioprio, "journal_ioprio=%u"}, 1091 {Opt_journal_ioprio, "journal_ioprio=%u"},
1092 {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1093 {Opt_auto_da_alloc, "auto_da_alloc"},
1094 {Opt_noauto_da_alloc, "noauto_da_alloc"},
1078 {Opt_err, NULL}, 1095 {Opt_err, NULL},
1079}; 1096};
1080 1097
@@ -1207,12 +1224,6 @@ static int parse_options(char *options, struct super_block *sb,
1207 "not supported\n"); 1224 "not supported\n");
1208 break; 1225 break;
1209#endif 1226#endif
1210 case Opt_reservation:
1211 set_opt(sbi->s_mount_opt, RESERVATION);
1212 break;
1213 case Opt_noreservation:
1214 clear_opt(sbi->s_mount_opt, RESERVATION);
1215 break;
1216 case Opt_journal_update: 1227 case Opt_journal_update:
1217 /* @@@ FIXME */ 1228 /* @@@ FIXME */
1218 /* Eventually we will want to be able to create 1229 /* Eventually we will want to be able to create
@@ -1415,9 +1426,14 @@ set_qf_format:
1415 case Opt_abort: 1426 case Opt_abort:
1416 set_opt(sbi->s_mount_opt, ABORT); 1427 set_opt(sbi->s_mount_opt, ABORT);
1417 break; 1428 break;
1429 case Opt_nobarrier:
1430 clear_opt(sbi->s_mount_opt, BARRIER);
1431 break;
1418 case Opt_barrier: 1432 case Opt_barrier:
1419 if (match_int(&args[0], &option)) 1433 if (match_int(&args[0], &option)) {
1420 return 0; 1434 set_opt(sbi->s_mount_opt, BARRIER);
1435 break;
1436 }
1421 if (option) 1437 if (option)
1422 set_opt(sbi->s_mount_opt, BARRIER); 1438 set_opt(sbi->s_mount_opt, BARRIER);
1423 else 1439 else
@@ -1463,6 +1479,11 @@ set_qf_format:
1463 return 0; 1479 return 0;
1464 if (option < 0 || option > (1 << 30)) 1480 if (option < 0 || option > (1 << 30))
1465 return 0; 1481 return 0;
1482 if (option & (option - 1)) {
1483 printk(KERN_ERR "EXT4-fs: inode_readahead_blks"
1484 " must be a power of 2\n");
1485 return 0;
1486 }
1466 sbi->s_inode_readahead_blks = option; 1487 sbi->s_inode_readahead_blks = option;
1467 break; 1488 break;
1468 case Opt_journal_ioprio: 1489 case Opt_journal_ioprio:
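
The new rejection test above relies on the identity that x & (x - 1) clears the lowest set bit of x, so the expression is zero exactly when at most one bit is set. A standalone check of that identity (0 also passes, just as it does in the mount-option code, which has already range-checked the value):

#include <stdio.h>
#include <stdbool.h>

static bool pow2_or_zero(unsigned int x)
{
        /* clearing the lowest set bit leaves 0 iff at most 1 bit was set */
        return (x & (x - 1)) == 0;
}

int main(void)
{
        unsigned int v[] = { 0, 1, 12, 16, 4096 };

        for (int i = 0; i < 5; i++)
                printf("%4u -> %s\n", v[i],
                       pow2_or_zero(v[i]) ? "accepted" : "rejected");
        return 0;
}
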
@@ -1473,6 +1494,19 @@ set_qf_format:
1473 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 1494 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
1474 option); 1495 option);
1475 break; 1496 break;
1497 case Opt_noauto_da_alloc:
1498 set_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1499 break;
1500 case Opt_auto_da_alloc:
1501 if (match_int(&args[0], &option)) {
1502 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1503 break;
1504 }
1505 if (option)
1506 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1507 else
1508 set_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1509 break;
1476 default: 1510 default:
1477 printk(KERN_ERR 1511 printk(KERN_ERR
1478 "EXT4-fs: Unrecognized mount option \"%s\" " 1512 "EXT4-fs: Unrecognized mount option \"%s\" "
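
The token-table and parser hunks above let barrier/nobarrier and auto_da_alloc/noauto_da_alloc be given with or without an =%u argument; when match_int() finds no integer, the bare spelling now means "enable" rather than a parse error. A simplified userspace model of that dual-form handling (the kernel goes through match_token()/match_int(); only the outcome is mirrored here):

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

static int parse_barrier(const char *opt, int *barrier)
{
        if (!strcmp(opt, "nobarrier")) {
                *barrier = 0;
        } else if (!strncmp(opt, "barrier", 7)) {
                if (opt[7] == '\0')
                        *barrier = 1;            /* bare form: enable */
                else if (opt[7] == '=')
                        *barrier = atoi(opt + 8) ? 1 : 0;
                else
                        return -1;               /* e.g. "barrierx" */
        } else {
                return -1;
        }
        return 0;
}

int main(void)
{
        const char *tests[] = { "barrier", "barrier=0", "barrier=1", "nobarrier" };

        for (int i = 0; i < 4; i++) {
                int b = -1;
                if (parse_barrier(tests[i], &b) == 0)
                        printf("%-10s -> barrier=%d\n", tests[i], b);
        }
        return 0;
}
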
@@ -1612,10 +1646,12 @@ static int ext4_fill_flex_info(struct super_block *sb)
1612 gdp = ext4_get_group_desc(sb, i, &bh); 1646 gdp = ext4_get_group_desc(sb, i, &bh);
1613 1647
1614 flex_group = ext4_flex_group(sbi, i); 1648 flex_group = ext4_flex_group(sbi, i);
1615 sbi->s_flex_groups[flex_group].free_inodes += 1649 atomic_set(&sbi->s_flex_groups[flex_group].free_inodes,
1616 ext4_free_inodes_count(sb, gdp); 1650 ext4_free_inodes_count(sb, gdp));
1617 sbi->s_flex_groups[flex_group].free_blocks += 1651 atomic_set(&sbi->s_flex_groups[flex_group].free_blocks,
1618 ext4_free_blks_count(sb, gdp); 1652 ext4_free_blks_count(sb, gdp));
1653 atomic_set(&sbi->s_flex_groups[flex_group].used_dirs,
1654 ext4_used_dirs_count(sb, gdp));
1619 } 1655 }
1620 1656
1621 return 1; 1657 return 1;
@@ -1991,6 +2027,181 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
1991 return 0; 2027 return 0;
1992} 2028}
1993 2029
2030/* sysfs support */
2031
2032struct ext4_attr {
2033 struct attribute attr;
2034 ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
2035 ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
2036 const char *, size_t);
2037 int offset;
2038};
2039
2040static int parse_strtoul(const char *buf,
2041 unsigned long max, unsigned long *value)
2042{
2043 char *endp;
2044
2045 while (*buf && isspace(*buf))
2046 buf++;
2047 *value = simple_strtoul(buf, &endp, 0);
2048 while (*endp && isspace(*endp))
2049 endp++;
2050 if (*endp || *value > max)
2051 return -EINVAL;
2052
2053 return 0;
2054}
2055
2056static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2057 struct ext4_sb_info *sbi,
2058 char *buf)
2059{
2060 return snprintf(buf, PAGE_SIZE, "%llu\n",
2061 (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
2062}
2063
2064static ssize_t session_write_kbytes_show(struct ext4_attr *a,
2065 struct ext4_sb_info *sbi, char *buf)
2066{
2067 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2068
2069 return snprintf(buf, PAGE_SIZE, "%lu\n",
2070 (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2071 sbi->s_sectors_written_start) >> 1);
2072}
2073
2074static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2075 struct ext4_sb_info *sbi, char *buf)
2076{
2077 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2078
2079 return snprintf(buf, PAGE_SIZE, "%llu\n",
2080 sbi->s_kbytes_written +
2081 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2082 EXT4_SB(sb)->s_sectors_written_start) >> 1));
2083}
2084
2085static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2086 struct ext4_sb_info *sbi,
2087 const char *buf, size_t count)
2088{
2089 unsigned long t;
2090
2091 if (parse_strtoul(buf, 0x40000000, &t))
2092 return -EINVAL;
2093
2094 /* inode_readahead_blks must be a power of 2 */
2095 if (t & (t-1))
2096 return -EINVAL;
2097
2098 sbi->s_inode_readahead_blks = t;
2099 return count;
2100}
2101
2102static ssize_t sbi_ui_show(struct ext4_attr *a,
2103 struct ext4_sb_info *sbi, char *buf)
2104{
2105 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2106
2107 return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2108}
2109
2110static ssize_t sbi_ui_store(struct ext4_attr *a,
2111 struct ext4_sb_info *sbi,
2112 const char *buf, size_t count)
2113{
2114 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2115 unsigned long t;
2116
2117 if (parse_strtoul(buf, 0xffffffff, &t))
2118 return -EINVAL;
2119 *ui = t;
2120 return count;
2121}
2122
2123#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
2124static struct ext4_attr ext4_attr_##_name = { \
2125 .attr = {.name = __stringify(_name), .mode = _mode }, \
2126 .show = _show, \
2127 .store = _store, \
2128 .offset = offsetof(struct ext4_sb_info, _elname), \
2129}
2130#define EXT4_ATTR(name, mode, show, store) \
2131static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
2132
2133#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
2134#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
2135#define EXT4_RW_ATTR_SBI_UI(name, elname) \
2136 EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
2137#define ATTR_LIST(name) &ext4_attr_##name.attr
2138
2139EXT4_RO_ATTR(delayed_allocation_blocks);
2140EXT4_RO_ATTR(session_write_kbytes);
2141EXT4_RO_ATTR(lifetime_write_kbytes);
2142EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2143 inode_readahead_blks_store, s_inode_readahead_blks);
2144EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
2145EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
2146EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
2147EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
2148EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
2149EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
2150
2151static struct attribute *ext4_attrs[] = {
2152 ATTR_LIST(delayed_allocation_blocks),
2153 ATTR_LIST(session_write_kbytes),
2154 ATTR_LIST(lifetime_write_kbytes),
2155 ATTR_LIST(inode_readahead_blks),
2156 ATTR_LIST(mb_stats),
2157 ATTR_LIST(mb_max_to_scan),
2158 ATTR_LIST(mb_min_to_scan),
2159 ATTR_LIST(mb_order2_req),
2160 ATTR_LIST(mb_stream_req),
2161 ATTR_LIST(mb_group_prealloc),
2162 NULL,
2163};
2164
2165static ssize_t ext4_attr_show(struct kobject *kobj,
2166 struct attribute *attr, char *buf)
2167{
2168 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2169 s_kobj);
2170 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2171
2172 return a->show ? a->show(a, sbi, buf) : 0;
2173}
2174
2175static ssize_t ext4_attr_store(struct kobject *kobj,
2176 struct attribute *attr,
2177 const char *buf, size_t len)
2178{
2179 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2180 s_kobj);
2181 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2182
2183 return a->store ? a->store(a, sbi, buf, len) : 0;
2184}
2185
2186static void ext4_sb_release(struct kobject *kobj)
2187{
2188 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2189 s_kobj);
2190 complete(&sbi->s_kobj_unregister);
2191}
2192
2193
2194static struct sysfs_ops ext4_attr_ops = {
2195 .show = ext4_attr_show,
2196 .store = ext4_attr_store,
2197};
2198
2199static struct kobj_type ext4_ktype = {
2200 .default_attrs = ext4_attrs,
2201 .sysfs_ops = &ext4_attr_ops,
2202 .release = ext4_sb_release,
2203};
2204
1994static int ext4_fill_super(struct super_block *sb, void *data, int silent) 2205static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1995 __releases(kernel_lock) 2206 __releases(kernel_lock)
1996 __acquires(kernel_lock) 2207 __acquires(kernel_lock)
@@ -2021,12 +2232,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2021 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 2232 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
2022 if (!sbi) 2233 if (!sbi)
2023 return -ENOMEM; 2234 return -ENOMEM;
2235
2236 sbi->s_blockgroup_lock =
2237 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
2238 if (!sbi->s_blockgroup_lock) {
2239 kfree(sbi);
2240 return -ENOMEM;
2241 }
2024 sb->s_fs_info = sbi; 2242 sb->s_fs_info = sbi;
2025 sbi->s_mount_opt = 0; 2243 sbi->s_mount_opt = 0;
2026 sbi->s_resuid = EXT4_DEF_RESUID; 2244 sbi->s_resuid = EXT4_DEF_RESUID;
2027 sbi->s_resgid = EXT4_DEF_RESGID; 2245 sbi->s_resgid = EXT4_DEF_RESGID;
2028 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 2246 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
2029 sbi->s_sb_block = sb_block; 2247 sbi->s_sb_block = sb_block;
2248 sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part,
2249 sectors[1]);
2030 2250
2031 unlock_kernel(); 2251 unlock_kernel();
2032 2252
@@ -2064,6 +2284,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2064 sb->s_magic = le16_to_cpu(es->s_magic); 2284 sb->s_magic = le16_to_cpu(es->s_magic);
2065 if (sb->s_magic != EXT4_SUPER_MAGIC) 2285 if (sb->s_magic != EXT4_SUPER_MAGIC)
2066 goto cantfind_ext4; 2286 goto cantfind_ext4;
2287 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
2067 2288
2068 /* Set defaults before we parse the mount options */ 2289 /* Set defaults before we parse the mount options */
2069 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 2290 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
@@ -2101,7 +2322,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2101 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 2322 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
2102 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 2323 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
2103 2324
2104 set_opt(sbi->s_mount_opt, RESERVATION);
2105 set_opt(sbi->s_mount_opt, BARRIER); 2325 set_opt(sbi->s_mount_opt, BARRIER);
2106 2326
2107 /* 2327 /*
@@ -2325,14 +2545,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2325#ifdef CONFIG_PROC_FS 2545#ifdef CONFIG_PROC_FS
2326 if (ext4_proc_root) 2546 if (ext4_proc_root)
2327 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 2547 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
2328
2329 if (sbi->s_proc)
2330 proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
2331 &ext4_ui_proc_fops,
2332 &sbi->s_inode_readahead_blks);
2333#endif 2548#endif
2334 2549
2335 bgl_lock_init(&sbi->s_blockgroup_lock); 2550 bgl_lock_init(sbi->s_blockgroup_lock);
2336 2551
2337 for (i = 0; i < db_count; i++) { 2552 for (i = 0; i < db_count; i++) {
2338 block = descriptor_loc(sb, logical_sb_block, i); 2553 block = descriptor_loc(sb, logical_sb_block, i);
@@ -2564,6 +2779,16 @@ no_journal:
2564 goto failed_mount4; 2779 goto failed_mount4;
2565 } 2780 }
2566 2781
2782 sbi->s_kobj.kset = ext4_kset;
2783 init_completion(&sbi->s_kobj_unregister);
2784 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
2785 "%s", sb->s_id);
2786 if (err) {
2787 ext4_mb_release(sb);
2788 ext4_ext_release(sb);
2789 goto failed_mount4;
2790 }
2791
2567 /* 2792 /*
2568 * akpm: core read_super() calls in here with the superblock locked. 2793 * akpm: core read_super() calls in here with the superblock locked.
2569 * That deadlocks, because orphan cleanup needs to lock the superblock 2794 * That deadlocks, because orphan cleanup needs to lock the superblock
@@ -2618,7 +2843,6 @@ failed_mount2:
2618 kfree(sbi->s_group_desc); 2843 kfree(sbi->s_group_desc);
2619failed_mount: 2844failed_mount:
2620 if (sbi->s_proc) { 2845 if (sbi->s_proc) {
2621 remove_proc_entry("inode_readahead_blks", sbi->s_proc);
2622 remove_proc_entry(sb->s_id, ext4_proc_root); 2846 remove_proc_entry(sb->s_id, ext4_proc_root);
2623 } 2847 }
2624#ifdef CONFIG_QUOTA 2848#ifdef CONFIG_QUOTA
@@ -2913,6 +3137,10 @@ static int ext4_commit_super(struct super_block *sb,
2913 set_buffer_uptodate(sbh); 3137 set_buffer_uptodate(sbh);
2914 } 3138 }
2915 es->s_wtime = cpu_to_le32(get_seconds()); 3139 es->s_wtime = cpu_to_le32(get_seconds());
3140 es->s_kbytes_written =
3141 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
3142 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
3143 EXT4_SB(sb)->s_sectors_written_start) >> 1));
2916 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 3144 ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
2917 &EXT4_SB(sb)->s_freeblocks_counter)); 3145 &EXT4_SB(sb)->s_freeblocks_counter));
2918 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( 3146 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
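
The s_kbytes_written arithmetic above shifts the sector delta right by one because block-layer part_stat counters are kept in 512-byte sectors, so two sectors make one KiB. A quick sanity check of the conversion with made-up figures:

#include <stdio.h>

int main(void)
{
        /* sectors[1] snapshots at mount and at commit time, invented */
        unsigned long long start = 1000, now = 5096;
        unsigned long long kb = (now - start) >> 1;   /* 2 sectors = 1 KiB */

        printf("%llu sectors written since mount = %llu KiB\n",
               now - start, kb);
        return 0;
}
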
@@ -3647,45 +3875,6 @@ static int ext4_get_sb(struct file_system_type *fs_type,
3647 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3875 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3648} 3876}
3649 3877
3650#ifdef CONFIG_PROC_FS
3651static int ext4_ui_proc_show(struct seq_file *m, void *v)
3652{
3653 unsigned int *p = m->private;
3654
3655 seq_printf(m, "%u\n", *p);
3656 return 0;
3657}
3658
3659static int ext4_ui_proc_open(struct inode *inode, struct file *file)
3660{
3661 return single_open(file, ext4_ui_proc_show, PDE(inode)->data);
3662}
3663
3664static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf,
3665 size_t cnt, loff_t *ppos)
3666{
3667 unsigned long *p = PDE(file->f_path.dentry->d_inode)->data;
3668 char str[32];
3669
3670 if (cnt >= sizeof(str))
3671 return -EINVAL;
3672 if (copy_from_user(str, buf, cnt))
3673 return -EFAULT;
3674
3675 *p = simple_strtoul(str, NULL, 0);
3676 return cnt;
3677}
3678
3679const struct file_operations ext4_ui_proc_fops = {
3680 .owner = THIS_MODULE,
3681 .open = ext4_ui_proc_open,
3682 .read = seq_read,
3683 .llseek = seq_lseek,
3684 .release = single_release,
3685 .write = ext4_ui_proc_write,
3686};
3687#endif
3688
3689static struct file_system_type ext4_fs_type = { 3878static struct file_system_type ext4_fs_type = {
3690 .owner = THIS_MODULE, 3879 .owner = THIS_MODULE,
3691 .name = "ext4", 3880 .name = "ext4",
@@ -3719,6 +3908,9 @@ static int __init init_ext4_fs(void)
3719{ 3908{
3720 int err; 3909 int err;
3721 3910
3911 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
3912 if (!ext4_kset)
3913 return -ENOMEM;
3722 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 3914 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
3723 err = init_ext4_mballoc(); 3915 err = init_ext4_mballoc();
3724 if (err) 3916 if (err)
@@ -3760,6 +3952,7 @@ static void __exit exit_ext4_fs(void)
3760 exit_ext4_xattr(); 3952 exit_ext4_xattr();
3761 exit_ext4_mballoc(); 3953 exit_ext4_mballoc();
3762 remove_proc_entry("fs/ext4", NULL); 3954 remove_proc_entry("fs/ext4", NULL);
3955 kset_unregister(ext4_kset);
3763} 3956}
3764 3957
3765MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 3958MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
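
The sysfs block added to super.c drives many unsigned-int tunables through a single show/store pair by recording each field's offsetof() in the attribute and recomputing the field pointer at run time; that is all EXT4_RW_ATTR_SBI_UI expands to. A userspace model of the offset-table technique, with illustrative names:

#include <stdio.h>
#include <stddef.h>

struct sb_info {
        unsigned int inode_readahead_blks;
        unsigned int mb_stats;
};

struct ui_attr {
        const char *name;
        size_t offset;
};

#define UI_ATTR(field) { #field, offsetof(struct sb_info, field) }

static unsigned int ui_show(const struct ui_attr *a, const struct sb_info *sbi)
{
        /* recover the field from its recorded offset, as sbi_ui_show() does */
        return *(const unsigned int *)((const char *)sbi + a->offset);
}

int main(void)
{
        struct sb_info sbi = { .inode_readahead_blks = 32, .mb_stats = 1 };
        struct ui_attr attrs[] = { UI_ATTR(inode_readahead_blks), UI_ATTR(mb_stats) };

        for (size_t i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++)
                printf("%s = %u\n", attrs[i].name, ui_show(&attrs[i], &sbi));
        return 0;
}

Attributes whose values do not map onto a plain field (delayed_allocation_blocks, the write-kbytes counters) instead get dedicated show routines, which is why the table mixes EXT4_RO_ATTR and EXT4_ATTR_OFFSET entries.
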
diff --git a/fs/fcntl.c b/fs/fcntl.c
index d865ca66ccba..cc8e4de2fee5 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -531,6 +531,12 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
531 if (!new) 531 if (!new)
532 return -ENOMEM; 532 return -ENOMEM;
533 } 533 }
534
535 /*
536 * We need to take f_lock first since it's not an IRQ-safe
537 * lock.
538 */
539 spin_lock(&filp->f_lock);
534 write_lock_irq(&fasync_lock); 540 write_lock_irq(&fasync_lock);
535 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 541 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
536 if (fa->fa_file == filp) { 542 if (fa->fa_file == filp) {
@@ -555,14 +561,12 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
555 result = 1; 561 result = 1;
556 } 562 }
557out: 563out:
558 /* Fix up FASYNC bit while still holding fasync_lock */
559 spin_lock(&filp->f_lock);
560 if (on) 564 if (on)
561 filp->f_flags |= FASYNC; 565 filp->f_flags |= FASYNC;
562 else 566 else
563 filp->f_flags &= ~FASYNC; 567 filp->f_flags &= ~FASYNC;
564 spin_unlock(&filp->f_lock);
565 write_unlock_irq(&fasync_lock); 568 write_unlock_irq(&fasync_lock);
569 spin_unlock(&filp->f_lock);
566 return result; 570 return result;
567} 571}
568 572
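
The fasync_helper() hunks move the spin_lock(&filp->f_lock) in front of write_lock_irq(&fasync_lock), fixing a single f_lock-then-fasync_lock ordering for the whole function. A userspace sketch of the resulting discipline; the lock names and the flag bit are stand-ins (taking both locks in the same order on every path is what rules out ABBA deadlock):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t f_lock = PTHREAD_MUTEX_INITIALIZER;      /* outer */
static pthread_mutex_t fasync_lock = PTHREAD_MUTEX_INITIALIZER; /* inner */

static void set_fasync_flag(int on, int *f_flags)
{
        pthread_mutex_lock(&f_lock);       /* always first, as in the diff */
        pthread_mutex_lock(&fasync_lock);

        if (on)
                *f_flags |= 0x2000;        /* FASYNC-like bit, illustrative */
        else
                *f_flags &= ~0x2000;

        pthread_mutex_unlock(&fasync_lock);
        pthread_mutex_unlock(&f_lock);     /* release in reverse order */
}

int main(void)
{
        int flags = 0;

        set_fasync_flag(1, &flags);
        printf("flags=%#x\n", flags);
        return 0;
}
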
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index d9fdb7cec538..4e340fedf768 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1234,8 +1234,9 @@ static void fuse_vma_close(struct vm_area_struct *vma)
1234 * - sync(2) 1234 * - sync(2)
1235 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER 1235 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
1236 */ 1236 */
1237static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page) 1237static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1238{ 1238{
1239 struct page *page = vmf->page;
1239 /* 1240 /*
1240 * Don't use page->mapping as it may become NULL from a 1241 * Don't use page->mapping as it may become NULL from a
1241 * concurrent truncate. 1242 * concurrent truncate.
@@ -1465,7 +1466,7 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1465 case SEEK_END: 1466 case SEEK_END:
1466 retval = fuse_update_attributes(inode, NULL, file, NULL); 1467 retval = fuse_update_attributes(inode, NULL, file, NULL);
1467 if (retval) 1468 if (retval)
1468 return retval; 1469 goto exit;
1469 offset += i_size_read(inode); 1470 offset += i_size_read(inode);
1470 break; 1471 break;
1471 case SEEK_CUR: 1472 case SEEK_CUR:
@@ -1479,6 +1480,7 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1479 } 1480 }
1480 retval = offset; 1481 retval = offset;
1481 } 1482 }
1483exit:
1482 mutex_unlock(&inode->i_mutex); 1484 mutex_unlock(&inode->i_mutex);
1483 return retval; 1485 return retval;
1484} 1486}
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 3b9e8de3500b..70b9b8548945 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -337,8 +337,9 @@ static int gfs2_allocate_page_backing(struct page *page)
337 * blocks allocated on disk to back that page. 337 * blocks allocated on disk to back that page.
338 */ 338 */
339 339
340static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) 340static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
341{ 341{
342 struct page *page = vmf->page;
342 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 343 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
343 struct gfs2_inode *ip = GFS2_I(inode); 344 struct gfs2_inode *ip = GFS2_I(inode);
344 struct gfs2_sbd *sdp = GFS2_SB(inode); 345 struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -412,6 +413,8 @@ out_unlock:
412 gfs2_glock_dq(&gh); 413 gfs2_glock_dq(&gh);
413out: 414out:
414 gfs2_holder_uninit(&gh); 415 gfs2_holder_uninit(&gh);
416 if (ret)
417 ret = VM_FAULT_SIGBUS;
415 return ret; 418 return ret;
416} 419}
417 420
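
The fuse and gfs2 hunks above track the same VM interface change: ->page_mkwrite() now receives a struct vm_fault instead of a bare page and is expected to return VM_FAULT_* codes, which is why gfs2 converts a nonzero errno into VM_FAULT_SIGBUS on the way out. A userspace model of the shape of that change; the struct definitions and the constant are stand-ins, not the kernel's:

#include <stdio.h>

struct page { int id; };
struct vm_fault { struct page *page; };   /* mirrors the new argument */

#define VM_FAULT_SIGBUS 0x2

static int page_mkwrite(struct vm_fault *vmf)
{
        struct page *page = vmf->page;     /* page now arrives via vm_fault */

        if (!page)
                return VM_FAULT_SIGBUS;    /* fault codes, not -errno */
        printf("making page %d writable\n", page->id);
        return 0;
}

int main(void)
{
        struct page p = { 42 };
        struct vm_fault vmf = { &p };

        return page_mkwrite(&vmf) ? 1 : 0;
}
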
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9b800d97a687..23a3c76711e0 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -943,14 +943,13 @@ static struct vfsmount *hugetlbfs_vfsmount;
943 943
944static int can_do_hugetlb_shm(void) 944static int can_do_hugetlb_shm(void)
945{ 945{
946 return likely(capable(CAP_IPC_LOCK) || 946 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
947 in_group_p(sysctl_hugetlb_shm_group) ||
948 can_do_mlock());
949} 947}
950 948
951struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag) 949struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag)
952{ 950{
953 int error = -ENOMEM; 951 int error = -ENOMEM;
952 int unlock_shm = 0;
954 struct file *file; 953 struct file *file;
955 struct inode *inode; 954 struct inode *inode;
956 struct dentry *dentry, *root; 955 struct dentry *dentry, *root;
@@ -960,11 +959,14 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag)
960 if (!hugetlbfs_vfsmount) 959 if (!hugetlbfs_vfsmount)
961 return ERR_PTR(-ENOENT); 960 return ERR_PTR(-ENOENT);
962 961
963 if (!can_do_hugetlb_shm()) 962 if (!can_do_hugetlb_shm()) {
964 return ERR_PTR(-EPERM); 963 if (user_shm_lock(size, user)) {
965 964 unlock_shm = 1;
966 if (!user_shm_lock(size, user)) 965 WARN_ONCE(1,
967 return ERR_PTR(-ENOMEM); 966 "Using mlock ulimits for SHM_HUGETLB deprecated\n");
967 } else
968 return ERR_PTR(-EPERM);
969 }
968 970
969 root = hugetlbfs_vfsmount->mnt_root; 971 root = hugetlbfs_vfsmount->mnt_root;
970 quick_string.name = name; 972 quick_string.name = name;
@@ -1004,7 +1006,8 @@ out_inode:
1004out_dentry: 1006out_dentry:
1005 dput(dentry); 1007 dput(dentry);
1006out_shm_unlock: 1008out_shm_unlock:
1007 user_shm_unlock(size, user); 1009 if (unlock_shm)
1010 user_shm_unlock(size, user);
1008 return ERR_PTR(error); 1011 return ERR_PTR(error);
1009} 1012}
1010 1013
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 62804e57a44c..4ea72377c7a2 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -367,6 +367,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
367 int tag_bytes = journal_tag_bytes(journal); 367 int tag_bytes = journal_tag_bytes(journal);
368 struct buffer_head *cbh = NULL; /* For transactional checksums */ 368 struct buffer_head *cbh = NULL; /* For transactional checksums */
369 __u32 crc32_sum = ~0; 369 __u32 crc32_sum = ~0;
370 int write_op = WRITE;
370 371
371 /* 372 /*
372 * First job: lock down the current transaction and wait for 373 * First job: lock down the current transaction and wait for
@@ -401,6 +402,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
401 spin_lock(&journal->j_state_lock); 402 spin_lock(&journal->j_state_lock);
402 commit_transaction->t_state = T_LOCKED; 403 commit_transaction->t_state = T_LOCKED;
403 404
405 if (commit_transaction->t_synchronous_commit)
406 write_op = WRITE_SYNC;
404 stats.u.run.rs_wait = commit_transaction->t_max_wait; 407 stats.u.run.rs_wait = commit_transaction->t_max_wait;
405 stats.u.run.rs_locked = jiffies; 408 stats.u.run.rs_locked = jiffies;
406 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, 409 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
@@ -680,7 +683,7 @@ start_journal_io:
680 clear_buffer_dirty(bh); 683 clear_buffer_dirty(bh);
681 set_buffer_uptodate(bh); 684 set_buffer_uptodate(bh);
682 bh->b_end_io = journal_end_buffer_io_sync; 685 bh->b_end_io = journal_end_buffer_io_sync;
683 submit_bh(WRITE, bh); 686 submit_bh(write_op, bh);
684 } 687 }
685 cond_resched(); 688 cond_resched();
686 stats.u.run.rs_blocks_logged += bufs; 689 stats.u.run.rs_blocks_logged += bufs;
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 257ff2625765..bbe6d592d8b3 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -55,6 +55,25 @@
55 * need do nothing. 55 * need do nothing.
56 * RevokeValid set, Revoked set: 56 * RevokeValid set, Revoked set:
57 * buffer has been revoked. 57 * buffer has been revoked.
58 *
59 * Locking rules:
60 * We keep two hash tables of revoke records. One hash table belongs to the
61 * running transaction (pointed to by journal->j_revoke), the other one
62 * belongs to the committing transaction. Accesses to the second hash table
63 * happen only from kjournald and no other thread touches this table. Also
64 * journal_switch_revoke_table(), which switches which hash table belongs to
65 * the running and which to the committing transaction, is called only from
66 * kjournald. Therefore we need no locks when accessing the hash table
67 * belonging to the committing transaction.
68 *
69 * All users operating on the hash table belonging to the running transaction
70 * have a handle to the transaction. Therefore they are safe from kjournald
71 * switching hash tables under them. For operations on the lists of entries in
72 * the hash table, j_revoke_lock is used.
73 *
74 * Finally, the replay code also uses the hash tables, but at that moment no
75 * one else can touch them (the filesystem isn't mounted yet) and hence no
76 * locking is needed.
58 */ 77 */
59 78
60#ifndef __KERNEL__ 79#ifndef __KERNEL__
@@ -401,8 +420,6 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
401 * the second time we would still have a pending revoke to cancel. So, 420 * the second time we would still have a pending revoke to cancel. So,
402 * do not trust the Revoked bit on buffers unless RevokeValid is also 421 * do not trust the Revoked bit on buffers unless RevokeValid is also
403 * set. 422 * set.
404 *
405 * The caller must have the journal locked.
406 */ 423 */
407int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) 424int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
408{ 425{
@@ -480,10 +497,7 @@ void jbd2_journal_switch_revoke_table(journal_t *journal)
480/* 497/*
481 * Write revoke records to the journal for all entries in the current 498 * Write revoke records to the journal for all entries in the current
482 * revoke hash, deleting the entries as we go. 499 * revoke hash, deleting the entries as we go.
483 *
484 * Called with the journal lock held.
485 */ 500 */
486
487void jbd2_journal_write_revoke_records(journal_t *journal, 501void jbd2_journal_write_revoke_records(journal_t *journal,
488 transaction_t *transaction) 502 transaction_t *transaction)
489{ 503{
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 28ce21d8598e..996ffda06bf3 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1315,6 +1315,8 @@ int jbd2_journal_stop(handle_t *handle)
1315 } 1315 }
1316 } 1316 }
1317 1317
1318 if (handle->h_sync)
1319 transaction->t_synchronous_commit = 1;
1318 current->journal_info = NULL; 1320 current->journal_info = NULL;
1319 spin_lock(&journal->j_state_lock); 1321 spin_lock(&journal->j_state_lock);
1320 spin_lock(&transaction->t_handle_lock); 1322 spin_lock(&transaction->t_handle_lock);
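
Together the jbd2 hunks plumb a per-transaction flag: any handle stopped with h_sync set marks the running transaction, and the commit path then submits every journal buffer with WRITE_SYNC instead of WRITE. A userspace model of that sticky aggregation; types and names are illustrative:

#include <stdio.h>

struct transaction { int synchronous_commit; };
struct handle { int sync; struct transaction *t; };

enum write_op { OP_WRITE, OP_WRITE_SYNC };

static void journal_stop(struct handle *h)
{
        if (h->sync)
                h->t->synchronous_commit = 1;   /* sticky, never cleared */
}

static enum write_op commit_write_op(const struct transaction *t)
{
        /* chosen once, up front, for the whole commit */
        return t->synchronous_commit ? OP_WRITE_SYNC : OP_WRITE;
}

int main(void)
{
        struct transaction t = { 0 };
        struct handle h1 = { 0, &t };   /* ordinary handle */
        struct handle h2 = { 1, &t };   /* handle closed by fsync() */

        journal_stop(&h1);
        journal_stop(&h2);
        printf("commit uses %s\n",
               commit_write_op(&t) == OP_WRITE_SYNC ? "WRITE_SYNC" : "WRITE");
        return 0;
}
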
diff --git a/fs/jfs/Kconfig b/fs/jfs/Kconfig
index 9ff619a6f9cc..57cef19951db 100644
--- a/fs/jfs/Kconfig
+++ b/fs/jfs/Kconfig
@@ -1,6 +1,7 @@
1config JFS_FS 1config JFS_FS
2 tristate "JFS filesystem support" 2 tristate "JFS filesystem support"
3 select NLS 3 select NLS
4 select CRC32
4 help 5 help
5 This is a port of IBM's Journaled Filesystem. More information is 6 This is a port of IBM's Journaled Filesystem. More information is
6 available in the file <file:Documentation/filesystems/jfs.txt>. 7 available in the file <file:Documentation/filesystems/jfs.txt>.
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 6a73de84bcef..dd824d9b0b1a 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -90,7 +90,6 @@ void jfs_proc_init(void)
90 90
91 if (!(base = proc_mkdir("fs/jfs", NULL))) 91 if (!(base = proc_mkdir("fs/jfs", NULL)))
92 return; 92 return;
93 base->owner = THIS_MODULE;
94 93
95 for (i = 0; i < NPROCENT; i++) 94 for (i = 0; i < NPROCENT; i++)
96 proc_create(Entries[i].name, 0, base, Entries[i].proc_fops); 95 proc_create(Entries[i].name, 0, base, Entries[i].proc_fops);
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 169802ea07f9..bbbd5f202e37 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -362,11 +362,12 @@ exit:
362int extHint(struct inode *ip, s64 offset, xad_t * xp) 362int extHint(struct inode *ip, s64 offset, xad_t * xp)
363{ 363{
364 struct super_block *sb = ip->i_sb; 364 struct super_block *sb = ip->i_sb;
365 struct xadlist xadl; 365 int nbperpage = JFS_SBI(sb)->nbperpage;
366 struct lxdlist lxdl;
367 lxd_t lxd;
368 s64 prev; 366 s64 prev;
369 int rc, nbperpage = JFS_SBI(sb)->nbperpage; 367 int rc = 0;
368 s64 xaddr;
369 int xlen;
370 int xflag;
370 371
371 /* init the hint as "no hint provided" */ 372 /* init the hint as "no hint provided" */
372 XADaddress(xp, 0); 373 XADaddress(xp, 0);
@@ -376,46 +377,30 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
376 */ 377 */
377 prev = ((offset & ~POFFSET) >> JFS_SBI(sb)->l2bsize) - nbperpage; 378 prev = ((offset & ~POFFSET) >> JFS_SBI(sb)->l2bsize) - nbperpage;
378 379
379 /* if the offsets in the first page of the file, 380 /* if the offset is in the first page of the file, no hint provided.
380 * no hint provided.
381 */ 381 */
382 if (prev < 0) 382 if (prev < 0)
383 return (0); 383 goto out;
384
385 /* prepare to lookup the previous page's extent info */
386 lxdl.maxnlxd = 1;
387 lxdl.nlxd = 1;
388 lxdl.lxd = &lxd;
389 LXDoffset(&lxd, prev)
390 LXDlength(&lxd, nbperpage);
391
392 xadl.maxnxad = 1;
393 xadl.nxad = 0;
394 xadl.xad = xp;
395
396 /* perform the lookup */
397 if ((rc = xtLookupList(ip, &lxdl, &xadl, 0)))
398 return (rc);
399
400 /* check if no extent exists for the previous page.
401 * this is possible for sparse files.
402 */
403 if (xadl.nxad == 0) {
404// assert(ISSPARSE(ip));
405 return (0);
406 }
407 384
408 /* only preserve the abnr flag within the xad flags 385 rc = xtLookup(ip, prev, nbperpage, &xflag, &xaddr, &xlen, 0);
409 * of the returned hint.
410 */
411 xp->flag &= XAD_NOTRECORDED;
412 386
413 if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) { 387 if ((rc == 0) && xlen) {
414 jfs_error(ip->i_sb, "extHint: corrupt xtree"); 388 if (xlen != nbperpage) {
415 return -EIO; 389 jfs_error(ip->i_sb, "extHint: corrupt xtree");
416 } 390 rc = -EIO;
391 }
392 XADaddress(xp, xaddr);
393 XADlength(xp, xlen);
394 /*
395 * only preserve the abnr flag within the xad flags
396 * of the returned hint.
397 */
398 xp->flag = xflag & XAD_NOTRECORDED;
399 } else
400 rc = 0;
417 401
418 return (0); 402out:
403 return (rc);
419} 404}
420 405
421 406
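
The rewritten extHint() above drops the lxd/xad list plumbing and asks xtLookup() directly for the extent backing the page before the one being allocated. The page arithmetic it keeps is small enough to check standalone; the constants below (4 KiB pages, 1 KiB blocks) are illustrative, not JFS's only configuration:

#include <stdio.h>

#define POFFSET   4095ULL   /* offset-within-page mask, 4 KiB pages */
#define L2BSIZE   10        /* log2 of a 1 KiB file-system block */
#define NBPERPAGE 4         /* fs blocks per page */

static long long prev_page_block(unsigned long long offset)
{
        /* round down to the page, then step back one page, in blocks */
        return (long long)((offset & ~POFFSET) >> L2BSIZE) - NBPERPAGE;
}

int main(void)
{
        printf("%lld\n", prev_page_block(100));    /* -4: first page, no hint */
        printf("%lld\n", prev_page_block(8192));   /*  4: look up page 1 */
        return 0;
}
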
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 0f94381ca6d0..346057218edc 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -57,12 +57,6 @@
57#include "jfs_debug.h" 57#include "jfs_debug.h"
58 58
59/* 59/*
60 * __mark_inode_dirty expects inodes to be hashed. Since we don't want
61 * special inodes in the fileset inode space, we make them appear hashed,
62 * but do not put on any lists.
63 */
64
65/*
66 * imap locks 60 * imap locks
67 */ 61 */
68/* iag free list lock */ 62/* iag free list lock */
@@ -497,7 +491,9 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
497 release_metapage(mp); 491 release_metapage(mp);
498 492
499 /* 493 /*
500 * that will look hashed, but won't be on any list; hlist_del() 494 * __mark_inode_dirty expects inodes to be hashed. Since we don't
495 * want special inodes in the fileset inode space, we make them
496 * appear hashed, but do not put on any lists. hlist_del()
501 * will work fine and require no locking. 497 * will work fine and require no locking.
502 */ 498 */
503 ip->i_hash.pprev = &ip->i_hash.next; 499 ip->i_hash.pprev = &ip->i_hash.next;
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index c350057087dd..07b6c5dfb4b6 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -369,6 +369,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
369 unsigned long bio_bytes = 0; 369 unsigned long bio_bytes = 0;
370 unsigned long bio_offset = 0; 370 unsigned long bio_offset = 0;
371 int offset; 371 int offset;
372 int bad_blocks = 0;
372 373
373 page_start = (sector_t)page->index << 374 page_start = (sector_t)page->index <<
374 (PAGE_CACHE_SHIFT - inode->i_blkbits); 375 (PAGE_CACHE_SHIFT - inode->i_blkbits);
@@ -394,6 +395,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
394 } 395 }
395 396
396 clear_bit(META_dirty, &mp->flag); 397 clear_bit(META_dirty, &mp->flag);
398 set_bit(META_io, &mp->flag);
397 block_offset = offset >> inode->i_blkbits; 399 block_offset = offset >> inode->i_blkbits;
398 lblock = page_start + block_offset; 400 lblock = page_start + block_offset;
399 if (bio) { 401 if (bio) {
@@ -402,7 +404,6 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
402 len = min(xlen, blocks_per_mp); 404 len = min(xlen, blocks_per_mp);
403 xlen -= len; 405 xlen -= len;
404 bio_bytes += len << inode->i_blkbits; 406 bio_bytes += len << inode->i_blkbits;
405 set_bit(META_io, &mp->flag);
406 continue; 407 continue;
407 } 408 }
408 /* Not contiguous */ 409 /* Not contiguous */
@@ -424,12 +425,14 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
424 xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits; 425 xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits;
425 pblock = metapage_get_blocks(inode, lblock, &xlen); 426 pblock = metapage_get_blocks(inode, lblock, &xlen);
426 if (!pblock) { 427 if (!pblock) {
427 /* Need better error handling */
428 printk(KERN_ERR "JFS: metapage_get_blocks failed\n"); 428 printk(KERN_ERR "JFS: metapage_get_blocks failed\n");
429 dec_io(page, last_write_complete); 429 /*
430 * We already called inc_io(), but can't cancel it
431 * with dec_io() until we're done with the page
432 */
433 bad_blocks++;
430 continue; 434 continue;
431 } 435 }
432 set_bit(META_io, &mp->flag);
433 len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage); 436 len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage);
434 437
435 bio = bio_alloc(GFP_NOFS, 1); 438 bio = bio_alloc(GFP_NOFS, 1);
@@ -459,6 +462,9 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
459 462
460 unlock_page(page); 463 unlock_page(page);
461 464
465 if (bad_blocks)
466 goto err_out;
467
462 if (nr_underway == 0) 468 if (nr_underway == 0)
463 end_page_writeback(page); 469 end_page_writeback(page);
464 470
@@ -474,7 +480,9 @@ skip:
474 bio_put(bio); 480 bio_put(bio);
475 unlock_page(page); 481 unlock_page(page);
476 dec_io(page, last_write_complete); 482 dec_io(page, last_write_complete);
477 483err_out:
484 while (bad_blocks--)
485 dec_io(page, last_write_complete);
478 return -EIO; 486 return -EIO;
479} 487}
480 488
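
The bad_blocks counter above exists because dec_io() can trigger last_write_complete() and end writeback on the page, which must not happen while the loop is still walking that page; failed blocks are therefore only counted, and their I/O references are dropped together at err_out. A small model of that deferred-release pattern with invented counts:

#include <stdio.h>

static int io_count;                      /* outstanding I/O references */

static void inc_io(void) { io_count++; }
static void dec_io(void) { io_count--; }  /* may complete the page at 0 */

int main(void)
{
        int bad_blocks = 0;

        for (int blk = 0; blk < 4; blk++) {
                inc_io();
                if (blk == 2) {           /* pretend this block won't map */
                        bad_blocks++;     /* defer the dec_io() */
                        continue;
                }
        }
        /* page fully walked; now it is safe to drop the failed refs */
        while (bad_blocks--)
                dec_io();
        printf("io_count=%d\n", io_count);  /* refs held by the 3 good blocks */
        return 0;
}
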
diff --git a/fs/jfs/jfs_types.h b/fs/jfs/jfs_types.h
index 649f9817accd..43ea3713c083 100644
--- a/fs/jfs/jfs_types.h
+++ b/fs/jfs/jfs_types.h
@@ -58,35 +58,6 @@ struct timestruc_t {
58#define ONES 0xffffffffu /* all bit on */ 58#define ONES 0xffffffffu /* all bit on */
59 59
60/* 60/*
61 * logical xd (lxd)
62 */
63typedef struct {
64 unsigned len:24;
65 unsigned off1:8;
66 u32 off2;
67} lxd_t;
68
69/* lxd_t field construction */
70#define LXDlength(lxd, length32) ( (lxd)->len = length32 )
71#define LXDoffset(lxd, offset64)\
72{\
73 (lxd)->off1 = ((s64)offset64) >> 32;\
74 (lxd)->off2 = (offset64) & 0xffffffff;\
75}
76
77/* lxd_t field extraction */
78#define lengthLXD(lxd) ( (lxd)->len )
79#define offsetLXD(lxd)\
80 ( ((s64)((lxd)->off1)) << 32 | (lxd)->off2 )
81
82/* lxd list */
83struct lxdlist {
84 s16 maxnlxd;
85 s16 nlxd;
86 lxd_t *lxd;
87};
88
89/*
90 * physical xd (pxd) 61 * physical xd (pxd)
91 */ 62 */
92typedef struct { 63typedef struct {
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index a27e26c90568..d654a6458648 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -164,11 +164,8 @@ int xtLookup(struct inode *ip, s64 lstart,
164 /* is lookup offset beyond eof ? */ 164 /* is lookup offset beyond eof ? */
165 size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 165 size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
166 JFS_SBI(ip->i_sb)->l2bsize; 166 JFS_SBI(ip->i_sb)->l2bsize;
167 if (lstart >= size) { 167 if (lstart >= size)
168 jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)",
169 (ulong) lstart, (ulong) size);
170 return 0; 168 return 0;
171 }
172 } 169 }
173 170
174 /* 171 /*
@@ -220,264 +217,6 @@ int xtLookup(struct inode *ip, s64 lstart,
220 return rc; 217 return rc;
221} 218}
222 219
223
224/*
225 * xtLookupList()
226 *
227 * function: map a single logical extent into a list of physical extent;
228 *
229 * parameter:
230 * struct inode *ip,
231 * struct lxdlist *lxdlist, lxd list (in)
232 * struct xadlist *xadlist, xad list (in/out)
233 * int flag)
234 *
235 * coverage of lxd by xad under assumption of
236 * . lxd's are ordered and disjoint.
237 * . xad's are ordered and disjoint.
238 *
239 * return:
240 * 0: success
241 *
242 * note: a page being written (even a single byte) is backed fully,
243 * except the last page which is only backed with blocks
244 * required to cover the last byte;
245 * the extent backing a page is fully contained within an xad;
246 */
247int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
248 struct xadlist * xadlist, int flag)
249{
250 int rc = 0;
251 struct btstack btstack;
252 int cmp;
253 s64 bn;
254 struct metapage *mp;
255 xtpage_t *p;
256 int index;
257 lxd_t *lxd;
258 xad_t *xad, *pxd;
259 s64 size, lstart, lend, xstart, xend, pstart;
260 s64 llen, xlen, plen;
261 s64 xaddr, paddr;
262 int nlxd, npxd, maxnpxd;
263
264 npxd = xadlist->nxad = 0;
265 maxnpxd = xadlist->maxnxad;
266 pxd = xadlist->xad;
267
268 nlxd = lxdlist->nlxd;
269 lxd = lxdlist->lxd;
270
271 lstart = offsetLXD(lxd);
272 llen = lengthLXD(lxd);
273 lend = lstart + llen;
274
275 size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
276 JFS_SBI(ip->i_sb)->l2bsize;
277
278 /*
279 * search for the xad entry covering the logical extent
280 */
281 search:
282 if (lstart >= size)
283 return 0;
284
285 if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0)))
286 return rc;
287
288 /*
289 * compute the physical extent covering logical extent
290 *
291 * N.B. search may have failed (e.g., hole in sparse file),
292 * and returned the index of the next entry.
293 */
294//map:
295 /* retrieve search result */
296 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
297
298 /* is xad on the next sibling page ? */
299 if (index == le16_to_cpu(p->header.nextindex)) {
300 if (p->header.flag & BT_ROOT)
301 goto mapend;
302
303 if ((bn = le64_to_cpu(p->header.next)) == 0)
304 goto mapend;
305
306 XT_PUTPAGE(mp);
307
308 /* get next sibling page */
309 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
310 if (rc)
311 return rc;
312
313 index = XTENTRYSTART;
314 }
315
316 xad = &p->xad[index];
317
318 /*
319 * is lxd covered by xad ?
320 */
321 compare:
322 xstart = offsetXAD(xad);
323 xlen = lengthXAD(xad);
324 xend = xstart + xlen;
325 xaddr = addressXAD(xad);
326
327 compare1:
328 if (xstart < lstart)
329 goto compare2;
330
331 /* (lstart <= xstart) */
332
333 /* lxd is NOT covered by xad */
334 if (lend <= xstart) {
335 /*
336 * get next lxd
337 */
338 if (--nlxd == 0)
339 goto mapend;
340 lxd++;
341
342 lstart = offsetLXD(lxd);
343 llen = lengthLXD(lxd);
344 lend = lstart + llen;
345 if (lstart >= size)
346 goto mapend;
347
348 /* compare with the current xad */
349 goto compare1;
350 }
351 /* lxd is covered by xad */
352 else { /* (xstart < lend) */
353
354 /* initialize new pxd */
355 pstart = xstart;
356 plen = min(lend - xstart, xlen);
357 paddr = xaddr;
358
359 goto cover;
360 }
361
362 /* (xstart < lstart) */
363 compare2:
364 /* lxd is covered by xad */
365 if (lstart < xend) {
366 /* initialize new pxd */
367 pstart = lstart;
368 plen = min(xend - lstart, llen);
369 paddr = xaddr + (lstart - xstart);
370
371 goto cover;
372 }
373 /* lxd is NOT covered by xad */
374 else { /* (xend <= lstart) */
375
376 /*
377 * get next xad
378 *
379 * linear search next xad covering lxd on
380 * the current xad page, and then tree search
381 */
382 if (index == le16_to_cpu(p->header.nextindex) - 1) {
383 if (p->header.flag & BT_ROOT)
384 goto mapend;
385
386 XT_PUTPAGE(mp);
387 goto search;
388 } else {
389 index++;
390 xad++;
391
392 /* compare with new xad */
393 goto compare;
394 }
395 }
396
397 /*
398 * lxd is covered by xad and a new pxd has been initialized
399 * (lstart <= xstart < lend) or (xstart < lstart < xend)
400 */
401 cover:
402 /* finalize pxd corresponding to current xad */
403 XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr);
404
405 if (++npxd >= maxnpxd)
406 goto mapend;
407 pxd++;
408
409 /*
410 * lxd is fully covered by xad
411 */
412 if (lend <= xend) {
413 /*
414 * get next lxd
415 */
416 if (--nlxd == 0)
417 goto mapend;
418 lxd++;
419
420 lstart = offsetLXD(lxd);
421 llen = lengthLXD(lxd);
422 lend = lstart + llen;
423 if (lstart >= size)
424 goto mapend;
425
426 /*
427 * test for old xad covering new lxd
428 * (old xstart < new lstart)
429 */
430 goto compare2;
431 }
432 /*
433 * lxd is partially covered by xad
434 */
435 else { /* (xend < lend) */
436
437 /*
438 * get next xad
439 *
440 * linear search next xad covering lxd on
441 * the current xad page, and then next xad page search
442 */
443 if (index == le16_to_cpu(p->header.nextindex) - 1) {
444 if (p->header.flag & BT_ROOT)
445 goto mapend;
446
447 if ((bn = le64_to_cpu(p->header.next)) == 0)
448 goto mapend;
449
450 XT_PUTPAGE(mp);
451
452 /* get next sibling page */
453 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
454 if (rc)
455 return rc;
456
457 index = XTENTRYSTART;
458 xad = &p->xad[index];
459 } else {
460 index++;
461 xad++;
462 }
463
464 /*
465 * test for new xad covering old lxd
466 * (old lstart < new xstart)
467 */
468 goto compare;
469 }
470
471 mapend:
472 xadlist->nxad = npxd;
473
474//out:
475 XT_PUTPAGE(mp);
476
477 return rc;
478}
479
480
481/* 220/*
482 * xtSearch() 221 * xtSearch()
483 * 222 *
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h
index 70815c8a3d6a..08c0c749b986 100644
--- a/fs/jfs/jfs_xtree.h
+++ b/fs/jfs/jfs_xtree.h
@@ -110,8 +110,6 @@ typedef union {
110 */ 110 */
111extern int xtLookup(struct inode *ip, s64 lstart, s64 llen, 111extern int xtLookup(struct inode *ip, s64 lstart, s64 llen,
112 int *pflag, s64 * paddr, int *plen, int flag); 112 int *pflag, s64 * paddr, int *plen, int flag);
113extern int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
114 struct xadlist * xadlist, int flag);
115extern void xtInitRoot(tid_t tid, struct inode *ip); 113extern void xtInitRoot(tid_t tid, struct inode *ip);
116extern int xtInsert(tid_t tid, struct inode *ip, 114extern int xtInsert(tid_t tid, struct inode *ip,
117 int xflag, s64 xoff, int xlen, s64 * xaddrp, int flag); 115 int xflag, s64 xoff, int xlen, s64 * xaddrp, int flag);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index b37d1f78b854..6f21adf9479a 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -29,6 +29,7 @@
29#include <linux/posix_acl.h> 29#include <linux/posix_acl.h>
30#include <linux/buffer_head.h> 30#include <linux/buffer_head.h>
31#include <linux/exportfs.h> 31#include <linux/exportfs.h>
32#include <linux/crc32.h>
32#include <asm/uaccess.h> 33#include <asm/uaccess.h>
33#include <linux/seq_file.h> 34#include <linux/seq_file.h>
34 35
@@ -168,6 +169,9 @@ static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
168 buf->f_files = maxinodes; 169 buf->f_files = maxinodes;
169 buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) - 170 buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) -
170 atomic_read(&imap->im_numfree)); 171 atomic_read(&imap->im_numfree));
172 buf->f_fsid.val[0] = (u32)crc32_le(0, sbi->uuid, sizeof(sbi->uuid)/2);
173 buf->f_fsid.val[1] = (u32)crc32_le(0, sbi->uuid + sizeof(sbi->uuid)/2,
174 sizeof(sbi->uuid)/2);
171 175
172 buf->f_namelen = JFS_NAME_MAX; 176 buf->f_namelen = JFS_NAME_MAX;
173 return 0; 177 return 0;
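
The statfs hunk above fills f_fsid by CRC-ing each half of the 16-byte volume uuid into its own 32-bit word, giving userspace a stable identifier without exposing the raw uuid. A userspace approximation of the split, using zlib's crc32() as a stand-in for the kernel's crc32_le() (build with -lz; the uuid bytes are invented):

#include <stdio.h>
#include <zlib.h>

int main(void)
{
        unsigned char uuid[16] = {
                0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0,
                0x0f, 0xed, 0xcb, 0xa9, 0x87, 0x65, 0x43, 0x21,
        };
        unsigned int val0 = crc32(0L, uuid, sizeof(uuid) / 2);
        unsigned int val1 = crc32(0L, uuid + sizeof(uuid) / 2, sizeof(uuid) / 2);

        printf("f_fsid = %08x:%08x\n", val0, val1);
        return 0;
}
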
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index aedc47a264c1..1f3b0fc0d351 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -139,55 +139,6 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
139 return 0; 139 return 0;
140} 140}
141 141
142#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
143static const struct in6_addr *nlmclnt_map_v4addr(const struct sockaddr *sap,
144 struct in6_addr *addr_mapped)
145{
146 const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
147
148 switch (sap->sa_family) {
149 case AF_INET6:
150 return &((const struct sockaddr_in6 *)sap)->sin6_addr;
151 case AF_INET:
152 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, addr_mapped);
153 return addr_mapped;
154 }
155
156 return NULL;
157}
158
159/*
160 * If lockd is using a PF_INET6 listener, all incoming requests appear
161 * to come from AF_INET6 remotes. The address of AF_INET remotes are
162 * mapped to AF_INET6 automatically by the network layer. In case the
163 * user passed an AF_INET server address at mount time, ensure both
164 * addresses are AF_INET6 before comparing them.
165 */
166static int nlmclnt_cmp_addr(const struct nlm_host *host,
167 const struct sockaddr *sap)
168{
169 const struct in6_addr *addr1;
170 const struct in6_addr *addr2;
171 struct in6_addr addr1_mapped;
172 struct in6_addr addr2_mapped;
173
174 addr1 = nlmclnt_map_v4addr(nlm_addr(host), &addr1_mapped);
175 if (likely(addr1 != NULL)) {
176 addr2 = nlmclnt_map_v4addr(sap, &addr2_mapped);
177 if (likely(addr2 != NULL))
178 return ipv6_addr_equal(addr1, addr2);
179 }
180
181 return 0;
182}
183#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
184static int nlmclnt_cmp_addr(const struct nlm_host *host,
185 const struct sockaddr *sap)
186{
187 return nlm_cmp_addr(nlm_addr(host), sap);
188}
189#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
190
191/* 142/*
192 * The server lockd has called us back to tell us the lock was granted 143 * The server lockd has called us back to tell us the lock was granted
193 */ 144 */
@@ -215,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
215 */ 166 */
216 if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) 167 if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
217 continue; 168 continue;
218 if (!nlmclnt_cmp_addr(block->b_host, addr)) 169 if (!nlm_cmp_addr(nlm_addr(block->b_host), addr))
219 continue; 170 continue;
220 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) 171 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0)
221 continue; 172 continue;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 5e2c4d5ac827..6d5d4a4169e5 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -16,6 +16,8 @@
16#include <linux/sunrpc/svc.h> 16#include <linux/sunrpc/svc.h>
17#include <linux/lockd/lockd.h> 17#include <linux/lockd/lockd.h>
18 18
19#include <asm/unaligned.h>
20
19#define NLMDBG_FACILITY NLMDBG_MONITOR 21#define NLMDBG_FACILITY NLMDBG_MONITOR
20#define NSM_PROGRAM 100024 22#define NSM_PROGRAM 100024
21#define NSM_VERSION 1 23#define NSM_VERSION 1
@@ -274,10 +276,12 @@ static void nsm_init_private(struct nsm_handle *nsm)
274{ 276{
275 u64 *p = (u64 *)&nsm->sm_priv.data; 277 u64 *p = (u64 *)&nsm->sm_priv.data;
276 struct timespec ts; 278 struct timespec ts;
279 s64 ns;
277 280
278 ktime_get_ts(&ts); 281 ktime_get_ts(&ts);
279 *p++ = timespec_to_ns(&ts); 282 ns = timespec_to_ns(&ts);
280 *p = (unsigned long)nsm; 283 put_unaligned(ns, p);
284 put_unaligned((unsigned long)nsm, p + 1);
281} 285}
282 286
283static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap, 287static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
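
nsm_init_private() above switches to put_unaligned() because sm_priv.data is a bare byte array with no natural 64-bit alignment, and a plain store through a u64 pointer can fault on strict-alignment architectures. A userspace model of what the helper amounts to:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct nsm_private { unsigned char data[16]; };   /* no alignment promise */

static void put_unaligned_u64(uint64_t v, void *p)
{
        memcpy(p, &v, sizeof(v));   /* byte-wise store, alignment-safe */
}

int main(void)
{
        struct nsm_private priv;
        uint64_t ns = 123456789ULL;        /* stand-in timestamp */
        uint64_t cookie = 0xdeadbeefULL;   /* stand-in handle address */

        put_unaligned_u64(ns, priv.data);
        put_unaligned_u64(cookie, priv.data + 8);
        printf("first byte: %02x\n", priv.data[0]);
        return 0;
}
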
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 64f1c31b5853..abf83881f68a 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -53,17 +53,6 @@ static struct svc_rqst *nlmsvc_rqst;
53unsigned long nlmsvc_timeout; 53unsigned long nlmsvc_timeout;
54 54
55/* 55/*
56 * If the kernel has IPv6 support available, always listen for
57 * both AF_INET and AF_INET6 requests.
58 */
59#if (defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) && \
60 defined(CONFIG_SUNRPC_REGISTER_V4)
61static const sa_family_t nlmsvc_family = AF_INET6;
62#else /* (CONFIG_IPV6 || CONFIG_IPV6_MODULE) && CONFIG_SUNRPC_REGISTER_V4 */
63static const sa_family_t nlmsvc_family = AF_INET;
64#endif /* (CONFIG_IPV6 || CONFIG_IPV6_MODULE) && CONFIG_SUNRPC_REGISTER_V4 */
65
66/*
67 * These can be set at insmod time (useful for NFS as root filesystem), 56 * These can be set at insmod time (useful for NFS as root filesystem),
68 * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003 57 * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003
69 */ 58 */
@@ -204,19 +193,30 @@ lockd(void *vrqstp)
204 return 0; 193 return 0;
205} 194}
206 195
207static int create_lockd_listener(struct svc_serv *serv, char *name, 196static int create_lockd_listener(struct svc_serv *serv, const char *name,
208 unsigned short port) 197 const int family, const unsigned short port)
209{ 198{
210 struct svc_xprt *xprt; 199 struct svc_xprt *xprt;
211 200
212 xprt = svc_find_xprt(serv, name, 0, 0); 201 xprt = svc_find_xprt(serv, name, family, 0);
213 if (xprt == NULL) 202 if (xprt == NULL)
214 return svc_create_xprt(serv, name, port, SVC_SOCK_DEFAULTS); 203 return svc_create_xprt(serv, name, family, port,
215 204 SVC_SOCK_DEFAULTS);
216 svc_xprt_put(xprt); 205 svc_xprt_put(xprt);
217 return 0; 206 return 0;
218} 207}
219 208
209static int create_lockd_family(struct svc_serv *serv, const int family)
210{
211 int err;
212
213 err = create_lockd_listener(serv, "udp", family, nlm_udpport);
214 if (err < 0)
215 return err;
216
217 return create_lockd_listener(serv, "tcp", family, nlm_tcpport);
218}
219
220/* 220/*
221 * Ensure there are active UDP and TCP listeners for lockd. 221 * Ensure there are active UDP and TCP listeners for lockd.
222 * 222 *
@@ -232,13 +232,15 @@ static int make_socks(struct svc_serv *serv)
232 static int warned; 232 static int warned;
233 int err; 233 int err;
234 234
235 err = create_lockd_listener(serv, "udp", nlm_udpport); 235 err = create_lockd_family(serv, PF_INET);
236 if (err < 0) 236 if (err < 0)
237 goto out_err; 237 goto out_err;
238 238
239 err = create_lockd_listener(serv, "tcp", nlm_tcpport); 239#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
240 if (err < 0) 240 err = create_lockd_family(serv, PF_INET6);
241 if (err < 0 && err != -EAFNOSUPPORT)
241 goto out_err; 242 goto out_err;
243#endif /* CONFIG_IPV6 || CONFIG_IPV6_MODULE */
242 244
243 warned = 0; 245 warned = 0;
244 return 0; 246 return 0;
@@ -274,7 +276,7 @@ int lockd_up(void)
274 "lockd_up: no pid, %d users??\n", nlmsvc_users); 276 "lockd_up: no pid, %d users??\n", nlmsvc_users);
275 277
276 error = -ENOMEM; 278 error = -ENOMEM;
277 serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, nlmsvc_family, NULL); 279 serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL);
278 if (!serv) { 280 if (!serv) {
279 printk(KERN_WARNING "lockd_up: create service failed\n"); 281 printk(KERN_WARNING "lockd_up: create service failed\n");
280 goto out; 282 goto out;
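
make_socks() above now always creates the PF_INET listeners and then attempts PF_INET6, treating -EAFNOSUPPORT as non-fatal so lockd still starts on kernels without IPv6. A userspace model of that tolerate-missing-family pattern:

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/socket.h>
#include <unistd.h>

static int create_family(int family)
{
        int fd = socket(family, SOCK_STREAM, 0);

        if (fd < 0)
                return -errno;
        close(fd);   /* a real listener would bind and listen here */
        return 0;
}

int main(void)
{
        int err = create_family(AF_INET);

        if (err < 0) {
                fprintf(stderr, "IPv4 failed: %s\n", strerror(-err));
                return 1;
        }
        err = create_family(AF_INET6);
        if (err < 0 && err != -EAFNOSUPPORT) {
                fprintf(stderr, "IPv6 failed: %s\n", strerror(-err));
                return 1;
        }
        puts(err ? "IPv4 only" : "IPv4 and IPv6");
        return 0;
}

The nfs_callback_up() change below follows the same pattern, additionally remembering the IPv6 callback port in its own variable.
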
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 3e634f2a1083..a886e692ddd0 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -38,19 +38,10 @@ static struct svc_program nfs4_callback_program;
38 38
39unsigned int nfs_callback_set_tcpport; 39unsigned int nfs_callback_set_tcpport;
40unsigned short nfs_callback_tcpport; 40unsigned short nfs_callback_tcpport;
41unsigned short nfs_callback_tcpport6;
41static const int nfs_set_port_min = 0; 42static const int nfs_set_port_min = 0;
42static const int nfs_set_port_max = 65535; 43static const int nfs_set_port_max = 65535;
43 44
44/*
45 * If the kernel has IPv6 support available, always listen for
46 * both AF_INET and AF_INET6 requests.
47 */
48#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
49static const sa_family_t nfs_callback_family = AF_INET6;
50#else
51static const sa_family_t nfs_callback_family = AF_INET;
52#endif
53
54static int param_set_port(const char *val, struct kernel_param *kp) 45static int param_set_port(const char *val, struct kernel_param *kp)
55{ 46{
56 char *endp; 47 char *endp;
@@ -116,19 +107,29 @@ int nfs_callback_up(void)
116 mutex_lock(&nfs_callback_mutex); 107 mutex_lock(&nfs_callback_mutex);
117 if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) 108 if (nfs_callback_info.users++ || nfs_callback_info.task != NULL)
118 goto out; 109 goto out;
119 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, 110 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
120 nfs_callback_family, NULL);
121 ret = -ENOMEM; 111 ret = -ENOMEM;
122 if (!serv) 112 if (!serv)
123 goto out_err; 113 goto out_err;
124 114
125 ret = svc_create_xprt(serv, "tcp", nfs_callback_set_tcpport, 115 ret = svc_create_xprt(serv, "tcp", PF_INET,
126 SVC_SOCK_ANONYMOUS); 116 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
127 if (ret <= 0) 117 if (ret <= 0)
128 goto out_err; 118 goto out_err;
129 nfs_callback_tcpport = ret; 119 nfs_callback_tcpport = ret;
130 dprintk("NFS: Callback listener port = %u (af %u)\n", 120 dprintk("NFS: Callback listener port = %u (af %u)\n",
131 nfs_callback_tcpport, nfs_callback_family); 121 nfs_callback_tcpport, PF_INET);
122
123#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
124 ret = svc_create_xprt(serv, "tcp", PF_INET6,
125 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
126 if (ret > 0) {
127 nfs_callback_tcpport6 = ret;
128 dprintk("NFS: Callback listener port = %u (af %u)\n",
129 nfs_callback_tcpport6, PF_INET6);
130 } else if (ret != -EAFNOSUPPORT)
131 goto out_err;
132#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
132 133
133 nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); 134 nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
134 if (IS_ERR(nfs_callback_info.rqst)) { 135 if (IS_ERR(nfs_callback_info.rqst)) {
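
On success svc_create_xprt() returns the port that was actually bound, which the hunk above records separately as nfs_callback_tcpport and nfs_callback_tcpport6 for the two listeners. A user-space analogue of recovering an ephemerally bound port (demo only, not the kernel API):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* Bind port 0 and ask the kernel which port it picked, the way the
 * callback code consumes svc_create_xprt()'s positive return value. */
static int bound_port(void)
{
	struct sockaddr_in sin;
	socklen_t len = sizeof(sin);
	int port, fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;	/* sin_port stays 0: kernel chooses */
	if (bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0 ||
	    getsockname(fd, (struct sockaddr *)&sin, &len) < 0) {
		close(fd);
		return -1;
	}
	port = ntohs(sin.sin_port);
	close(fd);
	return port;
}

int main(void)
{
	printf("callback listener port = %d\n", bound_port());
	return 0;
}
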
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index bb25d2135ff1..e110e286a262 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -72,5 +72,6 @@ extern void nfs_callback_down(void);
72 72
73extern unsigned int nfs_callback_set_tcpport; 73extern unsigned int nfs_callback_set_tcpport;
74extern unsigned short nfs_callback_tcpport; 74extern unsigned short nfs_callback_tcpport;
75extern unsigned short nfs_callback_tcpport6;
75 76
76#endif /* __LINUX_FS_NFS_CALLBACK_H */ 77#endif /* __LINUX_FS_NFS_CALLBACK_H */
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 574158ae2398..aba38017bdef 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -224,38 +224,6 @@ void nfs_put_client(struct nfs_client *clp)
224} 224}
225 225
226#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 226#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
227static const struct in6_addr *nfs_map_ipv4_addr(const struct sockaddr *sa, struct in6_addr *addr_mapped)
228{
229 switch (sa->sa_family) {
230 default:
231 return NULL;
232 case AF_INET6:
233 return &((const struct sockaddr_in6 *)sa)->sin6_addr;
234 break;
235 case AF_INET:
236 ipv6_addr_set_v4mapped(((const struct sockaddr_in *)sa)->sin_addr.s_addr,
237 addr_mapped);
238 return addr_mapped;
239 }
240}
241
242static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
243 const struct sockaddr *sa2)
244{
245 const struct in6_addr *addr1;
246 const struct in6_addr *addr2;
247 struct in6_addr addr1_mapped;
248 struct in6_addr addr2_mapped;
249
250 addr1 = nfs_map_ipv4_addr(sa1, &addr1_mapped);
251 if (likely(addr1 != NULL)) {
252 addr2 = nfs_map_ipv4_addr(sa2, &addr2_mapped);
253 if (likely(addr2 != NULL))
254 return ipv6_addr_equal(addr1, addr2);
255 }
256 return 0;
257}
258
259/* 227/*
260 * Test if two ip6 socket addresses refer to the same socket by 228 * Test if two ip6 socket addresses refer to the same socket by
261 * comparing relevant fields. The padding bytes specifically, are not 229 * comparing relevant fields. The padding bytes specifically, are not
@@ -267,38 +235,21 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
267 * 235 *
268 * The caller should ensure both socket addresses are AF_INET6. 236 * The caller should ensure both socket addresses are AF_INET6.
269 */ 237 */
270static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1, 238static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
271 const struct sockaddr *sa2) 239 const struct sockaddr *sa2)
272{ 240{
273 const struct sockaddr_in6 *saddr1 = (const struct sockaddr_in6 *)sa1; 241 const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1;
274 const struct sockaddr_in6 *saddr2 = (const struct sockaddr_in6 *)sa2; 242 const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2;
275 243
276 if (!ipv6_addr_equal(&saddr1->sin6_addr, 244 if (ipv6_addr_scope(&sin1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL &&
277 &saddr1->sin6_addr)) 245 sin1->sin6_scope_id != sin2->sin6_scope_id)
278 return 0; 246 return 0;
279 if (ipv6_addr_scope(&saddr1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL &&
280 saddr1->sin6_scope_id != saddr2->sin6_scope_id)
281 return 0;
282 return saddr1->sin6_port == saddr2->sin6_port;
283}
284#else
285static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1,
286 const struct sockaddr_in *sa2)
287{
288 return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr;
289}
290 247
291static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, 248 return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr);
292 const struct sockaddr *sa2)
293{
294 if (unlikely(sa1->sa_family != AF_INET || sa2->sa_family != AF_INET))
295 return 0;
296 return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1,
297 (const struct sockaddr_in *)sa2);
298} 249}
299 250#else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
300static int nfs_sockaddr_cmp_ip6(const struct sockaddr * sa1, 251static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
301 const struct sockaddr * sa2) 252 const struct sockaddr *sa2)
302{ 253{
303 return 0; 254 return 0;
304} 255}
@@ -311,20 +262,57 @@ static int nfs_sockaddr_cmp_ip6(const struct sockaddr * sa1,
311 * 262 *
312 * The caller should ensure both socket addresses are AF_INET. 263 * The caller should ensure both socket addresses are AF_INET.
313 */ 264 */
265static int nfs_sockaddr_match_ipaddr4(const struct sockaddr *sa1,
266 const struct sockaddr *sa2)
267{
268 const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1;
269 const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2;
270
271 return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
272}
273
274static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1,
275 const struct sockaddr *sa2)
276{
277 const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1;
278 const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2;
279
280 return nfs_sockaddr_match_ipaddr6(sa1, sa2) &&
281 (sin1->sin6_port == sin2->sin6_port);
282}
283
314static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1, 284static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1,
315 const struct sockaddr *sa2) 285 const struct sockaddr *sa2)
316{ 286{
317 const struct sockaddr_in *saddr1 = (const struct sockaddr_in *)sa1; 287 const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1;
318 const struct sockaddr_in *saddr2 = (const struct sockaddr_in *)sa2; 288 const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2;
289
290 return nfs_sockaddr_match_ipaddr4(sa1, sa2) &&
291 (sin1->sin_port == sin2->sin_port);
292}
319 293
320 if (saddr1->sin_addr.s_addr != saddr2->sin_addr.s_addr) 294/*
295 * Test if two socket addresses represent the same actual socket,
296 * by comparing (only) relevant fields, excluding the port number.
297 */
298static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
299 const struct sockaddr *sa2)
300{
301 if (sa1->sa_family != sa2->sa_family)
321 return 0; 302 return 0;
322 return saddr1->sin_port == saddr2->sin_port; 303
304 switch (sa1->sa_family) {
305 case AF_INET:
306 return nfs_sockaddr_match_ipaddr4(sa1, sa2);
307 case AF_INET6:
308 return nfs_sockaddr_match_ipaddr6(sa1, sa2);
309 }
310 return 0;
323} 311}
324 312
325/* 313/*
326 * Test if two socket addresses represent the same actual socket, 314 * Test if two socket addresses represent the same actual socket,
327 * by comparing (only) relevant fields. 315 * by comparing (only) relevant fields, including the port number.
328 */ 316 */
329static int nfs_sockaddr_cmp(const struct sockaddr *sa1, 317static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
330 const struct sockaddr *sa2) 318 const struct sockaddr *sa2)
@@ -1606,8 +1594,6 @@ int __init nfs_fs_proc_init(void)
1606 if (!proc_fs_nfs) 1594 if (!proc_fs_nfs)
1607 goto error_0; 1595 goto error_0;
1608 1596
1609 proc_fs_nfs->owner = THIS_MODULE;
1610
1611 /* a file of servers with which we're dealing */ 1597 /* a file of servers with which we're dealing */
1612 p = proc_create("servers", S_IFREG|S_IRUGO, 1598 p = proc_create("servers", S_IFREG|S_IRUGO,
1613 proc_fs_nfs, &nfs_server_list_fops); 1599 proc_fs_nfs, &nfs_server_list_fops);
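
The client.c hunks above drop the v4-mapped-into-v6 comparison in favour of per-family helpers plus a dispatcher that first insists the families match; address-only matching and address-plus-port matching become separate layers. A self-contained sketch of that shape, where IN6_IS_ADDR_LINKLOCAL stands in for the kernel's ipv6_addr_scope() test and both operands' address bytes are compared:

#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>

static bool match_ipaddr4(const struct sockaddr_in *a, const struct sockaddr_in *b)
{
	return a->sin_addr.s_addr == b->sin_addr.s_addr;
}

static bool match_ipaddr6(const struct sockaddr_in6 *a, const struct sockaddr_in6 *b)
{
	/* link-local addresses only match within one scope (interface) */
	if (IN6_IS_ADDR_LINKLOCAL(&a->sin6_addr) &&
	    a->sin6_scope_id != b->sin6_scope_id)
		return false;
	return memcmp(&a->sin6_addr, &b->sin6_addr, sizeof(a->sin6_addr)) == 0;
}

/* address-only comparison: families must agree, then dispatch per family */
static bool match_ipaddr(const struct sockaddr *a, const struct sockaddr *b)
{
	if (a->sa_family != b->sa_family)
		return false;
	switch (a->sa_family) {
	case AF_INET:
		return match_ipaddr4((const struct sockaddr_in *)a,
				     (const struct sockaddr_in *)b);
	case AF_INET6:
		return match_ipaddr6((const struct sockaddr_in6 *)a,
				     (const struct sockaddr_in6 *)b);
	}
	return false;
}

int main(void)
{
	struct sockaddr_in a = { .sin_family = AF_INET };
	struct sockaddr_in b = { .sin_family = AF_INET };

	a.sin_addr.s_addr = htonl(0x7f000001);	/* 127.0.0.1 */
	b.sin_addr.s_addr = htonl(0x7f000001);
	printf("match: %d\n", match_ipaddr((const struct sockaddr *)&a,
					   (const struct sockaddr *)&b));
	return 0;
}

The port-sensitive nfs_sockaddr_cmp() variants in the diff layer a port check on top of exactly this address-only dispatch.
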
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 78bf72fc1db3..370b190a09d1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1624,8 +1624,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1624 } else if (atomic_read(&new_dentry->d_count) > 1) 1624 } else if (atomic_read(&new_dentry->d_count) > 1)
1625 /* dentry still busy? */ 1625 /* dentry still busy? */
1626 goto out; 1626 goto out;
1627 } else 1627 }
1628 nfs_drop_nlink(new_inode);
1629 1628
1630go_ahead: 1629go_ahead:
1631 /* 1630 /*
@@ -1638,10 +1637,8 @@ go_ahead:
1638 } 1637 }
1639 nfs_inode_return_delegation(old_inode); 1638 nfs_inode_return_delegation(old_inode);
1640 1639
1641 if (new_inode != NULL) { 1640 if (new_inode != NULL)
1642 nfs_inode_return_delegation(new_inode); 1641 nfs_inode_return_delegation(new_inode);
1643 d_delete(new_dentry);
1644 }
1645 1642
1646 error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, 1643 error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
1647 new_dir, &new_dentry->d_name); 1644 new_dir, &new_dentry->d_name);
@@ -1650,6 +1647,8 @@ out:
1650 if (rehash) 1647 if (rehash)
1651 d_rehash(rehash); 1648 d_rehash(rehash);
1652 if (!error) { 1649 if (!error) {
1650 if (new_inode != NULL)
1651 nfs_drop_nlink(new_inode);
1653 d_move(old_dentry, new_dentry); 1652 d_move(old_dentry, new_dentry);
1654 nfs_set_verifier(new_dentry, 1653 nfs_set_verifier(new_dentry,
1655 nfs_save_change_attribute(new_dir)); 1654 nfs_save_change_attribute(new_dir));
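
The dir.c hunks defer nfs_drop_nlink() on a displaced rename target until the server-side rename has actually succeeded, instead of decrementing the cached link count up front. A tiny sketch of the resulting ordering; every helper here is a stand-in, not a kernel function:

#include <stdio.h>

struct target { int nlink; };

/* pretend RPC to the server; 0 means the rename succeeded */
static int server_rename(void) { return 0; }

static void drop_cached_nlink(struct target *t) { t->nlink--; }

/* post-patch ordering: local bookkeeping only after server success */
static int rename_over(struct target *victim)
{
	int error = server_rename();

	if (error == 0 && victim != NULL)
		drop_cached_nlink(victim);
	return error;
}

int main(void)
{
	struct target t = { .nlink = 1 };

	printf("rename -> %d, victim nlink now %d\n", rename_over(&t), t.nlink);
	return 0;
}
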
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 90f292b520d2..0abf3f331f56 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -64,11 +64,7 @@ const struct file_operations nfs_file_operations = {
64 .write = do_sync_write, 64 .write = do_sync_write,
65 .aio_read = nfs_file_read, 65 .aio_read = nfs_file_read,
66 .aio_write = nfs_file_write, 66 .aio_write = nfs_file_write,
67#ifdef CONFIG_MMU
68 .mmap = nfs_file_mmap, 67 .mmap = nfs_file_mmap,
69#else
70 .mmap = generic_file_mmap,
71#endif
72 .open = nfs_file_open, 68 .open = nfs_file_open,
73 .flush = nfs_file_flush, 69 .flush = nfs_file_flush,
74 .release = nfs_file_release, 70 .release = nfs_file_release,
@@ -141,9 +137,6 @@ nfs_file_release(struct inode *inode, struct file *filp)
141 dentry->d_parent->d_name.name, 137 dentry->d_parent->d_name.name,
142 dentry->d_name.name); 138 dentry->d_name.name);
143 139
144 /* Ensure that dirty pages are flushed out with the right creds */
145 if (filp->f_mode & FMODE_WRITE)
146 nfs_wb_all(dentry->d_inode);
147 nfs_inc_stats(inode, NFSIOS_VFSRELEASE); 140 nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
148 return nfs_release(inode, filp); 141 return nfs_release(inode, filp);
149} 142}
@@ -235,7 +228,6 @@ nfs_file_flush(struct file *file, fl_owner_t id)
235 struct nfs_open_context *ctx = nfs_file_open_context(file); 228 struct nfs_open_context *ctx = nfs_file_open_context(file);
236 struct dentry *dentry = file->f_path.dentry; 229 struct dentry *dentry = file->f_path.dentry;
237 struct inode *inode = dentry->d_inode; 230 struct inode *inode = dentry->d_inode;
238 int status;
239 231
240 dprintk("NFS: flush(%s/%s)\n", 232 dprintk("NFS: flush(%s/%s)\n",
241 dentry->d_parent->d_name.name, 233 dentry->d_parent->d_name.name,
@@ -245,11 +237,8 @@ nfs_file_flush(struct file *file, fl_owner_t id)
245 return 0; 237 return 0;
246 nfs_inc_stats(inode, NFSIOS_VFSFLUSH); 238 nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
247 239
248 /* Ensure that data+attribute caches are up to date after close() */ 240 /* Flush writes to the server and return any errors */
249 status = nfs_do_fsync(ctx, inode); 241 return nfs_do_fsync(ctx, inode);
250 if (!status)
251 nfs_revalidate_inode(NFS_SERVER(inode), inode);
252 return status;
253} 242}
254 243
255static ssize_t 244static ssize_t
@@ -304,11 +293,13 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
304 dprintk("NFS: mmap(%s/%s)\n", 293 dprintk("NFS: mmap(%s/%s)\n",
305 dentry->d_parent->d_name.name, dentry->d_name.name); 294 dentry->d_parent->d_name.name, dentry->d_name.name);
306 295
307 status = nfs_revalidate_mapping(inode, file->f_mapping); 296 /* Note: generic_file_mmap() returns ENOSYS on nommu systems
297 * so we call that before revalidating the mapping
298 */
299 status = generic_file_mmap(file, vma);
308 if (!status) { 300 if (!status) {
309 vma->vm_ops = &nfs_file_vm_ops; 301 vma->vm_ops = &nfs_file_vm_ops;
310 vma->vm_flags |= VM_CAN_NONLINEAR; 302 status = nfs_revalidate_mapping(inode, file->f_mapping);
311 file_accessed(file);
312 } 303 }
313 return status; 304 return status;
314} 305}
@@ -354,6 +345,15 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
354 file->f_path.dentry->d_name.name, 345 file->f_path.dentry->d_name.name,
355 mapping->host->i_ino, len, (long long) pos); 346 mapping->host->i_ino, len, (long long) pos);
356 347
348 /*
349 * Prevent starvation issues if someone is doing a consistency
350 * sync-to-disk
351 */
352 ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
353 nfs_wait_bit_killable, TASK_KILLABLE);
354 if (ret)
355 return ret;
356
357 page = grab_cache_page_write_begin(mapping, index, flags); 357 page = grab_cache_page_write_begin(mapping, index, flags);
358 if (!page) 358 if (!page)
359 return -ENOMEM; 359 return -ENOMEM;
@@ -451,8 +451,9 @@ const struct address_space_operations nfs_file_aops = {
451 .launder_page = nfs_launder_page, 451 .launder_page = nfs_launder_page,
452}; 452};
453 453
454static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) 454static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
455{ 455{
456 struct page *page = vmf->page;
456 struct file *filp = vma->vm_file; 457 struct file *filp = vma->vm_file;
457 struct dentry *dentry = filp->f_path.dentry; 458 struct dentry *dentry = filp->f_path.dentry;
458 unsigned pagelen; 459 unsigned pagelen;
@@ -483,6 +484,8 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
483 ret = pagelen; 484 ret = pagelen;
484out_unlock: 485out_unlock:
485 unlock_page(page); 486 unlock_page(page);
487 if (ret)
488 ret = VM_FAULT_SIGBUS;
486 return ret; 489 return ret;
487} 490}
488 491
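
Among the file.c changes, ->page_mkwrite() now takes a struct vm_fault and must return a VM_FAULT_* code rather than an errno, so the hunk folds any nonzero status into VM_FAULT_SIGBUS. A simplified sketch with stand-in types and constants (not the kernel headers):

#include <stdio.h>

#define VM_FAULT_SIGBUS 0x0002		/* stand-in for the kernel constant */

struct vm_fault { void *page; };	/* reduced stand-in */

/* pretend write-preparation step; nonzero means an errno-style failure */
static int prepare_write(void *page)
{
	(void)page;
	return -5;			/* demo: act as if -EIO happened */
}

static int page_mkwrite(struct vm_fault *vmf)
{
	int ret = prepare_write(vmf->page);

	if (ret)
		ret = VM_FAULT_SIGBUS;	/* errno becomes a fault code */
	return ret;
}

int main(void)
{
	struct vm_fault vmf = { .page = NULL };

	printf("page_mkwrite -> %#x\n", (unsigned)page_mkwrite(&vmf));
	return 0;
}
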
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index b7c9b2df1f29..46177cb87064 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -156,7 +156,7 @@ int nfs4_path_walk(struct nfs_server *server,
156 return ret; 156 return ret;
157 } 157 }
158 158
159 if (fattr.type != NFDIR) { 159 if (!S_ISDIR(fattr.mode)) {
160 printk(KERN_ERR "nfs4_get_root:" 160 printk(KERN_ERR "nfs4_get_root:"
161 " getroot encountered non-directory\n"); 161 " getroot encountered non-directory\n");
162 return -ENOTDIR; 162 return -ENOTDIR;
@@ -213,7 +213,7 @@ eat_dot_dir:
213 return ret; 213 return ret;
214 } 214 }
215 215
216 if (fattr.type != NFDIR) { 216 if (!S_ISDIR(fattr.mode)) {
217 printk(KERN_ERR "nfs4_get_root:" 217 printk(KERN_ERR "nfs4_get_root:"
218 " lookupfh encountered non-directory\n"); 218 " lookupfh encountered non-directory\n");
219 return -ENOTDIR; 219 return -ENOTDIR;
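
The getroot.c checks now classify the root object from its mode bits instead of a separate fattr->type enum; S_ISDIR() on fattr.mode carries the same information. The same test in plain C:

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	mode_t mode = S_IFDIR | 0755;	/* what a healthy root decodes to */

	if (!S_ISDIR(mode))
		puts("getroot encountered non-directory");	/* -ENOTDIR path */
	else
		puts("root is a directory");
	return 0;
}
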
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 0c381686171e..a834d1d850b7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -66,6 +66,18 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
66} 66}
67 67
68/** 68/**
69 * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
70 * @word: long word containing the bit lock
71 */
72int nfs_wait_bit_killable(void *word)
73{
74 if (fatal_signal_pending(current))
75 return -ERESTARTSYS;
76 schedule();
77 return 0;
78}
79
80/**
69 * nfs_compat_user_ino64 - returns the user-visible inode number 81 * nfs_compat_user_ino64 - returns the user-visible inode number
70 * @fileid: 64-bit fileid 82 * @fileid: 64-bit fileid
71 * 83 *
@@ -249,13 +261,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
249 struct inode *inode = ERR_PTR(-ENOENT); 261 struct inode *inode = ERR_PTR(-ENOENT);
250 unsigned long hash; 262 unsigned long hash;
251 263
252 if ((fattr->valid & NFS_ATTR_FATTR) == 0) 264 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0)
253 goto out_no_inode; 265 goto out_no_inode;
254 266 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0)
255 if (!fattr->nlink) {
256 printk("NFS: Buggy server - nlink == 0!\n");
257 goto out_no_inode; 267 goto out_no_inode;
258 }
259 268
260 hash = nfs_fattr_to_ino_t(fattr); 269 hash = nfs_fattr_to_ino_t(fattr);
261 270
@@ -291,7 +300,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
291 && fattr->size <= NFS_LIMIT_READDIRPLUS) 300 && fattr->size <= NFS_LIMIT_READDIRPLUS)
292 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); 301 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
293 /* Deal with crossing mountpoints */ 302 /* Deal with crossing mountpoints */
294 if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { 303 if ((fattr->valid & NFS_ATTR_FATTR_FSID)
304 && !nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
295 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) 305 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
296 inode->i_op = &nfs_referral_inode_operations; 306 inode->i_op = &nfs_referral_inode_operations;
297 else 307 else
@@ -304,28 +314,45 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
304 else 314 else
305 init_special_inode(inode, inode->i_mode, fattr->rdev); 315 init_special_inode(inode, inode->i_mode, fattr->rdev);
306 316
317 memset(&inode->i_atime, 0, sizeof(inode->i_atime));
318 memset(&inode->i_mtime, 0, sizeof(inode->i_mtime));
319 memset(&inode->i_ctime, 0, sizeof(inode->i_ctime));
320 nfsi->change_attr = 0;
321 inode->i_size = 0;
322 inode->i_nlink = 0;
323 inode->i_uid = -2;
324 inode->i_gid = -2;
325 inode->i_blocks = 0;
326 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
327
307 nfsi->read_cache_jiffies = fattr->time_start; 328 nfsi->read_cache_jiffies = fattr->time_start;
308 nfsi->attr_gencount = fattr->gencount; 329 nfsi->attr_gencount = fattr->gencount;
309 inode->i_atime = fattr->atime; 330 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
310 inode->i_mtime = fattr->mtime; 331 inode->i_atime = fattr->atime;
311 inode->i_ctime = fattr->ctime; 332 if (fattr->valid & NFS_ATTR_FATTR_MTIME)
312 if (fattr->valid & NFS_ATTR_FATTR_V4) 333 inode->i_mtime = fattr->mtime;
334 if (fattr->valid & NFS_ATTR_FATTR_CTIME)
335 inode->i_ctime = fattr->ctime;
336 if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
313 nfsi->change_attr = fattr->change_attr; 337 nfsi->change_attr = fattr->change_attr;
314 inode->i_size = nfs_size_to_loff_t(fattr->size); 338 if (fattr->valid & NFS_ATTR_FATTR_SIZE)
315 inode->i_nlink = fattr->nlink; 339 inode->i_size = nfs_size_to_loff_t(fattr->size);
316 inode->i_uid = fattr->uid; 340 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
317 inode->i_gid = fattr->gid; 341 inode->i_nlink = fattr->nlink;
318 if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { 342 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
343 inode->i_uid = fattr->uid;
344 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
345 inode->i_gid = fattr->gid;
346 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
347 inode->i_blocks = fattr->du.nfs2.blocks;
348 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
319 /* 349 /*
 320 * report the blocks in 512-byte units 350 * report the blocks in 512-byte units
321 */ 351 */
322 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); 352 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
323 } else {
324 inode->i_blocks = fattr->du.nfs2.blocks;
325 } 353 }
326 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); 354 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
327 nfsi->attrtimeo_timestamp = now; 355 nfsi->attrtimeo_timestamp = now;
328 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
329 nfsi->access_cache = RB_ROOT; 356 nfsi->access_cache = RB_ROOT;
330 357
331 unlock_new_inode(inode); 358 unlock_new_inode(inode);
@@ -514,6 +541,32 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
514 return err; 541 return err;
515} 542}
516 543
544/**
 545 * nfs_close_context - Common close_context() routine for NFSv2/v3
546 * @ctx: pointer to context
547 * @is_sync: is this a synchronous close
548 *
 549 * Always ensure that the attributes are up to date if we're mounted
 550 * with close-to-open semantics.
551 */
552void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
553{
554 struct inode *inode;
555 struct nfs_server *server;
556
557 if (!(ctx->mode & FMODE_WRITE))
558 return;
559 if (!is_sync)
560 return;
561 inode = ctx->path.dentry->d_inode;
562 if (!list_empty(&NFS_I(inode)->open_files))
563 return;
564 server = NFS_SERVER(inode);
565 if (server->flags & NFS_MOUNT_NOCTO)
566 return;
567 nfs_revalidate_inode(server, inode);
568}
569
517static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred) 570static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred)
518{ 571{
519 struct nfs_open_context *ctx; 572 struct nfs_open_context *ctx;
@@ -540,24 +593,15 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
540 return ctx; 593 return ctx;
541} 594}
542 595
543static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait) 596static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
544{ 597{
545 struct inode *inode; 598 struct inode *inode = ctx->path.dentry->d_inode;
546
547 if (ctx == NULL)
548 return;
549 599
550 inode = ctx->path.dentry->d_inode;
551 if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock)) 600 if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
552 return; 601 return;
553 list_del(&ctx->list); 602 list_del(&ctx->list);
554 spin_unlock(&inode->i_lock); 603 spin_unlock(&inode->i_lock);
555 if (ctx->state != NULL) { 604 NFS_PROTO(inode)->close_context(ctx, is_sync);
556 if (wait)
557 nfs4_close_sync(&ctx->path, ctx->state, ctx->mode);
558 else
559 nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
560 }
561 if (ctx->cred != NULL) 605 if (ctx->cred != NULL)
562 put_rpccred(ctx->cred); 606 put_rpccred(ctx->cred);
563 path_put(&ctx->path); 607 path_put(&ctx->path);
@@ -670,9 +714,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
670 if (NFS_STALE(inode)) 714 if (NFS_STALE(inode))
671 goto out; 715 goto out;
672 716
673 if (NFS_STALE(inode))
674 goto out;
675
676 nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); 717 nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
677 status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); 718 status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
678 if (status != 0) { 719 if (status != 0) {
@@ -815,25 +856,31 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
815{ 856{
816 struct nfs_inode *nfsi = NFS_I(inode); 857 struct nfs_inode *nfsi = NFS_I(inode);
817 858
818 if ((fattr->valid & NFS_ATTR_WCC_V4) != 0 && 859 if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
819 nfsi->change_attr == fattr->pre_change_attr) { 860 && (fattr->valid & NFS_ATTR_FATTR_CHANGE)
861 && nfsi->change_attr == fattr->pre_change_attr) {
820 nfsi->change_attr = fattr->change_attr; 862 nfsi->change_attr = fattr->change_attr;
821 if (S_ISDIR(inode->i_mode)) 863 if (S_ISDIR(inode->i_mode))
822 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 864 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
823 } 865 }
824 /* If we have atomic WCC data, we may update some attributes */ 866 /* If we have atomic WCC data, we may update some attributes */
825 if ((fattr->valid & NFS_ATTR_WCC) != 0) { 867 if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
826 if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) 868 && (fattr->valid & NFS_ATTR_FATTR_CTIME)
869 && timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
827 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); 870 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
828 if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { 871
872 if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME)
873 && (fattr->valid & NFS_ATTR_FATTR_MTIME)
874 && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
829 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); 875 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
830 if (S_ISDIR(inode->i_mode)) 876 if (S_ISDIR(inode->i_mode))
831 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 877 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
832 }
833 if (i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) &&
834 nfsi->npages == 0)
835 i_size_write(inode, nfs_size_to_loff_t(fattr->size));
836 } 878 }
879 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
880 && (fattr->valid & NFS_ATTR_FATTR_SIZE)
881 && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)
882 && nfsi->npages == 0)
883 i_size_write(inode, nfs_size_to_loff_t(fattr->size));
837} 884}
838 885
839/** 886/**
@@ -853,35 +900,39 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
853 900
854 901
855 /* Has the inode gone and changed behind our back? */ 902 /* Has the inode gone and changed behind our back? */
856 if (nfsi->fileid != fattr->fileid 903 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
857 || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { 904 return -EIO;
905 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
858 return -EIO; 906 return -EIO;
859 }
860 907
861 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && 908 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
862 nfsi->change_attr != fattr->change_attr) 909 nfsi->change_attr != fattr->change_attr)
863 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 910 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
864 911
865 /* Verify a few of the more important attributes */ 912 /* Verify a few of the more important attributes */
866 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) 913 if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
867 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 914 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
868 915
869 cur_size = i_size_read(inode); 916 if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
870 new_isize = nfs_size_to_loff_t(fattr->size); 917 cur_size = i_size_read(inode);
871 if (cur_size != new_isize && nfsi->npages == 0) 918 new_isize = nfs_size_to_loff_t(fattr->size);
872 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 919 if (cur_size != new_isize && nfsi->npages == 0)
920 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
921 }
873 922
874 /* Have any file permissions changed? */ 923 /* Have any file permissions changed? */
875 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) 924 if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
876 || inode->i_uid != fattr->uid 925 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
877 || inode->i_gid != fattr->gid) 926 if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid)
927 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
928 if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid)
878 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; 929 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
879 930
880 /* Has the link count changed? */ 931 /* Has the link count changed? */
881 if (inode->i_nlink != fattr->nlink) 932 if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
882 invalid |= NFS_INO_INVALID_ATTR; 933 invalid |= NFS_INO_INVALID_ATTR;
883 934
884 if (!timespec_equal(&inode->i_atime, &fattr->atime)) 935 if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&inode->i_atime, &fattr->atime))
885 invalid |= NFS_INO_INVALID_ATIME; 936 invalid |= NFS_INO_INVALID_ATIME;
886 937
887 if (invalid != 0) 938 if (invalid != 0)
@@ -893,11 +944,15 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
893 944
894static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr) 945static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
895{ 946{
947 if (!(fattr->valid & NFS_ATTR_FATTR_CTIME))
948 return 0;
896 return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0; 949 return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0;
897} 950}
898 951
899static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr) 952static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
900{ 953{
954 if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
955 return 0;
901 return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); 956 return nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
902} 957}
903 958
@@ -1033,20 +1088,31 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
1033 /* Don't do a WCC update if these attributes are already stale */ 1088 /* Don't do a WCC update if these attributes are already stale */
1034 if ((fattr->valid & NFS_ATTR_FATTR) == 0 || 1089 if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
1035 !nfs_inode_attrs_need_update(inode, fattr)) { 1090 !nfs_inode_attrs_need_update(inode, fattr)) {
1036 fattr->valid &= ~(NFS_ATTR_WCC_V4|NFS_ATTR_WCC); 1091 fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE
1092 | NFS_ATTR_FATTR_PRESIZE
1093 | NFS_ATTR_FATTR_PREMTIME
1094 | NFS_ATTR_FATTR_PRECTIME);
1037 goto out_noforce; 1095 goto out_noforce;
1038 } 1096 }
1039 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && 1097 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
1040 (fattr->valid & NFS_ATTR_WCC_V4) == 0) { 1098 (fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) {
1041 fattr->pre_change_attr = NFS_I(inode)->change_attr; 1099 fattr->pre_change_attr = NFS_I(inode)->change_attr;
1042 fattr->valid |= NFS_ATTR_WCC_V4; 1100 fattr->valid |= NFS_ATTR_FATTR_PRECHANGE;
1043 } 1101 }
1044 if ((fattr->valid & NFS_ATTR_FATTR) != 0 && 1102 if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 &&
1045 (fattr->valid & NFS_ATTR_WCC) == 0) { 1103 (fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) {
1046 memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime)); 1104 memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime));
1105 fattr->valid |= NFS_ATTR_FATTR_PRECTIME;
1106 }
1107 if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 &&
1108 (fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) {
1047 memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime)); 1109 memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime));
1110 fattr->valid |= NFS_ATTR_FATTR_PREMTIME;
1111 }
1112 if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 &&
1113 (fattr->valid & NFS_ATTR_FATTR_PRESIZE) == 0) {
1048 fattr->pre_size = i_size_read(inode); 1114 fattr->pre_size = i_size_read(inode);
1049 fattr->valid |= NFS_ATTR_WCC; 1115 fattr->valid |= NFS_ATTR_FATTR_PRESIZE;
1050 } 1116 }
1051out_noforce: 1117out_noforce:
1052 status = nfs_post_op_update_inode_locked(inode, fattr); 1118 status = nfs_post_op_update_inode_locked(inode, fattr);
@@ -1078,18 +1144,18 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1078 __func__, inode->i_sb->s_id, inode->i_ino, 1144 __func__, inode->i_sb->s_id, inode->i_ino,
1079 atomic_read(&inode->i_count), fattr->valid); 1145 atomic_read(&inode->i_count), fattr->valid);
1080 1146
1081 if (nfsi->fileid != fattr->fileid) 1147 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
1082 goto out_fileid; 1148 goto out_fileid;
1083 1149
1084 /* 1150 /*
1085 * Make sure the inode's type hasn't changed. 1151 * Make sure the inode's type hasn't changed.
1086 */ 1152 */
1087 if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) 1153 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
1088 goto out_changed; 1154 goto out_changed;
1089 1155
1090 server = NFS_SERVER(inode); 1156 server = NFS_SERVER(inode);
1091 /* Update the fsid? */ 1157 /* Update the fsid? */
1092 if (S_ISDIR(inode->i_mode) && 1158 if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) &&
1093 !nfs_fsid_equal(&server->fsid, &fattr->fsid) && 1159 !nfs_fsid_equal(&server->fsid, &fattr->fsid) &&
1094 !test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags)) 1160 !test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags))
1095 server->fsid = fattr->fsid; 1161 server->fsid = fattr->fsid;
@@ -1099,14 +1165,27 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1099 */ 1165 */
1100 nfsi->read_cache_jiffies = fattr->time_start; 1166 nfsi->read_cache_jiffies = fattr->time_start;
1101 1167
1102 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME 1168 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) || (fattr->valid & (NFS_ATTR_FATTR_MTIME|NFS_ATTR_FATTR_CTIME)))
1103 | NFS_INO_REVAL_PAGECACHE); 1169 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
1170 | NFS_INO_INVALID_ATIME
1171 | NFS_INO_REVAL_PAGECACHE);
1104 1172
1105 /* Do atomic weak cache consistency updates */ 1173 /* Do atomic weak cache consistency updates */
1106 nfs_wcc_update_inode(inode, fattr); 1174 nfs_wcc_update_inode(inode, fattr);
1107 1175
1108 /* More cache consistency checks */ 1176 /* More cache consistency checks */
1109 if (!(fattr->valid & NFS_ATTR_FATTR_V4)) { 1177 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) {
1178 if (nfsi->change_attr != fattr->change_attr) {
1179 dprintk("NFS: change_attr change on server for file %s/%ld\n",
1180 inode->i_sb->s_id, inode->i_ino);
1181 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1182 if (S_ISDIR(inode->i_mode))
1183 nfs_force_lookup_revalidate(inode);
1184 nfsi->change_attr = fattr->change_attr;
1185 }
1186 }
1187
1188 if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
1110 /* NFSv2/v3: Check if the mtime agrees */ 1189 /* NFSv2/v3: Check if the mtime agrees */
1111 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { 1190 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
1112 dprintk("NFS: mtime change on server for file %s/%ld\n", 1191 dprintk("NFS: mtime change on server for file %s/%ld\n",
@@ -1114,59 +1193,80 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1114 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; 1193 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1115 if (S_ISDIR(inode->i_mode)) 1194 if (S_ISDIR(inode->i_mode))
1116 nfs_force_lookup_revalidate(inode); 1195 nfs_force_lookup_revalidate(inode);
1196 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1117 } 1197 }
1198 }
1199 if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
1118 /* If ctime has changed we should definitely clear access+acl caches */ 1200 /* If ctime has changed we should definitely clear access+acl caches */
1119 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) 1201 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
1120 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1202 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1121 } else if (nfsi->change_attr != fattr->change_attr) { 1203 /* and probably clear data for a directory too as utimes can cause
1122 dprintk("NFS: change_attr change on server for file %s/%ld\n", 1204 * havoc with our cache.
1123 inode->i_sb->s_id, inode->i_ino); 1205 */
1124 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1206 if (S_ISDIR(inode->i_mode)) {
1125 if (S_ISDIR(inode->i_mode)) 1207 invalid |= NFS_INO_INVALID_DATA;
1126 nfs_force_lookup_revalidate(inode); 1208 nfs_force_lookup_revalidate(inode);
1209 }
1210 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1211 }
1127 } 1212 }
1128 1213
1129 /* Check if our cached file size is stale */ 1214 /* Check if our cached file size is stale */
1130 new_isize = nfs_size_to_loff_t(fattr->size); 1215 if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
1131 cur_isize = i_size_read(inode); 1216 new_isize = nfs_size_to_loff_t(fattr->size);
1132 if (new_isize != cur_isize) { 1217 cur_isize = i_size_read(inode);
1133 /* Do we perhaps have any outstanding writes, or has 1218 if (new_isize != cur_isize) {
1134 * the file grown beyond our last write? */ 1219 /* Do we perhaps have any outstanding writes, or has
1135 if (nfsi->npages == 0 || new_isize > cur_isize) { 1220 * the file grown beyond our last write? */
1136 i_size_write(inode, new_isize); 1221 if (nfsi->npages == 0 || new_isize > cur_isize) {
1137 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; 1222 i_size_write(inode, new_isize);
1223 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1224 }
1225 dprintk("NFS: isize change on server for file %s/%ld\n",
1226 inode->i_sb->s_id, inode->i_ino);
1138 } 1227 }
1139 dprintk("NFS: isize change on server for file %s/%ld\n",
1140 inode->i_sb->s_id, inode->i_ino);
1141 } 1228 }
1142 1229
1143 1230
1144 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); 1231 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
1145 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); 1232 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
1146 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
1147 nfsi->change_attr = fattr->change_attr;
1148
1149 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
1150 inode->i_uid != fattr->uid ||
1151 inode->i_gid != fattr->gid)
1152 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1153 1233
1154 if (inode->i_nlink != fattr->nlink) 1234 if (fattr->valid & NFS_ATTR_FATTR_MODE) {
1155 invalid |= NFS_INO_INVALID_ATTR; 1235 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
1236 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1237 inode->i_mode = fattr->mode;
1238 }
1239 }
1240 if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
1241 if (inode->i_uid != fattr->uid) {
1242 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1243 inode->i_uid = fattr->uid;
1244 }
1245 }
1246 if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
1247 if (inode->i_gid != fattr->gid) {
1248 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1249 inode->i_gid = fattr->gid;
1250 }
1251 }
1156 1252
1157 inode->i_mode = fattr->mode; 1253 if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
1158 inode->i_nlink = fattr->nlink; 1254 if (inode->i_nlink != fattr->nlink) {
1159 inode->i_uid = fattr->uid; 1255 invalid |= NFS_INO_INVALID_ATTR;
1160 inode->i_gid = fattr->gid; 1256 if (S_ISDIR(inode->i_mode))
1257 invalid |= NFS_INO_INVALID_DATA;
1258 inode->i_nlink = fattr->nlink;
1259 }
1260 }
1161 1261
1162 if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { 1262 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
1163 /* 1263 /*
 1164 * report the blocks in 512-byte units 1264 * report the blocks in 512-byte units
1165 */ 1265 */
1166 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); 1266 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
1167 } else {
1168 inode->i_blocks = fattr->du.nfs2.blocks;
1169 } 1267 }
1268 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
1269 inode->i_blocks = fattr->du.nfs2.blocks;
1170 1270
1171 /* Update attrtimeo value if we're out of the unstable period */ 1271 /* Update attrtimeo value if we're out of the unstable period */
1172 if (invalid & NFS_INO_INVALID_ATTR) { 1272 if (invalid & NFS_INO_INVALID_ATTR) {
@@ -1274,7 +1374,6 @@ static void init_once(void *foo)
1274 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 1374 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1275 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 1375 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1276 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); 1376 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
1277 nfsi->ncommit = 0;
1278 nfsi->npages = 0; 1377 nfsi->npages = 0;
1279 atomic_set(&nfsi->silly_count, 1); 1378 atomic_set(&nfsi->silly_count, 1);
1280 INIT_HLIST_HEAD(&nfsi->silly_list); 1379 INIT_HLIST_HEAD(&nfsi->silly_list);
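
The inode.c rework above is the core of the series: each fattr field gains its own NFS_ATTR_FATTR_* validity bit, and an inode field is overwritten only when the server actually supplied that attribute, so partial replies no longer smear zeroes or stale values into the cache. A self-contained sketch of the guarded-update idea (bit names and struct layout invented for the demo):

#include <stdio.h>

#define ATTR_SIZE  (1u << 0)	/* demo stand-ins for NFS_ATTR_FATTR_* */
#define ATTR_NLINK (1u << 1)
#define ATTR_MTIME (1u << 2)

struct fattr { unsigned valid; long long size; int nlink; long mtime; };
struct inode { long long size; int nlink; long mtime; };

static void update_inode(struct inode *ino, const struct fattr *f)
{
	if (f->valid & ATTR_SIZE)
		ino->size = f->size;
	if (f->valid & ATTR_NLINK)
		ino->nlink = f->nlink;
	if (f->valid & ATTR_MTIME)
		ino->mtime = f->mtime;
	/* attributes the server did not report keep their cached values */
}

int main(void)
{
	struct inode ino = { .size = 4096, .nlink = 1, .mtime = 100 };
	struct fattr f = { .valid = ATTR_SIZE | ATTR_MTIME,
			   .size = 8192, .mtime = 200 };

	update_inode(&ino, &f);
	printf("size=%lld nlink=%d mtime=%ld\n", ino.size, ino.nlink, ino.mtime);
	return 0;
}

The same pairing discipline shows up in the WCC hunks, where a pre-op value is only trusted when both its PRE* bit and the matching post-op bit are set.
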
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 340ede8f608f..2041f68ff1cc 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -152,6 +152,9 @@ extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus);
152extern struct rpc_procinfo nfs4_procedures[]; 152extern struct rpc_procinfo nfs4_procedures[];
153#endif 153#endif
154 154
155/* proc.c */
156void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
157
155/* dir.c */ 158/* dir.c */
156extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); 159extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
157 160
@@ -165,6 +168,7 @@ extern void nfs_clear_inode(struct inode *);
165extern void nfs4_clear_inode(struct inode *); 168extern void nfs4_clear_inode(struct inode *);
166#endif 169#endif
167void nfs_zap_acl_cache(struct inode *inode); 170void nfs_zap_acl_cache(struct inode *inode);
171extern int nfs_wait_bit_killable(void *word);
168 172
169/* super.c */ 173/* super.c */
170void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *); 174void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 28bab67d1519..c862c9340f9a 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -120,8 +120,8 @@ xdr_decode_time(__be32 *p, struct timespec *timep)
120static __be32 * 120static __be32 *
121xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr) 121xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
122{ 122{
123 u32 rdev; 123 u32 rdev, type;
124 fattr->type = (enum nfs_ftype) ntohl(*p++); 124 type = ntohl(*p++);
125 fattr->mode = ntohl(*p++); 125 fattr->mode = ntohl(*p++);
126 fattr->nlink = ntohl(*p++); 126 fattr->nlink = ntohl(*p++);
127 fattr->uid = ntohl(*p++); 127 fattr->uid = ntohl(*p++);
@@ -136,10 +136,9 @@ xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
136 p = xdr_decode_time(p, &fattr->atime); 136 p = xdr_decode_time(p, &fattr->atime);
137 p = xdr_decode_time(p, &fattr->mtime); 137 p = xdr_decode_time(p, &fattr->mtime);
138 p = xdr_decode_time(p, &fattr->ctime); 138 p = xdr_decode_time(p, &fattr->ctime);
139 fattr->valid |= NFS_ATTR_FATTR; 139 fattr->valid |= NFS_ATTR_FATTR_V2;
140 fattr->rdev = new_decode_dev(rdev); 140 fattr->rdev = new_decode_dev(rdev);
141 if (fattr->type == NFCHR && rdev == NFS2_FIFO_DEV) { 141 if (type == NFCHR && rdev == NFS2_FIFO_DEV) {
142 fattr->type = NFFIFO;
143 fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO; 142 fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
144 fattr->rdev = 0; 143 fattr->rdev = 0;
145 } 144 }
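
The v2 decoder above stops storing an nfs_ftype and keeps only the mode, applying the classic NFSv2 quirk inline: a "character device" whose rdev is NFS2_FIFO_DEV is really a FIFO. A compact demo of that rewrite (the NFCHR wire value of 4 and the (u32)-1 device number follow the Linux nfs2 headers, but treat them as assumptions here):

#include <stdio.h>
#include <sys/stat.h>

#define NFS2_FIFO_DEV	0xffffffffU	/* (u32)-1 */
#define NFCHR		4		/* NFSv2 wire value: character device */

int main(void)
{
	unsigned int type = NFCHR, rdev = NFS2_FIFO_DEV;
	mode_t mode = S_IFCHR | 0644;	/* what the fattr first decodes to */

	if (type == NFCHR && rdev == NFS2_FIFO_DEV) {
		mode = (mode & ~S_IFMT) | S_IFIFO;	/* really a FIFO */
		rdev = 0;
	}
	printf("fifo? %s (rdev=%u)\n", S_ISFIFO(mode) ? "yes" : "no", rdev);
	return 0;
}
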
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index c55be7a7679e..b82fe6847f14 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -834,4 +834,5 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
834 .commit_done = nfs3_commit_done, 834 .commit_done = nfs3_commit_done,
835 .lock = nfs3_proc_lock, 835 .lock = nfs3_proc_lock,
836 .clear_acl_cache = nfs3_forget_cached_acls, 836 .clear_acl_cache = nfs3_forget_cached_acls,
837 .close_context = nfs_close_context,
837}; 838};
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 6cdeacffde46..e6a1932c7110 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -91,19 +91,15 @@
91/* 91/*
92 * Map file type to S_IFMT bits 92 * Map file type to S_IFMT bits
93 */ 93 */
94static struct { 94static const umode_t nfs_type2fmt[] = {
95 unsigned int mode; 95 [NF3BAD] = 0,
96 unsigned int nfs2type; 96 [NF3REG] = S_IFREG,
97} nfs_type2fmt[] = { 97 [NF3DIR] = S_IFDIR,
98 { 0, NFNON }, 98 [NF3BLK] = S_IFBLK,
99 { S_IFREG, NFREG }, 99 [NF3CHR] = S_IFCHR,
100 { S_IFDIR, NFDIR }, 100 [NF3LNK] = S_IFLNK,
101 { S_IFBLK, NFBLK }, 101 [NF3SOCK] = S_IFSOCK,
102 { S_IFCHR, NFCHR }, 102 [NF3FIFO] = S_IFIFO,
103 { S_IFLNK, NFLNK },
104 { S_IFSOCK, NFSOCK },
105 { S_IFIFO, NFFIFO },
106 { 0, NFBAD }
107}; 103};
108 104
109/* 105/*
@@ -148,13 +144,12 @@ static __be32 *
148xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr) 144xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
149{ 145{
150 unsigned int type, major, minor; 146 unsigned int type, major, minor;
151 int fmode; 147 umode_t fmode;
152 148
153 type = ntohl(*p++); 149 type = ntohl(*p++);
154 if (type >= NF3BAD) 150 if (type > NF3FIFO)
155 type = NF3BAD; 151 type = NF3NON;
156 fmode = nfs_type2fmt[type].mode; 152 fmode = nfs_type2fmt[type];
157 fattr->type = nfs_type2fmt[type].nfs2type;
158 fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode; 153 fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode;
159 fattr->nlink = ntohl(*p++); 154 fattr->nlink = ntohl(*p++);
160 fattr->uid = ntohl(*p++); 155 fattr->uid = ntohl(*p++);
@@ -177,7 +172,7 @@ xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
177 p = xdr_decode_time3(p, &fattr->ctime); 172 p = xdr_decode_time3(p, &fattr->ctime);
178 173
179 /* Update the mode bits */ 174 /* Update the mode bits */
180 fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3); 175 fattr->valid |= NFS_ATTR_FATTR_V3;
181 return p; 176 return p;
182} 177}
183 178
@@ -233,7 +228,9 @@ xdr_decode_wcc_attr(__be32 *p, struct nfs_fattr *fattr)
233 p = xdr_decode_hyper(p, &fattr->pre_size); 228 p = xdr_decode_hyper(p, &fattr->pre_size);
234 p = xdr_decode_time3(p, &fattr->pre_mtime); 229 p = xdr_decode_time3(p, &fattr->pre_mtime);
235 p = xdr_decode_time3(p, &fattr->pre_ctime); 230 p = xdr_decode_time3(p, &fattr->pre_ctime);
236 fattr->valid |= NFS_ATTR_WCC; 231 fattr->valid |= NFS_ATTR_FATTR_PRESIZE
232 | NFS_ATTR_FATTR_PREMTIME
233 | NFS_ATTR_FATTR_PRECTIME;
237 return p; 234 return p;
238} 235}
239 236
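
The v3 type map above collapses into a flat umode_t array indexed by the wire type, with out-of-range values clamped before the lookup so a bogus server reply cannot index past the table. A standalone demo (enum values follow the Linux nfs3 headers / RFC 1813 ftype3):

#include <stdio.h>
#include <sys/stat.h>

enum { NF3NON, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3SOCK, NF3FIFO };

static const mode_t nfs_type2fmt[] = {
	[NF3NON]  = 0,
	[NF3REG]  = S_IFREG,
	[NF3DIR]  = S_IFDIR,
	[NF3BLK]  = S_IFBLK,
	[NF3CHR]  = S_IFCHR,
	[NF3LNK]  = S_IFLNK,
	[NF3SOCK] = S_IFSOCK,
	[NF3FIFO] = S_IFIFO,
};

int main(void)
{
	unsigned int type = 42;		/* bogus wire value from the server */

	if (type > NF3FIFO)		/* clamp before indexing */
		type = NF3NON;
	printf("fmt bits: %o\n", (unsigned int)nfs_type2fmt[type]);
	return 0;
}

The nfs4xdr.c hunks later in this diff apply the same designated-initializer treatment to the NF4* types.
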
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8dde84b988d9..97bacccff579 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -193,14 +193,6 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
193 kunmap_atomic(start, KM_USER0); 193 kunmap_atomic(start, KM_USER0);
194} 194}
195 195
196static int nfs4_wait_bit_killable(void *word)
197{
198 if (fatal_signal_pending(current))
199 return -ERESTARTSYS;
200 schedule();
201 return 0;
202}
203
204static int nfs4_wait_clnt_recover(struct nfs_client *clp) 196static int nfs4_wait_clnt_recover(struct nfs_client *clp)
205{ 197{
206 int res; 198 int res;
@@ -208,7 +200,7 @@ static int nfs4_wait_clnt_recover(struct nfs_client *clp)
208 might_sleep(); 200 might_sleep();
209 201
210 res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, 202 res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
211 nfs4_wait_bit_killable, TASK_KILLABLE); 203 nfs_wait_bit_killable, TASK_KILLABLE);
212 return res; 204 return res;
213} 205}
214 206
@@ -1439,7 +1431,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1439 if (calldata->arg.seqid == NULL) 1431 if (calldata->arg.seqid == NULL)
1440 goto out_free_calldata; 1432 goto out_free_calldata;
1441 calldata->arg.fmode = 0; 1433 calldata->arg.fmode = 0;
1442 calldata->arg.bitmask = server->attr_bitmask; 1434 calldata->arg.bitmask = server->cache_consistency_bitmask;
1443 calldata->res.fattr = &calldata->fattr; 1435 calldata->res.fattr = &calldata->fattr;
1444 calldata->res.seqid = calldata->arg.seqid; 1436 calldata->res.seqid = calldata->arg.seqid;
1445 calldata->res.server = server; 1437 calldata->res.server = server;
@@ -1580,6 +1572,15 @@ out_drop:
1580 return 0; 1572 return 0;
1581} 1573}
1582 1574
1575void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
1576{
1577 if (ctx->state == NULL)
1578 return;
1579 if (is_sync)
1580 nfs4_close_sync(&ctx->path, ctx->state, ctx->mode);
1581 else
1582 nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
1583}
1583 1584
1584static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) 1585static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
1585{ 1586{
@@ -1600,6 +1601,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
1600 server->caps |= NFS_CAP_HARDLINKS; 1601 server->caps |= NFS_CAP_HARDLINKS;
1601 if (res.has_symlinks != 0) 1602 if (res.has_symlinks != 0)
1602 server->caps |= NFS_CAP_SYMLINKS; 1603 server->caps |= NFS_CAP_SYMLINKS;
1604 memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask));
1605 server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
1606 server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
1603 server->acl_bitmask = res.acl_bitmask; 1607 server->acl_bitmask = res.acl_bitmask;
1604 } 1608 }
1605 return status; 1609 return status;
@@ -2079,7 +2083,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
2079 struct nfs_removeargs *args = msg->rpc_argp; 2083 struct nfs_removeargs *args = msg->rpc_argp;
2080 struct nfs_removeres *res = msg->rpc_resp; 2084 struct nfs_removeres *res = msg->rpc_resp;
2081 2085
2082 args->bitmask = server->attr_bitmask; 2086 args->bitmask = server->cache_consistency_bitmask;
2083 res->server = server; 2087 res->server = server;
2084 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; 2088 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
2085} 2089}
@@ -2323,7 +2327,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2323 .pages = &page, 2327 .pages = &page,
2324 .pgbase = 0, 2328 .pgbase = 0,
2325 .count = count, 2329 .count = count,
2326 .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, 2330 .bitmask = NFS_SERVER(dentry->d_inode)->cache_consistency_bitmask,
2327 }; 2331 };
2328 struct nfs4_readdir_res res; 2332 struct nfs4_readdir_res res;
2329 struct rpc_message msg = { 2333 struct rpc_message msg = {
@@ -2552,7 +2556,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
2552{ 2556{
2553 struct nfs_server *server = NFS_SERVER(data->inode); 2557 struct nfs_server *server = NFS_SERVER(data->inode);
2554 2558
2555 data->args.bitmask = server->attr_bitmask; 2559 data->args.bitmask = server->cache_consistency_bitmask;
2556 data->res.server = server; 2560 data->res.server = server;
2557 data->timestamp = jiffies; 2561 data->timestamp = jiffies;
2558 2562
@@ -2575,7 +2579,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa
2575{ 2579{
2576 struct nfs_server *server = NFS_SERVER(data->inode); 2580 struct nfs_server *server = NFS_SERVER(data->inode);
2577 2581
2578 data->args.bitmask = server->attr_bitmask; 2582 data->args.bitmask = server->cache_consistency_bitmask;
2579 data->res.server = server; 2583 data->res.server = server;
2580 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; 2584 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
2581} 2585}
@@ -3678,6 +3682,19 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
3678 return len; 3682 return len;
3679} 3683}
3680 3684
3685static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr)
3686{
3687 if (!((fattr->valid & NFS_ATTR_FATTR_FILEID) &&
3688 (fattr->valid & NFS_ATTR_FATTR_FSID) &&
3689 (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)))
3690 return;
3691
3692 fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
3693 NFS_ATTR_FATTR_NLINK;
3694 fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO;
3695 fattr->nlink = 2;
3696}
3697
3681int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, 3698int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
3682 struct nfs4_fs_locations *fs_locations, struct page *page) 3699 struct nfs4_fs_locations *fs_locations, struct page *page)
3683{ 3700{
@@ -3704,6 +3721,7 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
3704 fs_locations->server = server; 3721 fs_locations->server = server;
3705 fs_locations->nlocations = 0; 3722 fs_locations->nlocations = 0;
3706 status = rpc_call_sync(server->client, &msg, 0); 3723 status = rpc_call_sync(server->client, &msg, 0);
3724 nfs_fixup_referral_attributes(&fs_locations->fattr);
3707 dprintk("%s: returned status = %d\n", __func__, status); 3725 dprintk("%s: returned status = %d\n", __func__, status);
3708 return status; 3726 return status;
3709} 3727}
@@ -3767,6 +3785,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
3767 .commit_done = nfs4_commit_done, 3785 .commit_done = nfs4_commit_done,
3768 .lock = nfs4_proc_lock, 3786 .lock = nfs4_proc_lock,
3769 .clear_acl_cache = nfs4_zap_acl_attr, 3787 .clear_acl_cache = nfs4_zap_acl_attr,
3788 .close_context = nfs4_close_context,
3770}; 3789};
3771 3790
3772/* 3791/*
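
The new cache_consistency_bitmask in the nfs4proc.c hunks starts from the server's supported-attribute mask and is ANDed down to change/size plus the two time attributes, so the GETATTRs piggybacked on CLOSE, REMOVE, WRITE and COMMIT fetch only what cache revalidation needs. The masking step in isolation (bit positions here are illustrative, not the real FATTR4_WORD* layout):

#include <stdio.h>

#define WORD0_CHANGE		(1u << 0)	/* illustrative bit values */
#define WORD0_SIZE		(1u << 1)
#define WORD1_TIME_METADATA	(1u << 0)
#define WORD1_TIME_MODIFY	(1u << 1)

int main(void)
{
	unsigned attr_bitmask[2] = { 0xffffffffu, 0xffffffffu }; /* server caps */
	unsigned cc_bitmask[2];

	cc_bitmask[0] = attr_bitmask[0] & (WORD0_CHANGE | WORD0_SIZE);
	cc_bitmask[1] = attr_bitmask[1] &
			(WORD1_TIME_METADATA | WORD1_TIME_MODIFY);
	printf("word0=%#x word1=%#x\n", cc_bitmask[0], cc_bitmask[1]);
	return 0;
}
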
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2022fe47966f..0298e909559f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -62,8 +62,14 @@ static LIST_HEAD(nfs4_clientid_list);
62 62
63static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) 63static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
64{ 64{
65 int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, 65 unsigned short port;
66 nfs_callback_tcpport, cred); 66 int status;
67
68 port = nfs_callback_tcpport;
69 if (clp->cl_addr.ss_family == AF_INET6)
70 port = nfs_callback_tcpport6;
71
72 status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred);
67 if (status == 0) 73 if (status == 0)
68 status = nfs4_proc_setclientid_confirm(clp, cred); 74 status = nfs4_proc_setclientid_confirm(clp, cred);
69 if (status == 0) 75 if (status == 0)
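
nfs4_init_client() now advertises whichever callback port matches the address family of the client's own transport. The selection in miniature (the port numbers are placeholders, not NFS defaults):

#include <stdio.h>
#include <sys/socket.h>

static unsigned short tcpport = 40001, tcpport6 = 40002;	/* placeholders */

static unsigned short callback_port(int family)
{
	return family == AF_INET6 ? tcpport6 : tcpport;
}

int main(void)
{
	printf("v4 callback: %u, v6 callback: %u\n",
	       callback_port(AF_INET), callback_port(AF_INET6));
	return 0;
}
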
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index d1e4c8f8a0a9..1690f0e44b91 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -522,20 +522,17 @@ static int nfs4_stat_to_errno(int);
522 decode_lookup_maxsz + \ 522 decode_lookup_maxsz + \
523 decode_fs_locations_maxsz) 523 decode_fs_locations_maxsz)
524 524
525static struct { 525static const umode_t nfs_type2fmt[] = {
526 unsigned int mode; 526 [NF4BAD] = 0,
527 unsigned int nfs2type; 527 [NF4REG] = S_IFREG,
528} nfs_type2fmt[] = { 528 [NF4DIR] = S_IFDIR,
529 { 0, NFNON }, 529 [NF4BLK] = S_IFBLK,
530 { S_IFREG, NFREG }, 530 [NF4CHR] = S_IFCHR,
531 { S_IFDIR, NFDIR }, 531 [NF4LNK] = S_IFLNK,
532 { S_IFBLK, NFBLK }, 532 [NF4SOCK] = S_IFSOCK,
533 { S_IFCHR, NFCHR }, 533 [NF4FIFO] = S_IFIFO,
534 { S_IFLNK, NFLNK }, 534 [NF4ATTRDIR] = 0,
535 { S_IFSOCK, NFSOCK }, 535 [NF4NAMEDATTR] = 0,
536 { S_IFIFO, NFFIFO },
537 { 0, NFNON },
538 { 0, NFNON },
539}; 536};
540 537
541struct compound_hdr { 538struct compound_hdr {
@@ -2160,6 +2157,7 @@ static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint3
2160static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *type) 2157static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *type)
2161{ 2158{
2162 __be32 *p; 2159 __be32 *p;
2160 int ret = 0;
2163 2161
2164 *type = 0; 2162 *type = 0;
2165 if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U))) 2163 if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U)))
@@ -2172,14 +2170,16 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
2172 return -EIO; 2170 return -EIO;
2173 } 2171 }
2174 bitmap[0] &= ~FATTR4_WORD0_TYPE; 2172 bitmap[0] &= ~FATTR4_WORD0_TYPE;
2173 ret = NFS_ATTR_FATTR_TYPE;
2175 } 2174 }
2176 dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type].nfs2type); 2175 dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]);
2177 return 0; 2176 return ret;
2178} 2177}
2179 2178
2180static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) 2179static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
2181{ 2180{
2182 __be32 *p; 2181 __be32 *p;
2182 int ret = 0;
2183 2183
2184 *change = 0; 2184 *change = 0;
2185 if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U))) 2185 if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U)))
@@ -2188,15 +2188,17 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
2188 READ_BUF(8); 2188 READ_BUF(8);
2189 READ64(*change); 2189 READ64(*change);
2190 bitmap[0] &= ~FATTR4_WORD0_CHANGE; 2190 bitmap[0] &= ~FATTR4_WORD0_CHANGE;
2191 ret = NFS_ATTR_FATTR_CHANGE;
2191 } 2192 }
2192 dprintk("%s: change attribute=%Lu\n", __func__, 2193 dprintk("%s: change attribute=%Lu\n", __func__,
2193 (unsigned long long)*change); 2194 (unsigned long long)*change);
2194 return 0; 2195 return ret;
2195} 2196}
2196 2197
2197static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size) 2198static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size)
2198{ 2199{
2199 __be32 *p; 2200 __be32 *p;
2201 int ret = 0;
2200 2202
2201 *size = 0; 2203 *size = 0;
2202 if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U))) 2204 if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U)))
@@ -2205,9 +2207,10 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *
2205 READ_BUF(8); 2207 READ_BUF(8);
2206 READ64(*size); 2208 READ64(*size);
2207 bitmap[0] &= ~FATTR4_WORD0_SIZE; 2209 bitmap[0] &= ~FATTR4_WORD0_SIZE;
2210 ret = NFS_ATTR_FATTR_SIZE;
2208 } 2211 }
2209 dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size); 2212 dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size);
2210 return 0; 2213 return ret;
2211} 2214}
2212 2215
2213static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2216static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
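Note: the hunks above establish the convention every decode_attr_*() helper follows from here on: return a negative errno on XDR failure, the attribute's NFS_ATTR_FATTR_* bit when the attribute was present on the wire, and 0 when the server simply omitted it, so the caller can OR the result straight into a validity mask. A compact runnable model of the pattern (the bit values and bitmap layout are stand-ins):

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    #define FATTR_SIZE      0x0001  /* stand-in for NFS_ATTR_FATTR_SIZE */
    #define WORD0_SIZE      0x0010  /* stand-in for FATTR4_WORD0_SIZE */

    /* Model decoder: negative errno on XDR trouble, FATTR_SIZE when the
     * attribute was on the wire, 0 when the server omitted it. */
    static int decode_attr_size(uint32_t bitmap0, uint64_t wire, uint64_t *size)
    {
            *size = 0;
            if (bitmap0 & (WORD0_SIZE - 1U))
                    return -EIO;    /* lower-numbered attrs should be gone */
            if (!(bitmap0 & WORD0_SIZE))
                    return 0;
            *size = wire;
            return FATTR_SIZE;
    }

    int main(void)
    {
            uint64_t size;
            unsigned int valid = 0;
            int status = decode_attr_size(WORD0_SIZE, 4096, &size);

            if (status < 0)
                    return 1;       /* hard error: abort the whole reply */
            valid |= status;        /* an omitted attribute adds nothing */
            printf("size=%llu valid=%#x\n", (unsigned long long)size, valid);
            return 0;
    }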
@@ -2245,6 +2248,7 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
2245static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid) 2248static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid)
2246{ 2249{
2247 __be32 *p; 2250 __be32 *p;
2251 int ret = 0;
2248 2252
2249 fsid->major = 0; 2253 fsid->major = 0;
2250 fsid->minor = 0; 2254 fsid->minor = 0;
@@ -2255,11 +2259,12 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
2255 READ64(fsid->major); 2259 READ64(fsid->major);
2256 READ64(fsid->minor); 2260 READ64(fsid->minor);
2257 bitmap[0] &= ~FATTR4_WORD0_FSID; 2261 bitmap[0] &= ~FATTR4_WORD0_FSID;
2262 ret = NFS_ATTR_FATTR_FSID;
2258 } 2263 }
2259 dprintk("%s: fsid=(0x%Lx/0x%Lx)\n", __func__, 2264 dprintk("%s: fsid=(0x%Lx/0x%Lx)\n", __func__,
2260 (unsigned long long)fsid->major, 2265 (unsigned long long)fsid->major,
2261 (unsigned long long)fsid->minor); 2266 (unsigned long long)fsid->minor);
2262 return 0; 2267 return ret;
2263} 2268}
2264 2269
2265static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2270static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2297,6 +2302,7 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
2297static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) 2302static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
2298{ 2303{
2299 __be32 *p; 2304 __be32 *p;
2305 int ret = 0;
2300 2306
2301 *fileid = 0; 2307 *fileid = 0;
2302 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U))) 2308 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U)))
@@ -2305,14 +2311,16 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
2305 READ_BUF(8); 2311 READ_BUF(8);
2306 READ64(*fileid); 2312 READ64(*fileid);
2307 bitmap[0] &= ~FATTR4_WORD0_FILEID; 2313 bitmap[0] &= ~FATTR4_WORD0_FILEID;
2314 ret = NFS_ATTR_FATTR_FILEID;
2308 } 2315 }
2309 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); 2316 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
2310 return 0; 2317 return ret;
2311} 2318}
2312 2319
2313static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) 2320static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
2314{ 2321{
2315 __be32 *p; 2322 __be32 *p;
2323 int ret = 0;
2316 2324
2317 *fileid = 0; 2325 *fileid = 0;
2318 if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U))) 2326 if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U)))
@@ -2321,9 +2329,10 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
2321 READ_BUF(8); 2329 READ_BUF(8);
2322 READ64(*fileid); 2330 READ64(*fileid);
2323 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; 2331 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
2332 ret = NFS_ATTR_FATTR_FILEID;
2324 } 2333 }
2325 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); 2334 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
2326 return 0; 2335 return ret;
2327} 2336}
2328 2337
2329static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2338static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2479,6 +2488,8 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
2479 if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES) 2488 if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES)
2480 res->nlocations++; 2489 res->nlocations++;
2481 } 2490 }
2491 if (res->nlocations != 0)
2492 status = NFS_ATTR_FATTR_V4_REFERRAL;
2482out: 2493out:
2483 dprintk("%s: fs_locations done, error = %d\n", __func__, status); 2494 dprintk("%s: fs_locations done, error = %d\n", __func__, status);
2484 return status; 2495 return status;
@@ -2580,26 +2591,30 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
2580 return status; 2591 return status;
2581} 2592}
2582 2593
2583static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *mode) 2594static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode)
2584{ 2595{
2596 uint32_t tmp;
2585 __be32 *p; 2597 __be32 *p;
2598 int ret = 0;
2586 2599
2587 *mode = 0; 2600 *mode = 0;
2588 if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U))) 2601 if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U)))
2589 return -EIO; 2602 return -EIO;
2590 if (likely(bitmap[1] & FATTR4_WORD1_MODE)) { 2603 if (likely(bitmap[1] & FATTR4_WORD1_MODE)) {
2591 READ_BUF(4); 2604 READ_BUF(4);
2592 READ32(*mode); 2605 READ32(tmp);
2593 *mode &= ~S_IFMT; 2606 *mode = tmp & ~S_IFMT;
2594 bitmap[1] &= ~FATTR4_WORD1_MODE; 2607 bitmap[1] &= ~FATTR4_WORD1_MODE;
2608 ret = NFS_ATTR_FATTR_MODE;
2595 } 2609 }
2596 dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode); 2610 dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode);
2597 return 0; 2611 return ret;
2598} 2612}
2599 2613
2600static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink) 2614static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink)
2601{ 2615{
2602 __be32 *p; 2616 __be32 *p;
2617 int ret = 0;
2603 2618
2604 *nlink = 1; 2619 *nlink = 1;
2605 if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U))) 2620 if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U)))
@@ -2608,15 +2623,17 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
2608 READ_BUF(4); 2623 READ_BUF(4);
2609 READ32(*nlink); 2624 READ32(*nlink);
2610 bitmap[1] &= ~FATTR4_WORD1_NUMLINKS; 2625 bitmap[1] &= ~FATTR4_WORD1_NUMLINKS;
2626 ret = NFS_ATTR_FATTR_NLINK;
2611 } 2627 }
2612 dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink); 2628 dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink);
2613 return 0; 2629 return ret;
2614} 2630}
2615 2631
2616static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid) 2632static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid)
2617{ 2633{
2618 uint32_t len; 2634 uint32_t len;
2619 __be32 *p; 2635 __be32 *p;
2636 int ret = 0;
2620 2637
2621 *uid = -2; 2638 *uid = -2;
2622 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) 2639 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
@@ -2626,7 +2643,9 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
2626 READ32(len); 2643 READ32(len);
2627 READ_BUF(len); 2644 READ_BUF(len);
2628 if (len < XDR_MAX_NETOBJ) { 2645 if (len < XDR_MAX_NETOBJ) {
2629 if (nfs_map_name_to_uid(clp, (char *)p, len, uid) != 0) 2646 if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0)
2647 ret = NFS_ATTR_FATTR_OWNER;
2648 else
2630 dprintk("%s: nfs_map_name_to_uid failed!\n", 2649 dprintk("%s: nfs_map_name_to_uid failed!\n",
2631 __func__); 2650 __func__);
2632 } else 2651 } else
@@ -2635,13 +2654,14 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
2635 bitmap[1] &= ~FATTR4_WORD1_OWNER; 2654 bitmap[1] &= ~FATTR4_WORD1_OWNER;
2636 } 2655 }
2637 dprintk("%s: uid=%d\n", __func__, (int)*uid); 2656 dprintk("%s: uid=%d\n", __func__, (int)*uid);
2638 return 0; 2657 return ret;
2639} 2658}
2640 2659
2641static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid) 2660static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid)
2642{ 2661{
2643 uint32_t len; 2662 uint32_t len;
2644 __be32 *p; 2663 __be32 *p;
2664 int ret = 0;
2645 2665
2646 *gid = -2; 2666 *gid = -2;
2647 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U))) 2667 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
@@ -2651,7 +2671,9 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
2651 READ32(len); 2671 READ32(len);
2652 READ_BUF(len); 2672 READ_BUF(len);
2653 if (len < XDR_MAX_NETOBJ) { 2673 if (len < XDR_MAX_NETOBJ) {
2654 if (nfs_map_group_to_gid(clp, (char *)p, len, gid) != 0) 2674 if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0)
2675 ret = NFS_ATTR_FATTR_GROUP;
2676 else
2655 dprintk("%s: nfs_map_group_to_gid failed!\n", 2677 dprintk("%s: nfs_map_group_to_gid failed!\n",
2656 __func__); 2678 __func__);
2657 } else 2679 } else
@@ -2660,13 +2682,14 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
2660 bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; 2682 bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
2661 } 2683 }
2662 dprintk("%s: gid=%d\n", __func__, (int)*gid); 2684 dprintk("%s: gid=%d\n", __func__, (int)*gid);
2663 return 0; 2685 return ret;
2664} 2686}
2665 2687
2666static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev) 2688static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev)
2667{ 2689{
2668 uint32_t major = 0, minor = 0; 2690 uint32_t major = 0, minor = 0;
2669 __be32 *p; 2691 __be32 *p;
2692 int ret = 0;
2670 2693
2671 *rdev = MKDEV(0,0); 2694 *rdev = MKDEV(0,0);
2672 if (unlikely(bitmap[1] & (FATTR4_WORD1_RAWDEV - 1U))) 2695 if (unlikely(bitmap[1] & (FATTR4_WORD1_RAWDEV - 1U)))
@@ -2681,9 +2704,10 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
2681 if (MAJOR(tmp) == major && MINOR(tmp) == minor) 2704 if (MAJOR(tmp) == major && MINOR(tmp) == minor)
2682 *rdev = tmp; 2705 *rdev = tmp;
2683 bitmap[1] &= ~ FATTR4_WORD1_RAWDEV; 2706 bitmap[1] &= ~ FATTR4_WORD1_RAWDEV;
2707 ret = NFS_ATTR_FATTR_RDEV;
2684 } 2708 }
2685 dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor); 2709 dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor);
2686 return 0; 2710 return ret;
2687} 2711}
2688 2712
2689static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2713static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2740,6 +2764,7 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
2740static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used) 2764static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used)
2741{ 2765{
2742 __be32 *p; 2766 __be32 *p;
2767 int ret = 0;
2743 2768
2744 *used = 0; 2769 *used = 0;
2745 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U))) 2770 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U)))
@@ -2748,10 +2773,11 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
2748 READ_BUF(8); 2773 READ_BUF(8);
2749 READ64(*used); 2774 READ64(*used);
2750 bitmap[1] &= ~FATTR4_WORD1_SPACE_USED; 2775 bitmap[1] &= ~FATTR4_WORD1_SPACE_USED;
2776 ret = NFS_ATTR_FATTR_SPACE_USED;
2751 } 2777 }
2752 dprintk("%s: space used=%Lu\n", __func__, 2778 dprintk("%s: space used=%Lu\n", __func__,
2753 (unsigned long long)*used); 2779 (unsigned long long)*used);
2754 return 0; 2780 return ret;
2755} 2781}
2756 2782
2757static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) 2783static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
@@ -2778,6 +2804,8 @@ static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, str
2778 return -EIO; 2804 return -EIO;
2779 if (likely(bitmap[1] & FATTR4_WORD1_TIME_ACCESS)) { 2805 if (likely(bitmap[1] & FATTR4_WORD1_TIME_ACCESS)) {
2780 status = decode_attr_time(xdr, time); 2806 status = decode_attr_time(xdr, time);
2807 if (status == 0)
2808 status = NFS_ATTR_FATTR_ATIME;
2781 bitmap[1] &= ~FATTR4_WORD1_TIME_ACCESS; 2809 bitmap[1] &= ~FATTR4_WORD1_TIME_ACCESS;
2782 } 2810 }
2783 dprintk("%s: atime=%ld\n", __func__, (long)time->tv_sec); 2811 dprintk("%s: atime=%ld\n", __func__, (long)time->tv_sec);
@@ -2794,6 +2822,8 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s
2794 return -EIO; 2822 return -EIO;
2795 if (likely(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) { 2823 if (likely(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) {
2796 status = decode_attr_time(xdr, time); 2824 status = decode_attr_time(xdr, time);
2825 if (status == 0)
2826 status = NFS_ATTR_FATTR_CTIME;
2797 bitmap[1] &= ~FATTR4_WORD1_TIME_METADATA; 2827 bitmap[1] &= ~FATTR4_WORD1_TIME_METADATA;
2798 } 2828 }
2799 dprintk("%s: ctime=%ld\n", __func__, (long)time->tv_sec); 2829 dprintk("%s: ctime=%ld\n", __func__, (long)time->tv_sec);
@@ -2810,6 +2840,8 @@ static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, str
2810 return -EIO; 2840 return -EIO;
2811 if (likely(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) { 2841 if (likely(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) {
2812 status = decode_attr_time(xdr, time); 2842 status = decode_attr_time(xdr, time);
2843 if (status == 0)
2844 status = NFS_ATTR_FATTR_MTIME;
2813 bitmap[1] &= ~FATTR4_WORD1_TIME_MODIFY; 2845 bitmap[1] &= ~FATTR4_WORD1_TIME_MODIFY;
2814 } 2846 }
2815 dprintk("%s: mtime=%ld\n", __func__, (long)time->tv_sec); 2847 dprintk("%s: mtime=%ld\n", __func__, (long)time->tv_sec);
@@ -2994,63 +3026,116 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
2994 uint32_t attrlen, 3026 uint32_t attrlen,
2995 bitmap[2] = {0}, 3027 bitmap[2] = {0},
2996 type; 3028 type;
2997 int status, fmode = 0; 3029 int status;
3030 umode_t fmode = 0;
2998 uint64_t fileid; 3031 uint64_t fileid;
2999 3032
3000 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) 3033 status = decode_op_hdr(xdr, OP_GETATTR);
3001 goto xdr_error; 3034 if (status < 0)
3002 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
3003 goto xdr_error; 3035 goto xdr_error;
3004 3036
3005 fattr->bitmap[0] = bitmap[0]; 3037 status = decode_attr_bitmap(xdr, bitmap);
3006 fattr->bitmap[1] = bitmap[1]; 3038 if (status < 0)
3039 goto xdr_error;
3007 3040
3008 if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) 3041 status = decode_attr_length(xdr, &attrlen, &savep);
3042 if (status < 0)
3009 goto xdr_error; 3043 goto xdr_error;
3010 3044
3011 3045
3012 if ((status = decode_attr_type(xdr, bitmap, &type)) != 0) 3046 status = decode_attr_type(xdr, bitmap, &type);
3047 if (status < 0)
3013 goto xdr_error; 3048 goto xdr_error;
3014 fattr->type = nfs_type2fmt[type].nfs2type; 3049 fattr->mode = 0;
3015 fmode = nfs_type2fmt[type].mode; 3050 if (status != 0) {
3051 fattr->mode |= nfs_type2fmt[type];
3052 fattr->valid |= status;
3053 }
3016 3054
3017 if ((status = decode_attr_change(xdr, bitmap, &fattr->change_attr)) != 0) 3055 status = decode_attr_change(xdr, bitmap, &fattr->change_attr);
3056 if (status < 0)
3018 goto xdr_error; 3057 goto xdr_error;
3019 if ((status = decode_attr_size(xdr, bitmap, &fattr->size)) != 0) 3058 fattr->valid |= status;
3059
3060 status = decode_attr_size(xdr, bitmap, &fattr->size);
3061 if (status < 0)
3020 goto xdr_error; 3062 goto xdr_error;
3021 if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid)) != 0) 3063 fattr->valid |= status;
3064
3065 status = decode_attr_fsid(xdr, bitmap, &fattr->fsid);
3066 if (status < 0)
3022 goto xdr_error; 3067 goto xdr_error;
3023 if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0) 3068 fattr->valid |= status;
3069
3070 status = decode_attr_fileid(xdr, bitmap, &fattr->fileid);
3071 if (status < 0)
3024 goto xdr_error; 3072 goto xdr_error;
3025 if ((status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr, 3073 fattr->valid |= status;
3074
3075 status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr,
3026 struct nfs4_fs_locations, 3076 struct nfs4_fs_locations,
3027 fattr))) != 0) 3077 fattr));
3078 if (status < 0)
3028 goto xdr_error; 3079 goto xdr_error;
3029 if ((status = decode_attr_mode(xdr, bitmap, &fattr->mode)) != 0) 3080 fattr->valid |= status;
3081
3082 status = decode_attr_mode(xdr, bitmap, &fmode);
3083 if (status < 0)
3030 goto xdr_error; 3084 goto xdr_error;
3031 fattr->mode |= fmode; 3085 if (status != 0) {
3032 if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0) 3086 fattr->mode |= fmode;
3087 fattr->valid |= status;
3088 }
3089
3090 status = decode_attr_nlink(xdr, bitmap, &fattr->nlink);
3091 if (status < 0)
3033 goto xdr_error; 3092 goto xdr_error;
3034 if ((status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid)) != 0) 3093 fattr->valid |= status;
3094
3095 status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid);
3096 if (status < 0)
3035 goto xdr_error; 3097 goto xdr_error;
3036 if ((status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid)) != 0) 3098 fattr->valid |= status;
3099
3100 status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid);
3101 if (status < 0)
3037 goto xdr_error; 3102 goto xdr_error;
3038 if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0) 3103 fattr->valid |= status;
3104
3105 status = decode_attr_rdev(xdr, bitmap, &fattr->rdev);
3106 if (status < 0)
3039 goto xdr_error; 3107 goto xdr_error;
3040 if ((status = decode_attr_space_used(xdr, bitmap, &fattr->du.nfs3.used)) != 0) 3108 fattr->valid |= status;
3109
3110 status = decode_attr_space_used(xdr, bitmap, &fattr->du.nfs3.used);
3111 if (status < 0)
3041 goto xdr_error; 3112 goto xdr_error;
3042 if ((status = decode_attr_time_access(xdr, bitmap, &fattr->atime)) != 0) 3113 fattr->valid |= status;
3114
3115 status = decode_attr_time_access(xdr, bitmap, &fattr->atime);
3116 if (status < 0)
3043 goto xdr_error; 3117 goto xdr_error;
3044 if ((status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime)) != 0) 3118 fattr->valid |= status;
3119
3120 status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime);
3121 if (status < 0)
3045 goto xdr_error; 3122 goto xdr_error;
3046 if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0) 3123 fattr->valid |= status;
3124
3125 status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime);
3126 if (status < 0)
3047 goto xdr_error; 3127 goto xdr_error;
3048 if ((status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid)) != 0) 3128 fattr->valid |= status;
3129
3130 status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid);
3131 if (status < 0)
3049 goto xdr_error; 3132 goto xdr_error;
3050 if (fattr->fileid == 0 && fileid != 0) 3133 if (status != 0 && !(fattr->valid & status)) {
3051 fattr->fileid = fileid; 3134 fattr->fileid = fileid;
3052 if ((status = verify_attr_len(xdr, savep, attrlen)) == 0) 3135 fattr->valid |= status;
3053 fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4; 3136 }
3137
3138 status = verify_attr_len(xdr, savep, attrlen);
3054xdr_error: 3139xdr_error:
3055 dprintk("%s: xdr returned %d\n", __func__, -status); 3140 dprintk("%s: xdr returned %d\n", __func__, -status);
3056 return status; 3141 return status;
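Note: decode_getfattr() is restructured above to consume those per-attribute return values: each positive status is OR'ed into fattr->valid as the reply is parsed, replacing the old all-or-nothing `fattr->valid = NFS_ATTR_FATTR | ...` assignment after verify_attr_len(). The one subtle case is mounted_on_fileid, which reports the same NFS_ATTR_FATTR_FILEID bit as fileid itself, so the mask doubles as an "already set" test. A runnable sketch of that fallback (the bit value is a stand-in):

    #include <stdint.h>
    #include <stdio.h>

    #define FATTR_FILEID 0x0040     /* stand-in for NFS_ATTR_FATTR_FILEID */

    int main(void)
    {
            unsigned int valid = 0;         /* bits accumulated so far */
            uint64_t fileid = 0;
            uint64_t mounted_on_fileid = 1234;
            int status = FATTR_FILEID;      /* mounted_on_fileid was present */

            /* Both decoders report the same bit, so the fallback only
             * fires when the real fileid attribute was absent. */
            if (status != 0 && !(valid & status)) {
                    fileid = mounted_on_fileid;
                    valid |= status;
            }
            printf("fileid=%llu valid=%#x\n", (unsigned long long)fileid, valid);
            return 0;
    }

The file's final hunk applies the same philosophy to SETATTR replies: nfs4_xdr_dec_setattr() now treats the trailing GETATTR as advisory and ignores its status, instead of special-casing NFS4ERR_DELAY.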
@@ -4078,9 +4163,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_se
4078 status = decode_setattr(&xdr, res); 4163 status = decode_setattr(&xdr, res);
4079 if (status) 4164 if (status)
4080 goto out; 4165 goto out;
4081 status = decode_getfattr(&xdr, res->fattr, res->server); 4166 decode_getfattr(&xdr, res->fattr, res->server);
4082 if (status == NFS4ERR_DELAY)
4083 status = 0;
4084out: 4167out:
4085 return status; 4168 return status;
4086} 4169}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7f079209d70a..e2975939126a 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -176,17 +176,6 @@ void nfs_release_request(struct nfs_page *req)
176 kref_put(&req->wb_kref, nfs_free_request); 176 kref_put(&req->wb_kref, nfs_free_request);
177} 177}
178 178
179static int nfs_wait_bit_killable(void *word)
180{
181 int ret = 0;
182
183 if (fatal_signal_pending(current))
184 ret = -ERESTARTSYS;
185 else
186 schedule();
187 return ret;
188}
189
190/** 179/**
191 * nfs_wait_on_request - Wait for a request to complete. 180 * nfs_wait_on_request - Wait for a request to complete.
192 * @req: request to wait upon. 181 * @req: request to wait upon.
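Note: nfs_wait_bit_killable() is removed from pagelist.c not because it is dead but, presumably, because it is promoted to shared NFS code: the fs/nfs/write.c hunk further down passes it to wait_on_bit_lock() when taking the new NFS_INO_FLUSHING bit. For reference, the helper as deleted here, annotated (kernel context, not standalone):

    /* wait_on_bit_lock() calls this each time it finds the bit set;
     * returning non-zero aborts the wait. */
    static int nfs_wait_bit_killable(void *word)
    {
            int ret = 0;

            if (fatal_signal_pending(current))
                    ret = -ERESTARTSYS;     /* wait was killed; caller unwinds */
            else
                    schedule();             /* give up the CPU until woken */
            return ret;
    }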
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 193465210d7c..7be72d90d49d 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -663,4 +663,5 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
663 .commit_setup = nfs_proc_commit_setup, 663 .commit_setup = nfs_proc_commit_setup,
664 .lock = nfs_proc_lock, 664 .lock = nfs_proc_lock,
665 .lock_check_bounds = nfs_lock_check_bounds, 665 .lock_check_bounds = nfs_lock_check_bounds,
666 .close_context = nfs_close_context,
666}; 667};
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d6686f4786dc..0942fcbbad3c 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1018,6 +1018,7 @@ static int nfs_parse_mount_options(char *raw,
1018 case Opt_rdma: 1018 case Opt_rdma:
1019 mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */ 1019 mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */
1020 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; 1020 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
1021 xprt_load_transport(p);
1021 break; 1022 break;
1022 case Opt_acl: 1023 case Opt_acl:
1023 mnt->flags &= ~NFS_MOUNT_NOACL; 1024 mnt->flags &= ~NFS_MOUNT_NOACL;
@@ -1205,12 +1206,14 @@ static int nfs_parse_mount_options(char *raw,
1205 /* vector side protocols to TCP */ 1206 /* vector side protocols to TCP */
1206 mnt->flags |= NFS_MOUNT_TCP; 1207 mnt->flags |= NFS_MOUNT_TCP;
1207 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; 1208 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
1209 xprt_load_transport(string);
1208 break; 1210 break;
1209 default: 1211 default:
1210 errors++; 1212 errors++;
1211 dfprintk(MOUNT, "NFS: unrecognized " 1213 dfprintk(MOUNT, "NFS: unrecognized "
1212 "transport protocol\n"); 1214 "transport protocol\n");
1213 } 1215 }
1216 kfree(string);
1214 break; 1217 break;
1215 case Opt_mountproto: 1218 case Opt_mountproto:
1216 string = match_strdup(args); 1219 string = match_strdup(args);
@@ -1218,7 +1221,6 @@ static int nfs_parse_mount_options(char *raw,
1218 goto out_nomem; 1221 goto out_nomem;
1219 token = match_token(string, 1222 token = match_token(string,
1220 nfs_xprt_protocol_tokens, args); 1223 nfs_xprt_protocol_tokens, args);
1221 kfree(string);
1222 1224
1223 switch (token) { 1225 switch (token) {
1224 case Opt_xprt_udp: 1226 case Opt_xprt_udp:
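Note: in the mount-option parser above, the rdma cases now call xprt_load_transport() so the RPC/RDMA transport module is demand-loaded when an rdma mount is requested. Because that call reads the match_strdup()'d option string, the corresponding kfree(string) moves from immediately after match_token() to after the transport switch; freeing at the old spot would hand xprt_load_transport() freed memory. A runnable userspace model of the ownership change (load_transport() is a stand-in):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Stand-in for xprt_load_transport(): note that it reads the string. */
    static void load_transport(const char *name)
    {
            printf("request_module for transport \"%s\"\n", name);
    }

    int main(void)
    {
            char *string = strdup("rdma"); /* plays the match_strdup() result */

            if (string == NULL)
                    return 1;
            /* The string must survive the whole switch: one branch
             * consumes it. Freeing right after parsing (the old
             * placement) would pass freed memory below. */
            load_transport(string);
            free(string);   /* new placement: after the last user */
            return 0;
    }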
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9f9845859fc1..e560a78995a3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -313,19 +313,34 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
313int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) 313int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
314{ 314{
315 struct inode *inode = mapping->host; 315 struct inode *inode = mapping->host;
316 unsigned long *bitlock = &NFS_I(inode)->flags;
316 struct nfs_pageio_descriptor pgio; 317 struct nfs_pageio_descriptor pgio;
317 int err; 318 int err;
318 319
320 /* Stop dirtying of new pages while we sync */
321 err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
322 nfs_wait_bit_killable, TASK_KILLABLE);
323 if (err)
324 goto out_err;
325
319 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); 326 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
320 327
321 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); 328 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
322 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); 329 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
323 nfs_pageio_complete(&pgio); 330 nfs_pageio_complete(&pgio);
331
332 clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
333 smp_mb__after_clear_bit();
334 wake_up_bit(bitlock, NFS_INO_FLUSHING);
335
324 if (err < 0) 336 if (err < 0)
325 return err; 337 goto out_err;
326 if (pgio.pg_error < 0) 338 err = pgio.pg_error;
327 return pgio.pg_error; 339 if (err < 0)
340 goto out_err;
328 return 0; 341 return 0;
342out_err:
343 return err;
329} 344}
330 345
331/* 346/*
@@ -404,7 +419,6 @@ nfs_mark_request_commit(struct nfs_page *req)
404 struct nfs_inode *nfsi = NFS_I(inode); 419 struct nfs_inode *nfsi = NFS_I(inode);
405 420
406 spin_lock(&inode->i_lock); 421 spin_lock(&inode->i_lock);
407 nfsi->ncommit++;
408 set_bit(PG_CLEAN, &(req)->wb_flags); 422 set_bit(PG_CLEAN, &(req)->wb_flags);
409 radix_tree_tag_set(&nfsi->nfs_page_tree, 423 radix_tree_tag_set(&nfsi->nfs_page_tree,
410 req->wb_index, 424 req->wb_index,
@@ -524,6 +538,12 @@ static void nfs_cancel_commit_list(struct list_head *head)
524} 538}
525 539
526#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 540#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
541static int
542nfs_need_commit(struct nfs_inode *nfsi)
543{
544 return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT);
545}
546
527/* 547/*
528 * nfs_scan_commit - Scan an inode for commit requests 548 * nfs_scan_commit - Scan an inode for commit requests
529 * @inode: NFS inode to scan 549 * @inode: NFS inode to scan
@@ -538,16 +558,18 @@ static int
538nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) 558nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
539{ 559{
540 struct nfs_inode *nfsi = NFS_I(inode); 560 struct nfs_inode *nfsi = NFS_I(inode);
541 int res = 0;
542 561
543 if (nfsi->ncommit != 0) { 562 if (!nfs_need_commit(nfsi))
544 res = nfs_scan_list(nfsi, dst, idx_start, npages, 563 return 0;
545 NFS_PAGE_TAG_COMMIT); 564
546 nfsi->ncommit -= res; 565 return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
547 }
548 return res;
549} 566}
550#else 567#else
568static inline int nfs_need_commit(struct nfs_inode *nfsi)
569{
570 return 0;
571}
572
551static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) 573static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
552{ 574{
553 return 0; 575 return 0;
@@ -820,7 +842,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
820 data->args.stable = NFS_UNSTABLE; 842 data->args.stable = NFS_UNSTABLE;
821 if (how & FLUSH_STABLE) { 843 if (how & FLUSH_STABLE) {
822 data->args.stable = NFS_DATA_SYNC; 844 data->args.stable = NFS_DATA_SYNC;
823 if (!NFS_I(inode)->ncommit) 845 if (!nfs_need_commit(NFS_I(inode)))
824 data->args.stable = NFS_FILE_SYNC; 846 data->args.stable = NFS_FILE_SYNC;
825 } 847 }
826 848
@@ -1425,18 +1447,13 @@ static int nfs_write_mapping(struct address_space *mapping, int how)
1425{ 1447{
1426 struct writeback_control wbc = { 1448 struct writeback_control wbc = {
1427 .bdi = mapping->backing_dev_info, 1449 .bdi = mapping->backing_dev_info,
1428 .sync_mode = WB_SYNC_NONE, 1450 .sync_mode = WB_SYNC_ALL,
1429 .nr_to_write = LONG_MAX, 1451 .nr_to_write = LONG_MAX,
1430 .range_start = 0, 1452 .range_start = 0,
1431 .range_end = LLONG_MAX, 1453 .range_end = LLONG_MAX,
1432 .for_writepages = 1, 1454 .for_writepages = 1,
1433 }; 1455 };
1434 int ret;
1435 1456
1436 ret = __nfs_write_mapping(mapping, &wbc, how);
1437 if (ret < 0)
1438 return ret;
1439 wbc.sync_mode = WB_SYNC_ALL;
1440 return __nfs_write_mapping(mapping, &wbc, how); 1457 return __nfs_write_mapping(mapping, &wbc, how);
1441} 1458}
1442 1459
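Note: the write.c changes above serialize writeback per inode: nfs_writepages() takes the new NFS_INO_FLUSHING bit with wait_on_bit_lock() (killably, via the relocated nfs_wait_bit_killable()), flushes, then releases it with clear_bit_unlock() followed by a barrier and wake_up_bit(). The per-inode ncommit counter also gives way to nfs_need_commit(), which simply asks the radix tree whether any page still carries the COMMIT tag. A userspace C11 analogue of the bit-lock shape (it spins where the kernel sleeps, purely to show the acquire/release structure):

    #include <stdatomic.h>
    #include <stdio.h>

    /* One flusher at a time per inode, so new dirtying can be
     * throttled while a sync is in flight. */
    static atomic_flag flushing = ATOMIC_FLAG_INIT;

    static int writepages(void)
    {
            while (atomic_flag_test_and_set_explicit(&flushing,
                                                     memory_order_acquire))
                    ;       /* the kernel would sleep killably here */

            /* ... write back dirty pages ... */

            atomic_flag_clear_explicit(&flushing, memory_order_release);
            /* the kernel follows the clear with a barrier + wake_up_bit() */
            return 0;
    }

    int main(void)
    {
            printf("writepages -> %d\n", writepages());
            return 0;
    }

nfs_write_mapping() is likewise simplified from a WB_SYNC_NONE pass followed by a WB_SYNC_ALL pass down to a single WB_SYNC_ALL pass.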
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3d93b2064ce5..a4ed8644d69c 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -938,10 +938,12 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size)
938 char transport[16]; 938 char transport[16];
939 int port; 939 int port;
940 if (sscanf(buf, "%15s %4d", transport, &port) == 2) { 940 if (sscanf(buf, "%15s %4d", transport, &port) == 2) {
941 if (port < 1 || port > 65535)
942 return -EINVAL;
941 err = nfsd_create_serv(); 943 err = nfsd_create_serv();
942 if (!err) { 944 if (!err) {
943 err = svc_create_xprt(nfsd_serv, 945 err = svc_create_xprt(nfsd_serv,
944 transport, port, 946 transport, PF_INET, port,
945 SVC_SOCK_ANONYMOUS); 947 SVC_SOCK_ANONYMOUS);
946 if (err == -ENOENT) 948 if (err == -ENOENT)
947 /* Give a reasonable perror msg for 949 /* Give a reasonable perror msg for
@@ -960,7 +962,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size)
960 char transport[16]; 962 char transport[16];
961 int port; 963 int port;
962 if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) { 964 if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
963 if (port == 0) 965 if (port < 1 || port > 65535)
964 return -EINVAL; 966 return -EINVAL;
965 if (nfsd_serv) { 967 if (nfsd_serv) {
966 xprt = svc_find_xprt(nfsd_serv, transport, 968 xprt = svc_find_xprt(nfsd_serv, transport,
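Note: __write_ports() above gains symmetric validation: both the add and remove paths now reject ports outside 1..65535 (previously only removal was checked, and only against 0), and the add path passes an explicit PF_INET family to svc_create_xprt(). A runnable sketch of the parse-and-validate step:

    #include <stdio.h>

    /* Returns the port on success, -1 on a malformed or out-of-range
     * request line such as "tcp 0". */
    static int parse_port(const char *buf)
    {
            char transport[16];
            int port;

            if (sscanf(buf, "%15s %4d", transport, &port) != 2)
                    return -1;
            if (port < 1 || port > 65535)
                    return -1;
            return port;
    }

    int main(void)
    {
            printf("%d\n", parse_port("tcp 2049"));  /* 2049 */
            printf("%d\n", parse_port("tcp 0"));     /* -1 */
            return 0;
    }

nfssvc.c below makes the matching API change: svc_create_pooled() loses its blanket AF_INET argument and each svc_create_xprt() call now names the listener's address family itself.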
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 07e4f5d7baa8..bc3567bab8c4 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -229,7 +229,6 @@ int nfsd_create_serv(void)
229 229
230 atomic_set(&nfsd_busy, 0); 230 atomic_set(&nfsd_busy, 0);
231 nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, 231 nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
232 AF_INET,
233 nfsd_last_thread, nfsd, THIS_MODULE); 232 nfsd_last_thread, nfsd, THIS_MODULE);
234 if (nfsd_serv == NULL) 233 if (nfsd_serv == NULL)
235 err = -ENOMEM; 234 err = -ENOMEM;
@@ -244,7 +243,7 @@ static int nfsd_init_socks(int port)
244 if (!list_empty(&nfsd_serv->sv_permsocks)) 243 if (!list_empty(&nfsd_serv->sv_permsocks))
245 return 0; 244 return 0;
246 245
247 error = svc_create_xprt(nfsd_serv, "udp", port, 246 error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port,
248 SVC_SOCK_DEFAULTS); 247 SVC_SOCK_DEFAULTS);
249 if (error < 0) 248 if (error < 0)
250 return error; 249 return error;
@@ -253,7 +252,7 @@ static int nfsd_init_socks(int port)
253 if (error < 0) 252 if (error < 0)
254 return error; 253 return error;
255 254
256 error = svc_create_xprt(nfsd_serv, "tcp", port, 255 error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port,
257 SVC_SOCK_DEFAULTS); 256 SVC_SOCK_DEFAULTS);
258 if (error < 0) 257 if (error < 0)
259 return error; 258 return error;
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 34314b33dbd4..5a9e34475e37 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -32,8 +32,8 @@
32/** 32/**
33 * The little endian Unicode string $I30 as a global constant. 33 * The little endian Unicode string $I30 as a global constant.
34 */ 34 */
35ntfschar I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'), 35ntfschar I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'),
36 const_cpu_to_le16('3'), const_cpu_to_le16('0'), 0 }; 36 cpu_to_le16('3'), cpu_to_le16('0'), 0 };
37 37
38/** 38/**
39 * ntfs_lookup_inode_by_name - find an inode in a directory given its name 39 * ntfs_lookup_inode_by_name - find an inode in a directory given its name
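Note: from here through the rest of the NTFS changes the pattern is mechanical: the driver-private const_cpu_to_le{16,32,64}() wrappers are dropped in favour of the generic cpu_to_le*() macros, which by this point are themselves constant expressions and therefore legal in enum and static initializers (the layout.h hunk below deletes the wrapper definitions; everything after it is a rename). A simplified little-endian userspace model of why the wrappers became redundant:

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified model: on little-endian, cpu_to_le32() is the identity,
     * and crucially it is a constant expression, so it can seed enum
     * initializers directly. (Real kernels swap bytes on big-endian.) */
    #define cpu_to_le32(x) ((uint32_t)(x))

    enum {
            magic_FILE = cpu_to_le32(0x454c4946),   /* "FILE" */
            magic_INDX = cpu_to_le32(0x58444e49),   /* "INDX" */
    };

    int main(void)
    {
            printf("magic_FILE=%#x magic_INDX=%#x\n",
                   (unsigned int)magic_FILE, (unsigned int)magic_INDX);
            return 0;
    }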
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 86bef156cf0a..82c5085559c6 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1975,8 +1975,7 @@ int ntfs_read_inode_mount(struct inode *vi)
1975 goto em_put_err_out; 1975 goto em_put_err_out;
1976 next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry + 1976 next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
1977 le16_to_cpu(al_entry->length)); 1977 le16_to_cpu(al_entry->length));
1978 if (le32_to_cpu(al_entry->type) > 1978 if (le32_to_cpu(al_entry->type) > le32_to_cpu(AT_DATA))
1979 const_le32_to_cpu(AT_DATA))
1980 goto em_put_err_out; 1979 goto em_put_err_out;
1981 if (AT_DATA != al_entry->type) 1980 if (AT_DATA != al_entry->type)
1982 continue; 1981 continue;
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 1e383328eceb..50931b1ce4b9 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -31,19 +31,8 @@
31 31
32#include "types.h" 32#include "types.h"
33 33
34/*
35 * Constant endianness conversion defines.
36 */
37#define const_le16_to_cpu(x) __constant_le16_to_cpu(x)
38#define const_le32_to_cpu(x) __constant_le32_to_cpu(x)
39#define const_le64_to_cpu(x) __constant_le64_to_cpu(x)
40
41#define const_cpu_to_le16(x) __constant_cpu_to_le16(x)
42#define const_cpu_to_le32(x) __constant_cpu_to_le32(x)
43#define const_cpu_to_le64(x) __constant_cpu_to_le64(x)
44
45/* The NTFS oem_id "NTFS " */ 34/* The NTFS oem_id "NTFS " */
46#define magicNTFS const_cpu_to_le64(0x202020205346544eULL) 35#define magicNTFS cpu_to_le64(0x202020205346544eULL)
47 36
48/* 37/*
49 * Location of bootsector on partition: 38 * Location of bootsector on partition:
@@ -114,25 +103,25 @@ typedef struct {
114 */ 103 */
115enum { 104enum {
116 /* Found in $MFT/$DATA. */ 105 /* Found in $MFT/$DATA. */
117 magic_FILE = const_cpu_to_le32(0x454c4946), /* Mft entry. */ 106 magic_FILE = cpu_to_le32(0x454c4946), /* Mft entry. */
118 magic_INDX = const_cpu_to_le32(0x58444e49), /* Index buffer. */ 107 magic_INDX = cpu_to_le32(0x58444e49), /* Index buffer. */
119 magic_HOLE = const_cpu_to_le32(0x454c4f48), /* ? (NTFS 3.0+?) */ 108 magic_HOLE = cpu_to_le32(0x454c4f48), /* ? (NTFS 3.0+?) */
120 109
121 /* Found in $LogFile/$DATA. */ 110 /* Found in $LogFile/$DATA. */
122 magic_RSTR = const_cpu_to_le32(0x52545352), /* Restart page. */ 111 magic_RSTR = cpu_to_le32(0x52545352), /* Restart page. */
123 magic_RCRD = const_cpu_to_le32(0x44524352), /* Log record page. */ 112 magic_RCRD = cpu_to_le32(0x44524352), /* Log record page. */
124 113
125 /* Found in $LogFile/$DATA. (May be found in $MFT/$DATA, also?) */ 114 /* Found in $LogFile/$DATA. (May be found in $MFT/$DATA, also?) */
126 magic_CHKD = const_cpu_to_le32(0x444b4843), /* Modified by chkdsk. */ 115 magic_CHKD = cpu_to_le32(0x444b4843), /* Modified by chkdsk. */
127 116
128 /* Found in all ntfs-record-containing records. */ 117 /* Found in all ntfs-record-containing records. */
129 magic_BAAD = const_cpu_to_le32(0x44414142), /* Failed multi sector 118 magic_BAAD = cpu_to_le32(0x44414142), /* Failed multi sector
130 transfer was detected. */ 119 transfer was detected. */
131 /* 120 /*
132 * Found in $LogFile/$DATA when a page is full of 0xff bytes and is 121 * Found in $LogFile/$DATA when a page is full of 0xff bytes and is
133 * thus not initialized. Page must be initialized before using it. 122 * thus not initialized. Page must be initialized before using it.
134 */ 123 */
135 magic_empty = const_cpu_to_le32(0xffffffff) /* Record is empty. */ 124 magic_empty = cpu_to_le32(0xffffffff) /* Record is empty. */
136}; 125};
137 126
138typedef le32 NTFS_RECORD_TYPE; 127typedef le32 NTFS_RECORD_TYPE;
@@ -258,8 +247,8 @@ typedef enum {
258 * information about the mft record in which they are present. 247 * information about the mft record in which they are present.
259 */ 248 */
260enum { 249enum {
261 MFT_RECORD_IN_USE = const_cpu_to_le16(0x0001), 250 MFT_RECORD_IN_USE = cpu_to_le16(0x0001),
262 MFT_RECORD_IS_DIRECTORY = const_cpu_to_le16(0x0002), 251 MFT_RECORD_IS_DIRECTORY = cpu_to_le16(0x0002),
263} __attribute__ ((__packed__)); 252} __attribute__ ((__packed__));
264 253
265typedef le16 MFT_RECORD_FLAGS; 254typedef le16 MFT_RECORD_FLAGS;
@@ -309,7 +298,7 @@ typedef le16 MFT_RECORD_FLAGS;
309 * Note: The _LE versions will return a CPU endian formatted value! 298 * Note: The _LE versions will return a CPU endian formatted value!
310 */ 299 */
311#define MFT_REF_MASK_CPU 0x0000ffffffffffffULL 300#define MFT_REF_MASK_CPU 0x0000ffffffffffffULL
312#define MFT_REF_MASK_LE const_cpu_to_le64(MFT_REF_MASK_CPU) 301#define MFT_REF_MASK_LE cpu_to_le64(MFT_REF_MASK_CPU)
313 302
314typedef u64 MFT_REF; 303typedef u64 MFT_REF;
315typedef le64 leMFT_REF; 304typedef le64 leMFT_REF;
@@ -477,25 +466,25 @@ typedef struct {
477 * a revealing choice of symbol I do not know what is... (-; 466 * a revealing choice of symbol I do not know what is... (-;
478 */ 467 */
479enum { 468enum {
480 AT_UNUSED = const_cpu_to_le32( 0), 469 AT_UNUSED = cpu_to_le32( 0),
481 AT_STANDARD_INFORMATION = const_cpu_to_le32( 0x10), 470 AT_STANDARD_INFORMATION = cpu_to_le32( 0x10),
482 AT_ATTRIBUTE_LIST = const_cpu_to_le32( 0x20), 471 AT_ATTRIBUTE_LIST = cpu_to_le32( 0x20),
483 AT_FILE_NAME = const_cpu_to_le32( 0x30), 472 AT_FILE_NAME = cpu_to_le32( 0x30),
484 AT_OBJECT_ID = const_cpu_to_le32( 0x40), 473 AT_OBJECT_ID = cpu_to_le32( 0x40),
485 AT_SECURITY_DESCRIPTOR = const_cpu_to_le32( 0x50), 474 AT_SECURITY_DESCRIPTOR = cpu_to_le32( 0x50),
486 AT_VOLUME_NAME = const_cpu_to_le32( 0x60), 475 AT_VOLUME_NAME = cpu_to_le32( 0x60),
487 AT_VOLUME_INFORMATION = const_cpu_to_le32( 0x70), 476 AT_VOLUME_INFORMATION = cpu_to_le32( 0x70),
488 AT_DATA = const_cpu_to_le32( 0x80), 477 AT_DATA = cpu_to_le32( 0x80),
489 AT_INDEX_ROOT = const_cpu_to_le32( 0x90), 478 AT_INDEX_ROOT = cpu_to_le32( 0x90),
490 AT_INDEX_ALLOCATION = const_cpu_to_le32( 0xa0), 479 AT_INDEX_ALLOCATION = cpu_to_le32( 0xa0),
491 AT_BITMAP = const_cpu_to_le32( 0xb0), 480 AT_BITMAP = cpu_to_le32( 0xb0),
492 AT_REPARSE_POINT = const_cpu_to_le32( 0xc0), 481 AT_REPARSE_POINT = cpu_to_le32( 0xc0),
493 AT_EA_INFORMATION = const_cpu_to_le32( 0xd0), 482 AT_EA_INFORMATION = cpu_to_le32( 0xd0),
494 AT_EA = const_cpu_to_le32( 0xe0), 483 AT_EA = cpu_to_le32( 0xe0),
495 AT_PROPERTY_SET = const_cpu_to_le32( 0xf0), 484 AT_PROPERTY_SET = cpu_to_le32( 0xf0),
496 AT_LOGGED_UTILITY_STREAM = const_cpu_to_le32( 0x100), 485 AT_LOGGED_UTILITY_STREAM = cpu_to_le32( 0x100),
497 AT_FIRST_USER_DEFINED_ATTRIBUTE = const_cpu_to_le32( 0x1000), 486 AT_FIRST_USER_DEFINED_ATTRIBUTE = cpu_to_le32( 0x1000),
498 AT_END = const_cpu_to_le32(0xffffffff) 487 AT_END = cpu_to_le32(0xffffffff)
499}; 488};
500 489
501typedef le32 ATTR_TYPE; 490typedef le32 ATTR_TYPE;
@@ -539,13 +528,13 @@ typedef le32 ATTR_TYPE;
539 * equal then the second le32 values would be compared, etc. 528 * equal then the second le32 values would be compared, etc.
540 */ 529 */
541enum { 530enum {
542 COLLATION_BINARY = const_cpu_to_le32(0x00), 531 COLLATION_BINARY = cpu_to_le32(0x00),
543 COLLATION_FILE_NAME = const_cpu_to_le32(0x01), 532 COLLATION_FILE_NAME = cpu_to_le32(0x01),
544 COLLATION_UNICODE_STRING = const_cpu_to_le32(0x02), 533 COLLATION_UNICODE_STRING = cpu_to_le32(0x02),
545 COLLATION_NTOFS_ULONG = const_cpu_to_le32(0x10), 534 COLLATION_NTOFS_ULONG = cpu_to_le32(0x10),
546 COLLATION_NTOFS_SID = const_cpu_to_le32(0x11), 535 COLLATION_NTOFS_SID = cpu_to_le32(0x11),
547 COLLATION_NTOFS_SECURITY_HASH = const_cpu_to_le32(0x12), 536 COLLATION_NTOFS_SECURITY_HASH = cpu_to_le32(0x12),
548 COLLATION_NTOFS_ULONGS = const_cpu_to_le32(0x13), 537 COLLATION_NTOFS_ULONGS = cpu_to_le32(0x13),
549}; 538};
550 539
551typedef le32 COLLATION_RULE; 540typedef le32 COLLATION_RULE;
@@ -559,25 +548,25 @@ typedef le32 COLLATION_RULE;
559 * NT4. 548 * NT4.
560 */ 549 */
561enum { 550enum {
562 ATTR_DEF_INDEXABLE = const_cpu_to_le32(0x02), /* Attribute can be 551 ATTR_DEF_INDEXABLE = cpu_to_le32(0x02), /* Attribute can be
563 indexed. */ 552 indexed. */
564 ATTR_DEF_MULTIPLE = const_cpu_to_le32(0x04), /* Attribute type 553 ATTR_DEF_MULTIPLE = cpu_to_le32(0x04), /* Attribute type
565 can be present multiple times in the 554 can be present multiple times in the
566 mft records of an inode. */ 555 mft records of an inode. */
567 ATTR_DEF_NOT_ZERO = const_cpu_to_le32(0x08), /* Attribute value 556 ATTR_DEF_NOT_ZERO = cpu_to_le32(0x08), /* Attribute value
568 must contain at least one non-zero 557 must contain at least one non-zero
569 byte. */ 558 byte. */
570 ATTR_DEF_INDEXED_UNIQUE = const_cpu_to_le32(0x10), /* Attribute must be 559 ATTR_DEF_INDEXED_UNIQUE = cpu_to_le32(0x10), /* Attribute must be
571 indexed and the attribute value must be 560 indexed and the attribute value must be
572 unique for the attribute type in all of 561 unique for the attribute type in all of
573 the mft records of an inode. */ 562 the mft records of an inode. */
574 ATTR_DEF_NAMED_UNIQUE = const_cpu_to_le32(0x20), /* Attribute must be 563 ATTR_DEF_NAMED_UNIQUE = cpu_to_le32(0x20), /* Attribute must be
575 named and the name must be unique for 564 named and the name must be unique for
576 the attribute type in all of the mft 565 the attribute type in all of the mft
577 records of an inode. */ 566 records of an inode. */
578 ATTR_DEF_RESIDENT = const_cpu_to_le32(0x40), /* Attribute must be 567 ATTR_DEF_RESIDENT = cpu_to_le32(0x40), /* Attribute must be
579 resident. */ 568 resident. */
580 ATTR_DEF_ALWAYS_LOG = const_cpu_to_le32(0x80), /* Always log 569 ATTR_DEF_ALWAYS_LOG = cpu_to_le32(0x80), /* Always log
581 modifications to this attribute, 570 modifications to this attribute,
582 regardless of whether it is resident or 571 regardless of whether it is resident or
583 non-resident. Without this, only log 572 non-resident. Without this, only log
@@ -614,12 +603,12 @@ typedef struct {
614 * Attribute flags (16-bit). 603 * Attribute flags (16-bit).
615 */ 604 */
616enum { 605enum {
617 ATTR_IS_COMPRESSED = const_cpu_to_le16(0x0001), 606 ATTR_IS_COMPRESSED = cpu_to_le16(0x0001),
618 ATTR_COMPRESSION_MASK = const_cpu_to_le16(0x00ff), /* Compression method 607 ATTR_COMPRESSION_MASK = cpu_to_le16(0x00ff), /* Compression method
619 mask. Also, first 608 mask. Also, first
620 illegal value. */ 609 illegal value. */
621 ATTR_IS_ENCRYPTED = const_cpu_to_le16(0x4000), 610 ATTR_IS_ENCRYPTED = cpu_to_le16(0x4000),
622 ATTR_IS_SPARSE = const_cpu_to_le16(0x8000), 611 ATTR_IS_SPARSE = cpu_to_le16(0x8000),
623} __attribute__ ((__packed__)); 612} __attribute__ ((__packed__));
624 613
625typedef le16 ATTR_FLAGS; 614typedef le16 ATTR_FLAGS;
@@ -811,32 +800,32 @@ typedef ATTR_RECORD ATTR_REC;
811 * flags appear in all of the above. 800 * flags appear in all of the above.
812 */ 801 */
813enum { 802enum {
814 FILE_ATTR_READONLY = const_cpu_to_le32(0x00000001), 803 FILE_ATTR_READONLY = cpu_to_le32(0x00000001),
815 FILE_ATTR_HIDDEN = const_cpu_to_le32(0x00000002), 804 FILE_ATTR_HIDDEN = cpu_to_le32(0x00000002),
816 FILE_ATTR_SYSTEM = const_cpu_to_le32(0x00000004), 805 FILE_ATTR_SYSTEM = cpu_to_le32(0x00000004),
817 /* Old DOS volid. Unused in NT. = const_cpu_to_le32(0x00000008), */ 806 /* Old DOS volid. Unused in NT. = cpu_to_le32(0x00000008), */
818 807
819 FILE_ATTR_DIRECTORY = const_cpu_to_le32(0x00000010), 808 FILE_ATTR_DIRECTORY = cpu_to_le32(0x00000010),
820 /* Note, FILE_ATTR_DIRECTORY is not considered valid in NT. It is 809 /* Note, FILE_ATTR_DIRECTORY is not considered valid in NT. It is
821 reserved for the DOS SUBDIRECTORY flag. */ 810 reserved for the DOS SUBDIRECTORY flag. */
822 FILE_ATTR_ARCHIVE = const_cpu_to_le32(0x00000020), 811 FILE_ATTR_ARCHIVE = cpu_to_le32(0x00000020),
823 FILE_ATTR_DEVICE = const_cpu_to_le32(0x00000040), 812 FILE_ATTR_DEVICE = cpu_to_le32(0x00000040),
824 FILE_ATTR_NORMAL = const_cpu_to_le32(0x00000080), 813 FILE_ATTR_NORMAL = cpu_to_le32(0x00000080),
825 814
826 FILE_ATTR_TEMPORARY = const_cpu_to_le32(0x00000100), 815 FILE_ATTR_TEMPORARY = cpu_to_le32(0x00000100),
827 FILE_ATTR_SPARSE_FILE = const_cpu_to_le32(0x00000200), 816 FILE_ATTR_SPARSE_FILE = cpu_to_le32(0x00000200),
828 FILE_ATTR_REPARSE_POINT = const_cpu_to_le32(0x00000400), 817 FILE_ATTR_REPARSE_POINT = cpu_to_le32(0x00000400),
829 FILE_ATTR_COMPRESSED = const_cpu_to_le32(0x00000800), 818 FILE_ATTR_COMPRESSED = cpu_to_le32(0x00000800),
830 819
831 FILE_ATTR_OFFLINE = const_cpu_to_le32(0x00001000), 820 FILE_ATTR_OFFLINE = cpu_to_le32(0x00001000),
832 FILE_ATTR_NOT_CONTENT_INDEXED = const_cpu_to_le32(0x00002000), 821 FILE_ATTR_NOT_CONTENT_INDEXED = cpu_to_le32(0x00002000),
833 FILE_ATTR_ENCRYPTED = const_cpu_to_le32(0x00004000), 822 FILE_ATTR_ENCRYPTED = cpu_to_le32(0x00004000),
834 823
835 FILE_ATTR_VALID_FLAGS = const_cpu_to_le32(0x00007fb7), 824 FILE_ATTR_VALID_FLAGS = cpu_to_le32(0x00007fb7),
836 /* Note, FILE_ATTR_VALID_FLAGS masks out the old DOS VolId and the 825 /* Note, FILE_ATTR_VALID_FLAGS masks out the old DOS VolId and the
837 FILE_ATTR_DEVICE and preserves everything else. This mask is used 826 FILE_ATTR_DEVICE and preserves everything else. This mask is used
838 to obtain all flags that are valid for reading. */ 827 to obtain all flags that are valid for reading. */
839 FILE_ATTR_VALID_SET_FLAGS = const_cpu_to_le32(0x000031a7), 828 FILE_ATTR_VALID_SET_FLAGS = cpu_to_le32(0x000031a7),
840 /* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the 829 /* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the
841 F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT, 830 F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT,
842 F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask 831 F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask
@@ -846,11 +835,11 @@ enum {
846 * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION 835 * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION
847 * attribute of an mft record. 836 * attribute of an mft record.
848 */ 837 */
849 FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = const_cpu_to_le32(0x10000000), 838 FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = cpu_to_le32(0x10000000),
850 /* Note, this is a copy of the corresponding bit from the mft record, 839 /* Note, this is a copy of the corresponding bit from the mft record,
851 telling us whether this is a directory or not, i.e. whether it has 840 telling us whether this is a directory or not, i.e. whether it has
852 an index root attribute or not. */ 841 an index root attribute or not. */
853 FILE_ATTR_DUP_VIEW_INDEX_PRESENT = const_cpu_to_le32(0x20000000), 842 FILE_ATTR_DUP_VIEW_INDEX_PRESENT = cpu_to_le32(0x20000000),
854 /* Note, this is a copy of the corresponding bit from the mft record, 843 /* Note, this is a copy of the corresponding bit from the mft record,
855 telling us whether this file has a view index present (eg. object id 844 telling us whether this file has a view index present (eg. object id
856 index, quota index, one of the security indexes or the encrypting 845 index, quota index, one of the security indexes or the encrypting
@@ -1446,42 +1435,42 @@ enum {
1446 /* Specific rights for files and directories are as follows: */ 1435 /* Specific rights for files and directories are as follows: */
1447 1436
1448 /* Right to read data from the file. (FILE) */ 1437 /* Right to read data from the file. (FILE) */
1449 FILE_READ_DATA = const_cpu_to_le32(0x00000001), 1438 FILE_READ_DATA = cpu_to_le32(0x00000001),
1450 /* Right to list contents of a directory. (DIRECTORY) */ 1439 /* Right to list contents of a directory. (DIRECTORY) */
1451 FILE_LIST_DIRECTORY = const_cpu_to_le32(0x00000001), 1440 FILE_LIST_DIRECTORY = cpu_to_le32(0x00000001),
1452 1441
1453 /* Right to write data to the file. (FILE) */ 1442 /* Right to write data to the file. (FILE) */
1454 FILE_WRITE_DATA = const_cpu_to_le32(0x00000002), 1443 FILE_WRITE_DATA = cpu_to_le32(0x00000002),
1455 /* Right to create a file in the directory. (DIRECTORY) */ 1444 /* Right to create a file in the directory. (DIRECTORY) */
1456 FILE_ADD_FILE = const_cpu_to_le32(0x00000002), 1445 FILE_ADD_FILE = cpu_to_le32(0x00000002),
1457 1446
1458 /* Right to append data to the file. (FILE) */ 1447 /* Right to append data to the file. (FILE) */
1459 FILE_APPEND_DATA = const_cpu_to_le32(0x00000004), 1448 FILE_APPEND_DATA = cpu_to_le32(0x00000004),
1460 /* Right to create a subdirectory. (DIRECTORY) */ 1449 /* Right to create a subdirectory. (DIRECTORY) */
1461 FILE_ADD_SUBDIRECTORY = const_cpu_to_le32(0x00000004), 1450 FILE_ADD_SUBDIRECTORY = cpu_to_le32(0x00000004),
1462 1451
1463 /* Right to read extended attributes. (FILE/DIRECTORY) */ 1452 /* Right to read extended attributes. (FILE/DIRECTORY) */
1464 FILE_READ_EA = const_cpu_to_le32(0x00000008), 1453 FILE_READ_EA = cpu_to_le32(0x00000008),
1465 1454
1466 /* Right to write extended attributes. (FILE/DIRECTORY) */ 1455 /* Right to write extended attributes. (FILE/DIRECTORY) */
1467 FILE_WRITE_EA = const_cpu_to_le32(0x00000010), 1456 FILE_WRITE_EA = cpu_to_le32(0x00000010),
1468 1457
1469 /* Right to execute a file. (FILE) */ 1458 /* Right to execute a file. (FILE) */
1470 FILE_EXECUTE = const_cpu_to_le32(0x00000020), 1459 FILE_EXECUTE = cpu_to_le32(0x00000020),
1471 /* Right to traverse the directory. (DIRECTORY) */ 1460 /* Right to traverse the directory. (DIRECTORY) */
1472 FILE_TRAVERSE = const_cpu_to_le32(0x00000020), 1461 FILE_TRAVERSE = cpu_to_le32(0x00000020),
1473 1462
1474 /* 1463 /*
1475 * Right to delete a directory and all the files it contains (its 1464 * Right to delete a directory and all the files it contains (its
1476 * children), even if the files are read-only. (DIRECTORY) 1465 * children), even if the files are read-only. (DIRECTORY)
1477 */ 1466 */
1478 FILE_DELETE_CHILD = const_cpu_to_le32(0x00000040), 1467 FILE_DELETE_CHILD = cpu_to_le32(0x00000040),
1479 1468
1480 /* Right to read file attributes. (FILE/DIRECTORY) */ 1469 /* Right to read file attributes. (FILE/DIRECTORY) */
1481 FILE_READ_ATTRIBUTES = const_cpu_to_le32(0x00000080), 1470 FILE_READ_ATTRIBUTES = cpu_to_le32(0x00000080),
1482 1471
1483 /* Right to change file attributes. (FILE/DIRECTORY) */ 1472 /* Right to change file attributes. (FILE/DIRECTORY) */
1484 FILE_WRITE_ATTRIBUTES = const_cpu_to_le32(0x00000100), 1473 FILE_WRITE_ATTRIBUTES = cpu_to_le32(0x00000100),
1485 1474
1486 /* 1475 /*
1487 * The standard rights (bits 16 to 23). These are independent of the 1476 * The standard rights (bits 16 to 23). These are independent of the
@@ -1489,27 +1478,27 @@ enum {
1489 */ 1478 */
1490 1479
1491 /* Right to delete the object. */ 1480 /* Right to delete the object. */
1492 DELETE = const_cpu_to_le32(0x00010000), 1481 DELETE = cpu_to_le32(0x00010000),
1493 1482
1494 /* 1483 /*
1495 * Right to read the information in the object's security descriptor, 1484 * Right to read the information in the object's security descriptor,
1496 * not including the information in the SACL, i.e. right to read the 1485 * not including the information in the SACL, i.e. right to read the
1497 * security descriptor and owner. 1486 * security descriptor and owner.
1498 */ 1487 */
1499 READ_CONTROL = const_cpu_to_le32(0x00020000), 1488 READ_CONTROL = cpu_to_le32(0x00020000),
1500 1489
1501 /* Right to modify the DACL in the object's security descriptor. */ 1490 /* Right to modify the DACL in the object's security descriptor. */
1502 WRITE_DAC = const_cpu_to_le32(0x00040000), 1491 WRITE_DAC = cpu_to_le32(0x00040000),
1503 1492
1504 /* Right to change the owner in the object's security descriptor. */ 1493 /* Right to change the owner in the object's security descriptor. */
1505 WRITE_OWNER = const_cpu_to_le32(0x00080000), 1494 WRITE_OWNER = cpu_to_le32(0x00080000),
1506 1495
1507 /* 1496 /*
1508 * Right to use the object for synchronization. Enables a process to 1497 * Right to use the object for synchronization. Enables a process to
1509 * wait until the object is in the signalled state. Some object types 1498 * wait until the object is in the signalled state. Some object types
1510 * do not support this access right. 1499 * do not support this access right.
1511 */ 1500 */
1512 SYNCHRONIZE = const_cpu_to_le32(0x00100000), 1501 SYNCHRONIZE = cpu_to_le32(0x00100000),
1513 1502
1514 /* 1503 /*
1515 * The following STANDARD_RIGHTS_* are combinations of the above for 1504 * The following STANDARD_RIGHTS_* are combinations of the above for
@@ -1517,25 +1506,25 @@ enum {
1517 */ 1506 */
1518 1507
1519 /* These are currently defined to READ_CONTROL. */ 1508 /* These are currently defined to READ_CONTROL. */
1520 STANDARD_RIGHTS_READ = const_cpu_to_le32(0x00020000), 1509 STANDARD_RIGHTS_READ = cpu_to_le32(0x00020000),
1521 STANDARD_RIGHTS_WRITE = const_cpu_to_le32(0x00020000), 1510 STANDARD_RIGHTS_WRITE = cpu_to_le32(0x00020000),
1522 STANDARD_RIGHTS_EXECUTE = const_cpu_to_le32(0x00020000), 1511 STANDARD_RIGHTS_EXECUTE = cpu_to_le32(0x00020000),
1523 1512
1524 /* Combines DELETE, READ_CONTROL, WRITE_DAC, and WRITE_OWNER access. */ 1513 /* Combines DELETE, READ_CONTROL, WRITE_DAC, and WRITE_OWNER access. */
1525 STANDARD_RIGHTS_REQUIRED = const_cpu_to_le32(0x000f0000), 1514 STANDARD_RIGHTS_REQUIRED = cpu_to_le32(0x000f0000),
1526 1515
1527 /* 1516 /*
1528 * Combines DELETE, READ_CONTROL, WRITE_DAC, WRITE_OWNER, and 1517 * Combines DELETE, READ_CONTROL, WRITE_DAC, WRITE_OWNER, and
1529 * SYNCHRONIZE access. 1518 * SYNCHRONIZE access.
1530 */ 1519 */
1531 STANDARD_RIGHTS_ALL = const_cpu_to_le32(0x001f0000), 1520 STANDARD_RIGHTS_ALL = cpu_to_le32(0x001f0000),
1532 1521
1533 /* 1522 /*
1534 * The access system ACL and maximum allowed access types (bits 24 to 1523 * The access system ACL and maximum allowed access types (bits 24 to
1535 * 25, bits 26 to 27 are reserved). 1524 * 25, bits 26 to 27 are reserved).
1536 */ 1525 */
1537 ACCESS_SYSTEM_SECURITY = const_cpu_to_le32(0x01000000), 1526 ACCESS_SYSTEM_SECURITY = cpu_to_le32(0x01000000),
1538 MAXIMUM_ALLOWED = const_cpu_to_le32(0x02000000), 1527 MAXIMUM_ALLOWED = cpu_to_le32(0x02000000),
1539 1528
1540 /* 1529 /*
1541 * The generic rights (bits 28 to 31). These map onto the standard and 1530 * The generic rights (bits 28 to 31). These map onto the standard and
@@ -1543,10 +1532,10 @@ enum {
1543 */ 1532 */
1544 1533
1545 /* Read, write, and execute access. */ 1534 /* Read, write, and execute access. */
1546 GENERIC_ALL = const_cpu_to_le32(0x10000000), 1535 GENERIC_ALL = cpu_to_le32(0x10000000),
1547 1536
1548 /* Execute access. */ 1537 /* Execute access. */
1549 GENERIC_EXECUTE = const_cpu_to_le32(0x20000000), 1538 GENERIC_EXECUTE = cpu_to_le32(0x20000000),
1550 1539
1551 /* 1540 /*
1552 * Write access. For files, this maps onto: 1541 * Write access. For files, this maps onto:
@@ -1555,7 +1544,7 @@ enum {
1555 * For directories, the mapping has the same numerical value. See 1544 * For directories, the mapping has the same numerical value. See
1556 * above for the descriptions of the rights granted. 1545 * above for the descriptions of the rights granted.
1557 */ 1546 */
1558 GENERIC_WRITE = const_cpu_to_le32(0x40000000), 1547 GENERIC_WRITE = cpu_to_le32(0x40000000),
1559 1548
1560 /* 1549 /*
1561 * Read access. For files, this maps onto: 1550 * Read access. For files, this maps onto:
@@ -1564,7 +1553,7 @@ enum {
1564 * For directories, the mapping has the same numerical value. See 1553 * For directories, the mapping has the same numerical value. See
1565 * above for the descriptions of the rights granted. 1554 * above for the descriptions of the rights granted.
1566 */ 1555 */
1567 GENERIC_READ = const_cpu_to_le32(0x80000000), 1556 GENERIC_READ = cpu_to_le32(0x80000000),
1568}; 1557};
1569 1558
1570typedef le32 ACCESS_MASK; 1559typedef le32 ACCESS_MASK;
@@ -1604,8 +1593,8 @@ typedef struct {
1604 * The object ACE flags (32-bit). 1593 * The object ACE flags (32-bit).
1605 */ 1594 */
1606enum { 1595enum {
1607 ACE_OBJECT_TYPE_PRESENT = const_cpu_to_le32(1), 1596 ACE_OBJECT_TYPE_PRESENT = cpu_to_le32(1),
1608 ACE_INHERITED_OBJECT_TYPE_PRESENT = const_cpu_to_le32(2), 1597 ACE_INHERITED_OBJECT_TYPE_PRESENT = cpu_to_le32(2),
1609}; 1598};
1610 1599
1611typedef le32 OBJECT_ACE_FLAGS; 1600typedef le32 OBJECT_ACE_FLAGS;
@@ -1706,23 +1695,23 @@ typedef enum {
1706 * expressed as offsets from the beginning of the security descriptor. 1695 * expressed as offsets from the beginning of the security descriptor.
1707 */ 1696 */
1708enum { 1697enum {
1709 SE_OWNER_DEFAULTED = const_cpu_to_le16(0x0001), 1698 SE_OWNER_DEFAULTED = cpu_to_le16(0x0001),
1710 SE_GROUP_DEFAULTED = const_cpu_to_le16(0x0002), 1699 SE_GROUP_DEFAULTED = cpu_to_le16(0x0002),
1711 SE_DACL_PRESENT = const_cpu_to_le16(0x0004), 1700 SE_DACL_PRESENT = cpu_to_le16(0x0004),
1712 SE_DACL_DEFAULTED = const_cpu_to_le16(0x0008), 1701 SE_DACL_DEFAULTED = cpu_to_le16(0x0008),
1713 1702
1714 SE_SACL_PRESENT = const_cpu_to_le16(0x0010), 1703 SE_SACL_PRESENT = cpu_to_le16(0x0010),
1715 SE_SACL_DEFAULTED = const_cpu_to_le16(0x0020), 1704 SE_SACL_DEFAULTED = cpu_to_le16(0x0020),
1716 1705
1717 SE_DACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0100), 1706 SE_DACL_AUTO_INHERIT_REQ = cpu_to_le16(0x0100),
1718 SE_SACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0200), 1707 SE_SACL_AUTO_INHERIT_REQ = cpu_to_le16(0x0200),
1719 SE_DACL_AUTO_INHERITED = const_cpu_to_le16(0x0400), 1708 SE_DACL_AUTO_INHERITED = cpu_to_le16(0x0400),
1720 SE_SACL_AUTO_INHERITED = const_cpu_to_le16(0x0800), 1709 SE_SACL_AUTO_INHERITED = cpu_to_le16(0x0800),
1721 1710
1722 SE_DACL_PROTECTED = const_cpu_to_le16(0x1000), 1711 SE_DACL_PROTECTED = cpu_to_le16(0x1000),
1723 SE_SACL_PROTECTED = const_cpu_to_le16(0x2000), 1712 SE_SACL_PROTECTED = cpu_to_le16(0x2000),
1724 SE_RM_CONTROL_VALID = const_cpu_to_le16(0x4000), 1713 SE_RM_CONTROL_VALID = cpu_to_le16(0x4000),
1725 SE_SELF_RELATIVE = const_cpu_to_le16(0x8000) 1714 SE_SELF_RELATIVE = cpu_to_le16(0x8000)
1726} __attribute__ ((__packed__)); 1715} __attribute__ ((__packed__));
1727 1716
1728typedef le16 SECURITY_DESCRIPTOR_CONTROL; 1717typedef le16 SECURITY_DESCRIPTOR_CONTROL;
@@ -1910,21 +1899,21 @@ typedef struct {
1910 * Possible flags for the volume (16-bit). 1899 * Possible flags for the volume (16-bit).
1911 */ 1900 */
1912enum { 1901enum {
1913 VOLUME_IS_DIRTY = const_cpu_to_le16(0x0001), 1902 VOLUME_IS_DIRTY = cpu_to_le16(0x0001),
1914 VOLUME_RESIZE_LOG_FILE = const_cpu_to_le16(0x0002), 1903 VOLUME_RESIZE_LOG_FILE = cpu_to_le16(0x0002),
1915 VOLUME_UPGRADE_ON_MOUNT = const_cpu_to_le16(0x0004), 1904 VOLUME_UPGRADE_ON_MOUNT = cpu_to_le16(0x0004),
1916 VOLUME_MOUNTED_ON_NT4 = const_cpu_to_le16(0x0008), 1905 VOLUME_MOUNTED_ON_NT4 = cpu_to_le16(0x0008),
1917 1906
1918 VOLUME_DELETE_USN_UNDERWAY = const_cpu_to_le16(0x0010), 1907 VOLUME_DELETE_USN_UNDERWAY = cpu_to_le16(0x0010),
1919 VOLUME_REPAIR_OBJECT_ID = const_cpu_to_le16(0x0020), 1908 VOLUME_REPAIR_OBJECT_ID = cpu_to_le16(0x0020),
1920 1909
1921 VOLUME_CHKDSK_UNDERWAY = const_cpu_to_le16(0x4000), 1910 VOLUME_CHKDSK_UNDERWAY = cpu_to_le16(0x4000),
1922 VOLUME_MODIFIED_BY_CHKDSK = const_cpu_to_le16(0x8000), 1911 VOLUME_MODIFIED_BY_CHKDSK = cpu_to_le16(0x8000),
1923 1912
1924 VOLUME_FLAGS_MASK = const_cpu_to_le16(0xc03f), 1913 VOLUME_FLAGS_MASK = cpu_to_le16(0xc03f),
1925 1914
1926 /* To make our life easier when checking if we must mount read-only. */ 1915 /* To make our life easier when checking if we must mount read-only. */
1927 VOLUME_MUST_MOUNT_RO_MASK = const_cpu_to_le16(0xc027), 1916 VOLUME_MUST_MOUNT_RO_MASK = cpu_to_le16(0xc027),
1928} __attribute__ ((__packed__)); 1917} __attribute__ ((__packed__));
1929 1918
1930typedef le16 VOLUME_FLAGS; 1919typedef le16 VOLUME_FLAGS;
@@ -2109,26 +2098,26 @@ typedef struct {
2109 * The user quota flags. Names explain meaning. 2098 * The user quota flags. Names explain meaning.
2110 */ 2099 */
2111enum { 2100enum {
2112 QUOTA_FLAG_DEFAULT_LIMITS = const_cpu_to_le32(0x00000001), 2101 QUOTA_FLAG_DEFAULT_LIMITS = cpu_to_le32(0x00000001),
2113 QUOTA_FLAG_LIMIT_REACHED = const_cpu_to_le32(0x00000002), 2102 QUOTA_FLAG_LIMIT_REACHED = cpu_to_le32(0x00000002),
2114 QUOTA_FLAG_ID_DELETED = const_cpu_to_le32(0x00000004), 2103 QUOTA_FLAG_ID_DELETED = cpu_to_le32(0x00000004),
2115 2104
2116 QUOTA_FLAG_USER_MASK = const_cpu_to_le32(0x00000007), 2105 QUOTA_FLAG_USER_MASK = cpu_to_le32(0x00000007),
2117 /* This is a bit mask for the user quota flags. */ 2106 /* This is a bit mask for the user quota flags. */
2118 2107
2119 /* 2108 /*
2120 * These flags are only present in the quota defaults index entry, i.e. 2109 * These flags are only present in the quota defaults index entry, i.e.
2121 * in the entry where owner_id = QUOTA_DEFAULTS_ID. 2110 * in the entry where owner_id = QUOTA_DEFAULTS_ID.
2122 */ 2111 */
2123 QUOTA_FLAG_TRACKING_ENABLED = const_cpu_to_le32(0x00000010), 2112 QUOTA_FLAG_TRACKING_ENABLED = cpu_to_le32(0x00000010),
2124 QUOTA_FLAG_ENFORCEMENT_ENABLED = const_cpu_to_le32(0x00000020), 2113 QUOTA_FLAG_ENFORCEMENT_ENABLED = cpu_to_le32(0x00000020),
2125 QUOTA_FLAG_TRACKING_REQUESTED = const_cpu_to_le32(0x00000040), 2114 QUOTA_FLAG_TRACKING_REQUESTED = cpu_to_le32(0x00000040),
2126 QUOTA_FLAG_LOG_THRESHOLD = const_cpu_to_le32(0x00000080), 2115 QUOTA_FLAG_LOG_THRESHOLD = cpu_to_le32(0x00000080),
2127 2116
2128 QUOTA_FLAG_LOG_LIMIT = const_cpu_to_le32(0x00000100), 2117 QUOTA_FLAG_LOG_LIMIT = cpu_to_le32(0x00000100),
2129 QUOTA_FLAG_OUT_OF_DATE = const_cpu_to_le32(0x00000200), 2118 QUOTA_FLAG_OUT_OF_DATE = cpu_to_le32(0x00000200),
2130 QUOTA_FLAG_CORRUPT = const_cpu_to_le32(0x00000400), 2119 QUOTA_FLAG_CORRUPT = cpu_to_le32(0x00000400),
2131 QUOTA_FLAG_PENDING_DELETES = const_cpu_to_le32(0x00000800), 2120 QUOTA_FLAG_PENDING_DELETES = cpu_to_le32(0x00000800),
2132}; 2121};
2133 2122
2134typedef le32 QUOTA_FLAGS; 2123typedef le32 QUOTA_FLAGS;
@@ -2172,9 +2161,9 @@ typedef struct {
2172 * Predefined owner_id values (32-bit). 2161 * Predefined owner_id values (32-bit).
2173 */ 2162 */
2174enum { 2163enum {
2175 QUOTA_INVALID_ID = const_cpu_to_le32(0x00000000), 2164 QUOTA_INVALID_ID = cpu_to_le32(0x00000000),
2176 QUOTA_DEFAULTS_ID = const_cpu_to_le32(0x00000001), 2165 QUOTA_DEFAULTS_ID = cpu_to_le32(0x00000001),
2177 QUOTA_FIRST_USER_ID = const_cpu_to_le32(0x00000100), 2166 QUOTA_FIRST_USER_ID = cpu_to_le32(0x00000100),
2178}; 2167};
2179 2168
2180/* 2169/*
@@ -2189,14 +2178,14 @@ typedef enum {
2189 * Index entry flags (16-bit). 2178 * Index entry flags (16-bit).
2190 */ 2179 */
2191enum { 2180enum {
2192 INDEX_ENTRY_NODE = const_cpu_to_le16(1), /* This entry contains a 2181 INDEX_ENTRY_NODE = cpu_to_le16(1), /* This entry contains a
2193 sub-node, i.e. a reference to an index block in form of 2182 sub-node, i.e. a reference to an index block in form of
2194 a virtual cluster number (see below). */ 2183 a virtual cluster number (see below). */
2195 INDEX_ENTRY_END = const_cpu_to_le16(2), /* This signifies the last 2184 INDEX_ENTRY_END = cpu_to_le16(2), /* This signifies the last
2196 entry in an index block. The index entry does not 2185 entry in an index block. The index entry does not
2197 represent a file but it can point to a sub-node. */ 2186 represent a file but it can point to a sub-node. */
2198 2187
2199 INDEX_ENTRY_SPACE_FILLER = const_cpu_to_le16(0xffff), /* gcc: Force 2188 INDEX_ENTRY_SPACE_FILLER = cpu_to_le16(0xffff), /* gcc: Force
2200 enum bit width to 16-bit. */ 2189 enum bit width to 16-bit. */
2201} __attribute__ ((__packed__)); 2190} __attribute__ ((__packed__));
2202 2191
@@ -2334,26 +2323,26 @@ typedef struct {
2334 * These are the predefined reparse point tags: 2323 * These are the predefined reparse point tags:
2335 */ 2324 */
2336enum { 2325enum {
2337 IO_REPARSE_TAG_IS_ALIAS = const_cpu_to_le32(0x20000000), 2326 IO_REPARSE_TAG_IS_ALIAS = cpu_to_le32(0x20000000),
2338 IO_REPARSE_TAG_IS_HIGH_LATENCY = const_cpu_to_le32(0x40000000), 2327 IO_REPARSE_TAG_IS_HIGH_LATENCY = cpu_to_le32(0x40000000),
2339 IO_REPARSE_TAG_IS_MICROSOFT = const_cpu_to_le32(0x80000000), 2328 IO_REPARSE_TAG_IS_MICROSOFT = cpu_to_le32(0x80000000),
2340 2329
2341 IO_REPARSE_TAG_RESERVED_ZERO = const_cpu_to_le32(0x00000000), 2330 IO_REPARSE_TAG_RESERVED_ZERO = cpu_to_le32(0x00000000),
2342 IO_REPARSE_TAG_RESERVED_ONE = const_cpu_to_le32(0x00000001), 2331 IO_REPARSE_TAG_RESERVED_ONE = cpu_to_le32(0x00000001),
2343 IO_REPARSE_TAG_RESERVED_RANGE = const_cpu_to_le32(0x00000001), 2332 IO_REPARSE_TAG_RESERVED_RANGE = cpu_to_le32(0x00000001),
2344 2333
2345 IO_REPARSE_TAG_NSS = const_cpu_to_le32(0x68000005), 2334 IO_REPARSE_TAG_NSS = cpu_to_le32(0x68000005),
2346 IO_REPARSE_TAG_NSS_RECOVER = const_cpu_to_le32(0x68000006), 2335 IO_REPARSE_TAG_NSS_RECOVER = cpu_to_le32(0x68000006),
2347 IO_REPARSE_TAG_SIS = const_cpu_to_le32(0x68000007), 2336 IO_REPARSE_TAG_SIS = cpu_to_le32(0x68000007),
2348 IO_REPARSE_TAG_DFS = const_cpu_to_le32(0x68000008), 2337 IO_REPARSE_TAG_DFS = cpu_to_le32(0x68000008),
2349 2338
2350 IO_REPARSE_TAG_MOUNT_POINT = const_cpu_to_le32(0x88000003), 2339 IO_REPARSE_TAG_MOUNT_POINT = cpu_to_le32(0x88000003),
2351 2340
2352 IO_REPARSE_TAG_HSM = const_cpu_to_le32(0xa8000004), 2341 IO_REPARSE_TAG_HSM = cpu_to_le32(0xa8000004),
2353 2342
2354 IO_REPARSE_TAG_SYMBOLIC_LINK = const_cpu_to_le32(0xe8000000), 2343 IO_REPARSE_TAG_SYMBOLIC_LINK = cpu_to_le32(0xe8000000),
2355 2344
2356 IO_REPARSE_TAG_VALID_VALUES = const_cpu_to_le32(0xe000ffff), 2345 IO_REPARSE_TAG_VALID_VALUES = cpu_to_le32(0xe000ffff),
2357}; 2346};
2358 2347
2359/* 2348/*
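
The pattern above repeats across all of these NTFS enums: cpu_to_le32()/cpu_to_le16() now fold to compile-time constants when given constant arguments, so the const_cpu_to_le*() wrappers are redundant in initializers. A minimal sketch of the idiom (the EXAMPLE_* names are hypothetical, not from this patch):

    #include <linux/types.h>
    #include <asm/byteorder.h>

    /* Valid because cpu_to_le32(constant) is itself an integer
     * constant expression, usable in enum initializers. */
    enum {
            EXAMPLE_FLAG_A = cpu_to_le32(0x00000001),
            EXAMPLE_FLAG_B = cpu_to_le32(0x00000002),
    };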
diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h
index 9468e1c45ae3..b5a6f08bd35c 100644
--- a/fs/ntfs/logfile.h
+++ b/fs/ntfs/logfile.h
@@ -104,7 +104,7 @@ typedef struct {
104 * in this particular client array. Also inside the client records themselves, 104 * in this particular client array. Also inside the client records themselves,
105 * this means that there are no client records preceding or following this one. 105 * this means that there are no client records preceding or following this one.
106 */ 106 */
107#define LOGFILE_NO_CLIENT const_cpu_to_le16(0xffff) 107#define LOGFILE_NO_CLIENT cpu_to_le16(0xffff)
108#define LOGFILE_NO_CLIENT_CPU 0xffff 108#define LOGFILE_NO_CLIENT_CPU 0xffff
109 109
110/* 110/*
@@ -112,8 +112,8 @@ typedef struct {
112 * information about the log file in which they are present. 112 * information about the log file in which they are present.
113 */ 113 */
114enum { 114enum {
115 RESTART_VOLUME_IS_CLEAN = const_cpu_to_le16(0x0002), 115 RESTART_VOLUME_IS_CLEAN = cpu_to_le16(0x0002),
116 RESTART_SPACE_FILLER = const_cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16. */ 116 RESTART_SPACE_FILLER = cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16. */
117} __attribute__ ((__packed__)); 117} __attribute__ ((__packed__));
118 118
119typedef le16 RESTART_AREA_FLAGS; 119typedef le16 RESTART_AREA_FLAGS;
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 17d32ca6bc35..23bf68453d7d 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -2839,7 +2839,7 @@ int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m)
2839 */ 2839 */
2840 2840
2841 /* Mark the mft record as not in use. */ 2841 /* Mark the mft record as not in use. */
2842 m->flags &= const_cpu_to_le16(~const_le16_to_cpu(MFT_RECORD_IN_USE)); 2842 m->flags &= ~MFT_RECORD_IN_USE;
2843 2843
2844 /* Increment the sequence number, skipping zero, if it is not zero. */ 2844 /* Increment the sequence number, skipping zero, if it is not zero. */
2845 old_seq_no = m->sequence_number; 2845 old_seq_no = m->sequence_number;
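
The mft.c hunk above is the one change in this sweep that looks semantic, yet it preserves the bit pattern exactly: byte swapping commutes with bitwise NOT and AND, so clearing the flag directly on the little-endian value is equivalent to the old convert/complement/convert dance. A sketch of the identity (an illustration, not code from the patch):

    /* cpu_to_le16(~le16_to_cpu(x)) has the same bit pattern as ~x,
     * so the mask can be applied in le16 space directly. */
    m->flags &= ~MFT_RECORD_IN_USE;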
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 4a46743b5077..f76951dcd4a6 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -618,7 +618,7 @@ static bool is_boot_sector_ntfs(const struct super_block *sb,
618 * many BIOSes will refuse to boot from a bootsector if the magic is 618 * many BIOSes will refuse to boot from a bootsector if the magic is
619 * incorrect, so we emit a warning. 619 * incorrect, so we emit a warning.
620 */ 620 */
621 if (!silent && b->end_of_sector_marker != const_cpu_to_le16(0xaa55)) 621 if (!silent && b->end_of_sector_marker != cpu_to_le16(0xaa55))
622 ntfs_warning(sb, "Invalid end of sector marker."); 622 ntfs_warning(sb, "Invalid end of sector marker.");
623 return true; 623 return true;
624not_ntfs: 624not_ntfs:
@@ -1242,13 +1242,13 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
1242 u32 *kaddr, *kend; 1242 u32 *kaddr, *kend;
1243 ntfs_name *name = NULL; 1243 ntfs_name *name = NULL;
1244 int ret = 1; 1244 int ret = 1;
1245 static const ntfschar hiberfil[13] = { const_cpu_to_le16('h'), 1245 static const ntfschar hiberfil[13] = { cpu_to_le16('h'),
1246 const_cpu_to_le16('i'), const_cpu_to_le16('b'), 1246 cpu_to_le16('i'), cpu_to_le16('b'),
1247 const_cpu_to_le16('e'), const_cpu_to_le16('r'), 1247 cpu_to_le16('e'), cpu_to_le16('r'),
1248 const_cpu_to_le16('f'), const_cpu_to_le16('i'), 1248 cpu_to_le16('f'), cpu_to_le16('i'),
1249 const_cpu_to_le16('l'), const_cpu_to_le16('.'), 1249 cpu_to_le16('l'), cpu_to_le16('.'),
1250 const_cpu_to_le16('s'), const_cpu_to_le16('y'), 1250 cpu_to_le16('s'), cpu_to_le16('y'),
1251 const_cpu_to_le16('s'), 0 }; 1251 cpu_to_le16('s'), 0 };
1252 1252
1253 ntfs_debug("Entering."); 1253 ntfs_debug("Entering.");
1254 /* 1254 /*
@@ -1296,7 +1296,7 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
1296 goto iput_out; 1296 goto iput_out;
1297 } 1297 }
1298 kaddr = (u32*)page_address(page); 1298 kaddr = (u32*)page_address(page);
1299 if (*(le32*)kaddr == const_cpu_to_le32(0x72626968)/*'hibr'*/) { 1299 if (*(le32*)kaddr == cpu_to_le32(0x72626968)/*'hibr'*/) {
1300 ntfs_debug("Magic \"hibr\" found in hiberfil.sys. Windows is " 1300 ntfs_debug("Magic \"hibr\" found in hiberfil.sys. Windows is "
1301 "hibernated on the volume. This is the " 1301 "hibernated on the volume. This is the "
1302 "system volume."); 1302 "system volume.");
@@ -1337,12 +1337,12 @@ static bool load_and_init_quota(ntfs_volume *vol)
1337 MFT_REF mref; 1337 MFT_REF mref;
1338 struct inode *tmp_ino; 1338 struct inode *tmp_ino;
1339 ntfs_name *name = NULL; 1339 ntfs_name *name = NULL;
1340 static const ntfschar Quota[7] = { const_cpu_to_le16('$'), 1340 static const ntfschar Quota[7] = { cpu_to_le16('$'),
1341 const_cpu_to_le16('Q'), const_cpu_to_le16('u'), 1341 cpu_to_le16('Q'), cpu_to_le16('u'),
1342 const_cpu_to_le16('o'), const_cpu_to_le16('t'), 1342 cpu_to_le16('o'), cpu_to_le16('t'),
1343 const_cpu_to_le16('a'), 0 }; 1343 cpu_to_le16('a'), 0 };
1344 static ntfschar Q[3] = { const_cpu_to_le16('$'), 1344 static ntfschar Q[3] = { cpu_to_le16('$'),
1345 const_cpu_to_le16('Q'), 0 }; 1345 cpu_to_le16('Q'), 0 };
1346 1346
1347 ntfs_debug("Entering."); 1347 ntfs_debug("Entering.");
1348 /* 1348 /*
@@ -1416,16 +1416,16 @@ static bool load_and_init_usnjrnl(ntfs_volume *vol)
1416 struct page *page; 1416 struct page *page;
1417 ntfs_name *name = NULL; 1417 ntfs_name *name = NULL;
1418 USN_HEADER *uh; 1418 USN_HEADER *uh;
1419 static const ntfschar UsnJrnl[9] = { const_cpu_to_le16('$'), 1419 static const ntfschar UsnJrnl[9] = { cpu_to_le16('$'),
1420 const_cpu_to_le16('U'), const_cpu_to_le16('s'), 1420 cpu_to_le16('U'), cpu_to_le16('s'),
1421 const_cpu_to_le16('n'), const_cpu_to_le16('J'), 1421 cpu_to_le16('n'), cpu_to_le16('J'),
1422 const_cpu_to_le16('r'), const_cpu_to_le16('n'), 1422 cpu_to_le16('r'), cpu_to_le16('n'),
1423 const_cpu_to_le16('l'), 0 }; 1423 cpu_to_le16('l'), 0 };
1424 static ntfschar Max[5] = { const_cpu_to_le16('$'), 1424 static ntfschar Max[5] = { cpu_to_le16('$'),
1425 const_cpu_to_le16('M'), const_cpu_to_le16('a'), 1425 cpu_to_le16('M'), cpu_to_le16('a'),
1426 const_cpu_to_le16('x'), 0 }; 1426 cpu_to_le16('x'), 0 };
1427 static ntfschar J[3] = { const_cpu_to_le16('$'), 1427 static ntfschar J[3] = { cpu_to_le16('$'),
1428 const_cpu_to_le16('J'), 0 }; 1428 cpu_to_le16('J'), 0 };
1429 1429
1430 ntfs_debug("Entering."); 1430 ntfs_debug("Entering.");
1431 /* 1431 /*
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h
index 4087fbdac327..00d8e6bd7c36 100644
--- a/fs/ntfs/usnjrnl.h
+++ b/fs/ntfs/usnjrnl.h
@@ -116,27 +116,27 @@ typedef struct {
116 * documentation: http://www.linux-ntfs.org/ 116 * documentation: http://www.linux-ntfs.org/
117 */ 117 */
118enum { 118enum {
119 USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), 119 USN_REASON_DATA_OVERWRITE = cpu_to_le32(0x00000001),
120 USN_REASON_DATA_EXTEND = const_cpu_to_le32(0x00000002), 120 USN_REASON_DATA_EXTEND = cpu_to_le32(0x00000002),
121 USN_REASON_DATA_TRUNCATION = const_cpu_to_le32(0x00000004), 121 USN_REASON_DATA_TRUNCATION = cpu_to_le32(0x00000004),
122 USN_REASON_NAMED_DATA_OVERWRITE = const_cpu_to_le32(0x00000010), 122 USN_REASON_NAMED_DATA_OVERWRITE = cpu_to_le32(0x00000010),
123 USN_REASON_NAMED_DATA_EXTEND = const_cpu_to_le32(0x00000020), 123 USN_REASON_NAMED_DATA_EXTEND = cpu_to_le32(0x00000020),
124 USN_REASON_NAMED_DATA_TRUNCATION= const_cpu_to_le32(0x00000040), 124 USN_REASON_NAMED_DATA_TRUNCATION= cpu_to_le32(0x00000040),
125 USN_REASON_FILE_CREATE = const_cpu_to_le32(0x00000100), 125 USN_REASON_FILE_CREATE = cpu_to_le32(0x00000100),
126 USN_REASON_FILE_DELETE = const_cpu_to_le32(0x00000200), 126 USN_REASON_FILE_DELETE = cpu_to_le32(0x00000200),
127 USN_REASON_EA_CHANGE = const_cpu_to_le32(0x00000400), 127 USN_REASON_EA_CHANGE = cpu_to_le32(0x00000400),
128 USN_REASON_SECURITY_CHANGE = const_cpu_to_le32(0x00000800), 128 USN_REASON_SECURITY_CHANGE = cpu_to_le32(0x00000800),
129 USN_REASON_RENAME_OLD_NAME = const_cpu_to_le32(0x00001000), 129 USN_REASON_RENAME_OLD_NAME = cpu_to_le32(0x00001000),
130 USN_REASON_RENAME_NEW_NAME = const_cpu_to_le32(0x00002000), 130 USN_REASON_RENAME_NEW_NAME = cpu_to_le32(0x00002000),
131 USN_REASON_INDEXABLE_CHANGE = const_cpu_to_le32(0x00004000), 131 USN_REASON_INDEXABLE_CHANGE = cpu_to_le32(0x00004000),
132 USN_REASON_BASIC_INFO_CHANGE = const_cpu_to_le32(0x00008000), 132 USN_REASON_BASIC_INFO_CHANGE = cpu_to_le32(0x00008000),
133 USN_REASON_HARD_LINK_CHANGE = const_cpu_to_le32(0x00010000), 133 USN_REASON_HARD_LINK_CHANGE = cpu_to_le32(0x00010000),
134 USN_REASON_COMPRESSION_CHANGE = const_cpu_to_le32(0x00020000), 134 USN_REASON_COMPRESSION_CHANGE = cpu_to_le32(0x00020000),
135 USN_REASON_ENCRYPTION_CHANGE = const_cpu_to_le32(0x00040000), 135 USN_REASON_ENCRYPTION_CHANGE = cpu_to_le32(0x00040000),
136 USN_REASON_OBJECT_ID_CHANGE = const_cpu_to_le32(0x00080000), 136 USN_REASON_OBJECT_ID_CHANGE = cpu_to_le32(0x00080000),
137 USN_REASON_REPARSE_POINT_CHANGE = const_cpu_to_le32(0x00100000), 137 USN_REASON_REPARSE_POINT_CHANGE = cpu_to_le32(0x00100000),
138 USN_REASON_STREAM_CHANGE = const_cpu_to_le32(0x00200000), 138 USN_REASON_STREAM_CHANGE = cpu_to_le32(0x00200000),
139 USN_REASON_CLOSE = const_cpu_to_le32(0x80000000), 139 USN_REASON_CLOSE = cpu_to_le32(0x80000000),
140}; 140};
141 141
142typedef le32 USN_REASON_FLAGS; 142typedef le32 USN_REASON_FLAGS;
@@ -148,9 +148,9 @@ typedef le32 USN_REASON_FLAGS;
148 * http://www.linux-ntfs.org/ 148 * http://www.linux-ntfs.org/
149 */ 149 */
150enum { 150enum {
151 USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), 151 USN_SOURCE_DATA_MANAGEMENT = cpu_to_le32(0x00000001),
152 USN_SOURCE_AUXILIARY_DATA = const_cpu_to_le32(0x00000002), 152 USN_SOURCE_AUXILIARY_DATA = cpu_to_le32(0x00000002),
153 USN_SOURCE_REPLICATION_MANAGEMENT = const_cpu_to_le32(0x00000004), 153 USN_SOURCE_REPLICATION_MANAGEMENT = cpu_to_le32(0x00000004),
154}; 154};
155 155
156typedef le32 USN_SOURCE_INFO_FLAGS; 156typedef le32 USN_SOURCE_INFO_FLAGS;
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index eea1d24713ea..b606496b72ec 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -154,8 +154,9 @@ out:
154 return ret; 154 return ret;
155} 155}
156 156
157static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) 157static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
158{ 158{
159 struct page *page = vmf->page;
159 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 160 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
160 struct buffer_head *di_bh = NULL; 161 struct buffer_head *di_bh = NULL;
161 sigset_t blocked, oldset; 162 sigset_t blocked, oldset;
@@ -196,7 +197,8 @@ out:
196 ret2 = ocfs2_vm_op_unblock_sigs(&oldset); 197 ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
197 if (ret2 < 0) 198 if (ret2 < 0)
198 mlog_errno(ret2); 199 mlog_errno(ret2);
199 200 if (ret)
201 ret = VM_FAULT_SIGBUS;
200 return ret; 202 return ret;
201} 203}
202 204
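
The ocfs2 hunks track a VM API change: ->page_mkwrite() now receives a struct vm_fault instead of the bare page, and is expected to return VM_FAULT_* codes rather than -errno values, which is why a nonzero ret is translated to VM_FAULT_SIGBUS before returning. A minimal sketch of the new callback shape (example_page_mkwrite is a hypothetical name):

    static int example_page_mkwrite(struct vm_area_struct *vma,
                                    struct vm_fault *vmf)
    {
            struct page *page = vmf->page;  /* page about to become writable */

            lock_page(page);
            /* ... allocate blocks / mark the page writable ... */
            unlock_page(page);
            return 0;                       /* or VM_FAULT_SIGBUS on error */
    }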
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 5d2989e9dcc1..fa678abc9db1 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -37,7 +37,7 @@ static int proc_match(int len, const char *name, struct proc_dir_entry *de)
37#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) 37#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
38 38
39static ssize_t 39static ssize_t
40proc_file_read(struct file *file, char __user *buf, size_t nbytes, 40__proc_file_read(struct file *file, char __user *buf, size_t nbytes,
41 loff_t *ppos) 41 loff_t *ppos)
42{ 42{
43 struct inode * inode = file->f_path.dentry->d_inode; 43 struct inode * inode = file->f_path.dentry->d_inode;
@@ -183,19 +183,47 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes,
183} 183}
184 184
185static ssize_t 185static ssize_t
186proc_file_read(struct file *file, char __user *buf, size_t nbytes,
187 loff_t *ppos)
188{
189 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
190 ssize_t rv = -EIO;
191
192 spin_lock(&pde->pde_unload_lock);
193 if (!pde->proc_fops) {
194 spin_unlock(&pde->pde_unload_lock);
195 return rv;
196 }
197 pde->pde_users++;
198 spin_unlock(&pde->pde_unload_lock);
199
200 rv = __proc_file_read(file, buf, nbytes, ppos);
201
202 pde_users_dec(pde);
203 return rv;
204}
205
206static ssize_t
186proc_file_write(struct file *file, const char __user *buffer, 207proc_file_write(struct file *file, const char __user *buffer,
187 size_t count, loff_t *ppos) 208 size_t count, loff_t *ppos)
188{ 209{
189 struct inode *inode = file->f_path.dentry->d_inode; 210 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
190 struct proc_dir_entry * dp; 211 ssize_t rv = -EIO;
191 212
192 dp = PDE(inode); 213 if (pde->write_proc) {
193 214 spin_lock(&pde->pde_unload_lock);
194 if (!dp->write_proc) 215 if (!pde->proc_fops) {
195 return -EIO; 216 spin_unlock(&pde->pde_unload_lock);
217 return rv;
218 }
219 pde->pde_users++;
220 spin_unlock(&pde->pde_unload_lock);
196 221
197 /* FIXME: does this routine need ppos? probably... */ 222 /* FIXME: does this routine need ppos? probably... */
198 return dp->write_proc(file, buffer, count, dp->data); 223 rv = pde->write_proc(file, buffer, count, pde->data);
224 pde_users_dec(pde);
225 }
226 return rv;
199} 227}
200 228
201 229
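
Both wrappers above follow the same pinning protocol: under pde_unload_lock, check that the entry still has its proc_fops (i.e. is not mid-removal), bump pde_users, drop the lock, call the handler, then let pde_users_dec() unpin and wake any remove_proc_entry() waiting for the count to reach zero. Condensed, the read-side guard is:

    spin_lock(&pde->pde_unload_lock);
    if (!pde->proc_fops) {                  /* entry is being removed */
            spin_unlock(&pde->pde_unload_lock);
            return -EIO;
    }
    pde->pde_users++;                       /* pin the entry */
    spin_unlock(&pde->pde_unload_lock);

    rv = __proc_file_read(file, buf, nbytes, ppos);  /* cannot go away */

    pde_users_dec(pde);                     /* unpin, maybe complete removal */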
@@ -307,6 +335,21 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
307/* 335/*
308 * Return an inode number between PROC_DYNAMIC_FIRST and 336 * Return an inode number between PROC_DYNAMIC_FIRST and
309 * 0xffffffff, or zero on failure. 337 * 0xffffffff, or zero on failure.
338 *
339 * Current inode allocations in the proc-fs (hex-numbers):
340 *
341 * 00000000 reserved
342 * 00000001-00000fff static entries (goners)
343 * 001 root-ino
344 *
345 * 00001000-00001fff unused
346 * 0001xxxx-7fffxxxx pid-dir entries for pid 1-7fff
347 * 80000000-efffffff unused
348 * f0000000-ffffffff dynamic entries
349 *
350 * Goal:
351 * Once we split the thing into several virtual filesystems,
352 * we will get rid of magical ranges (and this comment, BTW).
310 */ 353 */
311static unsigned int get_inode_number(void) 354static unsigned int get_inode_number(void)
312{ 355{
diff --git a/fs/proc/inode-alloc.txt b/fs/proc/inode-alloc.txt
deleted file mode 100644
index 77212f938c2c..000000000000
--- a/fs/proc/inode-alloc.txt
+++ /dev/null
@@ -1,14 +0,0 @@
1Current inode allocations in the proc-fs (hex-numbers):
2
3 00000000 reserved
4 00000001-00000fff static entries (goners)
5 001 root-ino
6
7 00001000-00001fff unused
8 0001xxxx-7fffxxxx pid-dir entries for pid 1-7fff
9 80000000-efffffff unused
10 f0000000-ffffffff dynamic entries
11
12Goal:
13 a) once we'll split the thing into several virtual filesystems we
14 will get rid of magical ranges (and this file, BTW).
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d8bb5c671f42..d78ade305541 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -58,11 +58,8 @@ static void proc_delete_inode(struct inode *inode)
58 58
59 /* Let go of any associated proc directory entry */ 59 /* Let go of any associated proc directory entry */
60 de = PROC_I(inode)->pde; 60 de = PROC_I(inode)->pde;
61 if (de) { 61 if (de)
62 if (de->owner)
63 module_put(de->owner);
64 de_put(de); 62 de_put(de);
65 }
66 if (PROC_I(inode)->sysctl) 63 if (PROC_I(inode)->sysctl)
67 sysctl_head_put(PROC_I(inode)->sysctl); 64 sysctl_head_put(PROC_I(inode)->sysctl);
68 clear_inode(inode); 65 clear_inode(inode);
@@ -127,7 +124,7 @@ static void __pde_users_dec(struct proc_dir_entry *pde)
127 complete(pde->pde_unload_completion); 124 complete(pde->pde_unload_completion);
128} 125}
129 126
130static void pde_users_dec(struct proc_dir_entry *pde) 127void pde_users_dec(struct proc_dir_entry *pde)
131{ 128{
132 spin_lock(&pde->pde_unload_lock); 129 spin_lock(&pde->pde_unload_lock);
133 __pde_users_dec(pde); 130 __pde_users_dec(pde);
@@ -449,12 +446,9 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
449{ 446{
450 struct inode * inode; 447 struct inode * inode;
451 448
452 if (!try_module_get(de->owner))
453 goto out_mod;
454
455 inode = iget_locked(sb, ino); 449 inode = iget_locked(sb, ino);
456 if (!inode) 450 if (!inode)
457 goto out_ino; 451 return NULL;
458 if (inode->i_state & I_NEW) { 452 if (inode->i_state & I_NEW) {
459 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 453 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
460 PROC_I(inode)->fd = 0; 454 PROC_I(inode)->fd = 0;
@@ -485,16 +479,9 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
485 } 479 }
486 } 480 }
487 unlock_new_inode(inode); 481 unlock_new_inode(inode);
488 } else { 482 } else
489 module_put(de->owner);
490 de_put(de); 483 de_put(de);
491 }
492 return inode; 484 return inode;
493
494out_ino:
495 module_put(de->owner);
496out_mod:
497 return NULL;
498} 485}
499 486
500int proc_fill_super(struct super_block *s) 487int proc_fill_super(struct super_block *s)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index cd53ff838498..f6db9618a888 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -91,3 +91,4 @@ struct pde_opener {
91 int (*release)(struct inode *, struct file *); 91 int (*release)(struct inode *, struct file *);
92 struct list_head lh; 92 struct list_head lh;
93}; 93};
94void pde_users_dec(struct proc_dir_entry *pde);
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index d153946d6d15..83adcc869437 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -144,17 +144,12 @@ void proc_tty_register_driver(struct tty_driver *driver)
144{ 144{
145 struct proc_dir_entry *ent; 145 struct proc_dir_entry *ent;
146 146
147 if (!driver->ops->read_proc || !driver->driver_name || 147 if (!driver->driver_name || driver->proc_entry ||
148 driver->proc_entry) 148 !driver->ops->proc_fops)
149 return; 149 return;
150 150
151 ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); 151 ent = proc_create_data(driver->driver_name, 0, proc_tty_driver,
152 if (!ent) 152 driver->ops->proc_fops, driver);
153 return;
154 ent->read_proc = driver->ops->read_proc;
155 ent->owner = driver->owner;
156 ent->data = driver;
157
158 driver->proc_entry = ent; 153 driver->proc_entry = ent;
159} 154}
160 155
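
proc_create_data() registers the entry, its file_operations and its private data in a single call, so there is no window where the entry is visible without handlers, and the ->owner/read_proc bookkeeping disappears. A usage sketch under the same assumptions as the hunk above (example_proc_fops is an illustrative name):

    ent = proc_create_data("example", 0, proc_tty_driver,
                           &example_proc_fops, driver);
    if (!ent)
            return;         /* registration failed; nothing to undo */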
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 94063840832a..b0ae0be4801f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -693,8 +693,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
693 goto out_pages; 693 goto out_pages;
694 } 694 }
695 695
696 pm.out = (u64 *)buf; 696 pm.out = (u64 __user *)buf;
697 pm.end = (u64 *)(buf + count); 697 pm.end = (u64 __user *)(buf + count);
698 698
699 pagemap_walk.pmd_entry = pagemap_pte_range; 699 pagemap_walk.pmd_entry = pagemap_pte_range;
700 pagemap_walk.pte_hole = pagemap_pte_hole; 700 pagemap_walk.pte_hole = pagemap_pte_hole;
@@ -720,9 +720,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
720 if (ret == PM_END_OF_BUFFER) 720 if (ret == PM_END_OF_BUFFER)
721 ret = 0; 721 ret = 0;
722 /* don't need mmap_sem for these, but this looks cleaner */ 722 /* don't need mmap_sem for these, but this looks cleaner */
723 *ppos += (char *)pm.out - buf; 723 *ppos += (char __user *)pm.out - buf;
724 if (!ret) 724 if (!ret)
725 ret = (char *)pm.out - buf; 725 ret = (char __user *)pm.out - buf;
726 726
727out_pages: 727out_pages:
728 for (; pagecount; pagecount--) { 728 for (; pagecount; pagecount--) {
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index df26aa88fa47..0c10a0b3f146 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -1,45 +1,43 @@
1#include <linux/fs.h>
1#include <linux/init.h> 2#include <linux/init.h>
2#include <linux/proc_fs.h> 3#include <linux/proc_fs.h>
3#include <linux/sched.h> 4#include <linux/sched.h>
5#include <linux/seq_file.h>
4#include <linux/time.h> 6#include <linux/time.h>
5#include <asm/cputime.h> 7#include <asm/cputime.h>
6 8
7static int proc_calc_metrics(char *page, char **start, off_t off, 9static int uptime_proc_show(struct seq_file *m, void *v)
8 int count, int *eof, int len)
9{
10 if (len <= off + count)
11 *eof = 1;
12 *start = page + off;
13 len -= off;
14 if (len > count)
15 len = count;
16 if (len < 0)
17 len = 0;
18 return len;
19}
20
21static int uptime_read_proc(char *page, char **start, off_t off, int count,
22 int *eof, void *data)
23{ 10{
24 struct timespec uptime; 11 struct timespec uptime;
25 struct timespec idle; 12 struct timespec idle;
26 int len;
27 cputime_t idletime = cputime_add(init_task.utime, init_task.stime); 13 cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
28 14
29 do_posix_clock_monotonic_gettime(&uptime); 15 do_posix_clock_monotonic_gettime(&uptime);
30 monotonic_to_bootbased(&uptime); 16 monotonic_to_bootbased(&uptime);
31 cputime_to_timespec(idletime, &idle); 17 cputime_to_timespec(idletime, &idle);
32 len = sprintf(page, "%lu.%02lu %lu.%02lu\n", 18 seq_printf(m, "%lu.%02lu %lu.%02lu\n",
33 (unsigned long) uptime.tv_sec, 19 (unsigned long) uptime.tv_sec,
34 (uptime.tv_nsec / (NSEC_PER_SEC / 100)), 20 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
35 (unsigned long) idle.tv_sec, 21 (unsigned long) idle.tv_sec,
36 (idle.tv_nsec / (NSEC_PER_SEC / 100))); 22 (idle.tv_nsec / (NSEC_PER_SEC / 100)));
37 return proc_calc_metrics(page, start, off, count, eof, len); 23 return 0;
38} 24}
39 25
26static int uptime_proc_open(struct inode *inode, struct file *file)
27{
28 return single_open(file, uptime_proc_show, NULL);
29}
30
31static const struct file_operations uptime_proc_fops = {
32 .open = uptime_proc_open,
33 .read = seq_read,
34 .llseek = seq_lseek,
35 .release = single_release,
36};
37
40static int __init proc_uptime_init(void) 38static int __init proc_uptime_init(void)
41{ 39{
42 create_proc_read_entry("uptime", 0, NULL, uptime_read_proc, NULL); 40 proc_create("uptime", 0, NULL, &uptime_proc_fops);
43 return 0; 41 return 0;
44} 42}
45module_init(proc_uptime_init); 43module_init(proc_uptime_init);
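
The uptime conversion is the canonical seq_file recipe: a show() function that only emits output, single_open() to bind it, and the stock seq_read/seq_lseek/single_release for everything else, which eliminates the fragile offset/eof arithmetic of proc_calc_metrics(). Any single-record /proc file reduces to the same skeleton (example_proc_show is hypothetical):

    static int example_proc_show(struct seq_file *m, void *v)
    {
            seq_printf(m, "answer: %d\n", 42);
            return 0;
    }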
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 995ef1d6686c..ebb2c417912c 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -59,7 +59,6 @@ const struct inode_operations ramfs_file_inode_operations = {
59 */ 59 */
60int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) 60int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
61{ 61{
62 struct pagevec lru_pvec;
63 unsigned long npages, xpages, loop, limit; 62 unsigned long npages, xpages, loop, limit;
64 struct page *pages; 63 struct page *pages;
65 unsigned order; 64 unsigned order;
@@ -102,24 +101,20 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
102 memset(data, 0, newsize); 101 memset(data, 0, newsize);
103 102
104 /* attach all the pages to the inode's address space */ 103 /* attach all the pages to the inode's address space */
105 pagevec_init(&lru_pvec, 0);
106 for (loop = 0; loop < npages; loop++) { 104 for (loop = 0; loop < npages; loop++) {
107 struct page *page = pages + loop; 105 struct page *page = pages + loop;
108 106
109 ret = add_to_page_cache(page, inode->i_mapping, loop, GFP_KERNEL); 107 ret = add_to_page_cache_lru(page, inode->i_mapping, loop,
108 GFP_KERNEL);
110 if (ret < 0) 109 if (ret < 0)
111 goto add_error; 110 goto add_error;
112 111
113 if (!pagevec_add(&lru_pvec, page))
114 __pagevec_lru_add_file(&lru_pvec);
115
116 /* prevent the page from being discarded on memory pressure */ 112 /* prevent the page from being discarded on memory pressure */
117 SetPageDirty(page); 113 SetPageDirty(page);
118 114
119 unlock_page(page); 115 unlock_page(page);
120 } 116 }
121 117
122 pagevec_lru_add_file(&lru_pvec);
123 return 0; 118 return 0;
124 119
125 fsize_exceeded: 120 fsize_exceeded:
@@ -128,10 +123,8 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
128 return -EFBIG; 123 return -EFBIG;
129 124
130 add_error: 125 add_error:
131 pagevec_lru_add_file(&lru_pvec); 126 while (loop < npages)
132 page_cache_release(pages + loop); 127 __free_page(pages + loop++);
133 for (loop++; loop < npages; loop++)
134 __free_page(pages + loop);
135 return ret; 128 return ret;
136} 129}
137 130
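
add_to_page_cache_lru() combines page-cache insertion with LRU placement, which is what lets the local pagevec batching go away; note the error path also changes shape, freeing the current page together with the not-yet-attached remainder instead of releasing it separately. The replacement call is one line:

    /* inserts into the page cache and the file LRU in one step */
    ret = add_to_page_cache_lru(page, inode->i_mapping, loop, GFP_KERNEL);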
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index b7e6ac706b87..a404fb88e456 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -33,12 +33,15 @@
33#include <linux/backing-dev.h> 33#include <linux/backing-dev.h>
34#include <linux/ramfs.h> 34#include <linux/ramfs.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <linux/parser.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
37#include "internal.h" 38#include "internal.h"
38 39
39/* some random number */ 40/* some random number */
40#define RAMFS_MAGIC 0x858458f6 41#define RAMFS_MAGIC 0x858458f6
41 42
43#define RAMFS_DEFAULT_MODE 0755
44
42static const struct super_operations ramfs_ops; 45static const struct super_operations ramfs_ops;
43static const struct inode_operations ramfs_dir_inode_operations; 46static const struct inode_operations ramfs_dir_inode_operations;
44 47
@@ -158,12 +161,75 @@ static const struct inode_operations ramfs_dir_inode_operations = {
158static const struct super_operations ramfs_ops = { 161static const struct super_operations ramfs_ops = {
159 .statfs = simple_statfs, 162 .statfs = simple_statfs,
160 .drop_inode = generic_delete_inode, 163 .drop_inode = generic_delete_inode,
164 .show_options = generic_show_options,
165};
166
167struct ramfs_mount_opts {
168 umode_t mode;
169};
170
171enum {
172 Opt_mode,
173 Opt_err
174};
175
176static const match_table_t tokens = {
177 {Opt_mode, "mode=%o"},
178 {Opt_err, NULL}
179};
180
181struct ramfs_fs_info {
182 struct ramfs_mount_opts mount_opts;
161}; 183};
162 184
185static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts)
186{
187 substring_t args[MAX_OPT_ARGS];
188 int option;
189 int token;
190 char *p;
191
192 opts->mode = RAMFS_DEFAULT_MODE;
193
194 while ((p = strsep(&data, ",")) != NULL) {
195 if (!*p)
196 continue;
197
198 token = match_token(p, tokens, args);
199 switch (token) {
200 case Opt_mode:
201 if (match_octal(&args[0], &option))
202 return -EINVAL;
203 opts->mode = option & S_IALLUGO;
204 break;
205 default:
206 printk(KERN_ERR "ramfs: bad mount option: %s\n", p);
207 return -EINVAL;
208 }
209 }
210
211 return 0;
212}
213
163static int ramfs_fill_super(struct super_block * sb, void * data, int silent) 214static int ramfs_fill_super(struct super_block * sb, void * data, int silent)
164{ 215{
165 struct inode * inode; 216 struct ramfs_fs_info *fsi;
166 struct dentry * root; 217 struct inode *inode = NULL;
218 struct dentry *root;
219 int err;
220
221 save_mount_options(sb, data);
222
223 fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL);
224 if (!fsi) {
225 err = -ENOMEM;
226 goto fail;
227 }
228 sb->s_fs_info = fsi;
229
230 err = ramfs_parse_options(data, &fsi->mount_opts);
231 if (err)
232 goto fail;
167 233
168 sb->s_maxbytes = MAX_LFS_FILESIZE; 234 sb->s_maxbytes = MAX_LFS_FILESIZE;
169 sb->s_blocksize = PAGE_CACHE_SIZE; 235 sb->s_blocksize = PAGE_CACHE_SIZE;
@@ -171,17 +237,23 @@ static int ramfs_fill_super(struct super_block * sb, void * data, int silent)
171 sb->s_magic = RAMFS_MAGIC; 237 sb->s_magic = RAMFS_MAGIC;
172 sb->s_op = &ramfs_ops; 238 sb->s_op = &ramfs_ops;
173 sb->s_time_gran = 1; 239 sb->s_time_gran = 1;
174 inode = ramfs_get_inode(sb, S_IFDIR | 0755, 0); 240 inode = ramfs_get_inode(sb, S_IFDIR | fsi->mount_opts.mode, 0);
175 if (!inode) 241 if (!inode) {
176 return -ENOMEM; 242 err = -ENOMEM;
243 goto fail;
244 }
177 245
178 root = d_alloc_root(inode); 246 root = d_alloc_root(inode);
179 if (!root) { 247 if (!root) {
180 iput(inode); 248 err = -ENOMEM;
181 return -ENOMEM; 249 goto fail;
182 } 250 }
183 sb->s_root = root; 251 sb->s_root = root;
184 return 0; 252 return 0;
253fail:
254 kfree(fsi);
255 iput(inode);
256 return err;
185} 257}
186 258
187int ramfs_get_sb(struct file_system_type *fs_type, 259int ramfs_get_sb(struct file_system_type *fs_type,
@@ -197,10 +269,16 @@ static int rootfs_get_sb(struct file_system_type *fs_type,
197 mnt); 269 mnt);
198} 270}
199 271
272static void ramfs_kill_sb(struct super_block *sb)
273{
274 kfree(sb->s_fs_info);
275 kill_litter_super(sb);
276}
277
200static struct file_system_type ramfs_fs_type = { 278static struct file_system_type ramfs_fs_type = {
201 .name = "ramfs", 279 .name = "ramfs",
202 .get_sb = ramfs_get_sb, 280 .get_sb = ramfs_get_sb,
203 .kill_sb = kill_litter_super, 281 .kill_sb = ramfs_kill_sb,
204}; 282};
205static struct file_system_type rootfs_fs_type = { 283static struct file_system_type rootfs_fs_type = {
206 .name = "rootfs", 284 .name = "rootfs",
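
With the parser in place, ramfs honors a mount-time root mode instead of the hardwired 0755; the value is masked with S_IALLUGO, and the per-superblock ramfs_fs_info that carries it is released by the new ramfs_kill_sb(). A usage sketch:

    mount -t ramfs -o mode=0700 ramfs /mnt/scratch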
diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile
index 0eb7ac080484..7c5ab6330dd6 100644
--- a/fs/reiserfs/Makefile
+++ b/fs/reiserfs/Makefile
@@ -7,10 +7,10 @@ obj-$(CONFIG_REISERFS_FS) += reiserfs.o
7reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \ 7reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \
8 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \ 8 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \
9 hashes.o tail_conversion.o journal.o resize.o \ 9 hashes.o tail_conversion.o journal.o resize.o \
10 item_ops.o ioctl.o procfs.o 10 item_ops.o ioctl.o procfs.o xattr.o
11 11
12ifeq ($(CONFIG_REISERFS_FS_XATTR),y) 12ifeq ($(CONFIG_REISERFS_FS_XATTR),y)
13reiserfs-objs += xattr.o xattr_user.o xattr_trusted.o 13reiserfs-objs += xattr_user.o xattr_trusted.o
14endif 14endif
15 15
16ifeq ($(CONFIG_REISERFS_FS_SECURITY),y) 16ifeq ($(CONFIG_REISERFS_FS_SECURITY),y)
diff --git a/fs/reiserfs/README b/fs/reiserfs/README
index 90e1670e4e6f..14e8c9d460e5 100644
--- a/fs/reiserfs/README
+++ b/fs/reiserfs/README
@@ -1,4 +1,4 @@
1[LICENSING] 1[LICENSING]
2 2
3ReiserFS is hereby licensed under the GNU General 3ReiserFS is hereby licensed under the GNU General
4Public License version 2. 4Public License version 2.
@@ -31,7 +31,7 @@ the GPL as not allowing those additional licensing options, you read
31it wrongly, and Richard Stallman agrees with me, when carefully read 31it wrongly, and Richard Stallman agrees with me, when carefully read
32you can see that those restrictions on additional terms do not apply 32you can see that those restrictions on additional terms do not apply
33to the owner of the copyright, and my interpretation of this shall 33to the owner of the copyright, and my interpretation of this shall
34govern for this license. 34govern for this license.
35 35
36Finally, nothing in this license shall be interpreted to allow you to 36Finally, nothing in this license shall be interpreted to allow you to
37fail to fairly credit me, or to remove my credits, without my 37fail to fairly credit me, or to remove my credits, without my
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index f32d1425cc9f..e716161ab325 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -40,8 +40,8 @@
40 40
41#define SET_OPTION(optname) \ 41#define SET_OPTION(optname) \
42 do { \ 42 do { \
43 reiserfs_warning(s, "reiserfs: option \"%s\" is set", #optname); \ 43 reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \
44 set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \ 44 set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
45 } while(0) 45 } while(0)
46#define TEST_OPTION(optname, s) \ 46#define TEST_OPTION(optname, s) \
47 test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)) 47 test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
@@ -64,9 +64,9 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
64 unsigned int bmap_count = reiserfs_bmap_count(s); 64 unsigned int bmap_count = reiserfs_bmap_count(s);
65 65
66 if (block == 0 || block >= SB_BLOCK_COUNT(s)) { 66 if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
67 reiserfs_warning(s, 67 reiserfs_error(s, "vs-4010",
68 "vs-4010: is_reusable: block number is out of range %lu (%u)", 68 "block number is out of range %lu (%u)",
69 block, SB_BLOCK_COUNT(s)); 69 block, SB_BLOCK_COUNT(s));
70 return 0; 70 return 0;
71 } 71 }
72 72
@@ -79,31 +79,30 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
79 b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1; 79 b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
80 if (block >= bmap1 && 80 if (block >= bmap1 &&
81 block <= bmap1 + bmap_count) { 81 block <= bmap1 + bmap_count) {
82 reiserfs_warning(s, "vs: 4019: is_reusable: " 82 reiserfs_error(s, "vs-4019", "bitmap block %lu(%u) "
83 "bitmap block %lu(%u) can't be freed or reused", 83 "can't be freed or reused",
84 block, bmap_count); 84 block, bmap_count);
85 return 0; 85 return 0;
86 } 86 }
87 } else { 87 } else {
88 if (offset == 0) { 88 if (offset == 0) {
89 reiserfs_warning(s, "vs: 4020: is_reusable: " 89 reiserfs_error(s, "vs-4020", "bitmap block %lu(%u) "
90 "bitmap block %lu(%u) can't be freed or reused", 90 "can't be freed or reused",
91 block, bmap_count); 91 block, bmap_count);
92 return 0; 92 return 0;
93 } 93 }
94 } 94 }
95 95
96 if (bmap >= bmap_count) { 96 if (bmap >= bmap_count) {
97 reiserfs_warning(s, 97 reiserfs_error(s, "vs-4030", "bitmap for requested block "
98 "vs-4030: is_reusable: there is no so many bitmap blocks: " 98 "is out of range: block=%lu, bitmap_nr=%u",
99 "block=%lu, bitmap_nr=%u", block, bmap); 99 block, bmap);
100 return 0; 100 return 0;
101 } 101 }
102 102
103 if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) { 103 if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
104 reiserfs_warning(s, 104 reiserfs_error(s, "vs-4050", "this is root block (%u), "
105 "vs-4050: is_reusable: this is root block (%u), " 105 "it must be busy", SB_ROOT_BLOCK(s));
106 "it must be busy", SB_ROOT_BLOCK(s));
107 return 0; 106 return 0;
108 } 107 }
109 108
@@ -154,8 +153,8 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
154/* - I mean `a window of zero bits' as in description of this function - Zam. */ 153/* - I mean `a window of zero bits' as in description of this function - Zam. */
155 154
156 if (!bi) { 155 if (!bi) {
157 reiserfs_warning(s, "NULL bitmap info pointer for bitmap %d", 156 reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
158 bmap_n); 157 "for bitmap %d", bmap_n);
159 return 0; 158 return 0;
160 } 159 }
161 160
@@ -400,11 +399,8 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
400 get_bit_address(s, block, &nr, &offset); 399 get_bit_address(s, block, &nr, &offset);
401 400
402 if (nr >= reiserfs_bmap_count(s)) { 401 if (nr >= reiserfs_bmap_count(s)) {
403 reiserfs_warning(s, "vs-4075: reiserfs_free_block: " 402 reiserfs_error(s, "vs-4075", "block %lu is out of range",
404 "block %lu is out of range on %s " 403 block);
405 "(nr=%u,max=%u)", block,
406 reiserfs_bdevname(s), nr,
407 reiserfs_bmap_count(s));
408 return; 404 return;
409 } 405 }
410 406
@@ -416,9 +412,8 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
416 412
417 /* clear bit for the given block in bit map */ 413 /* clear bit for the given block in bit map */
418 if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) { 414 if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) {
419 reiserfs_warning(s, "vs-4080: reiserfs_free_block: " 415 reiserfs_error(s, "vs-4080",
420 "free_block (%s:%lu)[dev:blocknr]: bit already cleared", 416 "block %lu: bit already cleared", block);
421 reiserfs_bdevname(s), block);
422 } 417 }
423 apbi[nr].free_count++; 418 apbi[nr].free_count++;
424 journal_mark_dirty(th, s, bmbh); 419 journal_mark_dirty(th, s, bmbh);
@@ -445,7 +440,7 @@ void reiserfs_free_block(struct reiserfs_transaction_handle *th,
445 return; 440 return;
446 441
447 if (block > sb_block_count(REISERFS_SB(s)->s_rs)) { 442 if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
448 reiserfs_panic(th->t_super, "bitmap-4072", 443 reiserfs_error(th->t_super, "bitmap-4072",
449 "Trying to free block outside file system " 444 "Trying to free block outside file system "
450 "boundaries (%lu > %lu)", 445 "boundaries (%lu > %lu)",
451 block, sb_block_count(REISERFS_SB(s)->s_rs)); 446 block, sb_block_count(REISERFS_SB(s)->s_rs));
@@ -477,9 +472,8 @@ static void __discard_prealloc(struct reiserfs_transaction_handle *th,
477 BUG_ON(!th->t_trans_id); 472 BUG_ON(!th->t_trans_id);
478#ifdef CONFIG_REISERFS_CHECK 473#ifdef CONFIG_REISERFS_CHECK
479 if (ei->i_prealloc_count < 0) 474 if (ei->i_prealloc_count < 0)
480 reiserfs_warning(th->t_super, 475 reiserfs_error(th->t_super, "zam-4001",
481 "zam-4001:%s: inode has negative prealloc blocks count.", 476 "inode has negative prealloc blocks count.");
482 __func__);
483#endif 477#endif
484 while (ei->i_prealloc_count > 0) { 478 while (ei->i_prealloc_count > 0) {
485 reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block); 479 reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
@@ -515,9 +509,9 @@ void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
515 i_prealloc_list); 509 i_prealloc_list);
516#ifdef CONFIG_REISERFS_CHECK 510#ifdef CONFIG_REISERFS_CHECK
517 if (!ei->i_prealloc_count) { 511 if (!ei->i_prealloc_count) {
518 reiserfs_warning(th->t_super, 512 reiserfs_error(th->t_super, "zam-4001",
519 "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.", 513 "inode is in prealloc list but has "
520 __func__); 514 "no preallocated blocks.");
521 } 515 }
522#endif 516#endif
523 __discard_prealloc(th, ei); 517 __discard_prealloc(th, ei);
@@ -631,12 +625,12 @@ int reiserfs_parse_alloc_options(struct super_block *s, char *options)
631 continue; 625 continue;
632 } 626 }
633 627
634 reiserfs_warning(s, "zam-4001: %s : unknown option - %s", 628 reiserfs_warning(s, "zam-4001", "unknown option - %s",
635 __func__, this_char); 629 this_char);
636 return 1; 630 return 1;
637 } 631 }
638 632
639 reiserfs_warning(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s)); 633 reiserfs_info(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
640 return 0; 634 return 0;
641} 635}
642 636
@@ -1221,7 +1215,9 @@ void reiserfs_cache_bitmap_metadata(struct super_block *sb,
1221 unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size); 1215 unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);
1222 1216
1223 /* The first bit must ALWAYS be 1 */ 1217 /* The first bit must ALWAYS be 1 */
1224 BUG_ON(!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data)); 1218 if (!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data))
1219 reiserfs_error(sb, "reiserfs-2025", "bitmap block %lu is "
1220 "corrupted: first bit must be 1", bh->b_blocknr);
1225 1221
1226 info->free_count = 0; 1222 info->free_count = 0;
1227 1223
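
The bitmap.c conversions all follow the new reiserfs message convention: the unique identifier ("vs-4010", "jdm-4055", ...) becomes its own argument instead of being embedded in the format string, and genuine corruption cases are upgraded from reiserfs_warning() to reiserfs_error(). A hypothetical call in the new style ("vs-9999" is an invented id):

    reiserfs_error(sb, "vs-9999", "block %lu out of range (max %u)",
                   block, SB_BLOCK_COUNT(sb));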
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index e6b03d2020c1..67a80d7e59e2 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -41,10 +41,10 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
41 41
42#define store_ih(where,what) copy_item_head (where, what) 42#define store_ih(where,what) copy_item_head (where, what)
43 43
44// 44int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
45static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 45 filldir_t filldir, loff_t *pos)
46{ 46{
47 struct inode *inode = filp->f_path.dentry->d_inode; 47 struct inode *inode = dentry->d_inode;
48 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ 48 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */
49 INITIALIZE_PATH(path_to_entry); 49 INITIALIZE_PATH(path_to_entry);
50 struct buffer_head *bh; 50 struct buffer_head *bh;
@@ -64,13 +64,9 @@ static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
64 64
65 /* form key for search the next directory entry using f_pos field of 65 /* form key for search the next directory entry using f_pos field of
66 file structure */ 66 file structure */
67 make_cpu_key(&pos_key, inode, 67 make_cpu_key(&pos_key, inode, *pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3);
68 (filp->f_pos) ? (filp->f_pos) : DOT_OFFSET, TYPE_DIRENTRY,
69 3);
70 next_pos = cpu_key_k_offset(&pos_key); 68 next_pos = cpu_key_k_offset(&pos_key);
71 69
72 /* reiserfs_warning (inode->i_sb, "reiserfs_readdir 1: f_pos = %Ld", filp->f_pos); */
73
74 path_to_entry.reada = PATH_READA; 70 path_to_entry.reada = PATH_READA;
75 while (1) { 71 while (1) {
76 research: 72 research:
@@ -144,7 +140,7 @@ static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
144 /* Ignore the .reiserfs_priv entry */ 140 /* Ignore the .reiserfs_priv entry */
145 if (reiserfs_xattrs(inode->i_sb) && 141 if (reiserfs_xattrs(inode->i_sb) &&
146 !old_format_only(inode->i_sb) && 142 !old_format_only(inode->i_sb) &&
147 filp->f_path.dentry == inode->i_sb->s_root && 143 dentry == inode->i_sb->s_root &&
148 REISERFS_SB(inode->i_sb)->priv_root && 144 REISERFS_SB(inode->i_sb)->priv_root &&
149 REISERFS_SB(inode->i_sb)->priv_root->d_inode 145 REISERFS_SB(inode->i_sb)->priv_root->d_inode
150 && deh_objectid(deh) == 146 && deh_objectid(deh) ==
@@ -156,7 +152,7 @@ static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
156 } 152 }
157 153
158 d_off = deh_offset(deh); 154 d_off = deh_offset(deh);
159 filp->f_pos = d_off; 155 *pos = d_off;
160 d_ino = deh_objectid(deh); 156 d_ino = deh_objectid(deh);
161 if (d_reclen <= 32) { 157 if (d_reclen <= 32) {
162 local_buf = small_buf; 158 local_buf = small_buf;
@@ -223,15 +219,21 @@ static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
223 219
224 } /* while */ 220 } /* while */
225 221
226 end: 222end:
227 filp->f_pos = next_pos; 223 *pos = next_pos;
228 pathrelse(&path_to_entry); 224 pathrelse(&path_to_entry);
229 reiserfs_check_path(&path_to_entry); 225 reiserfs_check_path(&path_to_entry);
230 out: 226out:
231 reiserfs_write_unlock(inode->i_sb); 227 reiserfs_write_unlock(inode->i_sb);
232 return ret; 228 return ret;
233} 229}
234 230
231static int reiserfs_readdir(struct file *file, void *dirent, filldir_t filldir)
232{
233 struct dentry *dentry = file->f_path.dentry;
234 return reiserfs_readdir_dentry(dentry, dirent, filldir, &file->f_pos);
235}
236
235/* compose directory item containing "." and ".." entries (entries are 237/* compose directory item containing "." and ".." entries (entries are
236 not aligned to 4 byte boundary) */ 238 not aligned to 4 byte boundary) */
237/* the last four params are LE */ 239/* the last four params are LE */
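
Splitting reiserfs_readdir_dentry() out of the file_operations entry point lets internal code walk a directory with nothing but a dentry and its own position cookie, presumably for the xattr code this series now builds unconditionally (see the Makefile hunk above). A hypothetical internal caller (fill_one_entry stands in for a filldir_t callback):

    loff_t pos = 0;         /* directory position cookie, caller-owned */
    err = reiserfs_readdir_dentry(dentry, buf, fill_one_entry, &pos);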
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 2f87f5b14630..4beb964a2a3e 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -29,6 +29,43 @@ struct tree_balance *cur_tb = NULL; /* detects whether more than one
29 is interrupting do_balance */ 29 is interrupting do_balance */
30#endif 30#endif
31 31
32static inline void buffer_info_init_left(struct tree_balance *tb,
33 struct buffer_info *bi)
34{
35 bi->tb = tb;
36 bi->bi_bh = tb->L[0];
37 bi->bi_parent = tb->FL[0];
38 bi->bi_position = get_left_neighbor_position(tb, 0);
39}
40
41static inline void buffer_info_init_right(struct tree_balance *tb,
42 struct buffer_info *bi)
43{
44 bi->tb = tb;
45 bi->bi_bh = tb->R[0];
46 bi->bi_parent = tb->FR[0];
47 bi->bi_position = get_right_neighbor_position(tb, 0);
48}
49
50static inline void buffer_info_init_tbS0(struct tree_balance *tb,
51 struct buffer_info *bi)
52{
53 bi->tb = tb;
54 bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
55 bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
56 bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
57}
58
59static inline void buffer_info_init_bh(struct tree_balance *tb,
60 struct buffer_info *bi,
61 struct buffer_head *bh)
62{
63 bi->tb = tb;
64 bi->bi_bh = bh;
65 bi->bi_parent = NULL;
66 bi->bi_position = 0;
67}
68
32inline void do_balance_mark_leaf_dirty(struct tree_balance *tb, 69inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
33 struct buffer_head *bh, int flag) 70 struct buffer_head *bh, int flag)
34{ 71{
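
The four buffer_info_init_*() helpers added above exist purely to collapse the repeated four-line buffer_info setup blocks that the following hunks delete; each call site shrinks to a single line, e.g. (trailing arguments abbreviated):

    struct buffer_info bi;

    buffer_info_init_left(tb, &bi);     /* was: four bi.* assignments */
    leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, /* ... */);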
@@ -39,21 +76,21 @@ inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
39#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty 76#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
40#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty 77#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
41 78
42/* summary: 79/* summary:
43 if deleting something ( tb->insert_size[0] < 0 ) 80 if deleting something ( tb->insert_size[0] < 0 )
44 return(balance_leaf_when_delete()); (flag d handled here) 81 return(balance_leaf_when_delete()); (flag d handled here)
45 else 82 else
46 if lnum is larger than 0 we put items into the left node 83 if lnum is larger than 0 we put items into the left node
47 if rnum is larger than 0 we put items into the right node 84 if rnum is larger than 0 we put items into the right node
48 if snum1 is larger than 0 we put items into the new node s1 85 if snum1 is larger than 0 we put items into the new node s1
49 if snum2 is larger than 0 we put items into the new node s2 86 if snum2 is larger than 0 we put items into the new node s2
50Note that all *num* count new items being created. 87Note that all *num* count new items being created.
51 88
52It would be easier to read balance_leaf() if each of these summary 89It would be easier to read balance_leaf() if each of these summary
53lines was a separate procedure rather than being inlined. I think 90lines was a separate procedure rather than being inlined. I think
54that there are many passages here and in balance_leaf_when_delete() in 91that there are many passages here and in balance_leaf_when_delete() in
55which two calls to one procedure can replace two passages, and it 92which two calls to one procedure can replace two passages, and it
56might save cache space and improve software maintenance costs to do so. 93might save cache space and improve software maintenance costs to do so.
57 94
58Vladimir made the perceptive comment that we should offload most of 95Vladimir made the perceptive comment that we should offload most of
59the decision making in this function into fix_nodes/check_balance, and 96the decision making in this function into fix_nodes/check_balance, and
@@ -86,6 +123,7 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
86 "PAP-12010: tree can not be empty"); 123 "PAP-12010: tree can not be empty");
87 124
88 ih = B_N_PITEM_HEAD(tbS0, item_pos); 125 ih = B_N_PITEM_HEAD(tbS0, item_pos);
126 buffer_info_init_tbS0(tb, &bi);
89 127
90 /* Delete or truncate the item */ 128 /* Delete or truncate the item */
91 129
@@ -96,10 +134,6 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
96 "vs-12013: mode Delete, insert size %d, ih to be deleted %h", 134 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
97 -tb->insert_size[0], ih); 135 -tb->insert_size[0], ih);
98 136
99 bi.tb = tb;
100 bi.bi_bh = tbS0;
101 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
102 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
103 leaf_delete_items(&bi, 0, item_pos, 1, -1); 137 leaf_delete_items(&bi, 0, item_pos, 1, -1);
104 138
105 if (!item_pos && tb->CFL[0]) { 139 if (!item_pos && tb->CFL[0]) {
@@ -121,10 +155,6 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
121 break; 155 break;
122 156
123 case M_CUT:{ /* cut item in S[0] */ 157 case M_CUT:{ /* cut item in S[0] */
124 bi.tb = tb;
125 bi.bi_bh = tbS0;
126 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
127 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
128 if (is_direntry_le_ih(ih)) { 158 if (is_direntry_le_ih(ih)) {
129 159
130 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */ 160 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
@@ -153,8 +183,8 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
153 183
154 default: 184 default:
155 print_cur_tb("12040"); 185 print_cur_tb("12040");
156 reiserfs_panic(tb->tb_sb, 186 reiserfs_panic(tb->tb_sb, "PAP-12040",
157 "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)", 187 "unexpected mode: %s(%d)",
158 (flag == 188 (flag ==
159 M_PASTE) ? "PASTE" : ((flag == 189 M_PASTE) ? "PASTE" : ((flag ==
160 M_INSERT) ? "INSERT" : 190 M_INSERT) ? "INSERT" :
@@ -258,15 +288,15 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 )
 {
 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0]
 of the affected item */
 struct buffer_info bi;
 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
 int snum[2]; /* number of items that will be placed
 into S_new (includes partially shifted
 items) */
 int sbytes[2]; /* if an item is partially shifted into S_new then
 if it is a directory item
 it is the number of entries from the item that are shifted into S_new
 else
 it is the number of bytes from the item that are shifted into S_new
@@ -325,11 +355,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 ih_item_len(ih));
 
 /* Insert new item into L[0] */
-bi.tb = tb;
-bi.bi_bh = tb->L[0];
-bi.bi_parent = tb->FL[0];
-bi.bi_position =
-get_left_neighbor_position(tb, 0);
+buffer_info_init_left(tb, &bi);
 leaf_insert_into_buf(&bi,
 n + item_pos -
 ret_val, ih, body,
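
The left-neighbor variant, again a sketch reconstructed from the assignments removed above rather than from the patch's own definition:

static inline void buffer_info_init_left(struct tree_balance *tb,
                                         struct buffer_info *bi)
{
	bi->tb = tb;
	bi->bi_bh = tb->L[0];
	bi->bi_parent = tb->FL[0];
	bi->bi_position = get_left_neighbor_position(tb, 0);
}
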
@@ -369,11 +395,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 leaf_shift_left(tb, tb->lnum[0] - 1,
 tb->lbytes);
 /* Insert new item into L[0] */
-bi.tb = tb;
-bi.bi_bh = tb->L[0];
-bi.bi_parent = tb->FL[0];
-bi.bi_position =
-get_left_neighbor_position(tb, 0);
+buffer_info_init_left(tb, &bi);
 leaf_insert_into_buf(&bi,
 n + item_pos -
 ret_val, ih, body,
@@ -429,13 +451,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 }
 
 /* Append given directory entry to directory item */
-bi.tb = tb;
-bi.bi_bh = tb->L[0];
-bi.bi_parent =
-tb->FL[0];
-bi.bi_position =
-get_left_neighbor_position
-(tb, 0);
+buffer_info_init_left(tb, &bi);
 leaf_paste_in_buffer
 (&bi,
 n + item_pos -
@@ -449,8 +465,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 /* when we have merge directory item, pos_in_item has been changed too */
 
 /* paste new directory entry. 1 is entry number */
-leaf_paste_entries(bi.
-bi_bh,
+leaf_paste_entries(&bi,
 n +
 item_pos
 -
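
Note that leaf_paste_entries() now takes &bi rather than bi.bi_bh: the callee receives the whole buffer_info, so it can reach the tree_balance when dirtying the buffer. Judging only from the call sites in this diff, the prototype presumably changed along these lines (every parameter past the first is illustrative, not confirmed by this hunk):

/* Assumed new prototype; only the first-argument change is visible
 * in this diff. */
void leaf_paste_entries(struct buffer_info *bi, int item_num, int before,
                        int new_entry_count,
                        struct reiserfs_de_head *new_dehs,
                        const char *records, int paste_size);
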
@@ -524,13 +539,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 (tbS0,
 item_pos)));
 /* Append to body of item in L[0] */
-bi.tb = tb;
-bi.bi_bh = tb->L[0];
-bi.bi_parent =
-tb->FL[0];
-bi.bi_position =
-get_left_neighbor_position
-(tb, 0);
+buffer_info_init_left(tb, &bi);
 leaf_paste_in_buffer
 (&bi,
 n + item_pos -
@@ -681,11 +690,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 leaf_shift_left(tb, tb->lnum[0],
 tb->lbytes);
 /* Append to body of item in L[0] */
-bi.tb = tb;
-bi.bi_bh = tb->L[0];
-bi.bi_parent = tb->FL[0];
-bi.bi_position =
-get_left_neighbor_position(tb, 0);
+buffer_info_init_left(tb, &bi);
 leaf_paste_in_buffer(&bi,
 n + item_pos -
 ret_val,
@@ -699,7 +704,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 n + item_pos -
 ret_val);
 if (is_direntry_le_ih(pasted))
-leaf_paste_entries(bi.bi_bh,
+leaf_paste_entries(&bi,
 n +
 item_pos -
 ret_val,
@@ -722,8 +727,9 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 }
 break;
 default: /* cases d and t */
-reiserfs_panic(tb->tb_sb,
-"PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
+reiserfs_panic(tb->tb_sb, "PAP-12130",
+"lnum > 0: unexpected mode: "
+" %s(%d)",
 (flag ==
 M_DELETE) ? "DELETE" : ((flag ==
 M_CUT)
@@ -776,11 +782,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 set_le_ih_k_offset(ih, offset);
 put_ih_item_len(ih, tb->rbytes);
 /* Insert part of the item into R[0] */
-bi.tb = tb;
-bi.bi_bh = tb->R[0];
-bi.bi_parent = tb->FR[0];
-bi.bi_position =
-get_right_neighbor_position(tb, 0);
+buffer_info_init_right(tb, &bi);
 if ((old_len - tb->rbytes) > zeros_num) {
 r_zeros_number = 0;
 r_body =
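
The right-neighbor variant, mirroring the removed assignments (a sketch, not the patch's own code):

static inline void buffer_info_init_right(struct tree_balance *tb,
                                          struct buffer_info *bi)
{
	bi->tb = tb;
	bi->bi_bh = tb->R[0];
	bi->bi_parent = tb->FR[0];
	bi->bi_position = get_right_neighbor_position(tb, 0);
}
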
@@ -817,11 +819,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 tb->rnum[0] - 1,
 tb->rbytes);
 /* Insert new item into R[0] */
-bi.tb = tb;
-bi.bi_bh = tb->R[0];
-bi.bi_parent = tb->FR[0];
-bi.bi_position =
-get_right_neighbor_position(tb, 0);
+buffer_info_init_right(tb, &bi);
 leaf_insert_into_buf(&bi,
 item_pos - n +
 tb->rnum[0] - 1,
@@ -881,21 +879,14 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 pos_in_item -
 entry_count +
 tb->rbytes - 1;
-bi.tb = tb;
-bi.bi_bh = tb->R[0];
-bi.bi_parent =
-tb->FR[0];
-bi.bi_position =
-get_right_neighbor_position
-(tb, 0);
+buffer_info_init_right(tb, &bi);
 leaf_paste_in_buffer
 (&bi, 0,
 paste_entry_position,
 tb->insert_size[0],
 body, zeros_num);
 /* paste entry */
-leaf_paste_entries(bi.
-bi_bh,
+leaf_paste_entries(&bi,
 0,
 paste_entry_position,
 1,
@@ -1019,12 +1010,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 (tb, tb->CFR[0], 0);
 
 /* Append part of body into R[0] */
-bi.tb = tb;
-bi.bi_bh = tb->R[0];
-bi.bi_parent = tb->FR[0];
-bi.bi_position =
-get_right_neighbor_position
-(tb, 0);
+buffer_info_init_right(tb, &bi);
 if (n_rem > zeros_num) {
 r_zeros_number = 0;
 r_body =
@@ -1071,12 +1057,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 tb->rbytes);
 /* append item in R[0] */
 if (pos_in_item >= 0) {
-bi.tb = tb;
-bi.bi_bh = tb->R[0];
-bi.bi_parent = tb->FR[0];
-bi.bi_position =
-get_right_neighbor_position
-(tb, 0);
+buffer_info_init_right(tb, &bi);
 leaf_paste_in_buffer(&bi,
 item_pos -
 n +
@@ -1096,7 +1077,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 tb->rnum[0]);
 if (is_direntry_le_ih(pasted)
 && pos_in_item >= 0) {
-leaf_paste_entries(bi.bi_bh,
+leaf_paste_entries(&bi,
 item_pos -
 n +
 tb->rnum[0],
@@ -1136,8 +1117,8 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 }
 break;
 default: /* cases d and t */
-reiserfs_panic(tb->tb_sb,
-"PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
+reiserfs_panic(tb->tb_sb, "PAP-12175",
+"rnum > 0: unexpected mode: %s(%d)",
 (flag ==
 M_DELETE) ? "DELETE" : ((flag ==
 M_CUT) ? "CUT"
@@ -1167,8 +1148,8 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 not set correctly */
 if (tb->CFL[0]) {
 if (!tb->CFR[0])
-reiserfs_panic(tb->tb_sb,
-"vs-12195: balance_leaf: CFR not initialized");
+reiserfs_panic(tb->tb_sb, "vs-12195",
+"CFR not initialized");
 copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
@@ -1232,10 +1213,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 put_ih_item_len(ih, sbytes[i]);
 
 /* Insert part of the item into S_new[i] before 0-th item */
-bi.tb = tb;
-bi.bi_bh = S_new[i];
-bi.bi_parent = NULL;
-bi.bi_position = 0;
+buffer_info_init_bh(tb, &bi, S_new[i]);
 
 if ((old_len - sbytes[i]) > zeros_num) {
 r_zeros_number = 0;
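
For freshly allocated nodes such as S_new[i] there is no parent in the tree yet, which the fourth variant encodes (sketch inferred from the removed assignments above):

static inline void buffer_info_init_bh(struct tree_balance *tb,
                                       struct buffer_info *bi,
                                       struct buffer_head *bh)
{
	bi->tb = tb;
	bi->bi_bh = bh;
	bi->bi_parent = NULL;	/* new node, not yet linked into the tree */
	bi->bi_position = 0;
}
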
@@ -1267,10 +1245,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 S_new[i]);
 
 /* Insert new item into S_new[i] */
-bi.tb = tb;
-bi.bi_bh = S_new[i];
-bi.bi_parent = NULL;
-bi.bi_position = 0;
+buffer_info_init_bh(tb, &bi, S_new[i]);
 leaf_insert_into_buf(&bi,
 item_pos - n +
 snum[i] - 1, ih,
@@ -1327,10 +1302,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 sbytes[i] - 1,
 S_new[i]);
 /* Paste given directory entry to directory item */
-bi.tb = tb;
-bi.bi_bh = S_new[i];
-bi.bi_parent = NULL;
-bi.bi_position = 0;
+buffer_info_init_bh(tb, &bi, S_new[i]);
 leaf_paste_in_buffer
 (&bi, 0,
 pos_in_item -
@@ -1339,8 +1311,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 tb->insert_size[0],
 body, zeros_num);
 /* paste new directory entry */
-leaf_paste_entries(bi.
-bi_bh,
+leaf_paste_entries(&bi,
 0,
 pos_in_item
 -
@@ -1401,11 +1372,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 if (n_rem < 0)
 n_rem = 0;
 /* Append part of body into S_new[0] */
-bi.tb = tb;
-bi.bi_bh = S_new[i];
-bi.bi_parent = NULL;
-bi.bi_position = 0;
-
+buffer_info_init_bh(tb, &bi, S_new[i]);
 if (n_rem > zeros_num) {
 r_zeros_number = 0;
 r_body =
@@ -1475,7 +1442,10 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 && (pos_in_item != ih_item_len(ih_check)
 || tb->insert_size[0] <= 0))
 reiserfs_panic(tb->tb_sb,
-"PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
+"PAP-12235",
+"pos_in_item "
+"must be equal "
+"to ih_item_len");
 #endif /* CONFIG_REISERFS_CHECK */
 
 leaf_mi =
@@ -1489,10 +1459,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 leaf_mi);
 
 /* paste into item */
-bi.tb = tb;
-bi.bi_bh = S_new[i];
-bi.bi_parent = NULL;
-bi.bi_position = 0;
+buffer_info_init_bh(tb, &bi, S_new[i]);
 leaf_paste_in_buffer(&bi,
 item_pos - n +
 snum[i],
@@ -1505,7 +1472,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 item_pos - n +
 snum[i]);
 if (is_direntry_le_ih(pasted)) {
-leaf_paste_entries(bi.bi_bh,
+leaf_paste_entries(&bi,
 item_pos -
 n + snum[i],
 pos_in_item,
@@ -1535,8 +1502,8 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 }
 break;
 default: /* cases d and t */
-reiserfs_panic(tb->tb_sb,
-"PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
+reiserfs_panic(tb->tb_sb, "PAP-12245",
+"blknum > 2: unexpected mode: %s(%d)",
 (flag ==
 M_DELETE) ? "DELETE" : ((flag ==
 M_CUT) ? "CUT"
@@ -1559,10 +1526,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 
 switch (flag) {
 case M_INSERT: /* insert item into S[0] */
-bi.tb = tb;
-bi.bi_bh = tbS0;
-bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
-bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
+buffer_info_init_tbS0(tb, &bi);
 leaf_insert_into_buf(&bi, item_pos, ih, body,
 zeros_num);
 
@@ -1589,14 +1553,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 "PAP-12260: insert_size is 0 already");
 
 /* prepare space */
-bi.tb = tb;
-bi.bi_bh = tbS0;
-bi.bi_parent =
-PATH_H_PPARENT(tb->tb_path,
-0);
-bi.bi_position =
-PATH_H_POSITION(tb->tb_path,
-1);
+buffer_info_init_tbS0(tb, &bi);
 leaf_paste_in_buffer(&bi,
 item_pos,
 pos_in_item,
@@ -1606,7 +1563,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 zeros_num);
 
 /* paste entry */
-leaf_paste_entries(bi.bi_bh,
+leaf_paste_entries(&bi,
 item_pos,
 pos_in_item,
 1,
@@ -1644,14 +1601,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 RFALSE(tb->insert_size[0] <= 0,
 "PAP-12275: insert size must not be %d",
 tb->insert_size[0]);
-bi.tb = tb;
-bi.bi_bh = tbS0;
-bi.bi_parent =
-PATH_H_PPARENT(tb->tb_path,
-0);
-bi.bi_position =
-PATH_H_POSITION(tb->tb_path,
-1);
+buffer_info_init_tbS0(tb, &bi);
 leaf_paste_in_buffer(&bi,
 item_pos,
 pos_in_item,
@@ -1681,10 +1631,11 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 print_cur_tb("12285");
 reiserfs_panic(tb->
 tb_sb,
-"PAP-12285: balance_leaf: insert_size must be 0 (%d)",
-tb->
-insert_size
-[0]);
+"PAP-12285",
+"insert_size "
+"must be 0 "
+"(%d)",
+tb->insert_size[0]);
 }
 }
 #endif /* CONFIG_REISERFS_CHECK */
@@ -1697,11 +1648,10 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 if (flag == M_PASTE && tb->insert_size[0]) {
 print_cur_tb("12290");
 reiserfs_panic(tb->tb_sb,
-"PAP-12290: balance_leaf: insert_size is still not 0 (%d)",
+"PAP-12290", "insert_size is still not 0 (%d)",
 tb->insert_size[0]);
 }
 #endif /* CONFIG_REISERFS_CHECK */
-
 return 0;
 } /* Leaf level of the tree is balanced (end of balance_leaf) */
 
@@ -1724,7 +1674,6 @@ void make_empty_node(struct buffer_info *bi)
 struct buffer_head *get_FEB(struct tree_balance *tb)
 {
 int i;
-struct buffer_head *first_b;
 struct buffer_info bi;
 
 for (i = 0; i < MAX_FEB_SIZE; i++)
@@ -1732,19 +1681,15 @@ struct buffer_head *get_FEB(struct tree_balance *tb)
 break;
 
 if (i == MAX_FEB_SIZE)
-reiserfs_panic(tb->tb_sb,
-"vs-12300: get_FEB: FEB list is empty");
+reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");
 
-bi.tb = tb;
-bi.bi_bh = first_b = tb->FEB[i];
-bi.bi_parent = NULL;
-bi.bi_position = 0;
+buffer_info_init_bh(tb, &bi, tb->FEB[i]);
 make_empty_node(&bi);
-set_buffer_uptodate(first_b);
+set_buffer_uptodate(tb->FEB[i]);
+tb->used[i] = tb->FEB[i];
 tb->FEB[i] = NULL;
-tb->used[i] = first_b;
 
-return (first_b);
+return tb->used[i];
 }
 
 /* This is now used because reiserfs_free_block has to be able to
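
Stitched back together from the surviving lines of the two hunks above, get_FEB() after the patch presumably reads as follows; the loop condition falls between the hunks and is an assumption:

struct buffer_head *get_FEB(struct tree_balance *tb)
{
	int i;
	struct buffer_info bi;

	/* find the first slot that still holds a free empty buffer */
	for (i = 0; i < MAX_FEB_SIZE; i++)
		if (tb->FEB[i] != NULL)	/* condition not visible in the diff */
			break;

	if (i == MAX_FEB_SIZE)
		reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");

	buffer_info_init_bh(tb, &bi, tb->FEB[i]);
	make_empty_node(&bi);
	set_buffer_uptodate(tb->FEB[i]);

	/* hand the buffer over from the FEB array to the used array;
	 * keeping it reachable there makes the old first_b local
	 * unnecessary */
	tb->used[i] = tb->FEB[i];
	tb->FEB[i] = NULL;

	return tb->used[i];
}
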
@@ -1755,15 +1700,16 @@ static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
 int i;
 
 if (buffer_dirty(bh))
-reiserfs_warning(tb->tb_sb,
-"store_thrown deals with dirty buffer");
+reiserfs_warning(tb->tb_sb, "reiserfs-12320",
+"called with dirty buffer");
 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
 if (!tb->thrown[i]) {
 tb->thrown[i] = bh;
 get_bh(bh); /* free_thrown puts this */
 return;
 }
-reiserfs_warning(tb->tb_sb, "store_thrown: too many thrown buffers");
+reiserfs_warning(tb->tb_sb, "reiserfs-12321",
+"too many thrown buffers");
 }
 
 static void free_thrown(struct tree_balance *tb)
@@ -1774,8 +1720,8 @@ static void free_thrown(struct tree_balance *tb)
 if (tb->thrown[i]) {
 blocknr = tb->thrown[i]->b_blocknr;
 if (buffer_dirty(tb->thrown[i]))
-reiserfs_warning(tb->tb_sb,
-"free_thrown deals with dirty buffer %d",
+reiserfs_warning(tb->tb_sb, "reiserfs-12322",
+"called with dirty buffer %d",
 blocknr);
 brelse(tb->thrown[i]); /* incremented in store_thrown */
 reiserfs_free_block(tb->transaction_handle, NULL,
@@ -1873,20 +1819,19 @@ static void check_internal_node(struct super_block *s, struct buffer_head *bh,
 for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
 if (!is_reusable(s, dc_block_number(dc), 1)) {
 print_cur_tb(mes);
-reiserfs_panic(s,
-"PAP-12338: check_internal_node: invalid child pointer %y in %b",
+reiserfs_panic(s, "PAP-12338",
+"invalid child pointer %y in %b",
 dc, bh);
 }
 }
 }
 
-static int locked_or_not_in_tree(struct buffer_head *bh, char *which)
+static int locked_or_not_in_tree(struct tree_balance *tb,
+struct buffer_head *bh, char *which)
 {
 if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
 !B_IS_IN_TREE(bh)) {
-reiserfs_warning(NULL,
-"vs-12339: locked_or_not_in_tree: %s (%b)",
-which, bh);
+reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
 return 1;
 }
 return 0;
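
Passing the tree_balance down lets the warning name the affected filesystem (tb->tb_sb) instead of the old NULL superblock. Read straight down the new side of the hunk, the helper becomes:

static int locked_or_not_in_tree(struct tree_balance *tb,
                                 struct buffer_head *bh, char *which)
{
	if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
	    !B_IS_IN_TREE(bh)) {
		reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
		return 1;
	}
	return 0;
}
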
@@ -1897,26 +1842,28 @@ static int check_before_balancing(struct tree_balance *tb)
 int retval = 0;
 
 if (cur_tb) {
-reiserfs_panic(tb->tb_sb, "vs-12335: check_before_balancing: "
-"suspect that schedule occurred based on cur_tb not being null at this point in code. "
-"do_balance cannot properly handle schedule occurring while it runs.");
+reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
+"occurred based on cur_tb not being null at "
+"this point in code. do_balance cannot properly "
+"handle schedule occurring while it runs.");
 }
 
 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
 prepped all of these for us). */
 if (tb->lnum[0]) {
-retval |= locked_or_not_in_tree(tb->L[0], "L[0]");
-retval |= locked_or_not_in_tree(tb->FL[0], "FL[0]");
-retval |= locked_or_not_in_tree(tb->CFL[0], "CFL[0]");
+retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
+retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
+retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
 check_leaf(tb->L[0]);
 }
 if (tb->rnum[0]) {
-retval |= locked_or_not_in_tree(tb->R[0], "R[0]");
-retval |= locked_or_not_in_tree(tb->FR[0], "FR[0]");
-retval |= locked_or_not_in_tree(tb->CFR[0], "CFR[0]");
+retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
+retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
+retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
 check_leaf(tb->R[0]);
 }
-retval |= locked_or_not_in_tree(PATH_PLAST_BUFFER(tb->tb_path), "S[0]");
+retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
+"S[0]");
 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
 
 return retval;
@@ -1930,8 +1877,8 @@ static void check_after_balance_leaf(struct tree_balance *tb)
 dc_size(B_N_CHILD
 (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
 print_cur_tb("12221");
-reiserfs_panic(tb->tb_sb,
-"PAP-12355: check_after_balance_leaf: shift to left was incorrect");
+reiserfs_panic(tb->tb_sb, "PAP-12355",
+"shift to left was incorrect");
 }
 }
 if (tb->rnum[0]) {
@@ -1940,8 +1887,8 @@ static void check_after_balance_leaf(struct tree_balance *tb)
 dc_size(B_N_CHILD
 (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
 print_cur_tb("12222");
-reiserfs_panic(tb->tb_sb,
-"PAP-12360: check_after_balance_leaf: shift to right was incorrect");
+reiserfs_panic(tb->tb_sb, "PAP-12360",
+"shift to right was incorrect");
 }
 }
 if (PATH_H_PBUFFER(tb->tb_path, 1) &&
@@ -1955,7 +1902,7 @@ static void check_after_balance_leaf(struct tree_balance *tb)
 PATH_H_POSITION(tb->tb_path,
 1))));
 print_cur_tb("12223");
-reiserfs_warning(tb->tb_sb,
+reiserfs_warning(tb->tb_sb, "reiserfs-12363",
 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
 left,
@@ -1966,8 +1913,7 @@ static void check_after_balance_leaf(struct tree_balance *tb)
 (PATH_H_PBUFFER(tb->tb_path, 1),
 PATH_H_POSITION(tb->tb_path, 1))),
 right);
-reiserfs_panic(tb->tb_sb,
-"PAP-12365: check_after_balance_leaf: S is incorrect");
+reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
 }
 }
 
@@ -2037,7 +1983,7 @@ static inline void do_balance_starts(struct tree_balance *tb)
 /* store_print_tb (tb); */
 
 /* do not delete, just comment it out */
 /* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
 "check");*/
 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
 #ifdef CONFIG_REISERFS_CHECK
@@ -2102,14 +2048,13 @@ void do_balance(struct tree_balance *tb, /* tree_balance structure */
 tb->need_balance_dirty = 0;
 
 if (FILESYSTEM_CHANGED_TB(tb)) {
-reiserfs_panic(tb->tb_sb,
-"clm-6000: do_balance, fs generation has changed\n");
+reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
+"changed");
 }
 /* if we have no real work to do */
 if (!tb->insert_size[0]) {
-reiserfs_warning(tb->tb_sb,
-"PAP-12350: do_balance: insert_size == 0, mode == %c",
-flag);
+reiserfs_warning(tb->tb_sb, "PAP-12350",
+"insert_size == 0, mode == %c", flag);
 unfix_nodes(tb);
 return;
 }
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 33408417038c..9f436668b7f8 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -20,14 +20,14 @@
 ** insertion/balancing, for files that are written in one write.
 ** It avoids unnecessary tail packings (balances) for files that are written in
 ** multiple writes and are small enough to have tails.
 **
 ** file_release is called by the VFS layer when the file is closed. If
 ** this is the last open file descriptor, and the file
 ** small enough to have a tail, and the tail is currently in an
 ** unformatted node, the tail is converted back into a direct item.
 **
 ** We use reiserfs_truncate_file to pack the tail, since it already has
 ** all the conditions coded.
 */
 static int reiserfs_file_release(struct inode *inode, struct file *filp)
 {
@@ -76,7 +76,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
 * and let the admin know what is going on.
 */
 igrab(inode);
-reiserfs_warning(inode->i_sb,
+reiserfs_warning(inode->i_sb, "clm-9001",
 "pinning inode %lu because the "
 "preallocation can't be freed",
 inode->i_ino);
@@ -134,23 +134,23 @@ static void reiserfs_vfs_truncate_file(struct inode *inode)
 * be removed...
 */
 
-static int reiserfs_sync_file(struct file *p_s_filp,
-struct dentry *p_s_dentry, int datasync)
+static int reiserfs_sync_file(struct file *filp,
+struct dentry *dentry, int datasync)
 {
-struct inode *p_s_inode = p_s_dentry->d_inode;
-int n_err;
+struct inode *inode = dentry->d_inode;
+int err;
 int barrier_done;
 
-BUG_ON(!S_ISREG(p_s_inode->i_mode));
-n_err = sync_mapping_buffers(p_s_inode->i_mapping);
-reiserfs_write_lock(p_s_inode->i_sb);
-barrier_done = reiserfs_commit_for_inode(p_s_inode);
-reiserfs_write_unlock(p_s_inode->i_sb);
-if (barrier_done != 1 && reiserfs_barrier_flush(p_s_inode->i_sb))
-blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL);
+BUG_ON(!S_ISREG(inode->i_mode));
+err = sync_mapping_buffers(inode->i_mapping);
+reiserfs_write_lock(inode->i_sb);
+barrier_done = reiserfs_commit_for_inode(inode);
+reiserfs_write_unlock(inode->i_sb);
+if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
+blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 if (barrier_done < 0)
 return barrier_done;
-return (n_err < 0) ? -EIO : 0;
+return (err < 0) ? -EIO : 0;
 }
 
 /* taken fs/buffer.c:__block_commit_write */
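
The reiserfs_sync_file() hunk is the first of many in this series that strip the old Hungarian-style prefixes (p_s_ for struct pointers, n_ for integers, a_ for arrays) without changing behavior; the fix_node.c hunks below continue the same cleanup. An illustrative pairing taken from the hunk above:

/* before */ n_err = sync_mapping_buffers(p_s_inode->i_mapping);
/* after  */ err = sync_mapping_buffers(inode->i_mapping);
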
@@ -223,7 +223,7 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
 }
 
 /* Write @count bytes at position @ppos in a file indicated by @file
 from the buffer @buf.
 
 generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want
 something simple that works. It is not for serious use by general purpose filesystems, excepting the one that it was
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 07d05e0842b7..5e5a4e6fbaf8 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -30,8 +30,8 @@
 ** get_direct_parent
 ** get_neighbors
 ** fix_nodes
 **
 **
 **/
 
 #include <linux/time.h>
@@ -135,8 +135,7 @@ static void create_virtual_node(struct tree_balance *tb, int h)
 vn->vn_free_ptr +=
 op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
 if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
-reiserfs_panic(tb->tb_sb,
-"vs-8030: create_virtual_node: "
+reiserfs_panic(tb->tb_sb, "vs-8030",
 "virtual node space consumed");
 
 if (!is_affected)
@@ -186,8 +185,9 @@ static void create_virtual_node(struct tree_balance *tb, int h)
 && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) {
 /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */
 print_block(Sh, 0, -1, -1);
-reiserfs_panic(tb->tb_sb,
-"vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c",
+reiserfs_panic(tb->tb_sb, "vs-8045",
+"rdkey %k, affected item==%d "
+"(mode==%c) Must be %c",
 key, vn->vn_affected_item_num,
 vn->vn_mode, M_DELETE);
 }
@@ -377,9 +377,9 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
 int needed_nodes;
 int start_item, /* position of item we start filling node from */
 end_item, /* position of item we finish filling node by */
 start_bytes, /* number of first bytes (entries for directory) of start_item-th item
 we do not include into node that is being filled */
 end_bytes; /* number of last bytes (entries for directory) of end_item-th item
 we do node include into node that is being filled */
 int split_item_positions[2]; /* these are positions in virtual item of
 items, that are split between S[0] and
@@ -496,8 +496,8 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
 snum012[needed_nodes - 1 + 3] = units;
 
 if (needed_nodes > 2)
-reiserfs_warning(tb->tb_sb, "vs-8111: get_num_ver: "
-"split_item_position is out of boundary");
+reiserfs_warning(tb->tb_sb, "vs-8111",
+"split_item_position is out of range");
 snum012[needed_nodes - 1]++;
 split_item_positions[needed_nodes - 1] = i;
 needed_nodes++;
@@ -533,8 +533,8 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
 
 if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY &&
 vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT)
-reiserfs_warning(tb->tb_sb, "vs-8115: get_num_ver: not "
-"directory or indirect item");
+reiserfs_warning(tb->tb_sb, "vs-8115",
+"not directory or indirect item");
 }
 
 /* now we know S2bytes, calculate S1bytes */
@@ -569,7 +569,7 @@ extern struct tree_balance *cur_tb;
 
 /* Set parameters for balancing.
 * Performs write of results of analysis of balancing into structure tb,
 * where it will later be used by the functions that actually do the balancing.
 * Parameters:
 * tb tree_balance structure;
 * h current level of the node;
@@ -749,25 +749,26 @@
 -1, -1);\
 }
 
-static void free_buffers_in_tb(struct tree_balance *p_s_tb)
+static void free_buffers_in_tb(struct tree_balance *tb)
 {
-int n_counter;
+int i;
 
-decrement_counters_in_path(p_s_tb->tb_path);
+pathrelse(tb->tb_path);
 
-for (n_counter = 0; n_counter < MAX_HEIGHT; n_counter++) {
-decrement_bcount(p_s_tb->L[n_counter]);
-p_s_tb->L[n_counter] = NULL;
-decrement_bcount(p_s_tb->R[n_counter]);
-p_s_tb->R[n_counter] = NULL;
-decrement_bcount(p_s_tb->FL[n_counter]);
-p_s_tb->FL[n_counter] = NULL;
-decrement_bcount(p_s_tb->FR[n_counter]);
-p_s_tb->FR[n_counter] = NULL;
-decrement_bcount(p_s_tb->CFL[n_counter]);
-p_s_tb->CFL[n_counter] = NULL;
-decrement_bcount(p_s_tb->CFR[n_counter]);
-p_s_tb->CFR[n_counter] = NULL;
+for (i = 0; i < MAX_HEIGHT; i++) {
+brelse(tb->L[i]);
+brelse(tb->R[i]);
+brelse(tb->FL[i]);
+brelse(tb->FR[i]);
+brelse(tb->CFL[i]);
+brelse(tb->CFR[i]);
+
+tb->L[i] = NULL;
+tb->R[i] = NULL;
+tb->FL[i] = NULL;
+tb->FR[i] = NULL;
+tb->CFL[i] = NULL;
+tb->CFR[i] = NULL;
 }
 }
 
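
free_buffers_in_tb() can call brelse() directly because brelse() already tolerates a NULL buffer head, which is apparently all the old decrement_bcount() wrapper added. From include/linux/buffer_head.h, quoted from memory, so treat it as a sketch:

static inline void brelse(struct buffer_head *bh)
{
	if (bh)
		__brelse(bh);	/* drop one reference on the buffer */
}
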
@@ -777,14 +778,14 @@ static void free_buffers_in_tb(struct tree_balance *p_s_tb)
 * NO_DISK_SPACE - no disk space.
 */
 /* The function is NOT SCHEDULE-SAFE! */
-static int get_empty_nodes(struct tree_balance *p_s_tb, int n_h)
+static int get_empty_nodes(struct tree_balance *tb, int h)
 {
-struct buffer_head *p_s_new_bh,
-*p_s_Sh = PATH_H_PBUFFER(p_s_tb->tb_path, n_h);
-b_blocknr_t *p_n_blocknr, a_n_blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
-int n_counter, n_number_of_freeblk, n_amount_needed, /* number of needed empty blocks */
-n_retval = CARRY_ON;
-struct super_block *p_s_sb = p_s_tb->tb_sb;
+struct buffer_head *new_bh,
+*Sh = PATH_H_PBUFFER(tb->tb_path, h);
+b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
+int counter, number_of_freeblk, amount_needed, /* number of needed empty blocks */
+retval = CARRY_ON;
+struct super_block *sb = tb->tb_sb;
 
 /* number_of_freeblk is the number of empty blocks which have been
 acquired for use by the balancing algorithm minus the number of
@@ -792,7 +793,7 @@ static int get_empty_nodes(struct tree_balance *p_s_tb, int n_h)
 number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs
 after empty blocks are acquired, and the balancing analysis is
 then restarted, amount_needed is the number needed by this level
-(n_h) of the balancing analysis.
+(h) of the balancing analysis.
 
 Note that for systems with many processes writing, it would be
 more layout optimal to calculate the total number needed by all
@@ -800,54 +801,54 @@ static int get_empty_nodes(struct tree_balance *p_s_tb, int n_h)
 
 /* Initiate number_of_freeblk to the amount acquired prior to the restart of
 the analysis or 0 if not restarted, then subtract the amount needed
-by all of the levels of the tree below n_h. */
-/* blknum includes S[n_h], so we subtract 1 in this calculation */
-for (n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum;
-n_counter < n_h; n_counter++)
-n_number_of_freeblk -=
-(p_s_tb->blknum[n_counter]) ? (p_s_tb->blknum[n_counter] -
+by all of the levels of the tree below h. */
+/* blknum includes S[h], so we subtract 1 in this calculation */
+for (counter = 0, number_of_freeblk = tb->cur_blknum;
+counter < h; counter++)
+number_of_freeblk -=
+(tb->blknum[counter]) ? (tb->blknum[counter] -
 1) : 0;
 
 /* Allocate missing empty blocks. */
-/* if p_s_Sh == 0 then we are getting a new root */
-n_amount_needed = (p_s_Sh) ? (p_s_tb->blknum[n_h] - 1) : 1;
+/* if Sh == 0 then we are getting a new root */
+amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1;
 /* Amount_needed = the amount that we need more than the amount that we have. */
-if (n_amount_needed > n_number_of_freeblk)
-n_amount_needed -= n_number_of_freeblk;
+if (amount_needed > number_of_freeblk)
+amount_needed -= number_of_freeblk;
 else /* If we have enough already then there is nothing to do. */
 return CARRY_ON;
 
 /* No need to check quota - is not allocated for blocks used for formatted nodes */
-if (reiserfs_new_form_blocknrs(p_s_tb, a_n_blocknrs,
-n_amount_needed) == NO_DISK_SPACE)
+if (reiserfs_new_form_blocknrs(tb, blocknrs,
+amount_needed) == NO_DISK_SPACE)
 return NO_DISK_SPACE;
 
 /* for each blocknumber we just got, get a buffer and stick it on FEB */
-for (p_n_blocknr = a_n_blocknrs, n_counter = 0;
-n_counter < n_amount_needed; p_n_blocknr++, n_counter++) {
+for (blocknr = blocknrs, counter = 0;
+counter < amount_needed; blocknr++, counter++) {
 
-RFALSE(!*p_n_blocknr,
+RFALSE(!*blocknr,
 "PAP-8135: reiserfs_new_blocknrs failed when got new blocks");
 
-p_s_new_bh = sb_getblk(p_s_sb, *p_n_blocknr);
-RFALSE(buffer_dirty(p_s_new_bh) ||
-buffer_journaled(p_s_new_bh) ||
-buffer_journal_dirty(p_s_new_bh),
+new_bh = sb_getblk(sb, *blocknr);
+RFALSE(buffer_dirty(new_bh) ||
+buffer_journaled(new_bh) ||
+buffer_journal_dirty(new_bh),
 "PAP-8140: journlaled or dirty buffer %b for the new block",
-p_s_new_bh);
+new_bh);
 
 /* Put empty buffers into the array. */
-RFALSE(p_s_tb->FEB[p_s_tb->cur_blknum],
+RFALSE(tb->FEB[tb->cur_blknum],
 "PAP-8141: busy slot for new buffer");
 
-set_buffer_journal_new(p_s_new_bh);
-p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh;
+set_buffer_journal_new(new_bh);
+tb->FEB[tb->cur_blknum++] = new_bh;
 }
 
-if (n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB(p_s_tb))
-n_retval = REPEAT_SEARCH;
+if (retval == CARRY_ON && FILESYSTEM_CHANGED_TB(tb))
+retval = REPEAT_SEARCH;
 
-return n_retval;
+return retval;
 }
 
 /* Get free space of the left neighbor, which is stored in the parent
@@ -895,35 +896,36 @@ static int get_rfree(struct tree_balance *tb, int h)
 }
 
 /* Check whether left neighbor is in memory. */
-static int is_left_neighbor_in_cache(struct tree_balance *p_s_tb, int n_h)
+static int is_left_neighbor_in_cache(struct tree_balance *tb, int h)
 {
-struct buffer_head *p_s_father, *left;
-struct super_block *p_s_sb = p_s_tb->tb_sb;
-b_blocknr_t n_left_neighbor_blocknr;
-int n_left_neighbor_position;
+struct buffer_head *father, *left;
+struct super_block *sb = tb->tb_sb;
+b_blocknr_t left_neighbor_blocknr;
+int left_neighbor_position;
 
-if (!p_s_tb->FL[n_h]) /* Father of the left neighbor does not exist. */
+/* Father of the left neighbor does not exist. */
+if (!tb->FL[h])
 return 0;
 
 /* Calculate father of the node to be balanced. */
-p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1);
+father = PATH_H_PBUFFER(tb->tb_path, h + 1);
 
-RFALSE(!p_s_father ||
-!B_IS_IN_TREE(p_s_father) ||
-!B_IS_IN_TREE(p_s_tb->FL[n_h]) ||
-!buffer_uptodate(p_s_father) ||
-!buffer_uptodate(p_s_tb->FL[n_h]),
+RFALSE(!father ||
+!B_IS_IN_TREE(father) ||
+!B_IS_IN_TREE(tb->FL[h]) ||
+!buffer_uptodate(father) ||
+!buffer_uptodate(tb->FL[h]),
 "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
-p_s_father, p_s_tb->FL[n_h]);
+father, tb->FL[h]);
 
 /* Get position of the pointer to the left neighbor into the left father. */
-n_left_neighbor_position = (p_s_father == p_s_tb->FL[n_h]) ?
-p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb->FL[n_h]);
+left_neighbor_position = (father == tb->FL[h]) ?
+tb->lkey[h] : B_NR_ITEMS(tb->FL[h]);
 /* Get left neighbor block number. */
-n_left_neighbor_blocknr =
-B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position);
+left_neighbor_blocknr =
+B_N_CHILD_NUM(tb->FL[h], left_neighbor_position);
 /* Look for the left neighbor in the cache. */
-if ((left = sb_find_get_block(p_s_sb, n_left_neighbor_blocknr))) {
+if ((left = sb_find_get_block(sb, left_neighbor_blocknr))) {
 
 RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left),
 "vs-8170: left neighbor (%b %z) is not in the tree",
@@ -938,10 +940,10 @@ static int is_left_neighbor_in_cache(struct tree_balance *p_s_tb, int n_h)
 #define LEFT_PARENTS 'l'
 #define RIGHT_PARENTS 'r'
 
-static void decrement_key(struct cpu_key *p_s_key)
+static void decrement_key(struct cpu_key *key)
 {
 // call item specific function for this key
-item_ops[cpu_key_k_type(p_s_key)]->decrement_key(p_s_key);
+item_ops[cpu_key_k_type(key)]->decrement_key(key);
 }
 
 /* Calculate far left/right parent of the left/right neighbor of the current node, that
@@ -952,77 +954,77 @@ static void decrement_key(struct cpu_key *p_s_key)
 SCHEDULE_OCCURRED - schedule occurred while the function worked;
 * CARRY_ON - schedule didn't occur while the function worked;
 */
-static int get_far_parent(struct tree_balance *p_s_tb,
-int n_h,
-struct buffer_head **pp_s_father,
-struct buffer_head **pp_s_com_father, char c_lr_par)
+static int get_far_parent(struct tree_balance *tb,
+int h,
+struct buffer_head **pfather,
+struct buffer_head **pcom_father, char c_lr_par)
 {
-struct buffer_head *p_s_parent;
+struct buffer_head *parent;
 INITIALIZE_PATH(s_path_to_neighbor_father);
-struct treepath *p_s_path = p_s_tb->tb_path;
+struct treepath *path = tb->tb_path;
 struct cpu_key s_lr_father_key;
-int n_counter,
-n_position = INT_MAX,
-n_first_last_position = 0,
-n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h);
+int counter,
+position = INT_MAX,
+first_last_position = 0,
+path_offset = PATH_H_PATH_OFFSET(path, h);
 
-/* Starting from F[n_h] go upwards in the tree, and look for the common
-ancestor of F[n_h], and its neighbor l/r, that should be obtained. */
+/* Starting from F[h] go upwards in the tree, and look for the common
+ancestor of F[h], and its neighbor l/r, that should be obtained. */
 
-n_counter = n_path_offset;
+counter = path_offset;
 
-RFALSE(n_counter < FIRST_PATH_ELEMENT_OFFSET,
+RFALSE(counter < FIRST_PATH_ELEMENT_OFFSET,
 "PAP-8180: invalid path length");
 
-for (; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter--) {
+for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) {
 /* Check whether parent of the current buffer in the path is really parent in the tree. */
 if (!B_IS_IN_TREE
-(p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1)))
+(parent = PATH_OFFSET_PBUFFER(path, counter - 1)))
 return REPEAT_SEARCH;
 /* Check whether position in the parent is correct. */
-if ((n_position =
-PATH_OFFSET_POSITION(p_s_path,
-n_counter - 1)) >
-B_NR_ITEMS(p_s_parent))
+if ((position =
+PATH_OFFSET_POSITION(path,
+counter - 1)) >
+B_NR_ITEMS(parent))
 return REPEAT_SEARCH;
 /* Check whether parent at the path really points to the child. */
-if (B_N_CHILD_NUM(p_s_parent, n_position) !=
-PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr)
+if (B_N_CHILD_NUM(parent, position) !=
+PATH_OFFSET_PBUFFER(path, counter)->b_blocknr)
 return REPEAT_SEARCH;
 /* Return delimiting key if position in the parent is not equal to first/last one. */
 if (c_lr_par == RIGHT_PARENTS)
-n_first_last_position = B_NR_ITEMS(p_s_parent);
-if (n_position != n_first_last_position) {
-*pp_s_com_father = p_s_parent;
-get_bh(*pp_s_com_father);
-/*(*pp_s_com_father = p_s_parent)->b_count++; */
+first_last_position = B_NR_ITEMS(parent);
+if (position != first_last_position) {
+*pcom_father = parent;
+get_bh(*pcom_father);
+/*(*pcom_father = parent)->b_count++; */
 break;
 }
 }
 
 /* if we are in the root of the tree, then there is no common father */
-if (n_counter == FIRST_PATH_ELEMENT_OFFSET) {
+if (counter == FIRST_PATH_ELEMENT_OFFSET) {
 /* Check whether first buffer in the path is the root of the tree. */
 if (PATH_OFFSET_PBUFFER
-(p_s_tb->tb_path,
+(tb->tb_path,
 FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
-SB_ROOT_BLOCK(p_s_tb->tb_sb)) {
-*pp_s_father = *pp_s_com_father = NULL;
+SB_ROOT_BLOCK(tb->tb_sb)) {
+*pfather = *pcom_father = NULL;
 return CARRY_ON;
 }
 return REPEAT_SEARCH;
 }
 
-RFALSE(B_LEVEL(*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL,
+RFALSE(B_LEVEL(*pcom_father) <= DISK_LEAF_NODE_LEVEL,
 "PAP-8185: (%b %z) level too small",
-*pp_s_com_father, *pp_s_com_father);
+*pcom_father, *pcom_father);
 
 /* Check whether the common parent is locked. */
 
-if (buffer_locked(*pp_s_com_father)) {
-__wait_on_buffer(*pp_s_com_father);
-if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
-decrement_bcount(*pp_s_com_father);
+if (buffer_locked(*pcom_father)) {
+__wait_on_buffer(*pcom_father);
+if (FILESYSTEM_CHANGED_TB(tb)) {
+brelse(*pcom_father);
 return REPEAT_SEARCH;
 }
 }
@@ -1032,128 +1034,131 @@ static int get_far_parent(struct tree_balance *p_s_tb,
 
 /* Form key to get parent of the left/right neighbor. */
 le_key2cpu_key(&s_lr_father_key,
-B_N_PDELIM_KEY(*pp_s_com_father,
+B_N_PDELIM_KEY(*pcom_father,
 (c_lr_par ==
-LEFT_PARENTS) ? (p_s_tb->lkey[n_h - 1] =
-n_position -
-1) : (p_s_tb->rkey[n_h -
+LEFT_PARENTS) ? (tb->lkey[h - 1] =
+position -
+1) : (tb->rkey[h -
 1] =
-n_position)));
+position)));
 
 if (c_lr_par == LEFT_PARENTS)
 decrement_key(&s_lr_father_key);
 
 if (search_by_key
-(p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
-n_h + 1) == IO_ERROR)
+(tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
+h + 1) == IO_ERROR)
 // path is released
 return IO_ERROR;
 
-if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
-decrement_counters_in_path(&s_path_to_neighbor_father);
-decrement_bcount(*pp_s_com_father);
+if (FILESYSTEM_CHANGED_TB(tb)) {
+pathrelse(&s_path_to_neighbor_father);
+brelse(*pcom_father);
 return REPEAT_SEARCH;
 }
 
-*pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);
+*pfather = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);
 
-RFALSE(B_LEVEL(*pp_s_father) != n_h + 1,
-"PAP-8190: (%b %z) level too small", *pp_s_father, *pp_s_father);
+RFALSE(B_LEVEL(*pfather) != h + 1,
+"PAP-8190: (%b %z) level too small", *pfather, *pfather);
 RFALSE(s_path_to_neighbor_father.path_length <
 FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small");
 
 s_path_to_neighbor_father.path_length--;
-decrement_counters_in_path(&s_path_to_neighbor_father);
+pathrelse(&s_path_to_neighbor_father);
 return CARRY_ON;
 }
 
-/* Get parents of neighbors of node in the path(S[n_path_offset]) and common parents of
-* S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset],
-* FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset].
-* Calculate numbers of left and right delimiting keys position: lkey[n_path_offset], rkey[n_path_offset].
+/* Get parents of neighbors of node in the path(S[path_offset]) and common parents of
+* S[path_offset] and L[path_offset]/R[path_offset]: F[path_offset], FL[path_offset],
+* FR[path_offset], CFL[path_offset], CFR[path_offset].
+* Calculate numbers of left and right delimiting keys position: lkey[path_offset], rkey[path_offset].
 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
 * CARRY_ON - schedule didn't occur while the function worked;
 */
-static int get_parents(struct tree_balance *p_s_tb, int n_h)
+static int get_parents(struct tree_balance *tb, int h)
 {
-struct treepath *p_s_path = p_s_tb->tb_path;
-int n_position,
-n_ret_value,
-n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
-struct buffer_head *p_s_curf, *p_s_curcf;
+struct treepath *path = tb->tb_path;
+int position,
+ret,
+path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h);
+struct buffer_head *curf, *curcf;
 
 /* Current node is the root of the tree or will be root of the tree */
-if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
+if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
 /* The root can not have parents.
 Release nodes which previously were obtained as parents of the current node neighbors. */
-decrement_bcount(p_s_tb->FL[n_h]);
-decrement_bcount(p_s_tb->CFL[n_h]);
-decrement_bcount(p_s_tb->FR[n_h]);
-decrement_bcount(p_s_tb->CFR[n_h]);
-p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] =
-p_s_tb->CFR[n_h] = NULL;
+brelse(tb->FL[h]);
+brelse(tb->CFL[h]);
+brelse(tb->FR[h]);
+brelse(tb->CFR[h]);
+tb->FL[h] = NULL;
+tb->CFL[h] = NULL;
+tb->FR[h] = NULL;
+tb->CFR[h] = NULL;
 return CARRY_ON;
 }
 
-/* Get parent FL[n_path_offset] of L[n_path_offset]. */
-if ((n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1))) {
+/* Get parent FL[path_offset] of L[path_offset]. */
+position = PATH_OFFSET_POSITION(path, path_offset - 1);
+if (position) {
 /* Current node is not the first child of its parent. */
-/*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */
-p_s_curf = p_s_curcf =
-PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
-get_bh(p_s_curf);
-get_bh(p_s_curf);
-p_s_tb->lkey[n_h] = n_position - 1;
+curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
+curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
+get_bh(curf);
+get_bh(curf);
+tb->lkey[h] = position - 1;
 } else {
-/* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node.
-Calculate current common parent of L[n_path_offset] and the current node. Note that
-CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset].
+/* Calculate current parent of L[path_offset], which is the left neighbor of the current node.
+Calculate current common parent of L[path_offset] and the current node. Note that
+CFL[path_offset] not equal FL[path_offset] and CFL[path_offset] not equal F[path_offset].
1111 Calculate lkey[n_path_offset]. */ 1115 Calculate lkey[path_offset]. */
1112 if ((n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf, 1116 if ((ret = get_far_parent(tb, h + 1, &curf,
1113 &p_s_curcf, 1117 &curcf,
1114 LEFT_PARENTS)) != CARRY_ON) 1118 LEFT_PARENTS)) != CARRY_ON)
1115 return n_ret_value; 1119 return ret;
1116 } 1120 }
1117 1121
1118 decrement_bcount(p_s_tb->FL[n_h]); 1122 brelse(tb->FL[h]);
1119 p_s_tb->FL[n_h] = p_s_curf; /* New initialization of FL[n_h]. */ 1123 tb->FL[h] = curf; /* New initialization of FL[h]. */
1120 decrement_bcount(p_s_tb->CFL[n_h]); 1124 brelse(tb->CFL[h]);
1121 p_s_tb->CFL[n_h] = p_s_curcf; /* New initialization of CFL[n_h]. */ 1125 tb->CFL[h] = curcf; /* New initialization of CFL[h]. */
1122 1126
1123 RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) || 1127 RFALSE((curf && !B_IS_IN_TREE(curf)) ||
1124 (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)), 1128 (curcf && !B_IS_IN_TREE(curcf)),
1125 "PAP-8195: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf); 1129 "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf);
1126 1130
1127/* Get parent FR[n_h] of R[n_h]. */ 1131/* Get parent FR[h] of R[h]. */
1128 1132
1129/* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */ 1133/* Current node is the last child of F[h]. FR[h] != F[h]. */
1130 if (n_position == B_NR_ITEMS(PATH_H_PBUFFER(p_s_path, n_h + 1))) { 1134 if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) {
1131/* Calculate current parent of R[n_h], which is the right neighbor of F[n_h]. 1135/* Calculate current parent of R[h], which is the right neighbor of F[h].
1132 Calculate current common parent of R[n_h] and current node. Note that CFR[n_h] 1136 Calculate current common parent of R[h] and current node. Note that CFR[h]
1133 not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */ 1137 not equal FR[path_offset] and CFR[h] not equal F[h]. */
1134 if ((n_ret_value = 1138 if ((ret =
1135 get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf, 1139 get_far_parent(tb, h + 1, &curf, &curcf,
1136 RIGHT_PARENTS)) != CARRY_ON) 1140 RIGHT_PARENTS)) != CARRY_ON)
1137 return n_ret_value; 1141 return ret;
1138 } else { 1142 } else {
1139/* Current node is not the last child of its parent F[n_h]. */ 1143/* Current node is not the last child of its parent F[h]. */
1140 /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */ 1144 curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
1141 p_s_curf = p_s_curcf = 1145 curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
1142 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1); 1146 get_bh(curf);
1143 get_bh(p_s_curf); 1147 get_bh(curf);
1144 get_bh(p_s_curf); 1148 tb->rkey[h] = position;
1145 p_s_tb->rkey[n_h] = n_position;
1146 } 1149 }
1147 1150
1148 decrement_bcount(p_s_tb->FR[n_h]); 1151 brelse(tb->FR[h]);
1149 p_s_tb->FR[n_h] = p_s_curf; /* New initialization of FR[n_path_offset]. */ 1152 /* New initialization of FR[path_offset]. */
1153 tb->FR[h] = curf;
1150 1154
1151 decrement_bcount(p_s_tb->CFR[n_h]); 1155 brelse(tb->CFR[h]);
1152 p_s_tb->CFR[n_h] = p_s_curcf; /* New initialization of CFR[n_path_offset]. */ 1156 /* New initialization of CFR[path_offset]. */
1157 tb->CFR[h] = curcf;
1153 1158
1154 RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) || 1159 RFALSE((curf && !B_IS_IN_TREE(curf)) ||
1155 (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)), 1160 (curcf && !B_IS_IN_TREE(curcf)),
1156 "PAP-8205: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf); 1161 "PAP-8205: FR (%b) or CFR (%b) is invalid", curf, curcf);
1157 1162
1158 return CARRY_ON; 1163 return CARRY_ON;
1159} 1164}
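Beyond the renames, get_parents() also lifts an assignment out of an if condition (old line 1099 against new lines 1103-1104), per the kernel's CodingStyle. The shape of that transformation, isolated:

	/* before: assignment buried in the condition */
	if ((n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1))) {
		/* ... */
	}

	/* after: assign first, then test */
	position = PATH_OFFSET_POSITION(path, path_offset - 1);
	if (position) {
		/* ... */
	}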
@@ -1203,7 +1208,7 @@ static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
1203 * h current level of the node; 1208 * h current level of the node;
1204 * inum item number in S[h]; 1209 * inum item number in S[h];
1205 * mode i - insert, p - paste; 1210 * mode i - insert, p - paste;
1206 * Returns: 1 - schedule occurred; 1211 * Returns: 1 - schedule occurred;
1207 * 0 - balancing for higher levels needed; 1212 * 0 - balancing for higher levels needed;
1208 * -1 - no balancing for higher levels needed; 1213 * -1 - no balancing for higher levels needed;
1209 * -2 - no disk space. 1214 * -2 - no disk space.
@@ -1217,7 +1222,7 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1217 contains node being balanced. The mnemonic is 1222 contains node being balanced. The mnemonic is
1218 that the attempted change in node space used level 1223 that the attempted change in node space used level
1219 is levbytes bytes. */ 1224 is levbytes bytes. */
1220 n_ret_value; 1225 ret;
1221 1226
1222 int lfree, sfree, rfree /* free space in L, S and R */ ; 1227 int lfree, sfree, rfree /* free space in L, S and R */ ;
1223 1228
@@ -1238,7 +1243,7 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1238 /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters, 1243 /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters,
1239 where the 4th parameter is s1bytes and the 5th is s2bytes 1244 where the 4th parameter is s1bytes and the 5th is s2bytes
1240 */ 1245 */
1241 short snum012[40] = { 0, }; /* s0num, s1num, s2num for 8 cases 1246 short snum012[40] = { 0, }; /* s0num, s1num, s2num for 8 cases
1242 0,1 - do not shift and do not shift but bottle 1247 0,1 - do not shift and do not shift but bottle
1243 2 - shift only whole item to left 1248 2 - shift only whole item to left
1244 3 - shift to left and bottle as much as possible 1249 3 - shift to left and bottle as much as possible
@@ -1255,24 +1260,24 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1255 /* Calculate balance parameters for creating new root. */ 1260 /* Calculate balance parameters for creating new root. */
1256 if (!Sh) { 1261 if (!Sh) {
1257 if (!h) 1262 if (!h)
1258 reiserfs_panic(tb->tb_sb, 1263 reiserfs_panic(tb->tb_sb, "vs-8210",
1259 "vs-8210: ip_check_balance: S[0] can not be 0"); 1264 "S[0] can not be 0");
1260 switch (n_ret_value = get_empty_nodes(tb, h)) { 1265 switch (ret = get_empty_nodes(tb, h)) {
1261 case CARRY_ON: 1266 case CARRY_ON:
1262 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1267 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1263 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ 1268 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */
1264 1269
1265 case NO_DISK_SPACE: 1270 case NO_DISK_SPACE:
1266 case REPEAT_SEARCH: 1271 case REPEAT_SEARCH:
1267 return n_ret_value; 1272 return ret;
1268 default: 1273 default:
1269 reiserfs_panic(tb->tb_sb, 1274 reiserfs_panic(tb->tb_sb, "vs-8215", "incorrect "
1270 "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes"); 1275 "return value of get_empty_nodes");
1271 } 1276 }
1272 } 1277 }
1273 1278
1274 if ((n_ret_value = get_parents(tb, h)) != CARRY_ON) /* get parents of S[h] neighbors. */ 1279 if ((ret = get_parents(tb, h)) != CARRY_ON) /* get parents of S[h] neighbors. */
1275 return n_ret_value; 1280 return ret;
1276 1281
1277 sfree = B_FREE_SPACE(Sh); 1282 sfree = B_FREE_SPACE(Sh);
1278 1283
@@ -1287,7 +1292,7 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1287 1292
1288 create_virtual_node(tb, h); 1293 create_virtual_node(tb, h);
1289 1294
1290 /* 1295 /*
1291 determine maximal number of items we can shift to the left neighbor (in tb structure) 1296 determine maximal number of items we can shift to the left neighbor (in tb structure)
1292 and the maximal number of bytes that can flow to the left neighbor 1297 and the maximal number of bytes that can flow to the left neighbor
1293 from the left most liquid item that cannot be shifted from S[0] entirely (returned value) 1298 from the left most liquid item that cannot be shifted from S[0] entirely (returned value)
@@ -1348,13 +1353,13 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1348 1353
1349 { 1354 {
1350 int lpar, rpar, nset, lset, rset, lrset; 1355 int lpar, rpar, nset, lset, rset, lrset;
1351 /* 1356 /*
1352 * regular overflowing of the node 1357 * regular overflowing of the node
1353 */ 1358 */
1354 1359
1355 /* get_num_ver works in 2 modes (FLOW & NO_FLOW) 1360 /* get_num_ver works in 2 modes (FLOW & NO_FLOW)
1356 lpar, rpar - number of items we can shift to left/right neighbor (including splitting item) 1361 lpar, rpar - number of items we can shift to left/right neighbor (including splitting item)
1357 nset, lset, rset, lrset - show whether flowing items give better packing 1362 nset, lset, rset, lrset - show whether flowing items give better packing
1358 */ 1363 */
1359#define FLOW 1 1364#define FLOW 1
1360#define NO_FLOW 0 /* do not do any splitting */ 1365#define NO_FLOW 0 /* do not do any splitting */
@@ -1544,7 +1549,7 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1544 * h current level of the node; 1549 * h current level of the node;
1545 * inum item number in S[h]; 1550 * inum item number in S[h];
1546 * mode i - insert, p - paste; 1551 * mode i - insert, p - paste;
1547 * Returns: 1 - schedule occurred; 1552 * Returns: 1 - schedule occurred;
1548 * 0 - balancing for higher levels needed; 1553 * 0 - balancing for higher levels needed;
1549 * -1 - no balancing for higher levels needed; 1554 * -1 - no balancing for higher levels needed;
1550 * -2 - no disk space. 1555 * -2 - no disk space.
@@ -1559,7 +1564,7 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1559 /* Sh is the node whose balance is currently being checked, 1564 /* Sh is the node whose balance is currently being checked,
1560 and Fh is its father. */ 1565 and Fh is its father. */
1561 struct buffer_head *Sh, *Fh; 1566 struct buffer_head *Sh, *Fh;
1562 int maxsize, n_ret_value; 1567 int maxsize, ret;
1563 int lfree, rfree /* free space in L and R */ ; 1568 int lfree, rfree /* free space in L and R */ ;
1564 1569
1565 Sh = PATH_H_PBUFFER(tb->tb_path, h); 1570 Sh = PATH_H_PBUFFER(tb->tb_path, h);
@@ -1584,8 +1589,8 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1584 return CARRY_ON; 1589 return CARRY_ON;
1585 } 1590 }
1586 1591
1587 if ((n_ret_value = get_parents(tb, h)) != CARRY_ON) 1592 if ((ret = get_parents(tb, h)) != CARRY_ON)
1588 return n_ret_value; 1593 return ret;
1589 1594
1590 /* get free space of neighbors */ 1595 /* get free space of neighbors */
1591 rfree = get_rfree(tb, h); 1596 rfree = get_rfree(tb, h);
@@ -1727,7 +1732,7 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1727 * h current level of the node; 1732 * h current level of the node;
1728 * inum item number in S[h]; 1733 * inum item number in S[h];
1729 * mode i - insert, p - paste; 1734 * mode i - insert, p - paste;
1730 * Returns: 1 - schedule occurred; 1735 * Returns: 1 - schedule occurred;
1731 * 0 - balancing for higher levels needed; 1736 * 0 - balancing for higher levels needed;
1732 * -1 - no balancing for higher levels needed; 1737 * -1 - no balancing for higher levels needed;
1733 * -2 - no disk space. 1738 * -2 - no disk space.
@@ -1742,7 +1747,7 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1742 attempted change in node space used level is levbytes bytes. */ 1747 attempted change in node space used level is levbytes bytes. */
1743 int levbytes; 1748 int levbytes;
1744 /* the maximal item size */ 1749 /* the maximal item size */
1745 int maxsize, n_ret_value; 1750 int maxsize, ret;
1746 /* S0 is the node whose balance is currently being checked, 1751 /* S0 is the node whose balance is currently being checked,
1747 and F0 is its father. */ 1752 and F0 is its father. */
1748 struct buffer_head *S0, *F0; 1753 struct buffer_head *S0, *F0;
@@ -1764,8 +1769,8 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1764 return NO_BALANCING_NEEDED; 1769 return NO_BALANCING_NEEDED;
1765 } 1770 }
1766 1771
1767 if ((n_ret_value = get_parents(tb, h)) != CARRY_ON) 1772 if ((ret = get_parents(tb, h)) != CARRY_ON)
1768 return n_ret_value; 1773 return ret;
1769 1774
1770 /* get free space of neighbors */ 1775 /* get free space of neighbors */
1771 rfree = get_rfree(tb, h); 1776 rfree = get_rfree(tb, h);
@@ -1821,7 +1826,7 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1821 * h current level of the node; 1826 * h current level of the node;
1822 * inum item number in S[h]; 1827 * inum item number in S[h];
1823 * mode d - delete, c - cut. 1828 * mode d - delete, c - cut.
1824 * Returns: 1 - schedule occurred; 1829 * Returns: 1 - schedule occurred;
1825 * 0 - balancing for higher levels needed; 1830 * 0 - balancing for higher levels needed;
1826 * -1 - no balancing for higher levels needed; 1831 * -1 - no balancing for higher levels needed;
1827 * -2 - no disk space. 1832 * -2 - no disk space.
@@ -1850,7 +1855,7 @@ static int dc_check_balance(struct tree_balance *tb, int h)
1850 * h current level of the node; 1855 * h current level of the node;
1851 * inum item number in S[h]; 1856 * inum item number in S[h];
1852 * mode i - insert, p - paste, d - delete, c - cut. 1857 * mode i - insert, p - paste, d - delete, c - cut.
1853 * Returns: 1 - schedule occurred; 1858 * Returns: 1 - schedule occurred;
1854 * 0 - balancing for higher levels needed; 1859 * 0 - balancing for higher levels needed;
1855 * -1 - no balancing for higher levels needed; 1860 * -1 - no balancing for higher levels needed;
1856 * -2 - no disk space. 1861 * -2 - no disk space.
@@ -1884,137 +1889,138 @@ static int check_balance(int mode,
1884} 1889}
1885 1890
1886/* Check whether the parent at the path is really the parent of the current node. */ 1891/* Check whether the parent at the path is really the parent of the current node. */
1887static int get_direct_parent(struct tree_balance *p_s_tb, int n_h) 1892static int get_direct_parent(struct tree_balance *tb, int h)
1888{ 1893{
1889 struct buffer_head *p_s_bh; 1894 struct buffer_head *bh;
1890 struct treepath *p_s_path = p_s_tb->tb_path; 1895 struct treepath *path = tb->tb_path;
1891 int n_position, 1896 int position,
1892 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); 1897 path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h);
1893 1898
1894 /* We are in the root or in the new root. */ 1899 /* We are in the root or in the new root. */
1895 if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) { 1900 if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
1896 1901
1897 RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1, 1902 RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
1898 "PAP-8260: invalid offset in the path"); 1903 "PAP-8260: invalid offset in the path");
1899 1904
1900 if (PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)-> 1905 if (PATH_OFFSET_PBUFFER(path, FIRST_PATH_ELEMENT_OFFSET)->
1901 b_blocknr == SB_ROOT_BLOCK(p_s_tb->tb_sb)) { 1906 b_blocknr == SB_ROOT_BLOCK(tb->tb_sb)) {
1902 /* Root is not changed. */ 1907 /* Root is not changed. */
1903 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL; 1908 PATH_OFFSET_PBUFFER(path, path_offset - 1) = NULL;
1904 PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0; 1909 PATH_OFFSET_POSITION(path, path_offset - 1) = 0;
1905 return CARRY_ON; 1910 return CARRY_ON;
1906 } 1911 }
1907 return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */ 1912 return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */
1908 } 1913 }
1909 1914
1910 if (!B_IS_IN_TREE 1915 if (!B_IS_IN_TREE
1911 (p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))) 1916 (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1)))
1912 return REPEAT_SEARCH; /* Parent in the path is not in the tree. */ 1917 return REPEAT_SEARCH; /* Parent in the path is not in the tree. */
1913 1918
1914 if ((n_position = 1919 if ((position =
1915 PATH_OFFSET_POSITION(p_s_path, 1920 PATH_OFFSET_POSITION(path,
1916 n_path_offset - 1)) > B_NR_ITEMS(p_s_bh)) 1921 path_offset - 1)) > B_NR_ITEMS(bh))
1917 return REPEAT_SEARCH; 1922 return REPEAT_SEARCH;
1918 1923
1919 if (B_N_CHILD_NUM(p_s_bh, n_position) != 1924 if (B_N_CHILD_NUM(bh, position) !=
1920 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr) 1925 PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr)
1921 /* Parent in the path is not parent of the current node in the tree. */ 1926 /* Parent in the path is not parent of the current node in the tree. */
1922 return REPEAT_SEARCH; 1927 return REPEAT_SEARCH;
1923 1928
1924 if (buffer_locked(p_s_bh)) { 1929 if (buffer_locked(bh)) {
1925 __wait_on_buffer(p_s_bh); 1930 __wait_on_buffer(bh);
1926 if (FILESYSTEM_CHANGED_TB(p_s_tb)) 1931 if (FILESYSTEM_CHANGED_TB(tb))
1927 return REPEAT_SEARCH; 1932 return REPEAT_SEARCH;
1928 } 1933 }
1929 1934
1930 return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */ 1935 return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */
1931} 1936}
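Condensed, get_direct_parent() trusts the cached parent only after three checks, all visible in new lines 1915-1927 (child_blocknr below is a stand-in for PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr):

	/* 1: the cached buffer must still be part of the tree */
	if (!B_IS_IN_TREE(bh))
		return REPEAT_SEARCH;
	/* 2: the remembered child position must still be in range */
	if (position > B_NR_ITEMS(bh))
		return REPEAT_SEARCH;
	/* 3: the pointer at that position must still name our block */
	if (B_N_CHILD_NUM(bh, position) != child_blocknr)
		return REPEAT_SEARCH;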
1932 1937
1933/* Using lnum[n_h] and rnum[n_h] we should determine what neighbors 1938/* Using lnum[h] and rnum[h] we should determine what neighbors
1934 * of S[n_h] we 1939 * of S[h] we
1935 * need in order to balance S[n_h], and get them if necessary. 1940 * need in order to balance S[h], and get them if necessary.
1936 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 1941 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
1937 * CARRY_ON - schedule didn't occur while the function worked; 1942 * CARRY_ON - schedule didn't occur while the function worked;
1938 */ 1943 */
1939static int get_neighbors(struct tree_balance *p_s_tb, int n_h) 1944static int get_neighbors(struct tree_balance *tb, int h)
1940{ 1945{
1941 int n_child_position, 1946 int child_position,
1942 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1); 1947 path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h + 1);
1943 unsigned long n_son_number; 1948 unsigned long son_number;
1944 struct super_block *p_s_sb = p_s_tb->tb_sb; 1949 struct super_block *sb = tb->tb_sb;
1945 struct buffer_head *p_s_bh; 1950 struct buffer_head *bh;
1946 1951
1947 PROC_INFO_INC(p_s_sb, get_neighbors[n_h]); 1952 PROC_INFO_INC(sb, get_neighbors[h]);
1948 1953
1949 if (p_s_tb->lnum[n_h]) { 1954 if (tb->lnum[h]) {
1950 /* We need left neighbor to balance S[n_h]. */ 1955 /* We need left neighbor to balance S[h]. */
1951 PROC_INFO_INC(p_s_sb, need_l_neighbor[n_h]); 1956 PROC_INFO_INC(sb, need_l_neighbor[h]);
1952 p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); 1957 bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);
1953 1958
1954 RFALSE(p_s_bh == p_s_tb->FL[n_h] && 1959 RFALSE(bh == tb->FL[h] &&
1955 !PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset), 1960 !PATH_OFFSET_POSITION(tb->tb_path, path_offset),
1956 "PAP-8270: invalid position in the parent"); 1961 "PAP-8270: invalid position in the parent");
1957 1962
1958 n_child_position = 1963 child_position =
1959 (p_s_bh == 1964 (bh ==
1960 p_s_tb->FL[n_h]) ? p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb-> 1965 tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb->
1961 FL[n_h]); 1966 FL[h]);
1962 n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position); 1967 son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
1963 p_s_bh = sb_bread(p_s_sb, n_son_number); 1968 bh = sb_bread(sb, son_number);
1964 if (!p_s_bh) 1969 if (!bh)
1965 return IO_ERROR; 1970 return IO_ERROR;
1966 if (FILESYSTEM_CHANGED_TB(p_s_tb)) { 1971 if (FILESYSTEM_CHANGED_TB(tb)) {
1967 decrement_bcount(p_s_bh); 1972 brelse(bh);
1968 PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]); 1973 PROC_INFO_INC(sb, get_neighbors_restart[h]);
1969 return REPEAT_SEARCH; 1974 return REPEAT_SEARCH;
1970 } 1975 }
1971 1976
1972 RFALSE(!B_IS_IN_TREE(p_s_tb->FL[n_h]) || 1977 RFALSE(!B_IS_IN_TREE(tb->FL[h]) ||
1973 n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) || 1978 child_position > B_NR_ITEMS(tb->FL[h]) ||
1974 B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) != 1979 B_N_CHILD_NUM(tb->FL[h], child_position) !=
1975 p_s_bh->b_blocknr, "PAP-8275: invalid parent"); 1980 bh->b_blocknr, "PAP-8275: invalid parent");
1976 RFALSE(!B_IS_IN_TREE(p_s_bh), "PAP-8280: invalid child"); 1981 RFALSE(!B_IS_IN_TREE(bh), "PAP-8280: invalid child");
1977 RFALSE(!n_h && 1982 RFALSE(!h &&
1978 B_FREE_SPACE(p_s_bh) != 1983 B_FREE_SPACE(bh) !=
1979 MAX_CHILD_SIZE(p_s_bh) - 1984 MAX_CHILD_SIZE(bh) -
1980 dc_size(B_N_CHILD(p_s_tb->FL[0], n_child_position)), 1985 dc_size(B_N_CHILD(tb->FL[0], child_position)),
1981 "PAP-8290: invalid child size of left neighbor"); 1986 "PAP-8290: invalid child size of left neighbor");
1982 1987
1983 decrement_bcount(p_s_tb->L[n_h]); 1988 brelse(tb->L[h]);
1984 p_s_tb->L[n_h] = p_s_bh; 1989 tb->L[h] = bh;
1985 } 1990 }
1986 1991
1987 if (p_s_tb->rnum[n_h]) { /* We need right neighbor to balance S[n_path_offset]. */ 1992 /* We need right neighbor to balance S[path_offset]. */
1988 PROC_INFO_INC(p_s_sb, need_r_neighbor[n_h]); 1993 if (tb->rnum[h]) {
1989 p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); 1994 PROC_INFO_INC(sb, need_r_neighbor[h]);
1995 bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);
1990 1996
1991 RFALSE(p_s_bh == p_s_tb->FR[n_h] && 1997 RFALSE(bh == tb->FR[h] &&
1992 PATH_OFFSET_POSITION(p_s_tb->tb_path, 1998 PATH_OFFSET_POSITION(tb->tb_path,
1993 n_path_offset) >= 1999 path_offset) >=
1994 B_NR_ITEMS(p_s_bh), 2000 B_NR_ITEMS(bh),
1995 "PAP-8295: invalid position in the parent"); 2001 "PAP-8295: invalid position in the parent");
1996 2002
1997 n_child_position = 2003 child_position =
1998 (p_s_bh == p_s_tb->FR[n_h]) ? p_s_tb->rkey[n_h] + 1 : 0; 2004 (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0;
1999 n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position); 2005 son_number = B_N_CHILD_NUM(tb->FR[h], child_position);
2000 p_s_bh = sb_bread(p_s_sb, n_son_number); 2006 bh = sb_bread(sb, son_number);
2001 if (!p_s_bh) 2007 if (!bh)
2002 return IO_ERROR; 2008 return IO_ERROR;
2003 if (FILESYSTEM_CHANGED_TB(p_s_tb)) { 2009 if (FILESYSTEM_CHANGED_TB(tb)) {
2004 decrement_bcount(p_s_bh); 2010 brelse(bh);
2005 PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]); 2011 PROC_INFO_INC(sb, get_neighbors_restart[h]);
2006 return REPEAT_SEARCH; 2012 return REPEAT_SEARCH;
2007 } 2013 }
2008 decrement_bcount(p_s_tb->R[n_h]); 2014 brelse(tb->R[h]);
2009 p_s_tb->R[n_h] = p_s_bh; 2015 tb->R[h] = bh;
2010 2016
2011 RFALSE(!n_h 2017 RFALSE(!h
2012 && B_FREE_SPACE(p_s_bh) != 2018 && B_FREE_SPACE(bh) !=
2013 MAX_CHILD_SIZE(p_s_bh) - 2019 MAX_CHILD_SIZE(bh) -
2014 dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)), 2020 dc_size(B_N_CHILD(tb->FR[0], child_position)),
2015 "PAP-8300: invalid child size of right neighbor (%d != %d - %d)", 2021 "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
2016 B_FREE_SPACE(p_s_bh), MAX_CHILD_SIZE(p_s_bh), 2022 B_FREE_SPACE(bh), MAX_CHILD_SIZE(bh),
2017 dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position))); 2023 dc_size(B_N_CHILD(tb->FR[0], child_position)));
2018 2024
2019 } 2025 }
2020 return CARRY_ON; 2026 return CARRY_ON;
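Both the left- and right-neighbor halves of get_neighbors() repeat the same read-then-revalidate sequence; compressed from the left-hand case above:

	son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
	bh = sb_bread(sb, son_number);		/* may sleep on I/O */
	if (!bh)
		return IO_ERROR;
	if (FILESYSTEM_CHANGED_TB(tb)) {
		brelse(bh);			/* stale; retry from scratch */
		return REPEAT_SEARCH;
	}
	brelse(tb->L[h]);	/* release the previously cached neighbor */
	tb->L[h] = bh;		/* cache the fresh one */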
@@ -2088,52 +2094,46 @@ static int get_mem_for_virtual_node(struct tree_balance *tb)
2088} 2094}
2089 2095
2090#ifdef CONFIG_REISERFS_CHECK 2096#ifdef CONFIG_REISERFS_CHECK
2091static void tb_buffer_sanity_check(struct super_block *p_s_sb, 2097static void tb_buffer_sanity_check(struct super_block *sb,
2092 struct buffer_head *p_s_bh, 2098 struct buffer_head *bh,
2093 const char *descr, int level) 2099 const char *descr, int level)
2094{ 2100{
2095 if (p_s_bh) { 2101 if (bh) {
2096 if (atomic_read(&(p_s_bh->b_count)) <= 0) { 2102 if (atomic_read(&(bh->b_count)) <= 0)
2097 2103
2098 reiserfs_panic(p_s_sb, 2104 reiserfs_panic(sb, "jmacd-1", "negative or zero "
2099 "jmacd-1: tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n", 2105 "reference counter for buffer %s[%d] "
2100 descr, level, p_s_bh); 2106 "(%b)", descr, level, bh);
2101 } 2107
2102 2108 if (!buffer_uptodate(bh))
2103 if (!buffer_uptodate(p_s_bh)) { 2109 reiserfs_panic(sb, "jmacd-2", "buffer is not up "
2104 reiserfs_panic(p_s_sb, 2110 "to date %s[%d] (%b)",
2105 "jmacd-2: tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n", 2111 descr, level, bh);
2106 descr, level, p_s_bh); 2112
2107 } 2113 if (!B_IS_IN_TREE(bh))
2108 2114 reiserfs_panic(sb, "jmacd-3", "buffer is not "
2109 if (!B_IS_IN_TREE(p_s_bh)) { 2115 "in tree %s[%d] (%b)",
2110 reiserfs_panic(p_s_sb, 2116 descr, level, bh);
2111 "jmacd-3: tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n", 2117
2112 descr, level, p_s_bh); 2118 if (bh->b_bdev != sb->s_bdev)
2113 } 2119 reiserfs_panic(sb, "jmacd-4", "buffer has wrong "
2114 2120 "device %s[%d] (%b)",
2115 if (p_s_bh->b_bdev != p_s_sb->s_bdev) { 2121 descr, level, bh);
2116 reiserfs_panic(p_s_sb, 2122
2117 "jmacd-4: tb_buffer_sanity_check(): buffer has wrong device %s[%d] (%b)\n", 2123 if (bh->b_size != sb->s_blocksize)
2118 descr, level, p_s_bh); 2124 reiserfs_panic(sb, "jmacd-5", "buffer has wrong "
2119 } 2125 "blocksize %s[%d] (%b)",
2120 2126 descr, level, bh);
2121 if (p_s_bh->b_size != p_s_sb->s_blocksize) { 2127
2122 reiserfs_panic(p_s_sb, 2128 if (bh->b_blocknr > SB_BLOCK_COUNT(sb))
2123 "jmacd-5: tb_buffer_sanity_check(): buffer has wrong blocksize %s[%d] (%b)\n", 2129 reiserfs_panic(sb, "jmacd-6", "buffer block "
2124 descr, level, p_s_bh); 2130 "number too high %s[%d] (%b)",
2125 } 2131 descr, level, bh);
2126
2127 if (p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
2128 reiserfs_panic(p_s_sb,
2129 "jmacd-6: tb_buffer_sanity_check(): buffer block number too high %s[%d] (%b)\n",
2130 descr, level, p_s_bh);
2131 }
2132 } 2132 }
2133} 2133}
2134#else 2134#else
2135static void tb_buffer_sanity_check(struct super_block *p_s_sb, 2135static void tb_buffer_sanity_check(struct super_block *sb,
2136 struct buffer_head *p_s_bh, 2136 struct buffer_head *bh,
2137 const char *descr, int level) 2137 const char *descr, int level)
2138{; 2138{;
2139} 2139}
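The tb_buffer_sanity_check() rewrite also shows the reiserfs_panic() convention this series adopts: the assertion id moves out of the format string into its own argument, the function name is dropped (the panic path can report it), and the trailing newline goes away. Side by side, taken from the first check above:

	/* old: id and function name baked into the format string */
	reiserfs_panic(sb, "jmacd-1: tb_buffer_sanity_check(): negative or zero "
		       "reference counter for buffer %s[%d] (%b)\n",
		       descr, level, bh);

	/* new: id as its own argument, no function name, no newline */
	reiserfs_panic(sb, "jmacd-1", "negative or zero reference counter "
		       "for buffer %s[%d] (%b)", descr, level, bh);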
@@ -2144,7 +2144,7 @@ static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh)
2144 return reiserfs_prepare_for_journal(s, bh, 0); 2144 return reiserfs_prepare_for_journal(s, bh, 0);
2145} 2145}
2146 2146
2147static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb) 2147static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
2148{ 2148{
2149 struct buffer_head *locked; 2149 struct buffer_head *locked;
2150#ifdef CONFIG_REISERFS_CHECK 2150#ifdef CONFIG_REISERFS_CHECK
@@ -2156,95 +2156,94 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb)
2156 2156
2157 locked = NULL; 2157 locked = NULL;
2158 2158
2159 for (i = p_s_tb->tb_path->path_length; 2159 for (i = tb->tb_path->path_length;
2160 !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) { 2160 !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
2161 if (PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) { 2161 if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) {
2162 /* if I understand correctly, we can only be sure the last buffer 2162 /* if I understand correctly, we can only be sure the last buffer
2163 ** in the path is in the tree --clm 2163 ** in the path is in the tree --clm
2164 */ 2164 */
2165#ifdef CONFIG_REISERFS_CHECK 2165#ifdef CONFIG_REISERFS_CHECK
2166 if (PATH_PLAST_BUFFER(p_s_tb->tb_path) == 2166 if (PATH_PLAST_BUFFER(tb->tb_path) ==
2167 PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) { 2167 PATH_OFFSET_PBUFFER(tb->tb_path, i))
2168 tb_buffer_sanity_check(p_s_tb->tb_sb, 2168 tb_buffer_sanity_check(tb->tb_sb,
2169 PATH_OFFSET_PBUFFER 2169 PATH_OFFSET_PBUFFER
2170 (p_s_tb->tb_path, 2170 (tb->tb_path,
2171 i), "S", 2171 i), "S",
2172 p_s_tb->tb_path-> 2172 tb->tb_path->
2173 path_length - i); 2173 path_length - i);
2174 }
2175#endif 2174#endif
2176 if (!clear_all_dirty_bits(p_s_tb->tb_sb, 2175 if (!clear_all_dirty_bits(tb->tb_sb,
2177 PATH_OFFSET_PBUFFER 2176 PATH_OFFSET_PBUFFER
2178 (p_s_tb->tb_path, 2177 (tb->tb_path,
2179 i))) { 2178 i))) {
2180 locked = 2179 locked =
2181 PATH_OFFSET_PBUFFER(p_s_tb->tb_path, 2180 PATH_OFFSET_PBUFFER(tb->tb_path,
2182 i); 2181 i);
2183 } 2182 }
2184 } 2183 }
2185 } 2184 }
2186 2185
2187 for (i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i]; 2186 for (i = 0; !locked && i < MAX_HEIGHT && tb->insert_size[i];
2188 i++) { 2187 i++) {
2189 2188
2190 if (p_s_tb->lnum[i]) { 2189 if (tb->lnum[i]) {
2191 2190
2192 if (p_s_tb->L[i]) { 2191 if (tb->L[i]) {
2193 tb_buffer_sanity_check(p_s_tb->tb_sb, 2192 tb_buffer_sanity_check(tb->tb_sb,
2194 p_s_tb->L[i], 2193 tb->L[i],
2195 "L", i); 2194 "L", i);
2196 if (!clear_all_dirty_bits 2195 if (!clear_all_dirty_bits
2197 (p_s_tb->tb_sb, p_s_tb->L[i])) 2196 (tb->tb_sb, tb->L[i]))
2198 locked = p_s_tb->L[i]; 2197 locked = tb->L[i];
2199 } 2198 }
2200 2199
2201 if (!locked && p_s_tb->FL[i]) { 2200 if (!locked && tb->FL[i]) {
2202 tb_buffer_sanity_check(p_s_tb->tb_sb, 2201 tb_buffer_sanity_check(tb->tb_sb,
2203 p_s_tb->FL[i], 2202 tb->FL[i],
2204 "FL", i); 2203 "FL", i);
2205 if (!clear_all_dirty_bits 2204 if (!clear_all_dirty_bits
2206 (p_s_tb->tb_sb, p_s_tb->FL[i])) 2205 (tb->tb_sb, tb->FL[i]))
2207 locked = p_s_tb->FL[i]; 2206 locked = tb->FL[i];
2208 } 2207 }
2209 2208
2210 if (!locked && p_s_tb->CFL[i]) { 2209 if (!locked && tb->CFL[i]) {
2211 tb_buffer_sanity_check(p_s_tb->tb_sb, 2210 tb_buffer_sanity_check(tb->tb_sb,
2212 p_s_tb->CFL[i], 2211 tb->CFL[i],
2213 "CFL", i); 2212 "CFL", i);
2214 if (!clear_all_dirty_bits 2213 if (!clear_all_dirty_bits
2215 (p_s_tb->tb_sb, p_s_tb->CFL[i])) 2214 (tb->tb_sb, tb->CFL[i]))
2216 locked = p_s_tb->CFL[i]; 2215 locked = tb->CFL[i];
2217 } 2216 }
2218 2217
2219 } 2218 }
2220 2219
2221 if (!locked && (p_s_tb->rnum[i])) { 2220 if (!locked && (tb->rnum[i])) {
2222 2221
2223 if (p_s_tb->R[i]) { 2222 if (tb->R[i]) {
2224 tb_buffer_sanity_check(p_s_tb->tb_sb, 2223 tb_buffer_sanity_check(tb->tb_sb,
2225 p_s_tb->R[i], 2224 tb->R[i],
2226 "R", i); 2225 "R", i);
2227 if (!clear_all_dirty_bits 2226 if (!clear_all_dirty_bits
2228 (p_s_tb->tb_sb, p_s_tb->R[i])) 2227 (tb->tb_sb, tb->R[i]))
2229 locked = p_s_tb->R[i]; 2228 locked = tb->R[i];
2230 } 2229 }
2231 2230
2232 if (!locked && p_s_tb->FR[i]) { 2231 if (!locked && tb->FR[i]) {
2233 tb_buffer_sanity_check(p_s_tb->tb_sb, 2232 tb_buffer_sanity_check(tb->tb_sb,
2234 p_s_tb->FR[i], 2233 tb->FR[i],
2235 "FR", i); 2234 "FR", i);
2236 if (!clear_all_dirty_bits 2235 if (!clear_all_dirty_bits
2237 (p_s_tb->tb_sb, p_s_tb->FR[i])) 2236 (tb->tb_sb, tb->FR[i]))
2238 locked = p_s_tb->FR[i]; 2237 locked = tb->FR[i];
2239 } 2238 }
2240 2239
2241 if (!locked && p_s_tb->CFR[i]) { 2240 if (!locked && tb->CFR[i]) {
2242 tb_buffer_sanity_check(p_s_tb->tb_sb, 2241 tb_buffer_sanity_check(tb->tb_sb,
2243 p_s_tb->CFR[i], 2242 tb->CFR[i],
2244 "CFR", i); 2243 "CFR", i);
2245 if (!clear_all_dirty_bits 2244 if (!clear_all_dirty_bits
2246 (p_s_tb->tb_sb, p_s_tb->CFR[i])) 2245 (tb->tb_sb, tb->CFR[i]))
2247 locked = p_s_tb->CFR[i]; 2246 locked = tb->CFR[i];
2248 } 2247 }
2249 } 2248 }
2250 } 2249 }
@@ -2257,10 +2256,10 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb)
2257 ** --clm 2256 ** --clm
2258 */ 2257 */
2259 for (i = 0; !locked && i < MAX_FEB_SIZE; i++) { 2258 for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
2260 if (p_s_tb->FEB[i]) { 2259 if (tb->FEB[i]) {
2261 if (!clear_all_dirty_bits 2260 if (!clear_all_dirty_bits
2262 (p_s_tb->tb_sb, p_s_tb->FEB[i])) 2261 (tb->tb_sb, tb->FEB[i]))
2263 locked = p_s_tb->FEB[i]; 2262 locked = tb->FEB[i];
2264 } 2263 }
2265 } 2264 }
2266 2265
@@ -2268,21 +2267,20 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb)
2268#ifdef CONFIG_REISERFS_CHECK 2267#ifdef CONFIG_REISERFS_CHECK
2269 repeat_counter++; 2268 repeat_counter++;
2270 if ((repeat_counter % 10000) == 0) { 2269 if ((repeat_counter % 10000) == 0) {
2271 reiserfs_warning(p_s_tb->tb_sb, 2270 reiserfs_warning(tb->tb_sb, "reiserfs-8200",
2272 "wait_tb_buffers_until_released(): too many " 2271 "too many iterations waiting "
2273 "iterations waiting for buffer to unlock " 2272 "for buffer to unlock "
2274 "(%b)", locked); 2273 "(%b)", locked);
2275 2274
2276 /* Don't loop forever. Try to recover from possible error. */ 2275 /* Don't loop forever. Try to recover from possible error. */
2277 2276
2278 return (FILESYSTEM_CHANGED_TB(p_s_tb)) ? 2277 return (FILESYSTEM_CHANGED_TB(tb)) ?
2279 REPEAT_SEARCH : CARRY_ON; 2278 REPEAT_SEARCH : CARRY_ON;
2280 } 2279 }
2281#endif 2280#endif
2282 __wait_on_buffer(locked); 2281 __wait_on_buffer(locked);
2283 if (FILESYSTEM_CHANGED_TB(p_s_tb)) { 2282 if (FILESYSTEM_CHANGED_TB(tb))
2284 return REPEAT_SEARCH; 2283 return REPEAT_SEARCH;
2285 }
2286 } 2284 }
2287 2285
2288 } while (locked); 2286 } while (locked);
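Stripped of the per-array bookkeeping, wait_tb_buffers_until_unlocked() reduces to the loop below; locked is whichever buffer clear_all_dirty_bits() last found still busy (a sketch, not the full function):

	do {
		locked = NULL;
		/* scan the path, then L/FL/CFL, R/FR/CFR, then the FEB
		 * array; the first still-locked buffer ends the scan */
		if (locked) {
			__wait_on_buffer(locked);	/* sleep until released */
			if (FILESYSTEM_CHANGED_TB(tb))
				return REPEAT_SEARCH;	/* analysis is stale */
		}
	} while (locked);
	return CARRY_ON;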
@@ -2295,15 +2293,15 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb)
2295 * analyze what and where should be moved; 2293 * analyze what and where should be moved;
2296 * get sufficient number of new nodes; 2294 * get sufficient number of new nodes;
2297 * Balancing will start only after all the needed resources have been collected. 2295 * Balancing will start only after all the needed resources have been collected.
2298 * 2296 *
2299 * When ported to SMP kernels, only at the last moment after all needed nodes 2297 * When ported to SMP kernels, only at the last moment after all needed nodes
2300 * are collected in cache, will the resources be locked using the usual 2298 * are collected in cache, will the resources be locked using the usual
2301 * textbook ordered lock acquisition algorithms. Note that ensuring that 2299 * textbook ordered lock acquisition algorithms. Note that ensuring that
2302 * this code neither write locks what it does not need to write lock nor locks out of order 2300 * this code neither write locks what it does not need to write lock nor locks out of order
2303 * will be a pain in the butt that could have been avoided. Grumble grumble. -Hans 2301 * will be a pain in the butt that could have been avoided. Grumble grumble. -Hans
2304 * 2302 *
2305 * fix is meant in the sense of render unchanging 2303 * fix is meant in the sense of render unchanging
2306 * 2304 *
2307 * Latency might be improved by first gathering a list of what buffers are needed 2305 * Latency might be improved by first gathering a list of what buffers are needed
2308 * and then getting as many of them in parallel as possible? -Hans 2306 * and then getting as many of them in parallel as possible? -Hans
2309 * 2307 *
@@ -2312,159 +2310,160 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb)
2312 * tb tree_balance structure; 2310 * tb tree_balance structure;
2313 * inum item number in S[h]; 2311 * inum item number in S[h];
2314 * pos_in_item - comment this if you can 2312 * pos_in_item - comment this if you can
2315 * ins_ih & ins_sd are used when inserting 2313 * ins_ih item head of item being inserted
2314 * data inserted item or data to be pasted
2316 * Returns: 1 - schedule occurred while the function worked; 2315 * Returns: 1 - schedule occurred while the function worked;
2317 * 0 - schedule didn't occur while the function worked; 2316 * 0 - schedule didn't occur while the function worked;
2318 * -1 - if no_disk_space 2317 * -1 - if no_disk_space
2319 */ 2318 */
2320 2319
2321int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb, struct item_head *p_s_ins_ih, // item head of item being inserted 2320int fix_nodes(int op_mode, struct tree_balance *tb,
2322 const void *data // inserted item or data to be pasted 2321 struct item_head *ins_ih, const void *data)
2323 )
2324{ 2322{
2325 int n_ret_value, n_h, n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path); 2323 int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path);
2326 int n_pos_in_item; 2324 int pos_in_item;
2327 2325
2328 /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared 2326 /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared
2329 ** during wait_tb_buffers_run 2327 ** during wait_tb_buffers_run
2330 */ 2328 */
2331 int wait_tb_buffers_run = 0; 2329 int wait_tb_buffers_run = 0;
2332 struct buffer_head *p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path); 2330 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
2333 2331
2334 ++REISERFS_SB(p_s_tb->tb_sb)->s_fix_nodes; 2332 ++REISERFS_SB(tb->tb_sb)->s_fix_nodes;
2335 2333
2336 n_pos_in_item = p_s_tb->tb_path->pos_in_item; 2334 pos_in_item = tb->tb_path->pos_in_item;
2337 2335
2338 p_s_tb->fs_gen = get_generation(p_s_tb->tb_sb); 2336 tb->fs_gen = get_generation(tb->tb_sb);
2339 2337
2340 /* we prepare and log the super here so it will already be in the 2338 /* we prepare and log the super here so it will already be in the
2341 ** transaction when do_balance needs to change it. 2339 ** transaction when do_balance needs to change it.
2342 ** This way do_balance won't have to schedule when trying to prepare 2340 ** This way do_balance won't have to schedule when trying to prepare
2343 ** the super for logging 2341 ** the super for logging
2344 */ 2342 */
2345 reiserfs_prepare_for_journal(p_s_tb->tb_sb, 2343 reiserfs_prepare_for_journal(tb->tb_sb,
2346 SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1); 2344 SB_BUFFER_WITH_SB(tb->tb_sb), 1);
2347 journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb, 2345 journal_mark_dirty(tb->transaction_handle, tb->tb_sb,
2348 SB_BUFFER_WITH_SB(p_s_tb->tb_sb)); 2346 SB_BUFFER_WITH_SB(tb->tb_sb));
2349 if (FILESYSTEM_CHANGED_TB(p_s_tb)) 2347 if (FILESYSTEM_CHANGED_TB(tb))
2350 return REPEAT_SEARCH; 2348 return REPEAT_SEARCH;
2351 2349
2352 /* this is possible during an indirect_to_direct conversion */ 2350 /* this is possible during an indirect_to_direct conversion */
2353 if (buffer_locked(p_s_tbS0)) { 2351 if (buffer_locked(tbS0)) {
2354 __wait_on_buffer(p_s_tbS0); 2352 __wait_on_buffer(tbS0);
2355 if (FILESYSTEM_CHANGED_TB(p_s_tb)) 2353 if (FILESYSTEM_CHANGED_TB(tb))
2356 return REPEAT_SEARCH; 2354 return REPEAT_SEARCH;
2357 } 2355 }
2358#ifdef CONFIG_REISERFS_CHECK 2356#ifdef CONFIG_REISERFS_CHECK
2359 if (cur_tb) { 2357 if (cur_tb) {
2360 print_cur_tb("fix_nodes"); 2358 print_cur_tb("fix_nodes");
2361 reiserfs_panic(p_s_tb->tb_sb, 2359 reiserfs_panic(tb->tb_sb, "PAP-8305",
2362 "PAP-8305: fix_nodes: there is pending do_balance"); 2360 "there is pending do_balance");
2363 } 2361 }
2364 2362
2365 if (!buffer_uptodate(p_s_tbS0) || !B_IS_IN_TREE(p_s_tbS0)) { 2363 if (!buffer_uptodate(tbS0) || !B_IS_IN_TREE(tbS0))
2366 reiserfs_panic(p_s_tb->tb_sb, 2364 reiserfs_panic(tb->tb_sb, "PAP-8320", "S[0] (%b %z) is "
2367 "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate " 2365 "not uptodate at the beginning of fix_nodes "
2368 "at the beginning of fix_nodes or not in tree (mode %c)", 2366 "or not in tree (mode %c)",
2369 p_s_tbS0, p_s_tbS0, n_op_mode); 2367 tbS0, tbS0, op_mode);
2370 }
2371 2368
2372 /* Check parameters. */ 2369 /* Check parameters. */
2373 switch (n_op_mode) { 2370 switch (op_mode) {
2374 case M_INSERT: 2371 case M_INSERT:
2375 if (n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0)) 2372 if (item_num <= 0 || item_num > B_NR_ITEMS(tbS0))
2376 reiserfs_panic(p_s_tb->tb_sb, 2373 reiserfs_panic(tb->tb_sb, "PAP-8330", "Incorrect "
2377 "PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert", 2374 "item number %d (in S0 - %d) in case "
2378 n_item_num, B_NR_ITEMS(p_s_tbS0)); 2375 "of insert", item_num,
2376 B_NR_ITEMS(tbS0));
2379 break; 2377 break;
2380 case M_PASTE: 2378 case M_PASTE:
2381 case M_DELETE: 2379 case M_DELETE:
2382 case M_CUT: 2380 case M_CUT:
2383 if (n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0)) { 2381 if (item_num < 0 || item_num >= B_NR_ITEMS(tbS0)) {
2384 print_block(p_s_tbS0, 0, -1, -1); 2382 print_block(tbS0, 0, -1, -1);
2385 reiserfs_panic(p_s_tb->tb_sb, 2383 reiserfs_panic(tb->tb_sb, "PAP-8335", "Incorrect "
2386 "PAP-8335: fix_nodes: Incorrect item number(%d); mode = %c insert_size = %d\n", 2384 "item number(%d); mode = %c "
2387 n_item_num, n_op_mode, 2385 "insert_size = %d",
2388 p_s_tb->insert_size[0]); 2386 item_num, op_mode,
2387 tb->insert_size[0]);
2389 } 2388 }
2390 break; 2389 break;
2391 default: 2390 default:
2392 reiserfs_panic(p_s_tb->tb_sb, 2391 reiserfs_panic(tb->tb_sb, "PAP-8340", "Incorrect mode "
2393 "PAP-8340: fix_nodes: Incorrect mode of operation"); 2392 "of operation");
2394 } 2393 }
2395#endif 2394#endif
2396 2395
2397 if (get_mem_for_virtual_node(p_s_tb) == REPEAT_SEARCH) 2396 if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH)
2398 // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat 2397 // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat
2399 return REPEAT_SEARCH; 2398 return REPEAT_SEARCH;
2400 2399
2401 /* Starting from the leaf level, handle all levels n_h of the tree. */ 2400 /* Starting from the leaf level, handle all levels h of the tree. */
2402 for (n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++) { 2401 for (h = 0; h < MAX_HEIGHT && tb->insert_size[h]; h++) {
2403 if ((n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON) { 2402 ret = get_direct_parent(tb, h);
2403 if (ret != CARRY_ON)
2404 goto repeat; 2404 goto repeat;
2405 }
2406 2405
2407 if ((n_ret_value = 2406 ret = check_balance(op_mode, tb, h, item_num,
2408 check_balance(n_op_mode, p_s_tb, n_h, n_item_num, 2407 pos_in_item, ins_ih, data);
2409 n_pos_in_item, p_s_ins_ih, 2408 if (ret != CARRY_ON) {
2410 data)) != CARRY_ON) { 2409 if (ret == NO_BALANCING_NEEDED) {
2411 if (n_ret_value == NO_BALANCING_NEEDED) {
2412 /* No balancing for higher levels needed. */ 2410 /* No balancing for higher levels needed. */
2413 if ((n_ret_value = 2411 ret = get_neighbors(tb, h);
2414 get_neighbors(p_s_tb, n_h)) != CARRY_ON) { 2412 if (ret != CARRY_ON)
2415 goto repeat; 2413 goto repeat;
2416 } 2414 if (h != MAX_HEIGHT - 1)
2417 if (n_h != MAX_HEIGHT - 1) 2415 tb->insert_size[h + 1] = 0;
2418 p_s_tb->insert_size[n_h + 1] = 0;
2419 /* ok, analysis and resource gathering are complete */ 2416 /* ok, analysis and resource gathering are complete */
2420 break; 2417 break;
2421 } 2418 }
2422 goto repeat; 2419 goto repeat;
2423 } 2420 }
2424 2421
2425 if ((n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON) { 2422 ret = get_neighbors(tb, h);
2423 if (ret != CARRY_ON)
2426 goto repeat; 2424 goto repeat;
2427 }
2428 2425
2429 if ((n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON) { 2426 /* No disk space, or schedule occurred and analysis may be
2430 goto repeat; /* No disk space, or schedule occurred and 2427 * invalid and needs to be redone. */
2431 analysis may be invalid and needs to be redone. */ 2428 ret = get_empty_nodes(tb, h);
2432 } 2429 if (ret != CARRY_ON)
2430 goto repeat;
2433 2431
2434 if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h)) { 2432 if (!PATH_H_PBUFFER(tb->tb_path, h)) {
2435 /* We have a positive insert size but no nodes exist on this 2433 /* We have a positive insert size but no nodes exist on this
2436 level; this means that we are creating a new root. */ 2434 level; this means that we are creating a new root. */
2437 2435
2438 RFALSE(p_s_tb->blknum[n_h] != 1, 2436 RFALSE(tb->blknum[h] != 1,
2439 "PAP-8350: creating new empty root"); 2437 "PAP-8350: creating new empty root");
2440 2438
2441 if (n_h < MAX_HEIGHT - 1) 2439 if (h < MAX_HEIGHT - 1)
2442 p_s_tb->insert_size[n_h + 1] = 0; 2440 tb->insert_size[h + 1] = 0;
2443 } else if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1)) { 2441 } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) {
2444 if (p_s_tb->blknum[n_h] > 1) { 2442 if (tb->blknum[h] > 1) {
2445 /* The tree needs to be grown, so this node S[n_h] 2443 /* The tree needs to be grown, so this node S[h]
2446 which is the root node is split into two nodes, 2444 which is the root node is split into two nodes,
2447 and a new node (S[n_h+1]) will be created to 2445 and a new node (S[h+1]) will be created to
2448 become the root node. */ 2446 become the root node. */
2449 2447
2450 RFALSE(n_h == MAX_HEIGHT - 1, 2448 RFALSE(h == MAX_HEIGHT - 1,
2451 "PAP-8355: attempt to create too high of a tree"); 2449 "PAP-8355: attempt to create too high of a tree");
2452 2450
2453 p_s_tb->insert_size[n_h + 1] = 2451 tb->insert_size[h + 1] =
2454 (DC_SIZE + 2452 (DC_SIZE +
2455 KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) + 2453 KEY_SIZE) * (tb->blknum[h] - 1) +
2456 DC_SIZE; 2454 DC_SIZE;
2457 } else if (n_h < MAX_HEIGHT - 1) 2455 } else if (h < MAX_HEIGHT - 1)
2458 p_s_tb->insert_size[n_h + 1] = 0; 2456 tb->insert_size[h + 1] = 0;
2459 } else 2457 } else
2460 p_s_tb->insert_size[n_h + 1] = 2458 tb->insert_size[h + 1] =
2461 (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1); 2459 (DC_SIZE + KEY_SIZE) * (tb->blknum[h] - 1);
2462 } 2460 }
2463 2461
2464 if ((n_ret_value = wait_tb_buffers_until_unlocked(p_s_tb)) == CARRY_ON) { 2462 ret = wait_tb_buffers_until_unlocked(tb);
2465 if (FILESYSTEM_CHANGED_TB(p_s_tb)) { 2463 if (ret == CARRY_ON) {
2464 if (FILESYSTEM_CHANGED_TB(tb)) {
2466 wait_tb_buffers_run = 1; 2465 wait_tb_buffers_run = 1;
2467 n_ret_value = REPEAT_SEARCH; 2466 ret = REPEAT_SEARCH;
2468 goto repeat; 2467 goto repeat;
2469 } else { 2468 } else {
2470 return CARRY_ON; 2469 return CARRY_ON;
@@ -2485,57 +2484,57 @@ int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb, struct item_head *p_s_
2485 2484
2486 /* Release path buffers. */ 2485 /* Release path buffers. */
2487 if (wait_tb_buffers_run) { 2486 if (wait_tb_buffers_run) {
2488 pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path); 2487 pathrelse_and_restore(tb->tb_sb, tb->tb_path);
2489 } else { 2488 } else {
2490 pathrelse(p_s_tb->tb_path); 2489 pathrelse(tb->tb_path);
2491 } 2490 }
2492 /* brelse all resources collected for balancing */ 2491 /* brelse all resources collected for balancing */
2493 for (i = 0; i < MAX_HEIGHT; i++) { 2492 for (i = 0; i < MAX_HEIGHT; i++) {
2494 if (wait_tb_buffers_run) { 2493 if (wait_tb_buffers_run) {
2495 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2494 reiserfs_restore_prepared_buffer(tb->tb_sb,
2496 p_s_tb->L[i]); 2495 tb->L[i]);
2497 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2496 reiserfs_restore_prepared_buffer(tb->tb_sb,
2498 p_s_tb->R[i]); 2497 tb->R[i]);
2499 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2498 reiserfs_restore_prepared_buffer(tb->tb_sb,
2500 p_s_tb->FL[i]); 2499 tb->FL[i]);
2501 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2500 reiserfs_restore_prepared_buffer(tb->tb_sb,
2502 p_s_tb->FR[i]); 2501 tb->FR[i]);
2503 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2502 reiserfs_restore_prepared_buffer(tb->tb_sb,
2504 p_s_tb-> 2503 tb->
2505 CFL[i]); 2504 CFL[i]);
2506 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2505 reiserfs_restore_prepared_buffer(tb->tb_sb,
2507 p_s_tb-> 2506 tb->
2508 CFR[i]); 2507 CFR[i]);
2509 } 2508 }
2510 2509
2511 brelse(p_s_tb->L[i]); 2510 brelse(tb->L[i]);
2512 p_s_tb->L[i] = NULL; 2511 brelse(tb->R[i]);
2513 brelse(p_s_tb->R[i]); 2512 brelse(tb->FL[i]);
2514 p_s_tb->R[i] = NULL; 2513 brelse(tb->FR[i]);
2515 brelse(p_s_tb->FL[i]); 2514 brelse(tb->CFL[i]);
2516 p_s_tb->FL[i] = NULL; 2515 brelse(tb->CFR[i]);
2517 brelse(p_s_tb->FR[i]); 2516
2518 p_s_tb->FR[i] = NULL; 2517 tb->L[i] = NULL;
2519 brelse(p_s_tb->CFL[i]); 2518 tb->R[i] = NULL;
2520 p_s_tb->CFL[i] = NULL; 2519 tb->FL[i] = NULL;
2521 brelse(p_s_tb->CFR[i]); 2520 tb->FR[i] = NULL;
2522 p_s_tb->CFR[i] = NULL; 2521 tb->CFL[i] = NULL;
2522 tb->CFR[i] = NULL;
2523 } 2523 }
2524 2524
2525 if (wait_tb_buffers_run) { 2525 if (wait_tb_buffers_run) {
2526 for (i = 0; i < MAX_FEB_SIZE; i++) { 2526 for (i = 0; i < MAX_FEB_SIZE; i++) {
2527 if (p_s_tb->FEB[i]) { 2527 if (tb->FEB[i])
2528 reiserfs_restore_prepared_buffer 2528 reiserfs_restore_prepared_buffer
2529 (p_s_tb->tb_sb, p_s_tb->FEB[i]); 2529 (tb->tb_sb, tb->FEB[i]);
2530 }
2531 } 2530 }
2532 } 2531 }
2533 return n_ret_value; 2532 return ret;
2534 } 2533 }
2535 2534
2536} 2535}
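For orientation, the protocol fix_nodes() exports to its callers, sketched as a hypothetical loop (the real callers live in stree.c; only the REPEAT_SEARCH / CARRY_ON contract and the do_balance()/unfix_nodes() pairing are taken from this file):

	int ret;
	do {
		/* the caller re-runs search_by_key() to rebuild tb.tb_path
		 * before each attempt */
		ret = fix_nodes(M_INSERT, &tb, ih, body);
	} while (ret == REPEAT_SEARCH);	/* a schedule invalidated the analysis */

	if (ret == CARRY_ON)
		do_balance(&tb, ih, body, M_INSERT);	/* resources are pinned */
	else
		unfix_nodes(&tb);	/* e.g. no disk space: release everything */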
2537 2536
2538/* Anatoly will probably forgive me renaming p_s_tb to tb. I just 2537/* Anatoly will probably forgive me renaming p_s_tb to tb. I just
2539 wanted to make lines shorter */ 2538 wanted to make lines shorter */
2540void unfix_nodes(struct tree_balance *tb) 2539void unfix_nodes(struct tree_balance *tb)
2541{ 2540{
diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c
index e664ac16fad9..6471c670743e 100644
--- a/fs/reiserfs/hashes.c
+++ b/fs/reiserfs/hashes.c
@@ -7,7 +7,7 @@
7 * (see Applied Cryptography, 2nd edition, p448). 7 * (see Applied Cryptography, 2nd edition, p448).
8 * 8 *
9 * Jeremy Fitzhardinge <jeremy@zip.com.au> 1998 9 * Jeremy Fitzhardinge <jeremy@zip.com.au> 1998
10 * 10 *
11 * Jeremy has agreed to the contents of reiserfs/README. -Hans 11 * Jeremy has agreed to the contents of reiserfs/README. -Hans
12 * Yura's function is added (04/07/2000) 12 * Yura's function is added (04/07/2000)
13 */ 13 */
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index de391a82b999..2074fd95046b 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -105,8 +105,8 @@ static void internal_define_dest_src_infos(int shift_mode,
105 break; 105 break;
106 106
107 default: 107 default:
108 reiserfs_panic(tb->tb_sb, 108 reiserfs_panic(tb->tb_sb, "ibalance-1",
109 "internal_define_dest_src_infos: shift type is unknown (%d)", 109 "shift type is unknown (%d)",
110 shift_mode); 110 shift_mode);
111 } 111 }
112} 112}
@@ -278,7 +278,7 @@ static void internal_delete_childs(struct buffer_info *cur_bi, int from, int n)
278 278
279/* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest 279/* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest
280* last_first == FIRST_TO_LAST means that we copy first items from src to tail of dest 280* last_first == FIRST_TO_LAST means that we copy first items from src to tail of dest
281 * last_first == LAST_TO_FIRST means that we copy last items from src to head of dest 281 * last_first == LAST_TO_FIRST means that we copy last items from src to head of dest
282 */ 282 */
283static void internal_copy_pointers_items(struct buffer_info *dest_bi, 283static void internal_copy_pointers_items(struct buffer_info *dest_bi,
284 struct buffer_head *src, 284 struct buffer_head *src,
@@ -385,7 +385,7 @@ static void internal_move_pointers_items(struct buffer_info *dest_bi,
385 if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ 385 if (last_first == FIRST_TO_LAST) { /* shift_left occurs */
386 first_pointer = 0; 386 first_pointer = 0;
387 first_item = 0; 387 first_item = 0;
388 /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer, 388 /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer,
389 for key - with first_item */ 389 for key - with first_item */
390 internal_delete_pointers_items(src_bi, first_pointer, 390 internal_delete_pointers_items(src_bi, first_pointer,
391 first_item, cpy_num - del_par); 391 first_item, cpy_num - del_par);
@@ -453,7 +453,7 @@ static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_b
453 } 453 }
454} 454}
455 455
456/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest. 456/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest.
457 * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest. 457 * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest.
458 * Replace d_key'th key in buffer cfl. 458 * Replace d_key'th key in buffer cfl.
459 * Delete pointer_amount items and node pointers from buffer src. 459 * Delete pointer_amount items and node pointers from buffer src.
@@ -518,7 +518,7 @@ static void internal_shift1_left(struct tree_balance *tb,
518 /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */ 518 /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */
519} 519}
520 520
521/* Insert d_key'th (delimiting) key from buffer cfr to head of dest. 521/* Insert d_key'th (delimiting) key from buffer cfr to head of dest.
522 * Copy n node pointers and n - 1 items from buffer src to buffer dest. 522 * Copy n node pointers and n - 1 items from buffer src to buffer dest.
523 * Replace d_key'th key in buffer cfr. 523 * Replace d_key'th key in buffer cfr.
524 * Delete n items and node pointers from buffer src. 524 * Delete n items and node pointers from buffer src.
@@ -702,8 +702,8 @@ static void balance_internal_when_delete(struct tree_balance *tb,
702 702
703 return; 703 return;
704 } 704 }
705 reiserfs_panic(tb->tb_sb, 705 reiserfs_panic(tb->tb_sb, "ibalance-2",
706 "balance_internal_when_delete: unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d", 706 "unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d",
707 h, tb->lnum[h], h, tb->rnum[h]); 707 h, tb->lnum[h], h, tb->rnum[h]);
708} 708}
709 709
@@ -749,7 +749,7 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
749 this means that new pointers and items must be inserted AFTER * 749 this means that new pointers and items must be inserted AFTER *
750 child_pos 750 child_pos
751 } 751 }
752 else 752 else
753 { 753 {
754 it is the position of the leftmost pointer that must be deleted (together with 754 it is the position of the leftmost pointer that must be deleted (together with
755 its corresponding key to the left of the pointer) 755 its corresponding key to the left of the pointer)
@@ -940,8 +940,8 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
940 struct block_head *blkh; 940 struct block_head *blkh;
941 941
942 if (tb->blknum[h] != 1) 942 if (tb->blknum[h] != 1)
943 reiserfs_panic(NULL, 943 reiserfs_panic(NULL, "ibalance-3", "One new node "
944 "balance_internal: One new node required for creating the new root"); 944 "required for creating the new root");
945 /* S[h] = empty buffer from the list FEB. */ 945 /* S[h] = empty buffer from the list FEB. */
946 tbSh = get_FEB(tb); 946 tbSh = get_FEB(tb);
947 blkh = B_BLK_HEAD(tbSh); 947 blkh = B_BLK_HEAD(tbSh);
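[Editor's note] The hunks above, and most of the hunks that follow, apply one mechanical conversion: the assertion id (e.g. "ibalance-2") moves out of the format string into its own argument of reiserfs_panic()/reiserfs_warning(), and the redundant function name is dropped from the message. A before/after sketch of the convention, restating the ibalance-2 hunk above:

	/* old style: id and function name baked into the format string */
	reiserfs_panic(tb->tb_sb,
		       "balance_internal_when_delete: unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d",
		       h, tb->lnum[h], h, tb->rnum[h]);

	/* new style: id is a separate argument, the message stands alone */
	reiserfs_panic(tb->tb_sb, "ibalance-2",
		       "unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d",
		       h, tb->lnum[h], h, tb->rnum[h]);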
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 823227a7662a..6fd0f47e45db 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -363,7 +363,7 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
 	}
 	/* make sure we don't read more bytes than actually exist in
 	** the file. This can happen in odd cases where i_size isn't
 	** correct, and when direct item padding results in a few
 	** extra bytes at the end of the direct item
 	*/
 	if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
@@ -438,15 +438,15 @@ static int reiserfs_bmap(struct inode *inode, sector_t block,
 ** -ENOENT instead of a valid buffer. block_prepare_write expects to
 ** be able to do i/o on the buffers returned, unless an error value
 ** is also returned.
 **
 ** So, this allows block_prepare_write to be used for reading a single block
 ** in a page. Where it does not produce a valid page for holes, or past the
 ** end of the file. This turns out to be exactly what we need for reading
 ** tails for conversion.
 **
 ** The point of the wrapper is forcing a certain value for create, even
 ** though the VFS layer is calling this function with create==1. If you
 ** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
 ** don't use this function.
 */
 static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
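[Editor's note] The hunk above only touches the comment, but the wrapper it describes is easy to misread. A minimal sketch of what the comment says the function does, i.e. override whatever create value the VFS passed in; the actual body is not shown in this hunk, so treat this as a restatement, not the implementation:

	static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
					       struct buffer_head *bh_result,
					       int create)
	{
		/* ignore create == 1 from the VFS; never fill holes on this path */
		return reiserfs_get_block(inode, block, bh_result,
					  GET_BLOCK_NO_HOLE);
	}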
@@ -602,7 +602,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
 	int done;
 	int fs_gen;
 	struct reiserfs_transaction_handle *th = NULL;
 	/* space reserved in transaction batch:
 	   . 3 balancings in direct->indirect conversion
 	   . 1 block involved into reiserfs_update_sd()
 	   XXX in practically impossible worst case direct2indirect()
@@ -754,7 +754,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
 		reiserfs_write_unlock(inode->i_sb);
 
 		/* the item was found, so new blocks were not added to the file
 		** there is no need to make sure the inode is updated with this
 		** transaction
 		*/
 		return retval;
@@ -841,10 +841,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
 						 tail_offset);
 		if (retval) {
 			if (retval != -ENOSPC)
-				reiserfs_warning(inode->i_sb,
-						 "clm-6004: convert tail failed inode %lu, error %d",
-						 inode->i_ino,
-						 retval);
+				reiserfs_error(inode->i_sb,
+					       "clm-6004",
+					       "convert tail failed "
+					       "inode %lu, error %d",
+					       inode->i_ino,
+					       retval);
 			if (allocated_block_nr) {
 				/* the bitmap, the super, and the stat data == 3 */
 				if (!th)
@@ -984,7 +986,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
 
 			/* this loop could log more blocks than we had originally asked
 			** for. So, we have to allow the transaction to end if it is
 			** too big or too full. Update the inode so things are
 			** consistent if we crash before the function returns
 			**
 			** release the path so that anybody waiting on the path before
@@ -995,7 +997,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
 				if (retval)
 					goto failure;
 			}
 			/* inserting indirect pointers for a hole can take a
 			** long time. reschedule if needed
 			*/
 			cond_resched();
@@ -1006,8 +1008,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
 			goto failure;
 		}
 		if (retval == POSITION_FOUND) {
-			reiserfs_warning(inode->i_sb,
-					 "vs-825: reiserfs_get_block: "
+			reiserfs_warning(inode->i_sb, "vs-825",
 					 "%K should not be found", &key);
 			retval = -EEXIST;
 			if (allocated_block_nr)
@@ -1299,8 +1300,7 @@ static void update_stat_data(struct treepath *path, struct inode *inode,
 	ih = PATH_PITEM_HEAD(path);
 
 	if (!is_statdata_le_ih(ih))
-		reiserfs_panic(inode->i_sb,
-			       "vs-13065: update_stat_data: key %k, found item %h",
+		reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h",
 			       INODE_PKEY(inode), ih);
 
 	if (stat_data_v1(ih)) {
@@ -1332,10 +1332,9 @@ void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
 	/* look for the object's stat data */
 	retval = search_item(inode->i_sb, &key, &path);
 	if (retval == IO_ERROR) {
-		reiserfs_warning(inode->i_sb,
-				 "vs-13050: reiserfs_update_sd: "
-				 "i/o failure occurred trying to update %K stat data",
-				 &key);
+		reiserfs_error(inode->i_sb, "vs-13050",
+			       "i/o failure occurred trying to "
+			       "update %K stat data", &key);
 		return;
 	}
 	if (retval == ITEM_NOT_FOUND) {
@@ -1345,9 +1344,9 @@ void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
 			/*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */
 			return;
 		}
-		reiserfs_warning(inode->i_sb,
-				 "vs-13060: reiserfs_update_sd: "
-				 "stat data of object %k (nlink == %d) not found (pos %d)",
+		reiserfs_warning(inode->i_sb, "vs-13060",
+				 "stat data of object %k (nlink == %d) "
+				 "not found (pos %d)",
 				 INODE_PKEY(inode), inode->i_nlink,
 				 pos);
 		reiserfs_check_path(&path);
@@ -1424,10 +1423,9 @@ void reiserfs_read_locked_inode(struct inode *inode,
 	/* look for the object's stat data */
 	retval = search_item(inode->i_sb, &key, &path_to_sd);
 	if (retval == IO_ERROR) {
-		reiserfs_warning(inode->i_sb,
-				 "vs-13070: reiserfs_read_locked_inode: "
-				 "i/o failure occurred trying to find stat data of %K",
-				 &key);
+		reiserfs_error(inode->i_sb, "vs-13070",
+			       "i/o failure occurred trying to find "
+			       "stat data of %K", &key);
 		reiserfs_make_bad_inode(inode);
 		return;
 	}
@@ -1446,7 +1444,7 @@ void reiserfs_read_locked_inode(struct inode *inode,
 	   update sd on unlink all that is required is to check for nlink
 	   here. This bug was first found by Sizif when debugging
 	   SquidNG/Butterfly, forgotten, and found again after Philippe
 	   Gramoulle <philippe.gramoulle@mmania.com> reproduced it.
 
 	   More logical fix would require changes in fs/inode.c:iput() to
 	   remove inode from hash-table _after_ fs cleaned disk stuff up and
@@ -1457,8 +1455,7 @@ void reiserfs_read_locked_inode(struct inode *inode,
 	   during mount (fs/reiserfs/super.c:finish_unfinished()). */
 	if ((inode->i_nlink == 0) &&
 	    !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) {
-		reiserfs_warning(inode->i_sb,
-				 "vs-13075: reiserfs_read_locked_inode: "
+		reiserfs_warning(inode->i_sb, "vs-13075",
 				 "dead inode read from disk %K. "
 				 "This is likely to be race with knfsd. Ignore",
 				 &key);
@@ -1555,7 +1552,7 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
 	 */
 	if (fh_type > fh_len) {
 		if (fh_type != 6 || fh_len != 5)
-			reiserfs_warning(sb,
+			reiserfs_warning(sb, "reiserfs-13077",
 					 "nfsd/reiserfs, fhtype=%d, len=%d - odd",
 					 fh_type, fh_len);
 		fh_type = 5;
@@ -1622,7 +1619,7 @@ int reiserfs_write_inode(struct inode *inode, int do_sync)
 	if (inode->i_sb->s_flags & MS_RDONLY)
 		return -EROFS;
 	/* memory pressure can sometimes initiate write_inode calls with sync == 1,
 	** these cases are just when the system needs ram, not when the
 	** inode needs to reach disk for safety, and they can safely be
 	** ignored because the altered inode has already been logged.
 	*/
@@ -1680,13 +1677,13 @@ static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
 	/* look for place in the tree for new item */
 	retval = search_item(sb, &key, path);
 	if (retval == IO_ERROR) {
-		reiserfs_warning(sb, "vs-13080: reiserfs_new_directory: "
+		reiserfs_error(sb, "vs-13080",
			       "i/o failure occurred creating new directory");
 		return -EIO;
 	}
 	if (retval == ITEM_FOUND) {
 		pathrelse(path);
-		reiserfs_warning(sb, "vs-13070: reiserfs_new_directory: "
+		reiserfs_warning(sb, "vs-13070",
 				 "object with this key exists (%k)",
 				 &(ih->ih_key));
 		return -EEXIST;
@@ -1720,13 +1717,13 @@ static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct i
 	/* look for place in the tree for new item */
 	retval = search_item(sb, &key, path);
 	if (retval == IO_ERROR) {
-		reiserfs_warning(sb, "vs-13080: reiserfs_new_symlinik: "
+		reiserfs_error(sb, "vs-13080",
			       "i/o failure occurred creating new symlink");
 		return -EIO;
 	}
 	if (retval == ITEM_FOUND) {
 		pathrelse(path);
-		reiserfs_warning(sb, "vs-13080: reiserfs_new_symlink: "
+		reiserfs_warning(sb, "vs-13080",
 				 "object with this key exists (%k)",
 				 &(ih->ih_key));
 		return -EEXIST;
@@ -1739,7 +1736,7 @@ static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct i
 /* inserts the stat data into the tree, and then calls
    reiserfs_new_directory (to insert ".", ".." item if new object is
    directory) or reiserfs_new_symlink (to insert symlink body if new
    object is symlink) or nothing (if new object is regular file)
 
    NOTE! uid and gid must already be set in the inode. If we return
    non-zero due to an error, we have to drop the quota previously allocated
@@ -1747,10 +1744,11 @@ static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct i
    if we return non-zero, we also end the transaction. */
 int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 		       struct inode *dir, int mode, const char *symname,
 		       /* 0 for regular, EMTRY_DIR_SIZE for dirs,
 		          strlen (symname) for symlinks) */
 		       loff_t i_size, struct dentry *dentry,
-		       struct inode *inode)
+		       struct inode *inode,
+		       struct reiserfs_security_handle *security)
 {
 	struct super_block *sb;
 	struct reiserfs_iget_args args;
@@ -1796,7 +1794,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 		goto out_bad_inode;
 	}
 	if (old_format_only(sb))
 		/* not a perfect generation count, as object ids can be reused, but
 		** this is as good as reiserfs can do right now.
 		** note that the private part of inode isn't filled in yet, we have
 		** to use the directory.
@@ -1917,9 +1915,8 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 		goto out_inserted_sd;
 	}
 
-	/* XXX CHECK THIS */
 	if (reiserfs_posixacl(inode->i_sb)) {
-		retval = reiserfs_inherit_default_acl(dir, dentry, inode);
+		retval = reiserfs_inherit_default_acl(th, dir, dentry, inode);
 		if (retval) {
 			err = retval;
 			reiserfs_check_path(&path_to_key);
@@ -1927,10 +1924,23 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 			goto out_inserted_sd;
 		}
 	} else if (inode->i_sb->s_flags & MS_POSIXACL) {
-		reiserfs_warning(inode->i_sb, "ACLs aren't enabled in the fs, "
+		reiserfs_warning(inode->i_sb, "jdm-13090",
+				 "ACLs aren't enabled in the fs, "
 				 "but vfs thinks they are!");
-	} else if (is_reiserfs_priv_object(dir)) {
-		reiserfs_mark_inode_private(inode);
+	} else if (IS_PRIVATE(dir))
+		inode->i_flags |= S_PRIVATE;
+
+	if (security->name) {
+		retval = reiserfs_security_write(th, inode, security);
+		if (retval) {
+			err = retval;
+			reiserfs_check_path(&path_to_key);
+			retval = journal_end(th, th->t_super,
+					     th->t_blocks_allocated);
+			if (retval)
+				err = retval;
+			goto out_inserted_sd;
+		}
 	}
 
 	reiserfs_update_sd(th, inode);
@@ -1960,19 +1970,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 	inode->i_nlink = 0;
 	th->t_trans_id = 0;	/* so the caller can't use this handle later */
 	unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
-
-	/* If we were inheriting an ACL, we need to release the lock so that
-	 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking
-	 * code really needs to be reworked, but this will take care of it
-	 * for now. -jeffm */
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-	if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) {
-		reiserfs_write_unlock_xattrs(dir->i_sb);
-		iput(inode);
-		reiserfs_write_lock_xattrs(dir->i_sb);
-	} else
-#endif
-		iput(inode);
+	iput(inode);
 	return err;
 }
 
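[Editor's note] The reiserfs_new_inode() hunks above add a struct reiserfs_security_handle argument and, when security->name is set, write the security xattr inside the same transaction via reiserfs_security_write(). A hedged caller-side sketch; real callers (the create/mkdir/symlink paths) also handle quota, locking and dentry setup, all omitted here, and the empty handle for the no-xattr case is an assumption:

	struct reiserfs_security_handle sec = { .name = NULL };	/* no security xattr to write */
	int retval;

	retval = reiserfs_new_inode(&th, dir, mode, NULL /* not a symlink */,
				    0 /* regular file => i_size 0 */,
				    dentry, inode, &sec);
	if (retval)
		return retval;	/* per the comment above, the transaction was already ended */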
@@ -1989,7 +1987,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 **
 ** on failure, nonzero is returned, page_result and bh_result are untouched.
 */
-static int grab_tail_page(struct inode *p_s_inode,
+static int grab_tail_page(struct inode *inode,
 			  struct page **page_result,
 			  struct buffer_head **bh_result)
 {
@@ -1997,11 +1995,11 @@ static int grab_tail_page(struct inode *p_s_inode,
 	/* we want the page with the last byte in the file,
 	** not the page that will hold the next byte for appending
 	*/
-	unsigned long index = (p_s_inode->i_size - 1) >> PAGE_CACHE_SHIFT;
+	unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
 	unsigned long pos = 0;
 	unsigned long start = 0;
-	unsigned long blocksize = p_s_inode->i_sb->s_blocksize;
-	unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1);
+	unsigned long blocksize = inode->i_sb->s_blocksize;
+	unsigned long offset = (inode->i_size) & (PAGE_CACHE_SIZE - 1);
 	struct buffer_head *bh;
 	struct buffer_head *head;
 	struct page *page;
@@ -2015,7 +2013,7 @@ static int grab_tail_page(struct inode *p_s_inode,
 	if ((offset & (blocksize - 1)) == 0) {
 		return -ENOENT;
 	}
-	page = grab_cache_page(p_s_inode->i_mapping, index);
+	page = grab_cache_page(inode->i_mapping, index);
 	error = -ENOMEM;
 	if (!page) {
 		goto out;
@@ -2044,10 +2042,8 @@ static int grab_tail_page(struct inode *p_s_inode,
 		** I've screwed up the code to find the buffer, or the code to
 		** call prepare_write
 		*/
-		reiserfs_warning(p_s_inode->i_sb,
-				 "clm-6000: error reading block %lu on dev %s",
-				 bh->b_blocknr,
-				 reiserfs_bdevname(p_s_inode->i_sb));
+		reiserfs_error(inode->i_sb, "clm-6000",
			       "error reading block %lu", bh->b_blocknr);
 		error = -EIO;
 		goto unlock;
 	}
@@ -2069,57 +2065,58 @@ static int grab_tail_page(struct inode *p_s_inode,
 **
 ** some code taken from block_truncate_page
 */
-int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps)
+int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
 {
 	struct reiserfs_transaction_handle th;
 	/* we want the offset for the first byte after the end of the file */
-	unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1);
-	unsigned blocksize = p_s_inode->i_sb->s_blocksize;
+	unsigned long offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+	unsigned blocksize = inode->i_sb->s_blocksize;
 	unsigned length;
 	struct page *page = NULL;
 	int error;
 	struct buffer_head *bh = NULL;
 	int err2;
 
-	reiserfs_write_lock(p_s_inode->i_sb);
+	reiserfs_write_lock(inode->i_sb);
 
-	if (p_s_inode->i_size > 0) {
-		if ((error = grab_tail_page(p_s_inode, &page, &bh))) {
-			// -ENOENT means we truncated past the end of the file,
+	if (inode->i_size > 0) {
+		error = grab_tail_page(inode, &page, &bh);
+		if (error) {
+			// -ENOENT means we truncated past the end of the file,
 			// and get_block_create_0 could not find a block to read in,
 			// which is ok.
 			if (error != -ENOENT)
-				reiserfs_warning(p_s_inode->i_sb,
-						 "clm-6001: grab_tail_page failed %d",
+				reiserfs_error(inode->i_sb, "clm-6001",
					       "grab_tail_page failed %d",
 						 error);
 			page = NULL;
 			bh = NULL;
 		}
 	}
 
 	/* so, if page != NULL, we have a buffer head for the offset at
 	** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
 	** then we have an unformatted node. Otherwise, we have a direct item,
 	** and no zeroing is required on disk. We zero after the truncate,
 	** because the truncate might pack the item anyway
 	** (it will unmap bh if it packs).
 	*/
 	/* it is enough to reserve space in transaction for 2 balancings:
 	   one for "save" link adding and another for the first
 	   cut_from_item. 1 is for update_sd */
-	error = journal_begin(&th, p_s_inode->i_sb,
+	error = journal_begin(&th, inode->i_sb,
 			      JOURNAL_PER_BALANCE_CNT * 2 + 1);
 	if (error)
 		goto out;
-	reiserfs_update_inode_transaction(p_s_inode);
+	reiserfs_update_inode_transaction(inode);
 	if (update_timestamps)
 		/* we are doing real truncate: if the system crashes before the last
 		   transaction of truncating gets committed - on reboot the file
 		   either appears truncated properly or not truncated at all */
-		add_save_link(&th, p_s_inode, 1);
+		add_save_link(&th, inode, 1);
-	err2 = reiserfs_do_truncate(&th, p_s_inode, page, update_timestamps);
+	err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps);
 	error =
-	    journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
+	    journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
 	if (error)
 		goto out;
 
@@ -2130,7 +2127,7 @@ int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps)
 	}
 
 	if (update_timestamps) {
-		error = remove_save_link(p_s_inode, 1 /* truncate */ );
+		error = remove_save_link(inode, 1 /* truncate */);
 		if (error)
 			goto out;
 	}
@@ -2149,14 +2146,14 @@ int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps)
 		page_cache_release(page);
 	}
 
-	reiserfs_write_unlock(p_s_inode->i_sb);
+	reiserfs_write_unlock(inode->i_sb);
 	return 0;
 out:
 	if (page) {
 		unlock_page(page);
 		page_cache_release(page);
 	}
-	reiserfs_write_unlock(p_s_inode->i_sb);
+	reiserfs_write_unlock(inode->i_sb);
 	return error;
 }
 
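[Editor's note] The block estimate passed to journal_begin()/journal_end() in the truncate hunk above is exactly the arithmetic its in-code comment spells out; restated as a one-line sketch (the variable name is illustrative, JOURNAL_PER_BALANCE_CNT is the real constant):

	/* 2 balancings (save-link insert + first cut_from_item), plus 1 for update_sd */
	int reserved = JOURNAL_PER_BALANCE_CNT * 2 + 1;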
@@ -2208,9 +2205,8 @@ static int map_block_for_writepage(struct inode *inode,
 	/* we've found an unformatted node */
 	if (indirect_item_found(retval, ih)) {
 		if (bytes_copied > 0) {
-			reiserfs_warning(inode->i_sb,
-					 "clm-6002: bytes_copied %d",
-					 bytes_copied);
+			reiserfs_warning(inode->i_sb, "clm-6002",
+					 "bytes_copied %d", bytes_copied);
 		}
 		if (!get_block_num(item, pos_in_item)) {
 			/* crap, we are writing to a hole */
@@ -2267,9 +2263,8 @@ static int map_block_for_writepage(struct inode *inode,
 				goto research;
 			}
 		} else {
-			reiserfs_warning(inode->i_sb,
-					 "clm-6003: bad item inode %lu, device %s",
-					 inode->i_ino, reiserfs_bdevname(inode->i_sb));
+			reiserfs_warning(inode->i_sb, "clm-6003",
+					 "bad item inode %lu", inode->i_ino);
 			retval = -EIO;
 			goto out;
 		}
@@ -2312,8 +2307,8 @@ static int map_block_for_writepage(struct inode *inode,
 	return retval;
 }
 
 /*
 * mason@suse.com: updated in 2.5.54 to follow the same general io
 * start/recovery path as __block_write_full_page, along with special
 * code to handle reiserfs tails.
 */
@@ -2453,7 +2448,7 @@ static int reiserfs_write_full_page(struct page *page,
 	unlock_page(page);
 
 	/*
 	 * since any buffer might be the only dirty buffer on the page,
 	 * the first submit_bh can bring the page out of writeback.
 	 * be careful with the buffers.
 	 */
@@ -2472,8 +2467,8 @@ static int reiserfs_write_full_page(struct page *page,
 	if (nr == 0) {
 		/*
 		 * if this page only had a direct item, it is very possible for
 		 * no io to be required without there being an error. Or,
 		 * someone else could have locked them and sent them down the
 		 * pipe without locking the page
 		 */
 		bh = head;
@@ -2492,7 +2487,7 @@ static int reiserfs_write_full_page(struct page *page,
 
 fail:
 	/* catches various errors, we need to make sure any valid dirty blocks
 	 * get to the media. The page is currently locked and not marked for
 	 * writeback
 	 */
 	ClearPageUptodate(page);
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 830332021ed4..0ccc3fdda7bf 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -189,7 +189,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
 	}
 
 	/* we unpack by finding the page with the tail, and calling
 	** reiserfs_prepare_write on that page. This will force a
 	** reiserfs_get_block to unpack the tail for us.
 	*/
 	index = inode->i_size >> PAGE_CACHE_SHIFT;
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index 9475557ab499..72cb1cc51b87 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -97,7 +97,8 @@ static int sd_unit_num(struct virtual_item *vi)
 
 static void sd_print_vi(struct virtual_item *vi)
 {
-	reiserfs_warning(NULL, "STATDATA, index %d, type 0x%x, %h",
+	reiserfs_warning(NULL, "reiserfs-16100",
+			 "STATDATA, index %d, type 0x%x, %h",
 			 vi->vi_index, vi->vi_type, vi->vi_ih);
 }
 
@@ -190,7 +191,8 @@ static int direct_unit_num(struct virtual_item *vi)
 
 static void direct_print_vi(struct virtual_item *vi)
 {
-	reiserfs_warning(NULL, "DIRECT, index %d, type 0x%x, %h",
+	reiserfs_warning(NULL, "reiserfs-16101",
+			 "DIRECT, index %d, type 0x%x, %h",
 			 vi->vi_index, vi->vi_type, vi->vi_ih);
 }
 
@@ -278,7 +280,7 @@ static void indirect_print_item(struct item_head *ih, char *item)
 	unp = (__le32 *) item;
 
 	if (ih_item_len(ih) % UNFM_P_SIZE)
-		reiserfs_warning(NULL, "indirect_print_item: invalid item len");
+		reiserfs_warning(NULL, "reiserfs-16102", "invalid item len");
 
 	printk("%d pointers\n[ ", (int)I_UNFM_NUM(ih));
 	for (j = 0; j < I_UNFM_NUM(ih); j++) {
@@ -334,7 +336,8 @@ static int indirect_unit_num(struct virtual_item *vi)
 
 static void indirect_print_vi(struct virtual_item *vi)
 {
-	reiserfs_warning(NULL, "INDIRECT, index %d, type 0x%x, %h",
+	reiserfs_warning(NULL, "reiserfs-16103",
+			 "INDIRECT, index %d, type 0x%x, %h",
 			 vi->vi_index, vi->vi_type, vi->vi_ih);
 }
 
@@ -359,7 +362,7 @@ static struct item_operations indirect_ops = {
 
 static int direntry_bytes_number(struct item_head *ih, int block_size)
 {
-	reiserfs_warning(NULL, "vs-16090: direntry_bytes_number: "
+	reiserfs_warning(NULL, "vs-16090",
 			 "bytes number is asked for direntry");
 	return 0;
 }
@@ -514,8 +517,9 @@ static int direntry_create_vi(struct virtual_node *vn,
 	    ((is_affected
 	      && (vn->vn_mode == M_PASTE
 		  || vn->vn_mode == M_CUT)) ? insert_size : 0)) {
-		reiserfs_panic(NULL,
-			       "vs-8025: set_entry_sizes: (mode==%c, insert_size==%d), invalid length of directory item",
+		reiserfs_panic(NULL, "vs-8025", "(mode==%c, "
+			       "insert_size==%d), invalid length of "
+			       "directory item",
 			       vn->vn_mode, insert_size);
 	}
 }
@@ -546,7 +550,8 @@ static int direntry_check_left(struct virtual_item *vi, int free,
 	}
 
 	if (entries == dir_u->entry_count) {
-		reiserfs_panic(NULL, "free space %d, entry_count %d\n", free,
+		reiserfs_panic(NULL, "item_ops-1",
+			       "free space %d, entry_count %d", free,
 			       dir_u->entry_count);
 	}
 
@@ -614,7 +619,8 @@ static void direntry_print_vi(struct virtual_item *vi)
 	int i;
 	struct direntry_uarea *dir_u = vi->vi_uarea;
 
-	reiserfs_warning(NULL, "DIRENTRY, index %d, type 0x%x, %h, flags 0x%x",
+	reiserfs_warning(NULL, "reiserfs-16104",
+			 "DIRENTRY, index %d, type 0x%x, %h, flags 0x%x",
 			 vi->vi_index, vi->vi_type, vi->vi_ih, dir_u->flags);
 	printk("%d entries: ", dir_u->entry_count);
 	for (i = 0; i < dir_u->entry_count; i++)
@@ -642,43 +648,43 @@ static struct item_operations direntry_ops = {
 //
 static int errcatch_bytes_number(struct item_head *ih, int block_size)
 {
-	reiserfs_warning(NULL,
-			 "green-16001: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16001",
+			 "Invalid item type observed, run fsck ASAP");
 	return 0;
 }
 
 static void errcatch_decrement_key(struct cpu_key *key)
 {
-	reiserfs_warning(NULL,
-			 "green-16002: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16002",
+			 "Invalid item type observed, run fsck ASAP");
 }
 
 static int errcatch_is_left_mergeable(struct reiserfs_key *key,
 				      unsigned long bsize)
 {
-	reiserfs_warning(NULL,
-			 "green-16003: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16003",
+			 "Invalid item type observed, run fsck ASAP");
 	return 0;
 }
 
 static void errcatch_print_item(struct item_head *ih, char *item)
 {
-	reiserfs_warning(NULL,
-			 "green-16004: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16004",
+			 "Invalid item type observed, run fsck ASAP");
 }
 
 static void errcatch_check_item(struct item_head *ih, char *item)
 {
-	reiserfs_warning(NULL,
-			 "green-16005: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16005",
+			 "Invalid item type observed, run fsck ASAP");
 }
 
 static int errcatch_create_vi(struct virtual_node *vn,
 			      struct virtual_item *vi,
 			      int is_affected, int insert_size)
 {
-	reiserfs_warning(NULL,
-			 "green-16006: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16006",
+			 "Invalid item type observed, run fsck ASAP");
 	return 0;	// We might return -1 here as well, but it won't help as create_virtual_node() from where
 			// this operation is called from is of return type void.
 }
@@ -686,36 +692,36 @@ static int errcatch_create_vi(struct virtual_node *vn,
 static int errcatch_check_left(struct virtual_item *vi, int free,
 			       int start_skip, int end_skip)
 {
-	reiserfs_warning(NULL,
-			 "green-16007: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16007",
+			 "Invalid item type observed, run fsck ASAP");
 	return -1;
 }
 
 static int errcatch_check_right(struct virtual_item *vi, int free)
 {
-	reiserfs_warning(NULL,
-			 "green-16008: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16008",
+			 "Invalid item type observed, run fsck ASAP");
 	return -1;
 }
 
 static int errcatch_part_size(struct virtual_item *vi, int first, int count)
 {
-	reiserfs_warning(NULL,
-			 "green-16009: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16009",
+			 "Invalid item type observed, run fsck ASAP");
 	return 0;
 }
 
 static int errcatch_unit_num(struct virtual_item *vi)
 {
-	reiserfs_warning(NULL,
-			 "green-16010: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16010",
+			 "Invalid item type observed, run fsck ASAP");
 	return 0;
 }
 
 static void errcatch_print_vi(struct virtual_item *vi)
 {
-	reiserfs_warning(NULL,
-			 "green-16011: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16011",
+			 "Invalid item type observed, run fsck ASAP");
 }
 
 static struct item_operations errcatch_ops = {
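[Editor's note] These errcatch_* stubs exist so that an on-disk corruption yielding an unknown item type degrades into a warning instead of a call through a garbage function pointer. As a hedged sketch of how they plug in: the real dispatch table lives elsewhere in fs/reiserfs/item_ops.c and is not part of these hunks, and the names stat_data_ops and direct_ops plus the slot order are assumptions here; only indirect_ops, direntry_ops and errcatch_ops appear in the hunks above:

	/* assumed shape of the per-item-type dispatch table; errcatch_ops
	 * backs the catch-all slot for invalid/unknown item types */
	struct item_operations *item_ops[] = {
		&stat_data_ops,
		&direct_ops,
		&indirect_ops,
		&direntry_ops,
		&errcatch_ops
	};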
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 9643c3bbeb3b..77f5bb746bf0 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1,36 +1,36 @@
 /*
 ** Write ahead logging implementation copyright Chris Mason 2000
 **
 ** The background commits make this code very interelated, and
 ** overly complex. I need to rethink things a bit....The major players:
 **
 ** journal_begin -- call with the number of blocks you expect to log.
 **                  If the current transaction is too
 **                  old, it will block until the current transaction is
 **                  finished, and then start a new one.
 **                  Usually, your transaction will get joined in with
 **                  previous ones for speed.
 **
 ** journal_join  -- same as journal_begin, but won't block on the current
 **                  transaction regardless of age. Don't ever call
 **                  this. Ever. There are only two places it should be
 **                  called from, and they are both inside this file.
 **
 ** journal_mark_dirty -- adds blocks into this transaction. clears any flags
 **                       that might make them get sent to disk
 **                       and then marks them BH_JDirty. Puts the buffer head
 **                       into the current transaction hash.
 **
 ** journal_end -- if the current transaction is batchable, it does nothing
 **                otherwise, it could do an async/synchronous commit, or
 **                a full flush of all log and real blocks in the
 **                transaction.
 **
 ** flush_old_commits -- if the current transaction is too old, it is ended and
 **                      commit blocks are sent to disk. Forces commit blocks
 **                      to disk for all backgrounded commits that have been
 **                      around too long.
 **                      -- Note, if you call this as an immediate flush from
 **                      from within kupdate, it will ignore the immediate flag
 */
 
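[Editor's note] A hedged sketch of the begin/mark/end pattern the comment above describes. journal_begin(), journal_mark_dirty() and journal_end() are the entry points the comment names; the handle, superblock and block estimate here are illustrative, and error handling is abbreviated:

	struct reiserfs_transaction_handle th;
	int err;

	/* reserve an estimate of the blocks this transaction will log */
	err = journal_begin(&th, sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
	if (err)
		return err;

	/* ... modify buffers, journal_mark_dirty() each one ... */

	/* end with the same count; may batch, commit async, or flush */
	err = journal_end(&th, sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);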
@@ -97,7 +97,7 @@ static int flush_commit_list(struct super_block *s,
 			     struct reiserfs_journal_list *jl, int flushall);
 static int can_dirty(struct reiserfs_journal_cnode *cn);
 static int journal_join(struct reiserfs_transaction_handle *th,
-			struct super_block *p_s_sb, unsigned long nblocks);
+			struct super_block *sb, unsigned long nblocks);
 static int release_journal_dev(struct super_block *super,
 			       struct reiserfs_journal *journal);
 static int dirty_one_transaction(struct super_block *s,
@@ -113,12 +113,12 @@ enum {
 };
 
 static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
-			      struct super_block *p_s_sb,
+			      struct super_block *sb,
 			      unsigned long nblocks, int join);
 
-static void init_journal_hash(struct super_block *p_s_sb)
+static void init_journal_hash(struct super_block *sb)
 {
-	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	memset(journal->j_hash_table, 0,
 	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
 }
@@ -145,7 +145,7 @@ static void disable_barrier(struct super_block *s)
 }
 
 static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
-							 *p_s_sb)
+							 *sb)
 {
 	struct reiserfs_bitmap_node *bn;
 	static int id;
@@ -154,7 +154,7 @@ static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
 	if (!bn) {
 		return NULL;
 	}
-	bn->data = kzalloc(p_s_sb->s_blocksize, GFP_NOFS);
+	bn->data = kzalloc(sb->s_blocksize, GFP_NOFS);
 	if (!bn->data) {
 		kfree(bn);
 		return NULL;
@@ -164,9 +164,9 @@ static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
 	return bn;
 }
 
-static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *p_s_sb)
+static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb)
 {
-	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	struct reiserfs_bitmap_node *bn = NULL;
 	struct list_head *entry = journal->j_bitmap_nodes.next;
 
@@ -176,21 +176,21 @@ static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *p_s_sb)
 	if (entry != &journal->j_bitmap_nodes) {
 		bn = list_entry(entry, struct reiserfs_bitmap_node, list);
 		list_del(entry);
-		memset(bn->data, 0, p_s_sb->s_blocksize);
+		memset(bn->data, 0, sb->s_blocksize);
 		journal->j_free_bitmap_nodes--;
 		return bn;
 	}
-	bn = allocate_bitmap_node(p_s_sb);
+	bn = allocate_bitmap_node(sb);
 	if (!bn) {
 		yield();
 		goto repeat;
 	}
 	return bn;
 }
-static inline void free_bitmap_node(struct super_block *p_s_sb,
+static inline void free_bitmap_node(struct super_block *sb,
 				    struct reiserfs_bitmap_node *bn)
 {
-	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	journal->j_used_bitmap_nodes--;
 	if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
 		kfree(bn->data);
@@ -201,46 +201,46 @@ static inline void free_bitmap_node(struct super_block *p_s_sb,
 	}
 }
 
-static void allocate_bitmap_nodes(struct super_block *p_s_sb)
+static void allocate_bitmap_nodes(struct super_block *sb)
 {
 	int i;
-	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	struct reiserfs_bitmap_node *bn = NULL;
 	for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) {
-		bn = allocate_bitmap_node(p_s_sb);
+		bn = allocate_bitmap_node(sb);
 		if (bn) {
 			list_add(&bn->list, &journal->j_bitmap_nodes);
 			journal->j_free_bitmap_nodes++;
 		} else {
-			break;	// this is ok, we'll try again when more are needed
+			break;	/* this is ok, we'll try again when more are needed */
 		}
 	}
 }
 
-static int set_bit_in_list_bitmap(struct super_block *p_s_sb,
+static int set_bit_in_list_bitmap(struct super_block *sb,
 				  b_blocknr_t block,
 				  struct reiserfs_list_bitmap *jb)
 {
-	unsigned int bmap_nr = block / (p_s_sb->s_blocksize << 3);
-	unsigned int bit_nr = block % (p_s_sb->s_blocksize << 3);
+	unsigned int bmap_nr = block / (sb->s_blocksize << 3);
+	unsigned int bit_nr = block % (sb->s_blocksize << 3);
 
 	if (!jb->bitmaps[bmap_nr]) {
-		jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb);
+		jb->bitmaps[bmap_nr] = get_bitmap_node(sb);
 	}
 	set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data);
 	return 0;
 }
 
-static void cleanup_bitmap_list(struct super_block *p_s_sb,
+static void cleanup_bitmap_list(struct super_block *sb,
 				struct reiserfs_list_bitmap *jb)
 {
 	int i;
 	if (jb->bitmaps == NULL)
 		return;
 
-	for (i = 0; i < reiserfs_bmap_count(p_s_sb); i++) {
+	for (i = 0; i < reiserfs_bmap_count(sb); i++) {
 		if (jb->bitmaps[i]) {
-			free_bitmap_node(p_s_sb, jb->bitmaps[i]);
+			free_bitmap_node(sb, jb->bitmaps[i]);
 			jb->bitmaps[i] = NULL;
 		}
 	}
@@ -249,7 +249,7 @@ static void cleanup_bitmap_list(struct super_block *p_s_sb,
 /*
 ** only call this on FS unmount.
 */
-static int free_list_bitmaps(struct super_block *p_s_sb,
+static int free_list_bitmaps(struct super_block *sb,
 			     struct reiserfs_list_bitmap *jb_array)
 {
 	int i;
@@ -257,16 +257,16 @@ static int free_list_bitmaps(struct super_block *p_s_sb,
 	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
 		jb = jb_array + i;
 		jb->journal_list = NULL;
-		cleanup_bitmap_list(p_s_sb, jb);
+		cleanup_bitmap_list(sb, jb);
 		vfree(jb->bitmaps);
 		jb->bitmaps = NULL;
 	}
 	return 0;
 }
 
-static int free_bitmap_nodes(struct super_block *p_s_sb)
+static int free_bitmap_nodes(struct super_block *sb)
 {
-	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	struct list_head *next = journal->j_bitmap_nodes.next;
 	struct reiserfs_bitmap_node *bn;
 
@@ -283,10 +283,10 @@ static int free_bitmap_nodes(struct super_block *p_s_sb)
283} 283}
284 284
285/* 285/*
286** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. 286** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
287** jb_array is the array to be filled in. 287** jb_array is the array to be filled in.
288*/ 288*/
289int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb, 289int reiserfs_allocate_list_bitmaps(struct super_block *sb,
290 struct reiserfs_list_bitmap *jb_array, 290 struct reiserfs_list_bitmap *jb_array,
291 unsigned int bmap_nr) 291 unsigned int bmap_nr)
292{ 292{
@@ -300,30 +300,30 @@ int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb,
300 jb->journal_list = NULL; 300 jb->journal_list = NULL;
301 jb->bitmaps = vmalloc(mem); 301 jb->bitmaps = vmalloc(mem);
302 if (!jb->bitmaps) { 302 if (!jb->bitmaps) {
303 reiserfs_warning(p_s_sb, 303 reiserfs_warning(sb, "clm-2000", "unable to "
304 "clm-2000, unable to allocate bitmaps for journal lists"); 304 "allocate bitmaps for journal lists");
305 failed = 1; 305 failed = 1;
306 break; 306 break;
307 } 307 }
308 memset(jb->bitmaps, 0, mem); 308 memset(jb->bitmaps, 0, mem);
309 } 309 }
310 if (failed) { 310 if (failed) {
311 free_list_bitmaps(p_s_sb, jb_array); 311 free_list_bitmaps(sb, jb_array);
312 return -1; 312 return -1;
313 } 313 }
314 return 0; 314 return 0;
315} 315}
316 316
317/* 317/*
318** find an available list bitmap. If you can't find one, flush a commit list 318** find an available list bitmap. If you can't find one, flush a commit list
319** and try again 319** and try again
320*/ 320*/
321static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *p_s_sb, 321static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb,
322 struct reiserfs_journal_list 322 struct reiserfs_journal_list
323 *jl) 323 *jl)
324{ 324{
325 int i, j; 325 int i, j;
326 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 326 struct reiserfs_journal *journal = SB_JOURNAL(sb);
327 struct reiserfs_list_bitmap *jb = NULL; 327 struct reiserfs_list_bitmap *jb = NULL;
328 328
329 for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) { 329 for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) {
@@ -331,7 +331,7 @@ static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *p_s_sb,
331 journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS; 331 journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS;
332 jb = journal->j_list_bitmap + i; 332 jb = journal->j_list_bitmap + i;
333 if (journal->j_list_bitmap[i].journal_list) { 333 if (journal->j_list_bitmap[i].journal_list) {
334 flush_commit_list(p_s_sb, 334 flush_commit_list(sb,
335 journal->j_list_bitmap[i]. 335 journal->j_list_bitmap[i].
336 journal_list, 1); 336 journal_list, 1);
337 if (!journal->j_list_bitmap[i].journal_list) { 337 if (!journal->j_list_bitmap[i].journal_list) {
@@ -348,7 +348,7 @@ static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *p_s_sb,
348 return jb; 348 return jb;
349} 349}
350 350
351/* 351/*
352** allocates a new chunk of X nodes, and links them all together as a list. 352** allocates a new chunk of X nodes, and links them all together as a list.
353** Uses the cnode->next and cnode->prev pointers 353** Uses the cnode->next and cnode->prev pointers
354** returns NULL on failure 354** returns NULL on failure
@@ -376,14 +376,14 @@ static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
376} 376}
377 377
378/* 378/*
379** pulls a cnode off the free list, or returns NULL on failure 379** pulls a cnode off the free list, or returns NULL on failure
380*/ 380*/
381static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) 381static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb)
382{ 382{
383 struct reiserfs_journal_cnode *cn; 383 struct reiserfs_journal_cnode *cn;
384 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 384 struct reiserfs_journal *journal = SB_JOURNAL(sb);
385 385
386 reiserfs_check_lock_depth(p_s_sb, "get_cnode"); 386 reiserfs_check_lock_depth(sb, "get_cnode");
387 387
388 if (journal->j_cnode_free <= 0) { 388 if (journal->j_cnode_free <= 0) {
389 return NULL; 389 return NULL;
@@ -403,14 +403,14 @@ static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb)
403} 403}
404 404
405/* 405/*
406** returns a cnode to the free list 406** returns a cnode to the free list
407*/ 407*/
408static void free_cnode(struct super_block *p_s_sb, 408static void free_cnode(struct super_block *sb,
409 struct reiserfs_journal_cnode *cn) 409 struct reiserfs_journal_cnode *cn)
410{ 410{
411 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 411 struct reiserfs_journal *journal = SB_JOURNAL(sb);
412 412
413 reiserfs_check_lock_depth(p_s_sb, "free_cnode"); 413 reiserfs_check_lock_depth(sb, "free_cnode");
414 414
415 journal->j_cnode_used--; 415 journal->j_cnode_used--;
416 journal->j_cnode_free++; 416 journal->j_cnode_free++;
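
get_cnode and free_cnode implement a plain LIFO free list over that chunk, with j_cnode_free/j_cnode_used as bookkeeping. A sketch reusing struct cnode from the allocate_cnodes sketch above; journal_ctr and its field names are mine:

struct journal_ctr {
        struct cnode *free_list;   /* j_cnode_free_list analogue */
        int free_count;            /* j_cnode_free */
        int used_count;            /* j_cnode_used */
};

static struct cnode *get_cnode_sketch(struct journal_ctr *j)
{
        struct cnode *cn = j->free_list;

        if (j->free_count <= 0 || !cn)
                return NULL;
        j->free_list = cn->next;            /* pop the head */
        if (j->free_list)
                j->free_list->prev = NULL;
        cn->next = cn->prev = NULL;
        j->free_count--;
        j->used_count++;
        return cn;
}

static void free_cnode_sketch(struct journal_ctr *j, struct cnode *cn)
{
        j->used_count--;
        j->free_count++;
        cn->prev = NULL;                    /* push back on the head */
        cn->next = j->free_list;
        if (j->free_list)
                j->free_list->prev = cn;
        j->free_list = cn;
}
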
@@ -436,8 +436,8 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
436{ 436{
437#ifdef CONFIG_SMP 437#ifdef CONFIG_SMP
438 if (current->lock_depth < 0) { 438 if (current->lock_depth < 0) {
439 reiserfs_panic(sb, "%s called without kernel lock held", 439 reiserfs_panic(sb, "journal-1", "%s called without kernel "
440 caller); 440 "lock held", caller);
441 } 441 }
442#else 442#else
443 ; 443 ;
@@ -481,11 +481,11 @@ static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
481** reject it on the next call to reiserfs_in_journal 481** reject it on the next call to reiserfs_in_journal
482** 482**
483*/ 483*/
484int reiserfs_in_journal(struct super_block *p_s_sb, 484int reiserfs_in_journal(struct super_block *sb,
485 unsigned int bmap_nr, int bit_nr, int search_all, 485 unsigned int bmap_nr, int bit_nr, int search_all,
486 b_blocknr_t * next_zero_bit) 486 b_blocknr_t * next_zero_bit)
487{ 487{
488 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 488 struct reiserfs_journal *journal = SB_JOURNAL(sb);
489 struct reiserfs_journal_cnode *cn; 489 struct reiserfs_journal_cnode *cn;
490 struct reiserfs_list_bitmap *jb; 490 struct reiserfs_list_bitmap *jb;
491 int i; 491 int i;
@@ -493,14 +493,14 @@ int reiserfs_in_journal(struct super_block *p_s_sb,
493 493
494 *next_zero_bit = 0; /* always start this at zero. */ 494 *next_zero_bit = 0; /* always start this at zero. */
495 495
496 PROC_INFO_INC(p_s_sb, journal.in_journal); 496 PROC_INFO_INC(sb, journal.in_journal);
497 /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. 497 /* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
498 ** if we crash before the transaction that freed it commits, this transaction won't 498 ** if we crash before the transaction that freed it commits, this transaction won't
499 ** have committed either, and the block will never be written 499 ** have committed either, and the block will never be written
500 */ 500 */
501 if (search_all) { 501 if (search_all) {
502 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 502 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
503 PROC_INFO_INC(p_s_sb, journal.in_journal_bitmap); 503 PROC_INFO_INC(sb, journal.in_journal_bitmap);
504 jb = journal->j_list_bitmap + i; 504 jb = journal->j_list_bitmap + i;
505 if (jb->journal_list && jb->bitmaps[bmap_nr] && 505 if (jb->journal_list && jb->bitmaps[bmap_nr] &&
506 test_bit(bit_nr, 506 test_bit(bit_nr,
@@ -510,28 +510,28 @@ int reiserfs_in_journal(struct super_block *p_s_sb,
510 find_next_zero_bit((unsigned long *) 510 find_next_zero_bit((unsigned long *)
511 (jb->bitmaps[bmap_nr]-> 511 (jb->bitmaps[bmap_nr]->
512 data), 512 data),
513 p_s_sb->s_blocksize << 3, 513 sb->s_blocksize << 3,
514 bit_nr + 1); 514 bit_nr + 1);
515 return 1; 515 return 1;
516 } 516 }
517 } 517 }
518 } 518 }
519 519
520 bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr; 520 bl = bmap_nr * (sb->s_blocksize << 3) + bit_nr;
521 /* is it in any old transactions? */ 521 /* is it in any old transactions? */
522 if (search_all 522 if (search_all
523 && (cn = 523 && (cn =
524 get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) { 524 get_journal_hash_dev(sb, journal->j_list_hash_table, bl))) {
525 return 1; 525 return 1;
526 } 526 }
527 527
528 /* is it in the current transaction. This should never happen */ 528 /* is it in the current transaction. This should never happen */
529 if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) { 529 if ((cn = get_journal_hash_dev(sb, journal->j_hash_table, bl))) {
530 BUG(); 530 BUG();
531 return 1; 531 return 1;
532 } 532 }
533 533
534 PROC_INFO_INC(p_s_sb, journal.in_journal_reusable); 534 PROC_INFO_INC(sb, journal.in_journal_reusable);
535 /* safe for reuse */ 535 /* safe for reuse */
536 return 0; 536 return 0;
537} 537}
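
reiserfs_in_journal decides whether a block may be reused by checking, in order: the per-transaction list bitmaps (blocks freed by transactions whose commit has not been flushed), the hash of blocks in older transactions, and the current transaction's hash, which is a BUG() if it ever hits. The only arithmetic is flattening a (bitmap block, bit) pair into a linear block number, since each bitmap block covers blocksize * 8 blocks; a one-line sketch with a name of my choosing:

/* linear block number for bit bit_nr of bitmap block bmap_nr */
static unsigned long bit_to_block(unsigned long bmap_nr,
                                  unsigned long bit_nr,
                                  unsigned long blocksize)
{
        return bmap_nr * (blocksize << 3) + bit_nr;  /* << 3: bits per byte */
}
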
@@ -553,16 +553,16 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
553} 553}
554 554
555/* lock the current transaction */ 555/* lock the current transaction */
556static inline void lock_journal(struct super_block *p_s_sb) 556static inline void lock_journal(struct super_block *sb)
557{ 557{
558 PROC_INFO_INC(p_s_sb, journal.lock_journal); 558 PROC_INFO_INC(sb, journal.lock_journal);
559 mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex); 559 mutex_lock(&SB_JOURNAL(sb)->j_mutex);
560} 560}
561 561
562/* unlock the current transaction */ 562/* unlock the current transaction */
563static inline void unlock_journal(struct super_block *p_s_sb) 563static inline void unlock_journal(struct super_block *sb)
564{ 564{
565 mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex); 565 mutex_unlock(&SB_JOURNAL(sb)->j_mutex);
566} 566}
567 567
568static inline void get_journal_list(struct reiserfs_journal_list *jl) 568static inline void get_journal_list(struct reiserfs_journal_list *jl)
@@ -574,7 +574,7 @@ static inline void put_journal_list(struct super_block *s,
574 struct reiserfs_journal_list *jl) 574 struct reiserfs_journal_list *jl)
575{ 575{
576 if (jl->j_refcount < 1) { 576 if (jl->j_refcount < 1) {
577 reiserfs_panic(s, "trans id %lu, refcount at %d", 577 reiserfs_panic(s, "journal-2", "trans id %u, refcount at %d",
578 jl->j_trans_id, jl->j_refcount); 578 jl->j_trans_id, jl->j_refcount);
579 } 579 }
580 if (--jl->j_refcount == 0) 580 if (--jl->j_refcount == 0)
@@ -586,20 +586,20 @@ static inline void put_journal_list(struct super_block *s,
586** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a 586** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a
587** transaction. 587** transaction.
588*/ 588*/
589static void cleanup_freed_for_journal_list(struct super_block *p_s_sb, 589static void cleanup_freed_for_journal_list(struct super_block *sb,
590 struct reiserfs_journal_list *jl) 590 struct reiserfs_journal_list *jl)
591{ 591{
592 592
593 struct reiserfs_list_bitmap *jb = jl->j_list_bitmap; 593 struct reiserfs_list_bitmap *jb = jl->j_list_bitmap;
594 if (jb) { 594 if (jb) {
595 cleanup_bitmap_list(p_s_sb, jb); 595 cleanup_bitmap_list(sb, jb);
596 } 596 }
597 jl->j_list_bitmap->journal_list = NULL; 597 jl->j_list_bitmap->journal_list = NULL;
598 jl->j_list_bitmap = NULL; 598 jl->j_list_bitmap = NULL;
599} 599}
600 600
601static int journal_list_still_alive(struct super_block *s, 601static int journal_list_still_alive(struct super_block *s,
602 unsigned long trans_id) 602 unsigned int trans_id)
603{ 603{
604 struct reiserfs_journal *journal = SB_JOURNAL(s); 604 struct reiserfs_journal *journal = SB_JOURNAL(s);
605 struct list_head *entry = &journal->j_journal_list; 605 struct list_head *entry = &journal->j_journal_list;
@@ -644,8 +644,8 @@ static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
644 char b[BDEVNAME_SIZE]; 644 char b[BDEVNAME_SIZE];
645 645
646 if (buffer_journaled(bh)) { 646 if (buffer_journaled(bh)) {
647 reiserfs_warning(NULL, 647 reiserfs_warning(NULL, "clm-2084",
648 "clm-2084: pinned buffer %lu:%s sent to disk", 648 "pinned buffer %lu:%s sent to disk",
649 bh->b_blocknr, bdevname(bh->b_bdev, b)); 649 bh->b_blocknr, bdevname(bh->b_bdev, b));
650 } 650 }
651 if (uptodate) 651 if (uptodate)
@@ -933,9 +933,9 @@ static int flush_older_commits(struct super_block *s,
933 struct reiserfs_journal_list *other_jl; 933 struct reiserfs_journal_list *other_jl;
934 struct reiserfs_journal_list *first_jl; 934 struct reiserfs_journal_list *first_jl;
935 struct list_head *entry; 935 struct list_head *entry;
936 unsigned long trans_id = jl->j_trans_id; 936 unsigned int trans_id = jl->j_trans_id;
937 unsigned long other_trans_id; 937 unsigned int other_trans_id;
938 unsigned long first_trans_id; 938 unsigned int first_trans_id;
939 939
940 find_first: 940 find_first:
941 /* 941 /*
@@ -1014,7 +1014,7 @@ static int flush_commit_list(struct super_block *s,
1014 int i; 1014 int i;
1015 b_blocknr_t bn; 1015 b_blocknr_t bn;
1016 struct buffer_head *tbh = NULL; 1016 struct buffer_head *tbh = NULL;
1017 unsigned long trans_id = jl->j_trans_id; 1017 unsigned int trans_id = jl->j_trans_id;
1018 struct reiserfs_journal *journal = SB_JOURNAL(s); 1018 struct reiserfs_journal *journal = SB_JOURNAL(s);
1019 int barrier = 0; 1019 int barrier = 0;
1020 int retval = 0; 1020 int retval = 0;
@@ -1122,7 +1122,8 @@ static int flush_commit_list(struct super_block *s,
1122 sync_dirty_buffer(tbh); 1122 sync_dirty_buffer(tbh);
1123 if (unlikely(!buffer_uptodate(tbh))) { 1123 if (unlikely(!buffer_uptodate(tbh))) {
1124#ifdef CONFIG_REISERFS_CHECK 1124#ifdef CONFIG_REISERFS_CHECK
1125 reiserfs_warning(s, "journal-601, buffer write failed"); 1125 reiserfs_warning(s, "journal-601",
1126 "buffer write failed");
1126#endif 1127#endif
1127 retval = -EIO; 1128 retval = -EIO;
1128 } 1129 }
@@ -1154,14 +1155,14 @@ static int flush_commit_list(struct super_block *s,
1154 * up propagating the write error out to the filesystem. */ 1155 * up propagating the write error out to the filesystem. */
1155 if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { 1156 if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
1156#ifdef CONFIG_REISERFS_CHECK 1157#ifdef CONFIG_REISERFS_CHECK
1157 reiserfs_warning(s, "journal-615: buffer write failed"); 1158 reiserfs_warning(s, "journal-615", "buffer write failed");
1158#endif 1159#endif
1159 retval = -EIO; 1160 retval = -EIO;
1160 } 1161 }
1161 bforget(jl->j_commit_bh); 1162 bforget(jl->j_commit_bh);
1162 if (journal->j_last_commit_id != 0 && 1163 if (journal->j_last_commit_id != 0 &&
1163 (jl->j_trans_id - journal->j_last_commit_id) != 1) { 1164 (jl->j_trans_id - journal->j_last_commit_id) != 1) {
1164 reiserfs_warning(s, "clm-2200: last commit %lu, current %lu", 1165 reiserfs_warning(s, "clm-2200", "last commit %lu, current %lu",
1165 journal->j_last_commit_id, jl->j_trans_id); 1166 journal->j_last_commit_id, jl->j_trans_id);
1166 } 1167 }
1167 journal->j_last_commit_id = jl->j_trans_id; 1168 journal->j_last_commit_id = jl->j_trans_id;
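
The clm-2200 check insists that commit ids advance by exactly one. This patch also narrows trans ids from unsigned long to unsigned int; the delta test stays correct across wraparound because the subtraction is unsigned and modular (and journal_read_transaction below explicitly checks for trans_id overflow). A standalone demonstration, compilable as-is:

#include <assert.h>

static int commits_are_consecutive(unsigned int last, unsigned int cur)
{
        return cur - last == 1;    /* modular 32-bit arithmetic */
}

int main(void)
{
        assert(commits_are_consecutive(41u, 42u));
        assert(commits_are_consecutive(0xffffffffu, 0u));  /* wraparound */
        assert(!commits_are_consecutive(40u, 42u));
        return 0;
}
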
@@ -1191,8 +1192,8 @@ static int flush_commit_list(struct super_block *s,
1191} 1192}
1192 1193
1193/* 1194/*
1194** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or 1195** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or
1195** returns NULL if it can't find anything 1196** returns NULL if it can't find anything
1196*/ 1197*/
1197static struct reiserfs_journal_list *find_newer_jl_for_cn(struct 1198static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
1198 reiserfs_journal_cnode 1199 reiserfs_journal_cnode
@@ -1236,11 +1237,11 @@ static void remove_journal_hash(struct super_block *,
1236** journal list for this transaction. Aside from freeing the cnode, this also allows the 1237** journal list for this transaction. Aside from freeing the cnode, this also allows the
1237** block to be reallocated for data blocks if it had been deleted. 1238** block to be reallocated for data blocks if it had been deleted.
1238*/ 1239*/
1239static void remove_all_from_journal_list(struct super_block *p_s_sb, 1240static void remove_all_from_journal_list(struct super_block *sb,
1240 struct reiserfs_journal_list *jl, 1241 struct reiserfs_journal_list *jl,
1241 int debug) 1242 int debug)
1242{ 1243{
1243 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1244 struct reiserfs_journal *journal = SB_JOURNAL(sb);
1244 struct reiserfs_journal_cnode *cn, *last; 1245 struct reiserfs_journal_cnode *cn, *last;
1245 cn = jl->j_realblock; 1246 cn = jl->j_realblock;
1246 1247
@@ -1250,18 +1251,18 @@ static void remove_all_from_journal_list(struct super_block *p_s_sb,
1250 while (cn) { 1251 while (cn) {
1251 if (cn->blocknr != 0) { 1252 if (cn->blocknr != 0) {
1252 if (debug) { 1253 if (debug) {
1253 reiserfs_warning(p_s_sb, 1254 reiserfs_warning(sb, "reiserfs-2201",
1254 "block %u, bh is %d, state %ld", 1255 "block %u, bh is %d, state %ld",
1255 cn->blocknr, cn->bh ? 1 : 0, 1256 cn->blocknr, cn->bh ? 1 : 0,
1256 cn->state); 1257 cn->state);
1257 } 1258 }
1258 cn->state = 0; 1259 cn->state = 0;
1259 remove_journal_hash(p_s_sb, journal->j_list_hash_table, 1260 remove_journal_hash(sb, journal->j_list_hash_table,
1260 jl, cn->blocknr, 1); 1261 jl, cn->blocknr, 1);
1261 } 1262 }
1262 last = cn; 1263 last = cn;
1263 cn = cn->next; 1264 cn = cn->next;
1264 free_cnode(p_s_sb, last); 1265 free_cnode(sb, last);
1265 } 1266 }
1266 jl->j_realblock = NULL; 1267 jl->j_realblock = NULL;
1267} 1268}
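
remove_all_from_journal_list walks jl->j_realblock once, unhashing and recycling every cnode; note the last/cn two-pointer step so a node is never read after it goes back on the free list. The pattern, reusing the types from the free-list sketch above (unhash stands in for remove_journal_hash):

static void drain_realblock_list(struct journal_ctr *j, struct cnode *cn,
                                 void (*unhash)(struct cnode *))
{
        struct cnode *last;

        while (cn) {
                unhash(cn);         /* remove_journal_hash analogue; the
                                     * kernel also clears cn->state and
                                     * skips blocknr == 0 entries */
                last = cn;
                cn = cn->next;      /* advance before recycling */
                free_cnode_sketch(j, last);
        }
}
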
@@ -1273,12 +1274,12 @@ static void remove_all_from_journal_list(struct super_block *p_s_sb,
1273** called by flush_journal_list, before it calls remove_all_from_journal_list 1274** called by flush_journal_list, before it calls remove_all_from_journal_list
1274** 1275**
1275*/ 1276*/
1276static int _update_journal_header_block(struct super_block *p_s_sb, 1277static int _update_journal_header_block(struct super_block *sb,
1277 unsigned long offset, 1278 unsigned long offset,
1278 unsigned long trans_id) 1279 unsigned int trans_id)
1279{ 1280{
1280 struct reiserfs_journal_header *jh; 1281 struct reiserfs_journal_header *jh;
1281 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1282 struct reiserfs_journal *journal = SB_JOURNAL(sb);
1282 1283
1283 if (reiserfs_is_journal_aborted(journal)) 1284 if (reiserfs_is_journal_aborted(journal))
1284 return -EIO; 1285 return -EIO;
@@ -1288,8 +1289,8 @@ static int _update_journal_header_block(struct super_block *p_s_sb,
1288 wait_on_buffer((journal->j_header_bh)); 1289 wait_on_buffer((journal->j_header_bh));
1289 if (unlikely(!buffer_uptodate(journal->j_header_bh))) { 1290 if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
1290#ifdef CONFIG_REISERFS_CHECK 1291#ifdef CONFIG_REISERFS_CHECK
1291 reiserfs_warning(p_s_sb, 1292 reiserfs_warning(sb, "journal-699",
1292 "journal-699: buffer write failed"); 1293 "buffer write failed");
1293#endif 1294#endif
1294 return -EIO; 1295 return -EIO;
1295 } 1296 }
@@ -1302,49 +1303,49 @@ static int _update_journal_header_block(struct super_block *p_s_sb,
1302 jh->j_first_unflushed_offset = cpu_to_le32(offset); 1303 jh->j_first_unflushed_offset = cpu_to_le32(offset);
1303 jh->j_mount_id = cpu_to_le32(journal->j_mount_id); 1304 jh->j_mount_id = cpu_to_le32(journal->j_mount_id);
1304 1305
1305 if (reiserfs_barrier_flush(p_s_sb)) { 1306 if (reiserfs_barrier_flush(sb)) {
1306 int ret; 1307 int ret;
1307 lock_buffer(journal->j_header_bh); 1308 lock_buffer(journal->j_header_bh);
1308 ret = submit_barrier_buffer(journal->j_header_bh); 1309 ret = submit_barrier_buffer(journal->j_header_bh);
1309 if (ret == -EOPNOTSUPP) { 1310 if (ret == -EOPNOTSUPP) {
1310 set_buffer_uptodate(journal->j_header_bh); 1311 set_buffer_uptodate(journal->j_header_bh);
1311 disable_barrier(p_s_sb); 1312 disable_barrier(sb);
1312 goto sync; 1313 goto sync;
1313 } 1314 }
1314 wait_on_buffer(journal->j_header_bh); 1315 wait_on_buffer(journal->j_header_bh);
1315 check_barrier_completion(p_s_sb, journal->j_header_bh); 1316 check_barrier_completion(sb, journal->j_header_bh);
1316 } else { 1317 } else {
1317 sync: 1318 sync:
1318 set_buffer_dirty(journal->j_header_bh); 1319 set_buffer_dirty(journal->j_header_bh);
1319 sync_dirty_buffer(journal->j_header_bh); 1320 sync_dirty_buffer(journal->j_header_bh);
1320 } 1321 }
1321 if (!buffer_uptodate(journal->j_header_bh)) { 1322 if (!buffer_uptodate(journal->j_header_bh)) {
1322 reiserfs_warning(p_s_sb, 1323 reiserfs_warning(sb, "journal-837",
1323 "journal-837: IO error during journal replay"); 1324 "IO error during journal replay");
1324 return -EIO; 1325 return -EIO;
1325 } 1326 }
1326 } 1327 }
1327 return 0; 1328 return 0;
1328} 1329}
1329 1330
1330static int update_journal_header_block(struct super_block *p_s_sb, 1331static int update_journal_header_block(struct super_block *sb,
1331 unsigned long offset, 1332 unsigned long offset,
1332 unsigned long trans_id) 1333 unsigned int trans_id)
1333{ 1334{
1334 return _update_journal_header_block(p_s_sb, offset, trans_id); 1335 return _update_journal_header_block(sb, offset, trans_id);
1335} 1336}
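
_update_journal_header_block shows a fallback pattern worth calling out: try a barrier write of the header once, and if the device reports -EOPNOTSUPP, permanently disable barriers for this filesystem and fall back to an ordinary mark-dirty-and-sync write. A sketch of just that control flow, with function-pointer stubs standing in for the buffer-head calls:

#include <errno.h>

static int write_header_sketch(int *barriers_enabled,
                               int (*barrier_write)(void),
                               int (*plain_write)(void))
{
        if (*barriers_enabled) {
                int ret = barrier_write();
                if (ret != -EOPNOTSUPP)
                        return ret;
                *barriers_enabled = 0;  /* disable_barrier() analogue */
        }
        return plain_write();           /* set_buffer_dirty + sync_dirty_buffer */
}
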
1336 1337
1337/* 1338/*
1338** flush any and all journal lists older than you are 1339** flush any and all journal lists older than you are
1339** can only be called from flush_journal_list 1340** can only be called from flush_journal_list
1340*/ 1341*/
1341static int flush_older_journal_lists(struct super_block *p_s_sb, 1342static int flush_older_journal_lists(struct super_block *sb,
1342 struct reiserfs_journal_list *jl) 1343 struct reiserfs_journal_list *jl)
1343{ 1344{
1344 struct list_head *entry; 1345 struct list_head *entry;
1345 struct reiserfs_journal_list *other_jl; 1346 struct reiserfs_journal_list *other_jl;
1346 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1347 struct reiserfs_journal *journal = SB_JOURNAL(sb);
1347 unsigned long trans_id = jl->j_trans_id; 1348 unsigned int trans_id = jl->j_trans_id;
1348 1349
1349 /* we know we are the only ones flushing things, no extra race 1350 /* we know we are the only ones flushing things, no extra race
1350 * protection is required. 1351 * protection is required.
@@ -1358,7 +1359,7 @@ static int flush_older_journal_lists(struct super_block *p_s_sb,
1358 if (other_jl->j_trans_id < trans_id) { 1359 if (other_jl->j_trans_id < trans_id) {
1359 BUG_ON(other_jl->j_refcount <= 0); 1360 BUG_ON(other_jl->j_refcount <= 0);
1360 /* do not flush all */ 1361 /* do not flush all */
1361 flush_journal_list(p_s_sb, other_jl, 0); 1362 flush_journal_list(sb, other_jl, 0);
1362 1363
1363 /* other_jl is now deleted from the list */ 1364 /* other_jl is now deleted from the list */
1364 goto restart; 1365 goto restart;
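
flush_older_journal_lists uses the classic restart-scan idiom: flushing a list entry deletes it and invalidates the iterator, so the loop jumps back to the head after every flush rather than continuing from a freed node. A self-contained sketch (jlist and flush are my names):

struct jlist {
        struct jlist *next;
        unsigned int trans_id;
};

static void flush_older_sketch(struct jlist **head, unsigned int my_id,
                               void (*flush)(struct jlist **, struct jlist *))
{
        struct jlist *e;
restart:
        for (e = *head; e; e = e->next) {
                if (e->trans_id < my_id) {
                        flush(head, e);  /* unlinks and frees e */
                        goto restart;    /* iterator is now invalid */
                }
        }
}
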
@@ -1381,8 +1382,8 @@ static void del_from_work_list(struct super_block *s,
1381** always set flushall to 1, unless you are calling from inside 1382** always set flushall to 1, unless you are calling from inside
1382** flush_journal_list 1383** flush_journal_list
1383** 1384**
1384** IMPORTANT. This can only be called while there are no journal writers, 1385** IMPORTANT. This can only be called while there are no journal writers,
1385** and the journal is locked. That means it can only be called from 1386** and the journal is locked. That means it can only be called from
1386** do_journal_end, or by journal_release 1387** do_journal_end, or by journal_release
1387*/ 1388*/
1388static int flush_journal_list(struct super_block *s, 1389static int flush_journal_list(struct super_block *s,
@@ -1401,8 +1402,7 @@ static int flush_journal_list(struct super_block *s,
1401 BUG_ON(j_len_saved <= 0); 1402 BUG_ON(j_len_saved <= 0);
1402 1403
1403 if (atomic_read(&journal->j_wcount) != 0) { 1404 if (atomic_read(&journal->j_wcount) != 0) {
1404 reiserfs_warning(s, 1405 reiserfs_warning(s, "clm-2048", "called with wcount %d",
1405 "clm-2048: flush_journal_list called with wcount %d",
1406 atomic_read(&journal->j_wcount)); 1406 atomic_read(&journal->j_wcount));
1407 } 1407 }
1408 BUG_ON(jl->j_trans_id == 0); 1408 BUG_ON(jl->j_trans_id == 0);
@@ -1416,8 +1416,7 @@ static int flush_journal_list(struct super_block *s,
1416 1416
1417 count = 0; 1417 count = 0;
1418 if (j_len_saved > journal->j_trans_max) { 1418 if (j_len_saved > journal->j_trans_max) {
1419 reiserfs_panic(s, 1419 reiserfs_panic(s, "journal-715", "length is %lu, trans id %lu",
1420 "journal-715: flush_journal_list, length is %lu, trans id %lu\n",
1421 j_len_saved, jl->j_trans_id); 1420 j_len_saved, jl->j_trans_id);
1422 return 0; 1421 return 0;
1423 } 1422 }
@@ -1430,7 +1429,7 @@ static int flush_journal_list(struct super_block *s,
1430 goto flush_older_and_return; 1429 goto flush_older_and_return;
1431 } 1430 }
1432 1431
1433 /* start by putting the commit list on disk. This will also flush 1432 /* start by putting the commit list on disk. This will also flush
1434 ** the commit lists of any older transactions 1433 ** the commit lists of any older transactions
1435 */ 1434 */
1436 flush_commit_list(s, jl, 1); 1435 flush_commit_list(s, jl, 1);
@@ -1445,12 +1444,12 @@ static int flush_journal_list(struct super_block *s,
1445 goto flush_older_and_return; 1444 goto flush_older_and_return;
1446 } 1445 }
1447 1446
1448 /* loop through each cnode, see if we need to write it, 1447 /* loop through each cnode, see if we need to write it,
1449 ** or wait on a more recent transaction, or just ignore it 1448 ** or wait on a more recent transaction, or just ignore it
1450 */ 1449 */
1451 if (atomic_read(&(journal->j_wcount)) != 0) { 1450 if (atomic_read(&(journal->j_wcount)) != 0) {
1452 reiserfs_panic(s, 1451 reiserfs_panic(s, "journal-844", "journal list is flushing, "
1453 "journal-844: panic journal list is flushing, wcount is not 0\n"); 1452 "wcount is not 0");
1454 } 1453 }
1455 cn = jl->j_realblock; 1454 cn = jl->j_realblock;
1456 while (cn) { 1455 while (cn) {
@@ -1474,8 +1473,8 @@ static int flush_journal_list(struct super_block *s,
1474 if (!pjl && cn->bh) { 1473 if (!pjl && cn->bh) {
1475 saved_bh = cn->bh; 1474 saved_bh = cn->bh;
1476 1475
1477 /* we do this to make sure nobody releases the buffer while 1476 /* we do this to make sure nobody releases the buffer while
1478 ** we are working with it 1477 ** we are working with it
1479 */ 1478 */
1480 get_bh(saved_bh); 1479 get_bh(saved_bh);
1481 1480
@@ -1498,8 +1497,8 @@ static int flush_journal_list(struct super_block *s,
1498 goto free_cnode; 1497 goto free_cnode;
1499 } 1498 }
1500 1499
1501 /* bh == NULL when the block got to disk on its own, OR, 1500 /* bh == NULL when the block got to disk on its own, OR,
1502 ** the block got freed in a future transaction 1501 ** the block got freed in a future transaction
1503 */ 1502 */
1504 if (saved_bh == NULL) { 1503 if (saved_bh == NULL) {
1505 goto free_cnode; 1504 goto free_cnode;
@@ -1510,8 +1509,8 @@ static int flush_journal_list(struct super_block *s,
1510 ** is not marked JDirty_wait 1509 ** is not marked JDirty_wait
1511 */ 1510 */
1512 if ((!was_jwait) && !buffer_locked(saved_bh)) { 1511 if ((!was_jwait) && !buffer_locked(saved_bh)) {
1513 reiserfs_warning(s, 1512 reiserfs_warning(s, "journal-813",
1514 "journal-813: BAD! buffer %llu %cdirty %cjwait, " 1513 "BAD! buffer %llu %cdirty %cjwait, "
1515 "not in a newer tranasction", 1514 "not in a newer tranasction",
1516 (unsigned long long)saved_bh-> 1515 (unsigned long long)saved_bh->
1517 b_blocknr, was_dirty ? ' ' : '!', 1516 b_blocknr, was_dirty ? ' ' : '!',
@@ -1529,8 +1528,8 @@ static int flush_journal_list(struct super_block *s,
1529 unlock_buffer(saved_bh); 1528 unlock_buffer(saved_bh);
1530 count++; 1529 count++;
1531 } else { 1530 } else {
1532 reiserfs_warning(s, 1531 reiserfs_warning(s, "clm-2082",
1533 "clm-2082: Unable to flush buffer %llu in %s", 1532 "Unable to flush buffer %llu in %s",
1534 (unsigned long long)saved_bh-> 1533 (unsigned long long)saved_bh->
1535 b_blocknr, __func__); 1534 b_blocknr, __func__);
1536 } 1535 }
@@ -1541,8 +1540,8 @@ static int flush_journal_list(struct super_block *s,
1541 /* we incremented this to keep others from taking the buffer head away */ 1540 /* we incremented this to keep others from taking the buffer head away */
1542 put_bh(saved_bh); 1541 put_bh(saved_bh);
1543 if (atomic_read(&(saved_bh->b_count)) < 0) { 1542 if (atomic_read(&(saved_bh->b_count)) < 0) {
1544 reiserfs_warning(s, 1543 reiserfs_warning(s, "journal-945",
1545 "journal-945: saved_bh->b_count < 0"); 1544 "saved_bh->b_count < 0");
1546 } 1545 }
1547 } 1546 }
1548 } 1547 }
@@ -1551,18 +1550,18 @@ static int flush_journal_list(struct super_block *s,
1551 while (cn) { 1550 while (cn) {
1552 if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { 1551 if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
1553 if (!cn->bh) { 1552 if (!cn->bh) {
1554 reiserfs_panic(s, 1553 reiserfs_panic(s, "journal-1011",
1555 "journal-1011: cn->bh is NULL\n"); 1554 "cn->bh is NULL");
1556 } 1555 }
1557 wait_on_buffer(cn->bh); 1556 wait_on_buffer(cn->bh);
1558 if (!cn->bh) { 1557 if (!cn->bh) {
1559 reiserfs_panic(s, 1558 reiserfs_panic(s, "journal-1012",
1560 "journal-1012: cn->bh is NULL\n"); 1559 "cn->bh is NULL");
1561 } 1560 }
1562 if (unlikely(!buffer_uptodate(cn->bh))) { 1561 if (unlikely(!buffer_uptodate(cn->bh))) {
1563#ifdef CONFIG_REISERFS_CHECK 1562#ifdef CONFIG_REISERFS_CHECK
1564 reiserfs_warning(s, 1563 reiserfs_warning(s, "journal-949",
1565 "journal-949: buffer write failed\n"); 1564 "buffer write failed");
1566#endif 1565#endif
1567 err = -EIO; 1566 err = -EIO;
1568 } 1567 }
@@ -1587,7 +1586,7 @@ static int flush_journal_list(struct super_block *s,
1587 __func__); 1586 __func__);
1588 flush_older_and_return: 1587 flush_older_and_return:
1589 1588
1590 /* before we can update the journal header block, we _must_ flush all 1589 /* before we can update the journal header block, we _must_ flush all
1591 ** real blocks from all older transactions to disk. This is because 1590 ** real blocks from all older transactions to disk. This is because
1592 ** once the header block is updated, this transaction will not be 1591 ** once the header block is updated, this transaction will not be
1593 ** replayed after a crash 1592 ** replayed after a crash
@@ -1597,7 +1596,7 @@ static int flush_journal_list(struct super_block *s,
1597 } 1596 }
1598 1597
1599 err = journal->j_errno; 1598 err = journal->j_errno;
1600 /* before we can remove everything from the hash tables for this 1599 /* before we can remove everything from the hash tables for this
1601 ** transaction, we must make sure it can never be replayed 1600 ** transaction, we must make sure it can never be replayed
1602 ** 1601 **
1603 ** since we are only called from do_journal_end, we know for sure there 1602 ** since we are only called from do_journal_end, we know for sure there
@@ -1623,7 +1622,7 @@ static int flush_journal_list(struct super_block *s,
1623 1622
1624 if (journal->j_last_flush_id != 0 && 1623 if (journal->j_last_flush_id != 0 &&
1625 (jl->j_trans_id - journal->j_last_flush_id) != 1) { 1624 (jl->j_trans_id - journal->j_last_flush_id) != 1) {
1626 reiserfs_warning(s, "clm-2201: last flush %lu, current %lu", 1625 reiserfs_warning(s, "clm-2201", "last flush %lu, current %lu",
1627 journal->j_last_flush_id, jl->j_trans_id); 1626 journal->j_last_flush_id, jl->j_trans_id);
1628 } 1627 }
1629 journal->j_last_flush_id = jl->j_trans_id; 1628 journal->j_last_flush_id = jl->j_trans_id;
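
The comments inside flush_journal_list spell out a strict durability order that is easy to lose in the 200-line function: commit blocks first, then the real blocks of all older transactions, and only then the journal header, since advancing the header makes the transaction unreplayable after a crash. Condensed into a sketch (the stubs are mine, not kernel calls):

static int flush_ordering_sketch(int (*flush_commits)(void),
                                 int (*flush_older_real_blocks)(void),
                                 int (*update_header)(void))
{
        int err;

        err = flush_commits();            /* flush_commit_list(s, jl, 1) */
        if (err)
                return err;
        err = flush_older_real_blocks();  /* flush_older_journal_lists() */
        if (err)
                return err;
        return update_header();           /* update_journal_header_block() */
}
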
@@ -1758,13 +1757,13 @@ static int dirty_one_transaction(struct super_block *s,
1758static int kupdate_transactions(struct super_block *s, 1757static int kupdate_transactions(struct super_block *s,
1759 struct reiserfs_journal_list *jl, 1758 struct reiserfs_journal_list *jl,
1760 struct reiserfs_journal_list **next_jl, 1759 struct reiserfs_journal_list **next_jl,
1761 unsigned long *next_trans_id, 1760 unsigned int *next_trans_id,
1762 int num_blocks, int num_trans) 1761 int num_blocks, int num_trans)
1763{ 1762{
1764 int ret = 0; 1763 int ret = 0;
1765 int written = 0; 1764 int written = 0;
1766 int transactions_flushed = 0; 1765 int transactions_flushed = 0;
1767 unsigned long orig_trans_id = jl->j_trans_id; 1766 unsigned int orig_trans_id = jl->j_trans_id;
1768 struct buffer_chunk chunk; 1767 struct buffer_chunk chunk;
1769 struct list_head *entry; 1768 struct list_head *entry;
1770 struct reiserfs_journal *journal = SB_JOURNAL(s); 1769 struct reiserfs_journal *journal = SB_JOURNAL(s);
@@ -1833,7 +1832,7 @@ static int flush_used_journal_lists(struct super_block *s,
1833 int limit = 256; 1832 int limit = 256;
1834 struct reiserfs_journal_list *tjl; 1833 struct reiserfs_journal_list *tjl;
1835 struct reiserfs_journal_list *flush_jl; 1834 struct reiserfs_journal_list *flush_jl;
1836 unsigned long trans_id; 1835 unsigned int trans_id;
1837 struct reiserfs_journal *journal = SB_JOURNAL(s); 1836 struct reiserfs_journal *journal = SB_JOURNAL(s);
1838 1837
1839 flush_jl = tjl = jl; 1838 flush_jl = tjl = jl;
@@ -1909,22 +1908,22 @@ void remove_journal_hash(struct super_block *sb,
1909 } 1908 }
1910} 1909}
1911 1910
1912static void free_journal_ram(struct super_block *p_s_sb) 1911static void free_journal_ram(struct super_block *sb)
1913{ 1912{
1914 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1913 struct reiserfs_journal *journal = SB_JOURNAL(sb);
1915 kfree(journal->j_current_jl); 1914 kfree(journal->j_current_jl);
1916 journal->j_num_lists--; 1915 journal->j_num_lists--;
1917 1916
1918 vfree(journal->j_cnode_free_orig); 1917 vfree(journal->j_cnode_free_orig);
1919 free_list_bitmaps(p_s_sb, journal->j_list_bitmap); 1918 free_list_bitmaps(sb, journal->j_list_bitmap);
1920 free_bitmap_nodes(p_s_sb); /* must be after free_list_bitmaps */ 1919 free_bitmap_nodes(sb); /* must be after free_list_bitmaps */
1921 if (journal->j_header_bh) { 1920 if (journal->j_header_bh) {
1922 brelse(journal->j_header_bh); 1921 brelse(journal->j_header_bh);
1923 } 1922 }
1924 /* j_header_bh is on the journal dev, make sure not to release the journal 1923 /* j_header_bh is on the journal dev, make sure not to release the journal
1925 * dev until we brelse j_header_bh 1924 * dev until we brelse j_header_bh
1926 */ 1925 */
1927 release_journal_dev(p_s_sb, journal); 1926 release_journal_dev(sb, journal);
1928 vfree(journal); 1927 vfree(journal);
1929} 1928}
1930 1929
@@ -1933,27 +1932,27 @@ static void free_journal_ram(struct super_block *p_s_sb)
1933** of read_super() yet. Any other caller must keep error at 0. 1932** of read_super() yet. Any other caller must keep error at 0.
1934*/ 1933*/
1935static int do_journal_release(struct reiserfs_transaction_handle *th, 1934static int do_journal_release(struct reiserfs_transaction_handle *th,
1936 struct super_block *p_s_sb, int error) 1935 struct super_block *sb, int error)
1937{ 1936{
1938 struct reiserfs_transaction_handle myth; 1937 struct reiserfs_transaction_handle myth;
1939 int flushed = 0; 1938 int flushed = 0;
1940 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1939 struct reiserfs_journal *journal = SB_JOURNAL(sb);
1941 1940
1942 /* we only want to flush out transactions if we were called with error == 0 1941 /* we only want to flush out transactions if we were called with error == 0
1943 */ 1942 */
1944 if (!error && !(p_s_sb->s_flags & MS_RDONLY)) { 1943 if (!error && !(sb->s_flags & MS_RDONLY)) {
1945 /* end the current trans */ 1944 /* end the current trans */
1946 BUG_ON(!th->t_trans_id); 1945 BUG_ON(!th->t_trans_id);
1947 do_journal_end(th, p_s_sb, 10, FLUSH_ALL); 1946 do_journal_end(th, sb, 10, FLUSH_ALL);
1948 1947
1949 /* make sure something gets logged to force our way into the flush code */ 1948 /* make sure something gets logged to force our way into the flush code */
1950 if (!journal_join(&myth, p_s_sb, 1)) { 1949 if (!journal_join(&myth, sb, 1)) {
1951 reiserfs_prepare_for_journal(p_s_sb, 1950 reiserfs_prepare_for_journal(sb,
1952 SB_BUFFER_WITH_SB(p_s_sb), 1951 SB_BUFFER_WITH_SB(sb),
1953 1); 1952 1);
1954 journal_mark_dirty(&myth, p_s_sb, 1953 journal_mark_dirty(&myth, sb,
1955 SB_BUFFER_WITH_SB(p_s_sb)); 1954 SB_BUFFER_WITH_SB(sb));
1956 do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL); 1955 do_journal_end(&myth, sb, 1, FLUSH_ALL);
1957 flushed = 1; 1956 flushed = 1;
1958 } 1957 }
1959 } 1958 }
@@ -1961,26 +1960,26 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
1961 /* this also catches errors during the do_journal_end above */ 1960 /* this also catches errors during the do_journal_end above */
1962 if (!error && reiserfs_is_journal_aborted(journal)) { 1961 if (!error && reiserfs_is_journal_aborted(journal)) {
1963 memset(&myth, 0, sizeof(myth)); 1962 memset(&myth, 0, sizeof(myth));
1964 if (!journal_join_abort(&myth, p_s_sb, 1)) { 1963 if (!journal_join_abort(&myth, sb, 1)) {
1965 reiserfs_prepare_for_journal(p_s_sb, 1964 reiserfs_prepare_for_journal(sb,
1966 SB_BUFFER_WITH_SB(p_s_sb), 1965 SB_BUFFER_WITH_SB(sb),
1967 1); 1966 1);
1968 journal_mark_dirty(&myth, p_s_sb, 1967 journal_mark_dirty(&myth, sb,
1969 SB_BUFFER_WITH_SB(p_s_sb)); 1968 SB_BUFFER_WITH_SB(sb));
1970 do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL); 1969 do_journal_end(&myth, sb, 1, FLUSH_ALL);
1971 } 1970 }
1972 } 1971 }
1973 1972
1974 reiserfs_mounted_fs_count--; 1973 reiserfs_mounted_fs_count--;
1975 /* wait for all commits to finish */ 1974 /* wait for all commits to finish */
1976 cancel_delayed_work(&SB_JOURNAL(p_s_sb)->j_work); 1975 cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
1977 flush_workqueue(commit_wq); 1976 flush_workqueue(commit_wq);
1978 if (!reiserfs_mounted_fs_count) { 1977 if (!reiserfs_mounted_fs_count) {
1979 destroy_workqueue(commit_wq); 1978 destroy_workqueue(commit_wq);
1980 commit_wq = NULL; 1979 commit_wq = NULL;
1981 } 1980 }
1982 1981
1983 free_journal_ram(p_s_sb); 1982 free_journal_ram(sb);
1984 1983
1985 return 0; 1984 return 0;
1986} 1985}
@@ -1989,41 +1988,41 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
1989** call on unmount. flush all journal trans, release all alloc'd ram 1988** call on unmount. flush all journal trans, release all alloc'd ram
1990*/ 1989*/
1991int journal_release(struct reiserfs_transaction_handle *th, 1990int journal_release(struct reiserfs_transaction_handle *th,
1992 struct super_block *p_s_sb) 1991 struct super_block *sb)
1993{ 1992{
1994 return do_journal_release(th, p_s_sb, 0); 1993 return do_journal_release(th, sb, 0);
1995} 1994}
1996 1995
1997/* 1996/*
1998** only call from an error condition inside reiserfs_read_super! 1997** only call from an error condition inside reiserfs_read_super!
1999*/ 1998*/
2000int journal_release_error(struct reiserfs_transaction_handle *th, 1999int journal_release_error(struct reiserfs_transaction_handle *th,
2001 struct super_block *p_s_sb) 2000 struct super_block *sb)
2002{ 2001{
2003 return do_journal_release(th, p_s_sb, 1); 2002 return do_journal_release(th, sb, 1);
2004} 2003}
2005 2004
2006/* compares description block with commit block. returns 1 if they differ, 0 if they are the same */ 2005/* compares description block with commit block. returns 1 if they differ, 0 if they are the same */
2007static int journal_compare_desc_commit(struct super_block *p_s_sb, 2006static int journal_compare_desc_commit(struct super_block *sb,
2008 struct reiserfs_journal_desc *desc, 2007 struct reiserfs_journal_desc *desc,
2009 struct reiserfs_journal_commit *commit) 2008 struct reiserfs_journal_commit *commit)
2010{ 2009{
2011 if (get_commit_trans_id(commit) != get_desc_trans_id(desc) || 2010 if (get_commit_trans_id(commit) != get_desc_trans_id(desc) ||
2012 get_commit_trans_len(commit) != get_desc_trans_len(desc) || 2011 get_commit_trans_len(commit) != get_desc_trans_len(desc) ||
2013 get_commit_trans_len(commit) > SB_JOURNAL(p_s_sb)->j_trans_max || 2012 get_commit_trans_len(commit) > SB_JOURNAL(sb)->j_trans_max ||
2014 get_commit_trans_len(commit) <= 0) { 2013 get_commit_trans_len(commit) <= 0) {
2015 return 1; 2014 return 1;
2016 } 2015 }
2017 return 0; 2016 return 0;
2018} 2017}
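
journal_compare_desc_commit reduces to a four-way sanity predicate on the descriptor/commit pair. Restated standalone, assuming an unsigned length, so the kernel's <= 0 test becomes == 0:

#include <stdint.h>

/* returns 1 when desc and commit cannot describe one valid transaction */
static int desc_commit_mismatch(uint32_t desc_id, uint32_t desc_len,
                                uint32_t commit_id, uint32_t commit_len,
                                uint32_t trans_max)
{
        return commit_id != desc_id ||
               commit_len != desc_len ||
               commit_len > trans_max ||
               commit_len == 0;
}
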
2019 2018
2020/* returns 0 if it did not find a description block 2019/* returns 0 if it did not find a description block
2021** returns -1 if it found a corrupt commit block 2020** returns -1 if it found a corrupt commit block
2022** returns 1 if both desc and commit were valid 2021** returns 1 if both desc and commit were valid
2023*/ 2022*/
2024static int journal_transaction_is_valid(struct super_block *p_s_sb, 2023static int journal_transaction_is_valid(struct super_block *sb,
2025 struct buffer_head *d_bh, 2024 struct buffer_head *d_bh,
2026 unsigned long *oldest_invalid_trans_id, 2025 unsigned int *oldest_invalid_trans_id,
2027 unsigned long *newest_mount_id) 2026 unsigned long *newest_mount_id)
2028{ 2027{
2029 struct reiserfs_journal_desc *desc; 2028 struct reiserfs_journal_desc *desc;
@@ -2039,7 +2038,7 @@ static int journal_transaction_is_valid(struct super_block *p_s_sb,
2039 && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) { 2038 && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) {
2040 if (oldest_invalid_trans_id && *oldest_invalid_trans_id 2039 if (oldest_invalid_trans_id && *oldest_invalid_trans_id
2041 && get_desc_trans_id(desc) > *oldest_invalid_trans_id) { 2040 && get_desc_trans_id(desc) > *oldest_invalid_trans_id) {
2042 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2041 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2043 "journal-986: transaction " 2042 "journal-986: transaction "
2044 "is valid returning because trans_id %d is greater than " 2043 "is valid returning because trans_id %d is greater than "
2045 "oldest_invalid %lu", 2044 "oldest_invalid %lu",
@@ -2049,7 +2048,7 @@ static int journal_transaction_is_valid(struct super_block *p_s_sb,
2049 } 2048 }
2050 if (newest_mount_id 2049 if (newest_mount_id
2051 && *newest_mount_id > get_desc_mount_id(desc)) { 2050 && *newest_mount_id > get_desc_mount_id(desc)) {
2052 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2051 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2053 "journal-1087: transaction " 2052 "journal-1087: transaction "
2054 "is valid returning because mount_id %d is less than " 2053 "is valid returning because mount_id %d is less than "
2055 "newest_mount_id %lu", 2054 "newest_mount_id %lu",
@@ -2057,36 +2056,37 @@ static int journal_transaction_is_valid(struct super_block *p_s_sb,
2057 *newest_mount_id); 2056 *newest_mount_id);
2058 return -1; 2057 return -1;
2059 } 2058 }
2060 if (get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max) { 2059 if (get_desc_trans_len(desc) > SB_JOURNAL(sb)->j_trans_max) {
2061 reiserfs_warning(p_s_sb, 2060 reiserfs_warning(sb, "journal-2018",
2062 "journal-2018: Bad transaction length %d encountered, ignoring transaction", 2061 "Bad transaction length %d "
2062 "encountered, ignoring transaction",
2063 get_desc_trans_len(desc)); 2063 get_desc_trans_len(desc));
2064 return -1; 2064 return -1;
2065 } 2065 }
2066 offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); 2066 offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2067 2067
2068 /* ok, we have a journal description block, let's see if the transaction was valid */ 2068 /* ok, we have a journal description block, let's see if the transaction was valid */
2069 c_bh = 2069 c_bh =
2070 journal_bread(p_s_sb, 2070 journal_bread(sb,
2071 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2071 SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2072 ((offset + get_desc_trans_len(desc) + 2072 ((offset + get_desc_trans_len(desc) +
2073 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))); 2073 1) % SB_ONDISK_JOURNAL_SIZE(sb)));
2074 if (!c_bh) 2074 if (!c_bh)
2075 return 0; 2075 return 0;
2076 commit = (struct reiserfs_journal_commit *)c_bh->b_data; 2076 commit = (struct reiserfs_journal_commit *)c_bh->b_data;
2077 if (journal_compare_desc_commit(p_s_sb, desc, commit)) { 2077 if (journal_compare_desc_commit(sb, desc, commit)) {
2078 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2078 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2079 "journal_transaction_is_valid, commit offset %ld had bad " 2079 "journal_transaction_is_valid, commit offset %ld had bad "
2080 "time %d or length %d", 2080 "time %d or length %d",
2081 c_bh->b_blocknr - 2081 c_bh->b_blocknr -
2082 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2082 SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2083 get_commit_trans_id(commit), 2083 get_commit_trans_id(commit),
2084 get_commit_trans_len(commit)); 2084 get_commit_trans_len(commit));
2085 brelse(c_bh); 2085 brelse(c_bh);
2086 if (oldest_invalid_trans_id) { 2086 if (oldest_invalid_trans_id) {
2087 *oldest_invalid_trans_id = 2087 *oldest_invalid_trans_id =
2088 get_desc_trans_id(desc); 2088 get_desc_trans_id(desc);
2089 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2089 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2090 "journal-1004: " 2090 "journal-1004: "
2091 "transaction_is_valid setting oldest invalid trans_id " 2091 "transaction_is_valid setting oldest invalid trans_id "
2092 "to %d", 2092 "to %d",
@@ -2095,11 +2095,11 @@ static int journal_transaction_is_valid(struct super_block *p_s_sb,
2095 return -1; 2095 return -1;
2096 } 2096 }
2097 brelse(c_bh); 2097 brelse(c_bh);
2098 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2098 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2099 "journal-1006: found valid " 2099 "journal-1006: found valid "
2100 "transaction start offset %llu, len %d id %d", 2100 "transaction start offset %llu, len %d id %d",
2101 d_bh->b_blocknr - 2101 d_bh->b_blocknr -
2102 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2102 SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2103 get_desc_trans_len(desc), 2103 get_desc_trans_len(desc),
2104 get_desc_trans_id(desc)); 2104 get_desc_trans_id(desc));
2105 return 1; 2105 return 1;
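
The offset math here is the heart of the circular log: a transaction is the descriptor block, then trans_len logged blocks, then the commit block, all taken modulo the on-disk journal size and rebased on the journal's first block. Factored out as a helper (my name):

/* on-disk block number of the commit block for a descriptor at
 * desc_offset (relative to the journal start) with trans_len blocks */
static unsigned long commit_blocknr(unsigned long journal_first_block,
                                    unsigned long desc_offset,
                                    unsigned long trans_len,
                                    unsigned long journal_size)
{
        return journal_first_block +
               (desc_offset + trans_len + 1) % journal_size;
}
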
@@ -2121,63 +2121,63 @@ static void brelse_array(struct buffer_head **heads, int num)
2121** this either reads in a replays a transaction, or returns because the transaction 2121** this either reads in a replays a transaction, or returns because the transaction
2122** is invalid, or too old. 2122** is invalid, or too old.
2123*/ 2123*/
2124static int journal_read_transaction(struct super_block *p_s_sb, 2124static int journal_read_transaction(struct super_block *sb,
2125 unsigned long cur_dblock, 2125 unsigned long cur_dblock,
2126 unsigned long oldest_start, 2126 unsigned long oldest_start,
2127 unsigned long oldest_trans_id, 2127 unsigned int oldest_trans_id,
2128 unsigned long newest_mount_id) 2128 unsigned long newest_mount_id)
2129{ 2129{
2130 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 2130 struct reiserfs_journal *journal = SB_JOURNAL(sb);
2131 struct reiserfs_journal_desc *desc; 2131 struct reiserfs_journal_desc *desc;
2132 struct reiserfs_journal_commit *commit; 2132 struct reiserfs_journal_commit *commit;
2133 unsigned long trans_id = 0; 2133 unsigned int trans_id = 0;
2134 struct buffer_head *c_bh; 2134 struct buffer_head *c_bh;
2135 struct buffer_head *d_bh; 2135 struct buffer_head *d_bh;
2136 struct buffer_head **log_blocks = NULL; 2136 struct buffer_head **log_blocks = NULL;
2137 struct buffer_head **real_blocks = NULL; 2137 struct buffer_head **real_blocks = NULL;
2138 unsigned long trans_offset; 2138 unsigned int trans_offset;
2139 int i; 2139 int i;
2140 int trans_half; 2140 int trans_half;
2141 2141
2142 d_bh = journal_bread(p_s_sb, cur_dblock); 2142 d_bh = journal_bread(sb, cur_dblock);
2143 if (!d_bh) 2143 if (!d_bh)
2144 return 1; 2144 return 1;
2145 desc = (struct reiserfs_journal_desc *)d_bh->b_data; 2145 desc = (struct reiserfs_journal_desc *)d_bh->b_data;
2146 trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); 2146 trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2147 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: " 2147 reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1037: "
2148 "journal_read_transaction, offset %llu, len %d mount_id %d", 2148 "journal_read_transaction, offset %llu, len %d mount_id %d",
2149 d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2149 d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2150 get_desc_trans_len(desc), get_desc_mount_id(desc)); 2150 get_desc_trans_len(desc), get_desc_mount_id(desc));
2151 if (get_desc_trans_id(desc) < oldest_trans_id) { 2151 if (get_desc_trans_id(desc) < oldest_trans_id) {
2152 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: " 2152 reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1039: "
2153 "journal_read_trans skipping because %lu is too old", 2153 "journal_read_trans skipping because %lu is too old",
2154 cur_dblock - 2154 cur_dblock -
2155 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)); 2155 SB_ONDISK_JOURNAL_1st_BLOCK(sb));
2156 brelse(d_bh); 2156 brelse(d_bh);
2157 return 1; 2157 return 1;
2158 } 2158 }
2159 if (get_desc_mount_id(desc) != newest_mount_id) { 2159 if (get_desc_mount_id(desc) != newest_mount_id) {
2160 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: " 2160 reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1146: "
2161 "journal_read_trans skipping because %d is != " 2161 "journal_read_trans skipping because %d is != "
2162 "newest_mount_id %lu", get_desc_mount_id(desc), 2162 "newest_mount_id %lu", get_desc_mount_id(desc),
2163 newest_mount_id); 2163 newest_mount_id);
2164 brelse(d_bh); 2164 brelse(d_bh);
2165 return 1; 2165 return 1;
2166 } 2166 }
2167 c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2167 c_bh = journal_bread(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2168 ((trans_offset + get_desc_trans_len(desc) + 1) % 2168 ((trans_offset + get_desc_trans_len(desc) + 1) %
2169 SB_ONDISK_JOURNAL_SIZE(p_s_sb))); 2169 SB_ONDISK_JOURNAL_SIZE(sb)));
2170 if (!c_bh) { 2170 if (!c_bh) {
2171 brelse(d_bh); 2171 brelse(d_bh);
2172 return 1; 2172 return 1;
2173 } 2173 }
2174 commit = (struct reiserfs_journal_commit *)c_bh->b_data; 2174 commit = (struct reiserfs_journal_commit *)c_bh->b_data;
2175 if (journal_compare_desc_commit(p_s_sb, desc, commit)) { 2175 if (journal_compare_desc_commit(sb, desc, commit)) {
2176 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2176 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2177 "journal_read_transaction, " 2177 "journal_read_transaction, "
2178 "commit offset %llu had bad time %d or length %d", 2178 "commit offset %llu had bad time %d or length %d",
2179 c_bh->b_blocknr - 2179 c_bh->b_blocknr -
2180 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2180 SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2181 get_commit_trans_id(commit), 2181 get_commit_trans_id(commit),
2182 get_commit_trans_len(commit)); 2182 get_commit_trans_len(commit));
2183 brelse(c_bh); 2183 brelse(c_bh);
@@ -2195,38 +2195,41 @@ static int journal_read_transaction(struct super_block *p_s_sb,
2195 brelse(d_bh); 2195 brelse(d_bh);
2196 kfree(log_blocks); 2196 kfree(log_blocks);
2197 kfree(real_blocks); 2197 kfree(real_blocks);
2198 reiserfs_warning(p_s_sb, 2198 reiserfs_warning(sb, "journal-1169",
2199 "journal-1169: kmalloc failed, unable to mount FS"); 2199 "kmalloc failed, unable to mount FS");
2200 return -1; 2200 return -1;
2201 } 2201 }
2202 /* get all the buffer heads */ 2202 /* get all the buffer heads */
2203 trans_half = journal_trans_half(p_s_sb->s_blocksize); 2203 trans_half = journal_trans_half(sb->s_blocksize);
2204 for (i = 0; i < get_desc_trans_len(desc); i++) { 2204 for (i = 0; i < get_desc_trans_len(desc); i++) {
2205 log_blocks[i] = 2205 log_blocks[i] =
2206 journal_getblk(p_s_sb, 2206 journal_getblk(sb,
2207 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2207 SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2208 (trans_offset + 1 + 2208 (trans_offset + 1 +
2209 i) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2209 i) % SB_ONDISK_JOURNAL_SIZE(sb));
2210 if (i < trans_half) { 2210 if (i < trans_half) {
2211 real_blocks[i] = 2211 real_blocks[i] =
2212 sb_getblk(p_s_sb, 2212 sb_getblk(sb,
2213 le32_to_cpu(desc->j_realblock[i])); 2213 le32_to_cpu(desc->j_realblock[i]));
2214 } else { 2214 } else {
2215 real_blocks[i] = 2215 real_blocks[i] =
2216 sb_getblk(p_s_sb, 2216 sb_getblk(sb,
2217 le32_to_cpu(commit-> 2217 le32_to_cpu(commit->
2218 j_realblock[i - trans_half])); 2218 j_realblock[i - trans_half]));
2219 } 2219 }
2220 if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) { 2220 if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(sb)) {
2221 reiserfs_warning(p_s_sb, 2221 reiserfs_warning(sb, "journal-1207",
2222 "journal-1207: REPLAY FAILURE fsck required! Block to replay is outside of filesystem"); 2222 "REPLAY FAILURE fsck required! "
2223 "Block to replay is outside of "
2224 "filesystem");
2223 goto abort_replay; 2225 goto abort_replay;
2224 } 2226 }
2225 /* make sure we don't try to replay onto log or reserved area */ 2227 /* make sure we don't try to replay onto log or reserved area */
2226 if (is_block_in_log_or_reserved_area 2228 if (is_block_in_log_or_reserved_area
2227 (p_s_sb, real_blocks[i]->b_blocknr)) { 2229 (sb, real_blocks[i]->b_blocknr)) {
2228 reiserfs_warning(p_s_sb, 2230 reiserfs_warning(sb, "journal-1204",
2229 "journal-1204: REPLAY FAILURE fsck required! Trying to replay onto a log block"); 2231 "REPLAY FAILURE fsck required! "
2232 "Trying to replay onto a log block");
2230 abort_replay: 2233 abort_replay:
2231 brelse_array(log_blocks, i); 2234 brelse_array(log_blocks, i);
2232 brelse_array(real_blocks, i); 2235 brelse_array(real_blocks, i);
@@ -2242,8 +2245,9 @@ static int journal_read_transaction(struct super_block *p_s_sb,
2242 for (i = 0; i < get_desc_trans_len(desc); i++) { 2245 for (i = 0; i < get_desc_trans_len(desc); i++) {
2243 wait_on_buffer(log_blocks[i]); 2246 wait_on_buffer(log_blocks[i]);
2244 if (!buffer_uptodate(log_blocks[i])) { 2247 if (!buffer_uptodate(log_blocks[i])) {
2245 reiserfs_warning(p_s_sb, 2248 reiserfs_warning(sb, "journal-1212",
2246 "journal-1212: REPLAY FAILURE fsck required! buffer write failed"); 2249 "REPLAY FAILURE fsck required! "
2250 "buffer write failed");
2247 brelse_array(log_blocks + i, 2251 brelse_array(log_blocks + i,
2248 get_desc_trans_len(desc) - i); 2252 get_desc_trans_len(desc) - i);
2249 brelse_array(real_blocks, get_desc_trans_len(desc)); 2253 brelse_array(real_blocks, get_desc_trans_len(desc));
@@ -2266,8 +2270,9 @@ static int journal_read_transaction(struct super_block *p_s_sb,
2266 for (i = 0; i < get_desc_trans_len(desc); i++) { 2270 for (i = 0; i < get_desc_trans_len(desc); i++) {
2267 wait_on_buffer(real_blocks[i]); 2271 wait_on_buffer(real_blocks[i]);
2268 if (!buffer_uptodate(real_blocks[i])) { 2272 if (!buffer_uptodate(real_blocks[i])) {
2269 reiserfs_warning(p_s_sb, 2273 reiserfs_warning(sb, "journal-1226",
2270 "journal-1226: REPLAY FAILURE, fsck required! buffer write failed"); 2274 "REPLAY FAILURE, fsck required! "
2275 "buffer write failed");
2271 brelse_array(real_blocks + i, 2276 brelse_array(real_blocks + i,
2272 get_desc_trans_len(desc) - i); 2277 get_desc_trans_len(desc) - i);
2273 brelse(c_bh); 2278 brelse(c_bh);
@@ -2279,15 +2284,15 @@ static int journal_read_transaction(struct super_block *p_s_sb,
2279 brelse(real_blocks[i]); 2284 brelse(real_blocks[i]);
2280 } 2285 }
2281 cur_dblock = 2286 cur_dblock =
2282 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2287 SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2283 ((trans_offset + get_desc_trans_len(desc) + 2288 ((trans_offset + get_desc_trans_len(desc) +
2284 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2289 2) % SB_ONDISK_JOURNAL_SIZE(sb));
2285 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2290 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2286 "journal-1095: setting journal " "start to offset %ld", 2291 "journal-1095: setting journal " "start to offset %ld",
2287 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)); 2292 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb));
2288 2293
2289 /* init starting values for the first transaction, in case this is the last transaction to be replayed. */ 2294 /* init starting values for the first transaction, in case this is the last transaction to be replayed. */
2290 journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); 2295 journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2291 journal->j_last_flush_trans_id = trans_id; 2296 journal->j_last_flush_trans_id = trans_id;
2292 journal->j_trans_id = trans_id + 1; 2297 journal->j_trans_id = trans_id + 1;
2293 /* check for trans_id overflow */ 2298 /* check for trans_id overflow */
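
One detail of replay worth making explicit: the list of real (home-location) block numbers is split across the transaction, with the first trans_half entries in the descriptor and the remainder in the commit block, where trans_half is derived from the block size by journal_trans_half(). The indexing, as a sketch (le32_to_cpu conversion omitted):

#include <stdint.h>

static uint32_t real_blocknr_sketch(const uint32_t *desc_realblock,
                                    const uint32_t *commit_realblock,
                                    int i, int trans_half)
{
        return i < trans_half ? desc_realblock[i]
                              : commit_realblock[i - trans_half];
}
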
@@ -2352,12 +2357,12 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
2352** 2357**
2353** On exit, it sets things up so the first transaction will work correctly. 2358** On exit, it sets things up so the first transaction will work correctly.
2354*/ 2359*/
2355static int journal_read(struct super_block *p_s_sb) 2360static int journal_read(struct super_block *sb)
2356{ 2361{
2357 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 2362 struct reiserfs_journal *journal = SB_JOURNAL(sb);
2358 struct reiserfs_journal_desc *desc; 2363 struct reiserfs_journal_desc *desc;
2359 unsigned long oldest_trans_id = 0; 2364 unsigned int oldest_trans_id = 0;
2360 unsigned long oldest_invalid_trans_id = 0; 2365 unsigned int oldest_invalid_trans_id = 0;
2361 time_t start; 2366 time_t start;
2362 unsigned long oldest_start = 0; 2367 unsigned long oldest_start = 0;
2363 unsigned long cur_dblock = 0; 2368 unsigned long cur_dblock = 0;
@@ -2370,46 +2375,46 @@ static int journal_read(struct super_block *p_s_sb)
2370 int ret; 2375 int ret;
2371 char b[BDEVNAME_SIZE]; 2376 char b[BDEVNAME_SIZE];
2372 2377
2373 cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); 2378 cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2374 reiserfs_info(p_s_sb, "checking transaction log (%s)\n", 2379 reiserfs_info(sb, "checking transaction log (%s)\n",
2375 bdevname(journal->j_dev_bd, b)); 2380 bdevname(journal->j_dev_bd, b));
2376 start = get_seconds(); 2381 start = get_seconds();
2377 2382
2378 /* step 1, read in the journal header block. Check the transaction it says 2383 /* step 1, read in the journal header block. Check the transaction it says
2379 ** is the first unflushed, and if that transaction is not valid, 2384 ** is the first unflushed, and if that transaction is not valid,
2380 ** replay is done 2385 ** replay is done
2381 */ 2386 */
2382 journal->j_header_bh = journal_bread(p_s_sb, 2387 journal->j_header_bh = journal_bread(sb,
2383 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) 2388 SB_ONDISK_JOURNAL_1st_BLOCK(sb)
2384 + SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2389 + SB_ONDISK_JOURNAL_SIZE(sb));
2385 if (!journal->j_header_bh) { 2390 if (!journal->j_header_bh) {
2386 return 1; 2391 return 1;
2387 } 2392 }
2388 jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data); 2393 jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
2389 if (le32_to_cpu(jh->j_first_unflushed_offset) < 2394 if (le32_to_cpu(jh->j_first_unflushed_offset) <
2390 SB_ONDISK_JOURNAL_SIZE(p_s_sb) 2395 SB_ONDISK_JOURNAL_SIZE(sb)
2391 && le32_to_cpu(jh->j_last_flush_trans_id) > 0) { 2396 && le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
2392 oldest_start = 2397 oldest_start =
2393 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2398 SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2394 le32_to_cpu(jh->j_first_unflushed_offset); 2399 le32_to_cpu(jh->j_first_unflushed_offset);
2395 oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; 2400 oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1;
2396 newest_mount_id = le32_to_cpu(jh->j_mount_id); 2401 newest_mount_id = le32_to_cpu(jh->j_mount_id);
2397 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2402 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2398 "journal-1153: found in " 2403 "journal-1153: found in "
2399 "header: first_unflushed_offset %d, last_flushed_trans_id " 2404 "header: first_unflushed_offset %d, last_flushed_trans_id "
2400 "%lu", le32_to_cpu(jh->j_first_unflushed_offset), 2405 "%lu", le32_to_cpu(jh->j_first_unflushed_offset),
2401 le32_to_cpu(jh->j_last_flush_trans_id)); 2406 le32_to_cpu(jh->j_last_flush_trans_id));
2402 valid_journal_header = 1; 2407 valid_journal_header = 1;
2403 2408
2404 /* now, we try to read the first unflushed offset. If it is not valid, 2409 /* now, we try to read the first unflushed offset. If it is not valid,
2405 ** there is nothing more we can do, and it makes no sense to read 2410 ** there is nothing more we can do, and it makes no sense to read
2406 ** through the whole log. 2411 ** through the whole log.
2407 */ 2412 */
2408 d_bh = 2413 d_bh =
2409 journal_bread(p_s_sb, 2414 journal_bread(sb,
2410 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2415 SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2411 le32_to_cpu(jh->j_first_unflushed_offset)); 2416 le32_to_cpu(jh->j_first_unflushed_offset));
2412 ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL); 2417 ret = journal_transaction_is_valid(sb, d_bh, NULL, NULL);
2413 if (!ret) { 2418 if (!ret) {
2414 continue_replay = 0; 2419 continue_replay = 0;
2415 } 2420 }
@@ -2417,9 +2422,9 @@ static int journal_read(struct super_block *p_s_sb)
2417 goto start_log_replay; 2422 goto start_log_replay;
2418 } 2423 }
2419 2424
2420 if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) { 2425 if (continue_replay && bdev_read_only(sb->s_bdev)) {
2421 reiserfs_warning(p_s_sb, 2426 reiserfs_warning(sb, "clm-2076",
2422 "clm-2076: device is readonly, unable to replay log"); 2427 "device is readonly, unable to replay log");
2423 return -1; 2428 return -1;
2424 } 2429 }
2425 2430
@@ -2428,17 +2433,17 @@ static int journal_read(struct super_block *p_s_sb)
2428 */ 2433 */
2429 while (continue_replay 2434 while (continue_replay
2430 && cur_dblock < 2435 && cur_dblock <
2431 (SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2436 (SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2432 SB_ONDISK_JOURNAL_SIZE(p_s_sb))) { 2437 SB_ONDISK_JOURNAL_SIZE(sb))) {
2433 /* Note that the blocksize of the primary fs device and the journal 2438 /* Note that the blocksize of the primary fs device and the journal
2434 device must be the same */ 2439 device must be the same */
2435 d_bh = 2440 d_bh =
2436 reiserfs_breada(journal->j_dev_bd, cur_dblock, 2441 reiserfs_breada(journal->j_dev_bd, cur_dblock,
2437 p_s_sb->s_blocksize, 2442 sb->s_blocksize,
2438 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2443 SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2439 SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2444 SB_ONDISK_JOURNAL_SIZE(sb));
2440 ret = 2445 ret =
2441 journal_transaction_is_valid(p_s_sb, d_bh, 2446 journal_transaction_is_valid(sb, d_bh,
2442 &oldest_invalid_trans_id, 2447 &oldest_invalid_trans_id,
2443 &newest_mount_id); 2448 &newest_mount_id);
2444 if (ret == 1) { 2449 if (ret == 1) {
@@ -2447,26 +2452,26 @@ static int journal_read(struct super_block *p_s_sb)
2447 oldest_trans_id = get_desc_trans_id(desc); 2452 oldest_trans_id = get_desc_trans_id(desc);
2448 oldest_start = d_bh->b_blocknr; 2453 oldest_start = d_bh->b_blocknr;
2449 newest_mount_id = get_desc_mount_id(desc); 2454 newest_mount_id = get_desc_mount_id(desc);
2450 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2455 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2451 "journal-1179: Setting " 2456 "journal-1179: Setting "
2452 "oldest_start to offset %llu, trans_id %lu", 2457 "oldest_start to offset %llu, trans_id %lu",
2453 oldest_start - 2458 oldest_start -
2454 SB_ONDISK_JOURNAL_1st_BLOCK 2459 SB_ONDISK_JOURNAL_1st_BLOCK
2455 (p_s_sb), oldest_trans_id); 2460 (sb), oldest_trans_id);
2456 } else if (oldest_trans_id > get_desc_trans_id(desc)) { 2461 } else if (oldest_trans_id > get_desc_trans_id(desc)) {
2457 /* one we just read was older */ 2462 /* one we just read was older */
2458 oldest_trans_id = get_desc_trans_id(desc); 2463 oldest_trans_id = get_desc_trans_id(desc);
2459 oldest_start = d_bh->b_blocknr; 2464 oldest_start = d_bh->b_blocknr;
2460 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2465 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2461 "journal-1180: Resetting " 2466 "journal-1180: Resetting "
2462 "oldest_start to offset %lu, trans_id %lu", 2467 "oldest_start to offset %lu, trans_id %lu",
2463 oldest_start - 2468 oldest_start -
2464 SB_ONDISK_JOURNAL_1st_BLOCK 2469 SB_ONDISK_JOURNAL_1st_BLOCK
2465 (p_s_sb), oldest_trans_id); 2470 (sb), oldest_trans_id);
2466 } 2471 }
2467 if (newest_mount_id < get_desc_mount_id(desc)) { 2472 if (newest_mount_id < get_desc_mount_id(desc)) {
2468 newest_mount_id = get_desc_mount_id(desc); 2473 newest_mount_id = get_desc_mount_id(desc);
2469 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2474 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2470 "journal-1299: Setting " 2475 "journal-1299: Setting "
2471 "newest_mount_id to %d", 2476 "newest_mount_id to %d",
2472 get_desc_mount_id(desc)); 2477 get_desc_mount_id(desc));
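While scanning, the loop above keeps two running extremes: the smallest trans_id seen (where replay must begin) and the largest mount_id (so blocks left over from earlier mounts can be recognized as stale). A toy version of that bookkeeping over hypothetical descriptors:

#include <stdint.h>
#include <stdio.h>

struct desc { uint32_t trans_id, mount_id, blocknr; };

int main(void)
{
	/* pretend these descriptors were found while walking the log */
	struct desc found[] = { {12, 3, 110}, {9, 3, 64}, {15, 4, 200} };
	uint32_t oldest_trans_id = 0, newest_mount_id = 0, oldest_start = 0;

	for (unsigned i = 0; i < sizeof(found) / sizeof(found[0]); i++) {
		struct desc *d = &found[i];
		if (oldest_trans_id == 0 || d->trans_id < oldest_trans_id) {
			oldest_trans_id = d->trans_id; /* earliest replay point */
			oldest_start = d->blocknr;
		}
		if (d->mount_id > newest_mount_id)
			newest_mount_id = d->mount_id; /* newest mount seen */
	}
	printf("replay starts at block %u, trans %u; newest mount %u\n",
	       oldest_start, oldest_trans_id, newest_mount_id);
	return 0;
}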
@@ -2481,17 +2486,17 @@ static int journal_read(struct super_block *p_s_sb)
2481 start_log_replay: 2486 start_log_replay:
2482 cur_dblock = oldest_start; 2487 cur_dblock = oldest_start;
2483 if (oldest_trans_id) { 2488 if (oldest_trans_id) {
2484 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2489 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2485 "journal-1206: Starting replay " 2490 "journal-1206: Starting replay "
2486 "from offset %llu, trans_id %lu", 2491 "from offset %llu, trans_id %lu",
2487 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2492 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2488 oldest_trans_id); 2493 oldest_trans_id);
2489 2494
2490 } 2495 }
2491 replay_count = 0; 2496 replay_count = 0;
2492 while (continue_replay && oldest_trans_id > 0) { 2497 while (continue_replay && oldest_trans_id > 0) {
2493 ret = 2498 ret =
2494 journal_read_transaction(p_s_sb, cur_dblock, oldest_start, 2499 journal_read_transaction(sb, cur_dblock, oldest_start,
2495 oldest_trans_id, newest_mount_id); 2500 oldest_trans_id, newest_mount_id);
2496 if (ret < 0) { 2501 if (ret < 0) {
2497 return ret; 2502 return ret;
@@ -2499,14 +2504,14 @@ static int journal_read(struct super_block *p_s_sb)
2499 break; 2504 break;
2500 } 2505 }
2501 cur_dblock = 2506 cur_dblock =
2502 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start; 2507 SB_ONDISK_JOURNAL_1st_BLOCK(sb) + journal->j_start;
2503 replay_count++; 2508 replay_count++;
2504 if (cur_dblock == oldest_start) 2509 if (cur_dblock == oldest_start)
2505 break; 2510 break;
2506 } 2511 }
2507 2512
2508 if (oldest_trans_id == 0) { 2513 if (oldest_trans_id == 0) {
2509 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2514 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2510 "journal-1225: No valid " "transactions found"); 2515 "journal-1225: No valid " "transactions found");
2511 } 2516 }
2512 /* j_start does not get set correctly if we don't replay any transactions. 2517 /* j_start does not get set correctly if we don't replay any transactions.
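The replay loop walks a circular region: after each transaction is replayed, the cursor is recomputed from journal->j_start, and the walk stops once it would land back on its starting block. A compact model of that wrap-around walk; replay_one() is a stand-in for journal_read_transaction, and the block geometry is invented:

#include <stdio.h>

#define LOG_FIRST 18u   /* example journal start block */
#define LOG_SIZE  32u   /* example journal length in blocks */

/* stand-in for journal_read_transaction(): pretend each transaction
 * occupies four blocks and report where the next one would begin */
static unsigned replay_one(unsigned cur, unsigned *j_start)
{
	*j_start = (cur - LOG_FIRST + 4) % LOG_SIZE;
	return 0;
}

int main(void)
{
	unsigned j_start = 10, replay_count = 0;
	unsigned oldest_start = LOG_FIRST + j_start;
	unsigned cur = oldest_start;

	do {
		if (replay_one(cur, &j_start))
			break;
		replay_count++;
		cur = LOG_FIRST + j_start;   /* recompute, modulo the log */
	} while (cur != oldest_start);       /* full circle: stop */

	printf("replayed %u transactions\n", replay_count);
	return 0;
}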
@@ -2526,16 +2531,16 @@ static int journal_read(struct super_block *p_s_sb)
2526 } else { 2531 } else {
2527 journal->j_mount_id = newest_mount_id + 1; 2532 journal->j_mount_id = newest_mount_id + 1;
2528 } 2533 }
2529 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " 2534 reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1299: Setting "
2530 "newest_mount_id to %lu", journal->j_mount_id); 2535 "newest_mount_id to %lu", journal->j_mount_id);
2531 journal->j_first_unflushed_offset = journal->j_start; 2536 journal->j_first_unflushed_offset = journal->j_start;
2532 if (replay_count > 0) { 2537 if (replay_count > 0) {
2533 reiserfs_info(p_s_sb, 2538 reiserfs_info(sb,
2534 "replayed %d transactions in %lu seconds\n", 2539 "replayed %d transactions in %lu seconds\n",
2535 replay_count, get_seconds() - start); 2540 replay_count, get_seconds() - start);
2536 } 2541 }
2537 if (!bdev_read_only(p_s_sb->s_bdev) && 2542 if (!bdev_read_only(sb->s_bdev) &&
2538 _update_journal_header_block(p_s_sb, journal->j_start, 2543 _update_journal_header_block(sb, journal->j_start,
2539 journal->j_last_flush_trans_id)) { 2544 journal->j_last_flush_trans_id)) {
2540 /* replay failed, caller must call free_journal_ram and abort 2545 /* replay failed, caller must call free_journal_ram and abort
2541 ** the mount 2546 ** the mount
@@ -2560,9 +2565,9 @@ static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
2560 return jl; 2565 return jl;
2561} 2566}
2562 2567
2563static void journal_list_init(struct super_block *p_s_sb) 2568static void journal_list_init(struct super_block *sb)
2564{ 2569{
2565 SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb); 2570 SB_JOURNAL(sb)->j_current_jl = alloc_journal_list(sb);
2566} 2571}
2567 2572
2568static int release_journal_dev(struct super_block *super, 2573static int release_journal_dev(struct super_block *super,
@@ -2580,9 +2585,8 @@ static int release_journal_dev(struct super_block *super,
2580 } 2585 }
2581 2586
2582 if (result != 0) { 2587 if (result != 0) {
2583 reiserfs_warning(super, 2588 reiserfs_warning(super, "sh-457",
2584 "sh-457: release_journal_dev: Cannot release journal device: %i", 2589 "Cannot release journal device: %i", result);
2585 result);
2586 } 2590 }
2587 return result; 2591 return result;
2588} 2592}
@@ -2612,7 +2616,7 @@ static int journal_init_dev(struct super_block *super,
2612 if (IS_ERR(journal->j_dev_bd)) { 2616 if (IS_ERR(journal->j_dev_bd)) {
2613 result = PTR_ERR(journal->j_dev_bd); 2617 result = PTR_ERR(journal->j_dev_bd);
2614 journal->j_dev_bd = NULL; 2618 journal->j_dev_bd = NULL;
2615 reiserfs_warning(super, "sh-458: journal_init_dev: " 2619 reiserfs_warning(super, "sh-458",
2616 "cannot init journal device '%s': %i", 2620 "cannot init journal device '%s': %i",
2617 __bdevname(jdev, b), result); 2621 __bdevname(jdev, b), result);
2618 return result; 2622 return result;
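Most hunks in this file repeat one mechanical change: the message id ("sh-458", "clm-2076", ...) moves out of the format string into a separate argument so it can be printed uniformly. The real reiserfs_warning prototype lives elsewhere in the tree; the following is only a guess at the shape of such a wrapper, to show why the call sites get shorter:

#include <stdarg.h>
#include <stdio.h>

/* hypothetical stand-in for reiserfs_warning(sb, id, fmt, ...) */
static void fs_warning(const char *dev, const char *id,
		       const char *fmt, ...)
{
	va_list ap;

	printf("REISERFS warning (device %s): %s ", dev, id);
	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
	printf("\n");
}

int main(void)
{
	/* new style: id separate, message free of boilerplate */
	fs_warning("sda1", "sh-458", "cannot init journal device '%s': %i",
		   "sdb1", -19);
	return 0;
}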
@@ -2662,30 +2666,30 @@ static int journal_init_dev(struct super_block *super,
2662 */ 2666 */
2663#define REISERFS_STANDARD_BLKSIZE (4096) 2667#define REISERFS_STANDARD_BLKSIZE (4096)
2664 2668
2665static int check_advise_trans_params(struct super_block *p_s_sb, 2669static int check_advise_trans_params(struct super_block *sb,
2666 struct reiserfs_journal *journal) 2670 struct reiserfs_journal *journal)
2667{ 2671{
2668 if (journal->j_trans_max) { 2672 if (journal->j_trans_max) {
2669 /* Non-default journal params. 2673 /* Non-default journal params.
2670 Do a sanity check on them. */ 2674 Do a sanity check on them. */
2671 int ratio = 1; 2675 int ratio = 1;
2672 if (p_s_sb->s_blocksize < REISERFS_STANDARD_BLKSIZE) 2676 if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE)
2673 ratio = REISERFS_STANDARD_BLKSIZE / p_s_sb->s_blocksize; 2677 ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize;
2674 2678
2675 if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio || 2679 if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio ||
2676 journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio || 2680 journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio ||
2677 SB_ONDISK_JOURNAL_SIZE(p_s_sb) / journal->j_trans_max < 2681 SB_ONDISK_JOURNAL_SIZE(sb) / journal->j_trans_max <
2678 JOURNAL_MIN_RATIO) { 2682 JOURNAL_MIN_RATIO) {
2679 reiserfs_warning(p_s_sb, 2683 reiserfs_warning(sb, "sh-462",
2680 "sh-462: bad transaction max size (%u). FSCK?", 2684 "bad transaction max size (%u). "
2681 journal->j_trans_max); 2685 "FSCK?", journal->j_trans_max);
2682 return 1; 2686 return 1;
2683 } 2687 }
2684 if (journal->j_max_batch != (journal->j_trans_max) * 2688 if (journal->j_max_batch != (journal->j_trans_max) *
2685 JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) { 2689 JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) {
2686 reiserfs_warning(p_s_sb, 2690 reiserfs_warning(sb, "sh-463",
2687 "sh-463: bad transaction max batch (%u). FSCK?", 2691 "bad transaction max batch (%u). "
2688 journal->j_max_batch); 2692 "FSCK?", journal->j_max_batch);
2689 return 1; 2693 return 1;
2690 } 2694 }
2691 } else { 2695 } else {
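The guard above scales the 4KB-tuned defaults for smaller block sizes: with 1KB blocks the ratio is 4, so j_trans_max must fall within [JOURNAL_TRANS_MIN_DEFAULT/4, JOURNAL_TRANS_MAX_DEFAULT/4] and the journal must still hold at least JOURNAL_MIN_RATIO transactions. A standalone sketch; the constant values are quoted from memory of reiserfs_fs.h and should be verified:

#include <stdio.h>

#define JOURNAL_TRANS_MAX_DEFAULT 1024
#define JOURNAL_TRANS_MIN_DEFAULT 256
#define JOURNAL_MIN_RATIO 2
#define REISERFS_STANDARD_BLKSIZE 4096

/* returns 0 if trans_max is acceptable for this blocksize/journal size */
static int trans_max_ok(unsigned blocksize, unsigned journal_size,
			unsigned trans_max)
{
	unsigned ratio = 1;

	if (blocksize < REISERFS_STANDARD_BLKSIZE)
		ratio = REISERFS_STANDARD_BLKSIZE / blocksize;

	if (trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio ||
	    trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio ||
	    journal_size / trans_max < JOURNAL_MIN_RATIO)
		return -1;
	return 0;
}

int main(void)
{
	/* 1KB blocks: the allowed range becomes [64, 256] */
	printf("1KB, trans_max 256: %s\n",
	       trans_max_ok(1024, 8192, 256) ? "bad" : "ok");
	printf("1KB, trans_max 1024: %s\n",
	       trans_max_ok(1024, 8192, 1024) ? "bad" : "ok");
	return 0;
}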
@@ -2693,9 +2697,11 @@ static int check_advise_trans_params(struct super_block *p_s_sb,
2693 The file system was created by an old version 2697 The file system was created by an old version
2694 of mkreiserfs, so some fields contain zeros, 2698 of mkreiserfs, so some fields contain zeros,
2695 and we need to advise proper values for them */ 2699 and we need to advise proper values for them */
2696 if (p_s_sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) 2700 if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) {
2697 reiserfs_panic(p_s_sb, "sh-464: bad blocksize (%u)", 2701 reiserfs_warning(sb, "sh-464", "bad blocksize (%u)",
2698 p_s_sb->s_blocksize); 2702 sb->s_blocksize);
2703 return 1;
2704 }
2699 journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT; 2705 journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT;
2700 journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT; 2706 journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT;
2701 journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE; 2707 journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE;
@@ -2706,10 +2712,10 @@ static int check_advise_trans_params(struct super_block *p_s_sb,
2706/* 2712/*
2707** must be called once on fs mount. calls journal_read for you 2713** must be called once on fs mount. calls journal_read for you
2708*/ 2714*/
2709int journal_init(struct super_block *p_s_sb, const char *j_dev_name, 2715int journal_init(struct super_block *sb, const char *j_dev_name,
2710 int old_format, unsigned int commit_max_age) 2716 int old_format, unsigned int commit_max_age)
2711{ 2717{
2712 int num_cnodes = SB_ONDISK_JOURNAL_SIZE(p_s_sb) * 2; 2718 int num_cnodes = SB_ONDISK_JOURNAL_SIZE(sb) * 2;
2713 struct buffer_head *bhjh; 2719 struct buffer_head *bhjh;
2714 struct reiserfs_super_block *rs; 2720 struct reiserfs_super_block *rs;
2715 struct reiserfs_journal_header *jh; 2721 struct reiserfs_journal_header *jh;
@@ -2717,10 +2723,10 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2717 struct reiserfs_journal_list *jl; 2723 struct reiserfs_journal_list *jl;
2718 char b[BDEVNAME_SIZE]; 2724 char b[BDEVNAME_SIZE];
2719 2725
2720 journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof(struct reiserfs_journal)); 2726 journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal));
2721 if (!journal) { 2727 if (!journal) {
2722 reiserfs_warning(p_s_sb, 2728 reiserfs_warning(sb, "journal-1256",
2723 "journal-1256: unable to get memory for journal structure"); 2729 "unable to get memory for journal structure");
2724 return 1; 2730 return 1;
2725 } 2731 }
2726 memset(journal, 0, sizeof(struct reiserfs_journal)); 2732 memset(journal, 0, sizeof(struct reiserfs_journal));
@@ -2729,51 +2735,51 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2729 INIT_LIST_HEAD(&journal->j_working_list); 2735 INIT_LIST_HEAD(&journal->j_working_list);
2730 INIT_LIST_HEAD(&journal->j_journal_list); 2736 INIT_LIST_HEAD(&journal->j_journal_list);
2731 journal->j_persistent_trans = 0; 2737 journal->j_persistent_trans = 0;
2732 if (reiserfs_allocate_list_bitmaps(p_s_sb, 2738 if (reiserfs_allocate_list_bitmaps(sb,
2733 journal->j_list_bitmap, 2739 journal->j_list_bitmap,
2734 reiserfs_bmap_count(p_s_sb))) 2740 reiserfs_bmap_count(sb)))
2735 goto free_and_return; 2741 goto free_and_return;
2736 allocate_bitmap_nodes(p_s_sb); 2742 allocate_bitmap_nodes(sb);
2737 2743
2738 /* reserved for journal area support */ 2744 /* reserved for journal area support */
2739 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ? 2745 SB_JOURNAL_1st_RESERVED_BLOCK(sb) = (old_format ?
2740 REISERFS_OLD_DISK_OFFSET_IN_BYTES 2746 REISERFS_OLD_DISK_OFFSET_IN_BYTES
2741 / p_s_sb->s_blocksize + 2747 / sb->s_blocksize +
2742 reiserfs_bmap_count(p_s_sb) + 2748 reiserfs_bmap_count(sb) +
2743 1 : 2749 1 :
2744 REISERFS_DISK_OFFSET_IN_BYTES / 2750 REISERFS_DISK_OFFSET_IN_BYTES /
2745 p_s_sb->s_blocksize + 2); 2751 sb->s_blocksize + 2);
2746 2752
2747 /* Sanity check to see if the standard journal fits within the first bitmap 2753 /* Sanity check to see if the standard journal fits within the first bitmap
2748 (relevant for small blocksizes) */ 2754 (relevant for small blocksizes) */
2749 if (!SB_ONDISK_JOURNAL_DEVICE(p_s_sb) && 2755 if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
2750 (SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) + 2756 (SB_JOURNAL_1st_RESERVED_BLOCK(sb) +
2751 SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8)) { 2757 SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) {
2752 reiserfs_warning(p_s_sb, 2758 reiserfs_warning(sb, "journal-1393",
2753 "journal-1393: journal does not fit for area " 2759 "journal does not fit for area addressed "
2754 "addressed by first of bitmap blocks. It starts at " 2760 "by first of bitmap blocks. It starts at "
2755 "%u and its size is %u. Block size %ld", 2761 "%u and its size is %u. Block size %ld",
2756 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb), 2762 SB_JOURNAL_1st_RESERVED_BLOCK(sb),
2757 SB_ONDISK_JOURNAL_SIZE(p_s_sb), 2763 SB_ONDISK_JOURNAL_SIZE(sb),
2758 p_s_sb->s_blocksize); 2764 sb->s_blocksize);
2759 goto free_and_return; 2765 goto free_and_return;
2760 } 2766 }
2761 2767
2762 if (journal_init_dev(p_s_sb, journal, j_dev_name) != 0) { 2768 if (journal_init_dev(sb, journal, j_dev_name) != 0) {
2763 reiserfs_warning(p_s_sb, 2769 reiserfs_warning(sb, "sh-462",
2764 "sh-462: unable to initialize jornal device"); 2770 "unable to initialize jornal device");
2765 goto free_and_return; 2771 goto free_and_return;
2766 } 2772 }
2767 2773
2768 rs = SB_DISK_SUPER_BLOCK(p_s_sb); 2774 rs = SB_DISK_SUPER_BLOCK(sb);
2769 2775
2770 /* read journal header */ 2776 /* read journal header */
2771 bhjh = journal_bread(p_s_sb, 2777 bhjh = journal_bread(sb,
2772 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2778 SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2773 SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2779 SB_ONDISK_JOURNAL_SIZE(sb));
2774 if (!bhjh) { 2780 if (!bhjh) {
2775 reiserfs_warning(p_s_sb, 2781 reiserfs_warning(sb, "sh-459",
2776 "sh-459: unable to read journal header"); 2782 "unable to read journal header");
2777 goto free_and_return; 2783 goto free_and_return;
2778 } 2784 }
2779 jh = (struct reiserfs_journal_header *)(bhjh->b_data); 2785 jh = (struct reiserfs_journal_header *)(bhjh->b_data);
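Two computations in the hunk above decide where the journal may live: the reserved-block formula places it after the super block (old or new on-disk offset) plus the bitmap blocks, and the follow-up check insists the whole journal stays within the blocksize * 8 blocks that the first bitmap block can address. A self-contained sketch of both (the byte offsets are the reiserfs on-disk constants as best recalled; verify before relying on them):

#include <stdio.h>

#define REISERFS_DISK_OFFSET_IN_BYTES     (64 * 1024)
#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)

int main(void)
{
	unsigned blocksize = 4096, bmap_count = 1, journal_size = 8193;
	int old_format = 0;

	/* first block reserved for the journal area, as computed above */
	unsigned first_reserved = old_format
	    ? REISERFS_OLD_DISK_OFFSET_IN_BYTES / blocksize + bmap_count + 1
	    : REISERFS_DISK_OFFSET_IN_BYTES / blocksize + 2;

	/* one bitmap block addresses blocksize * 8 blocks */
	if (first_reserved + journal_size > blocksize * 8)
		printf("journal does not fit in the first bitmap's area\n");
	else
		printf("journal fits: blocks %u..%u\n",
		       first_reserved, first_reserved + journal_size - 1);
	return 0;
}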
@@ -2782,10 +2788,10 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2782 if (is_reiserfs_jr(rs) 2788 if (is_reiserfs_jr(rs)
2783 && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != 2789 && (le32_to_cpu(jh->jh_journal.jp_journal_magic) !=
2784 sb_jp_journal_magic(rs))) { 2790 sb_jp_journal_magic(rs))) {
2785 reiserfs_warning(p_s_sb, 2791 reiserfs_warning(sb, "sh-460",
2786 "sh-460: journal header magic %x " 2792 "journal header magic %x (device %s) does "
2787 "(device %s) does not match to magic found in super " 2793 "not match to magic found in super block %x",
2788 "block %x", jh->jh_journal.jp_journal_magic, 2794 jh->jh_journal.jp_journal_magic,
2789 bdevname(journal->j_dev_bd, b), 2795 bdevname(journal->j_dev_bd, b),
2790 sb_jp_journal_magic(rs)); 2796 sb_jp_journal_magic(rs));
2791 brelse(bhjh); 2797 brelse(bhjh);
@@ -2798,7 +2804,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2798 le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age); 2804 le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age);
2799 journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; 2805 journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
2800 2806
2801 if (check_advise_trans_params(p_s_sb, journal) != 0) 2807 if (check_advise_trans_params(sb, journal) != 0)
2802 goto free_and_return; 2808 goto free_and_return;
2803 journal->j_default_max_commit_age = journal->j_max_commit_age; 2809 journal->j_default_max_commit_age = journal->j_max_commit_age;
2804 2810
@@ -2807,12 +2813,12 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2807 journal->j_max_trans_age = commit_max_age; 2813 journal->j_max_trans_age = commit_max_age;
2808 } 2814 }
2809 2815
2810 reiserfs_info(p_s_sb, "journal params: device %s, size %u, " 2816 reiserfs_info(sb, "journal params: device %s, size %u, "
2811 "journal first block %u, max trans len %u, max batch %u, " 2817 "journal first block %u, max trans len %u, max batch %u, "
2812 "max commit age %u, max trans age %u\n", 2818 "max commit age %u, max trans age %u\n",
2813 bdevname(journal->j_dev_bd, b), 2819 bdevname(journal->j_dev_bd, b),
2814 SB_ONDISK_JOURNAL_SIZE(p_s_sb), 2820 SB_ONDISK_JOURNAL_SIZE(sb),
2815 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2821 SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2816 journal->j_trans_max, 2822 journal->j_trans_max,
2817 journal->j_max_batch, 2823 journal->j_max_batch,
2818 journal->j_max_commit_age, journal->j_max_trans_age); 2824 journal->j_max_commit_age, journal->j_max_trans_age);
@@ -2820,7 +2826,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2820 brelse(bhjh); 2826 brelse(bhjh);
2821 2827
2822 journal->j_list_bitmap_index = 0; 2828 journal->j_list_bitmap_index = 0;
2823 journal_list_init(p_s_sb); 2829 journal_list_init(sb);
2824 2830
2825 memset(journal->j_list_hash_table, 0, 2831 memset(journal->j_list_hash_table, 0,
2826 JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); 2832 JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
@@ -2852,7 +2858,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2852 journal->j_must_wait = 0; 2858 journal->j_must_wait = 0;
2853 2859
2854 if (journal->j_cnode_free == 0) { 2860 if (journal->j_cnode_free == 0) {
2855 reiserfs_warning(p_s_sb, "journal-2004: Journal cnode memory " 2861 reiserfs_warning(sb, "journal-2004", "Journal cnode memory "
2856 "allocation failed (%ld bytes). Journal is " 2862 "allocation failed (%ld bytes). Journal is "
2857 "too large for available memory. Usually " 2863 "too large for available memory. Usually "
2858 "this is due to a journal that is too large.", 2864 "this is due to a journal that is too large.",
@@ -2860,16 +2866,17 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2860 goto free_and_return; 2866 goto free_and_return;
2861 } 2867 }
2862 2868
2863 init_journal_hash(p_s_sb); 2869 init_journal_hash(sb);
2864 jl = journal->j_current_jl; 2870 jl = journal->j_current_jl;
2865 jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl); 2871 jl->j_list_bitmap = get_list_bitmap(sb, jl);
2866 if (!jl->j_list_bitmap) { 2872 if (!jl->j_list_bitmap) {
2867 reiserfs_warning(p_s_sb, 2873 reiserfs_warning(sb, "journal-2005",
2868 "journal-2005, get_list_bitmap failed for journal list 0"); 2874 "get_list_bitmap failed for journal list 0");
2869 goto free_and_return; 2875 goto free_and_return;
2870 } 2876 }
2871 if (journal_read(p_s_sb) < 0) { 2877 if (journal_read(sb) < 0) {
2872 reiserfs_warning(p_s_sb, "Replay Failure, unable to mount"); 2878 reiserfs_warning(sb, "reiserfs-2006",
2879 "Replay Failure, unable to mount");
2873 goto free_and_return; 2880 goto free_and_return;
2874 } 2881 }
2875 2882
@@ -2878,10 +2885,10 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2878 commit_wq = create_workqueue("reiserfs"); 2885 commit_wq = create_workqueue("reiserfs");
2879 2886
2880 INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); 2887 INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
2881 journal->j_work_sb = p_s_sb; 2888 journal->j_work_sb = sb;
2882 return 0; 2889 return 0;
2883 free_and_return: 2890 free_and_return:
2884 free_journal_ram(p_s_sb); 2891 free_journal_ram(sb);
2885 return 1; 2892 return 1;
2886} 2893}
2887 2894
@@ -2912,7 +2919,7 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2912 return 0; 2919 return 0;
2913} 2920}
2914 2921
2915/* this must be called inside a transaction, and requires the 2922/* this must be called inside a transaction, and requires the
2916** kernel_lock to be held 2923** kernel_lock to be held
2917*/ 2924*/
2918void reiserfs_block_writes(struct reiserfs_transaction_handle *th) 2925void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
@@ -2970,7 +2977,7 @@ static void wake_queued_writers(struct super_block *s)
2970 wake_up(&journal->j_join_wait); 2977 wake_up(&journal->j_join_wait);
2971} 2978}
2972 2979
2973static void let_transaction_grow(struct super_block *sb, unsigned long trans_id) 2980static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
2974{ 2981{
2975 struct reiserfs_journal *journal = SB_JOURNAL(sb); 2982 struct reiserfs_journal *journal = SB_JOURNAL(sb);
2976 unsigned long bcount = journal->j_bcount; 2983 unsigned long bcount = journal->j_bcount;
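The quiet type change here (unsigned long -> unsigned int for trans_id, repeated for old_trans_id and i_trans_id below) appears to track the on-disk format: transaction ids are stored as 32-bit little-endian values (note the le32_to_cpu() calls in the replay code above), so a 64-bit unsigned long only hides where the counter wraps. A tiny illustration of the width mismatch, assuming an LP64 machine:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t on_disk = 0xffffffffu;  /* a trans_id as stored on disk */
	unsigned long wide = on_disk;    /* 64 bits on an LP64 kernel */

	uint32_t next32 = on_disk + 1;   /* the 32-bit counter wraps to 0 */
	unsigned long next64 = wide + 1; /* the wide copy keeps counting */

	printf("32-bit next: %u, 64-bit next: %lu\n", next32, next64);
	return 0;
}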
@@ -2997,43 +3004,43 @@ static void let_transaction_grow(struct super_block *sb, unsigned long trans_id)
2997** expect to use in nblocks. 3004** expect to use in nblocks.
2998*/ 3005*/
2999static int do_journal_begin_r(struct reiserfs_transaction_handle *th, 3006static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3000 struct super_block *p_s_sb, unsigned long nblocks, 3007 struct super_block *sb, unsigned long nblocks,
3001 int join) 3008 int join)
3002{ 3009{
3003 time_t now = get_seconds(); 3010 time_t now = get_seconds();
3004 int old_trans_id; 3011 unsigned int old_trans_id;
3005 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3012 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3006 struct reiserfs_transaction_handle myth; 3013 struct reiserfs_transaction_handle myth;
3007 int sched_count = 0; 3014 int sched_count = 0;
3008 int retval; 3015 int retval;
3009 3016
3010 reiserfs_check_lock_depth(p_s_sb, "journal_begin"); 3017 reiserfs_check_lock_depth(sb, "journal_begin");
3011 BUG_ON(nblocks > journal->j_trans_max); 3018 BUG_ON(nblocks > journal->j_trans_max);
3012 3019
3013 PROC_INFO_INC(p_s_sb, journal.journal_being); 3020 PROC_INFO_INC(sb, journal.journal_being);
3014 /* set here for journal_join */ 3021 /* set here for journal_join */
3015 th->t_refcount = 1; 3022 th->t_refcount = 1;
3016 th->t_super = p_s_sb; 3023 th->t_super = sb;
3017 3024
3018 relock: 3025 relock:
3019 lock_journal(p_s_sb); 3026 lock_journal(sb);
3020 if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) { 3027 if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) {
3021 unlock_journal(p_s_sb); 3028 unlock_journal(sb);
3022 retval = journal->j_errno; 3029 retval = journal->j_errno;
3023 goto out_fail; 3030 goto out_fail;
3024 } 3031 }
3025 journal->j_bcount++; 3032 journal->j_bcount++;
3026 3033
3027 if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { 3034 if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
3028 unlock_journal(p_s_sb); 3035 unlock_journal(sb);
3029 reiserfs_wait_on_write_block(p_s_sb); 3036 reiserfs_wait_on_write_block(sb);
3030 PROC_INFO_INC(p_s_sb, journal.journal_relock_writers); 3037 PROC_INFO_INC(sb, journal.journal_relock_writers);
3031 goto relock; 3038 goto relock;
3032 } 3039 }
3033 now = get_seconds(); 3040 now = get_seconds();
3034 3041
3035 /* if there is no room in the journal OR 3042 /* if there is no room in the journal OR
3036 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning; 3043 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning;
3037 ** we don't sleep if there aren't other writers 3044 ** we don't sleep if there aren't other writers
3038 */ 3045 */
3039 3046
@@ -3048,7 +3055,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3048 || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { 3055 || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {
3049 3056
3050 old_trans_id = journal->j_trans_id; 3057 old_trans_id = journal->j_trans_id;
3051 unlock_journal(p_s_sb); /* allow others to finish this transaction */ 3058 unlock_journal(sb); /* allow others to finish this transaction */
3052 3059
3053 if (!join && (journal->j_len_alloc + nblocks + 2) >= 3060 if (!join && (journal->j_len_alloc + nblocks + 2) >=
3054 journal->j_max_batch && 3061 journal->j_max_batch &&
@@ -3056,7 +3063,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3056 (journal->j_len_alloc * 75)) { 3063 (journal->j_len_alloc * 75)) {
3057 if (atomic_read(&journal->j_wcount) > 10) { 3064 if (atomic_read(&journal->j_wcount) > 10) {
3058 sched_count++; 3065 sched_count++;
3059 queue_log_writer(p_s_sb); 3066 queue_log_writer(sb);
3060 goto relock; 3067 goto relock;
3061 } 3068 }
3062 } 3069 }
@@ -3066,25 +3073,25 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3066 if (atomic_read(&journal->j_jlock)) { 3073 if (atomic_read(&journal->j_jlock)) {
3067 while (journal->j_trans_id == old_trans_id && 3074 while (journal->j_trans_id == old_trans_id &&
3068 atomic_read(&journal->j_jlock)) { 3075 atomic_read(&journal->j_jlock)) {
3069 queue_log_writer(p_s_sb); 3076 queue_log_writer(sb);
3070 } 3077 }
3071 goto relock; 3078 goto relock;
3072 } 3079 }
3073 retval = journal_join(&myth, p_s_sb, 1); 3080 retval = journal_join(&myth, sb, 1);
3074 if (retval) 3081 if (retval)
3075 goto out_fail; 3082 goto out_fail;
3076 3083
3077 /* someone might have ended the transaction while we joined */ 3084 /* someone might have ended the transaction while we joined */
3078 if (old_trans_id != journal->j_trans_id) { 3085 if (old_trans_id != journal->j_trans_id) {
3079 retval = do_journal_end(&myth, p_s_sb, 1, 0); 3086 retval = do_journal_end(&myth, sb, 1, 0);
3080 } else { 3087 } else {
3081 retval = do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW); 3088 retval = do_journal_end(&myth, sb, 1, COMMIT_NOW);
3082 } 3089 }
3083 3090
3084 if (retval) 3091 if (retval)
3085 goto out_fail; 3092 goto out_fail;
3086 3093
3087 PROC_INFO_INC(p_s_sb, journal.journal_relock_wcount); 3094 PROC_INFO_INC(sb, journal.journal_relock_wcount);
3088 goto relock; 3095 goto relock;
3089 } 3096 }
3090 /* we are the first writer, set trans_id */ 3097 /* we are the first writer, set trans_id */
@@ -3096,7 +3103,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3096 th->t_blocks_logged = 0; 3103 th->t_blocks_logged = 0;
3097 th->t_blocks_allocated = nblocks; 3104 th->t_blocks_allocated = nblocks;
3098 th->t_trans_id = journal->j_trans_id; 3105 th->t_trans_id = journal->j_trans_id;
3099 unlock_journal(p_s_sb); 3106 unlock_journal(sb);
3100 INIT_LIST_HEAD(&th->t_list); 3107 INIT_LIST_HEAD(&th->t_list);
3101 get_fs_excl(); 3108 get_fs_excl();
3102 return 0; 3109 return 0;
@@ -3106,7 +3113,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3106 /* Re-set th->t_super, so we can properly keep track of how many 3113 /* Re-set th->t_super, so we can properly keep track of how many
3107 * persistent transactions there are. We need to do this so if this 3114 * persistent transactions there are. We need to do this so if this
3108 * call is part of a failed restart_transaction, we can free it later */ 3115 * call is part of a failed restart_transaction, we can free it later */
3109 th->t_super = p_s_sb; 3116 th->t_super = sb;
3110 return retval; 3117 return retval;
3111} 3118}
3112 3119
@@ -3157,7 +3164,7 @@ int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
3157} 3164}
3158 3165
3159static int journal_join(struct reiserfs_transaction_handle *th, 3166static int journal_join(struct reiserfs_transaction_handle *th,
3160 struct super_block *p_s_sb, unsigned long nblocks) 3167 struct super_block *sb, unsigned long nblocks)
3161{ 3168{
3162 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3169 struct reiserfs_transaction_handle *cur_th = current->journal_info;
3163 3170
@@ -3166,11 +3173,11 @@ static int journal_join(struct reiserfs_transaction_handle *th,
3166 */ 3173 */
3167 th->t_handle_save = cur_th; 3174 th->t_handle_save = cur_th;
3168 BUG_ON(cur_th && cur_th->t_refcount > 1); 3175 BUG_ON(cur_th && cur_th->t_refcount > 1);
3169 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN); 3176 return do_journal_begin_r(th, sb, nblocks, JBEGIN_JOIN);
3170} 3177}
3171 3178
3172int journal_join_abort(struct reiserfs_transaction_handle *th, 3179int journal_join_abort(struct reiserfs_transaction_handle *th,
3173 struct super_block *p_s_sb, unsigned long nblocks) 3180 struct super_block *sb, unsigned long nblocks)
3174{ 3181{
3175 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3182 struct reiserfs_transaction_handle *cur_th = current->journal_info;
3176 3183
@@ -3179,11 +3186,11 @@ int journal_join_abort(struct reiserfs_transaction_handle *th,
3179 */ 3186 */
3180 th->t_handle_save = cur_th; 3187 th->t_handle_save = cur_th;
3181 BUG_ON(cur_th && cur_th->t_refcount > 1); 3188 BUG_ON(cur_th && cur_th->t_refcount > 1);
3182 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT); 3189 return do_journal_begin_r(th, sb, nblocks, JBEGIN_ABORT);
3183} 3190}
3184 3191
3185int journal_begin(struct reiserfs_transaction_handle *th, 3192int journal_begin(struct reiserfs_transaction_handle *th,
3186 struct super_block *p_s_sb, unsigned long nblocks) 3193 struct super_block *sb, unsigned long nblocks)
3187{ 3194{
3188 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3195 struct reiserfs_transaction_handle *cur_th = current->journal_info;
3189 int ret; 3196 int ret;
@@ -3191,28 +3198,29 @@ int journal_begin(struct reiserfs_transaction_handle *th,
3191 th->t_handle_save = NULL; 3198 th->t_handle_save = NULL;
3192 if (cur_th) { 3199 if (cur_th) {
3193 /* we are nesting into the current transaction */ 3200 /* we are nesting into the current transaction */
3194 if (cur_th->t_super == p_s_sb) { 3201 if (cur_th->t_super == sb) {
3195 BUG_ON(!cur_th->t_refcount); 3202 BUG_ON(!cur_th->t_refcount);
3196 cur_th->t_refcount++; 3203 cur_th->t_refcount++;
3197 memcpy(th, cur_th, sizeof(*th)); 3204 memcpy(th, cur_th, sizeof(*th));
3198 if (th->t_refcount <= 1) 3205 if (th->t_refcount <= 1)
3199 reiserfs_warning(p_s_sb, 3206 reiserfs_warning(sb, "reiserfs-2005",
3200 "BAD: refcount <= 1, but journal_info != 0"); 3207 "BAD: refcount <= 1, but "
3208 "journal_info != 0");
3201 return 0; 3209 return 0;
3202 } else { 3210 } else {
3203 /* we've ended up with a handle from a different filesystem. 3211 /* we've ended up with a handle from a different filesystem.
3204 ** save it and restore on journal_end. This should never 3212 ** save it and restore on journal_end. This should never
3205 ** really happen... 3213 ** really happen...
3206 */ 3214 */
3207 reiserfs_warning(p_s_sb, 3215 reiserfs_warning(sb, "clm-2100",
3208 "clm-2100: nesting info a different FS"); 3216 "nesting info a different FS");
3209 th->t_handle_save = current->journal_info; 3217 th->t_handle_save = current->journal_info;
3210 current->journal_info = th; 3218 current->journal_info = th;
3211 } 3219 }
3212 } else { 3220 } else {
3213 current->journal_info = th; 3221 current->journal_info = th;
3214 } 3222 }
3215 ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG); 3223 ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG);
3216 BUG_ON(current->journal_info != th); 3224 BUG_ON(current->journal_info != th);
3217 3225
3218 /* I guess this boils down to being the reciprocal of clm-2100 above. 3226 /* I guess this boils down to being the reciprocal of clm-2100 above.
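The nesting branch above is plain reference counting on the per-task handle: a second journal_begin against the same super block bumps t_refcount and copies the live handle rather than opening a new transaction, and only the outermost end commits. A stripped-down, lock-free model of that rule with invented types:

#include <stdio.h>
#include <string.h>

struct handle { int refcount; const char *super; };

static struct handle *journal_info;   /* models current->journal_info */

/* returns 0 on success, mirroring the nesting branch above */
static int begin(struct handle *th, const char *sb)
{
	if (journal_info && strcmp(journal_info->super, sb) == 0) {
		journal_info->refcount++;     /* nested begin: share handle */
		*th = *journal_info;
		return 0;
	}
	th->refcount = 1;                     /* outermost begin */
	th->super = sb;
	journal_info = th;
	return 0;
}

static void end(struct handle *th)
{
	(void)th;                             /* real code re-checks the handle */
	if (--journal_info->refcount == 0)    /* last end commits */
		journal_info = NULL;
}

int main(void)
{
	struct handle outer, inner;
	begin(&outer, "sda1");
	begin(&inner, "sda1");                /* nests, refcount becomes 2 */
	printf("refcount after nested begin: %d\n", journal_info->refcount);
	end(&inner);
	end(&outer);
	printf("open handle after both ends: %s\n",
	       journal_info ? "yes" : "no");
	return 0;
}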
@@ -3232,32 +3240,32 @@ int journal_begin(struct reiserfs_transaction_handle *th,
3232** 3240**
3233** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the 3241** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the
3234** transaction is committed. 3242** transaction is committed.
3235** 3243**
3236** if j_len is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. 3244** if j_len is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
3237*/ 3245*/
3238int journal_mark_dirty(struct reiserfs_transaction_handle *th, 3246int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3239 struct super_block *p_s_sb, struct buffer_head *bh) 3247 struct super_block *sb, struct buffer_head *bh)
3240{ 3248{
3241 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3249 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3242 struct reiserfs_journal_cnode *cn = NULL; 3250 struct reiserfs_journal_cnode *cn = NULL;
3243 int count_already_incd = 0; 3251 int count_already_incd = 0;
3244 int prepared = 0; 3252 int prepared = 0;
3245 BUG_ON(!th->t_trans_id); 3253 BUG_ON(!th->t_trans_id);
3246 3254
3247 PROC_INFO_INC(p_s_sb, journal.mark_dirty); 3255 PROC_INFO_INC(sb, journal.mark_dirty);
3248 if (th->t_trans_id != journal->j_trans_id) { 3256 if (th->t_trans_id != journal->j_trans_id) {
3249 reiserfs_panic(th->t_super, 3257 reiserfs_panic(th->t_super, "journal-1577",
3250 "journal-1577: handle trans id %ld != current trans id %ld\n", 3258 "handle trans id %ld != current trans id %ld",
3251 th->t_trans_id, journal->j_trans_id); 3259 th->t_trans_id, journal->j_trans_id);
3252 } 3260 }
3253 3261
3254 p_s_sb->s_dirt = 1; 3262 sb->s_dirt = 1;
3255 3263
3256 prepared = test_clear_buffer_journal_prepared(bh); 3264 prepared = test_clear_buffer_journal_prepared(bh);
3257 clear_buffer_journal_restore_dirty(bh); 3265 clear_buffer_journal_restore_dirty(bh);
3258 /* already in this transaction, we are done */ 3266 /* already in this transaction, we are done */
3259 if (buffer_journaled(bh)) { 3267 if (buffer_journaled(bh)) {
3260 PROC_INFO_INC(p_s_sb, journal.mark_dirty_already); 3268 PROC_INFO_INC(sb, journal.mark_dirty_already);
3261 return 0; 3269 return 0;
3262 } 3270 }
3263 3271
@@ -3266,7 +3274,8 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3266 ** could get to disk too early. NOT GOOD. 3274 ** could get to disk too early. NOT GOOD.
3267 */ 3275 */
3268 if (!prepared || buffer_dirty(bh)) { 3276 if (!prepared || buffer_dirty(bh)) {
3269 reiserfs_warning(p_s_sb, "journal-1777: buffer %llu bad state " 3277 reiserfs_warning(sb, "journal-1777",
3278 "buffer %llu bad state "
3270 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT", 3279 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT",
3271 (unsigned long long)bh->b_blocknr, 3280 (unsigned long long)bh->b_blocknr,
3272 prepared ? ' ' : '!', 3281 prepared ? ' ' : '!',
@@ -3276,23 +3285,23 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3276 } 3285 }
3277 3286
3278 if (atomic_read(&(journal->j_wcount)) <= 0) { 3287 if (atomic_read(&(journal->j_wcount)) <= 0) {
3279 reiserfs_warning(p_s_sb, 3288 reiserfs_warning(sb, "journal-1409",
3280 "journal-1409: journal_mark_dirty returning because j_wcount was %d", 3289 "returning because j_wcount was %d",
3281 atomic_read(&(journal->j_wcount))); 3290 atomic_read(&(journal->j_wcount)));
3282 return 1; 3291 return 1;
3283 } 3292 }
3284 /* this error means I've screwed up, and we've overflowed the transaction. 3293 /* this error means I've screwed up, and we've overflowed the transaction.
3285 ** Nothing can be done here, except make the FS readonly or panic. 3294 ** Nothing can be done here, except make the FS readonly or panic.
3286 */ 3295 */
3287 if (journal->j_len >= journal->j_trans_max) { 3296 if (journal->j_len >= journal->j_trans_max) {
3288 reiserfs_panic(th->t_super, 3297 reiserfs_panic(th->t_super, "journal-1413",
3289 "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n", 3298 "j_len (%lu) is too big",
3290 journal->j_len); 3299 journal->j_len);
3291 } 3300 }
3292 3301
3293 if (buffer_journal_dirty(bh)) { 3302 if (buffer_journal_dirty(bh)) {
3294 count_already_incd = 1; 3303 count_already_incd = 1;
3295 PROC_INFO_INC(p_s_sb, journal.mark_dirty_notjournal); 3304 PROC_INFO_INC(sb, journal.mark_dirty_notjournal);
3296 clear_buffer_journal_dirty(bh); 3305 clear_buffer_journal_dirty(bh);
3297 } 3306 }
3298 3307
@@ -3304,9 +3313,9 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3304 3313
3305 /* now put this guy on the end */ 3314 /* now put this guy on the end */
3306 if (!cn) { 3315 if (!cn) {
3307 cn = get_cnode(p_s_sb); 3316 cn = get_cnode(sb);
3308 if (!cn) { 3317 if (!cn) {
3309 reiserfs_panic(p_s_sb, "get_cnode failed!\n"); 3318 reiserfs_panic(sb, "journal-4", "get_cnode failed!");
3310 } 3319 }
3311 3320
3312 if (th->t_blocks_logged == th->t_blocks_allocated) { 3321 if (th->t_blocks_logged == th->t_blocks_allocated) {
@@ -3318,7 +3327,7 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3318 3327
3319 cn->bh = bh; 3328 cn->bh = bh;
3320 cn->blocknr = bh->b_blocknr; 3329 cn->blocknr = bh->b_blocknr;
3321 cn->sb = p_s_sb; 3330 cn->sb = sb;
3322 cn->jlist = NULL; 3331 cn->jlist = NULL;
3323 insert_journal_hash(journal->j_hash_table, cn); 3332 insert_journal_hash(journal->j_hash_table, cn);
3324 if (!count_already_incd) { 3333 if (!count_already_incd) {
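The cnode machinery above amounts to a chained hash table keyed by block number, so a later journal_mark_dirty or journal_mark_freed can find a transaction's copy of a block without walking the whole list. A minimal sketch of that kind of insert and lookup (not the kernel's insert_journal_hash, just the idea):

#include <stdio.h>

#define HASH_SIZE 97

struct cnode {
	unsigned long blocknr;
	struct cnode *hnext;          /* chain within one bucket */
};

static struct cnode *table[HASH_SIZE];

static void insert(struct cnode *cn)
{
	unsigned h = cn->blocknr % HASH_SIZE;
	cn->hnext = table[h];          /* push onto the bucket's chain */
	table[h] = cn;
}

static struct cnode *lookup(unsigned long blocknr)
{
	for (struct cnode *cn = table[blocknr % HASH_SIZE]; cn; cn = cn->hnext)
		if (cn->blocknr == blocknr)
			return cn;
	return NULL;
}

int main(void)
{
	struct cnode a = { 4242, NULL };
	insert(&a);
	printf("block 4242 %s\n", lookup(4242) ? "found" : "missing");
	printf("block 4243 %s\n", lookup(4243) ? "found" : "missing");
	return 0;
}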
@@ -3339,11 +3348,11 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3339} 3348}
3340 3349
3341int journal_end(struct reiserfs_transaction_handle *th, 3350int journal_end(struct reiserfs_transaction_handle *th,
3342 struct super_block *p_s_sb, unsigned long nblocks) 3351 struct super_block *sb, unsigned long nblocks)
3343{ 3352{
3344 if (!current->journal_info && th->t_refcount > 1) 3353 if (!current->journal_info && th->t_refcount > 1)
3345 reiserfs_warning(p_s_sb, "REISER-NESTING: th NULL, refcount %d", 3354 reiserfs_warning(sb, "REISER-NESTING",
3346 th->t_refcount); 3355 "th NULL, refcount %d", th->t_refcount);
3347 3356
3348 if (!th->t_trans_id) { 3357 if (!th->t_trans_id) {
3349 WARN_ON(1); 3358 WARN_ON(1);
@@ -3366,26 +3375,26 @@ int journal_end(struct reiserfs_transaction_handle *th,
3366 } 3375 }
3367 return 0; 3376 return 0;
3368 } else { 3377 } else {
3369 return do_journal_end(th, p_s_sb, nblocks, 0); 3378 return do_journal_end(th, sb, nblocks, 0);
3370 } 3379 }
3371} 3380}
3372 3381
3373/* removes from the current transaction, relsing and decrementing any counters. 3382/* removes from the current transaction, relsing and decrementing any counters.
3374** also files the removed buffer directly onto the clean list 3383** also files the removed buffer directly onto the clean list
3375** 3384**
3376** called by journal_mark_freed when a block has been deleted 3385** called by journal_mark_freed when a block has been deleted
3377** 3386**
3378** returns 1 if it cleaned and relsed the buffer. 0 otherwise 3387** returns 1 if it cleaned and relsed the buffer. 0 otherwise
3379*/ 3388*/
3380static int remove_from_transaction(struct super_block *p_s_sb, 3389static int remove_from_transaction(struct super_block *sb,
3381 b_blocknr_t blocknr, int already_cleaned) 3390 b_blocknr_t blocknr, int already_cleaned)
3382{ 3391{
3383 struct buffer_head *bh; 3392 struct buffer_head *bh;
3384 struct reiserfs_journal_cnode *cn; 3393 struct reiserfs_journal_cnode *cn;
3385 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3394 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3386 int ret = 0; 3395 int ret = 0;
3387 3396
3388 cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); 3397 cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr);
3389 if (!cn || !cn->bh) { 3398 if (!cn || !cn->bh) {
3390 return ret; 3399 return ret;
3391 } 3400 }
@@ -3403,7 +3412,7 @@ static int remove_from_transaction(struct super_block *p_s_sb,
3403 journal->j_last = cn->prev; 3412 journal->j_last = cn->prev;
3404 } 3413 }
3405 if (bh) 3414 if (bh)
3406 remove_journal_hash(p_s_sb, journal->j_hash_table, NULL, 3415 remove_journal_hash(sb, journal->j_hash_table, NULL,
3407 bh->b_blocknr, 0); 3416 bh->b_blocknr, 0);
3408 clear_buffer_journaled(bh); /* don't log this one */ 3417 clear_buffer_journaled(bh); /* don't log this one */
3409 3418
@@ -3413,14 +3422,14 @@ static int remove_from_transaction(struct super_block *p_s_sb,
3413 clear_buffer_journal_test(bh); 3422 clear_buffer_journal_test(bh);
3414 put_bh(bh); 3423 put_bh(bh);
3415 if (atomic_read(&(bh->b_count)) < 0) { 3424 if (atomic_read(&(bh->b_count)) < 0) {
3416 reiserfs_warning(p_s_sb, 3425 reiserfs_warning(sb, "journal-1752",
3417 "journal-1752: remove from trans, b_count < 0"); 3426 "b_count < 0");
3418 } 3427 }
3419 ret = 1; 3428 ret = 1;
3420 } 3429 }
3421 journal->j_len--; 3430 journal->j_len--;
3422 journal->j_len_alloc--; 3431 journal->j_len_alloc--;
3423 free_cnode(p_s_sb, cn); 3432 free_cnode(sb, cn);
3424 return ret; 3433 return ret;
3425} 3434}
3426 3435
@@ -3468,22 +3477,22 @@ static int can_dirty(struct reiserfs_journal_cnode *cn)
3468} 3477}
3469 3478
3470/* syncs the commit blocks, but does not force the real buffers to disk 3479/* syncs the commit blocks, but does not force the real buffers to disk
3471** will wait until the current transaction is done/committed before returning 3480** will wait until the current transaction is done/committed before returning
3472*/ 3481*/
3473int journal_end_sync(struct reiserfs_transaction_handle *th, 3482int journal_end_sync(struct reiserfs_transaction_handle *th,
3474 struct super_block *p_s_sb, unsigned long nblocks) 3483 struct super_block *sb, unsigned long nblocks)
3475{ 3484{
3476 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3485 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3477 3486
3478 BUG_ON(!th->t_trans_id); 3487 BUG_ON(!th->t_trans_id);
3479 /* you can sync while nested, very, very bad */ 3488 /* you can sync while nested, very, very bad */
3480 BUG_ON(th->t_refcount > 1); 3489 BUG_ON(th->t_refcount > 1);
3481 if (journal->j_len == 0) { 3490 if (journal->j_len == 0) {
3482 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 3491 reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3483 1); 3492 1);
3484 journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)); 3493 journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
3485 } 3494 }
3486 return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT); 3495 return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT);
3487} 3496}
3488 3497
3489/* 3498/*
@@ -3493,7 +3502,7 @@ static void flush_async_commits(struct work_struct *work)
3493{ 3502{
3494 struct reiserfs_journal *journal = 3503 struct reiserfs_journal *journal =
3495 container_of(work, struct reiserfs_journal, j_work.work); 3504 container_of(work, struct reiserfs_journal, j_work.work);
3496 struct super_block *p_s_sb = journal->j_work_sb; 3505 struct super_block *sb = journal->j_work_sb;
3497 struct reiserfs_journal_list *jl; 3506 struct reiserfs_journal_list *jl;
3498 struct list_head *entry; 3507 struct list_head *entry;
3499 3508
@@ -3502,7 +3511,7 @@ static void flush_async_commits(struct work_struct *work)
3502 /* last entry is the youngest, commit it and you get everything */ 3511 /* last entry is the youngest, commit it and you get everything */
3503 entry = journal->j_journal_list.prev; 3512 entry = journal->j_journal_list.prev;
3504 jl = JOURNAL_LIST_ENTRY(entry); 3513 jl = JOURNAL_LIST_ENTRY(entry);
3505 flush_commit_list(p_s_sb, jl, 1); 3514 flush_commit_list(sb, jl, 1);
3506 } 3515 }
3507 unlock_kernel(); 3516 unlock_kernel();
3508} 3517}
@@ -3511,11 +3520,11 @@ static void flush_async_commits(struct work_struct *work)
3511** flushes any old transactions to disk 3520** flushes any old transactions to disk
3512** ends the current transaction if it is too old 3521** ends the current transaction if it is too old
3513*/ 3522*/
3514int reiserfs_flush_old_commits(struct super_block *p_s_sb) 3523int reiserfs_flush_old_commits(struct super_block *sb)
3515{ 3524{
3516 time_t now; 3525 time_t now;
3517 struct reiserfs_transaction_handle th; 3526 struct reiserfs_transaction_handle th;
3518 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3527 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3519 3528
3520 now = get_seconds(); 3529 now = get_seconds();
3521 /* safety check so we don't flush while we are replaying the log during 3530 /* safety check so we don't flush while we are replaying the log during
@@ -3532,35 +3541,35 @@ int reiserfs_flush_old_commits(struct super_block *p_s_sb)
3532 journal->j_trans_start_time > 0 && 3541 journal->j_trans_start_time > 0 &&
3533 journal->j_len > 0 && 3542 journal->j_len > 0 &&
3534 (now - journal->j_trans_start_time) > journal->j_max_trans_age) { 3543 (now - journal->j_trans_start_time) > journal->j_max_trans_age) {
3535 if (!journal_join(&th, p_s_sb, 1)) { 3544 if (!journal_join(&th, sb, 1)) {
3536 reiserfs_prepare_for_journal(p_s_sb, 3545 reiserfs_prepare_for_journal(sb,
3537 SB_BUFFER_WITH_SB(p_s_sb), 3546 SB_BUFFER_WITH_SB(sb),
3538 1); 3547 1);
3539 journal_mark_dirty(&th, p_s_sb, 3548 journal_mark_dirty(&th, sb,
3540 SB_BUFFER_WITH_SB(p_s_sb)); 3549 SB_BUFFER_WITH_SB(sb));
3541 3550
3542 /* we're only being called from kreiserfsd, it makes no sense to do 3551 /* we're only being called from kreiserfsd, it makes no sense to do
3543 ** an async commit so that kreiserfsd can do it later 3552 ** an async commit so that kreiserfsd can do it later
3544 */ 3553 */
3545 do_journal_end(&th, p_s_sb, 1, COMMIT_NOW | WAIT); 3554 do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT);
3546 } 3555 }
3547 } 3556 }
3548 return p_s_sb->s_dirt; 3557 return sb->s_dirt;
3549} 3558}
3550 3559
3551/* 3560/*
3552** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit 3561** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit
3553** 3562**
3554** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all 3563** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all
3555** the writers are done. By the time it wakes up, the transaction it was called in has already ended, so it just 3564** the writers are done. By the time it wakes up, the transaction it was called in has already ended, so it just
3556** flushes the commit list and returns 0. 3565** flushes the commit list and returns 0.
3557** 3566**
3558** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait. 3567** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait.
3559** 3568**
3560** Note, we can't allow the journal_end to proceed while there are still writers in the log. 3569** Note, we can't allow the journal_end to proceed while there are still writers in the log.
3561*/ 3570*/
3562static int check_journal_end(struct reiserfs_transaction_handle *th, 3571static int check_journal_end(struct reiserfs_transaction_handle *th,
3563 struct super_block *p_s_sb, unsigned long nblocks, 3572 struct super_block *sb, unsigned long nblocks,
3564 int flags) 3573 int flags)
3565{ 3574{
3566 3575
@@ -3569,13 +3578,13 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3569 int commit_now = flags & COMMIT_NOW; 3578 int commit_now = flags & COMMIT_NOW;
3570 int wait_on_commit = flags & WAIT; 3579 int wait_on_commit = flags & WAIT;
3571 struct reiserfs_journal_list *jl; 3580 struct reiserfs_journal_list *jl;
3572 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3581 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3573 3582
3574 BUG_ON(!th->t_trans_id); 3583 BUG_ON(!th->t_trans_id);
3575 3584
3576 if (th->t_trans_id != journal->j_trans_id) { 3585 if (th->t_trans_id != journal->j_trans_id) {
3577 reiserfs_panic(th->t_super, 3586 reiserfs_panic(th->t_super, "journal-1577",
3578 "journal-1577: handle trans id %ld != current trans id %ld\n", 3587 "handle trans id %ld != current trans id %ld",
3579 th->t_trans_id, journal->j_trans_id); 3588 th->t_trans_id, journal->j_trans_id);
3580 } 3589 }
3581 3590
@@ -3584,7 +3593,7 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3584 atomic_dec(&(journal->j_wcount)); 3593 atomic_dec(&(journal->j_wcount));
3585 } 3594 }
3586 3595
3587 /* BUG, deal with the case where j_len is 0 but blocks freed earlier still need to be released; 3596 /* BUG, deal with the case where j_len is 0 but blocks freed earlier still need to be released;
3588 ** this will be dealt with by the next transaction that actually writes something, but should be taken 3597 ** this will be dealt with by the next transaction that actually writes something, but should be taken
3589 ** care of in this trans 3598 ** care of in this trans
3590 */ 3599 */
@@ -3593,7 +3602,7 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3593 /* if wcount > 0, and we are called to with flush or commit_now, 3602 /* if wcount > 0, and we are called to with flush or commit_now,
3594 ** we wait on j_join_wait. We will wake up when the last writer has 3603 ** we wait on j_join_wait. We will wake up when the last writer has
3595 ** finished the transaction, and started it on its way to the disk. 3604 ** finished the transaction, and started it on its way to the disk.
3596 ** Then, we flush the commit or journal list, and just return 0 3605 ** Then, we flush the commit or journal list, and just return 0
3597 ** because the rest of journal end was already done for this transaction. 3606 ** because the rest of journal end was already done for this transaction.
3598 */ 3607 */
3599 if (atomic_read(&(journal->j_wcount)) > 0) { 3608 if (atomic_read(&(journal->j_wcount)) > 0) {
@@ -3608,31 +3617,31 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3608 if (flush) { 3617 if (flush) {
3609 journal->j_next_full_flush = 1; 3618 journal->j_next_full_flush = 1;
3610 } 3619 }
3611 unlock_journal(p_s_sb); 3620 unlock_journal(sb);
3612 3621
3613 /* sleep while the current transaction is still j_jlocked */ 3622 /* sleep while the current transaction is still j_jlocked */
3614 while (journal->j_trans_id == trans_id) { 3623 while (journal->j_trans_id == trans_id) {
3615 if (atomic_read(&journal->j_jlock)) { 3624 if (atomic_read(&journal->j_jlock)) {
3616 queue_log_writer(p_s_sb); 3625 queue_log_writer(sb);
3617 } else { 3626 } else {
3618 lock_journal(p_s_sb); 3627 lock_journal(sb);
3619 if (journal->j_trans_id == trans_id) { 3628 if (journal->j_trans_id == trans_id) {
3620 atomic_set(&(journal->j_jlock), 3629 atomic_set(&(journal->j_jlock),
3621 1); 3630 1);
3622 } 3631 }
3623 unlock_journal(p_s_sb); 3632 unlock_journal(sb);
3624 } 3633 }
3625 } 3634 }
3626 BUG_ON(journal->j_trans_id == trans_id); 3635 BUG_ON(journal->j_trans_id == trans_id);
3627 3636
3628 if (commit_now 3637 if (commit_now
3629 && journal_list_still_alive(p_s_sb, trans_id) 3638 && journal_list_still_alive(sb, trans_id)
3630 && wait_on_commit) { 3639 && wait_on_commit) {
3631 flush_commit_list(p_s_sb, jl, 1); 3640 flush_commit_list(sb, jl, 1);
3632 } 3641 }
3633 return 0; 3642 return 0;
3634 } 3643 }
3635 unlock_journal(p_s_sb); 3644 unlock_journal(sb);
3636 return 0; 3645 return 0;
3637 } 3646 }
3638 3647
@@ -3649,13 +3658,13 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3649 && journal->j_len_alloc < journal->j_max_batch 3658 && journal->j_len_alloc < journal->j_max_batch
3650 && journal->j_cnode_free > (journal->j_trans_max * 3)) { 3659 && journal->j_cnode_free > (journal->j_trans_max * 3)) {
3651 journal->j_bcount++; 3660 journal->j_bcount++;
3652 unlock_journal(p_s_sb); 3661 unlock_journal(sb);
3653 return 0; 3662 return 0;
3654 } 3663 }
3655 3664
3656 if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { 3665 if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(sb)) {
3657 reiserfs_panic(p_s_sb, 3666 reiserfs_panic(sb, "journal-003",
3658 "journal-003: journal_end: j_start (%ld) is too high\n", 3667 "j_start (%ld) is too high",
3659 journal->j_start); 3668 journal->j_start);
3660 } 3669 }
3661 return 1; 3670 return 1;
@@ -3664,7 +3673,7 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3664/* 3673/*
3665** Does all the work that makes deleting blocks safe. 3674** Does all the work that makes deleting blocks safe.
3666** when deleting a block marked BH_JNew, just remove it from the current transaction, clean its buffer_head and move on. 3675** when deleting a block marked BH_JNew, just remove it from the current transaction, clean its buffer_head and move on.
3667** 3676**
3668** otherwise: 3677** otherwise:
3669** set a bit for the block in the journal bitmap. That will prevent it from being allocated for unformatted nodes 3678** set a bit for the block in the journal bitmap. That will prevent it from being allocated for unformatted nodes
3670** before this transaction has finished. 3679** before this transaction has finished.
@@ -3676,16 +3685,16 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3676** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. 3685** Then remove it from the current transaction, decrementing any counters and filing it on the clean list.
3677*/ 3686*/
3678int journal_mark_freed(struct reiserfs_transaction_handle *th, 3687int journal_mark_freed(struct reiserfs_transaction_handle *th,
3679 struct super_block *p_s_sb, b_blocknr_t blocknr) 3688 struct super_block *sb, b_blocknr_t blocknr)
3680{ 3689{
3681 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3690 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3682 struct reiserfs_journal_cnode *cn = NULL; 3691 struct reiserfs_journal_cnode *cn = NULL;
3683 struct buffer_head *bh = NULL; 3692 struct buffer_head *bh = NULL;
3684 struct reiserfs_list_bitmap *jb = NULL; 3693 struct reiserfs_list_bitmap *jb = NULL;
3685 int cleaned = 0; 3694 int cleaned = 0;
3686 BUG_ON(!th->t_trans_id); 3695 BUG_ON(!th->t_trans_id);
3687 3696
3688 cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); 3697 cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr);
3689 if (cn && cn->bh) { 3698 if (cn && cn->bh) {
3690 bh = cn->bh; 3699 bh = cn->bh;
3691 get_bh(bh); 3700 get_bh(bh);
@@ -3695,15 +3704,15 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th,
3695 clear_buffer_journal_new(bh); 3704 clear_buffer_journal_new(bh);
3696 clear_prepared_bits(bh); 3705 clear_prepared_bits(bh);
3697 reiserfs_clean_and_file_buffer(bh); 3706 reiserfs_clean_and_file_buffer(bh);
3698 cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned); 3707 cleaned = remove_from_transaction(sb, blocknr, cleaned);
3699 } else { 3708 } else {
3700 /* set the bit for this block in the journal bitmap for this transaction */ 3709 /* set the bit for this block in the journal bitmap for this transaction */
3701 jb = journal->j_current_jl->j_list_bitmap; 3710 jb = journal->j_current_jl->j_list_bitmap;
3702 if (!jb) { 3711 if (!jb) {
3703 reiserfs_panic(p_s_sb, 3712 reiserfs_panic(sb, "journal-1702",
3704 "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n"); 3713 "journal_list_bitmap is NULL");
3705 } 3714 }
3706 set_bit_in_list_bitmap(p_s_sb, blocknr, jb); 3715 set_bit_in_list_bitmap(sb, blocknr, jb);
3707 3716
3708 /* Note, the entire while loop is not allowed to schedule. */ 3717 /* Note, the entire while loop is not allowed to schedule. */
3709 3718
@@ -3711,13 +3720,13 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th,
                 clear_prepared_bits(bh);
                 reiserfs_clean_and_file_buffer(bh);
         }
-        cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned);
+        cleaned = remove_from_transaction(sb, blocknr, cleaned);
 
         /* find all older transactions with this block, make sure they don't try to write it out */
-        cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table,
+        cn = get_journal_hash_dev(sb, journal->j_list_hash_table,
                                   blocknr);
         while (cn) {
-                if (p_s_sb == cn->sb && blocknr == cn->blocknr) {
+                if (sb == cn->sb && blocknr == cn->blocknr) {
                         set_bit(BLOCK_FREED, &cn->state);
                         if (cn->bh) {
                                 if (!cleaned) {
@@ -3733,8 +3742,9 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th,
                                         put_bh(cn->bh);
                                         if (atomic_read
                                             (&(cn->bh->b_count)) < 0) {
-                                                reiserfs_warning(p_s_sb,
-                                                                 "journal-2138: cn->bh->b_count < 0");
+                                                reiserfs_warning(sb,
+                                                                 "journal-2138",
+                                                                 "cn->bh->b_count < 0");
                                         }
                                 }
                                 if (cn->jlist) {        /* since we are clearing the bh, we MUST dec nonzerolen */
@@ -3824,7 +3834,7 @@ static int __commit_trans_jl(struct inode *inode, unsigned long id,
 
 int reiserfs_commit_for_inode(struct inode *inode)
 {
-        unsigned long id = REISERFS_I(inode)->i_trans_id;
+        unsigned int id = REISERFS_I(inode)->i_trans_id;
         struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl;
 
         /* for the whole inode, assume unset id means it was
@@ -3839,18 +3849,18 @@ int reiserfs_commit_for_inode(struct inode *inode)
         return __commit_trans_jl(inode, id, jl);
 }
 
-void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb,
+void reiserfs_restore_prepared_buffer(struct super_block *sb,
                                       struct buffer_head *bh)
 {
-        struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
-        PROC_INFO_INC(p_s_sb, journal.restore_prepared);
+        struct reiserfs_journal *journal = SB_JOURNAL(sb);
+        PROC_INFO_INC(sb, journal.restore_prepared);
         if (!bh) {
                 return;
         }
         if (test_clear_buffer_journal_restore_dirty(bh) &&
             buffer_journal_dirty(bh)) {
                 struct reiserfs_journal_cnode *cn;
-                cn = get_journal_hash_dev(p_s_sb,
+                cn = get_journal_hash_dev(sb,
                                           journal->j_list_hash_table,
                                           bh->b_blocknr);
                 if (cn && can_dirty(cn)) {
@@ -3867,12 +3877,12 @@ extern struct tree_balance *cur_tb;
 ** be written to disk while we are altering it. So, we must:
 ** clean it
 ** wait on it.
 **
 */
-int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
+int reiserfs_prepare_for_journal(struct super_block *sb,
                                  struct buffer_head *bh, int wait)
 {
-        PROC_INFO_INC(p_s_sb, journal.prepare);
+        PROC_INFO_INC(sb, journal.prepare);
 
         if (!trylock_buffer(bh)) {
                 if (!wait)
@@ -3909,7 +3919,7 @@ static void flush_old_journal_lists(struct super_block *s)
         }
 }
 
 /*
 ** long and ugly. If flush, will not return until all commit
 ** blocks and all real buffers in the trans are on disk.
 ** If no_async, won't return until all commit blocks are on disk.
@@ -3920,10 +3930,10 @@ static void flush_old_journal_lists(struct super_block *s)
 ** journal lists, etc just won't happen.
 */
 static int do_journal_end(struct reiserfs_transaction_handle *th,
-                          struct super_block *p_s_sb, unsigned long nblocks,
+                          struct super_block *sb, unsigned long nblocks,
                           int flags)
 {
-        struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
+        struct reiserfs_journal *journal = SB_JOURNAL(sb);
         struct reiserfs_journal_cnode *cn, *next, *jl_cn;
         struct reiserfs_journal_cnode *last_cn = NULL;
         struct reiserfs_journal_desc *desc;
@@ -3938,7 +3948,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         struct reiserfs_journal_list *jl, *temp_jl;
         struct list_head *entry, *safe;
         unsigned long jindex;
-        unsigned long commit_trans_id;
+        unsigned int commit_trans_id;
         int trans_half;
 
         BUG_ON(th->t_refcount > 1);
@@ -3946,21 +3956,21 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 
         /* protect flush_older_commits from doing mistakes if the
            transaction ID counter gets overflowed. */
-        if (th->t_trans_id == ~0UL)
+        if (th->t_trans_id == ~0U)
                 flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
         flush = flags & FLUSH_ALL;
         wait_on_commit = flags & WAIT;
 
         put_fs_excl();
         current->journal_info = th->t_handle_save;
-        reiserfs_check_lock_depth(p_s_sb, "journal end");
+        reiserfs_check_lock_depth(sb, "journal end");
         if (journal->j_len == 0) {
-                reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb),
+                reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
                                              1);
-                journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb));
+                journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
         }
 
-        lock_journal(p_s_sb);
+        lock_journal(sb);
         if (journal->j_next_full_flush) {
                 flags |= FLUSH_ALL;
                 flush = 1;
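The switch from ~0UL to ~0U matters because the transaction id is now a 32-bit unsigned int: on an LP64 machine the old comparison could never be true, so the forced full flush at wraparound would silently never trigger. A standalone demo of the promotion trap, not kernel code:

#include <stdio.h>

int main(void)
{
        unsigned int trans_id = ~0U;      /* counter at its 32-bit maximum */

        if (trans_id == ~0UL)             /* promoted compare: false on LP64 */
                printf("~0UL guard fires\n");
        else
                printf("~0UL guard misses at wraparound!\n");

        if (trans_id == ~0U)              /* correct 32-bit guard */
                printf("~0U guard fires, forcing a full flush\n");
        return 0;
}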
@@ -3970,13 +3980,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                 wait_on_commit = 1;
         }
 
         /* check_journal_end locks the journal, and unlocks if it does not return 1
         ** it tells us if we should continue with the journal_end, or just return
         */
-        if (!check_journal_end(th, p_s_sb, nblocks, flags)) {
-                p_s_sb->s_dirt = 1;
-                wake_queued_writers(p_s_sb);
-                reiserfs_async_progress_wait(p_s_sb);
+        if (!check_journal_end(th, sb, nblocks, flags)) {
+                sb->s_dirt = 1;
+                wake_queued_writers(sb);
+                reiserfs_async_progress_wait(sb);
                 goto out;
         }
 
@@ -4005,8 +4015,8 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 
         /* setup description block */
         d_bh =
-            journal_getblk(p_s_sb,
-                           SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
+            journal_getblk(sb,
+                           SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
                            journal->j_start);
         set_buffer_uptodate(d_bh);
         desc = (struct reiserfs_journal_desc *)(d_bh)->b_data;
@@ -4015,9 +4025,9 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         set_desc_trans_id(desc, journal->j_trans_id);
 
         /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */
-        c_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
+        c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
                               ((journal->j_start + journal->j_len +
-                                1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
+                                1) % SB_ONDISK_JOURNAL_SIZE(sb)));
         commit = (struct reiserfs_journal_commit *)c_bh->b_data;
         memset(c_bh->b_data, 0, c_bh->b_size);
         set_commit_trans_id(commit, journal->j_trans_id);
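The description and commit blocks bracket the logged blocks inside the circular on-disk log: the desc block sits at j_start, the commit block at (j_start + j_len + 1) modulo the journal size, which is why a later hunk advances the next j_start by j_len + 2. A toy calculation with an assumed journal size:

#include <stdio.h>

#define JOURNAL_SIZE 8192                /* blocks in the circular log (assumed) */

int main(void)
{
        unsigned long j_start = 8190;    /* near the end, to show wrapping */
        unsigned long j_len = 5;         /* real blocks in this transaction */

        unsigned long desc = j_start % JOURNAL_SIZE;
        unsigned long commit = (j_start + j_len + 1) % JOURNAL_SIZE;

        printf("desc block   at log offset %lu\n", desc);    /* 8190 */
        printf("commit block at log offset %lu\n", commit);  /* 4: wrapped */

        /* the next transaction starts one past the commit block */
        printf("next j_start = %lu\n", (j_start + j_len + 2) % JOURNAL_SIZE);
        return 0;
}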
@@ -4050,13 +4060,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         ** for each real block, add it to the journal list hash,
         ** copy into real block index array in the commit or desc block
         */
-        trans_half = journal_trans_half(p_s_sb->s_blocksize);
+        trans_half = journal_trans_half(sb->s_blocksize);
         for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) {
                 if (buffer_journaled(cn->bh)) {
-                        jl_cn = get_cnode(p_s_sb);
+                        jl_cn = get_cnode(sb);
                         if (!jl_cn) {
-                                reiserfs_panic(p_s_sb,
-                                               "journal-1676, get_cnode returned NULL\n");
+                                reiserfs_panic(sb, "journal-1676",
+                                               "get_cnode returned NULL");
                         }
                         if (i == 0) {
                                 jl->j_realblock = jl_cn;
@@ -4067,18 +4077,19 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                                 last_cn->next = jl_cn;
                         }
                         last_cn = jl_cn;
                         /* make sure the block we are trying to log is not a block
                            of journal or reserved area */
 
                         if (is_block_in_log_or_reserved_area
-                            (p_s_sb, cn->bh->b_blocknr)) {
-                                reiserfs_panic(p_s_sb,
-                                               "journal-2332: Trying to log block %lu, which is a log block\n",
+                            (sb, cn->bh->b_blocknr)) {
+                                reiserfs_panic(sb, "journal-2332",
+                                               "Trying to log block %lu, "
+                                               "which is a log block",
                                                cn->bh->b_blocknr);
                         }
                         jl_cn->blocknr = cn->bh->b_blocknr;
                         jl_cn->state = 0;
-                        jl_cn->sb = p_s_sb;
+                        jl_cn->sb = sb;
                         jl_cn->bh = cn->bh;
                         jl_cn->jlist = jl;
                         insert_journal_hash(journal->j_list_hash_table, jl_cn);
@@ -4119,11 +4130,11 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                         char *addr;
                         struct page *page;
                         tmp_bh =
-                            journal_getblk(p_s_sb,
-                                           SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
+                            journal_getblk(sb,
+                                           SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
                                            ((cur_write_start +
                                              jindex) %
-                                            SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
+                                            SB_ONDISK_JOURNAL_SIZE(sb)));
                         set_buffer_uptodate(tmp_bh);
                         page = cn->bh->b_page;
                         addr = kmap(page);
@@ -4137,12 +4148,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                         clear_buffer_journaled(cn->bh);
                 } else {
                         /* JDirty cleared sometime during transaction. don't log this one */
-                        reiserfs_warning(p_s_sb,
-                                         "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!");
+                        reiserfs_warning(sb, "journal-2048",
+                                         "BAD, buffer in journal hash, "
+                                         "but not JDirty!");
                         brelse(cn->bh);
                 }
                 next = cn->next;
-                free_cnode(p_s_sb, cn);
+                free_cnode(sb, cn);
                 cn = next;
                 cond_resched();
         }
@@ -4152,7 +4164,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
         */
 
-        journal->j_current_jl = alloc_journal_list(p_s_sb);
+        journal->j_current_jl = alloc_journal_list(sb);
 
         /* now it is safe to insert this transaction on the main list */
         list_add_tail(&jl->j_list, &journal->j_journal_list);
@@ -4163,7 +4175,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         old_start = journal->j_start;
         journal->j_start =
             (journal->j_start + journal->j_len +
-             2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb);
+             2) % SB_ONDISK_JOURNAL_SIZE(sb);
         atomic_set(&(journal->j_wcount), 0);
         journal->j_bcount = 0;
         journal->j_last = NULL;
@@ -4178,7 +4190,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         journal->j_len_alloc = 0;
         journal->j_next_full_flush = 0;
         journal->j_next_async_flush = 0;
-        init_journal_hash(p_s_sb);
+        init_journal_hash(sb);
 
         // make sure reiserfs_add_jh sees the new current_jl before we
         // write out the tails
@@ -4207,14 +4219,14 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         ** queue don't wait for this proc to flush journal lists and such.
         */
         if (flush) {
-                flush_commit_list(p_s_sb, jl, 1);
-                flush_journal_list(p_s_sb, jl, 1);
+                flush_commit_list(sb, jl, 1);
+                flush_journal_list(sb, jl, 1);
         } else if (!(jl->j_state & LIST_COMMIT_PENDING))
                 queue_delayed_work(commit_wq, &journal->j_work, HZ / 10);
 
         /* if the next transaction has any chance of wrapping, flush
         ** transactions that might get overwritten. If any journal lists are very
         ** old flush them as well.
         */
       first_jl:
         list_for_each_safe(entry, safe, &journal->j_journal_list) {
@@ -4222,11 +4234,11 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                 if (journal->j_start <= temp_jl->j_start) {
                         if ((journal->j_start + journal->j_trans_max + 1) >=
                             temp_jl->j_start) {
-                                flush_used_journal_lists(p_s_sb, temp_jl);
+                                flush_used_journal_lists(sb, temp_jl);
                                 goto first_jl;
                         } else if ((journal->j_start +
                                     journal->j_trans_max + 1) <
-                                   SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
+                                   SB_ONDISK_JOURNAL_SIZE(sb)) {
                                 /* if we don't cross into the next transaction and we don't
                                  * wrap, there is no way we can overlap any later transactions
                                  * break now
@@ -4235,11 +4247,11 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                         }
                 } else if ((journal->j_start +
                             journal->j_trans_max + 1) >
-                           SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
+                           SB_ONDISK_JOURNAL_SIZE(sb)) {
                         if (((journal->j_start + journal->j_trans_max + 1) %
-                             SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >=
+                             SB_ONDISK_JOURNAL_SIZE(sb)) >=
                             temp_jl->j_start) {
-                                flush_used_journal_lists(p_s_sb, temp_jl);
+                                flush_used_journal_lists(sb, temp_jl);
                                 goto first_jl;
                         } else {
                                 /* we don't overlap anything from out start to the end of the
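The two branches above answer one question: could the next transaction, up to j_trans_max blocks starting at j_start, overwrite a journal list that begins at temp_jl->j_start, given that offsets wrap modulo the journal size? A condensed sketch of that predicate, as a hypothetical standalone helper rather than the kernel's:

#include <stdio.h>

static int next_trans_overlaps(unsigned long j_start,
                               unsigned long trans_max,
                               unsigned long journal_size,
                               unsigned long list_start)
{
        unsigned long end = j_start + trans_max + 1;

        if (j_start <= list_start)
                /* same pass around the ring: overlap iff the new
                 * transaction reaches list_start */
                return end >= list_start;
        if (end > journal_size)
                /* we wrap: the tail lands at end % journal_size and
                 * overlaps anything at or before it */
                return (end % journal_size) >= list_start;
        return 0;       /* we neither reach list_start nor wrap */
}

int main(void)
{
        printf("%d\n", next_trans_overlaps(100, 50, 8192, 120));   /* 1 */
        printf("%d\n", next_trans_overlaps(8100, 200, 8192, 50));  /* 1: wraps */
        printf("%d\n", next_trans_overlaps(100, 50, 8192, 4000));  /* 0 */
        return 0;
}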
@@ -4250,46 +4262,47 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                         }
                 }
         }
-        flush_old_journal_lists(p_s_sb);
+        flush_old_journal_lists(sb);
 
         journal->j_current_jl->j_list_bitmap =
-            get_list_bitmap(p_s_sb, journal->j_current_jl);
+            get_list_bitmap(sb, journal->j_current_jl);
 
         if (!(journal->j_current_jl->j_list_bitmap)) {
-                reiserfs_panic(p_s_sb,
-                               "journal-1996: do_journal_end, could not get a list bitmap\n");
+                reiserfs_panic(sb, "journal-1996",
+                               "could not get a list bitmap");
         }
 
         atomic_set(&(journal->j_jlock), 0);
-        unlock_journal(p_s_sb);
+        unlock_journal(sb);
         /* wake up any body waiting to join. */
         clear_bit(J_WRITERS_QUEUED, &journal->j_state);
         wake_up(&(journal->j_join_wait));
 
         if (!flush && wait_on_commit &&
-            journal_list_still_alive(p_s_sb, commit_trans_id)) {
-                flush_commit_list(p_s_sb, jl, 1);
+            journal_list_still_alive(sb, commit_trans_id)) {
+                flush_commit_list(sb, jl, 1);
         }
       out:
-        reiserfs_check_lock_depth(p_s_sb, "journal end2");
+        reiserfs_check_lock_depth(sb, "journal end2");
 
         memset(th, 0, sizeof(*th));
         /* Re-set th->t_super, so we can properly keep track of how many
          * persistent transactions there are. We need to do this so if this
          * call is part of a failed restart_transaction, we can free it later */
-        th->t_super = p_s_sb;
+        th->t_super = sb;
 
         return journal->j_errno;
 }
 
-static void __reiserfs_journal_abort_hard(struct super_block *sb)
+/* Send the file system read only and refuse new transactions */
+void reiserfs_abort_journal(struct super_block *sb, int errno)
 {
         struct reiserfs_journal *journal = SB_JOURNAL(sb);
         if (test_bit(J_ABORTED, &journal->j_state))
                 return;
 
-        printk(KERN_CRIT "REISERFS: Aborting journal for filesystem on %s\n",
-               reiserfs_bdevname(sb));
+        if (!journal->j_errno)
+                journal->j_errno = errno;
 
         sb->s_flags |= MS_RDONLY;
         set_bit(J_ABORTED, &journal->j_state);
@@ -4299,19 +4312,3 @@ static void __reiserfs_journal_abort_hard(struct super_block *sb)
 #endif
 }
 
-static void __reiserfs_journal_abort_soft(struct super_block *sb, int errno)
-{
-        struct reiserfs_journal *journal = SB_JOURNAL(sb);
-        if (test_bit(J_ABORTED, &journal->j_state))
-                return;
-
-        if (!journal->j_errno)
-                journal->j_errno = errno;
-
-        __reiserfs_journal_abort_hard(sb);
-}
-
-void reiserfs_journal_abort(struct super_block *sb, int errno)
-{
-        __reiserfs_journal_abort_soft(sb, errno);
-}
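With __reiserfs_journal_abort_soft() and _hard() folded into a single exported reiserfs_abort_journal(), the abort path becomes: record the first errno, remount read-only, and latch J_ABORTED so later calls are no-ops. A userspace model of that control flow, with simplified stand-in types:

#include <stdio.h>

struct journal { int errno_saved; int aborted; };
struct super   { unsigned flags; struct journal j; };
#define RDONLY 0x1

static void abort_journal(struct super *sb, int errno_val)
{
        if (sb->j.aborted)
                return;                  /* already aborted: keep first errno */
        if (!sb->j.errno_saved)
                sb->j.errno_saved = errno_val;
        sb->flags |= RDONLY;             /* refuse further writes */
        sb->j.aborted = 1;               /* refuse new transactions */
}

int main(void)
{
        struct super s = {0};
        abort_journal(&s, -5);           /* first error (-EIO) wins */
        abort_journal(&s, -22);          /* ignored: already aborted */
        printf("errno=%d rdonly=%u\n", s.j.errno_saved, s.flags & RDONLY);
        return 0;
}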
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 6de060a6aa7f..381750a155f6 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -111,7 +111,7 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
         item_num_in_dest =
             (last_first == FIRST_TO_LAST) ? (B_NR_ITEMS(dest) - 1) : 0;
 
-        leaf_paste_entries(dest_bi->bi_bh, item_num_in_dest,
+        leaf_paste_entries(dest_bi, item_num_in_dest,
                            (last_first ==
                             FIRST_TO_LAST) ? I_ENTRY_COUNT(B_N_PITEM_HEAD(dest,
                                                                           item_num_in_dest))
@@ -119,8 +119,8 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
                            DEH_SIZE * copy_count + copy_records_len);
 }
 
 /* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or
    part of it or nothing (see the return 0 below) from SOURCE to the end
    (if last_first) or beginning (!last_first) of the DEST */
 /* returns 1 if anything was copied, else 0 */
 static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
@@ -168,10 +168,11 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
                 if (bytes_or_entries == ih_item_len(ih)
                     && is_indirect_le_ih(ih))
                         if (get_ih_free_space(ih))
-                                reiserfs_panic(NULL,
-                                               "vs-10020: leaf_copy_boundary_item: "
-                                               "last unformatted node must be filled entirely (%h)",
-                                               ih);
+                                reiserfs_panic(sb_from_bi(dest_bi),
+                                               "vs-10020",
+                                               "last unformatted node "
+                                               "must be filled "
+                                               "entirely (%h)", ih);
         }
 #endif
 
@@ -395,7 +396,7 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
         else {
                 struct item_head n_ih;
 
                 /* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST
                    part defined by 'cpy_bytes'; create new item header; change old item_header (????);
                    n_ih = new item_header;
                  */
@@ -425,7 +426,7 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
         else {
                 struct item_head n_ih;
 
                 /* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST
                    part defined by 'cpy_bytes'; create new item header;
                    n_ih = new item_header;
                  */
@@ -622,9 +623,8 @@ static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb,
                 break;
 
         default:
-                reiserfs_panic(NULL,
-                               "vs-10250: leaf_define_dest_src_infos: shift type is unknown (%d)",
-                               shift_mode);
+                reiserfs_panic(sb_from_bi(src_bi), "vs-10250",
+                               "shift type is unknown (%d)", shift_mode);
         }
         RFALSE(!src_bi->bi_bh || !dest_bi->bi_bh,
                "vs-10260: mode==%d, source (%p) or dest (%p) buffer is initialized incorrectly",
@@ -674,9 +674,9 @@ int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes)
 #ifdef CONFIG_REISERFS_CHECK
         if (tb->tb_mode == M_PASTE || tb->tb_mode == M_INSERT) {
                 print_cur_tb("vs-10275");
-                reiserfs_panic(tb->tb_sb,
-                               "vs-10275: leaf_shift_left: balance condition corrupted (%c)",
-                               tb->tb_mode);
+                reiserfs_panic(tb->tb_sb, "vs-10275",
+                               "balance condition corrupted "
+                               "(%c)", tb->tb_mode);
         }
 #endif
 
@@ -724,7 +724,7 @@ int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes)
 static void leaf_delete_items_entirely(struct buffer_info *bi,
                                        int first, int del_num);
 /* If del_bytes == -1, starting from position 'first' delete del_num items in whole in buffer CUR.
    If not.
    If last_first == 0. Starting from position 'first' delete del_num-1 items in whole. Delete part of body of
    the first item. Part defined by del_bytes. Don't delete first item header
    If last_first == 1. Starting from position 'first+1' delete del_num-1 items in whole. Delete part of body of
@@ -783,7 +783,7 @@ void leaf_delete_items(struct buffer_info *cur_bi, int last_first,
                 /* len = body len of item */
                 len = ih_item_len(ih);
 
                 /* delete the part of the last item of the bh
                    do not delete item header
                  */
                 leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1,
@@ -865,7 +865,7 @@ void leaf_insert_into_buf(struct buffer_info *bi, int before,
         }
 }
 
 /* paste paste_size bytes to affected_item_num-th item.
    When item is a directory, this only prepare space for new entries */
 void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num,
                           int pos_in_item, int paste_size,
@@ -889,9 +889,12 @@ void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num,
 
 #ifdef CONFIG_REISERFS_CHECK
         if (zeros_number > paste_size) {
+                struct super_block *sb = NULL;
+                if (bi && bi->tb)
+                        sb = bi->tb->tb_sb;
                 print_cur_tb("10177");
-                reiserfs_panic(NULL,
-                               "vs-10177: leaf_paste_in_buffer: ero number == %d, paste_size == %d",
+                reiserfs_panic(sb, "vs-10177",
+                               "zeros_number == %d, paste_size == %d",
                                zeros_number, paste_size);
         }
 #endif                          /* CONFIG_REISERFS_CHECK */
@@ -1019,7 +1022,7 @@ static int leaf_cut_entries(struct buffer_head *bh,
 /* when cut item is part of regular file
    pos_in_item - first byte that must be cut
    cut_size - number of bytes to be cut beginning from pos_in_item
 
    when cut item is part of directory
    pos_in_item - number of first deleted entry
    cut_size - count of deleted entries
@@ -1191,7 +1194,7 @@ static void leaf_delete_items_entirely(struct buffer_info *bi,
 }
 
 /* paste new_entry_count entries (new_dehs, records) into position before to item_num-th item */
-void leaf_paste_entries(struct buffer_head *bh,
+void leaf_paste_entries(struct buffer_info *bi,
                         int item_num,
                         int before,
                         int new_entry_count,
@@ -1203,6 +1206,7 @@ void leaf_paste_entries(struct buffer_head *bh,
         struct reiserfs_de_head *deh;
         char *insert_point;
         int i, old_entry_num;
+        struct buffer_head *bh = bi->bi_bh;
 
         if (new_entry_count == 0)
                 return;
@@ -1271,7 +1275,7 @@ void leaf_paste_entries(struct buffer_head *bh,
         /* change item key if necessary (when we paste before 0-th entry */
         if (!before) {
                 set_le_ih_k_offset(ih, deh_offset(new_dehs));
 /*      memcpy (&ih->ih_key.k_offset,
            &new_dehs->deh_offset, SHORT_KEY_SIZE);*/
         }
 #ifdef CONFIG_REISERFS_CHECK
@@ -1287,13 +1291,17 @@ void leaf_paste_entries(struct buffer_head *bh,
                         prev = (i != 0) ? deh_location(&(deh[i - 1])) : 0;
 
                         if (prev && prev <= deh_location(&(deh[i])))
-                                reiserfs_warning(NULL,
-                                                 "vs-10240: leaf_paste_entries: directory item (%h) corrupted (prev %a, cur(%d) %a)",
-                                                 ih, deh + i - 1, i, deh + i);
+                                reiserfs_error(sb_from_bi(bi), "vs-10240",
+                                               "directory item (%h) "
+                                               "corrupted (prev %a, "
+                                               "cur(%d) %a)",
+                                               ih, deh + i - 1, i, deh + i);
                         if (next && next >= deh_location(&(deh[i])))
-                                reiserfs_warning(NULL,
-                                                 "vs-10250: leaf_paste_entries: directory item (%h) corrupted (cur(%d) %a, next %a)",
-                                                 ih, i, deh + i, deh + i + 1);
+                                reiserfs_error(sb_from_bi(bi), "vs-10250",
+                                               "directory item (%h) "
+                                               "corrupted (cur(%d) %a, "
+                                               "next %a)",
+                                               ih, i, deh + i, deh + i + 1);
                 }
         }
 #endif
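Both lbalance.c conversions above route a super_block into reiserfs_panic()/reiserfs_error() through the buffer_info. A plausible shape for the sb_from_bi()/sb_from_tb() helpers they call — an assumption here, since the real definitions live in the reiserfs headers touched elsewhere in this series:

#include <stddef.h>

struct super_block;                           /* opaque in this sketch */
struct tree_balance { struct super_block *tb_sb; /* ... */ };
struct buffer_info  { struct tree_balance *tb;   /* ... */ };

/* Tolerate NULLs at every hop so error paths with no balance in
 * progress can still report without a device name. */
static inline struct super_block *sb_from_tb(struct tree_balance *tb)
{
        return tb ? tb->tb_sb : NULL;
}

static inline struct super_block *sb_from_bi(struct buffer_info *bi)
{
        return bi ? sb_from_tb(bi->tb) : NULL;
}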
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 639d635d9d4b..efd4d720718e 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -106,7 +106,7 @@ key of the first directory entry in it.
 This function first calls search_by_key, then, if item whose first
 entry matches is not found it looks for the entry inside directory
 item found by search_by_key. Fills the path to the entry, and to the
 entry position in the item
 
 */
 
@@ -120,8 +120,8 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
         switch (retval) {
         case ITEM_NOT_FOUND:
                 if (!PATH_LAST_POSITION(path)) {
-                        reiserfs_warning(sb,
-                                         "vs-7000: search_by_entry_key: search_by_key returned item position == 0");
+                        reiserfs_error(sb, "vs-7000", "search_by_key "
+                                       "returned item position == 0");
                         pathrelse(path);
                         return IO_ERROR;
                 }
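This hunk shows the message convention the whole series converts to: callers pass a short id string ("vs-7000") plus a bare format, and the helper composes the final line, instead of every call site hand-embedding "id: function:" in its format string. A hypothetical standalone version of such a helper:

#include <stdarg.h>
#include <stdio.h>

struct super_block;                   /* opaque here */

static void report(struct super_block *sb, const char *level,
                   const char *id, const char *fmt, ...)
{
        va_list args;

        (void)sb;  /* a real implementation would print the device name */
        fprintf(stderr, "REISERFS %s (device ?): %s ", level, id);
        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
        fputc('\n', stderr);
}

int main(void)
{
        report((struct super_block *)0, "error", "vs-7000",
               "search_by_key returned item position == %d", 0);
        return 0;
}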
@@ -135,8 +135,7 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
 
         default:
                 pathrelse(path);
-                reiserfs_warning(sb,
-                                 "vs-7002: search_by_entry_key: no path to here");
+                reiserfs_error(sb, "vs-7002", "no path to here");
                 return IO_ERROR;
         }
 
@@ -146,10 +145,9 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
         if (!is_direntry_le_ih(de->de_ih) ||
             COMP_SHORT_KEYS(&(de->de_ih->ih_key), key)) {
                 print_block(de->de_bh, 0, -1, -1);
-                reiserfs_panic(sb,
-                               "vs-7005: search_by_entry_key: found item %h is not directory item or "
-                               "does not belong to the same directory as key %K",
-                               de->de_ih, key);
+                reiserfs_panic(sb, "vs-7005", "found item %h is not directory "
+                               "item or does not belong to the same directory "
+                               "as key %K", de->de_ih, key);
         }
 #endif                          /* CONFIG_REISERFS_CHECK */
 
@@ -300,8 +298,7 @@ static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen,
                     search_by_entry_key(dir->i_sb, &key_to_search,
                                         path_to_entry, de);
                 if (retval == IO_ERROR) {
-                        reiserfs_warning(dir->i_sb, "zam-7001: io error in %s",
-                                         __func__);
+                        reiserfs_error(dir->i_sb, "zam-7001", "io error");
                         return IO_ERROR;
                 }
 
@@ -361,9 +358,10 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
                         return ERR_PTR(-EACCES);
                 }
 
-                /* Propogate the priv_object flag so we know we're in the priv tree */
-                if (is_reiserfs_priv_object(dir))
-                        reiserfs_mark_inode_private(inode);
+                /* Propagate the private flag so we know we're
+                 * in the priv tree */
+                if (IS_PRIVATE(dir))
+                        inode->i_flags |= S_PRIVATE;
         }
         reiserfs_write_unlock(dir->i_sb);
         if (retval == IO_ERROR) {
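reiserfs_lookup() now uses the generic VFS private-inode machinery, IS_PRIVATE() and S_PRIVATE, in place of the filesystem's own is_reiserfs_priv_object()/reiserfs_mark_inode_private() pair, so anything found under the private xattr tree inherits the flag. A toy model of the propagation, with a simplified inode and an illustrative flag value:

#include <stdio.h>

#define S_PRIV 0x200                     /* stand-in for S_PRIVATE */
struct ino { unsigned long i_flags; };

/* Children looked up under a private directory become private too,
 * so the security layer skips them. */
static void propagate_private(const struct ino *dir, struct ino *inode)
{
        if (dir->i_flags & S_PRIV)       /* IS_PRIVATE(dir) analogue */
                inode->i_flags |= S_PRIV;
}

int main(void)
{
        struct ino dir = { S_PRIV }, child = { 0 };
        propagate_private(&dir, &child);
        printf("child private: %d\n", !!(child.i_flags & S_PRIV));
        return 0;
}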
@@ -373,7 +371,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
         return d_splice_alias(inode, dentry);
 }
 
 /*
 ** looks up the dentry of the parent directory for child.
 ** taken from ext2_get_parent
 */
@@ -403,7 +401,7 @@ struct dentry *reiserfs_get_parent(struct dentry *child)
         return d_obtain_alias(inode);
 }
 
 /* add entry to the directory (entry can be hidden).
 
 insert definition of when hidden directories are used here -Hans
 
@@ -484,10 +482,9 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
         }
 
         if (retval != NAME_FOUND) {
-                reiserfs_warning(dir->i_sb,
-                                 "zam-7002:%s: \"reiserfs_find_entry\" "
-                                 "has returned unexpected value (%d)",
-                                 __func__, retval);
+                reiserfs_error(dir->i_sb, "zam-7002",
+                               "reiserfs_find_entry() returned "
+                               "unexpected value (%d)", retval);
         }
 
         return -EEXIST;
@@ -498,8 +495,9 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
                                   MAX_GENERATION_NUMBER + 1);
         if (gen_number > MAX_GENERATION_NUMBER) {
                 /* there is no free generation number */
-                reiserfs_warning(dir->i_sb,
-                                 "reiserfs_add_entry: Congratulations! we have got hash function screwed up");
+                reiserfs_warning(dir->i_sb, "reiserfs-7010",
+                                 "Congratulations! we have got hash function "
+                                 "screwed up");
                 if (buffer != small_buf)
                         kfree(buffer);
                 pathrelse(&path);
@@ -515,10 +513,9 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
         if (gen_number != 0) {  /* we need to re-search for the insertion point */
                 if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) !=
                     NAME_NOT_FOUND) {
-                        reiserfs_warning(dir->i_sb,
-                                         "vs-7032: reiserfs_add_entry: "
-                                         "entry with this key (%K) already exists",
-                                         &entry_key);
+                        reiserfs_warning(dir->i_sb, "vs-7032",
+                                         "entry with this key (%K) already "
+                                         "exists", &entry_key);
 
                         if (buffer != small_buf)
                                 kfree(buffer);
@@ -562,7 +559,7 @@ static int drop_new_inode(struct inode *inode)
         return 0;
 }
 
 /* utility function that does setup for reiserfs_new_inode.
 ** vfs_dq_init needs lots of credits so it's better to have it
 ** outside of a transaction, so we had to pull some bits of
 ** reiserfs_new_inode out into this func.
@@ -601,20 +598,22 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
             2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
                  REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
         struct reiserfs_transaction_handle th;
-        int locked;
+        struct reiserfs_security_handle security;
 
         if (!(inode = new_inode(dir->i_sb))) {
                 return -ENOMEM;
         }
         new_inode_init(inode, dir, mode);
 
-        locked = reiserfs_cache_default_acl(dir);
-
+        jbegin_count += reiserfs_cache_default_acl(dir);
+        retval = reiserfs_security_init(dir, inode, &security);
+        if (retval < 0) {
+                drop_new_inode(inode);
+                return retval;
+        }
+        jbegin_count += retval;
         reiserfs_write_lock(dir->i_sb);
 
-        if (locked)
-                reiserfs_write_lock_xattrs(dir->i_sb);
-
         retval = journal_begin(&th, dir->i_sb, jbegin_count);
         if (retval) {
                 drop_new_inode(inode);
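The create/mknod/mkdir/symlink paths all gain the same shape: reiserfs_security_init() runs before the transaction is opened and returns the extra journal credits the security xattr will need, which are folded into jbegin_count; a negative return drops the half-initialized inode. A sketch of that ordering with stub functions; the credit numbers are illustrative, not the kernel's:

#include <stdio.h>

static int security_init(void)         /* models reiserfs_security_init() */
{
        return 2;       /* >= 0: extra journal credits; < 0: error */
}

static int journal_begin(int credits)
{
        printf("journal_begin with %d credits\n", credits);
        return 0;
}

int main(void)
{
        int jbegin_count = 10;          /* base credits for the create */
        int ret = security_init();      /* sized outside the transaction */

        if (ret < 0)
                return 1;               /* would drop the half-built inode */
        jbegin_count += ret;            /* reserve room for the xattr */
        return journal_begin(jbegin_count) ? 1 : 0;
}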
@@ -623,15 +622,10 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
 
         retval =
             reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry,
-                               inode);
+                               inode, &security);
         if (retval)
                 goto out_failed;
 
-        if (locked) {
-                reiserfs_write_unlock_xattrs(dir->i_sb);
-                locked = 0;
-        }
-
         inode->i_op = &reiserfs_file_inode_operations;
         inode->i_fop = &reiserfs_file_operations;
         inode->i_mapping->a_ops = &reiserfs_address_space_operations;
@@ -658,8 +652,6 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
         retval = journal_end(&th, dir->i_sb, jbegin_count);
 
       out_failed:
-        if (locked)
-                reiserfs_write_unlock_xattrs(dir->i_sb);
         reiserfs_write_unlock(dir->i_sb);
         return retval;
 }
@@ -670,12 +662,12 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
         int retval;
         struct inode *inode;
         struct reiserfs_transaction_handle th;
+        struct reiserfs_security_handle security;
         /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
         int jbegin_count =
             JOURNAL_PER_BALANCE_CNT * 3 +
             2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
                  REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
-        int locked;
 
         if (!new_valid_dev(rdev))
                 return -EINVAL;
@@ -685,13 +677,15 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
         }
         new_inode_init(inode, dir, mode);
 
-        locked = reiserfs_cache_default_acl(dir);
-
+        jbegin_count += reiserfs_cache_default_acl(dir);
+        retval = reiserfs_security_init(dir, inode, &security);
+        if (retval < 0) {
+                drop_new_inode(inode);
+                return retval;
+        }
+        jbegin_count += retval;
         reiserfs_write_lock(dir->i_sb);
 
-        if (locked)
-                reiserfs_write_lock_xattrs(dir->i_sb);
-
         retval = journal_begin(&th, dir->i_sb, jbegin_count);
         if (retval) {
                 drop_new_inode(inode);
@@ -700,16 +694,11 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
 
         retval =
             reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry,
-                               inode);
+                               inode, &security);
         if (retval) {
                 goto out_failed;
         }
 
-        if (locked) {
-                reiserfs_write_unlock_xattrs(dir->i_sb);
-                locked = 0;
-        }
-
         inode->i_op = &reiserfs_special_inode_operations;
         init_special_inode(inode, inode->i_mode, rdev);
 
@@ -739,8 +728,6 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
         retval = journal_end(&th, dir->i_sb, jbegin_count);
 
       out_failed:
-        if (locked)
-                reiserfs_write_unlock_xattrs(dir->i_sb);
         reiserfs_write_unlock(dir->i_sb);
         return retval;
 }
@@ -750,12 +737,12 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
         int retval;
         struct inode *inode;
         struct reiserfs_transaction_handle th;
+        struct reiserfs_security_handle security;
         /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
         int jbegin_count =
             JOURNAL_PER_BALANCE_CNT * 3 +
             2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
                  REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
-        int locked;
 
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
         /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */
@@ -767,11 +754,14 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
         }
         new_inode_init(inode, dir, mode);
 
-        locked = reiserfs_cache_default_acl(dir);
-
+        jbegin_count += reiserfs_cache_default_acl(dir);
+        retval = reiserfs_security_init(dir, inode, &security);
+        if (retval < 0) {
+                drop_new_inode(inode);
+                return retval;
+        }
+        jbegin_count += retval;
         reiserfs_write_lock(dir->i_sb);
-        if (locked)
-                reiserfs_write_lock_xattrs(dir->i_sb);
 
         retval = journal_begin(&th, dir->i_sb, jbegin_count);
         if (retval) {
@@ -787,17 +777,12 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
         retval = reiserfs_new_inode(&th, dir, mode, NULL /*symlink */ ,
                                     old_format_only(dir->i_sb) ?
                                     EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
-                                    dentry, inode);
+                                    dentry, inode, &security);
         if (retval) {
                 dir->i_nlink--;
                 goto out_failed;
         }
 
-        if (locked) {
-                reiserfs_write_unlock_xattrs(dir->i_sb);
-                locked = 0;
-        }
-
         reiserfs_update_inode_transaction(inode);
         reiserfs_update_inode_transaction(dir);
 
@@ -827,8 +812,6 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
         unlock_new_inode(inode);
         retval = journal_end(&th, dir->i_sb, jbegin_count);
       out_failed:
-        if (locked)
-                reiserfs_write_unlock_xattrs(dir->i_sb);
         reiserfs_write_unlock(dir->i_sb);
         return retval;
 }
@@ -837,7 +820,7 @@ static inline int reiserfs_empty_dir(struct inode *inode)
 {
         /* we can cheat because an old format dir cannot have
         ** EMPTY_DIR_SIZE, and a new format dir cannot have
         ** EMPTY_DIR_SIZE_V1. So, if the inode is either size,
         ** regardless of disk format version, the directory is empty.
         */
         if (inode->i_size != EMPTY_DIR_SIZE &&
@@ -903,8 +886,9 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
                 goto end_rmdir;
 
         if (inode->i_nlink != 2 && inode->i_nlink != 1)
-                reiserfs_warning(inode->i_sb, "%s: empty directory has nlink "
-                                 "!= 2 (%d)", __func__, inode->i_nlink);
+                reiserfs_error(inode->i_sb, "reiserfs-7040",
+                               "empty directory has nlink != 2 (%d)",
+                               inode->i_nlink);
 
         clear_nlink(inode);
         inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
@@ -980,10 +964,9 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
         }
 
         if (!inode->i_nlink) {
-                reiserfs_warning(inode->i_sb, "%s: deleting nonexistent file "
-                                 "(%s:%lu), %d", __func__,
-                                 reiserfs_bdevname(inode->i_sb), inode->i_ino,
-                                 inode->i_nlink);
+                reiserfs_warning(inode->i_sb, "reiserfs-7042",
+                                 "deleting nonexistent file (%lu), %d",
+                                 inode->i_ino, inode->i_nlink);
                 inode->i_nlink = 1;
         }
 
@@ -1037,6 +1020,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
         char *name;
         int item_len;
         struct reiserfs_transaction_handle th;
+        struct reiserfs_security_handle security;
         int mode = S_IFLNK | S_IRWXUGO;
         /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
         int jbegin_count =
@@ -1049,6 +1033,13 @@ static int reiserfs_symlink(struct inode *parent_dir,
         }
         new_inode_init(inode, parent_dir, mode);
 
+        retval = reiserfs_security_init(parent_dir, inode, &security);
+        if (retval < 0) {
+                drop_new_inode(inode);
+                return retval;
+        }
+        jbegin_count += retval;
+
         reiserfs_write_lock(parent_dir->i_sb);
         item_len = ROUND_UP(strlen(symname));
         if (item_len > MAX_DIRECT_ITEM_LEN(parent_dir->i_sb->s_blocksize)) {
@@ -1066,8 +1057,6 @@ static int reiserfs_symlink(struct inode *parent_dir,
         memcpy(name, symname, strlen(symname));
         padd_item(name, item_len, strlen(symname));
 
-        /* We would inherit the default ACL here, but symlinks don't get ACLs */
-
         retval = journal_begin(&th, parent_dir->i_sb, jbegin_count);
         if (retval) {
                 drop_new_inode(inode);
@@ -1077,7 +1066,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
 
         retval =
             reiserfs_new_inode(&th, parent_dir, mode, name, strlen(symname),
-                               dentry, inode);
+                               dentry, inode, &security);
         kfree(name);
         if (retval) {           /* reiserfs_new_inode iputs for us */
                 goto out_failed;
@@ -1173,7 +1162,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
         return retval;
 }
 
-// de contains information pointing to an entry which
+/* de contains information pointing to an entry which */
 static int de_still_valid(const char *name, int len,
                           struct reiserfs_dir_entry *de)
 {
@@ -1196,15 +1185,14 @@ static int entry_points_to_object(const char *name, int len,
 
         if (inode) {
                 if (!de_visible(de->de_deh + de->de_entry_num))
-                        reiserfs_panic(NULL,
-                                       "vs-7042: entry_points_to_object: entry must be visible");
+                        reiserfs_panic(inode->i_sb, "vs-7042",
+                                       "entry must be visible");
                 return (de->de_objectid == inode->i_ino) ? 1 : 0;
         }
 
         /* this must be added hidden entry */
         if (de_visible(de->de_deh + de->de_entry_num))
-                reiserfs_panic(NULL,
-                               "vs-7043: entry_points_to_object: entry must be visible");
+                reiserfs_panic(NULL, "vs-7043", "entry must be visible");
 
         return 1;
 }
@@ -1218,10 +1206,10 @@ static void set_ino_in_dir_entry(struct reiserfs_dir_entry *de,
         de->de_deh[de->de_entry_num].deh_objectid = key->k_objectid;
 }
 
 /*
  * process, that is going to call fix_nodes/do_balance must hold only
  * one path. If it holds 2 or more, it can get into endless waiting in
  * get_empty_nodes or its clones
  */
 static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                            struct inode *new_dir, struct dentry *new_dentry)
@@ -1275,7 +1263,7 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
         old_inode_mode = old_inode->i_mode;
         if (S_ISDIR(old_inode_mode)) {
                 // make sure, that directory being renamed has correct ".."
                 // and that its new parent directory has not too many links
                 // already
 
@@ -1286,8 +1274,8 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 }
         }
 
         /* directory is renamed, its parent directory will be changed,
         ** so find ".." entry
         */
         dot_dot_de.de_gen_number_bit_string = NULL;
         retval =
@@ -1318,8 +1306,8 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                                 new_dentry->d_name.len, old_inode, 0);
         if (retval == -EEXIST) {
                 if (!new_dentry_inode) {
-                        reiserfs_panic(old_dir->i_sb,
-                                       "vs-7050: new entry is found, new inode == 0\n");
+                        reiserfs_panic(old_dir->i_sb, "vs-7050",
+                                       "new entry is found, new inode == 0");
                 }
         } else if (retval) {
                 int err = journal_end(&th, old_dir->i_sb, jbegin_count);
@@ -1397,9 +1385,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                            this stuff, yes? Then, having
                            gathered everything into RAM we
                            should lock the buffers, yes? -Hans */
                         /* probably. our rename needs to hold more
                         ** than one path at once. The seals would
                         ** have to be written to deal with multi-path
                         ** issues -chris
                         */
                         /* sanity checking before doing the rename - avoid races many
@@ -1477,7 +1465,7 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1477 } 1465 }
1478 1466
1479 if (S_ISDIR(old_inode_mode)) { 1467 if (S_ISDIR(old_inode_mode)) {
1480 // adjust ".." of renamed directory 1468 /* adjust ".." of renamed directory */
1481 set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir)); 1469 set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir));
1482 journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh); 1470 journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh);
1483 1471
@@ -1499,8 +1487,8 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1499 if (reiserfs_cut_from_item 1487 if (reiserfs_cut_from_item
1500 (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL, 1488 (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL,
1501 0) < 0) 1489 0) < 0)
1502 reiserfs_warning(old_dir->i_sb, 1490 reiserfs_error(old_dir->i_sb, "vs-7060",
1503 "vs-7060: reiserfs_rename: couldn't not cut old name. Fsck later?"); 1491 "couldn't not cut old name. Fsck later?");
1504 1492
1505 old_dir->i_size -= DEH_SIZE + old_de.de_entrylen; 1493 old_dir->i_size -= DEH_SIZE + old_de.de_entrylen;
1506 1494
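The pattern running through the hunks above is mechanical: the old calls embedded an error id and function name in the format string itself ("vs-7060: reiserfs_rename: ..."), while the new calls pass the id as a separate argument and leave the function name to the wrapper. This diff only shows the __reiserfs_panic()/__reiserfs_error() implementations, so the macro shapes below are an assumption, a minimal sketch of how the call sites can pick up __func__ automatically:

/* Assumed macro shapes (the real ones live in the reiserfs headers,
 * not in this diff); they forward the call site's __func__. */
#define reiserfs_panic(s, id, fmt, args...) \
	__reiserfs_panic(s, id, __func__, fmt, ##args)
#define reiserfs_error(s, id, fmt, args...) \
	__reiserfs_error(s, id, __func__, fmt, ##args)

That keeps every message prefix consistent without each caller retyping its own name, which is exactly the repetition these hunks delete.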
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index ea0cf8c28a99..3a6de810bd61 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c
@@ -18,8 +18,7 @@
18static void check_objectid_map(struct super_block *s, __le32 * map) 18static void check_objectid_map(struct super_block *s, __le32 * map)
19{ 19{
20 if (le32_to_cpu(map[0]) != 1) 20 if (le32_to_cpu(map[0]) != 1)
21 reiserfs_panic(s, 21 reiserfs_panic(s, "vs-15010", "map corrupted: %lx",
22 "vs-15010: check_objectid_map: map corrupted: %lx",
23 (long unsigned int)le32_to_cpu(map[0])); 22 (long unsigned int)le32_to_cpu(map[0]));
24 23
25 // FIXME: add something else here 24 // FIXME: add something else here
@@ -61,7 +60,7 @@ __u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th)
61 /* comment needed -Hans */ 60 /* comment needed -Hans */
62 unused_objectid = le32_to_cpu(map[1]); 61 unused_objectid = le32_to_cpu(map[1]);
63 if (unused_objectid == U32_MAX) { 62 if (unused_objectid == U32_MAX) {
64 reiserfs_warning(s, "%s: no more object ids", __func__); 63 reiserfs_warning(s, "reiserfs-15100", "no more object ids");
65 reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s)); 64 reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s));
66 return 0; 65 return 0;
67 } 66 }
@@ -160,9 +159,8 @@ void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
160 i += 2; 159 i += 2;
161 } 160 }
162 161
163 reiserfs_warning(s, 162 reiserfs_error(s, "vs-15011", "tried to free free object id (%lu)",
164 "vs-15011: reiserfs_release_objectid: tried to free free object id (%lu)", 163 (long unsigned)objectid_to_release);
165 (long unsigned)objectid_to_release);
166} 164}
167 165
168int reiserfs_convert_objectid_map_v1(struct super_block *s) 166int reiserfs_convert_objectid_map_v1(struct super_block *s)
@@ -182,7 +180,7 @@ int reiserfs_convert_objectid_map_v1(struct super_block *s)
182 180
183 if (cur_size > new_size) { 181 if (cur_size > new_size) {
184 /* mark as used every id that was listed as free at the end of the 182 /* mark as used every id that was listed as free at the end of the
185 ** objectid map 183 ** objectid map
186 */ 184 */
187 objectid_map[new_size - 1] = objectid_map[cur_size - 1]; 185 objectid_map[new_size - 1] = objectid_map[cur_size - 1];
188 set_sb_oid_cursize(disk_sb, new_size); 186 set_sb_oid_cursize(disk_sb, new_size);
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 740bb8c0c1ae..536eacaeb710 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -157,19 +157,16 @@ static void sprintf_disk_child(char *buf, struct disk_child *dc)
157 dc_size(dc)); 157 dc_size(dc));
158} 158}
159 159
160static char *is_there_reiserfs_struct(char *fmt, int *what, int *skip) 160static char *is_there_reiserfs_struct(char *fmt, int *what)
161{ 161{
162 char *k = fmt; 162 char *k = fmt;
163 163
164 *skip = 0;
165
166 while ((k = strchr(k, '%')) != NULL) { 164 while ((k = strchr(k, '%')) != NULL) {
167 if (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' || 165 if (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' ||
168 k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a') { 166 k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a') {
169 *what = k[1]; 167 *what = k[1];
170 break; 168 break;
171 } 169 }
172 (*skip)++;
173 k++; 170 k++;
174 } 171 }
175 return k; 172 return k;
@@ -181,30 +178,29 @@ static char *is_there_reiserfs_struct(char *fmt, int *what, int *skip)
181 appropriative printk. With this reiserfs_warning you can use format 178 appropriative printk. With this reiserfs_warning you can use format
182 specification for complex structures like you used to do with 179 specification for complex structures like you used to do with
183 printfs for integers, doubles and pointers. For instance, to print 180 printfs for integers, doubles and pointers. For instance, to print
184 out key structure you have to write just: 181 out key structure you have to write just:
185 reiserfs_warning ("bad key %k", key); 182 reiserfs_warning ("bad key %k", key);
186 instead of 183 instead of
187 printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid, 184 printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid,
188 key->k_offset, key->k_uniqueness); 185 key->k_offset, key->k_uniqueness);
189*/ 186*/
190 187static DEFINE_SPINLOCK(error_lock);
191static void prepare_error_buf(const char *fmt, va_list args) 188static void prepare_error_buf(const char *fmt, va_list args)
192{ 189{
193 char *fmt1 = fmt_buf; 190 char *fmt1 = fmt_buf;
194 char *k; 191 char *k;
195 char *p = error_buf; 192 char *p = error_buf;
196 int i, j, what, skip; 193 int what;
194
195 spin_lock(&error_lock);
197 196
198 strcpy(fmt1, fmt); 197 strcpy(fmt1, fmt);
199 198
200 while ((k = is_there_reiserfs_struct(fmt1, &what, &skip)) != NULL) { 199 while ((k = is_there_reiserfs_struct(fmt1, &what)) != NULL) {
201 *k = 0; 200 *k = 0;
202 201
203 p += vsprintf(p, fmt1, args); 202 p += vsprintf(p, fmt1, args);
204 203
205 for (i = 0; i < skip; i++)
206 j = va_arg(args, int);
207
208 switch (what) { 204 switch (what) {
209 case 'k': 205 case 'k':
210 sprintf_le_key(p, va_arg(args, struct reiserfs_key *)); 206 sprintf_le_key(p, va_arg(args, struct reiserfs_key *));
@@ -243,15 +239,16 @@ static void prepare_error_buf(const char *fmt, va_list args)
243 fmt1 = k + 2; 239 fmt1 = k + 2;
244 } 240 }
245 vsprintf(p, fmt1, args); 241 vsprintf(p, fmt1, args);
242 spin_unlock(&error_lock);
246 243
247} 244}
248 245
249/* in addition to usual conversion specifiers this accepts reiserfs 246/* in addition to usual conversion specifiers this accepts reiserfs
250 specific conversion specifiers: 247 specific conversion specifiers:
251 %k to print little endian key, 248 %k to print little endian key,
252 %K to print cpu key, 249 %K to print cpu key,
253 %h to print item_head, 250 %h to print item_head,
254 %t to print directory entry 251 %t to print directory entry
255 %z to print block head (arg must be struct buffer_head * 252 %z to print block head (arg must be struct buffer_head *
256 %b to print buffer_head 253 %b to print buffer_head
257*/ 254*/
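fmt_buf and error_buf are static scratch buffers shared by every reporting caller, so the new DEFINE_SPINLOCK(error_lock) serializes the whole format step; without it, two CPUs could interleave their halves of a message. A self-contained sketch of the same pattern, with hypothetical buffer and function names (and with the lock held across the printk as well, a slightly stricter choice than the code above makes):

/* Sketch only: serialize a shared scratch buffer between reporters. */
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <stdarg.h>

static char scratch_buf[1024];			/* size is illustrative */
static DEFINE_SPINLOCK(scratch_lock);

static void report(const char *fmt, ...)
{
	va_list args;

	spin_lock(&scratch_lock);		/* one writer at a time */
	va_start(args, fmt);
	vsnprintf(scratch_buf, sizeof(scratch_buf), fmt, args);
	va_end(args);
	printk(KERN_WARNING "%s\n", scratch_buf);
	spin_unlock(&scratch_lock);
}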
@@ -264,14 +261,17 @@ static void prepare_error_buf(const char *fmt, va_list args)
264 va_end( args );\ 261 va_end( args );\
265} 262}
266 263
267void reiserfs_warning(struct super_block *sb, const char *fmt, ...) 264void __reiserfs_warning(struct super_block *sb, const char *id,
265 const char *function, const char *fmt, ...)
268{ 266{
269 do_reiserfs_warning(fmt); 267 do_reiserfs_warning(fmt);
270 if (sb) 268 if (sb)
271 printk(KERN_WARNING "ReiserFS: %s: warning: %s\n", 269 printk(KERN_WARNING "REISERFS warning (device %s): %s%s%s: "
272 reiserfs_bdevname(sb), error_buf); 270 "%s\n", sb->s_id, id ? id : "", id ? " " : "",
271 function, error_buf);
273 else 272 else
274 printk(KERN_WARNING "ReiserFS: warning: %s\n", error_buf); 273 printk(KERN_WARNING "REISERFS warning: %s%s%s: %s\n",
274 id ? id : "", id ? " " : "", function, error_buf);
275} 275}
276 276
277/* No newline.. reiserfs_info calls can be followed by printk's */ 277/* No newline.. reiserfs_info calls can be followed by printk's */
@@ -279,10 +279,10 @@ void reiserfs_info(struct super_block *sb, const char *fmt, ...)
279{ 279{
280 do_reiserfs_warning(fmt); 280 do_reiserfs_warning(fmt);
281 if (sb) 281 if (sb)
282 printk(KERN_NOTICE "ReiserFS: %s: %s", 282 printk(KERN_NOTICE "REISERFS (device %s): %s",
283 reiserfs_bdevname(sb), error_buf); 283 sb->s_id, error_buf);
284 else 284 else
285 printk(KERN_NOTICE "ReiserFS: %s", error_buf); 285 printk(KERN_NOTICE "REISERFS: %s", error_buf);
286} 286}
287 287
288/* No newline.. reiserfs_printk calls can be followed by printk's */ 288/* No newline.. reiserfs_printk calls can be followed by printk's */
@@ -297,10 +297,10 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
297#ifdef CONFIG_REISERFS_CHECK 297#ifdef CONFIG_REISERFS_CHECK
298 do_reiserfs_warning(fmt); 298 do_reiserfs_warning(fmt);
299 if (s) 299 if (s)
300 printk(KERN_DEBUG "ReiserFS: %s: %s\n", 300 printk(KERN_DEBUG "REISERFS debug (device %s): %s\n",
301 reiserfs_bdevname(s), error_buf); 301 s->s_id, error_buf);
302 else 302 else
303 printk(KERN_DEBUG "ReiserFS: %s\n", error_buf); 303 printk(KERN_DEBUG "REISERFS debug: %s\n", error_buf);
304#endif 304#endif
305} 305}
306 306
@@ -314,17 +314,17 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
314 maintainer-errorid. Don't bother with reusing errorids, there are 314 maintainer-errorid. Don't bother with reusing errorids, there are
315 lots of numbers out there. 315 lots of numbers out there.
316 316
317 Example: 317 Example:
318 318
319 reiserfs_panic( 319 reiserfs_panic(
320 p_sb, "reiser-29: reiserfs_new_blocknrs: " 320 p_sb, "reiser-29: reiserfs_new_blocknrs: "
321 "one of search_start or rn(%d) is equal to MAX_B_NUM," 321 "one of search_start or rn(%d) is equal to MAX_B_NUM,"
322 "which means that we are optimizing location based on the bogus location of a temp buffer (%p).", 322 "which means that we are optimizing location based on the bogus location of a temp buffer (%p).",
323 rn, bh 323 rn, bh
324 ); 324 );
325 325
326 Regular panic()s sometimes clear the screen before the message can 326 Regular panic()s sometimes clear the screen before the message can
327 be read, thus the need for the while loop. 327 be read, thus the need for the while loop.
328 328
329 Numbering scheme for panic used by Vladimir and Anatoly (Hans completely ignores this scheme, and considers it 329 Numbering scheme for panic used by Vladimir and Anatoly (Hans completely ignores this scheme, and considers it
330 pointless complexity): 330 pointless complexity):
@@ -353,14 +353,46 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
353extern struct tree_balance *cur_tb; 353extern struct tree_balance *cur_tb;
354#endif 354#endif
355 355
356void reiserfs_panic(struct super_block *sb, const char *fmt, ...) 356void __reiserfs_panic(struct super_block *sb, const char *id,
357 const char *function, const char *fmt, ...)
357{ 358{
358 do_reiserfs_warning(fmt); 359 do_reiserfs_warning(fmt);
359 360
361#ifdef CONFIG_REISERFS_CHECK
360 dump_stack(); 362 dump_stack();
363#endif
364 if (sb)
365 panic(KERN_WARNING "REISERFS panic (device %s): %s%s%s: %s\n",
366 sb->s_id, id ? id : "", id ? " " : "",
367 function, error_buf);
368 else
369 panic(KERN_WARNING "REISERFS panic: %s%s%s: %s\n",
370 id ? id : "", id ? " " : "", function, error_buf);
371}
372
373void __reiserfs_error(struct super_block *sb, const char *id,
374 const char *function, const char *fmt, ...)
375{
376 do_reiserfs_warning(fmt);
361 377
362 panic(KERN_EMERG "REISERFS: panic (device %s): %s\n", 378 BUG_ON(sb == NULL);
363 reiserfs_bdevname(sb), error_buf); 379
380 if (reiserfs_error_panic(sb))
381 __reiserfs_panic(sb, id, function, error_buf);
382
383 if (id && id[0])
384 printk(KERN_CRIT "REISERFS error (device %s): %s %s: %s\n",
385 sb->s_id, id, function, error_buf);
386 else
387 printk(KERN_CRIT "REISERFS error (device %s): %s: %s\n",
388 sb->s_id, function, error_buf);
389
390 if (sb->s_flags & MS_RDONLY)
391 return;
392
393 reiserfs_info(sb, "Remounting filesystem read-only\n");
394 sb->s_flags |= MS_RDONLY;
395 reiserfs_abort_journal(sb, -EIO);
364} 396}
365 397
366void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...) 398void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...)
@@ -368,18 +400,18 @@ void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...)
368 do_reiserfs_warning(fmt); 400 do_reiserfs_warning(fmt);
369 401
370 if (reiserfs_error_panic(sb)) { 402 if (reiserfs_error_panic(sb)) {
371 panic(KERN_CRIT "REISERFS: panic (device %s): %s\n", 403 panic(KERN_CRIT "REISERFS panic (device %s): %s\n", sb->s_id,
372 reiserfs_bdevname(sb), error_buf); 404 error_buf);
373 } 405 }
374 406
375 if (sb->s_flags & MS_RDONLY) 407 if (reiserfs_is_journal_aborted(SB_JOURNAL(sb)))
376 return; 408 return;
377 409
378 printk(KERN_CRIT "REISERFS: abort (device %s): %s\n", 410 printk(KERN_CRIT "REISERFS abort (device %s): %s\n", sb->s_id,
379 reiserfs_bdevname(sb), error_buf); 411 error_buf);
380 412
381 sb->s_flags |= MS_RDONLY; 413 sb->s_flags |= MS_RDONLY;
382 reiserfs_journal_abort(sb, errno); 414 reiserfs_abort_journal(sb, errno);
383} 415}
384 416
385/* this prints internal nodes (4 keys/items in line) (dc_number, 417/* this prints internal nodes (4 keys/items in line) (dc_number,
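Taken together, __reiserfs_error() above and reiserfs_abort() apply one escalation policy: honor an errors=panic style mount option first, stop if the damage is already contained (filesystem read-only, or journal already aborted), and otherwise log, flip MS_RDONLY, and abort the journal. Reduced to a skeleton, with the logging details elided:

/* Policy skeleton only; see __reiserfs_error() above for the real code. */
static void fs_error_policy(struct super_block *sb)
{
	if (reiserfs_error_panic(sb))		/* mounted to panic on error */
		panic("REISERFS: unrecoverable error");

	if (sb->s_flags & MS_RDONLY)		/* damage already contained */
		return;

	sb->s_flags |= MS_RDONLY;		/* stop further writes */
	reiserfs_abort_journal(sb, -EIO);	/* and further journaling */
}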
@@ -681,12 +713,10 @@ static void check_leaf_block_head(struct buffer_head *bh)
681 blkh = B_BLK_HEAD(bh); 713 blkh = B_BLK_HEAD(bh);
682 nr = blkh_nr_item(blkh); 714 nr = blkh_nr_item(blkh);
683 if (nr > (bh->b_size - BLKH_SIZE) / IH_SIZE) 715 if (nr > (bh->b_size - BLKH_SIZE) / IH_SIZE)
684 reiserfs_panic(NULL, 716 reiserfs_panic(NULL, "vs-6010", "invalid item number %z",
685 "vs-6010: check_leaf_block_head: invalid item number %z",
686 bh); 717 bh);
687 if (blkh_free_space(blkh) > bh->b_size - BLKH_SIZE - IH_SIZE * nr) 718 if (blkh_free_space(blkh) > bh->b_size - BLKH_SIZE - IH_SIZE * nr)
688 reiserfs_panic(NULL, 719 reiserfs_panic(NULL, "vs-6020", "invalid free space %z",
689 "vs-6020: check_leaf_block_head: invalid free space %z",
690 bh); 720 bh);
691 721
692} 722}
@@ -697,21 +727,15 @@ static void check_internal_block_head(struct buffer_head *bh)
697 727
698 blkh = B_BLK_HEAD(bh); 728 blkh = B_BLK_HEAD(bh);
699 if (!(B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL(bh) <= MAX_HEIGHT)) 729 if (!(B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL(bh) <= MAX_HEIGHT))
700 reiserfs_panic(NULL, 730 reiserfs_panic(NULL, "vs-6025", "invalid level %z", bh);
701 "vs-6025: check_internal_block_head: invalid level %z",
702 bh);
703 731
704 if (B_NR_ITEMS(bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE) 732 if (B_NR_ITEMS(bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE)
705 reiserfs_panic(NULL, 733 reiserfs_panic(NULL, "vs-6030", "invalid item number %z", bh);
706 "vs-6030: check_internal_block_head: invalid item number %z",
707 bh);
708 734
709 if (B_FREE_SPACE(bh) != 735 if (B_FREE_SPACE(bh) !=
710 bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS(bh) - 736 bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS(bh) -
711 DC_SIZE * (B_NR_ITEMS(bh) + 1)) 737 DC_SIZE * (B_NR_ITEMS(bh) + 1))
712 reiserfs_panic(NULL, 738 reiserfs_panic(NULL, "vs-6040", "invalid free space %z", bh);
713 "vs-6040: check_internal_block_head: invalid free space %z",
714 bh);
715 739
716} 740}
717 741
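Both checkers enforce the same bookkeeping identity: every byte of a node is block head, per-item metadata, item body, or free space. For an internal node holding nr keys that is free = blocksize - BLKH_SIZE - KEY_SIZE * nr - DC_SIZE * (nr + 1), since nr keys delimit nr + 1 child pointers. A worked instance of the arithmetic; the constant values below are illustrative, not quoted from the reiserfs headers:

/* Illustrative arithmetic for the internal-node free space check. */
#include <stdio.h>

int main(void)
{
	unsigned blocksize = 4096;
	unsigned blkh = 24, key = 16, dc = 8;	/* assumed struct sizes */
	unsigned nr = 100;			/* keys in the node */

	/* nr keys delimit nr + 1 child pointers */
	unsigned used = blkh + key * nr + dc * (nr + 1);

	printf("expected free space: %u\n", blocksize - used);
	return 0;
}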
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 37173fa07d15..9229e5514a4e 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -321,7 +321,7 @@ static int show_journal(struct seq_file *m, struct super_block *sb)
321 /* incore fields */ 321 /* incore fields */
322 "j_1st_reserved_block: \t%i\n" 322 "j_1st_reserved_block: \t%i\n"
323 "j_state: \t%li\n" 323 "j_state: \t%li\n"
324 "j_trans_id: \t%lu\n" 324 "j_trans_id: \t%u\n"
325 "j_mount_id: \t%lu\n" 325 "j_mount_id: \t%lu\n"
326 "j_start: \t%lu\n" 326 "j_start: \t%lu\n"
327 "j_len: \t%lu\n" 327 "j_len: \t%lu\n"
@@ -329,7 +329,7 @@ static int show_journal(struct seq_file *m, struct super_block *sb)
329 "j_wcount: \t%i\n" 329 "j_wcount: \t%i\n"
330 "j_bcount: \t%lu\n" 330 "j_bcount: \t%lu\n"
331 "j_first_unflushed_offset: \t%lu\n" 331 "j_first_unflushed_offset: \t%lu\n"
332 "j_last_flush_trans_id: \t%lu\n" 332 "j_last_flush_trans_id: \t%u\n"
333 "j_trans_start_time: \t%li\n" 333 "j_trans_start_time: \t%li\n"
334 "j_list_bitmap_index: \t%i\n" 334 "j_list_bitmap_index: \t%i\n"
335 "j_must_wait: \t%i\n" 335 "j_must_wait: \t%i\n"
@@ -492,7 +492,6 @@ int reiserfs_proc_info_init(struct super_block *sb)
492 spin_lock_init(&__PINFO(sb).lock); 492 spin_lock_init(&__PINFO(sb).lock);
493 REISERFS_SB(sb)->procdir = proc_mkdir(b, proc_info_root); 493 REISERFS_SB(sb)->procdir = proc_mkdir(b, proc_info_root);
494 if (REISERFS_SB(sb)->procdir) { 494 if (REISERFS_SB(sb)->procdir) {
495 REISERFS_SB(sb)->procdir->owner = THIS_MODULE;
496 REISERFS_SB(sb)->procdir->data = sb; 495 REISERFS_SB(sb)->procdir->data = sb;
497 add_file(sb, "version", show_version); 496 add_file(sb, "version", show_version);
498 add_file(sb, "super", show_super); 497 add_file(sb, "super", show_super);
@@ -503,7 +502,7 @@ int reiserfs_proc_info_init(struct super_block *sb)
503 add_file(sb, "journal", show_journal); 502 add_file(sb, "journal", show_journal);
504 return 0; 503 return 0;
505 } 504 }
506 reiserfs_warning(sb, "reiserfs: cannot create /proc/%s/%s", 505 reiserfs_warning(sb, "cannot create /proc/%s/%s",
507 proc_info_root_name, b); 506 proc_info_root_name, b);
508 return 1; 507 return 1;
509} 508}
@@ -556,11 +555,8 @@ int reiserfs_proc_info_global_init(void)
556{ 555{
557 if (proc_info_root == NULL) { 556 if (proc_info_root == NULL) {
558 proc_info_root = proc_mkdir(proc_info_root_name, NULL); 557 proc_info_root = proc_mkdir(proc_info_root_name, NULL);
559 if (proc_info_root) { 558 if (!proc_info_root) {
560 proc_info_root->owner = THIS_MODULE; 559 reiserfs_warning(NULL, "cannot create /proc/%s",
561 } else {
562 reiserfs_warning(NULL,
563 "reiserfs: cannot create /proc/%s",
564 proc_info_root_name); 560 proc_info_root_name);
565 return 1; 561 return 1;
566 } 562 }
@@ -634,7 +630,7 @@ int reiserfs_global_version_in_proc(char *buffer, char **start,
634 * 630 *
635 */ 631 */
636 632
637/* 633/*
638 * Make Linus happy. 634 * Make Linus happy.
639 * Local variables: 635 * Local variables:
640 * c-indentation-style: "K&R" 636 * c-indentation-style: "K&R"
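The dropped ->owner assignments in this file track a kernel-wide cleanup: proc_dir_entry stopped carrying an owner module once the proc core handled that refcounting itself, so only the private data cookie is still set by hand. The surviving idiom, shortened from reiserfs_proc_info_init() above:

/* Shortened from reiserfs_proc_info_init() above. */
#include <linux/fs.h>
#include <linux/proc_fs.h>

static int make_proc_dir(struct super_block *sb)
{
	struct proc_dir_entry *dir = proc_mkdir("reiserfs", NULL);

	if (!dir)
		return 1;	/* caller warns, as above */
	dir->data = sb;		/* private cookie, still set by hand */
	return 0;
}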
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index f71c3948edef..238e9d9b31e0 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -1,8 +1,8 @@
1/* 1/*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5/* 5/*
6 * Written by Alexander Zarochentcev. 6 * Written by Alexander Zarochentcev.
7 * 7 *
8 * The kernel part of the (on-line) reiserfs resizer. 8 * The kernel part of the (on-line) reiserfs resizer.
@@ -101,7 +101,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
101 memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size); 101 memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size);
102 102
103 /* just in case vfree schedules on us, copy the new 103 /* just in case vfree schedules on us, copy the new
104 ** pointer into the journal struct before freeing the 104 ** pointer into the journal struct before freeing the
105 ** old one 105 ** old one
106 */ 106 */
107 node_tmp = jb->bitmaps; 107 node_tmp = jb->bitmaps;
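The comment in the resize hunk describes a classic ordering trick: because vfree() may schedule, the replacement bitmap pointer is stored into the journal struct before the old allocation is freed, so anything that peeks at the field while we sleep sees a valid pointer. In sketch form, with placeholder struct and field names:

/* Publish-before-free sketch; the type and field are placeholders. */
#include <linux/vmalloc.h>

struct jbitmap { void *bitmaps; };

static void swap_bitmaps(struct jbitmap *jb, void *new_bitmaps)
{
	void *old = jb->bitmaps;

	jb->bitmaps = new_bitmaps;	/* publish the replacement first */
	vfree(old);			/* safe even if vfree sleeps */
}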
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 73aaa33f6735..d036ee5b1c81 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -23,7 +23,6 @@
23 * get_rkey 23 * get_rkey
24 * key_in_buffer 24 * key_in_buffer
25 * decrement_bcount 25 * decrement_bcount
26 * decrement_counters_in_path
27 * reiserfs_check_path 26 * reiserfs_check_path
28 * pathrelse_and_restore 27 * pathrelse_and_restore
29 * pathrelse 28 * pathrelse
@@ -57,28 +56,28 @@
57#include <linux/quotaops.h> 56#include <linux/quotaops.h>
58 57
59/* Does the buffer contain a disk block which is in the tree? */ 58/* Does the buffer contain a disk block which is in the tree? */
60inline int B_IS_IN_TREE(const struct buffer_head *p_s_bh) 59inline int B_IS_IN_TREE(const struct buffer_head *bh)
61{ 60{
62 61
63 RFALSE(B_LEVEL(p_s_bh) > MAX_HEIGHT, 62 RFALSE(B_LEVEL(bh) > MAX_HEIGHT,
64 "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh); 63 "PAP-1010: block (%b) has too big level (%z)", bh, bh);
65 64
66 return (B_LEVEL(p_s_bh) != FREE_LEVEL); 65 return (B_LEVEL(bh) != FREE_LEVEL);
67} 66}
68 67
69// 68//
70// to get item head in le form 69// to get item head in le form
71// 70//
72inline void copy_item_head(struct item_head *p_v_to, 71inline void copy_item_head(struct item_head *to,
73 const struct item_head *p_v_from) 72 const struct item_head *from)
74{ 73{
75 memcpy(p_v_to, p_v_from, IH_SIZE); 74 memcpy(to, from, IH_SIZE);
76} 75}
77 76
78/* k1 is a pointer to an on-disk structure stored in little-endian 77/* k1 is a pointer to an on-disk structure stored in little-endian
79 form. k2 is a pointer to a cpu variable. For keys of items of the same 78 form. k2 is a pointer to a cpu variable. For keys of items of the same
80 object this returns 0. 79 object this returns 0.
81 Returns: -1 if key1 < key2 80 Returns: -1 if key1 < key2
82 0 if key1 == key2 81 0 if key1 == key2
83 1 if key1 > key2 */ 82 1 if key1 > key2 */
84inline int comp_short_keys(const struct reiserfs_key *le_key, 83inline int comp_short_keys(const struct reiserfs_key *le_key,
@@ -136,15 +135,15 @@ static inline int comp_keys(const struct reiserfs_key *le_key,
136inline int comp_short_le_keys(const struct reiserfs_key *key1, 135inline int comp_short_le_keys(const struct reiserfs_key *key1,
137 const struct reiserfs_key *key2) 136 const struct reiserfs_key *key2)
138{ 137{
139 __u32 *p_s_1_u32, *p_s_2_u32; 138 __u32 *k1_u32, *k2_u32;
140 int n_key_length = REISERFS_SHORT_KEY_LEN; 139 int key_length = REISERFS_SHORT_KEY_LEN;
141 140
142 p_s_1_u32 = (__u32 *) key1; 141 k1_u32 = (__u32 *) key1;
143 p_s_2_u32 = (__u32 *) key2; 142 k2_u32 = (__u32 *) key2;
144 for (; n_key_length--; ++p_s_1_u32, ++p_s_2_u32) { 143 for (; key_length--; ++k1_u32, ++k2_u32) {
145 if (le32_to_cpu(*p_s_1_u32) < le32_to_cpu(*p_s_2_u32)) 144 if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32))
146 return -1; 145 return -1;
147 if (le32_to_cpu(*p_s_1_u32) > le32_to_cpu(*p_s_2_u32)) 146 if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32))
148 return 1; 147 return 1;
149 } 148 }
150 return 0; 149 return 0;
@@ -175,52 +174,51 @@ inline int comp_le_keys(const struct reiserfs_key *k1,
175 * Binary search toolkit function * 174 * Binary search toolkit function *
176 * Search for an item in the array by the item key * 175 * Search for an item in the array by the item key *
177 * Returns: 1 if found, 0 if not found; * 176 * Returns: 1 if found, 0 if not found; *
178 * *p_n_pos = number of the searched element if found, else the * 177 * *pos = number of the searched element if found, else the *
179 * number of the first element that is larger than p_v_key. * 178 * number of the first element that is larger than key. *
180 **************************************************************************/ 179 **************************************************************************/
181/* For those not familiar with binary search: n_lbound is the leftmost item that it 180/* For those not familiar with binary search: lbound is the leftmost item that it
182 could be, n_rbound the rightmost item that it could be. We examine the item 181 could be, rbound the rightmost item that it could be. We examine the item
183 halfway between n_lbound and n_rbound, and that tells us either that we can increase 182 halfway between lbound and rbound, and that tells us either that we can increase
184 n_lbound, or decrease n_rbound, or that we have found it, or if n_lbound > n_rbound that 183 lbound, or decrease rbound, or that we have found it, or if lbound > rbound that
185 there are no possible items, and we have not found it. With each examination we 184 there are no possible items, and we have not found it. With each examination we
186 cut the number of possible items it could be by one more than half rounded down, 185 cut the number of possible items it could be by one more than half rounded down,
187 or we find it. */ 186 or we find it. */
188static inline int bin_search(const void *p_v_key, /* Key to search for. */ 187static inline int bin_search(const void *key, /* Key to search for. */
189 const void *p_v_base, /* First item in the array. */ 188 const void *base, /* First item in the array. */
190 int p_n_num, /* Number of items in the array. */ 189 int num, /* Number of items in the array. */
191 int p_n_width, /* Item size in the array 190 int width, /* Item size in the array
192 searched. Lest the reader be 191 searched. Lest the reader be
193 confused, note that this is crafted 192 confused, note that this is crafted
194 as a general function, and when it 193 as a general function, and when it
195 is applied specifically to the array 194 is applied specifically to the array
196 of item headers in a node, p_n_width 195 of item headers in a node, width
197 is actually the item header size not 196 is actually the item header size not
198 the item size. */ 197 the item size. */
199 int *p_n_pos /* Number of the searched for element. */ 198 int *pos /* Number of the searched for element. */
200 ) 199 )
201{ 200{
202 int n_rbound, n_lbound, n_j; 201 int rbound, lbound, j;
203 202
204 for (n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0)) / 2; 203 for (j = ((rbound = num - 1) + (lbound = 0)) / 2;
205 n_lbound <= n_rbound; n_j = (n_rbound + n_lbound) / 2) 204 lbound <= rbound; j = (rbound + lbound) / 2)
206 switch (comp_keys 205 switch (comp_keys
207 ((struct reiserfs_key *)((char *)p_v_base + 206 ((struct reiserfs_key *)((char *)base + j * width),
208 n_j * p_n_width), 207 (struct cpu_key *)key)) {
209 (struct cpu_key *)p_v_key)) {
210 case -1: 208 case -1:
211 n_lbound = n_j + 1; 209 lbound = j + 1;
212 continue; 210 continue;
213 case 1: 211 case 1:
214 n_rbound = n_j - 1; 212 rbound = j - 1;
215 continue; 213 continue;
216 case 0: 214 case 0:
217 *p_n_pos = n_j; 215 *pos = j;
218 return ITEM_FOUND; /* Key found in the array. */ 216 return ITEM_FOUND; /* Key found in the array. */
219 } 217 }
220 218
221 /* bin_search did not find the given key; it returns the position 219 /* bin_search did not find the given key; it returns the position
222 of the smallest key greater than the given one. */ 220 of the smallest key greater than the given one. */
223 *p_n_pos = n_lbound; 221 *pos = lbound;
224 return ITEM_NOT_FOUND; 222 return ITEM_NOT_FOUND;
225} 223}
226 224
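Note what the width parameter buys: the same routine walks an array of item headers (width IH_SIZE) at the leaf level and an array of keys (width KEY_SIZE) in internal nodes, exactly the two widths search_by_key() passes further down. A cut-down generic version of the same loop, with an ordinary comparator in place of comp_keys() (return values follow the same -1/0/1 convention):

/* Generic fixed-width binary search mirroring bin_search() above. */
static int find_pos(const void *key, const void *base, int num, int width,
		    int (*cmp)(const void *elem, const void *key), int *pos)
{
	int lbound = 0, rbound = num - 1, j;

	while (lbound <= rbound) {
		j = (lbound + rbound) / 2;
		switch (cmp((const char *)base + j * width, key)) {
		case -1:
			lbound = j + 1;
			break;
		case 1:
			rbound = j - 1;
			break;
		default:
			*pos = j;
			return 1;	/* found */
		}
	}
	*pos = lbound;	/* first element greater than the key */
	return 0;
}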
@@ -243,90 +241,88 @@ static const struct reiserfs_key MAX_KEY = {
243 of the path, and going upwards. We must check the path's validity at each step. If the key is not in 241 of the path, and going upwards. We must check the path's validity at each step. If the key is not in
244 the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this 242 the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
245 case we return a special key, either MIN_KEY or MAX_KEY. */ 243 case we return a special key, either MIN_KEY or MAX_KEY. */
246static inline const struct reiserfs_key *get_lkey(const struct treepath 244static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
247 *p_s_chk_path, 245 const struct super_block *sb)
248 const struct super_block
249 *p_s_sb)
250{ 246{
251 int n_position, n_path_offset = p_s_chk_path->path_length; 247 int position, path_offset = chk_path->path_length;
252 struct buffer_head *p_s_parent; 248 struct buffer_head *parent;
253 249
254 RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET, 250 RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
255 "PAP-5010: invalid offset in the path"); 251 "PAP-5010: invalid offset in the path");
256 252
257 /* While not higher in path than first element. */ 253 /* While not higher in path than first element. */
258 while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { 254 while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
259 255
260 RFALSE(!buffer_uptodate 256 RFALSE(!buffer_uptodate
261 (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)), 257 (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
262 "PAP-5020: parent is not uptodate"); 258 "PAP-5020: parent is not uptodate");
263 259
264 /* Parent at the path is not in the tree now. */ 260 /* Parent at the path is not in the tree now. */
265 if (!B_IS_IN_TREE 261 if (!B_IS_IN_TREE
266 (p_s_parent = 262 (parent =
267 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset))) 263 PATH_OFFSET_PBUFFER(chk_path, path_offset)))
268 return &MAX_KEY; 264 return &MAX_KEY;
269 /* Check whether position in the parent is correct. */ 265 /* Check whether position in the parent is correct. */
270 if ((n_position = 266 if ((position =
271 PATH_OFFSET_POSITION(p_s_chk_path, 267 PATH_OFFSET_POSITION(chk_path,
272 n_path_offset)) > 268 path_offset)) >
273 B_NR_ITEMS(p_s_parent)) 269 B_NR_ITEMS(parent))
274 return &MAX_KEY; 270 return &MAX_KEY;
275 /* Check whether parent at the path really points to the child. */ 271 /* Check whether parent at the path really points to the child. */
276 if (B_N_CHILD_NUM(p_s_parent, n_position) != 272 if (B_N_CHILD_NUM(parent, position) !=
277 PATH_OFFSET_PBUFFER(p_s_chk_path, 273 PATH_OFFSET_PBUFFER(chk_path,
278 n_path_offset + 1)->b_blocknr) 274 path_offset + 1)->b_blocknr)
279 return &MAX_KEY; 275 return &MAX_KEY;
280 /* Return delimiting key if position in the parent is not equal to zero. */ 276 /* Return delimiting key if position in the parent is not equal to zero. */
281 if (n_position) 277 if (position)
282 return B_N_PDELIM_KEY(p_s_parent, n_position - 1); 278 return B_N_PDELIM_KEY(parent, position - 1);
283 } 279 }
284 /* Return MIN_KEY if we are in the root of the buffer tree. */ 280 /* Return MIN_KEY if we are in the root of the buffer tree. */
285 if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)-> 281 if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
286 b_blocknr == SB_ROOT_BLOCK(p_s_sb)) 282 b_blocknr == SB_ROOT_BLOCK(sb))
287 return &MIN_KEY; 283 return &MIN_KEY;
288 return &MAX_KEY; 284 return &MAX_KEY;
289} 285}
290 286
291/* Get delimiting key of the buffer at the path and its right neighbor. */ 287/* Get delimiting key of the buffer at the path and its right neighbor. */
292inline const struct reiserfs_key *get_rkey(const struct treepath *p_s_chk_path, 288inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
293 const struct super_block *p_s_sb) 289 const struct super_block *sb)
294{ 290{
295 int n_position, n_path_offset = p_s_chk_path->path_length; 291 int position, path_offset = chk_path->path_length;
296 struct buffer_head *p_s_parent; 292 struct buffer_head *parent;
297 293
298 RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET, 294 RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
299 "PAP-5030: invalid offset in the path"); 295 "PAP-5030: invalid offset in the path");
300 296
301 while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { 297 while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
302 298
303 RFALSE(!buffer_uptodate 299 RFALSE(!buffer_uptodate
304 (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)), 300 (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
305 "PAP-5040: parent is not uptodate"); 301 "PAP-5040: parent is not uptodate");
306 302
307 /* Parent at the path is not in the tree now. */ 303 /* Parent at the path is not in the tree now. */
308 if (!B_IS_IN_TREE 304 if (!B_IS_IN_TREE
309 (p_s_parent = 305 (parent =
310 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset))) 306 PATH_OFFSET_PBUFFER(chk_path, path_offset)))
311 return &MIN_KEY; 307 return &MIN_KEY;
312 /* Check whether position in the parent is correct. */ 308 /* Check whether position in the parent is correct. */
313 if ((n_position = 309 if ((position =
314 PATH_OFFSET_POSITION(p_s_chk_path, 310 PATH_OFFSET_POSITION(chk_path,
315 n_path_offset)) > 311 path_offset)) >
316 B_NR_ITEMS(p_s_parent)) 312 B_NR_ITEMS(parent))
317 return &MIN_KEY; 313 return &MIN_KEY;
318 /* Check whether parent at the path really points to the child. */ 314 /* Check whether parent at the path really points to the child. */
319 if (B_N_CHILD_NUM(p_s_parent, n_position) != 315 if (B_N_CHILD_NUM(parent, position) !=
320 PATH_OFFSET_PBUFFER(p_s_chk_path, 316 PATH_OFFSET_PBUFFER(chk_path,
321 n_path_offset + 1)->b_blocknr) 317 path_offset + 1)->b_blocknr)
322 return &MIN_KEY; 318 return &MIN_KEY;
323 /* Return delimiting key if position in the parent is not the last one. */ 319 /* Return delimiting key if position in the parent is not the last one. */
324 if (n_position != B_NR_ITEMS(p_s_parent)) 320 if (position != B_NR_ITEMS(parent))
325 return B_N_PDELIM_KEY(p_s_parent, n_position); 321 return B_N_PDELIM_KEY(parent, position);
326 } 322 }
327 /* Return MAX_KEY if we are in the root of the buffer tree. */ 323 /* Return MAX_KEY if we are in the root of the buffer tree. */
328 if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)-> 324 if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
329 b_blocknr == SB_ROOT_BLOCK(p_s_sb)) 325 b_blocknr == SB_ROOT_BLOCK(sb))
330 return &MAX_KEY; 326 return &MAX_KEY;
331 return &MIN_KEY; 327 return &MIN_KEY;
332} 328}
@@ -336,60 +332,29 @@ inline const struct reiserfs_key *get_rkey(const struct treepath *p_s_chk_path,
336 the path. These delimiting keys are stored at least one level above that buffer in the tree. If the 332 the path. These delimiting keys are stored at least one level above that buffer in the tree. If the
337 buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in 333 buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
338 this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */ 334 this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
339static inline int key_in_buffer(struct treepath *p_s_chk_path, /* Path which should be checked. */ 335static inline int key_in_buffer(struct treepath *chk_path, /* Path which should be checked. */
340 const struct cpu_key *p_s_key, /* Key which should be checked. */ 336 const struct cpu_key *key, /* Key which should be checked. */
341 struct super_block *p_s_sb /* Super block pointer. */ 337 struct super_block *sb
342 ) 338 )
343{ 339{
344 340
345 RFALSE(!p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET 341 RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
346 || p_s_chk_path->path_length > MAX_HEIGHT, 342 || chk_path->path_length > MAX_HEIGHT,
347 "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)", 343 "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
348 p_s_key, p_s_chk_path->path_length); 344 key, chk_path->path_length);
349 RFALSE(!PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev, 345 RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev,
350 "PAP-5060: device must not be NODEV"); 346 "PAP-5060: device must not be NODEV");
351 347
352 if (comp_keys(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1) 348 if (comp_keys(get_lkey(chk_path, sb), key) == 1)
353 /* left delimiting key is bigger than the key we look for */ 349 /* left delimiting key is bigger than the key we look for */
354 return 0; 350 return 0;
355 // if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 ) 351 /* if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */
356 if (comp_keys(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1) 352 if (comp_keys(get_rkey(chk_path, sb), key) != 1)
357 /* p_s_key must be less than right delimiting key */ 353 /* key must be less than right delimiting key */
358 return 0; 354 return 0;
359 return 1; 355 return 1;
360} 356}
361 357
362inline void decrement_bcount(struct buffer_head *p_s_bh)
363{
364 if (p_s_bh) {
365 if (atomic_read(&(p_s_bh->b_count))) {
366 put_bh(p_s_bh);
367 return;
368 }
369 reiserfs_panic(NULL,
370 "PAP-5070: decrement_bcount: trying to free free buffer %b",
371 p_s_bh);
372 }
373}
374
375/* Decrement b_count field of the all buffers in the path. */
376void decrement_counters_in_path(struct treepath *p_s_search_path)
377{
378 int n_path_offset = p_s_search_path->path_length;
379
380 RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET ||
381 n_path_offset > EXTENDED_MAX_HEIGHT - 1,
382 "PAP-5080: invalid path offset of %d", n_path_offset);
383
384 while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
385 struct buffer_head *bh;
386
387 bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
388 decrement_bcount(bh);
389 }
390 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
391}
392
393int reiserfs_check_path(struct treepath *p) 358int reiserfs_check_path(struct treepath *p)
394{ 359{
395 RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET, 360 RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
@@ -397,40 +362,38 @@ int reiserfs_check_path(struct treepath *p)
397 return 0; 362 return 0;
398} 363}
399 364
400/* Release all buffers in the path. Restore dirty bits clean 365/* Drop the reference to each buffer in a path and restore
401** when preparing the buffer for the log 366 * the dirty bits cleared while preparing the buffer for the log.
402** 367 * This version should only be called from fix_nodes() */
403** only called from fix_nodes() 368void pathrelse_and_restore(struct super_block *sb,
404*/ 369 struct treepath *search_path)
405void pathrelse_and_restore(struct super_block *s, struct treepath *p_s_search_path)
406{ 370{
407 int n_path_offset = p_s_search_path->path_length; 371 int path_offset = search_path->path_length;
408 372
409 RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, 373 RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
410 "clm-4000: invalid path offset"); 374 "clm-4000: invalid path offset");
411 375
412 while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) { 376 while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
413 reiserfs_restore_prepared_buffer(s, 377 struct buffer_head *bh;
414 PATH_OFFSET_PBUFFER 378 bh = PATH_OFFSET_PBUFFER(search_path, path_offset--);
415 (p_s_search_path, 379 reiserfs_restore_prepared_buffer(sb, bh);
416 n_path_offset)); 380 brelse(bh);
417 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
418 } 381 }
419 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; 382 search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
420} 383}
421 384
422/* Release all buffers in the path. */ 385/* Drop the reference to each buffer in a path */
423void pathrelse(struct treepath *p_s_search_path) 386void pathrelse(struct treepath *search_path)
424{ 387{
425 int n_path_offset = p_s_search_path->path_length; 388 int path_offset = search_path->path_length;
426 389
427 RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, 390 RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
428 "PAP-5090: invalid path offset"); 391 "PAP-5090: invalid path offset");
429 392
430 while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) 393 while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
431 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--)); 394 brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--));
432 395
433 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; 396 search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
434} 397}
435 398
436static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) 399static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
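pathrelse() and pathrelse_and_restore() above are the put side of a get/put discipline: search_by_key() stores one referenced buffer per path element (sb_getblk() returns the buffer held), and release walks the path back down, dropping one reference per element before marking the path illegal. The pairing, reduced to simplified types:

/* Simplified get/put pairing for a buffer path; not the tree code. */
#include <linux/buffer_head.h>

struct toy_path { struct buffer_head *buf[8]; int len; };

static void toy_path_push(struct toy_path *p, struct buffer_head *bh)
{
	get_bh(bh);			/* hold while on the path */
	p->buf[p->len++] = bh;
}

static void toy_path_release(struct toy_path *p)
{
	while (p->len > 0)
		brelse(p->buf[--p->len]);	/* drop in LIFO order */
}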
@@ -444,23 +407,24 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
444 407
445 blkh = (struct block_head *)buf; 408 blkh = (struct block_head *)buf;
446 if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) { 409 if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
447 reiserfs_warning(NULL, 410 reiserfs_warning(NULL, "reiserfs-5080",
448 "is_leaf: this should be caught earlier"); 411 "this should be caught earlier");
449 return 0; 412 return 0;
450 } 413 }
451 414
452 nr = blkh_nr_item(blkh); 415 nr = blkh_nr_item(blkh);
453 if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) { 416 if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
454 /* item number is too big or too small */ 417 /* item number is too big or too small */
455 reiserfs_warning(NULL, "is_leaf: nr_item seems wrong: %z", bh); 418 reiserfs_warning(NULL, "reiserfs-5081",
419 "nr_item seems wrong: %z", bh);
456 return 0; 420 return 0;
457 } 421 }
458 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; 422 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
459 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih)); 423 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
460 if (used_space != blocksize - blkh_free_space(blkh)) { 424 if (used_space != blocksize - blkh_free_space(blkh)) {
461 /* free space does not match the calculated amount of used space */ 425 /* free space does not match the calculated amount of used space */
462 reiserfs_warning(NULL, "is_leaf: free space seems wrong: %z", 426 reiserfs_warning(NULL, "reiserfs-5082",
463 bh); 427 "free space seems wrong: %z", bh);
464 return 0; 428 return 0;
465 } 429 }
466 // FIXME: is_leaf will hit performance too much - we may have 430 // FIXME: is_leaf will hit performance too much - we may have
@@ -471,29 +435,29 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
471 prev_location = blocksize; 435 prev_location = blocksize;
472 for (i = 0; i < nr; i++, ih++) { 436 for (i = 0; i < nr; i++, ih++) {
473 if (le_ih_k_type(ih) == TYPE_ANY) { 437 if (le_ih_k_type(ih) == TYPE_ANY) {
474 reiserfs_warning(NULL, 438 reiserfs_warning(NULL, "reiserfs-5083",
475 "is_leaf: wrong item type for item %h", 439 "wrong item type for item %h",
476 ih); 440 ih);
477 return 0; 441 return 0;
478 } 442 }
479 if (ih_location(ih) >= blocksize 443 if (ih_location(ih) >= blocksize
480 || ih_location(ih) < IH_SIZE * nr) { 444 || ih_location(ih) < IH_SIZE * nr) {
481 reiserfs_warning(NULL, 445 reiserfs_warning(NULL, "reiserfs-5084",
482 "is_leaf: item location seems wrong: %h", 446 "item location seems wrong: %h",
483 ih); 447 ih);
484 return 0; 448 return 0;
485 } 449 }
486 if (ih_item_len(ih) < 1 450 if (ih_item_len(ih) < 1
487 || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) { 451 || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
488 reiserfs_warning(NULL, 452 reiserfs_warning(NULL, "reiserfs-5085",
489 "is_leaf: item length seems wrong: %h", 453 "item length seems wrong: %h",
490 ih); 454 ih);
491 return 0; 455 return 0;
492 } 456 }
493 if (prev_location - ih_location(ih) != ih_item_len(ih)) { 457 if (prev_location - ih_location(ih) != ih_item_len(ih)) {
494 reiserfs_warning(NULL, 458 reiserfs_warning(NULL, "reiserfs-5086",
495 "is_leaf: item location seems wrong (second one): %h", 459 "item location seems wrong "
496 ih); 460 "(second one): %h", ih);
497 return 0; 461 return 0;
498 } 462 }
499 prev_location = ih_location(ih); 463 prev_location = ih_location(ih);
@@ -514,24 +478,23 @@ static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
514 nr = blkh_level(blkh); 478 nr = blkh_level(blkh);
515 if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) { 479 if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
516 /* this level is not possible for internal nodes */ 480 /* this level is not possible for internal nodes */
517 reiserfs_warning(NULL, 481 reiserfs_warning(NULL, "reiserfs-5087",
518 "is_internal: this should be caught earlier"); 482 "this should be caught earlier");
519 return 0; 483 return 0;
520 } 484 }
521 485
522 nr = blkh_nr_item(blkh); 486 nr = blkh_nr_item(blkh);
523 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { 487 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
524 /* for an internal node which is not the root we might check the min number of keys */ 488 /* for an internal node which is not the root we might check the min number of keys */
525 reiserfs_warning(NULL, 489 reiserfs_warning(NULL, "reiserfs-5088",
526 "is_internal: number of key seems wrong: %z", 490 "number of key seems wrong: %z", bh);
527 bh);
528 return 0; 491 return 0;
529 } 492 }
530 493
531 used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1); 494 used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
532 if (used_space != blocksize - blkh_free_space(blkh)) { 495 if (used_space != blocksize - blkh_free_space(blkh)) {
533 reiserfs_warning(NULL, 496 reiserfs_warning(NULL, "reiserfs-5089",
534 "is_internal: free space seems wrong: %z", bh); 497 "free space seems wrong: %z", bh);
535 return 0; 498 return 0;
536 } 499 }
537 // one may imagine many more checks 500 // one may imagine many more checks
@@ -543,8 +506,8 @@ static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
543static int is_tree_node(struct buffer_head *bh, int level) 506static int is_tree_node(struct buffer_head *bh, int level)
544{ 507{
545 if (B_LEVEL(bh) != level) { 508 if (B_LEVEL(bh) != level) {
546 reiserfs_warning(NULL, 509 reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
547 "is_tree_node: node level %d does not match to the expected one %d", 510 "not match to the expected one %d",
548 B_LEVEL(bh), level); 511 B_LEVEL(bh), level);
549 return 0; 512 return 0;
550 } 513 }
@@ -580,10 +543,10 @@ static void search_by_key_reada(struct super_block *s,
580/************************************************************************** 543/**************************************************************************
581 * Algorithm SearchByKey * 544 * Algorithm SearchByKey *
582 * look for item in the Disk S+Tree by its key * 545 * look for item in the Disk S+Tree by its key *
583 * Input: p_s_sb - super block * 546 * Input: sb - super block *
584 * p_s_key - pointer to the key to search * 547 * key - pointer to the key to search *
585 * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR * 548 * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR *
586 * p_s_search_path - path from the root to the needed leaf * 549 * search_path - path from the root to the needed leaf *
587 **************************************************************************/ 550 **************************************************************************/
588 551
589/* This function fills up the path from the root to the leaf as it 552/* This function fills up the path from the root to the leaf as it
@@ -600,22 +563,22 @@ static void search_by_key_reada(struct super_block *s,
600 correctness of the top of the path but need not be checked for the 563 correctness of the top of the path but need not be checked for the
601 correctness of the bottom of the path */ 564 correctness of the bottom of the path */
602/* The function is NOT SCHEDULE-SAFE! */ 565/* The function is NOT SCHEDULE-SAFE! */
603int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /* Key to search. */ 566int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to search. */
604 struct treepath *p_s_search_path,/* This structure was 567 struct treepath *search_path,/* This structure was
605 allocated and initialized 568 allocated and initialized
606 by the calling 569 by the calling
607 function. It is filled up 570 function. It is filled up
608 by this function. */ 571 by this function. */
609 int n_stop_level /* How far down the tree to search. To 572 int stop_level /* How far down the tree to search. To
610 stop at leaf level - set to 573 stop at leaf level - set to
611 DISK_LEAF_NODE_LEVEL */ 574 DISK_LEAF_NODE_LEVEL */
612 ) 575 )
613{ 576{
614 b_blocknr_t n_block_number; 577 b_blocknr_t block_number;
615 int expected_level; 578 int expected_level;
616 struct buffer_head *p_s_bh; 579 struct buffer_head *bh;
617 struct path_element *p_s_last_element; 580 struct path_element *last_element;
618 int n_node_level, n_retval; 581 int node_level, retval;
619 int right_neighbor_of_leaf_node; 582 int right_neighbor_of_leaf_node;
620 int fs_gen; 583 int fs_gen;
621 struct buffer_head *reada_bh[SEARCH_BY_KEY_READA]; 584 struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
@@ -623,80 +586,79 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /*
623 int reada_count = 0; 586 int reada_count = 0;
624 587
625#ifdef CONFIG_REISERFS_CHECK 588#ifdef CONFIG_REISERFS_CHECK
626 int n_repeat_counter = 0; 589 int repeat_counter = 0;
627#endif 590#endif
628 591
629 PROC_INFO_INC(p_s_sb, search_by_key); 592 PROC_INFO_INC(sb, search_by_key);
630 593
631 /* As we add each node to a path we increase its count. This means that 594 /* As we add each node to a path we increase its count. This means that
632 we must be careful to release all nodes in a path before we either 595 we must be careful to release all nodes in a path before we either
633 discard the path struct or re-use the path struct, as we do here. */ 596 discard the path struct or re-use the path struct, as we do here. */
634 597
635 decrement_counters_in_path(p_s_search_path); 598 pathrelse(search_path);
636 599
637 right_neighbor_of_leaf_node = 0; 600 right_neighbor_of_leaf_node = 0;
638 601
639 /* With each iteration of this loop we search through the items in the 602 /* With each iteration of this loop we search through the items in the
640 current node, and calculate the next current node (next path element) 603 current node, and calculate the next current node (next path element)
641 for the next iteration of this loop. */ 604 for the next iteration of this loop. */
642 n_block_number = SB_ROOT_BLOCK(p_s_sb); 605 block_number = SB_ROOT_BLOCK(sb);
643 expected_level = -1; 606 expected_level = -1;
644 while (1) { 607 while (1) {
645 608
646#ifdef CONFIG_REISERFS_CHECK 609#ifdef CONFIG_REISERFS_CHECK
647 if (!(++n_repeat_counter % 50000)) 610 if (!(++repeat_counter % 50000))
648 reiserfs_warning(p_s_sb, "PAP-5100: search_by_key: %s:" 611 reiserfs_warning(sb, "PAP-5100",
649 "there were %d iterations of while loop " 612 "%s: there were %d iterations of "
650 "looking for key %K", 613 "while loop looking for key %K",
651 current->comm, n_repeat_counter, 614 current->comm, repeat_counter,
652 p_s_key); 615 key);
653#endif 616#endif
654 617
655 /* prep path to have another element added to it. */ 618 /* prep path to have another element added to it. */
656 p_s_last_element = 619 last_element =
657 PATH_OFFSET_PELEMENT(p_s_search_path, 620 PATH_OFFSET_PELEMENT(search_path,
658 ++p_s_search_path->path_length); 621 ++search_path->path_length);
659 fs_gen = get_generation(p_s_sb); 622 fs_gen = get_generation(sb);
660 623
661 /* Read the next tree node, and set the last element in the path to 624 /* Read the next tree node, and set the last element in the path to
662 have a pointer to it. */ 625 have a pointer to it. */
663 if ((p_s_bh = p_s_last_element->pe_buffer = 626 if ((bh = last_element->pe_buffer =
664 sb_getblk(p_s_sb, n_block_number))) { 627 sb_getblk(sb, block_number))) {
665 if (!buffer_uptodate(p_s_bh) && reada_count > 1) { 628 if (!buffer_uptodate(bh) && reada_count > 1)
666 search_by_key_reada(p_s_sb, reada_bh, 629 search_by_key_reada(sb, reada_bh,
667 reada_blocks, reada_count); 630 reada_blocks, reada_count);
668 } 631 ll_rw_block(READ, 1, &bh);
669 ll_rw_block(READ, 1, &p_s_bh); 632 wait_on_buffer(bh);
670 wait_on_buffer(p_s_bh); 633 if (!buffer_uptodate(bh))
671 if (!buffer_uptodate(p_s_bh))
672 goto io_error; 634 goto io_error;
673 } else { 635 } else {
674 io_error: 636 io_error:
675 p_s_search_path->path_length--; 637 search_path->path_length--;
676 pathrelse(p_s_search_path); 638 pathrelse(search_path);
677 return IO_ERROR; 639 return IO_ERROR;
678 } 640 }
679 reada_count = 0; 641 reada_count = 0;
680 if (expected_level == -1) 642 if (expected_level == -1)
681 expected_level = SB_TREE_HEIGHT(p_s_sb); 643 expected_level = SB_TREE_HEIGHT(sb);
682 expected_level--; 644 expected_level--;
683 645
684 /* It is possible that a schedule occurred. We must check whether the 646 /* It is possible that a schedule occurred. We must check whether the
685 key to search is still in the tree rooted at the current buffer. If 647 key to search is still in the tree rooted at the current buffer. If
686 not, then repeat the search from the root. */ 648 not, then repeat the search from the root. */
687 if (fs_changed(fs_gen, p_s_sb) && 649 if (fs_changed(fs_gen, sb) &&
688 (!B_IS_IN_TREE(p_s_bh) || 650 (!B_IS_IN_TREE(bh) ||
689 B_LEVEL(p_s_bh) != expected_level || 651 B_LEVEL(bh) != expected_level ||
690 !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) { 652 !key_in_buffer(search_path, key, sb))) {
691 PROC_INFO_INC(p_s_sb, search_by_key_fs_changed); 653 PROC_INFO_INC(sb, search_by_key_fs_changed);
692 PROC_INFO_INC(p_s_sb, search_by_key_restarted); 654 PROC_INFO_INC(sb, search_by_key_restarted);
693 PROC_INFO_INC(p_s_sb, 655 PROC_INFO_INC(sb,
694 sbk_restarted[expected_level - 1]); 656 sbk_restarted[expected_level - 1]);
695 decrement_counters_in_path(p_s_search_path); 657 pathrelse(search_path);
696 658
697 /* Get the root block number so that we can repeat the search 659 /* Get the root block number so that we can repeat the search
698 starting from the root. */ 660 starting from the root. */
699 n_block_number = SB_ROOT_BLOCK(p_s_sb); 661 block_number = SB_ROOT_BLOCK(sb);
700 expected_level = -1; 662 expected_level = -1;
701 right_neighbor_of_leaf_node = 0; 663 right_neighbor_of_leaf_node = 0;
702 664
@@ -704,53 +666,53 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /*
704 continue; 666 continue;
705 } 667 }
706 668
707 /* only check that the key is in the buffer if p_s_key is not 669 /* only check that the key is in the buffer if key is not
708 equal to the MAX_KEY. The latter case is only possible in 670 equal to the MAX_KEY. The latter case is only possible in
709 "finish_unfinished()" processing during mount. */ 671 "finish_unfinished()" processing during mount. */
710 RFALSE(comp_keys(&MAX_KEY, p_s_key) && 672 RFALSE(comp_keys(&MAX_KEY, key) &&
711 !key_in_buffer(p_s_search_path, p_s_key, p_s_sb), 673 !key_in_buffer(search_path, key, sb),
712 "PAP-5130: key is not in the buffer"); 674 "PAP-5130: key is not in the buffer");
713#ifdef CONFIG_REISERFS_CHECK 675#ifdef CONFIG_REISERFS_CHECK
714 if (cur_tb) { 676 if (cur_tb) {
715 print_cur_tb("5140"); 677 print_cur_tb("5140");
716 reiserfs_panic(p_s_sb, 678 reiserfs_panic(sb, "PAP-5140",
717 "PAP-5140: search_by_key: schedule occurred in do_balance!"); 679 "schedule occurred in do_balance!");
718 } 680 }
719#endif 681#endif
720 682
721 // make sure that the node contents look like a node of the 683 // make sure that the node contents look like a node of the
722 // expected level 684 // expected level
723 if (!is_tree_node(p_s_bh, expected_level)) { 685 if (!is_tree_node(bh, expected_level)) {
724 reiserfs_warning(p_s_sb, "vs-5150: search_by_key: " 686 reiserfs_error(sb, "vs-5150",
725 "invalid format found in block %ld. Fsck?", 687 "invalid format found in block %ld. "
726 p_s_bh->b_blocknr); 688 "Fsck?", bh->b_blocknr);
727 pathrelse(p_s_search_path); 689 pathrelse(search_path);
728 return IO_ERROR; 690 return IO_ERROR;
729 } 691 }
730 692
731 /* ok, we have acquired next formatted node in the tree */ 693 /* ok, we have acquired next formatted node in the tree */
732 n_node_level = B_LEVEL(p_s_bh); 694 node_level = B_LEVEL(bh);
733 695
734 PROC_INFO_BH_STAT(p_s_sb, p_s_bh, n_node_level - 1); 696 PROC_INFO_BH_STAT(sb, bh, node_level - 1);
735 697
736 RFALSE(n_node_level < n_stop_level, 698 RFALSE(node_level < stop_level,
737 "vs-5152: tree level (%d) is less than stop level (%d)", 699 "vs-5152: tree level (%d) is less than stop level (%d)",
738 n_node_level, n_stop_level); 700 node_level, stop_level);
739 701
740 n_retval = bin_search(p_s_key, B_N_PITEM_HEAD(p_s_bh, 0), 702 retval = bin_search(key, B_N_PITEM_HEAD(bh, 0),
741 B_NR_ITEMS(p_s_bh), 703 B_NR_ITEMS(bh),
742 (n_node_level == 704 (node_level ==
743 DISK_LEAF_NODE_LEVEL) ? IH_SIZE : 705 DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
744 KEY_SIZE, 706 KEY_SIZE,
745 &(p_s_last_element->pe_position)); 707 &(last_element->pe_position));
746 if (n_node_level == n_stop_level) { 708 if (node_level == stop_level) {
747 return n_retval; 709 return retval;
748 } 710 }
749 711
750 /* we are not in the stop level */ 712 /* we are not in the stop level */
751 if (n_retval == ITEM_FOUND) 713 if (retval == ITEM_FOUND)
752 /* item has been found, so we choose the pointer which is to the right of the found one */ 714 /* item has been found, so we choose the pointer which is to the right of the found one */
753 p_s_last_element->pe_position++; 715 last_element->pe_position++;
754 716
755 /* if item was not found we choose the position which is to 717 /* if item was not found we choose the position which is to
756 the left of the found item. This requires no code, 718 the left of the found item. This requires no code,
@@ -759,24 +721,24 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /*
759 /* So we have chosen a position in the current node which is 721 /* So we have chosen a position in the current node which is
760 an internal node. Now we calculate child block number by 722 an internal node. Now we calculate child block number by
761 position in the node. */ 723 position in the node. */
762 n_block_number = 724 block_number =
763 B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position); 725 B_N_CHILD_NUM(bh, last_element->pe_position);
764 726
765 /* if we are going to read leaf nodes, try for read ahead as well */ 727 /* if we are going to read leaf nodes, try for read ahead as well */
766 if ((p_s_search_path->reada & PATH_READA) && 728 if ((search_path->reada & PATH_READA) &&
767 n_node_level == DISK_LEAF_NODE_LEVEL + 1) { 729 node_level == DISK_LEAF_NODE_LEVEL + 1) {
768 int pos = p_s_last_element->pe_position; 730 int pos = last_element->pe_position;
769 int limit = B_NR_ITEMS(p_s_bh); 731 int limit = B_NR_ITEMS(bh);
770 struct reiserfs_key *le_key; 732 struct reiserfs_key *le_key;
771 733
772 if (p_s_search_path->reada & PATH_READA_BACK) 734 if (search_path->reada & PATH_READA_BACK)
773 limit = 0; 735 limit = 0;
774 while (reada_count < SEARCH_BY_KEY_READA) { 736 while (reada_count < SEARCH_BY_KEY_READA) {
775 if (pos == limit) 737 if (pos == limit)
776 break; 738 break;
777 reada_blocks[reada_count++] = 739 reada_blocks[reada_count++] =
778 B_N_CHILD_NUM(p_s_bh, pos); 740 B_N_CHILD_NUM(bh, pos);
779 if (p_s_search_path->reada & PATH_READA_BACK) 741 if (search_path->reada & PATH_READA_BACK)
780 pos--; 742 pos--;
781 else 743 else
782 pos++; 744 pos++;
@@ -784,9 +746,9 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /*
784 /* 746 /*
785 * check to make sure we're in the same object 747 * check to make sure we're in the same object
786 */ 748 */
787 le_key = B_N_PDELIM_KEY(p_s_bh, pos); 749 le_key = B_N_PDELIM_KEY(bh, pos);
788 if (le32_to_cpu(le_key->k_objectid) != 750 if (le32_to_cpu(le_key->k_objectid) !=
789 p_s_key->on_disk_key.k_objectid) { 751 key->on_disk_key.k_objectid) {
790 break; 752 break;
791 } 753 }
792 } 754 }
@@ -795,11 +757,11 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /*
795} 757}
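
Inside the descent, the child choice after bin_search() is deliberately asymmetric: an exact match in an internal node takes the pointer to the right of the matching delimiting key, while a miss needs no adjustment because a lower-bound search already leaves the position at the correct left child. A compact sketch of that rule, assuming an array-based internal node with invented key values:

#include <stdio.h>

#define FOUND     1
#define NOT_FOUND 0

/* Classic lower-bound binary search: on NOT_FOUND, *pos is the slot
 * of the first key greater than the target, i.e. already the child
 * pointer to the left of where the key would live. */
static int bin_search(const int *keys, int nr, int key, int *pos)
{
        int lo = 0, hi = nr;

        while (lo < hi) {
                int mid = lo + (hi - lo) / 2;
                if (keys[mid] < key)
                        lo = mid + 1;
                else
                        hi = mid;
        }
        *pos = lo;
        return (lo < nr && keys[lo] == key) ? FOUND : NOT_FOUND;
}

int main(void)
{
        int keys[] = { 10, 20, 30 };  /* delimiting keys of an internal node */
        int pos;

        /* children[i] covers keys below keys[i]; children[3] covers >= 30 */
        if (bin_search(keys, 3, 20, &pos) == FOUND)
                pos++;  /* exact match: take the pointer to the right */

        printf("descend through child slot %d\n", pos);  /* prints 2 */
        return 0;
}
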
796 758
797/* Form the path to an item and position in this item which contains 759/* Form the path to an item and position in this item which contains
798 file byte defined by p_s_key. If there is no such item 760 file byte defined by key. If there is no such item
799 corresponding to the key, we point the path to the item with 761 corresponding to the key, we point the path to the item with
800 maximal key less than p_s_key, and *p_n_pos_in_item is set to one 762 maximal key less than key, and *pos_in_item is set to one
801 past the last entry/byte in the item. If searching for entry in a 763 past the last entry/byte in the item. If searching for entry in a
802 directory item, and it is not found, *p_n_pos_in_item is set to one 764 directory item, and it is not found, *pos_in_item is set to one
803 entry more than the entry with maximal key which is less than the 765 entry more than the entry with maximal key which is less than the
804 sought key. 766 sought key.
805 767
@@ -810,48 +772,48 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /*
810 units of directory entries. */ 772 units of directory entries. */
811 773
812/* The function is NOT SCHEDULE-SAFE! */ 774/* The function is NOT SCHEDULE-SAFE! */
813int search_for_position_by_key(struct super_block *p_s_sb, /* Pointer to the super block. */ 775int search_for_position_by_key(struct super_block *sb, /* Pointer to the super block. */
814 const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */ 776 const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */
815 struct treepath *p_s_search_path /* Filled up by this function. */ 777 struct treepath *search_path /* Filled up by this function. */
816 ) 778 )
817{ 779{
818 struct item_head *p_le_ih; /* pointer to on-disk structure */ 780 struct item_head *p_le_ih; /* pointer to on-disk structure */
819 int n_blk_size; 781 int blk_size;
820 loff_t item_offset, offset; 782 loff_t item_offset, offset;
821 struct reiserfs_dir_entry de; 783 struct reiserfs_dir_entry de;
822 int retval; 784 int retval;
823 785
824 /* If searching for directory entry. */ 786 /* If searching for directory entry. */
825 if (is_direntry_cpu_key(p_cpu_key)) 787 if (is_direntry_cpu_key(p_cpu_key))
826 return search_by_entry_key(p_s_sb, p_cpu_key, p_s_search_path, 788 return search_by_entry_key(sb, p_cpu_key, search_path,
827 &de); 789 &de);
828 790
829 /* If not searching for directory entry. */ 791 /* If not searching for directory entry. */
830 792
831 /* If item is found. */ 793 /* If item is found. */
832 retval = search_item(p_s_sb, p_cpu_key, p_s_search_path); 794 retval = search_item(sb, p_cpu_key, search_path);
833 if (retval == IO_ERROR) 795 if (retval == IO_ERROR)
834 return retval; 796 return retval;
835 if (retval == ITEM_FOUND) { 797 if (retval == ITEM_FOUND) {
836 798
837 RFALSE(!ih_item_len 799 RFALSE(!ih_item_len
838 (B_N_PITEM_HEAD 800 (B_N_PITEM_HEAD
839 (PATH_PLAST_BUFFER(p_s_search_path), 801 (PATH_PLAST_BUFFER(search_path),
840 PATH_LAST_POSITION(p_s_search_path))), 802 PATH_LAST_POSITION(search_path))),
841 "PAP-5165: item length equals zero"); 803 "PAP-5165: item length equals zero");
842 804
843 pos_in_item(p_s_search_path) = 0; 805 pos_in_item(search_path) = 0;
844 return POSITION_FOUND; 806 return POSITION_FOUND;
845 } 807 }
846 808
847 RFALSE(!PATH_LAST_POSITION(p_s_search_path), 809 RFALSE(!PATH_LAST_POSITION(search_path),
848 "PAP-5170: position equals zero"); 810 "PAP-5170: position equals zero");
849 811
850 /* Item is not found. Set path to the previous item. */ 812 /* Item is not found. Set path to the previous item. */
851 p_le_ih = 813 p_le_ih =
852 B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), 814 B_N_PITEM_HEAD(PATH_PLAST_BUFFER(search_path),
853 --PATH_LAST_POSITION(p_s_search_path)); 815 --PATH_LAST_POSITION(search_path));
854 n_blk_size = p_s_sb->s_blocksize; 816 blk_size = sb->s_blocksize;
855 817
856 if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) { 818 if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
857 return FILE_NOT_FOUND; 819 return FILE_NOT_FOUND;
@@ -863,10 +825,10 @@ int search_for_position_by_key(struct super_block *p_s_sb, /* Pointer to the sup
863 825
864 /* Needed byte is contained in the item pointed to by the path. */ 826 /* Needed byte is contained in the item pointed to by the path. */
865 if (item_offset <= offset && 827 if (item_offset <= offset &&
866 item_offset + op_bytes_number(p_le_ih, n_blk_size) > offset) { 828 item_offset + op_bytes_number(p_le_ih, blk_size) > offset) {
867 pos_in_item(p_s_search_path) = offset - item_offset; 829 pos_in_item(search_path) = offset - item_offset;
868 if (is_indirect_le_ih(p_le_ih)) { 830 if (is_indirect_le_ih(p_le_ih)) {
869 pos_in_item(p_s_search_path) /= n_blk_size; 831 pos_in_item(search_path) /= blk_size;
870 } 832 }
871 return POSITION_FOUND; 833 return POSITION_FOUND;
872 } 834 }
@@ -874,30 +836,30 @@ int search_for_position_by_key(struct super_block *p_s_sb, /* Pointer to the sup
874 /* Needed byte is not contained in the item pointed to by the 836 /* Needed byte is not contained in the item pointed to by the
875 path. Set pos_in_item out of the item. */ 837 path. Set pos_in_item out of the item. */
876 if (is_indirect_le_ih(p_le_ih)) 838 if (is_indirect_le_ih(p_le_ih))
877 pos_in_item(p_s_search_path) = 839 pos_in_item(search_path) =
878 ih_item_len(p_le_ih) / UNFM_P_SIZE; 840 ih_item_len(p_le_ih) / UNFM_P_SIZE;
879 else 841 else
880 pos_in_item(p_s_search_path) = ih_item_len(p_le_ih); 842 pos_in_item(search_path) = ih_item_len(p_le_ih);
881 843
882 return POSITION_NOT_FOUND; 844 return POSITION_NOT_FOUND;
883} 845}
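
The tail of search_for_position_by_key() converts a file byte offset into a position inside the found item; for indirect items the position counts unformatted-node pointers rather than bytes, hence the division by the block size. The arithmetic in isolation (offsets simplified to 0-based here; all values invented):

#include <stdio.h>

/* Model of the pos_in_item computation: item_offset is the file
 * offset of the item's first byte, item_bytes its coverage in bytes. */
static long pos_in_item_model(long offset, long item_offset,
                              long item_bytes, int indirect, int blk_size)
{
        if (offset < item_offset || offset >= item_offset + item_bytes)
                return -1;                      /* POSITION_NOT_FOUND */

        if (indirect)                           /* count block pointers */
                return (offset - item_offset) / blk_size;
        return offset - item_offset;            /* count bytes */
}

int main(void)
{
        /* byte 9000 of a file, inside an indirect item that starts at
         * offset 4096 and covers three 4 KiB blocks */
        printf("pos_in_item = %ld\n",
               pos_in_item_model(9000, 4096, 3 * 4096, 1, 4096));  /* 1 */
        return 0;
}
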
884 846
885/* Compare given item and item pointed to by the path. */ 847/* Compare given item and item pointed to by the path. */
886int comp_items(const struct item_head *stored_ih, const struct treepath *p_s_path) 848int comp_items(const struct item_head *stored_ih, const struct treepath *path)
887{ 849{
888 struct buffer_head *p_s_bh; 850 struct buffer_head *bh = PATH_PLAST_BUFFER(path);
889 struct item_head *ih; 851 struct item_head *ih;
890 852
891 /* Last buffer at the path is not in the tree. */ 853 /* Last buffer at the path is not in the tree. */
892 if (!B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path))) 854 if (!B_IS_IN_TREE(bh))
893 return 1; 855 return 1;
894 856
895 /* Last path position is invalid. */ 857 /* Last path position is invalid. */
896 if (PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh)) 858 if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh))
897 return 1; 859 return 1;
898 860
899 /* we need only to know, whether it is the same item */ 861 /* we need only to know, whether it is the same item */
900 ih = get_ih(p_s_path); 862 ih = get_ih(path);
901 return memcmp(stored_ih, ih, IH_SIZE); 863 return memcmp(stored_ih, ih, IH_SIZE);
902} 864}
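
comp_items() exists for one purpose: callers snapshot an item head with copy_item_head() before an operation that can schedule, then compare the stored head against whatever the path points at afterwards, and repeat the search on mismatch. A user-space model of that save-and-recheck idiom, with a stand-in struct instead of the real struct item_head:

#include <stdio.h>
#include <string.h>

struct item_head_model {        /* invented stand-in for struct item_head */
        unsigned int objectid;
        unsigned int offset;
        unsigned short len;
};

/* 0: same item still under the path; nonzero: it moved, re-search */
static int comp_items_model(const struct item_head_model *stored,
                            const struct item_head_model *current)
{
        return memcmp(stored, current, sizeof(*stored)) != 0;
}

int main(void)
{
        struct item_head_model on_disk = { 42, 4097, 4096 };
        struct item_head_model saved;

        memcpy(&saved, &on_disk, sizeof(saved));   /* copy_item_head() */
        on_disk.offset = 1;                        /* balancing moved the item */

        if (comp_items_model(&saved, &on_disk))
                printf("item moved: repeat the search\n");
        return 0;
}
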
903 865
@@ -924,9 +886,9 @@ static inline int prepare_for_direct_item(struct treepath *path,
924 } 886 }
925 // new file gets truncated 887 // new file gets truncated
926 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) { 888 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
927 // 889 //
928 round_len = ROUND_UP(new_file_length); 890 round_len = ROUND_UP(new_file_length);
929 /* this was n_new_file_length < le_ih ... */ 891 /* this was new_file_length < le_ih ... */
930 if (round_len < le_ih_k_offset(le_ih)) { 892 if (round_len < le_ih_k_offset(le_ih)) {
931 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 893 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
932 return M_DELETE; /* Delete this item. */ 894 return M_DELETE; /* Delete this item. */
@@ -986,96 +948,95 @@ static inline int prepare_for_direntry_item(struct treepath *path,
986 In case of file truncate calculate whether this item must be deleted/truncated or last 948 In case of file truncate calculate whether this item must be deleted/truncated or last
987 unformatted node of this item will be converted to a direct item. 949 unformatted node of this item will be converted to a direct item.
988 This function returns a determination of what balance mode the calling function should employ. */ 950 This function returns a determination of what balance mode the calling function should employ. */
989static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *p_s_path, const struct cpu_key *p_s_item_key, int *p_n_removed, /* Number of unformatted nodes which were removed 951static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path, const struct cpu_key *item_key, int *removed, /* Number of unformatted nodes which were removed
990 from end of the file. */ 952 from end of the file. */
991 int *p_n_cut_size, unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. */ 953 int *cut_size, unsigned long long new_file_length /* MAX_KEY_OFFSET in case of delete. */
992 ) 954 )
993{ 955{
994 struct super_block *p_s_sb = inode->i_sb; 956 struct super_block *sb = inode->i_sb;
995 struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_path); 957 struct item_head *p_le_ih = PATH_PITEM_HEAD(path);
996 struct buffer_head *p_s_bh = PATH_PLAST_BUFFER(p_s_path); 958 struct buffer_head *bh = PATH_PLAST_BUFFER(path);
997 959
998 BUG_ON(!th->t_trans_id); 960 BUG_ON(!th->t_trans_id);
999 961
1000 /* Stat_data item. */ 962 /* Stat_data item. */
1001 if (is_statdata_le_ih(p_le_ih)) { 963 if (is_statdata_le_ih(p_le_ih)) {
1002 964
1003 RFALSE(n_new_file_length != max_reiserfs_offset(inode), 965 RFALSE(new_file_length != max_reiserfs_offset(inode),
1004 "PAP-5210: mode must be M_DELETE"); 966 "PAP-5210: mode must be M_DELETE");
1005 967
1006 *p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih)); 968 *cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
1007 return M_DELETE; 969 return M_DELETE;
1008 } 970 }
1009 971
1010 /* Directory item. */ 972 /* Directory item. */
1011 if (is_direntry_le_ih(p_le_ih)) 973 if (is_direntry_le_ih(p_le_ih))
1012 return prepare_for_direntry_item(p_s_path, p_le_ih, inode, 974 return prepare_for_direntry_item(path, p_le_ih, inode,
1013 n_new_file_length, 975 new_file_length,
1014 p_n_cut_size); 976 cut_size);
1015 977
1016 /* Direct item. */ 978 /* Direct item. */
1017 if (is_direct_le_ih(p_le_ih)) 979 if (is_direct_le_ih(p_le_ih))
1018 return prepare_for_direct_item(p_s_path, p_le_ih, inode, 980 return prepare_for_direct_item(path, p_le_ih, inode,
1019 n_new_file_length, p_n_cut_size); 981 new_file_length, cut_size);
1020 982
1021 /* Case of an indirect item. */ 983 /* Case of an indirect item. */
1022 { 984 {
1023 int blk_size = p_s_sb->s_blocksize; 985 int blk_size = sb->s_blocksize;
1024 struct item_head s_ih; 986 struct item_head s_ih;
1025 int need_re_search; 987 int need_re_search;
1026 int delete = 0; 988 int delete = 0;
1027 int result = M_CUT; 989 int result = M_CUT;
1028 int pos = 0; 990 int pos = 0;
1029 991
1030 if ( n_new_file_length == max_reiserfs_offset (inode) ) { 992 if ( new_file_length == max_reiserfs_offset (inode) ) {
1031 /* prepare_for_delete_or_cut() is called by 993 /* prepare_for_delete_or_cut() is called by
1032 * reiserfs_delete_item() */ 994 * reiserfs_delete_item() */
1033 n_new_file_length = 0; 995 new_file_length = 0;
1034 delete = 1; 996 delete = 1;
1035 } 997 }
1036 998
1037 do { 999 do {
1038 need_re_search = 0; 1000 need_re_search = 0;
1039 *p_n_cut_size = 0; 1001 *cut_size = 0;
1040 p_s_bh = PATH_PLAST_BUFFER(p_s_path); 1002 bh = PATH_PLAST_BUFFER(path);
1041 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); 1003 copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
1042 pos = I_UNFM_NUM(&s_ih); 1004 pos = I_UNFM_NUM(&s_ih);
1043 1005
1044 while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > n_new_file_length) { 1006 while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
1045 __le32 *unfm; 1007 __le32 *unfm;
1046 __u32 block; 1008 __u32 block;
1047 1009
1048 /* Each unformatted block deletion may involve one additional 1010 /* Each unformatted block deletion may involve one additional
1049 * bitmap block into the transaction, thereby the initial 1011 * bitmap block into the transaction, thereby the initial
1050 * journal space reservation might not be enough. */ 1012 * journal space reservation might not be enough. */
1051 if (!delete && (*p_n_cut_size) != 0 && 1013 if (!delete && (*cut_size) != 0 &&
1052 reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { 1014 reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
1053 break; 1015 break;
1054 }
1055 1016
1056 unfm = (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + pos - 1; 1017 unfm = (__le32 *)B_I_PITEM(bh, &s_ih) + pos - 1;
1057 block = get_block_num(unfm, 0); 1018 block = get_block_num(unfm, 0);
1058 1019
1059 if (block != 0) { 1020 if (block != 0) {
1060 reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1); 1021 reiserfs_prepare_for_journal(sb, bh, 1);
1061 put_block_num(unfm, 0, 0); 1022 put_block_num(unfm, 0, 0);
1062 journal_mark_dirty (th, p_s_sb, p_s_bh); 1023 journal_mark_dirty(th, sb, bh);
1063 reiserfs_free_block(th, inode, block, 1); 1024 reiserfs_free_block(th, inode, block, 1);
1064 } 1025 }
1065 1026
1066 cond_resched(); 1027 cond_resched();
1067 1028
1068 if (item_moved (&s_ih, p_s_path)) { 1029 if (item_moved (&s_ih, path)) {
1069 need_re_search = 1; 1030 need_re_search = 1;
1070 break; 1031 break;
1071 } 1032 }
1072 1033
1073 pos --; 1034 pos --;
1074 (*p_n_removed) ++; 1035 (*removed)++;
1075 (*p_n_cut_size) -= UNFM_P_SIZE; 1036 (*cut_size) -= UNFM_P_SIZE;
1076 1037
1077 if (pos == 0) { 1038 if (pos == 0) {
1078 (*p_n_cut_size) -= IH_SIZE; 1039 (*cut_size) -= IH_SIZE;
1079 result = M_DELETE; 1040 result = M_DELETE;
1080 break; 1041 break;
1081 } 1042 }
@@ -1083,12 +1044,12 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
1083 /* a trick. If the buffer has been logged, this will do nothing. If 1044 /* a trick. If the buffer has been logged, this will do nothing. If
1084 ** we've broken the loop without logging it, it will restore the 1045 ** we've broken the loop without logging it, it will restore the
1085 ** buffer */ 1046 ** buffer */
1086 reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh); 1047 reiserfs_restore_prepared_buffer(sb, bh);
1087 } while (need_re_search && 1048 } while (need_re_search &&
1088 search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND); 1049 search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
1089 pos_in_item(p_s_path) = pos * UNFM_P_SIZE; 1050 pos_in_item(path) = pos * UNFM_P_SIZE;
1090 1051
1091 if (*p_n_cut_size == 0) { 1052 if (*cut_size == 0) {
1092 /* Nothing was cut. Maybe convert last unformatted node to the 1053 /* Nothing was cut. Maybe convert last unformatted node to the
1093 * direct item? */ 1054 * direct item? */
1094 result = M_CONVERT; 1055 result = M_CONVERT;
@@ -1098,45 +1059,45 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
1098} 1059}
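
For the indirect case above, the loop walks pointer positions from the item's tail towards its front: a pointer at 1-based position pos covers bytes starting at le_ih_k_offset + (pos - 1) * blk_size, and is freed once that start lies beyond the new length; if every pointer goes, the item head goes with it and the mode becomes M_DELETE. The planning arithmetic, extracted into a sketch (IH_SIZE and the other sizes here are illustrative, not authoritative):

#include <stdio.h>

#define UNFM_P_SIZE 4           /* size of one on-disk block pointer */
#define IH_SIZE     24          /* size of an item head, illustrative */

/* Walk an indirect item from its tail, as prepare_for_delete_or_cut()
 * does, and report how the cut would be accounted. */
static void plan_cut(long item_offset, int nr_ptrs, int blk_size,
                     long new_file_length)
{
        int pos = nr_ptrs;      /* 1-based position of the last pointer */
        int cut_size = 0, removed = 0;

        while (pos > 0 &&
               item_offset + (long)(pos - 1) * blk_size > new_file_length) {
                /* kernel: zero the pointer, log the buffer, free the block */
                pos--;
                removed++;
                cut_size -= UNFM_P_SIZE;
        }
        if (pos == 0)
                cut_size -= IH_SIZE;    /* whole item goes: M_DELETE */

        printf("mode=%s removed=%d cut_size=%d\n",
               pos ? (cut_size ? "M_CUT" : "M_CONVERT") : "M_DELETE",
               removed, cut_size);
}

int main(void)
{
        /* item at offset 4097 with four pointers to 4 KiB blocks,
         * truncating the file down to 6000 bytes */
        plan_cut(4097, 4, 4096, 6000);  /* cuts the last three pointers */
        return 0;
}
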
1099 1060
1100/* Calculate number of bytes which will be deleted or cut during balance */ 1061/* Calculate number of bytes which will be deleted or cut during balance */
1101static int calc_deleted_bytes_number(struct tree_balance *p_s_tb, char c_mode) 1062static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
1102{ 1063{
1103 int n_del_size; 1064 int del_size;
1104 struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path); 1065 struct item_head *p_le_ih = PATH_PITEM_HEAD(tb->tb_path);
1105 1066
1106 if (is_statdata_le_ih(p_le_ih)) 1067 if (is_statdata_le_ih(p_le_ih))
1107 return 0; 1068 return 0;
1108 1069
1109 n_del_size = 1070 del_size =
1110 (c_mode == 1071 (mode ==
1111 M_DELETE) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0]; 1072 M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
1112 if (is_direntry_le_ih(p_le_ih)) { 1073 if (is_direntry_le_ih(p_le_ih)) {
1113 // return EMPTY_DIR_SIZE; /* We delete empty directories only. */ 1074 /* return EMPTY_DIR_SIZE; We delete empty directories only.
1114 // we can't use EMPTY_DIR_SIZE, as old format dirs have a different 1075 * we can't use EMPTY_DIR_SIZE, as old format dirs have a different
1115 // empty size. ick. FIXME, is this right? 1076 * empty size. ick. FIXME, is this right? */
1116 // 1077 return del_size;
1117 return n_del_size;
1118 } 1078 }
1119 1079
1120 if (is_indirect_le_ih(p_le_ih)) 1080 if (is_indirect_le_ih(p_le_ih))
1121 n_del_size = (n_del_size / UNFM_P_SIZE) * (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size); // - get_ih_free_space (p_le_ih); 1081 del_size = (del_size / UNFM_P_SIZE) *
1122 return n_del_size; 1082 (PATH_PLAST_BUFFER(tb->tb_path)->b_size);
1083 return del_size;
1123} 1084}
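
calc_deleted_bytes_number() maps the balance's insert_size back into file bytes: direct bytes count one for one, while every indirect pointer stands for a whole unformatted block. The cut case, reduced to a sketch with invented values:

#include <stdio.h>

#define UNFM_P_SIZE 4

/* File bytes that cutting `cut_size` on-disk bytes deletes: for an
 * indirect item every pointer represents one data block. */
static long deleted_bytes(int cut_size, int indirect, int blk_size)
{
        long del = -cut_size;                   /* cuts carry negative size */
        if (indirect)
                del = del / UNFM_P_SIZE * blk_size;
        return del;
}

int main(void)
{
        printf("%ld\n", deleted_bytes(-12, 1, 4096));   /* 3 blocks: 12288 */
        return 0;
}
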
1124 1085
1125static void init_tb_struct(struct reiserfs_transaction_handle *th, 1086static void init_tb_struct(struct reiserfs_transaction_handle *th,
1126 struct tree_balance *p_s_tb, 1087 struct tree_balance *tb,
1127 struct super_block *p_s_sb, 1088 struct super_block *sb,
1128 struct treepath *p_s_path, int n_size) 1089 struct treepath *path, int size)
1129{ 1090{
1130 1091
1131 BUG_ON(!th->t_trans_id); 1092 BUG_ON(!th->t_trans_id);
1132 1093
1133 memset(p_s_tb, '\0', sizeof(struct tree_balance)); 1094 memset(tb, '\0', sizeof(struct tree_balance));
1134 p_s_tb->transaction_handle = th; 1095 tb->transaction_handle = th;
1135 p_s_tb->tb_sb = p_s_sb; 1096 tb->tb_sb = sb;
1136 p_s_tb->tb_path = p_s_path; 1097 tb->tb_path = path;
1137 PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL; 1098 PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1138 PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0; 1099 PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1139 p_s_tb->insert_size[0] = n_size; 1100 tb->insert_size[0] = size;
1140} 1101}
1141 1102
1142void padd_item(char *item, int total_length, int length) 1103void padd_item(char *item, int total_length, int length)
@@ -1175,73 +1136,77 @@ char head2type(struct item_head *ih)
1175} 1136}
1176#endif 1137#endif
1177 1138
1178/* Delete object item. */ 1139/* Delete object item.
1179int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct treepath *p_s_path, /* Path to the deleted item. */ 1140 * th - active transaction handle
1180 const struct cpu_key *p_s_item_key, /* Key to search for the deleted item. */ 1141 * path - path to the deleted item
1181 struct inode *p_s_inode, /* inode is here just to update i_blocks and quotas */ 1142 * item_key - key to search for the deleted item
1182 struct buffer_head *p_s_un_bh) 1143 * indode - used for updating i_blocks and quotas
1183{ /* NULL or unformatted node pointer. */ 1144 * un_bh - NULL or unformatted node pointer
1184 struct super_block *p_s_sb = p_s_inode->i_sb; 1145 */
1146int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1147 struct treepath *path, const struct cpu_key *item_key,
1148 struct inode *inode, struct buffer_head *un_bh)
1149{
1150 struct super_block *sb = inode->i_sb;
1185 struct tree_balance s_del_balance; 1151 struct tree_balance s_del_balance;
1186 struct item_head s_ih; 1152 struct item_head s_ih;
1187 struct item_head *q_ih; 1153 struct item_head *q_ih;
1188 int quota_cut_bytes; 1154 int quota_cut_bytes;
1189 int n_ret_value, n_del_size, n_removed; 1155 int ret_value, del_size, removed;
1190 1156
1191#ifdef CONFIG_REISERFS_CHECK 1157#ifdef CONFIG_REISERFS_CHECK
1192 char c_mode; 1158 char mode;
1193 int n_iter = 0; 1159 int iter = 0;
1194#endif 1160#endif
1195 1161
1196 BUG_ON(!th->t_trans_id); 1162 BUG_ON(!th->t_trans_id);
1197 1163
1198 init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path, 1164 init_tb_struct(th, &s_del_balance, sb, path,
1199 0 /*size is unknown */ ); 1165 0 /*size is unknown */ );
1200 1166
1201 while (1) { 1167 while (1) {
1202 n_removed = 0; 1168 removed = 0;
1203 1169
1204#ifdef CONFIG_REISERFS_CHECK 1170#ifdef CONFIG_REISERFS_CHECK
1205 n_iter++; 1171 iter++;
1206 c_mode = 1172 mode =
1207#endif 1173#endif
1208 prepare_for_delete_or_cut(th, p_s_inode, p_s_path, 1174 prepare_for_delete_or_cut(th, inode, path,
1209 p_s_item_key, &n_removed, 1175 item_key, &removed,
1210 &n_del_size, 1176 &del_size,
1211 max_reiserfs_offset(p_s_inode)); 1177 max_reiserfs_offset(inode));
1212 1178
1213 RFALSE(c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); 1179 RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1214 1180
1215 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); 1181 copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
1216 s_del_balance.insert_size[0] = n_del_size; 1182 s_del_balance.insert_size[0] = del_size;
1217 1183
1218 n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL); 1184 ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1219 if (n_ret_value != REPEAT_SEARCH) 1185 if (ret_value != REPEAT_SEARCH)
1220 break; 1186 break;
1221 1187
1222 PROC_INFO_INC(p_s_sb, delete_item_restarted); 1188 PROC_INFO_INC(sb, delete_item_restarted);
1223 1189
1224 // file system changed, repeat search 1190 // file system changed, repeat search
1225 n_ret_value = 1191 ret_value =
1226 search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); 1192 search_for_position_by_key(sb, item_key, path);
1227 if (n_ret_value == IO_ERROR) 1193 if (ret_value == IO_ERROR)
1228 break; 1194 break;
1229 if (n_ret_value == FILE_NOT_FOUND) { 1195 if (ret_value == FILE_NOT_FOUND) {
1230 reiserfs_warning(p_s_sb, 1196 reiserfs_warning(sb, "vs-5340",
1231 "vs-5340: reiserfs_delete_item: "
1232 "no items of the file %K found", 1197 "no items of the file %K found",
1233 p_s_item_key); 1198 item_key);
1234 break; 1199 break;
1235 } 1200 }
1236 } /* while (1) */ 1201 } /* while (1) */
1237 1202
1238 if (n_ret_value != CARRY_ON) { 1203 if (ret_value != CARRY_ON) {
1239 unfix_nodes(&s_del_balance); 1204 unfix_nodes(&s_del_balance);
1240 return 0; 1205 return 0;
1241 } 1206 }
1242 // reiserfs_delete_item returns item length when success 1207 // reiserfs_delete_item returns item length when success
1243 n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); 1208 ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1244 q_ih = get_ih(p_s_path); 1209 q_ih = get_ih(path);
1245 quota_cut_bytes = ih_item_len(q_ih); 1210 quota_cut_bytes = ih_item_len(q_ih);
1246 1211
1247 /* hack so the quota code doesn't have to guess if the file 1212 /* hack so the quota code doesn't have to guess if the file
@@ -1250,15 +1215,15 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct treepath
1250 ** split into multiple items, and we only want to decrement for 1215 ** split into multiple items, and we only want to decrement for
1251 ** the unfm node once 1216 ** the unfm node once
1252 */ 1217 */
1253 if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(q_ih)) { 1218 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
1254 if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) { 1219 if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
1255 quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE; 1220 quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1256 } else { 1221 } else {
1257 quota_cut_bytes = 0; 1222 quota_cut_bytes = 0;
1258 } 1223 }
1259 } 1224 }
1260 1225
1261 if (p_s_un_bh) { 1226 if (un_bh) {
1262 int off; 1227 int off;
1263 char *data; 1228 char *data;
1264 1229
@@ -1276,31 +1241,31 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct treepath
1276 ** The unformatted node must be dirtied later on. We can't be 1241 ** The unformatted node must be dirtied later on. We can't be
1277 ** sure here if the entire tail has been deleted yet. 1242 ** sure here if the entire tail has been deleted yet.
1278 ** 1243 **
1279 ** p_s_un_bh is from the page cache (all unformatted nodes are 1244 ** un_bh is from the page cache (all unformatted nodes are
1280 ** from the page cache) and might be a highmem page. So, we 1245 ** from the page cache) and might be a highmem page. So, we
1281 ** can't use p_s_un_bh->b_data. 1246 ** can't use un_bh->b_data.
1282 ** -clm 1247 ** -clm
1283 */ 1248 */
1284 1249
1285 data = kmap_atomic(p_s_un_bh->b_page, KM_USER0); 1250 data = kmap_atomic(un_bh->b_page, KM_USER0);
1286 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1)); 1251 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1287 memcpy(data + off, 1252 memcpy(data + off,
1288 B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih), 1253 B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih),
1289 n_ret_value); 1254 ret_value);
1290 kunmap_atomic(data, KM_USER0); 1255 kunmap_atomic(data, KM_USER0);
1291 } 1256 }
1292 /* Perform balancing after all resources have been collected at once. */ 1257 /* Perform balancing after all resources have been collected at once. */
1293 do_balance(&s_del_balance, NULL, NULL, M_DELETE); 1258 do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1294 1259
1295#ifdef REISERQUOTA_DEBUG 1260#ifdef REISERQUOTA_DEBUG
1296 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 1261 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
1297 "reiserquota delete_item(): freeing %u, id=%u type=%c", 1262 "reiserquota delete_item(): freeing %u, id=%u type=%c",
1298 quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih)); 1263 quota_cut_bytes, inode->i_uid, head2type(&s_ih));
1299#endif 1264#endif
1300 vfs_dq_free_space_nodirty(p_s_inode, quota_cut_bytes); 1265 vfs_dq_free_space_nodirty(inode, quota_cut_bytes);
1301 1266
1302 /* Return deleted body length */ 1267 /* Return deleted body length */
1303 return n_ret_value; 1268 return ret_value;
1304} 1269}
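
reiserfs_delete_item() is the canonical shape of the do_balance protocol: initialise a tree_balance, loop fix_nodes() for as long as it answers REPEAT_SEARCH (re-searching the key because the tree shifted under a schedule), then either unfix_nodes() on failure or call do_balance() exactly once with all resources pinned. The control flow as a user-space skeleton; the stubs are invented and rigged to force one retry:

#include <stdio.h>

enum { CARRY_ON, REPEAT_SEARCH, NO_DISK_SPACE };

static int attempts;

static int fix_nodes_stub(void)     /* succeed on the second try */
{
        return ++attempts < 2 ? REPEAT_SEARCH : CARRY_ON;
}

static int research_key(void)       /* stand-in for re-searching the key */
{
        return 0;                    /* 0: key found again */
}

int main(void)
{
        int ret;

        while (1) {
                /* prepare_for_delete_or_cut() would run here */
                ret = fix_nodes_stub();
                if (ret != REPEAT_SEARCH)
                        break;
                if (research_key())  /* tree changed: find the item again */
                        return 1;    /* IO_ERROR / FILE_NOT_FOUND path */
        }

        if (ret != CARRY_ON)
                return 0;            /* unfix_nodes() and bail out */

        /* all resources pinned: balance exactly once */
        printf("do_balance(M_DELETE)\n");
        return 0;
}
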
1305 1270
1306/* Summary Of Mechanisms For Handling Collisions Between Processes: 1271/* Summary Of Mechanisms For Handling Collisions Between Processes:
@@ -1338,10 +1303,9 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1338 while (1) { 1303 while (1) {
1339 retval = search_item(th->t_super, &cpu_key, &path); 1304 retval = search_item(th->t_super, &cpu_key, &path);
1340 if (retval == IO_ERROR) { 1305 if (retval == IO_ERROR) {
1341 reiserfs_warning(th->t_super, 1306 reiserfs_error(th->t_super, "vs-5350",
1342 "vs-5350: reiserfs_delete_solid_item: " 1307 "i/o failure occurred trying "
1343 "i/o failure occurred trying to delete %K", 1308 "to delete %K", &cpu_key);
1344 &cpu_key);
1345 break; 1309 break;
1346 } 1310 }
1347 if (retval != ITEM_FOUND) { 1311 if (retval != ITEM_FOUND) {
@@ -1355,9 +1319,8 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1355 GET_GENERATION_NUMBER(le_key_k_offset 1319 GET_GENERATION_NUMBER(le_key_k_offset
1356 (le_key_version(key), 1320 (le_key_version(key),
1357 key)) == 1)) 1321 key)) == 1))
1358 reiserfs_warning(th->t_super, 1322 reiserfs_warning(th->t_super, "vs-5355",
1359 "vs-5355: reiserfs_delete_solid_item: %k not found", 1323 "%k not found", key);
1360 key);
1361 break; 1324 break;
1362 } 1325 }
1363 if (!tb_init) { 1326 if (!tb_init) {
@@ -1389,8 +1352,7 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1389 break; 1352 break;
1390 } 1353 }
1391 // IO_ERROR, NO_DISK_SPACE, etc 1354 // IO_ERROR, NO_DISK_SPACE, etc
1392 reiserfs_warning(th->t_super, 1355 reiserfs_warning(th->t_super, "vs-5360",
1393 "vs-5360: reiserfs_delete_solid_item: "
1394 "could not delete %K due to fix_nodes failure", 1356 "could not delete %K due to fix_nodes failure",
1395 &cpu_key); 1357 &cpu_key);
1396 unfix_nodes(&tb); 1358 unfix_nodes(&tb);
@@ -1462,36 +1424,37 @@ static void unmap_buffers(struct page *page, loff_t pos)
1462} 1424}
1463 1425
1464static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th, 1426static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1465 struct inode *p_s_inode, 1427 struct inode *inode,
1466 struct page *page, 1428 struct page *page,
1467 struct treepath *p_s_path, 1429 struct treepath *path,
1468 const struct cpu_key *p_s_item_key, 1430 const struct cpu_key *item_key,
1469 loff_t n_new_file_size, char *p_c_mode) 1431 loff_t new_file_size, char *mode)
1470{ 1432{
1471 struct super_block *p_s_sb = p_s_inode->i_sb; 1433 struct super_block *sb = inode->i_sb;
1472 int n_block_size = p_s_sb->s_blocksize; 1434 int block_size = sb->s_blocksize;
1473 int cut_bytes; 1435 int cut_bytes;
1474 BUG_ON(!th->t_trans_id); 1436 BUG_ON(!th->t_trans_id);
1475 BUG_ON(n_new_file_size != p_s_inode->i_size); 1437 BUG_ON(new_file_size != inode->i_size);
1476 1438
1477 /* the page being sent in could be NULL if there was an i/o error 1439 /* the page being sent in could be NULL if there was an i/o error
1478 ** reading in the last block. The user will hit problems trying to 1440 ** reading in the last block. The user will hit problems trying to
1479 ** read the file, but for now we just skip the indirect2direct 1441 ** read the file, but for now we just skip the indirect2direct
1480 */ 1442 */
1481 if (atomic_read(&p_s_inode->i_count) > 1 || 1443 if (atomic_read(&inode->i_count) > 1 ||
1482 !tail_has_to_be_packed(p_s_inode) || 1444 !tail_has_to_be_packed(inode) ||
1483 !page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) { 1445 !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
1484 // leave tail in an unformatted node 1446 /* leave tail in an unformatted node */
1485 *p_c_mode = M_SKIP_BALANCING; 1447 *mode = M_SKIP_BALANCING;
1486 cut_bytes = 1448 cut_bytes =
1487 n_block_size - (n_new_file_size & (n_block_size - 1)); 1449 block_size - (new_file_size & (block_size - 1));
1488 pathrelse(p_s_path); 1450 pathrelse(path);
1489 return cut_bytes; 1451 return cut_bytes;
1490 } 1452 }
1491 /* Permorm the conversion to a direct_item. */ 1453 /* Perform the conversion to a direct_item. */
1492 /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode); */ 1454 /* return indirect_to_direct(inode, path, item_key,
1493 return indirect2direct(th, p_s_inode, page, p_s_path, p_s_item_key, 1455 new_file_size, mode); */
1494 n_new_file_size, p_c_mode); 1456 return indirect2direct(th, inode, page, path, item_key,
1457 new_file_size, mode);
1495} 1458}
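
When the tail is left unpacked, the value handed back to the truncate path is simply the slack between the new size and the end of its last block, block_size - (new_file_size & (block_size - 1)). In isolation (power-of-two block size assumed, as in the caller):

#include <stdio.h>

/* Bytes to cut from the last block when the tail stays in an
 * unformatted node; block_size must be a power of two. */
static int tail_slack(long new_file_size, int block_size)
{
        return block_size - (int)(new_file_size & (block_size - 1));
}

int main(void)
{
        printf("%d\n", tail_slack(6000, 4096));   /* 4096 - 1904 = 2192 */
        return 0;
}
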
1496 1459
1497/* we did indirect_to_direct conversion. And we have inserted direct 1460/* we did indirect_to_direct conversion. And we have inserted direct
@@ -1515,8 +1478,8 @@ static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1515 /* look for the last byte of the tail */ 1478 /* look for the last byte of the tail */
1516 if (search_for_position_by_key(inode->i_sb, &tail_key, path) == 1479 if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
1517 POSITION_NOT_FOUND) 1480 POSITION_NOT_FOUND)
1518 reiserfs_panic(inode->i_sb, 1481 reiserfs_panic(inode->i_sb, "vs-5615",
1519 "vs-5615: indirect_to_direct_roll_back: found invalid item"); 1482 "found invalid item");
1520 RFALSE(path->pos_in_item != 1483 RFALSE(path->pos_in_item !=
1521 ih_item_len(PATH_PITEM_HEAD(path)) - 1, 1484 ih_item_len(PATH_PITEM_HEAD(path)) - 1,
1522 "vs-5616: appended bytes found"); 1485 "vs-5616: appended bytes found");
@@ -1533,38 +1496,39 @@ static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1533 set_cpu_key_k_offset(&tail_key, 1496 set_cpu_key_k_offset(&tail_key,
1534 cpu_key_k_offset(&tail_key) - removed); 1497 cpu_key_k_offset(&tail_key) - removed);
1535 } 1498 }
1536 reiserfs_warning(inode->i_sb, 1499 reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
1537 "indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space"); 1500 "conversion has been rolled back due to "
1501 "lack of disk space");
1538 //mark_file_without_tail (inode); 1502 //mark_file_without_tail (inode);
1539 mark_inode_dirty(inode); 1503 mark_inode_dirty(inode);
1540} 1504}
1541 1505
1542/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */ 1506/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1543int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, 1507int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1544 struct treepath *p_s_path, 1508 struct treepath *path,
1545 struct cpu_key *p_s_item_key, 1509 struct cpu_key *item_key,
1546 struct inode *p_s_inode, 1510 struct inode *inode,
1547 struct page *page, loff_t n_new_file_size) 1511 struct page *page, loff_t new_file_size)
1548{ 1512{
1549 struct super_block *p_s_sb = p_s_inode->i_sb; 1513 struct super_block *sb = inode->i_sb;
1550 /* Every function which is going to call do_balance must first 1514 /* Every function which is going to call do_balance must first
1551 create a tree_balance structure. Then it must fill up this 1515 create a tree_balance structure. Then it must fill up this
1552 structure by using the init_tb_struct and fix_nodes functions. 1516 structure by using the init_tb_struct and fix_nodes functions.
1553 After that we can make tree balancing. */ 1517 After that we can make tree balancing. */
1554 struct tree_balance s_cut_balance; 1518 struct tree_balance s_cut_balance;
1555 struct item_head *p_le_ih; 1519 struct item_head *p_le_ih;
1556 int n_cut_size = 0, /* Amount to be cut. */ 1520 int cut_size = 0, /* Amount to be cut. */
1557 n_ret_value = CARRY_ON, n_removed = 0, /* Number of the removed unformatted nodes. */ 1521 ret_value = CARRY_ON, removed = 0, /* Number of the removed unformatted nodes. */
1558 n_is_inode_locked = 0; 1522 is_inode_locked = 0;
1559 char c_mode; /* Mode of the balance. */ 1523 char mode; /* Mode of the balance. */
1560 int retval2 = -1; 1524 int retval2 = -1;
1561 int quota_cut_bytes; 1525 int quota_cut_bytes;
1562 loff_t tail_pos = 0; 1526 loff_t tail_pos = 0;
1563 1527
1564 BUG_ON(!th->t_trans_id); 1528 BUG_ON(!th->t_trans_id);
1565 1529
1566 init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path, 1530 init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
1567 n_cut_size); 1531 cut_size);
1568 1532
1569 /* Repeat this loop until we either cut the item without needing 1533 /* Repeat this loop until we either cut the item without needing
1570 to balance, or we fix_nodes without schedule occurring */ 1534 to balance, or we fix_nodes without schedule occurring */
@@ -1574,144 +1538,142 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1574 free unformatted nodes which are pointed to by the cut 1538 free unformatted nodes which are pointed to by the cut
1575 pointers. */ 1539 pointers. */
1576 1540
1577 c_mode = 1541 mode =
1578 prepare_for_delete_or_cut(th, p_s_inode, p_s_path, 1542 prepare_for_delete_or_cut(th, inode, path,
1579 p_s_item_key, &n_removed, 1543 item_key, &removed,
1580 &n_cut_size, n_new_file_size); 1544 &cut_size, new_file_size);
1581 if (c_mode == M_CONVERT) { 1545 if (mode == M_CONVERT) {
1582 /* convert last unformatted node to direct item or leave 1546 /* convert last unformatted node to direct item or leave
1583 tail in the unformatted node */ 1547 tail in the unformatted node */
1584 RFALSE(n_ret_value != CARRY_ON, 1548 RFALSE(ret_value != CARRY_ON,
1585 "PAP-5570: can not convert twice"); 1549 "PAP-5570: can not convert twice");
1586 1550
1587 n_ret_value = 1551 ret_value =
1588 maybe_indirect_to_direct(th, p_s_inode, page, 1552 maybe_indirect_to_direct(th, inode, page,
1589 p_s_path, p_s_item_key, 1553 path, item_key,
1590 n_new_file_size, &c_mode); 1554 new_file_size, &mode);
1591 if (c_mode == M_SKIP_BALANCING) 1555 if (mode == M_SKIP_BALANCING)
1592 /* tail has been left in the unformatted node */ 1556 /* tail has been left in the unformatted node */
1593 return n_ret_value; 1557 return ret_value;
1594 1558
1595 n_is_inode_locked = 1; 1559 is_inode_locked = 1;
1596 1560
1597 /* removing of last unformatted node will change value we 1561 /* removing of last unformatted node will change value we
1598 have to return to truncate. Save it */ 1562 have to return to truncate. Save it */
1599 retval2 = n_ret_value; 1563 retval2 = ret_value;
1600 /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1)); */ 1564 /*retval2 = sb->s_blocksize - (new_file_size & (sb->s_blocksize - 1)); */
1601 1565
1602 /* So, we have performed the first part of the conversion: 1566 /* So, we have performed the first part of the conversion:
1603 inserting the new direct item. Now we are removing the 1567 inserting the new direct item. Now we are removing the
1604 last unformatted node pointer. Set key to search for 1568 last unformatted node pointer. Set key to search for
1605 it. */ 1569 it. */
1606 set_cpu_key_k_type(p_s_item_key, TYPE_INDIRECT); 1570 set_cpu_key_k_type(item_key, TYPE_INDIRECT);
1607 p_s_item_key->key_length = 4; 1571 item_key->key_length = 4;
1608 n_new_file_size -= 1572 new_file_size -=
1609 (n_new_file_size & (p_s_sb->s_blocksize - 1)); 1573 (new_file_size & (sb->s_blocksize - 1));
1610 tail_pos = n_new_file_size; 1574 tail_pos = new_file_size;
1611 set_cpu_key_k_offset(p_s_item_key, n_new_file_size + 1); 1575 set_cpu_key_k_offset(item_key, new_file_size + 1);
1612 if (search_for_position_by_key 1576 if (search_for_position_by_key
1613 (p_s_sb, p_s_item_key, 1577 (sb, item_key,
1614 p_s_path) == POSITION_NOT_FOUND) { 1578 path) == POSITION_NOT_FOUND) {
1615 print_block(PATH_PLAST_BUFFER(p_s_path), 3, 1579 print_block(PATH_PLAST_BUFFER(path), 3,
1616 PATH_LAST_POSITION(p_s_path) - 1, 1580 PATH_LAST_POSITION(path) - 1,
1617 PATH_LAST_POSITION(p_s_path) + 1); 1581 PATH_LAST_POSITION(path) + 1);
1618 reiserfs_panic(p_s_sb, 1582 reiserfs_panic(sb, "PAP-5580", "item to "
1619 "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%K)", 1583 "convert does not exist (%K)",
1620 p_s_item_key); 1584 item_key);
1621 } 1585 }
1622 continue; 1586 continue;
1623 } 1587 }
1624 if (n_cut_size == 0) { 1588 if (cut_size == 0) {
1625 pathrelse(p_s_path); 1589 pathrelse(path);
1626 return 0; 1590 return 0;
1627 } 1591 }
1628 1592
1629 s_cut_balance.insert_size[0] = n_cut_size; 1593 s_cut_balance.insert_size[0] = cut_size;
1630 1594
1631 n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL); 1595 ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL);
1632 if (n_ret_value != REPEAT_SEARCH) 1596 if (ret_value != REPEAT_SEARCH)
1633 break; 1597 break;
1634 1598
1635 PROC_INFO_INC(p_s_sb, cut_from_item_restarted); 1599 PROC_INFO_INC(sb, cut_from_item_restarted);
1636 1600
1637 n_ret_value = 1601 ret_value =
1638 search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); 1602 search_for_position_by_key(sb, item_key, path);
1639 if (n_ret_value == POSITION_FOUND) 1603 if (ret_value == POSITION_FOUND)
1640 continue; 1604 continue;
1641 1605
1642 reiserfs_warning(p_s_sb, 1606 reiserfs_warning(sb, "PAP-5610", "item %K not found",
1643 "PAP-5610: reiserfs_cut_from_item: item %K not found", 1607 item_key);
1644 p_s_item_key);
1645 unfix_nodes(&s_cut_balance); 1608 unfix_nodes(&s_cut_balance);
1646 return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT; 1609 return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
1647 } /* while */ 1610 } /* while */
1648 1611
1649 // check fix_nodes results (IO_ERROR or NO_DISK_SPACE) 1612 // check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
1650 if (n_ret_value != CARRY_ON) { 1613 if (ret_value != CARRY_ON) {
1651 if (n_is_inode_locked) { 1614 if (is_inode_locked) {
1652 // FIXME: this seems to be not needed: we are always able 1615 // FIXME: this seems to be not needed: we are always able
1653 // to cut item 1616 // to cut item
1654 indirect_to_direct_roll_back(th, p_s_inode, p_s_path); 1617 indirect_to_direct_roll_back(th, inode, path);
1655 } 1618 }
1656 if (n_ret_value == NO_DISK_SPACE) 1619 if (ret_value == NO_DISK_SPACE)
1657 reiserfs_warning(p_s_sb, "NO_DISK_SPACE"); 1620 reiserfs_warning(sb, "reiserfs-5092",
1621 "NO_DISK_SPACE");
1658 unfix_nodes(&s_cut_balance); 1622 unfix_nodes(&s_cut_balance);
1659 return -EIO; 1623 return -EIO;
1660 } 1624 }
1661 1625
1662 /* go ahead and perform balancing */ 1626 /* go ahead and perform balancing */
1663 1627
1664 RFALSE(c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode"); 1628 RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode");
1665 1629
1666 /* Calculate number of bytes that need to be cut from the item. */ 1630 /* Calculate number of bytes that need to be cut from the item. */
1667 quota_cut_bytes = 1631 quota_cut_bytes =
1668 (c_mode == 1632 (mode ==
1669 M_DELETE) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance. 1633 M_DELETE) ? ih_item_len(get_ih(path)) : -s_cut_balance.
1670 insert_size[0]; 1634 insert_size[0];
1671 if (retval2 == -1) 1635 if (retval2 == -1)
1672 n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode); 1636 ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
1673 else 1637 else
1674 n_ret_value = retval2; 1638 ret_value = retval2;
1675 1639
1676 /* For direct items, we only change the quota when deleting the last 1640 /* For direct items, we only change the quota when deleting the last
1677 ** item. 1641 ** item.
1678 */ 1642 */
1679 p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path); 1643 p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path);
1680 if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) { 1644 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1681 if (c_mode == M_DELETE && 1645 if (mode == M_DELETE &&
1682 (le_ih_k_offset(p_le_ih) & (p_s_sb->s_blocksize - 1)) == 1646 (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
1683 1) { 1647 1) {
1684 // FIXME: this is to keep 3.5 happy 1648 // FIXME: this is to keep 3.5 happy
1685 REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX; 1649 REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
1686 quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE; 1650 quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1687 } else { 1651 } else {
1688 quota_cut_bytes = 0; 1652 quota_cut_bytes = 0;
1689 } 1653 }
1690 } 1654 }
1691#ifdef CONFIG_REISERFS_CHECK 1655#ifdef CONFIG_REISERFS_CHECK
1692 if (n_is_inode_locked) { 1656 if (is_inode_locked) {
1693 struct item_head *le_ih = 1657 struct item_head *le_ih =
1694 PATH_PITEM_HEAD(s_cut_balance.tb_path); 1658 PATH_PITEM_HEAD(s_cut_balance.tb_path);
1695 /* we are going to complete indirect2direct conversion. Make 1659 /* we are going to complete indirect2direct conversion. Make
1696 sure, that we exactly remove last unformatted node pointer 1660 sure, that we exactly remove last unformatted node pointer
1697 of the item */ 1661 of the item */
1698 if (!is_indirect_le_ih(le_ih)) 1662 if (!is_indirect_le_ih(le_ih))
1699 reiserfs_panic(p_s_sb, 1663 reiserfs_panic(sb, "vs-5652",
1700 "vs-5652: reiserfs_cut_from_item: "
1701 "item must be indirect %h", le_ih); 1664 "item must be indirect %h", le_ih);
1702 1665
1703 if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE) 1666 if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1704 reiserfs_panic(p_s_sb, 1667 reiserfs_panic(sb, "vs-5653", "completing "
1705 "vs-5653: reiserfs_cut_from_item: " 1668 "indirect2direct conversion indirect "
1706 "completing indirect2direct conversion indirect item %h " 1669 "item %h being deleted must be of "
1707 "being deleted must be of 4 byte long", 1670 "4 byte long", le_ih);
1708 le_ih);
1709 1671
1710 if (c_mode == M_CUT 1672 if (mode == M_CUT
1711 && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) { 1673 && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1712 reiserfs_panic(p_s_sb, 1674 reiserfs_panic(sb, "vs-5654", "can not complete "
1713 "vs-5654: reiserfs_cut_from_item: " 1675 "indirect2direct conversion of %h "
1714 "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)", 1676 "(CUT, insert_size==%d)",
1715 le_ih, s_cut_balance.insert_size[0]); 1677 le_ih, s_cut_balance.insert_size[0]);
1716 } 1678 }
1717 /* it would be useful to make sure, that right neighboring 1679 /* it would be useful to make sure, that right neighboring
@@ -1719,23 +1681,23 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1719 } 1681 }
1720#endif 1682#endif
1721 1683
1722 do_balance(&s_cut_balance, NULL, NULL, c_mode); 1684 do_balance(&s_cut_balance, NULL, NULL, mode);
1723 if (n_is_inode_locked) { 1685 if (is_inode_locked) {
1724 /* we've done an indirect->direct conversion. when the data block 1686 /* we've done an indirect->direct conversion. when the data block
1725 ** was freed, it was removed from the list of blocks that must 1687 ** was freed, it was removed from the list of blocks that must
1726 ** be flushed before the transaction commits, make sure to 1688 ** be flushed before the transaction commits, make sure to
1727 ** unmap and invalidate it 1689 ** unmap and invalidate it
1728 */ 1690 */
1729 unmap_buffers(page, tail_pos); 1691 unmap_buffers(page, tail_pos);
1730 REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask; 1692 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
1731 } 1693 }
1732#ifdef REISERQUOTA_DEBUG 1694#ifdef REISERQUOTA_DEBUG
1733 reiserfs_debug(p_s_inode->i_sb, REISERFS_DEBUG_CODE, 1695 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1734 "reiserquota cut_from_item(): freeing %u id=%u type=%c", 1696 "reiserquota cut_from_item(): freeing %u id=%u type=%c",
1735 quota_cut_bytes, p_s_inode->i_uid, '?'); 1697 quota_cut_bytes, inode->i_uid, '?');
1736#endif 1698#endif
1737 vfs_dq_free_space_nodirty(p_s_inode, quota_cut_bytes); 1699 vfs_dq_free_space_nodirty(inode, quota_cut_bytes);
1738 return n_ret_value; 1700 return ret_value;
1739} 1701}
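
Both deletion paths share the quota hack seen above: a direct item whose 1-based key offset sits on a block boundary ((offset & (blocksize - 1)) == 1) is the remnant of an indirect-to-direct conversion, so deleting it releases a full block plus one on-disk pointer; any other direct item holds tail bytes whose quota was already charged with the block. The rule as a predicate, with invented sizes:

#include <stdio.h>

#define UNFM_P_SIZE 4

/* Quota bytes to release for a deleted direct item, following the
 * rule in reiserfs_delete_item()/reiserfs_cut_from_item().  Offsets
 * are 1-based and block_size is a power of two. */
static int quota_cut_bytes(long key_offset, int block_size)
{
        if ((key_offset & (block_size - 1)) == 1)
                return block_size + UNFM_P_SIZE;  /* converted tail block */
        return 0;                                 /* already accounted */
}

int main(void)
{
        printf("%d\n", quota_cut_bytes(4097, 4096));  /* 4100 */
        printf("%d\n", quota_cut_bytes(4200, 4096));  /* 0 */
        return 0;
}
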
1740 1702
1741static void truncate_directory(struct reiserfs_transaction_handle *th, 1703static void truncate_directory(struct reiserfs_transaction_handle *th,
@@ -1743,8 +1705,7 @@ static void truncate_directory(struct reiserfs_transaction_handle *th,
1743{ 1705{
1744 BUG_ON(!th->t_trans_id); 1706 BUG_ON(!th->t_trans_id);
1745 if (inode->i_nlink) 1707 if (inode->i_nlink)
1746 reiserfs_warning(inode->i_sb, 1708 reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
1747 "vs-5655: truncate_directory: link count != 0");
1748 1709
1749 set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET); 1710 set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
1750 set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY); 1711 set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
@@ -1756,8 +1717,8 @@ static void truncate_directory(struct reiserfs_transaction_handle *th,
1756 1717
1757/* Truncate file to the new size. Note, this must be called with a transaction 1718/* Truncate file to the new size. Note, this must be called with a transaction
1758 already started */ 1719 already started */
1759int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p_s_inode, /* ->i_size contains new 1720int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1760 size */ 1721 struct inode *inode, /* ->i_size contains new size */
1761 struct page *page, /* up to date for last block */ 1722 struct page *page, /* up to date for last block */
1762 int update_timestamps /* when it is called by 1723 int update_timestamps /* when it is called by
1763 file_release to convert 1724 file_release to convert
@@ -1768,47 +1729,45 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p
1768 INITIALIZE_PATH(s_search_path); /* Path to the current object item. */ 1729 INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
1769 struct item_head *p_le_ih; /* Pointer to an item header. */ 1730 struct item_head *p_le_ih; /* Pointer to an item header. */
1770 struct cpu_key s_item_key; /* Key to search for a previous file item. */ 1731 struct cpu_key s_item_key; /* Key to search for a previous file item. */
1771 loff_t n_file_size, /* Old file size. */ 1732 loff_t file_size, /* Old file size. */
1772 n_new_file_size; /* New file size. */ 1733 new_file_size; /* New file size. */
1773 int n_deleted; /* Number of deleted or truncated bytes. */ 1734 int deleted; /* Number of deleted or truncated bytes. */
1774 int retval; 1735 int retval;
1775 int err = 0; 1736 int err = 0;
1776 1737
1777 BUG_ON(!th->t_trans_id); 1738 BUG_ON(!th->t_trans_id);
1778 if (! 1739 if (!
1779 (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode) 1740 (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
1780 || S_ISLNK(p_s_inode->i_mode))) 1741 || S_ISLNK(inode->i_mode)))
1781 return 0; 1742 return 0;
1782 1743
1783 if (S_ISDIR(p_s_inode->i_mode)) { 1744 if (S_ISDIR(inode->i_mode)) {
1784 // deletion of directory - no need to update timestamps 1745 // deletion of directory - no need to update timestamps
1785 truncate_directory(th, p_s_inode); 1746 truncate_directory(th, inode);
1786 return 0; 1747 return 0;
1787 } 1748 }
1788 1749
1789 /* Get new file size. */ 1750 /* Get new file size. */
1790 n_new_file_size = p_s_inode->i_size; 1751 new_file_size = inode->i_size;
1791 1752
1792 // FIXME: note, that key type is unimportant here 1753 // FIXME: note, that key type is unimportant here
1793 make_cpu_key(&s_item_key, p_s_inode, max_reiserfs_offset(p_s_inode), 1754 make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
1794 TYPE_DIRECT, 3); 1755 TYPE_DIRECT, 3);
1795 1756
1796 retval = 1757 retval =
1797 search_for_position_by_key(p_s_inode->i_sb, &s_item_key, 1758 search_for_position_by_key(inode->i_sb, &s_item_key,
1798 &s_search_path); 1759 &s_search_path);
1799 if (retval == IO_ERROR) { 1760 if (retval == IO_ERROR) {
1800 reiserfs_warning(p_s_inode->i_sb, 1761 reiserfs_error(inode->i_sb, "vs-5657",
1801 "vs-5657: reiserfs_do_truncate: " 1762 "i/o failure occurred trying to truncate %K",
1802 "i/o failure occurred trying to truncate %K", 1763 &s_item_key);
1803 &s_item_key);
1804 err = -EIO; 1764 err = -EIO;
1805 goto out; 1765 goto out;
1806 } 1766 }
1807 if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) { 1767 if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1808 reiserfs_warning(p_s_inode->i_sb, 1768 reiserfs_error(inode->i_sb, "PAP-5660",
1809 "PAP-5660: reiserfs_do_truncate: " 1769 "wrong result %d of search for %K", retval,
1810 "wrong result %d of search for %K", retval, 1770 &s_item_key);
1811 &s_item_key);
1812 1771
1813 err = -EIO; 1772 err = -EIO;
1814 goto out; 1773 goto out;
@@ -1819,56 +1778,56 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p
1819 /* Get real file size (total length of all file items) */ 1778 /* Get real file size (total length of all file items) */
1820 p_le_ih = PATH_PITEM_HEAD(&s_search_path); 1779 p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1821 if (is_statdata_le_ih(p_le_ih)) 1780 if (is_statdata_le_ih(p_le_ih))
1822 n_file_size = 0; 1781 file_size = 0;
1823 else { 1782 else {
1824 loff_t offset = le_ih_k_offset(p_le_ih); 1783 loff_t offset = le_ih_k_offset(p_le_ih);
1825 int bytes = 1784 int bytes =
1826 op_bytes_number(p_le_ih, p_s_inode->i_sb->s_blocksize); 1785 op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
1827 1786
1828 /* this may mismatch with real file size: if last direct item 1787 /* this may mismatch with real file size: if last direct item
1829 had no padding zeros and last unformatted node had no free 1788 had no padding zeros and last unformatted node had no free
1830 space, this file would have this file size */ 1789 space, this file would have this file size */
1831 n_file_size = offset + bytes - 1; 1790 file_size = offset + bytes - 1;
1832 } 1791 }
1833 /* 1792 /*
1834 * are we doing a full truncate or delete, if so 1793 * are we doing a full truncate or delete, if so
1835 * kick in the reada code 1794 * kick in the reada code
1836 */ 1795 */
1837 if (n_new_file_size == 0) 1796 if (new_file_size == 0)
1838 s_search_path.reada = PATH_READA | PATH_READA_BACK; 1797 s_search_path.reada = PATH_READA | PATH_READA_BACK;
1839 1798
1840 if (n_file_size == 0 || n_file_size < n_new_file_size) { 1799 if (file_size == 0 || file_size < new_file_size) {
1841 goto update_and_out; 1800 goto update_and_out;
1842 } 1801 }
1843 1802
1844 /* Update key to search for the last file item. */ 1803 /* Update key to search for the last file item. */
1845 set_cpu_key_k_offset(&s_item_key, n_file_size); 1804 set_cpu_key_k_offset(&s_item_key, file_size);
1846 1805
1847 do { 1806 do {
1848 /* Cut or delete file item. */ 1807 /* Cut or delete file item. */
1849 n_deleted = 1808 deleted =
1850 reiserfs_cut_from_item(th, &s_search_path, &s_item_key, 1809 reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
1851 p_s_inode, page, n_new_file_size); 1810 inode, page, new_file_size);
1852 if (n_deleted < 0) { 1811 if (deleted < 0) {
1853 reiserfs_warning(p_s_inode->i_sb, 1812 reiserfs_warning(inode->i_sb, "vs-5665",
1854 "vs-5665: reiserfs_do_truncate: reiserfs_cut_from_item failed"); 1813 "reiserfs_cut_from_item failed");
1855 reiserfs_check_path(&s_search_path); 1814 reiserfs_check_path(&s_search_path);
1856 return 0; 1815 return 0;
1857 } 1816 }
1858 1817
1859 RFALSE(n_deleted > n_file_size, 1818 RFALSE(deleted > file_size,
1860 "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K", 1819 "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1861 n_deleted, n_file_size, &s_item_key); 1820 deleted, file_size, &s_item_key);
1862 1821
1863 /* Change key to search the last file item. */ 1822 /* Change key to search the last file item. */
1864 n_file_size -= n_deleted; 1823 file_size -= deleted;
1865 1824
1866 set_cpu_key_k_offset(&s_item_key, n_file_size); 1825 set_cpu_key_k_offset(&s_item_key, file_size);
1867 1826
1868 /* While there are bytes to truncate and previous file item is present in the tree. */ 1827 /* While there are bytes to truncate and previous file item is present in the tree. */
1869 1828
1870 /* 1829 /*
1871 ** This loop could take a really long time, and could log 1830 ** This loop could take a really long time, and could log
1872 ** many more blocks than a transaction can hold. So, we do a polite 1831 ** many more blocks than a transaction can hold. So, we do a polite
1873 ** journal end here, and if the transaction needs ending, we make 1832 ** journal end here, and if the transaction needs ending, we make
1874 ** sure the file is consistent before ending the current trans 1833 ** sure the file is consistent before ending the current trans
@@ -1877,37 +1836,38 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p
1877 if (journal_transaction_should_end(th, 0) || 1836 if (journal_transaction_should_end(th, 0) ||
1878 reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { 1837 reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
1879 int orig_len_alloc = th->t_blocks_allocated; 1838 int orig_len_alloc = th->t_blocks_allocated;
1880 decrement_counters_in_path(&s_search_path); 1839 pathrelse(&s_search_path);
1881 1840
1882 if (update_timestamps) { 1841 if (update_timestamps) {
1883 p_s_inode->i_mtime = p_s_inode->i_ctime = 1842 inode->i_mtime = CURRENT_TIME_SEC;
1884 CURRENT_TIME_SEC; 1843 inode->i_ctime = CURRENT_TIME_SEC;
1885 } 1844 }
1886 reiserfs_update_sd(th, p_s_inode); 1845 reiserfs_update_sd(th, inode);
1887 1846
1888 err = journal_end(th, p_s_inode->i_sb, orig_len_alloc); 1847 err = journal_end(th, inode->i_sb, orig_len_alloc);
1889 if (err) 1848 if (err)
1890 goto out; 1849 goto out;
1891 err = journal_begin(th, p_s_inode->i_sb, 1850 err = journal_begin(th, inode->i_sb,
1892 JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ; 1851 JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
1893 if (err) 1852 if (err)
1894 goto out; 1853 goto out;
1895 reiserfs_update_inode_transaction(p_s_inode); 1854 reiserfs_update_inode_transaction(inode);
1896 } 1855 }
1897 } while (n_file_size > ROUND_UP(n_new_file_size) && 1856 } while (file_size > ROUND_UP(new_file_size) &&
1898 search_for_position_by_key(p_s_inode->i_sb, &s_item_key, 1857 search_for_position_by_key(inode->i_sb, &s_item_key,
1899 &s_search_path) == POSITION_FOUND); 1858 &s_search_path) == POSITION_FOUND);
1900 1859
1901 RFALSE(n_file_size > ROUND_UP(n_new_file_size), 1860 RFALSE(file_size > ROUND_UP(new_file_size),
1902 "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d", 1861 "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1903 n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid); 1862 new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
1904 1863
1905 update_and_out: 1864 update_and_out:
1906 if (update_timestamps) { 1865 if (update_timestamps) {
1907 // this is truncate, not file closing 1866 // this is truncate, not file closing
1908 p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC; 1867 inode->i_mtime = CURRENT_TIME_SEC;
1868 inode->i_ctime = CURRENT_TIME_SEC;
1909 } 1869 }
1910 reiserfs_update_sd(th, p_s_inode); 1870 reiserfs_update_sd(th, inode);
1911 1871
1912 out: 1872 out:
1913 pathrelse(&s_search_path); 1873 pathrelse(&s_search_path);
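The comment carried through the hunk above explains the problem this loop solves: cutting a large file can log more blocks than a single transaction can hold, so the loop politely ends the transaction and opens a fresh one whenever it runs low. Below is a minimal sketch of that pattern, using only the journal calls visible in this hunk; the helper name is hypothetical, and the pathrelse and error handling are abbreviated.

        /* Hypothetical helper condensing the restart logic above. */
        static int polite_journal_restart(struct reiserfs_transaction_handle *th,
                                          struct inode *inode,
                                          int update_timestamps)
        {
                int err;

                /* keep going while the current transaction has room */
                if (!journal_transaction_should_end(th, 0) &&
                    reiserfs_transaction_free_space(th) >
                    JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
                        return 0;

                /* leave the on-disk stat data consistent before closing */
                if (update_timestamps) {
                        inode->i_mtime = CURRENT_TIME_SEC;
                        inode->i_ctime = CURRENT_TIME_SEC;
                }
                reiserfs_update_sd(th, inode);

                err = journal_end(th, inode->i_sb, th->t_blocks_allocated);
                if (err)
                        return err;
                err = journal_begin(th, inode->i_sb,
                                    JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD +
                                    JOURNAL_PER_BALANCE_CNT * 4);
                if (err)
                        return err;
                /* tie the inode to the new transaction before continuing */
                reiserfs_update_inode_transaction(inode);
                return 0;
        }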
@@ -1917,7 +1877,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p
1917 #ifdef CONFIG_REISERFS_CHECK 1877 #ifdef CONFIG_REISERFS_CHECK
1918// this makes sure, that we __append__, not overwrite or add holes 1878// this makes sure, that we __append__, not overwrite or add holes
1919static void check_research_for_paste(struct treepath *path, 1879static void check_research_for_paste(struct treepath *path,
1920 const struct cpu_key *p_s_key) 1880 const struct cpu_key *key)
1921{ 1881{
1922 struct item_head *found_ih = get_ih(path); 1882 struct item_head *found_ih = get_ih(path);
1923 1883
@@ -1925,36 +1885,36 @@ static void check_research_for_paste(struct treepath *path,
1925 if (le_ih_k_offset(found_ih) + 1885 if (le_ih_k_offset(found_ih) +
1926 op_bytes_number(found_ih, 1886 op_bytes_number(found_ih,
1927 get_last_bh(path)->b_size) != 1887 get_last_bh(path)->b_size) !=
1928 cpu_key_k_offset(p_s_key) 1888 cpu_key_k_offset(key)
1929 || op_bytes_number(found_ih, 1889 || op_bytes_number(found_ih,
1930 get_last_bh(path)->b_size) != 1890 get_last_bh(path)->b_size) !=
1931 pos_in_item(path)) 1891 pos_in_item(path))
1932 reiserfs_panic(NULL, 1892 reiserfs_panic(NULL, "PAP-5720", "found direct item "
1933 "PAP-5720: check_research_for_paste: " 1893 "%h or position (%d) does not match "
1934 "found direct item %h or position (%d) does not match to key %K", 1894 "to key %K", found_ih,
1935 found_ih, pos_in_item(path), p_s_key); 1895 pos_in_item(path), key);
1936 } 1896 }
1937 if (is_indirect_le_ih(found_ih)) { 1897 if (is_indirect_le_ih(found_ih)) {
1938 if (le_ih_k_offset(found_ih) + 1898 if (le_ih_k_offset(found_ih) +
1939 op_bytes_number(found_ih, 1899 op_bytes_number(found_ih,
1940 get_last_bh(path)->b_size) != 1900 get_last_bh(path)->b_size) !=
1941 cpu_key_k_offset(p_s_key) 1901 cpu_key_k_offset(key)
1942 || I_UNFM_NUM(found_ih) != pos_in_item(path) 1902 || I_UNFM_NUM(found_ih) != pos_in_item(path)
1943 || get_ih_free_space(found_ih) != 0) 1903 || get_ih_free_space(found_ih) != 0)
1944 reiserfs_panic(NULL, 1904 reiserfs_panic(NULL, "PAP-5730", "found indirect "
1945 "PAP-5730: check_research_for_paste: " 1905 "item (%h) or position (%d) does not "
1946 "found indirect item (%h) or position (%d) does not match to key (%K)", 1906 "match to key (%K)",
1947 found_ih, pos_in_item(path), p_s_key); 1907 found_ih, pos_in_item(path), key);
1948 } 1908 }
1949 } 1909 }
1950 #endif /* config reiserfs check */ 1910 #endif /* config reiserfs check */
1951 1911
1952/* Paste bytes to the existing item. Returns bytes number pasted into the item. */ 1912/* Paste bytes to the existing item. Returns bytes number pasted into the item. */
1953int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *p_s_search_path, /* Path to the pasted item. */ 1913int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *search_path, /* Path to the pasted item. */
1954 const struct cpu_key *p_s_key, /* Key to search for the needed item. */ 1914 const struct cpu_key *key, /* Key to search for the needed item. */
1955 struct inode *inode, /* Inode item belongs to */ 1915 struct inode *inode, /* Inode item belongs to */
1956 const char *p_c_body, /* Pointer to the bytes to paste. */ 1916 const char *body, /* Pointer to the bytes to paste. */
1957 int n_pasted_size) 1917 int pasted_size)
1958 { /* Size of pasted bytes. */ 1918 { /* Size of pasted bytes. */
1959 struct tree_balance s_paste_balance; 1919 struct tree_balance s_paste_balance;
1960 int retval; 1920 int retval;
@@ -1967,18 +1927,18 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
1967 #ifdef REISERQUOTA_DEBUG 1927 #ifdef REISERQUOTA_DEBUG
1968 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, 1928 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1969 "reiserquota paste_into_item(): allocating %u id=%u type=%c", 1929 "reiserquota paste_into_item(): allocating %u id=%u type=%c",
1970 n_pasted_size, inode->i_uid, 1930 pasted_size, inode->i_uid,
1971 key2type(&(p_s_key->on_disk_key))); 1931 key2type(&(key->on_disk_key)));
1972 #endif 1932 #endif
1973 1933
1974 if (vfs_dq_alloc_space_nodirty(inode, n_pasted_size)) { 1934 if (vfs_dq_alloc_space_nodirty(inode, pasted_size)) {
1975 pathrelse(p_s_search_path); 1935 pathrelse(search_path);
1976 return -EDQUOT; 1936 return -EDQUOT;
1977 } 1937 }
1978 init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, 1938 init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
1979 n_pasted_size); 1939 pasted_size);
1980 #ifdef DISPLACE_NEW_PACKING_LOCALITIES 1940 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1981 s_paste_balance.key = p_s_key->on_disk_key; 1941 s_paste_balance.key = key->on_disk_key;
1982 #endif 1942 #endif
1983 1943
1984 /* DQUOT_* can schedule, must check before the fix_nodes */ 1944 /* DQUOT_* can schedule, must check before the fix_nodes */
@@ -1988,33 +1948,33 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
1988 1948
1989 while ((retval = 1949 while ((retval =
1990 fix_nodes(M_PASTE, &s_paste_balance, NULL, 1950 fix_nodes(M_PASTE, &s_paste_balance, NULL,
1991 p_c_body)) == REPEAT_SEARCH) { 1951 body)) == REPEAT_SEARCH) {
1992 search_again: 1952 search_again:
1993 /* file system changed while we were in the fix_nodes */ 1953 /* file system changed while we were in the fix_nodes */
1994 PROC_INFO_INC(th->t_super, paste_into_item_restarted); 1954 PROC_INFO_INC(th->t_super, paste_into_item_restarted);
1995 retval = 1955 retval =
1996 search_for_position_by_key(th->t_super, p_s_key, 1956 search_for_position_by_key(th->t_super, key,
1997 p_s_search_path); 1957 search_path);
1998 if (retval == IO_ERROR) { 1958 if (retval == IO_ERROR) {
1999 retval = -EIO; 1959 retval = -EIO;
2000 goto error_out; 1960 goto error_out;
2001 } 1961 }
2002 if (retval == POSITION_FOUND) { 1962 if (retval == POSITION_FOUND) {
2003 reiserfs_warning(inode->i_sb, 1963 reiserfs_warning(inode->i_sb, "PAP-5710",
2004 "PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists", 1964 "entry or pasted byte (%K) exists",
2005 p_s_key); 1965 key);
2006 retval = -EEXIST; 1966 retval = -EEXIST;
2007 goto error_out; 1967 goto error_out;
2008 } 1968 }
2009 #ifdef CONFIG_REISERFS_CHECK 1969 #ifdef CONFIG_REISERFS_CHECK
2010 check_research_for_paste(p_s_search_path, p_s_key); 1970 check_research_for_paste(search_path, key);
2011 #endif 1971 #endif
2012 } 1972 }
2013 1973
2014 /* Perform balancing after all resources are collected by fix_nodes, and 1974 /* Perform balancing after all resources are collected by fix_nodes, and
2015 accessing them will not risk triggering schedule. */ 1975 accessing them will not risk triggering schedule. */
2016 if (retval == CARRY_ON) { 1976 if (retval == CARRY_ON) {
2017 do_balance(&s_paste_balance, NULL /*ih */ , p_c_body, M_PASTE); 1977 do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
2018 return 0; 1978 return 0;
2019 } 1979 }
2020 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; 1980 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
@@ -2024,18 +1984,24 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
2024 #ifdef REISERQUOTA_DEBUG 1984 #ifdef REISERQUOTA_DEBUG
2025 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, 1985 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2026 "reiserquota paste_into_item(): freeing %u id=%u type=%c", 1986 "reiserquota paste_into_item(): freeing %u id=%u type=%c",
2027 n_pasted_size, inode->i_uid, 1987 pasted_size, inode->i_uid,
2028 key2type(&(p_s_key->on_disk_key))); 1988 key2type(&(key->on_disk_key)));
2029 #endif 1989 #endif
2030 vfs_dq_free_space_nodirty(inode, n_pasted_size); 1990 vfs_dq_free_space_nodirty(inode, pasted_size);
2031 return retval; 1991 return retval;
2032 } 1992 }
2033 1993
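reiserfs_paste_into_item() above and reiserfs_insert_item() below share the same retry protocol around fix_nodes(): if the filesystem changed while node resources were being collected, the search is redone and fix_nodes() retried, and only a CARRY_ON result lets do_balance() run, at which point nothing can schedule anymore. A control-flow outline of the paste flavour (quota bookkeeping and the error_out label are dropped for brevity):

        while ((retval = fix_nodes(M_PASTE, &s_paste_balance, NULL,
                                   body)) == REPEAT_SEARCH) {
                /* tree changed under us: redo the search, then retry */
                retval = search_for_position_by_key(th->t_super, key,
                                                    search_path);
                if (retval == IO_ERROR)
                        return -EIO;
                if (retval == POSITION_FOUND)
                        return -EEXIST; /* target byte appeared meanwhile */
        }
        if (retval == CARRY_ON) {
                /* all resources pinned; balancing can no longer schedule */
                do_balance(&s_paste_balance, NULL /* ih */, body, M_PASTE);
                return 0;
        }
        return (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;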
2034 /* Insert new item into the buffer at the path. */ 1994 /* Insert new item into the buffer at the path.
2035 int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct treepath *p_s_path, /* Path to the inserteded item. */ 1995 * th - active transaction handle
2036 const struct cpu_key *key, struct item_head *p_s_ih, /* Pointer to the item header to insert. */ 1996 * path - path to the inserted item
2037 struct inode *inode, const char *p_c_body) 1997 * ih - pointer to the item header to insert
2038 { /* Pointer to the bytes to insert. */ 1998 * body - pointer to the bytes to insert
 1999 */
2000 int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2001 struct treepath *path, const struct cpu_key *key,
2002 struct item_head *ih, struct inode *inode,
2003 const char *body)
2004 {
2039 struct tree_balance s_ins_balance; 2005 struct tree_balance s_ins_balance;
2040 int retval; 2006 int retval;
2041 int fs_gen = 0; 2007 int fs_gen = 0;
@@ -2045,28 +2011,27 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct treepath
2045 2011
2046 if (inode) { /* Do we count quotas for item? */ 2012 if (inode) { /* Do we count quotas for item? */
2047 fs_gen = get_generation(inode->i_sb); 2013 fs_gen = get_generation(inode->i_sb);
2048 quota_bytes = ih_item_len(p_s_ih); 2014 quota_bytes = ih_item_len(ih);
2049 2015
2050 /* hack so the quota code doesn't have to guess if the file has 2016 /* hack so the quota code doesn't have to guess if the file has
2051 ** a tail, links are always tails, so there's no guessing needed 2017 ** a tail, links are always tails, so there's no guessing needed
2052 */ 2018 */
2053 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_s_ih)) { 2019 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
2054 quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE; 2020 quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
2055 }
2056 #ifdef REISERQUOTA_DEBUG 2021 #ifdef REISERQUOTA_DEBUG
2057 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, 2022 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2058 "reiserquota insert_item(): allocating %u id=%u type=%c", 2023 "reiserquota insert_item(): allocating %u id=%u type=%c",
2059 quota_bytes, inode->i_uid, head2type(p_s_ih)); 2024 quota_bytes, inode->i_uid, head2type(ih));
2060 #endif 2025 #endif
2061 /* We can't dirty inode here. It would be immediately written but 2026 /* We can't dirty inode here. It would be immediately written but
2062 * appropriate stat item isn't inserted yet... */ 2027 * appropriate stat item isn't inserted yet... */
2063 if (vfs_dq_alloc_space_nodirty(inode, quota_bytes)) { 2028 if (vfs_dq_alloc_space_nodirty(inode, quota_bytes)) {
2064 pathrelse(p_s_path); 2029 pathrelse(path);
2065 return -EDQUOT; 2030 return -EDQUOT;
2066 } 2031 }
2067 } 2032 }
2068 init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, 2033 init_tb_struct(th, &s_ins_balance, th->t_super, path,
2069 IH_SIZE + ih_item_len(p_s_ih)); 2034 IH_SIZE + ih_item_len(ih));
2070 #ifdef DISPLACE_NEW_PACKING_LOCALITIES 2035 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
2071 s_ins_balance.key = key->on_disk_key; 2036 s_ins_balance.key = key->on_disk_key;
2072 #endif 2037 #endif
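The quota charge computed in the hunk above sidesteps guessing whether the file has a tail: symlinks are always tails, and for anything else a direct item is billed as a whole block plus an unformatted-node pointer, so a later tail conversion presumably leaves the accounting unchanged. The decision in isolation (identifiers as in the new-side code; an illustrative fragment, not a new helper):

        int quota_bytes = ih_item_len(ih);

        /* links are always tails; other direct items are billed as a
         * full block plus the pointer a tail conversion would add */
        if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
                quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;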
@@ -2076,19 +2041,18 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct treepath
2076 } 2041 }
2077 2042
2078 while ((retval = 2043 while ((retval =
2079 fix_nodes(M_INSERT, &s_ins_balance, p_s_ih, 2044 fix_nodes(M_INSERT, &s_ins_balance, ih,
2080 p_c_body)) == REPEAT_SEARCH) { 2045 body)) == REPEAT_SEARCH) {
2081 search_again: 2046 search_again:
2082 /* file system changed while we were in the fix_nodes */ 2047 /* file system changed while we were in the fix_nodes */
2083 PROC_INFO_INC(th->t_super, insert_item_restarted); 2048 PROC_INFO_INC(th->t_super, insert_item_restarted);
2084 retval = search_item(th->t_super, key, p_s_path); 2049 retval = search_item(th->t_super, key, path);
2085 if (retval == IO_ERROR) { 2050 if (retval == IO_ERROR) {
2086 retval = -EIO; 2051 retval = -EIO;
2087 goto error_out; 2052 goto error_out;
2088 } 2053 }
2089 if (retval == ITEM_FOUND) { 2054 if (retval == ITEM_FOUND) {
2090 reiserfs_warning(th->t_super, 2055 reiserfs_warning(th->t_super, "PAP-5760",
2091 "PAP-5760: reiserfs_insert_item: "
2092 "key %K already exists in the tree", 2056 "key %K already exists in the tree",
2093 key); 2057 key);
2094 retval = -EEXIST; 2058 retval = -EEXIST;
@@ -2098,7 +2062,7 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct treepath
2098 2062
2099 /* make balancing after all resources will be collected at a time */ 2063 /* make balancing after all resources will be collected at a time */
2100 if (retval == CARRY_ON) { 2064 if (retval == CARRY_ON) {
2101 do_balance(&s_ins_balance, p_s_ih, p_c_body, M_INSERT); 2065 do_balance(&s_ins_balance, ih, body, M_INSERT);
2102 return 0; 2066 return 0;
2103 } 2067 }
2104 2068
@@ -2109,7 +2073,7 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct treepath
2109 #ifdef REISERQUOTA_DEBUG 2073 #ifdef REISERQUOTA_DEBUG
2110 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, 2074 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
2111 "reiserquota insert_item(): freeing %u id=%u type=%c", 2075 "reiserquota insert_item(): freeing %u id=%u type=%c",
2112 quota_bytes, inode->i_uid, head2type(p_s_ih)); 2076 quota_bytes, inode->i_uid, head2type(ih));
2113 #endif 2077 #endif
2114 if (inode) 2078 if (inode)
2115 vfs_dq_free_space_nodirty(inode, quota_bytes); 2079 vfs_dq_free_space_nodirty(inode, quota_bytes);
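The change repeated throughout this file, and in super.c below, is a calling-convention cleanup: reiserfs_warning(), reiserfs_error() and reiserfs_panic() now take the unique message ID as a separate argument instead of embedding it, together with the function name, in the format string. Schematically, using the vs-5665 case from above:

        /* old convention: ID and function name baked into the format */
        reiserfs_warning(inode->i_sb,
                         "vs-5665: reiserfs_do_truncate: "
                         "reiserfs_cut_from_item failed");

        /* new convention: the ID travels as its own argument */
        reiserfs_warning(inode->i_sb, "vs-5665",
                         "reiserfs_cut_from_item failed");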
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 5dbafb739401..972250c62896 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -183,9 +183,9 @@ static int finish_unfinished(struct super_block *s)
183 if (REISERFS_SB(s)->s_qf_names[i]) { 183 if (REISERFS_SB(s)->s_qf_names[i]) {
184 int ret = reiserfs_quota_on_mount(s, i); 184 int ret = reiserfs_quota_on_mount(s, i);
185 if (ret < 0) 185 if (ret < 0)
186 reiserfs_warning(s, 186 reiserfs_warning(s, "reiserfs-2500",
187 "reiserfs: cannot turn on journaled quota: error %d", 187 "cannot turn on journaled "
188 ret); 188 "quota: error %d", ret);
189 } 189 }
190 } 190 }
191 #endif 191 #endif
@@ -195,17 +195,16 @@ static int finish_unfinished(struct super_block *s)
195 while (!retval) { 195 while (!retval) {
196 retval = search_item(s, &max_cpu_key, &path); 196 retval = search_item(s, &max_cpu_key, &path);
197 if (retval != ITEM_NOT_FOUND) { 197 if (retval != ITEM_NOT_FOUND) {
198 reiserfs_warning(s, 198 reiserfs_error(s, "vs-2140",
199 "vs-2140: finish_unfinished: search_by_key returned %d", 199 "search_by_key returned %d", retval);
200 retval);
201 break; 200 break;
202 } 201 }
203 202
204 bh = get_last_bh(&path); 203 bh = get_last_bh(&path);
205 item_pos = get_item_pos(&path); 204 item_pos = get_item_pos(&path);
206 if (item_pos != B_NR_ITEMS(bh)) { 205 if (item_pos != B_NR_ITEMS(bh)) {
207 reiserfs_warning(s, 206 reiserfs_warning(s, "vs-2060",
208 "vs-2060: finish_unfinished: wrong position found"); 207 "wrong position found");
209 break; 208 break;
210 } 209 }
211 item_pos--; 210 item_pos--;
@@ -235,8 +234,7 @@ static int finish_unfinished(struct super_block *s)
235 if (!inode) { 234 if (!inode) {
236 /* the unlink almost completed, it just did not manage to remove 235 /* the unlink almost completed, it just did not manage to remove
237 "save" link and release objectid */ 236 "save" link and release objectid */
238 reiserfs_warning(s, 237 reiserfs_warning(s, "vs-2180", "iget failed for %K",
239 "vs-2180: finish_unfinished: iget failed for %K",
240 &obj_key); 238 &obj_key);
241 retval = remove_save_link_only(s, &save_link_key, 1); 239 retval = remove_save_link_only(s, &save_link_key, 1);
242 continue; 240 continue;
@@ -244,8 +242,8 @@ static int finish_unfinished(struct super_block *s)
244 242
245 if (!truncate && inode->i_nlink) { 243 if (!truncate && inode->i_nlink) {
246 /* file is not unlinked */ 244 /* file is not unlinked */
247 reiserfs_warning(s, 245 reiserfs_warning(s, "vs-2185",
248 "vs-2185: finish_unfinished: file %K is not unlinked", 246 "file %K is not unlinked",
249 &obj_key); 247 &obj_key);
250 retval = remove_save_link_only(s, &save_link_key, 0); 248 retval = remove_save_link_only(s, &save_link_key, 0);
251 continue; 249 continue;
@@ -257,8 +255,9 @@ static int finish_unfinished(struct super_block *s)
257 The only imaginable way is to execute unfinished truncate request 255 The only imaginable way is to execute unfinished truncate request
258 then boot into old kernel, remove the file and create dir with 256 then boot into old kernel, remove the file and create dir with
259 the same key. */ 257 the same key. */
260 reiserfs_warning(s, 258 reiserfs_warning(s, "green-2101",
261 "green-2101: impossible truncate on a directory %k. Please report", 259 "impossible truncate on a "
260 "directory %k. Please report",
262 INODE_PKEY(inode)); 261 INODE_PKEY(inode));
263 retval = remove_save_link_only(s, &save_link_key, 0); 262 retval = remove_save_link_only(s, &save_link_key, 0);
264 truncate = 0; 263 truncate = 0;
@@ -288,9 +287,10 @@ static int finish_unfinished(struct super_block *s)
288 /* removal gets completed in iput */ 287 /* removal gets completed in iput */
289 retval = 0; 288 retval = 0;
290 } else { 289 } else {
291 reiserfs_warning(s, "Dead loop in " 290 reiserfs_warning(s, "super-2189", "Dead loop "
292 "finish_unfinished detected, " 291 "in finish_unfinished "
293 "just remove save link\n"); 292 "detected, just remove "
293 "save link\n");
294 retval = remove_save_link_only(s, 294 retval = remove_save_link_only(s,
295 &save_link_key, 0); 295 &save_link_key, 0);
296 } 296 }
@@ -360,8 +360,9 @@ void add_save_link(struct reiserfs_transaction_handle *th,
360 } else { 360 } else {
361 /* truncate */ 361 /* truncate */
362 if (S_ISDIR(inode->i_mode)) 362 if (S_ISDIR(inode->i_mode))
363 reiserfs_warning(inode->i_sb, 363 reiserfs_warning(inode->i_sb, "green-2102",
364 "green-2102: Adding a truncate savelink for a directory %k! Please report", 364 "Adding a truncate savelink for "
365 "a directory %k! Please report",
365 INODE_PKEY(inode)); 366 INODE_PKEY(inode));
366 set_cpu_key_k_offset(&key, 1); 367 set_cpu_key_k_offset(&key, 1);
367 set_cpu_key_k_type(&key, TYPE_INDIRECT); 368 set_cpu_key_k_type(&key, TYPE_INDIRECT);
@@ -376,9 +377,9 @@ void add_save_link(struct reiserfs_transaction_handle *th,
376 retval = search_item(inode->i_sb, &key, &path); 377 retval = search_item(inode->i_sb, &key, &path);
377 if (retval != ITEM_NOT_FOUND) { 378 if (retval != ITEM_NOT_FOUND) {
378 if (retval != -ENOSPC) 379 if (retval != -ENOSPC)
379 reiserfs_warning(inode->i_sb, "vs-2100: add_save_link:" 380 reiserfs_error(inode->i_sb, "vs-2100",
380 "search_by_key (%K) returned %d", &key, 381 "search_by_key (%K) returned %d", &key,
381 retval); 382 retval);
382 pathrelse(&path); 383 pathrelse(&path);
383 return; 384 return;
384 } 385 }
@@ -391,9 +392,8 @@ void add_save_link(struct reiserfs_transaction_handle *th,
391 reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link); 392 reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link);
392 if (retval) { 393 if (retval) {
393 if (retval != -ENOSPC) 394 if (retval != -ENOSPC)
394 reiserfs_warning(inode->i_sb, 395 reiserfs_error(inode->i_sb, "vs-2120",
395 "vs-2120: add_save_link: insert_item returned %d", 396 "insert_item returned %d", retval);
396 retval);
397 } else { 397 } else {
398 if (truncate) 398 if (truncate)
399 REISERFS_I(inode)->i_flags |= 399 REISERFS_I(inode)->i_flags |=
@@ -492,8 +492,7 @@ static void reiserfs_put_super(struct super_block *s)
492 print_statistics(s); 492 print_statistics(s);
493 493
494 if (REISERFS_SB(s)->reserved_blocks != 0) { 494 if (REISERFS_SB(s)->reserved_blocks != 0) {
495 reiserfs_warning(s, 495 reiserfs_warning(s, "green-2005", "reserved blocks left %d",
496 "green-2005: reiserfs_put_super: reserved blocks left %d",
497 REISERFS_SB(s)->reserved_blocks); 496 REISERFS_SB(s)->reserved_blocks);
498 } 497 }
499 498
@@ -559,8 +558,8 @@ static void reiserfs_dirty_inode(struct inode *inode)
559 558
560 int err = 0; 559 int err = 0;
561 if (inode->i_sb->s_flags & MS_RDONLY) { 560 if (inode->i_sb->s_flags & MS_RDONLY) {
562 reiserfs_warning(inode->i_sb, 561 reiserfs_warning(inode->i_sb, "clm-6006",
563 "clm-6006: writing inode %lu on readonly FS", 562 "writing inode %lu on readonly FS",
564 inode->i_ino); 563 inode->i_ino);
565 return; 564 return;
566 } 565 }
@@ -757,7 +756,7 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
757 char **opt_arg, unsigned long *bit_flags) 756 char **opt_arg, unsigned long *bit_flags)
758 { 757 {
759 char *p; 758 char *p;
760 /* foo=bar, 759 /* foo=bar,
761 ^ ^ ^ 760 ^ ^ ^
762 | | +-- option_end 761 | | +-- option_end
763 | +-- arg_start 762 | +-- arg_start
@@ -792,13 +791,15 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
792 if (bit_flags) { 791 if (bit_flags) {
793 if (opt->clrmask == 792 if (opt->clrmask ==
794 (1 << REISERFS_UNSUPPORTED_OPT)) 793 (1 << REISERFS_UNSUPPORTED_OPT))
795 reiserfs_warning(s, "%s not supported.", 794 reiserfs_warning(s, "super-6500",
795 "%s not supported.\n",
796 p); 796 p);
797 else 797 else
798 *bit_flags &= ~opt->clrmask; 798 *bit_flags &= ~opt->clrmask;
799 if (opt->setmask == 799 if (opt->setmask ==
800 (1 << REISERFS_UNSUPPORTED_OPT)) 800 (1 << REISERFS_UNSUPPORTED_OPT))
801 reiserfs_warning(s, "%s not supported.", 801 reiserfs_warning(s, "super-6501",
802 "%s not supported.\n",
802 p); 803 p);
803 else 804 else
804 *bit_flags |= opt->setmask; 805 *bit_flags |= opt->setmask;
@@ -807,7 +808,8 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
807 } 808 }
808 } 809 }
809 if (!opt->option_name) { 810 if (!opt->option_name) {
810 reiserfs_warning(s, "unknown mount option \"%s\"", p); 811 reiserfs_warning(s, "super-6502",
812 "unknown mount option \"%s\"", p);
811 return -1; 813 return -1;
812 } 814 }
813 815
@@ -815,8 +817,9 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
815 switch (*p) { 817 switch (*p) {
816 case '=': 818 case '=':
817 if (!opt->arg_required) { 819 if (!opt->arg_required) {
818 reiserfs_warning(s, 820 reiserfs_warning(s, "super-6503",
819 "the option \"%s\" does not require an argument", 821 "the option \"%s\" does not "
822 "require an argument\n",
820 opt->option_name); 823 opt->option_name);
821 return -1; 824 return -1;
822 } 825 }
@@ -824,14 +827,15 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
824 827
825 case 0: 828 case 0:
826 if (opt->arg_required) { 829 if (opt->arg_required) {
827 reiserfs_warning(s, 830 reiserfs_warning(s, "super-6504",
828 "the option \"%s\" requires an argument", 831 "the option \"%s\" requires an "
829 opt->option_name); 832 "argument\n", opt->option_name);
830 return -1; 833 return -1;
831 } 834 }
832 break; 835 break;
833 default: 836 default:
834 reiserfs_warning(s, "head of option \"%s\" is only correct", 837 reiserfs_warning(s, "super-6505",
838 "head of option \"%s\" is only correct\n",
835 opt->option_name); 839 opt->option_name);
836 return -1; 840 return -1;
837 } 841 }
@@ -843,7 +847,8 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
843 && !(opt->arg_required & (1 << REISERFS_OPT_ALLOWEMPTY)) 847 && !(opt->arg_required & (1 << REISERFS_OPT_ALLOWEMPTY))
844 && !strlen(p)) { 848 && !strlen(p)) {
845 /* this catches "option=," if not allowed */ 849 /* this catches "option=," if not allowed */
846 reiserfs_warning(s, "empty argument for \"%s\"", 850 reiserfs_warning(s, "super-6506",
851 "empty argument for \"%s\"\n",
847 opt->option_name); 852 opt->option_name);
848 return -1; 853 return -1;
849 } 854 }
@@ -865,7 +870,8 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
865 } 870 }
866 } 871 }
867 872
868 reiserfs_warning(s, "bad value \"%s\" for option \"%s\"", p, 873 reiserfs_warning(s, "super-6506",
874 "bad value \"%s\" for option \"%s\"\n", p,
869 opt->option_name); 875 opt->option_name);
870 return -1; 876 return -1;
871 } 877 }
@@ -955,9 +961,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
955 *blocks = simple_strtoul(arg, &p, 0); 961 *blocks = simple_strtoul(arg, &p, 0);
956 if (*p != '\0') { 962 if (*p != '\0') {
957 /* NNN does not look like a number */ 963 /* NNN does not look like a number */
958 reiserfs_warning(s, 964 reiserfs_warning(s, "super-6507",
959 "reiserfs_parse_options: bad value %s", 965 "bad value %s for "
960 arg); 966 "-oresize\n", arg);
961 return 0; 967 return 0;
962 } 968 }
963 } 969 }
@@ -968,8 +974,8 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
968 unsigned long val = simple_strtoul(arg, &p, 0); 974 unsigned long val = simple_strtoul(arg, &p, 0);
969 /* commit=NNN (time in seconds) */ 975 /* commit=NNN (time in seconds) */
970 if (*p != '\0' || val >= (unsigned int)-1) { 976 if (*p != '\0' || val >= (unsigned int)-1) {
971 reiserfs_warning(s, 977 reiserfs_warning(s, "super-6508",
972 "reiserfs_parse_options: bad value %s", 978 "bad value %s for -ocommit\n",
973 arg); 979 arg);
974 return 0; 980 return 0;
975 } 981 }
@@ -977,16 +983,18 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
977 } 983 }
978 984
979 if (c == 'w') { 985 if (c == 'w') {
980 reiserfs_warning(s, "reiserfs: nolargeio option is no longer supported"); 986 reiserfs_warning(s, "super-6509", "nolargeio option "
987 "is no longer supported");
981 return 0; 988 return 0;
982 } 989 }
983 990
984 if (c == 'j') { 991 if (c == 'j') {
985 if (arg && *arg && jdev_name) { 992 if (arg && *arg && jdev_name) {
986 if (*jdev_name) { //Hm, already assigned? 993 if (*jdev_name) { //Hm, already assigned?
987 reiserfs_warning(s, 994 reiserfs_warning(s, "super-6510",
988 "reiserfs_parse_options: journal device was already specified to be %s", 995 "journal device was "
989 *jdev_name); 996 "already specified to "
997 "be %s", *jdev_name);
990 return 0; 998 return 0;
991 } 999 }
992 *jdev_name = arg; 1000 *jdev_name = arg;
@@ -998,29 +1006,35 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
998 1006
999 if (sb_any_quota_loaded(s) && 1007 if (sb_any_quota_loaded(s) &&
1000 (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) { 1008 (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
1001 reiserfs_warning(s, 1009 reiserfs_warning(s, "super-6511",
1002 "reiserfs_parse_options: cannot change journaled quota options when quota turned on."); 1010 "cannot change journaled "
1011 "quota options when quota "
1012 "turned on.");
1003 return 0; 1013 return 0;
1004 } 1014 }
1005 if (*arg) { /* Some filename specified? */ 1015 if (*arg) { /* Some filename specified? */
1006 if (REISERFS_SB(s)->s_qf_names[qtype] 1016 if (REISERFS_SB(s)->s_qf_names[qtype]
1007 && strcmp(REISERFS_SB(s)->s_qf_names[qtype], 1017 && strcmp(REISERFS_SB(s)->s_qf_names[qtype],
1008 arg)) { 1018 arg)) {
1009 reiserfs_warning(s, 1019 reiserfs_warning(s, "super-6512",
1010 "reiserfs_parse_options: %s quota file already specified.", 1020 "%s quota file "
1021 "already specified.",
1011 QTYPE2NAME(qtype)); 1022 QTYPE2NAME(qtype));
1012 return 0; 1023 return 0;
1013 } 1024 }
1014 if (strchr(arg, '/')) { 1025 if (strchr(arg, '/')) {
1015 reiserfs_warning(s, 1026 reiserfs_warning(s, "super-6513",
1016 "reiserfs_parse_options: quotafile must be on filesystem root."); 1027 "quotafile must be "
1028 "on filesystem root.");
1017 return 0; 1029 return 0;
1018 } 1030 }
1019 qf_names[qtype] = 1031 qf_names[qtype] =
1020 kmalloc(strlen(arg) + 1, GFP_KERNEL); 1032 kmalloc(strlen(arg) + 1, GFP_KERNEL);
1021 if (!qf_names[qtype]) { 1033 if (!qf_names[qtype]) {
1022 reiserfs_warning(s, 1034 reiserfs_warning(s, "reiserfs-2502",
1023 "reiserfs_parse_options: not enough memory for storing quotafile name."); 1035 "not enough memory "
1036 "for storing "
1037 "quotafile name.");
1024 return 0; 1038 return 0;
1025 } 1039 }
1026 strcpy(qf_names[qtype], arg); 1040 strcpy(qf_names[qtype], arg);
@@ -1038,21 +1052,24 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1038 else if (!strcmp(arg, "vfsv0")) 1052 else if (!strcmp(arg, "vfsv0"))
1039 *qfmt = QFMT_VFS_V0; 1053 *qfmt = QFMT_VFS_V0;
1040 else { 1054 else {
1041 reiserfs_warning(s, 1055 reiserfs_warning(s, "super-6514",
1042 "reiserfs_parse_options: unknown quota format specified."); 1056 "unknown quota format "
1057 "specified.");
1043 return 0; 1058 return 0;
1044 } 1059 }
1045 if (sb_any_quota_loaded(s) && 1060 if (sb_any_quota_loaded(s) &&
1046 *qfmt != REISERFS_SB(s)->s_jquota_fmt) { 1061 *qfmt != REISERFS_SB(s)->s_jquota_fmt) {
1047 reiserfs_warning(s, 1062 reiserfs_warning(s, "super-6515",
1048 "reiserfs_parse_options: cannot change journaled quota options when quota turned on."); 1063 "cannot change journaled "
1064 "quota options when quota "
1065 "turned on.");
1049 return 0; 1066 return 0;
1050 } 1067 }
1051 } 1068 }
1052 #else 1069 #else
1053 if (c == 'u' || c == 'g' || c == 'f') { 1070 if (c == 'u' || c == 'g' || c == 'f') {
1054 reiserfs_warning(s, 1071 reiserfs_warning(s, "reiserfs-2503", "journaled "
1055 "reiserfs_parse_options: journaled quota options not supported."); 1072 "quota options not supported.");
1056 return 0; 1073 return 0;
1057 } 1074 }
1058 #endif 1075 #endif
@@ -1061,15 +1078,15 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1061 #ifdef CONFIG_QUOTA 1078 #ifdef CONFIG_QUOTA
1062 if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt 1079 if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt
1063 && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) { 1080 && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) {
1064 reiserfs_warning(s, 1081 reiserfs_warning(s, "super-6515",
1065 "reiserfs_parse_options: journaled quota format not specified."); 1082 "journaled quota format not specified.");
1066 return 0; 1083 return 0;
1067 } 1084 }
1068 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ 1085 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
1069 if (!(*mount_options & (1 << REISERFS_QUOTA)) 1086 if (!(*mount_options & (1 << REISERFS_QUOTA))
1070 && sb_any_quota_loaded(s)) { 1087 && sb_any_quota_loaded(s)) {
1071 reiserfs_warning(s, 1088 reiserfs_warning(s, "super-6516", "quota options must "
1072 "reiserfs_parse_options: quota options must be present when quota is turned on."); 1089 "be present when quota is turned on.");
1073 return 0; 1090 return 0;
1074 } 1091 }
1075 #endif 1092 #endif
@@ -1129,14 +1146,15 @@ static void handle_attrs(struct super_block *s)
1129 1146
1130 if (reiserfs_attrs(s)) { 1147 if (reiserfs_attrs(s)) {
1131 if (old_format_only(s)) { 1148 if (old_format_only(s)) {
1132 reiserfs_warning(s, 1149 reiserfs_warning(s, "super-6517", "cannot support "
1133 "reiserfs: cannot support attributes on 3.5.x disk format"); 1150 "attributes on 3.5.x disk format");
1134 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); 1151 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS);
1135 return; 1152 return;
1136 } 1153 }
1137 if (!(le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared)) { 1154 if (!(le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared)) {
1138 reiserfs_warning(s, 1155 reiserfs_warning(s, "super-6518", "cannot support "
1139 "reiserfs: cannot support attributes until flag is set in super-block"); 1156 "attributes until flag is set in "
1157 "super-block");
1140 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); 1158 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS);
1141 } 1159 }
1142 } 1160 }
@@ -1278,6 +1296,8 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1278 REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); 1296 REISERFS_SB(s)->s_mount_state = sb_umount_state(rs);
1279 s->s_flags &= ~MS_RDONLY; 1297 s->s_flags &= ~MS_RDONLY;
1280 set_sb_umount_state(rs, REISERFS_ERROR_FS); 1298 set_sb_umount_state(rs, REISERFS_ERROR_FS);
1299 if (!old_format_only(s))
1300 set_sb_mnt_count(rs, sb_mnt_count(rs) + 1);
1281 /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */ 1301 /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */
1282 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); 1302 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
1283 REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS; 1303 REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS;
@@ -1312,7 +1332,7 @@ static int read_super_block(struct super_block *s, int offset)
1312 1332
1313 bh = sb_bread(s, offset / s->s_blocksize); 1333 bh = sb_bread(s, offset / s->s_blocksize);
1314 if (!bh) { 1334 if (!bh) {
1315 reiserfs_warning(s, "sh-2006: read_super_block: " 1335 reiserfs_warning(s, "sh-2006",
1316 "bread failed (dev %s, block %lu, size %lu)", 1336 "bread failed (dev %s, block %lu, size %lu)",
1317 reiserfs_bdevname(s), offset / s->s_blocksize, 1337 reiserfs_bdevname(s), offset / s->s_blocksize,
1318 s->s_blocksize); 1338 s->s_blocksize);
@@ -1326,15 +1346,15 @@ static int read_super_block(struct super_block *s, int offset)
1326 } 1346 }
1327 // 1347 //
1328 // ok, reiserfs signature (old or new) found in at the given offset 1348 // ok, reiserfs signature (old or new) found in at the given offset
1329 // 1349 //
1330 fs_blocksize = sb_blocksize(rs); 1350 fs_blocksize = sb_blocksize(rs);
1331 brelse(bh); 1351 brelse(bh);
1332 sb_set_blocksize(s, fs_blocksize); 1352 sb_set_blocksize(s, fs_blocksize);
1333 1353
1334 bh = sb_bread(s, offset / s->s_blocksize); 1354 bh = sb_bread(s, offset / s->s_blocksize);
1335 if (!bh) { 1355 if (!bh) {
1336 reiserfs_warning(s, "sh-2007: read_super_block: " 1356 reiserfs_warning(s, "sh-2007",
1337 "bread failed (dev %s, block %lu, size %lu)\n", 1357 "bread failed (dev %s, block %lu, size %lu)",
1338 reiserfs_bdevname(s), offset / s->s_blocksize, 1358 reiserfs_bdevname(s), offset / s->s_blocksize,
1339 s->s_blocksize); 1359 s->s_blocksize);
1340 return 1; 1360 return 1;
@@ -1342,8 +1362,8 @@ static int read_super_block(struct super_block *s, int offset)
1342 1362
1343 rs = (struct reiserfs_super_block *)bh->b_data; 1363 rs = (struct reiserfs_super_block *)bh->b_data;
1344 if (sb_blocksize(rs) != s->s_blocksize) { 1364 if (sb_blocksize(rs) != s->s_blocksize) {
1345 reiserfs_warning(s, "sh-2011: read_super_block: " 1365 reiserfs_warning(s, "sh-2011", "can't find a reiserfs "
1346 "can't find a reiserfs filesystem on (dev %s, block %Lu, size %lu)\n", 1366 "filesystem on (dev %s, block %Lu, size %lu)",
1347 reiserfs_bdevname(s), 1367 reiserfs_bdevname(s),
1348 (unsigned long long)bh->b_blocknr, 1368 (unsigned long long)bh->b_blocknr,
1349 s->s_blocksize); 1369 s->s_blocksize);
@@ -1353,9 +1373,10 @@ static int read_super_block(struct super_block *s, int offset)
1353 1373
1354 if (rs->s_v1.s_root_block == cpu_to_le32(-1)) { 1374 if (rs->s_v1.s_root_block == cpu_to_le32(-1)) {
1355 brelse(bh); 1375 brelse(bh);
1356 reiserfs_warning(s, 1376 reiserfs_warning(s, "super-6519", "Unfinished reiserfsck "
1357 "Unfinished reiserfsck --rebuild-tree run detected. Please run\n" 1377 "--rebuild-tree run detected. Please run\n"
1358 "reiserfsck --rebuild-tree and wait for a completion. If that fails\n" 1378 "reiserfsck --rebuild-tree and wait for a "
1379 "completion. If that fails\n"
1359 "get newer reiserfsprogs package"); 1380 "get newer reiserfsprogs package");
1360 return 1; 1381 return 1;
1361 } 1382 }
@@ -1367,18 +1388,15 @@ static int read_super_block(struct super_block *s, int offset)
1367 /* magic is of non-standard journal filesystem, look at s_version to 1388 /* magic is of non-standard journal filesystem, look at s_version to
1368 find which format is in use */ 1389 find which format is in use */
1369 if (sb_version(rs) == REISERFS_VERSION_2) 1390 if (sb_version(rs) == REISERFS_VERSION_2)
1370 reiserfs_warning(s, 1391 reiserfs_info(s, "found reiserfs format \"3.6\""
1371 "read_super_block: found reiserfs format \"3.6\"" 1392 " with non-standard journal\n");
1372 " with non-standard journal");
1373 else if (sb_version(rs) == REISERFS_VERSION_1) 1393 else if (sb_version(rs) == REISERFS_VERSION_1)
1374 reiserfs_warning(s, 1394 reiserfs_info(s, "found reiserfs format \"3.5\""
1375 "read_super_block: found reiserfs format \"3.5\"" 1395 " with non-standard journal\n");
1376 " with non-standard journal");
1377 else { 1396 else {
1378 reiserfs_warning(s, 1397 reiserfs_warning(s, "sh-2012", "found unknown "
1379 "sh-2012: read_super_block: found unknown " 1398 "format \"%u\" of reiserfs with "
1380 "format \"%u\" of reiserfs with non-standard magic", 1399 "non-standard magic", sb_version(rs));
1381 sb_version(rs));
1382 return 1; 1400 return 1;
1383 } 1401 }
1384 } else 1402 } else
@@ -1408,8 +1426,7 @@ static int reread_meta_blocks(struct super_block *s)
1408 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); 1426 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
1409 wait_on_buffer(SB_BUFFER_WITH_SB(s)); 1427 wait_on_buffer(SB_BUFFER_WITH_SB(s));
1410 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { 1428 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
1411 reiserfs_warning(s, 1429 reiserfs_warning(s, "reiserfs-2504", "error reading the super");
1412 "reread_meta_blocks, error reading the super");
1413 return 1; 1430 return 1;
1414 } 1431 }
1415 1432
@@ -1452,8 +1469,8 @@ static __u32 find_hash_out(struct super_block *s)
1452 if (reiserfs_rupasov_hash(s)) { 1469 if (reiserfs_rupasov_hash(s)) {
1453 hash = YURA_HASH; 1470 hash = YURA_HASH;
1454 } 1471 }
1455 reiserfs_warning(s, "FS seems to be empty, autodetect " 1472 reiserfs_info(s, "FS seems to be empty, autodetect "
1456 "is using the default hash"); 1473 "is using the default hash\n");
1457 break; 1474 break;
1458 } 1475 }
1459 r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen)); 1476 r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen));
@@ -1473,10 +1490,10 @@ static __u32 find_hash_out(struct super_block *s)
1473 && (yurahash == 1490 && (yurahash ==
1474 GET_HASH_VALUE(deh_offset 1491 GET_HASH_VALUE(deh_offset
1475 (&(de.de_deh[de.de_entry_num])))))) { 1492 (&(de.de_deh[de.de_entry_num])))))) {
1476 reiserfs_warning(s, 1493 reiserfs_warning(s, "reiserfs-2506", "Unable to "
1477 "Unable to automatically detect hash function. " 1494 "automatically detect hash function. "
1478 "Please mount with -o hash={tea,rupasov,r5}", 1495 "Please mount with -o "
1479 reiserfs_bdevname(s)); 1496 "hash={tea,rupasov,r5}");
1480 hash = UNSET_HASH; 1497 hash = UNSET_HASH;
1481 break; 1498 break;
1482 } 1499 }
@@ -1490,7 +1507,8 @@ static __u32 find_hash_out(struct super_block *s)
1490 (deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash) 1507 (deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash)
1491 hash = R5_HASH; 1508 hash = R5_HASH;
1492 else { 1509 else {
1493 reiserfs_warning(s, "Unrecognised hash function"); 1510 reiserfs_warning(s, "reiserfs-2506",
1511 "Unrecognised hash function");
1494 hash = UNSET_HASH; 1512 hash = UNSET_HASH;
1495 } 1513 }
1496 } while (0); 1514 } while (0);
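find_hash_out() decides which directory hash the filesystem was created with by hashing the name of a real directory entry with each candidate function and comparing the result against the hash value stored in the entry's on-disk offset. A simplified sketch of that comparison, assuming the candidate helpers keyed_hash(), yura_hash() and r5_hash() from fs/reiserfs/hashes.c (the code above additionally rejects ambiguous matches between candidates):

        __u32 stored =
            GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num])));

        if (GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen)) == stored)
                hash = R5_HASH;
        else if (GET_HASH_VALUE(keyed_hash(de.de_name,
                                           de.de_namelen)) == stored)
                hash = TEA_HASH;
        else if (GET_HASH_VALUE(yura_hash(de.de_name,
                                          de.de_namelen)) == stored)
                hash = YURA_HASH;
        else
                hash = UNSET_HASH; /* unrecognised; the user must choose */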
@@ -1514,21 +1532,24 @@ static int what_hash(struct super_block *s)
1514 code = find_hash_out(s); 1532 code = find_hash_out(s);
1515 1533
1516 if (code != UNSET_HASH && reiserfs_hash_detect(s)) { 1534 if (code != UNSET_HASH && reiserfs_hash_detect(s)) {
1517 /* detection has found the hash, and we must check against the 1535 /* detection has found the hash, and we must check against the
1518 ** mount options 1536 ** mount options
1519 */ 1537 */
1520 if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { 1538 if (reiserfs_rupasov_hash(s) && code != YURA_HASH) {
1521 reiserfs_warning(s, "Error, %s hash detected, " 1539 reiserfs_warning(s, "reiserfs-2507",
1540 "Error, %s hash detected, "
1522 "unable to force rupasov hash", 1541 "unable to force rupasov hash",
1523 reiserfs_hashname(code)); 1542 reiserfs_hashname(code));
1524 code = UNSET_HASH; 1543 code = UNSET_HASH;
1525 } else if (reiserfs_tea_hash(s) && code != TEA_HASH) { 1544 } else if (reiserfs_tea_hash(s) && code != TEA_HASH) {
1526 reiserfs_warning(s, "Error, %s hash detected, " 1545 reiserfs_warning(s, "reiserfs-2508",
1546 "Error, %s hash detected, "
1527 "unable to force tea hash", 1547 "unable to force tea hash",
1528 reiserfs_hashname(code)); 1548 reiserfs_hashname(code));
1529 code = UNSET_HASH; 1549 code = UNSET_HASH;
1530 } else if (reiserfs_r5_hash(s) && code != R5_HASH) { 1550 } else if (reiserfs_r5_hash(s) && code != R5_HASH) {
1531 reiserfs_warning(s, "Error, %s hash detected, " 1551 reiserfs_warning(s, "reiserfs-2509",
1552 "Error, %s hash detected, "
1532 "unable to force r5 hash", 1553 "unable to force r5 hash",
1533 reiserfs_hashname(code)); 1554 reiserfs_hashname(code));
1534 code = UNSET_HASH; 1555 code = UNSET_HASH;
@@ -1544,7 +1565,7 @@ static int what_hash(struct super_block *s)
1544 } 1565 }
1545 } 1566 }
1546 1567
1547 /* if we are mounted RW, and we have a new valid hash code, update 1568 /* if we are mounted RW, and we have a new valid hash code, update
1548 ** the super 1569 ** the super
1549 */ 1570 */
1550 if (code != UNSET_HASH && 1571 if (code != UNSET_HASH &&
@@ -1587,9 +1608,9 @@ static int function2code(hashf_t func)
1587 return 0; 1608 return 0;
1588 } 1609 }
1589 1610
1590 #define SWARN(silent, s, ...) \ 1611 #define SWARN(silent, s, id, ...) \
1591 if (!(silent)) \ 1612 if (!(silent)) \
1592 reiserfs_warning (s, __VA_ARGS__) 1613 reiserfs_warning(s, id, __VA_ARGS__)
1593 1614
1594 static int reiserfs_fill_super(struct super_block *s, void *data, int silent) 1615 static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1595 { 1616 {
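The SWARN() wrapper changes in step: it gains an id parameter that is forwarded to reiserfs_warning() unchanged, and it still suppresses the message entirely on a silent mount. Call sites that have no stable message ID pass an empty string, as later hunks show:

        SWARN(silent, s, "jmacd-8", "unable to read bitmap");
        SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON");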
@@ -1623,10 +1644,6 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1623 REISERFS_SB(s)->s_alloc_options.preallocmin = 0; 1644 REISERFS_SB(s)->s_alloc_options.preallocmin = 0;
1624 /* Preallocate by 16 blocks (17-1) at once */ 1645 /* Preallocate by 16 blocks (17-1) at once */
1625 REISERFS_SB(s)->s_alloc_options.preallocsize = 17; 1646 REISERFS_SB(s)->s_alloc_options.preallocsize = 17;
1626 #ifdef CONFIG_REISERFS_FS_XATTR
1627 /* Initialize the rwsem for xattr dir */
1628 init_rwsem(&REISERFS_SB(s)->xattr_dir_sem);
1629 #endif
1630 /* setup default block allocator options */ 1647 /* setup default block allocator options */
1631 reiserfs_init_alloc_options(s); 1648 reiserfs_init_alloc_options(s);
1632 1649
@@ -1641,8 +1658,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1641#endif 1658#endif
1642 1659
1643 if (blocks) { 1660 if (blocks) {
1644 SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option " 1661 SWARN(silent, s, "jmacd-7", "resize option for remount only");
1645 "for remount only");
1646 goto error; 1662 goto error;
1647 } 1663 }
1648 1664
@@ -1651,8 +1667,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1651 old_format = 1; 1667 old_format = 1;
1652 /* try new format (64-th 1k block), which can contain reiserfs super block */ 1668 /* try new format (64-th 1k block), which can contain reiserfs super block */
1653 else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { 1669 else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) {
1654 SWARN(silent, s, 1670 SWARN(silent, s, "sh-2021", "can not find reiserfs on %s",
1655 "sh-2021: reiserfs_fill_super: can not find reiserfs on %s",
1656 reiserfs_bdevname(s)); 1671 reiserfs_bdevname(s));
1657 goto error; 1672 goto error;
1658 } 1673 }
@@ -1664,13 +1679,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1664 if (s->s_bdev && s->s_bdev->bd_inode 1679 if (s->s_bdev && s->s_bdev->bd_inode
1665 && i_size_read(s->s_bdev->bd_inode) < 1680 && i_size_read(s->s_bdev->bd_inode) <
1666 sb_block_count(rs) * sb_blocksize(rs)) { 1681 sb_block_count(rs) * sb_blocksize(rs)) {
1667 SWARN(silent, s, 1682 SWARN(silent, s, "", "Filesystem cannot be "
1668 "Filesystem on %s cannot be mounted because it is bigger than the device", 1683 "mounted because it is bigger than the device");
1669 reiserfs_bdevname(s)); 1684 SWARN(silent, s, "", "You may need to run fsck "
1670 SWARN(silent, s, 1685 "or increase size of your LVM partition");
1671 "You may need to run fsck or increase size of your LVM partition"); 1686 SWARN(silent, s, "", "Or may be you forgot to "
1672 SWARN(silent, s, 1687 "reboot after fdisk when it told you to");
1673 "Or may be you forgot to reboot after fdisk when it told you to");
1674 goto error; 1688 goto error;
1675 } 1689 }
1676 1690
@@ -1678,14 +1692,13 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1678 sbi->s_mount_state = REISERFS_VALID_FS; 1692 sbi->s_mount_state = REISERFS_VALID_FS;
1679 1693
1680 if ((errval = reiserfs_init_bitmap_cache(s))) { 1694 if ((errval = reiserfs_init_bitmap_cache(s))) {
1681 SWARN(silent, s, 1695 SWARN(silent, s, "jmacd-8", "unable to read bitmap");
1682 "jmacd-8: reiserfs_fill_super: unable to read bitmap");
1683 goto error; 1696 goto error;
1684 } 1697 }
1685 errval = -EINVAL; 1698 errval = -EINVAL;
1686 #ifdef CONFIG_REISERFS_CHECK 1699 #ifdef CONFIG_REISERFS_CHECK
1687 SWARN(silent, s, "CONFIG_REISERFS_CHECK is set ON"); 1700 SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON");
1688 SWARN(silent, s, "- it is slow mode for debugging."); 1701 SWARN(silent, s, "", "- it is slow mode for debugging.");
1689 #endif 1702 #endif
1690 1703
1691 /* make data=ordered the default */ 1704 /* make data=ordered the default */
@@ -1706,8 +1719,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1706 } 1719 }
1707 // set_device_ro(s->s_dev, 1) ; 1720 // set_device_ro(s->s_dev, 1) ;
1708 if (journal_init(s, jdev_name, old_format, commit_max_age)) { 1721 if (journal_init(s, jdev_name, old_format, commit_max_age)) {
1709 SWARN(silent, s, 1722 SWARN(silent, s, "sh-2022",
1710 "sh-2022: reiserfs_fill_super: unable to initialize journal space"); 1723 "unable to initialize journal space");
1711 goto error; 1724 goto error;
1712 } else { 1725 } else {
1713 jinit_done = 1; /* once this is set, journal_release must be called 1726 jinit_done = 1; /* once this is set, journal_release must be called
@@ -1715,8 +1728,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1715 */ 1728 */
1716 } 1729 }
1717 if (reread_meta_blocks(s)) { 1730 if (reread_meta_blocks(s)) {
1718 SWARN(silent, s, 1731 SWARN(silent, s, "jmacd-9",
1719 "jmacd-9: reiserfs_fill_super: unable to reread meta blocks after journal init"); 1732 "unable to reread meta blocks after journal init");
1720 goto error; 1733 goto error;
1721 } 1734 }
1722 1735
@@ -1724,8 +1737,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1724 goto error; 1737 goto error;
1725 1738
1726 if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) { 1739 if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) {
1727 SWARN(silent, s, 1740 SWARN(silent, s, "clm-7000",
1728 "clm-7000: Detected readonly device, marking FS readonly"); 1741 "Detected readonly device, marking FS readonly");
1729 s->s_flags |= MS_RDONLY; 1742 s->s_flags |= MS_RDONLY;
1730 } 1743 }
1731 args.objectid = REISERFS_ROOT_OBJECTID; 1744 args.objectid = REISERFS_ROOT_OBJECTID;
@@ -1734,8 +1747,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1734 iget5_locked(s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor, 1747 iget5_locked(s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor,
1735 reiserfs_init_locked_inode, (void *)(&args)); 1748 reiserfs_init_locked_inode, (void *)(&args));
1736 if (!root_inode) { 1749 if (!root_inode) {
1737 SWARN(silent, s, 1750 SWARN(silent, s, "jmacd-10", "get root inode failed");
1738 "jmacd-10: reiserfs_fill_super: get root inode failed");
1739 goto error; 1751 goto error;
1740 } 1752 }
1741 1753
@@ -1784,7 +1796,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1784 * avoiding corruption. -jeffm */ 1796 * avoiding corruption. -jeffm */
1785 if (bmap_would_wrap(reiserfs_bmap_count(s)) && 1797 if (bmap_would_wrap(reiserfs_bmap_count(s)) &&
1786 sb_bmap_nr(rs) != 0) { 1798 sb_bmap_nr(rs) != 0) {
1787 reiserfs_warning(s, "super-2030: This file system " 1799 reiserfs_warning(s, "super-2030", "This file system "
1788 "claims to use %u bitmap blocks in " 1800 "claims to use %u bitmap blocks in "
1789 "its super block, but requires %u. " 1801 "its super block, but requires %u. "
1790 "Clearing to zero.", sb_bmap_nr(rs), 1802 "Clearing to zero.", sb_bmap_nr(rs),
@@ -1817,7 +1829,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1817 } else if (!silent) { 1829 } else if (!silent) {
1818 reiserfs_info(s, "using 3.5.x disk format\n"); 1830 reiserfs_info(s, "using 3.5.x disk format\n");
1819 } 1831 }
1820 } 1832 } else
1833 set_sb_mnt_count(rs, sb_mnt_count(rs) + 1);
1834
1821 1835
1822 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); 1836 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
1823 errval = journal_end(&th, s, 1); 1837 errval = journal_end(&th, s, 1);
@@ -2031,8 +2045,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2031 if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { 2045 if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) {
2032 err = reiserfs_unpack(inode, NULL); 2046 err = reiserfs_unpack(inode, NULL);
2033 if (err) { 2047 if (err) {
2034 reiserfs_warning(sb, 2048 reiserfs_warning(sb, "super-6520",
2035 "reiserfs: Unpacking tail of quota file failed" 2049 "Unpacking tail of quota file failed"
2036 " (%d). Cannot turn on quotas.", err); 2050 " (%d). Cannot turn on quotas.", err);
2037 err = -EINVAL; 2051 err = -EINVAL;
2038 goto out; 2052 goto out;
@@ -2043,8 +2057,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2043 if (REISERFS_SB(sb)->s_qf_names[type]) { 2057 if (REISERFS_SB(sb)->s_qf_names[type]) {
2044 /* Quotafile not of fs root? */ 2058 /* Quotafile not of fs root? */
2045 if (path.dentry->d_parent != sb->s_root) 2059 if (path.dentry->d_parent != sb->s_root)
2046 reiserfs_warning(sb, 2060 reiserfs_warning(sb, "super-6521",
2047 "reiserfs: Quota file not on filesystem root. " 2061 "Quota file not on filesystem root. "
2048 "Journalled quota will not work."); 2062 "Journalled quota will not work.");
2049 } 2063 }
2050 2064
@@ -2195,9 +2209,6 @@ static int __init init_reiserfs_fs(void)
2195 return ret; 2209 return ret;
2196 } 2210 }
2197 2211
2198 if ((ret = reiserfs_xattr_register_handlers()))
2199 goto failed_reiserfs_xattr_register_handlers;
2200
2201 reiserfs_proc_info_global_init(); 2212 reiserfs_proc_info_global_init();
2202 reiserfs_proc_register_global("version", 2213 reiserfs_proc_register_global("version",
2203 reiserfs_global_version_in_proc); 2214 reiserfs_global_version_in_proc);
@@ -2208,9 +2219,6 @@ static int __init init_reiserfs_fs(void)
2208 return 0; 2219 return 0;
2209 } 2220 }
2210 2221
2211 reiserfs_xattr_unregister_handlers();
2212
2213 failed_reiserfs_xattr_register_handlers:
2214 reiserfs_proc_unregister_global("version"); 2222 reiserfs_proc_unregister_global("version");
2215 reiserfs_proc_info_global_done(); 2223 reiserfs_proc_info_global_done();
2216 destroy_inodecache(); 2224 destroy_inodecache();
@@ -2220,7 +2228,6 @@ static int __init init_reiserfs_fs(void)
2220 2228
2221 static void __exit exit_reiserfs_fs(void) 2229 static void __exit exit_reiserfs_fs(void)
2222 { 2230 {
2223 reiserfs_xattr_unregister_handlers();
2224 reiserfs_proc_unregister_global("version"); 2231 reiserfs_proc_unregister_global("version");
2225 reiserfs_proc_info_global_done(); 2232 reiserfs_proc_info_global_done();
2226 unregister_filesystem(&reiserfs_fs_type); 2233 unregister_filesystem(&reiserfs_fs_type);
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index f8121a1147e8..d7f6e51bef2a 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -26,7 +26,7 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
26 converted item. */ 26 converted item. */
27 struct item_head ind_ih; /* new indirect item to be inserted or 27 struct item_head ind_ih; /* new indirect item to be inserted or
28 key of unfm pointer to be pasted */ 28 key of unfm pointer to be pasted */
29 int n_blk_size, n_retval; /* returned value for reiserfs_insert_item and clones */ 29 int blk_size, retval; /* returned value for reiserfs_insert_item and clones */
30 unp_t unfm_ptr; /* Handle on an unformatted node 30 unp_t unfm_ptr; /* Handle on an unformatted node
31 that will be inserted in the 31 that will be inserted in the
32 tree. */ 32 tree. */
@@ -35,7 +35,7 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
35 35
36 REISERFS_SB(sb)->s_direct2indirect++; 36 REISERFS_SB(sb)->s_direct2indirect++;
37 37
38 n_blk_size = sb->s_blocksize; 38 blk_size = sb->s_blocksize;
39 39
40 /* and key to search for append or insert pointer to the new 40 /* and key to search for append or insert pointer to the new
41 unformatted node. */ 41 unformatted node. */
@@ -46,11 +46,11 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
46 /* Set the key to search for the place for new unfm pointer */ 46 /* Set the key to search for the place for new unfm pointer */
47 make_cpu_key(&end_key, inode, tail_offset, TYPE_INDIRECT, 4); 47 make_cpu_key(&end_key, inode, tail_offset, TYPE_INDIRECT, 4);
48 48
49 // FIXME: we could avoid this 49 /* FIXME: we could avoid this */
50 if (search_for_position_by_key(sb, &end_key, path) == POSITION_FOUND) { 50 if (search_for_position_by_key(sb, &end_key, path) == POSITION_FOUND) {
51 reiserfs_warning(sb, "PAP-14030: direct2indirect: " 51 reiserfs_error(sb, "PAP-14030",
52 "pasted or inserted byte exists in the tree %K. " 52 "pasted or inserted byte exists in "
53 "Use fsck to repair.", &end_key); 53 "the tree %K. Use fsck to repair.", &end_key);
54 pathrelse(path); 54 pathrelse(path);
55 return -EIO; 55 return -EIO;
56 } 56 }
@@ -64,17 +64,17 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
64 set_ih_free_space(&ind_ih, 0); /* delete at nearest future */ 64 set_ih_free_space(&ind_ih, 0); /* delete at nearest future */
65 put_ih_item_len(&ind_ih, UNFM_P_SIZE); 65 put_ih_item_len(&ind_ih, UNFM_P_SIZE);
66 PATH_LAST_POSITION(path)++; 66 PATH_LAST_POSITION(path)++;
67 n_retval = 67 retval =
68 reiserfs_insert_item(th, path, &end_key, &ind_ih, inode, 68 reiserfs_insert_item(th, path, &end_key, &ind_ih, inode,
69 (char *)&unfm_ptr); 69 (char *)&unfm_ptr);
70 } else { 70 } else {
71 /* Paste into last indirect item of an object. */ 71 /* Paste into last indirect item of an object. */
72 n_retval = reiserfs_paste_into_item(th, path, &end_key, inode, 72 retval = reiserfs_paste_into_item(th, path, &end_key, inode,
73 (char *)&unfm_ptr, 73 (char *)&unfm_ptr,
74 UNFM_P_SIZE); 74 UNFM_P_SIZE);
75 } 75 }
76 if (n_retval) { 76 if (retval) {
77 return n_retval; 77 return retval;
78 } 78 }
79 // note: from here there are two keys which have matching first 79 // note: from here there are two keys which have matching first
80 // three key components. They only differ by the fourth one. 80 // three key components. They only differ by the fourth one.
@@ -92,14 +92,13 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
92 last item of the file */ 92 last item of the file */
93 if (search_for_position_by_key(sb, &end_key, path) == 93 if (search_for_position_by_key(sb, &end_key, path) ==
94 POSITION_FOUND) 94 POSITION_FOUND)
95 reiserfs_panic(sb, 95 reiserfs_panic(sb, "PAP-14050",
96 "PAP-14050: direct2indirect: "
97 "direct item (%K) not found", &end_key); 96 "direct item (%K) not found", &end_key);
98 p_le_ih = PATH_PITEM_HEAD(path); 97 p_le_ih = PATH_PITEM_HEAD(path);
99 RFALSE(!is_direct_le_ih(p_le_ih), 98 RFALSE(!is_direct_le_ih(p_le_ih),
100 "vs-14055: direct item expected(%K), found %h", 99 "vs-14055: direct item expected(%K), found %h",
101 &end_key, p_le_ih); 100 &end_key, p_le_ih);
102 tail_size = (le_ih_k_offset(p_le_ih) & (n_blk_size - 1)) 101 tail_size = (le_ih_k_offset(p_le_ih) & (blk_size - 1))
103 + ih_item_len(p_le_ih) - 1; 102 + ih_item_len(p_le_ih) - 1;
104 103
105 /* we only send the unbh pointer if the buffer is not up to date. 104 /* we only send the unbh pointer if the buffer is not up to date.
@@ -114,11 +113,11 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
114 } else { 113 } else {
115 up_to_date_bh = unbh; 114 up_to_date_bh = unbh;
116 } 115 }
117 n_retval = reiserfs_delete_item(th, path, &end_key, inode, 116 retval = reiserfs_delete_item(th, path, &end_key, inode,
118 up_to_date_bh); 117 up_to_date_bh);
119 118
120 total_tail += n_retval; 119 total_tail += retval;
121 if (tail_size == n_retval) 120 if (tail_size == retval)
122 // done: file does not have direct items anymore 121 // done: file does not have direct items anymore
123 break; 122 break;
124 123
@@ -130,7 +129,7 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
130 unsigned pgoff = 129 unsigned pgoff =
131 (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1); 130 (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1);
132 char *kaddr = kmap_atomic(up_to_date_bh->b_page, KM_USER0); 131 char *kaddr = kmap_atomic(up_to_date_bh->b_page, KM_USER0);
133 memset(kaddr + pgoff, 0, n_blk_size - total_tail); 132 memset(kaddr + pgoff, 0, blk_size - total_tail);
134 kunmap_atomic(kaddr, KM_USER0); 133 kunmap_atomic(kaddr, KM_USER0);
135 } 134 }
136 135
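Both the tail_size computation and the final memset above rely on s_blocksize being a power of two, so masking with (blk_size - 1) is the same as taking the offset modulo the block size. A standalone check of that identity, with 4096 standing in for the real block size:

/* Power-of-two masking as used in the tail_size and memset lines above. */
#include <assert.h>
#include <stdio.h>

int main(void)
{
    unsigned long blk_size = 4096;  /* must be a power of two */
    unsigned long offsets[] = { 0, 1, 4095, 4096, 123456789 };

    for (unsigned i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++) {
        unsigned long off = offsets[i];
        /* mask form used by the kernel code */
        unsigned long masked = off & (blk_size - 1);
        assert(masked == off % blk_size);
        printf("%lu %% %lu = %lu\n", off, blk_size, masked);
    }
    return 0;
}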
@@ -171,14 +170,18 @@ void reiserfs_unmap_buffer(struct buffer_head *bh)
171 what we expect from it (number of cut bytes). But when tail remains 170 what we expect from it (number of cut bytes). But when tail remains
172 in the unformatted node, we set mode to SKIP_BALANCING and unlock 171 in the unformatted node, we set mode to SKIP_BALANCING and unlock
173 inode */ 172 inode */
174int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_inode, struct page *page, struct treepath *p_s_path, /* path to the indirect item. */ 173int indirect2direct(struct reiserfs_transaction_handle *th,
175 const struct cpu_key *p_s_item_key, /* Key to look for unformatted node pointer to be cut. */ 174 struct inode *inode, struct page *page,
175 struct treepath *path, /* path to the indirect item. */
176 const struct cpu_key *item_key, /* Key to look for
177 * unformatted node
178 * pointer to be cut. */
176 loff_t n_new_file_size, /* New file size. */ 179 loff_t n_new_file_size, /* New file size. */
177 char *p_c_mode) 180 char *mode)
178{ 181{
179 struct super_block *p_s_sb = p_s_inode->i_sb; 182 struct super_block *sb = inode->i_sb;
180 struct item_head s_ih; 183 struct item_head s_ih;
181 unsigned long n_block_size = p_s_sb->s_blocksize; 184 unsigned long block_size = sb->s_blocksize;
182 char *tail; 185 char *tail;
183 int tail_len, round_tail_len; 186 int tail_len, round_tail_len;
184 loff_t pos, pos1; /* position of first byte of the tail */ 187 loff_t pos, pos1; /* position of first byte of the tail */
@@ -186,22 +189,22 @@ int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_in
186 189
187 BUG_ON(!th->t_trans_id); 190 BUG_ON(!th->t_trans_id);
188 191
189 REISERFS_SB(p_s_sb)->s_indirect2direct++; 192 REISERFS_SB(sb)->s_indirect2direct++;
190 193
191 *p_c_mode = M_SKIP_BALANCING; 194 *mode = M_SKIP_BALANCING;
192 195
193 /* store item head path points to. */ 196 /* store item head path points to. */
194 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); 197 copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
195 198
196 tail_len = (n_new_file_size & (n_block_size - 1)); 199 tail_len = (n_new_file_size & (block_size - 1));
197 if (get_inode_sd_version(p_s_inode) == STAT_DATA_V2) 200 if (get_inode_sd_version(inode) == STAT_DATA_V2)
198 round_tail_len = ROUND_UP(tail_len); 201 round_tail_len = ROUND_UP(tail_len);
199 else 202 else
200 round_tail_len = tail_len; 203 round_tail_len = tail_len;
201 204
202 pos = 205 pos =
203 le_ih_k_offset(&s_ih) - 1 + (ih_item_len(&s_ih) / UNFM_P_SIZE - 206 le_ih_k_offset(&s_ih) - 1 + (ih_item_len(&s_ih) / UNFM_P_SIZE -
204 1) * p_s_sb->s_blocksize; 207 1) * sb->s_blocksize;
205 pos1 = pos; 208 pos1 = pos;
206 209
207 // we are protected by i_mutex. The tail can not disappear, not 210 // we are protected by i_mutex. The tail can not disappear, not
207 // we are protected by i_mutex. The tail can not disappear, not 210 // we are protected by i_mutex. The tail can not disappear, not
@@ -210,27 +213,26 @@ int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_in
210 213
211 tail = (char *)kmap(page); /* this can schedule */ 214 tail = (char *)kmap(page); /* this can schedule */
212 215
213 if (path_changed(&s_ih, p_s_path)) { 216 if (path_changed(&s_ih, path)) {
214 /* re-search indirect item */ 217 /* re-search indirect item */
215 if (search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) 218 if (search_for_position_by_key(sb, item_key, path)
216 == POSITION_NOT_FOUND) 219 == POSITION_NOT_FOUND)
217 reiserfs_panic(p_s_sb, 220 reiserfs_panic(sb, "PAP-5520",
218 "PAP-5520: indirect2direct: "
219 "item to be converted %K does not exist", 221 "item to be converted %K does not exist",
220 p_s_item_key); 222 item_key);
221 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); 223 copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
222#ifdef CONFIG_REISERFS_CHECK 224#ifdef CONFIG_REISERFS_CHECK
223 pos = le_ih_k_offset(&s_ih) - 1 + 225 pos = le_ih_k_offset(&s_ih) - 1 +
224 (ih_item_len(&s_ih) / UNFM_P_SIZE - 226 (ih_item_len(&s_ih) / UNFM_P_SIZE -
225 1) * p_s_sb->s_blocksize; 227 1) * sb->s_blocksize;
226 if (pos != pos1) 228 if (pos != pos1)
227 reiserfs_panic(p_s_sb, "vs-5530: indirect2direct: " 229 reiserfs_panic(sb, "vs-5530", "tail position "
228 "tail position changed while we were reading it"); 230 "changed while we were reading it");
229#endif 231#endif
230 } 232 }
231 233
232 /* Set direct item header to insert. */ 234 /* Set direct item header to insert. */
233 make_le_item_head(&s_ih, NULL, get_inode_item_key_version(p_s_inode), 235 make_le_item_head(&s_ih, NULL, get_inode_item_key_version(inode),
234 pos1 + 1, TYPE_DIRECT, round_tail_len, 236 pos1 + 1, TYPE_DIRECT, round_tail_len,
235 0xffff /*ih_free_space */ ); 237 0xffff /*ih_free_space */ );
236 238
@@ -240,13 +242,13 @@ int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_in
240 */ 242 */
241 tail = tail + (pos & (PAGE_CACHE_SIZE - 1)); 243 tail = tail + (pos & (PAGE_CACHE_SIZE - 1));
242 244
243 PATH_LAST_POSITION(p_s_path)++; 245 PATH_LAST_POSITION(path)++;
244 246
245 key = *p_s_item_key; 247 key = *item_key;
246 set_cpu_key_k_type(&key, TYPE_DIRECT); 248 set_cpu_key_k_type(&key, TYPE_DIRECT);
247 key.key_length = 4; 249 key.key_length = 4;
248 /* Insert tail as new direct item in the tree */ 250 /* Insert tail as new direct item in the tree */
249 if (reiserfs_insert_item(th, p_s_path, &key, &s_ih, p_s_inode, 251 if (reiserfs_insert_item(th, path, &key, &s_ih, inode,
250 tail ? tail : NULL) < 0) { 252 tail ? tail : NULL) < 0) {
251 /* No disk memory. So we can not convert last unformatted node 253 /* No disk memory. So we can not convert last unformatted node
252 to the direct item. In this case we used to adjust 254 to the direct item. In this case we used to adjust
@@ -255,12 +257,12 @@ int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_in
255 unformatted node. For now i_size is considered as guard for 257 unformatted node. For now i_size is considered as guard for
256 going out of file size */ 258 going out of file size */
257 kunmap(page); 259 kunmap(page);
258 return n_block_size - round_tail_len; 260 return block_size - round_tail_len;
259 } 261 }
260 kunmap(page); 262 kunmap(page);
261 263
262 /* make sure to get the i_blocks changes from reiserfs_insert_item */ 264 /* make sure to get the i_blocks changes from reiserfs_insert_item */
263 reiserfs_update_sd(th, p_s_inode); 265 reiserfs_update_sd(th, inode);
264 266
265 // note: we have now the same as in above direct2indirect 267 // note: we have now the same as in above direct2indirect
266 // conversion: there are two keys which have matching first three 268 // conversion: there are two keys which have matching first three
@@ -268,11 +270,11 @@ int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_in
268 270
269 /* We have inserted new direct item and must remove last 271 /* We have inserted new direct item and must remove last
270 unformatted node. */ 272 unformatted node. */
271 *p_c_mode = M_CUT; 273 *mode = M_CUT;
272 274
273 /* we store position of first direct item in the in-core inode */ 275 /* we store position of first direct item in the in-core inode */
274 //mark_file_with_tail (p_s_inode, pos1 + 1); 276 /* mark_file_with_tail (inode, pos1 + 1); */
275 REISERFS_I(p_s_inode)->i_first_direct_byte = pos1 + 1; 277 REISERFS_I(inode)->i_first_direct_byte = pos1 + 1;
276 278
277 return n_block_size - round_tail_len; 279 return block_size - round_tail_len;
278} 280}
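The pos computation in indirect2direct() above derives the byte offset of the last unformatted block from the indirect item's key offset and length. A small worked sketch with made-up values, assuming the on-disk block pointer (unp_t) is 4 bytes, which is what UNFM_P_SIZE denotes:

/* Sketch of the last-block position math in indirect2direct:
 * pos = le_ih_k_offset(&s_ih) - 1
 *       + (ih_item_len(&s_ih) / UNFM_P_SIZE - 1) * blocksize */
#include <stdio.h>

#define UNFM_P_SIZE 4   /* size of one on-disk block pointer */

int main(void)
{
    unsigned long long ih_offset = 1;         /* 1-based key offset of the item's first byte */
    unsigned long item_len = 3 * UNFM_P_SIZE; /* indirect item holding 3 block pointers */
    unsigned long blocksize = 4096;

    /* 0-based offset of the first byte covered by the last pointer */
    unsigned long long pos = ih_offset - 1 +
            (item_len / UNFM_P_SIZE - 1) * (unsigned long long)blocksize;

    printf("last unformatted block starts at byte %llu\n", pos); /* 8192 */
    return 0;
}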
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index ae881ccd2f03..f83f52bae390 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -27,6 +27,10 @@
27 * these are special cases for filesystem ACLs, they are interpreted by the 27 * these are special cases for filesystem ACLs, they are interpreted by the
28 * kernel, in addition, they are negatively and positively cached and attached 28 * kernel, in addition, they are negatively and positively cached and attached
29 * to the inode so that unnecessary lookups are avoided. 29 * to the inode so that unnecessary lookups are avoided.
30 *
31 * Locking works like so:
 32 * Directory components (xattr root, xattr dir) are protected by their i_mutex.
33 * The xattrs themselves are protected by the xattr_sem.
30 */ 34 */
31 35
32#include <linux/reiserfs_fs.h> 36#include <linux/reiserfs_fs.h>
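The new comment above describes a two-level scheme: i_mutex serializes the directory components while i_xattr_sem protects the attribute bodies. A userspace sketch of the reader/writer half of that scheme, with a POSIX rwlock standing in for the kernel's rw_semaphore and a plain int standing in for the xattr body:

/* Readers (the get path) take the lock shared; writers (the set path)
 * take it exclusive, mirroring down_read()/down_write() on i_xattr_sem. */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t xattr_sem = PTHREAD_RWLOCK_INITIALIZER;
static int xattr_value;

static int xattr_get(void)
{
    int v;
    pthread_rwlock_rdlock(&xattr_sem);  /* down_read() */
    v = xattr_value;
    pthread_rwlock_unlock(&xattr_sem);  /* up_read() */
    return v;
}

static void xattr_set(int v)
{
    pthread_rwlock_wrlock(&xattr_sem);  /* down_write() */
    xattr_value = v;
    pthread_rwlock_unlock(&xattr_sem);  /* up_write() */
}

int main(void)
{
    xattr_set(42);
    printf("xattr = %d\n", xattr_get());
    return 0;
}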
@@ -44,328 +48,334 @@
44#include <net/checksum.h> 48#include <net/checksum.h>
45#include <linux/smp_lock.h> 49#include <linux/smp_lock.h>
46#include <linux/stat.h> 50#include <linux/stat.h>
51#include <linux/quotaops.h>
47 52
48#define FL_READONLY 128
49#define FL_DIR_SEM_HELD 256
50#define PRIVROOT_NAME ".reiserfs_priv" 53#define PRIVROOT_NAME ".reiserfs_priv"
51#define XAROOT_NAME "xattrs" 54#define XAROOT_NAME "xattrs"
52 55
53static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char
54 *prefix);
55 56
56/* Returns the dentry referring to the root of the extended attribute 57/* Helpers for inode ops. We do this so that we don't have all the VFS
57 * directory tree. If it has already been retrieved, it is used. If it 58 * overhead and also for proper i_mutex annotation.
58 * hasn't been created and the flags indicate creation is allowed, we 59 * dir->i_mutex must be held for all of them. */
59 * attempt to create it. On error, we return a pointer-encoded error. 60#ifdef CONFIG_REISERFS_FS_XATTR
60 */ 61static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
61static struct dentry *get_xa_root(struct super_block *sb, int flags)
62{ 62{
63 struct dentry *privroot = dget(REISERFS_SB(sb)->priv_root); 63 BUG_ON(!mutex_is_locked(&dir->i_mutex));
64 struct dentry *xaroot; 64 vfs_dq_init(dir);
65 return dir->i_op->create(dir, dentry, mode, NULL);
66}
67#endif
65 68
66 /* This needs to be created at mount-time */ 69static int xattr_mkdir(struct inode *dir, struct dentry *dentry, int mode)
67 if (!privroot) 70{
68 return ERR_PTR(-ENODATA); 71 BUG_ON(!mutex_is_locked(&dir->i_mutex));
72 vfs_dq_init(dir);
73 return dir->i_op->mkdir(dir, dentry, mode);
74}
69 75
70 mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR); 76/* We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr
 71 if (REISERFS_SB(sb)->xattr_root) { 77 * mutation ops aren't called during rename or splice, which are the
72 xaroot = dget(REISERFS_SB(sb)->xattr_root); 78 * only other users of I_MUTEX_CHILD. It violates the ordering, but that's
73 goto out; 79 * better than allocating another subclass just for this code. */
74 } 80static int xattr_unlink(struct inode *dir, struct dentry *dentry)
81{
82 int error;
83 BUG_ON(!mutex_is_locked(&dir->i_mutex));
84 vfs_dq_init(dir);
75 85
76 xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME)); 86 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
77 if (IS_ERR(xaroot)) { 87 error = dir->i_op->unlink(dir, dentry);
78 goto out; 88 mutex_unlock(&dentry->d_inode->i_mutex);
79 } else if (!xaroot->d_inode) { 89
90 if (!error)
91 d_delete(dentry);
92 return error;
93}
94
95static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
96{
97 int error;
98 BUG_ON(!mutex_is_locked(&dir->i_mutex));
99 vfs_dq_init(dir);
100
101 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
102 dentry_unhash(dentry);
103 error = dir->i_op->rmdir(dir, dentry);
104 if (!error)
105 dentry->d_inode->i_flags |= S_DEAD;
106 mutex_unlock(&dentry->d_inode->i_mutex);
107 if (!error)
108 d_delete(dentry);
109 dput(dentry);
110
111 return error;
112}
113
114#define xattr_may_create(flags) (!flags || flags & XATTR_CREATE)
115
116/* Returns and possibly creates the xattr dir. */
117static struct dentry *lookup_or_create_dir(struct dentry *parent,
118 const char *name, int flags)
119{
120 struct dentry *dentry;
121 BUG_ON(!parent);
122
123 dentry = lookup_one_len(name, parent, strlen(name));
124 if (IS_ERR(dentry))
125 return dentry;
126 else if (!dentry->d_inode) {
80 int err = -ENODATA; 127 int err = -ENODATA;
81 if (flags == 0 || flags & XATTR_CREATE) 128
82 err = privroot->d_inode->i_op->mkdir(privroot->d_inode, 129 if (xattr_may_create(flags)) {
83 xaroot, 0700); 130 mutex_lock_nested(&parent->d_inode->i_mutex,
131 I_MUTEX_XATTR);
132 err = xattr_mkdir(parent->d_inode, dentry, 0700);
133 mutex_unlock(&parent->d_inode->i_mutex);
134 }
135
84 if (err) { 136 if (err) {
85 dput(xaroot); 137 dput(dentry);
86 xaroot = ERR_PTR(err); 138 dentry = ERR_PTR(err);
87 goto out;
88 } 139 }
89 } 140 }
90 REISERFS_SB(sb)->xattr_root = dget(xaroot);
91 141
92 out: 142 return dentry;
93 mutex_unlock(&privroot->d_inode->i_mutex); 143}
94 dput(privroot); 144
95 return xaroot; 145static struct dentry *open_xa_root(struct super_block *sb, int flags)
146{
147 struct dentry *privroot = REISERFS_SB(sb)->priv_root;
148 if (!privroot)
149 return ERR_PTR(-ENODATA);
150 return lookup_or_create_dir(privroot, XAROOT_NAME, flags);
96} 151}
97 152
98/* Opens the directory corresponding to the inode's extended attribute store.
99 * If flags allow, the tree to the directory may be created. If creation is
100 * prohibited, -ENODATA is returned. */
101static struct dentry *open_xa_dir(const struct inode *inode, int flags) 153static struct dentry *open_xa_dir(const struct inode *inode, int flags)
102{ 154{
103 struct dentry *xaroot, *xadir; 155 struct dentry *xaroot, *xadir;
104 char namebuf[17]; 156 char namebuf[17];
105 157
106 xaroot = get_xa_root(inode->i_sb, flags); 158 xaroot = open_xa_root(inode->i_sb, flags);
107 if (IS_ERR(xaroot)) 159 if (IS_ERR(xaroot))
108 return xaroot; 160 return xaroot;
109 161
110 /* ok, we have xaroot open */
111 snprintf(namebuf, sizeof(namebuf), "%X.%X", 162 snprintf(namebuf, sizeof(namebuf), "%X.%X",
112 le32_to_cpu(INODE_PKEY(inode)->k_objectid), 163 le32_to_cpu(INODE_PKEY(inode)->k_objectid),
113 inode->i_generation); 164 inode->i_generation);
114 xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
115 if (IS_ERR(xadir)) {
116 dput(xaroot);
117 return xadir;
118 }
119
120 if (!xadir->d_inode) {
121 int err;
122 if (flags == 0 || flags & XATTR_CREATE) {
123 /* Although there is nothing else trying to create this directory,
124 * another directory with the same hash may be created, so we need
125 * to protect against that */
126 err =
127 xaroot->d_inode->i_op->mkdir(xaroot->d_inode, xadir,
128 0700);
129 if (err) {
130 dput(xaroot);
131 dput(xadir);
132 return ERR_PTR(err);
133 }
134 }
135 if (!xadir->d_inode) {
136 dput(xaroot);
137 dput(xadir);
138 return ERR_PTR(-ENODATA);
139 }
140 }
141 165
166 xadir = lookup_or_create_dir(xaroot, namebuf, flags);
142 dput(xaroot); 167 dput(xaroot);
143 return xadir; 168 return xadir;
169
144} 170}
145 171
146/* Returns a dentry corresponding to a specific extended attribute file 172/* The following are side effects of other operations that aren't explicitly
147 * for the inode. If flags allow, the file is created. Otherwise, a 173 * modifying extended attributes. This includes operations such as permissions
148 * valid or negative dentry, or an error is returned. */ 174 * or ownership changes, object deletions, etc. */
149static struct dentry *get_xa_file_dentry(const struct inode *inode, 175struct reiserfs_dentry_buf {
150 const char *name, int flags) 176 struct dentry *xadir;
151{ 177 int count;
152 struct dentry *xadir, *xafile; 178 struct dentry *dentries[8];
153 int err = 0; 179};
154 180
155 xadir = open_xa_dir(inode, flags); 181static int
156 if (IS_ERR(xadir)) { 182fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset,
157 return ERR_CAST(xadir); 183 u64 ino, unsigned int d_type)
158 } else if (!xadir->d_inode) { 184{
159 dput(xadir); 185 struct reiserfs_dentry_buf *dbuf = buf;
160 return ERR_PTR(-ENODATA); 186 struct dentry *dentry;
161 }
162 187
163 xafile = lookup_one_len(name, xadir, strlen(name)); 188 if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
164 if (IS_ERR(xafile)) { 189 return -ENOSPC;
165 dput(xadir);
166 return ERR_CAST(xafile);
167 }
168 190
169 if (xafile->d_inode) { /* file exists */ 191 if (name[0] == '.' && (name[1] == '\0' ||
170 if (flags & XATTR_CREATE) { 192 (name[1] == '.' && name[2] == '\0')))
171 err = -EEXIST; 193 return 0;
172 dput(xafile);
173 goto out;
174 }
175 } else if (flags & XATTR_REPLACE || flags & FL_READONLY) {
176 goto out;
177 } else {
178 /* inode->i_mutex is down, so nothing else can try to create
179 * the same xattr */
180 err = xadir->d_inode->i_op->create(xadir->d_inode, xafile,
181 0700 | S_IFREG, NULL);
182 194
183 if (err) { 195 dentry = lookup_one_len(name, dbuf->xadir, namelen);
184 dput(xafile); 196 if (IS_ERR(dentry)) {
185 goto out; 197 return PTR_ERR(dentry);
186 } 198 } else if (!dentry->d_inode) {
199 /* A directory entry exists, but no file? */
200 reiserfs_error(dentry->d_sb, "xattr-20003",
201 "Corrupted directory: xattr %s listed but "
202 "not found for file %s.\n",
203 dentry->d_name.name, dbuf->xadir->d_name.name);
204 dput(dentry);
205 return -EIO;
187 } 206 }
188 207
189 out: 208 dbuf->dentries[dbuf->count++] = dentry;
190 dput(xadir); 209 return 0;
191 if (err)
192 xafile = ERR_PTR(err);
193 else if (!xafile->d_inode) {
194 dput(xafile);
195 xafile = ERR_PTR(-ENODATA);
196 }
197 return xafile;
198} 210}
199 211
200/* 212static void
201 * this is very similar to fs/reiserfs/dir.c:reiserfs_readdir, but 213cleanup_dentry_buf(struct reiserfs_dentry_buf *buf)
202 * we need to drop the path before calling the filldir struct. That
203 * would be a big performance hit to the non-xattr case, so I've copied
204 * the whole thing for now. --clm
205 *
206 * the big difference is that I go backwards through the directory,
207 * and don't mess with f->f_pos, but the idea is the same. Do some
208 * action on each and every entry in the directory.
209 *
210 * we're called with i_mutex held, so there are no worries about the directory
211 * changing underneath us.
212 */
213static int __xattr_readdir(struct inode *inode, void *dirent, filldir_t filldir)
214{ 214{
215 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ 215 int i;
216 INITIALIZE_PATH(path_to_entry); 216 for (i = 0; i < buf->count; i++)
217 struct buffer_head *bh; 217 if (buf->dentries[i])
218 int entry_num; 218 dput(buf->dentries[i]);
219 struct item_head *ih, tmp_ih; 219}
220 int search_res; 220
221 char *local_buf; 221static int reiserfs_for_each_xattr(struct inode *inode,
222 loff_t next_pos; 222 int (*action)(struct dentry *, void *),
223 char small_buf[32]; /* avoid kmalloc if we can */ 223 void *data)
224 struct reiserfs_de_head *deh; 224{
225 int d_reclen; 225 struct dentry *dir;
226 char *d_name; 226 int i, err = 0;
227 off_t d_off; 227 loff_t pos = 0;
228 ino_t d_ino; 228 struct reiserfs_dentry_buf buf = {
229 struct reiserfs_dir_entry de; 229 .count = 0,
230 230 };
231 /* form key for search the next directory entry using f_pos field of
232 file structure */
233 next_pos = max_reiserfs_offset(inode);
234
235 while (1) {
236 research:
237 if (next_pos <= DOT_DOT_OFFSET)
238 break;
239 make_cpu_key(&pos_key, inode, next_pos, TYPE_DIRENTRY, 3);
240
241 search_res =
242 search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry,
243 &de);
244 if (search_res == IO_ERROR) {
245 // FIXME: we could just skip part of directory which could
246 // not be read
247 pathrelse(&path_to_entry);
248 return -EIO;
249 }
250 231
251 if (search_res == NAME_NOT_FOUND) 232 /* Skip out, an xattr has no xattrs associated with it */
252 de.de_entry_num--; 233 if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1)
234 return 0;
253 235
254 set_de_name_and_namelen(&de); 236 dir = open_xa_dir(inode, XATTR_REPLACE);
255 entry_num = de.de_entry_num; 237 if (IS_ERR(dir)) {
256 deh = &(de.de_deh[entry_num]); 238 err = PTR_ERR(dir);
239 goto out;
240 } else if (!dir->d_inode) {
241 err = 0;
242 goto out_dir;
243 }
257 244
258 bh = de.de_bh; 245 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
259 ih = de.de_ih; 246 buf.xadir = dir;
247 err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos);
248 while ((err == 0 || err == -ENOSPC) && buf.count) {
249 err = 0;
260 250
261 if (!is_direntry_le_ih(ih)) { 251 for (i = 0; i < buf.count && buf.dentries[i]; i++) {
262 reiserfs_warning(inode->i_sb, "not direntry %h", ih); 252 int lerr = 0;
263 break; 253 struct dentry *dentry = buf.dentries[i];
264 }
265 copy_item_head(&tmp_ih, ih);
266 254
267 /* we must have found item, that is item of this directory, */ 255 if (err == 0 && !S_ISDIR(dentry->d_inode->i_mode))
268 RFALSE(COMP_SHORT_KEYS(&(ih->ih_key), &pos_key), 256 lerr = action(dentry, data);
269 "vs-9000: found item %h does not match to dir we readdir %K",
270 ih, &pos_key);
271 257
272 if (deh_offset(deh) <= DOT_DOT_OFFSET) { 258 dput(dentry);
273 break; 259 buf.dentries[i] = NULL;
260 err = lerr ?: err;
274 } 261 }
262 buf.count = 0;
263 if (!err)
264 err = reiserfs_readdir_dentry(dir, &buf,
265 fill_with_dentries, &pos);
266 }
267 mutex_unlock(&dir->d_inode->i_mutex);
275 268
276 /* look for the previous entry in the directory */ 269 /* Clean up after a failed readdir */
277 next_pos = deh_offset(deh) - 1; 270 cleanup_dentry_buf(&buf);
278
279 if (!de_visible(deh))
280 /* it is hidden entry */
281 continue;
282 271
283 d_reclen = entry_length(bh, ih, entry_num); 272 if (!err) {
 285 d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh); 273 /* We start a transaction here to avoid an ABBA situation
285 d_off = deh_offset(deh); 274 * between the xattr root's i_mutex and the journal lock.
286 d_ino = deh_objectid(deh); 275 * This doesn't incur much additional overhead since the
276 * new transaction will just nest inside the
277 * outer transaction. */
278 int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 +
279 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
280 struct reiserfs_transaction_handle th;
281 err = journal_begin(&th, inode->i_sb, blocks);
282 if (!err) {
283 int jerror;
284 mutex_lock_nested(&dir->d_parent->d_inode->i_mutex,
285 I_MUTEX_XATTR);
286 err = action(dir, data);
287 jerror = journal_end(&th, inode->i_sb, blocks);
288 mutex_unlock(&dir->d_parent->d_inode->i_mutex);
289 err = jerror ?: err;
290 }
291 }
292out_dir:
293 dput(dir);
294out:
295 /* -ENODATA isn't an error */
296 if (err == -ENODATA)
297 err = 0;
298 return err;
299}
287 300
288 if (!d_name[d_reclen - 1]) 301static int delete_one_xattr(struct dentry *dentry, void *data)
289 d_reclen = strlen(d_name); 302{
303 struct inode *dir = dentry->d_parent->d_inode;
290 304
291 if (d_reclen > REISERFS_MAX_NAME(inode->i_sb->s_blocksize)) { 305 /* This is the xattr dir, handle specially. */
292 /* too big to send back to VFS */ 306 if (S_ISDIR(dentry->d_inode->i_mode))
293 continue; 307 return xattr_rmdir(dir, dentry);
294 }
295 308
296 /* Ignore the .reiserfs_priv entry */ 309 return xattr_unlink(dir, dentry);
297 if (reiserfs_xattrs(inode->i_sb) && 310}
298 !old_format_only(inode->i_sb) &&
299 deh_objectid(deh) ==
300 le32_to_cpu(INODE_PKEY
301 (REISERFS_SB(inode->i_sb)->priv_root->d_inode)->
302 k_objectid))
303 continue;
304
305 if (d_reclen <= 32) {
306 local_buf = small_buf;
307 } else {
308 local_buf = kmalloc(d_reclen, GFP_NOFS);
309 if (!local_buf) {
310 pathrelse(&path_to_entry);
311 return -ENOMEM;
312 }
313 if (item_moved(&tmp_ih, &path_to_entry)) {
314 kfree(local_buf);
315 311
316 /* sigh, must retry. Do this same offset again */ 312static int chown_one_xattr(struct dentry *dentry, void *data)
317 next_pos = d_off; 313{
318 goto research; 314 struct iattr *attrs = data;
319 } 315 return reiserfs_setattr(dentry, attrs);
320 } 316}
321 317
322 // Note, that we copy name to user space via temporary 318/* No i_mutex, but the inode is unconnected. */
323 // buffer (local_buf) because filldir will block if 319int reiserfs_delete_xattrs(struct inode *inode)
324 // user space buffer is swapped out. At that time 320{
325 // entry can move to somewhere else 321 int err = reiserfs_for_each_xattr(inode, delete_one_xattr, NULL);
326 memcpy(local_buf, d_name, d_reclen); 322 if (err)
327 323 reiserfs_warning(inode->i_sb, "jdm-20004",
328 /* the filldir function might need to start transactions, 324 "Couldn't delete all xattrs (%d)\n", err);
329 * or do who knows what. Release the path now that we've 325 return err;
330 * copied all the important stuff out of the deh 326}
331 */
332 pathrelse(&path_to_entry);
333
334 if (filldir(dirent, local_buf, d_reclen, d_off, d_ino,
335 DT_UNKNOWN) < 0) {
336 if (local_buf != small_buf) {
337 kfree(local_buf);
338 }
339 goto end;
340 }
341 if (local_buf != small_buf) {
342 kfree(local_buf);
343 }
344 } /* while */
345 327
346 end: 328/* inode->i_mutex: down */
347 pathrelse(&path_to_entry); 329int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
348 return 0; 330{
331 int err = reiserfs_for_each_xattr(inode, chown_one_xattr, attrs);
332 if (err)
333 reiserfs_warning(inode->i_sb, "jdm-20007",
334 "Couldn't chown all xattrs (%d)\n", err);
335 return err;
349} 336}
350 337
351/* 338#ifdef CONFIG_REISERFS_FS_XATTR
352 * this could be done with dedicated readdir ops for the xattr files, 339/* Returns a dentry corresponding to a specific extended attribute file
353 * but I want to get something working asap 340 * for the inode. If flags allow, the file is created. Otherwise, a
354 * this is stolen from vfs_readdir 341 * valid or negative dentry, or an error is returned. */
355 * 342static struct dentry *xattr_lookup(struct inode *inode, const char *name,
356 */ 343 int flags)
357static
358int xattr_readdir(struct inode *inode, filldir_t filler, void *buf)
359{ 344{
360 int res = -ENOENT; 345 struct dentry *xadir, *xafile;
361 mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR); 346 int err = 0;
362 if (!IS_DEADDIR(inode)) { 347
363 lock_kernel(); 348 xadir = open_xa_dir(inode, flags);
364 res = __xattr_readdir(inode, buf, filler); 349 if (IS_ERR(xadir))
365 unlock_kernel(); 350 return ERR_CAST(xadir);
351
352 xafile = lookup_one_len(name, xadir, strlen(name));
353 if (IS_ERR(xafile)) {
354 err = PTR_ERR(xafile);
355 goto out;
366 } 356 }
367 mutex_unlock(&inode->i_mutex); 357
368 return res; 358 if (xafile->d_inode && (flags & XATTR_CREATE))
359 err = -EEXIST;
360
361 if (!xafile->d_inode) {
362 err = -ENODATA;
363 if (xattr_may_create(flags)) {
364 mutex_lock_nested(&xadir->d_inode->i_mutex,
365 I_MUTEX_XATTR);
366 err = xattr_create(xadir->d_inode, xafile,
367 0700|S_IFREG);
368 mutex_unlock(&xadir->d_inode->i_mutex);
369 }
370 }
371
372 if (err)
373 dput(xafile);
374out:
375 dput(xadir);
376 if (err)
377 return ERR_PTR(err);
378 return xafile;
369} 379}
370 380
371/* Internal operations on file data */ 381/* Internal operations on file data */
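reiserfs_for_each_xattr() above drains the xattr directory in fixed-size batches: fill_with_dentries() returns -ENOSPC once its 8-slot buffer fills, the loop acts on and releases each entry, then refills until the readdir is exhausted. A self-contained sketch of that producer/consumer shape, with strings standing in for dentries and a fabricated directory listing:

#include <errno.h>
#include <stdio.h>

struct entry_buf {
    int count;
    const char *names[8];
};

/* illustrative listing, long enough to overflow the 8-slot buffer once */
static const char *fake_dir[] = {
    "user.a1", "user.a2", "user.a3", "user.a4", "user.a5",
    "user.a6", "user.a7", "user.a8", "user.a9", "user.a10",
    NULL
};

static int fill(struct entry_buf *buf, int *pos)
{
    while (fake_dir[*pos]) {
        if (buf->count == 8)
            return -ENOSPC; /* buffer full; caller drains and retries */
        buf->names[buf->count++] = fake_dir[(*pos)++];
    }
    return 0;
}

static int action(const char *name)
{
    printf("visiting %s\n", name);
    return 0;
}

int main(void)
{
    struct entry_buf buf = { .count = 0 };
    int pos = 0, err;

    err = fill(&buf, &pos);
    while ((err == 0 || err == -ENOSPC) && buf.count) {
        err = 0;
        for (int i = 0; i < buf.count; i++) {
            int lerr = action(buf.names[i]);
            err = lerr ? lerr : err; /* non-zero lerr overrides, like `lerr ?: err` */
        }
        buf.count = 0;
        if (!err)
            err = fill(&buf, &pos);
    }
    return err ? 1 : 0;
}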
@@ -375,14 +385,14 @@ static inline void reiserfs_put_page(struct page *page)
375 page_cache_release(page); 385 page_cache_release(page);
376} 386}
377 387
378static struct page *reiserfs_get_page(struct inode *dir, unsigned long n) 388static struct page *reiserfs_get_page(struct inode *dir, size_t n)
379{ 389{
380 struct address_space *mapping = dir->i_mapping; 390 struct address_space *mapping = dir->i_mapping;
381 struct page *page; 391 struct page *page;
382 /* We can deadlock if we try to free dentries, 392 /* We can deadlock if we try to free dentries,
 383 and an unlink/rmdir has just occurred - GFP_NOFS avoids this 393 and an unlink/rmdir has just occurred - GFP_NOFS avoids this
384 mapping_set_gfp_mask(mapping, GFP_NOFS); 394 mapping_set_gfp_mask(mapping, GFP_NOFS);
385 page = read_mapping_page(mapping, n, NULL); 395 page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL);
386 if (!IS_ERR(page)) { 396 if (!IS_ERR(page)) {
387 kmap(page); 397 kmap(page);
388 if (PageError(page)) 398 if (PageError(page))
@@ -405,6 +415,45 @@ int reiserfs_commit_write(struct file *f, struct page *page,
405int reiserfs_prepare_write(struct file *f, struct page *page, 415int reiserfs_prepare_write(struct file *f, struct page *page,
406 unsigned from, unsigned to); 416 unsigned from, unsigned to);
407 417
418static void update_ctime(struct inode *inode)
419{
420 struct timespec now = current_fs_time(inode->i_sb);
421 if (hlist_unhashed(&inode->i_hash) || !inode->i_nlink ||
422 timespec_equal(&inode->i_ctime, &now))
423 return;
424
425 inode->i_ctime = CURRENT_TIME_SEC;
426 mark_inode_dirty(inode);
427}
428
429static int lookup_and_delete_xattr(struct inode *inode, const char *name)
430{
431 int err = 0;
432 struct dentry *dentry, *xadir;
433
434 xadir = open_xa_dir(inode, XATTR_REPLACE);
435 if (IS_ERR(xadir))
436 return PTR_ERR(xadir);
437
438 dentry = lookup_one_len(name, xadir, strlen(name));
439 if (IS_ERR(dentry)) {
440 err = PTR_ERR(dentry);
441 goto out_dput;
442 }
443
444 if (dentry->d_inode) {
445 mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR);
446 err = xattr_unlink(xadir->d_inode, dentry);
447 mutex_unlock(&xadir->d_inode->i_mutex);
448 update_ctime(inode);
449 }
450
451 dput(dentry);
452out_dput:
453 dput(xadir);
454 return err;
455}
456
408 457
409/* Generic extended attribute operations that can be used by xa plugins */ 458/* Generic extended attribute operations that can be used by xa plugins */
410 459
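update_ctime() above only dirties the inode when the timestamp would actually change and the inode is still hashed and linked, so no-op updates never schedule writeback. A minimal model of the same guard, with a simplified stand-in inode structure:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct fake_inode {
    bool hashed;
    int nlink;
    time_t ctime;
    bool dirty;
};

static void update_ctime(struct fake_inode *inode, time_t now)
{
    /* same three early-outs as the kernel helper */
    if (!inode->hashed || !inode->nlink || inode->ctime == now)
        return;
    inode->ctime = now;
    inode->dirty = true;
}

int main(void)
{
    struct fake_inode in = { .hashed = true, .nlink = 1, .ctime = 0 };
    time_t now = time(NULL);

    update_ctime(&in, now);
    printf("dirty after change: %d\n", in.dirty); /* 1 */
    in.dirty = false;
    update_ctime(&in, now);                       /* ctime unchanged */
    printf("dirty after no-op:  %d\n", in.dirty); /* 0 */
    return 0;
}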
@@ -412,58 +461,32 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
412 * inode->i_mutex: down 461 * inode->i_mutex: down
413 */ 462 */
414int 463int
415reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, 464reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
416 size_t buffer_size, int flags) 465 struct inode *inode, const char *name,
466 const void *buffer, size_t buffer_size, int flags)
417{ 467{
418 int err = 0; 468 int err = 0;
419 struct dentry *dentry; 469 struct dentry *dentry;
420 struct page *page; 470 struct page *page;
421 char *data; 471 char *data;
422 struct address_space *mapping;
423 size_t file_pos = 0; 472 size_t file_pos = 0;
424 size_t buffer_pos = 0; 473 size_t buffer_pos = 0;
425 struct inode *xinode; 474 size_t new_size;
426 struct iattr newattrs;
427 __u32 xahash = 0; 475 __u32 xahash = 0;
428 476
429 if (get_inode_sd_version(inode) == STAT_DATA_V1) 477 if (get_inode_sd_version(inode) == STAT_DATA_V1)
430 return -EOPNOTSUPP; 478 return -EOPNOTSUPP;
431 479
432 /* Empty xattrs are ok, they're just empty files, no hash */ 480 if (!buffer)
433 if (buffer && buffer_size) 481 return lookup_and_delete_xattr(inode, name);
434 xahash = xattr_hash(buffer, buffer_size);
435 482
436 open_file: 483 dentry = xattr_lookup(inode, name, flags);
437 dentry = get_xa_file_dentry(inode, name, flags); 484 if (IS_ERR(dentry))
438 if (IS_ERR(dentry)) { 485 return PTR_ERR(dentry);
439 err = PTR_ERR(dentry);
440 goto out;
441 }
442
443 xinode = dentry->d_inode;
444 REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
445 486
446 /* we need to copy it off.. */ 487 down_write(&REISERFS_I(inode)->i_xattr_sem);
447 if (xinode->i_nlink > 1) {
448 dput(dentry);
449 err = reiserfs_xattr_del(inode, name);
450 if (err < 0)
451 goto out;
452 /* We just killed the old one, we're not replacing anymore */
453 if (flags & XATTR_REPLACE)
454 flags &= ~XATTR_REPLACE;
455 goto open_file;
456 }
457 488
458 /* Resize it so we're ok to write there */ 489 xahash = xattr_hash(buffer, buffer_size);
459 newattrs.ia_size = buffer_size;
460 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
461 mutex_lock_nested(&xinode->i_mutex, I_MUTEX_XATTR);
462 err = notify_change(dentry, &newattrs);
463 if (err)
464 goto out_filp;
465
466 mapping = xinode->i_mapping;
467 while (buffer_pos < buffer_size || buffer_pos == 0) { 490 while (buffer_pos < buffer_size || buffer_pos == 0) {
468 size_t chunk; 491 size_t chunk;
469 size_t skip = 0; 492 size_t skip = 0;
@@ -473,10 +496,10 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
473 else 496 else
474 chunk = buffer_size - buffer_pos; 497 chunk = buffer_size - buffer_pos;
475 498
476 page = reiserfs_get_page(xinode, file_pos >> PAGE_CACHE_SHIFT); 499 page = reiserfs_get_page(dentry->d_inode, file_pos);
477 if (IS_ERR(page)) { 500 if (IS_ERR(page)) {
478 err = PTR_ERR(page); 501 err = PTR_ERR(page);
479 goto out_filp; 502 goto out_unlock;
480 } 503 }
481 504
482 lock_page(page); 505 lock_page(page);
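The loop in reiserfs_xattr_set_handle() above stores the value in page-sized chunks, and the value sits behind a small reiserfs_xattr_header at the start of the file, so the first page's data chunk is shorter by the header size. A userspace walk-through of the same chunking arithmetic, using a deliberately tiny page size so every case shows up:

#include <stdio.h>

#define PAGE_SIZE 16    /* tiny page to make the chunking visible */

struct xattr_header { unsigned magic, hash; }; /* 8-byte stand-in header */

int main(void)
{
    const char buffer[] = "the quick brown fox jumps over the lazy dog";
    size_t buffer_size = sizeof(buffer) - 1;
    size_t file_pos = 0, buffer_pos = 0;

    while (buffer_pos < buffer_size || buffer_pos == 0) {
        size_t chunk = buffer_size - buffer_pos;
        size_t skip = 0;

        if (chunk > PAGE_SIZE)
            chunk = PAGE_SIZE;

        if (file_pos == 0) {
            /* first page: the header goes in front of the data */
            skip = sizeof(struct xattr_header);
            if (chunk + skip > PAGE_SIZE)
                chunk = PAGE_SIZE - skip;
        }

        printf("page %zu: header=%zu data bytes=%zu (\"%.*s\")\n",
               file_pos / PAGE_SIZE, skip, chunk,
               (int)chunk, buffer + buffer_pos);

        file_pos += chunk + skip;
        buffer_pos += chunk;
    }
    return 0;
}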
@@ -510,28 +533,61 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
510 break; 533 break;
511 } 534 }
512 535
513 /* We can't mark the inode dirty if it's not hashed. This is the case 536 new_size = buffer_size + sizeof(struct reiserfs_xattr_header);
514 * when we're inheriting the default ACL. If we dirty it, the inode 537 if (!err && new_size < i_size_read(dentry->d_inode)) {
515 * gets marked dirty, but won't (ever) make it onto the dirty list until 538 struct iattr newattrs = {
516 * it's synced explicitly to clear I_DIRTY. This is bad. */ 539 .ia_ctime = current_fs_time(inode->i_sb),
517 if (!hlist_unhashed(&inode->i_hash)) { 540 .ia_size = buffer_size,
518 inode->i_ctime = CURRENT_TIME_SEC; 541 .ia_valid = ATTR_SIZE | ATTR_CTIME,
519 mark_inode_dirty(inode); 542 };
543 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
544 down_write(&dentry->d_inode->i_alloc_sem);
545 err = reiserfs_setattr(dentry, &newattrs);
546 up_write(&dentry->d_inode->i_alloc_sem);
547 mutex_unlock(&dentry->d_inode->i_mutex);
548 } else
549 update_ctime(inode);
550out_unlock:
551 up_write(&REISERFS_I(inode)->i_xattr_sem);
552 dput(dentry);
553 return err;
554}
555
556/* We need to start a transaction to maintain lock ordering */
557int reiserfs_xattr_set(struct inode *inode, const char *name,
558 const void *buffer, size_t buffer_size, int flags)
559{
560
561 struct reiserfs_transaction_handle th;
562 int error, error2;
563 size_t jbegin_count = reiserfs_xattr_nblocks(inode, buffer_size);
564
565 if (!(flags & XATTR_REPLACE))
566 jbegin_count += reiserfs_xattr_jcreate_nblocks(inode);
567
568 reiserfs_write_lock(inode->i_sb);
569 error = journal_begin(&th, inode->i_sb, jbegin_count);
570 if (error) {
571 reiserfs_write_unlock(inode->i_sb);
572 return error;
520 } 573 }
521 574
522 out_filp: 575 error = reiserfs_xattr_set_handle(&th, inode, name,
523 mutex_unlock(&xinode->i_mutex); 576 buffer, buffer_size, flags);
524 dput(dentry);
525 577
526 out: 578 error2 = journal_end(&th, inode->i_sb, jbegin_count);
527 return err; 579 if (error == 0)
580 error = error2;
581 reiserfs_write_unlock(inode->i_sb);
582
583 return error;
528} 584}
529 585
530/* 586/*
531 * inode->i_mutex: down 587 * inode->i_mutex: down
532 */ 588 */
533int 589int
534reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer, 590reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
535 size_t buffer_size) 591 size_t buffer_size)
536{ 592{
537 ssize_t err = 0; 593 ssize_t err = 0;
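reiserfs_xattr_set() above wraps the real work in journal_begin()/journal_end() and reports the first failure: journal_end() must still run when the set fails, but its error only surfaces if nothing failed earlier. The shape of that pattern, with stub functions in place of the journal API:

#include <stdio.h>

static int journal_begin(void) { puts("begin txn"); return 0; }
static int journal_end(void)   { puts("end txn");   return 0; }
static int do_set(void)        { puts("set xattr"); return 0; }

static int xattr_set(void)
{
    int error, error2;

    error = journal_begin();
    if (error)
        return error;   /* nothing to unwind yet */

    error = do_set();

    /* journal_end must run even if do_set failed ... */
    error2 = journal_end();
    if (error == 0)
        error = error2; /* ... but the first error wins */

    return error;
}

int main(void)
{
    return xattr_set() ? 1 : 0;
}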
@@ -540,7 +596,6 @@ reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
540 size_t file_pos = 0; 596 size_t file_pos = 0;
541 size_t buffer_pos = 0; 597 size_t buffer_pos = 0;
542 struct page *page; 598 struct page *page;
543 struct inode *xinode;
544 __u32 hash = 0; 599 __u32 hash = 0;
545 600
546 if (name == NULL) 601 if (name == NULL)
@@ -551,25 +606,25 @@ reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
551 if (get_inode_sd_version(inode) == STAT_DATA_V1) 606 if (get_inode_sd_version(inode) == STAT_DATA_V1)
552 return -EOPNOTSUPP; 607 return -EOPNOTSUPP;
553 608
554 dentry = get_xa_file_dentry(inode, name, FL_READONLY); 609 dentry = xattr_lookup(inode, name, XATTR_REPLACE);
555 if (IS_ERR(dentry)) { 610 if (IS_ERR(dentry)) {
556 err = PTR_ERR(dentry); 611 err = PTR_ERR(dentry);
557 goto out; 612 goto out;
558 } 613 }
559 614
560 xinode = dentry->d_inode; 615 down_read(&REISERFS_I(inode)->i_xattr_sem);
561 isize = xinode->i_size; 616
562 REISERFS_I(inode)->i_flags |= i_has_xattr_dir; 617 isize = i_size_read(dentry->d_inode);
563 618
564 /* Just return the size needed */ 619 /* Just return the size needed */
565 if (buffer == NULL) { 620 if (buffer == NULL) {
566 err = isize - sizeof(struct reiserfs_xattr_header); 621 err = isize - sizeof(struct reiserfs_xattr_header);
567 goto out_dput; 622 goto out_unlock;
568 } 623 }
569 624
570 if (buffer_size < isize - sizeof(struct reiserfs_xattr_header)) { 625 if (buffer_size < isize - sizeof(struct reiserfs_xattr_header)) {
571 err = -ERANGE; 626 err = -ERANGE;
572 goto out_dput; 627 goto out_unlock;
573 } 628 }
574 629
575 while (file_pos < isize) { 630 while (file_pos < isize) {
@@ -581,10 +636,10 @@ reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
581 else 636 else
582 chunk = isize - file_pos; 637 chunk = isize - file_pos;
583 638
584 page = reiserfs_get_page(xinode, file_pos >> PAGE_CACHE_SHIFT); 639 page = reiserfs_get_page(dentry->d_inode, file_pos);
585 if (IS_ERR(page)) { 640 if (IS_ERR(page)) {
586 err = PTR_ERR(page); 641 err = PTR_ERR(page);
587 goto out_dput; 642 goto out_unlock;
588 } 643 }
589 644
590 lock_page(page); 645 lock_page(page);
@@ -598,12 +653,12 @@ reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
598 if (rxh->h_magic != cpu_to_le32(REISERFS_XATTR_MAGIC)) { 653 if (rxh->h_magic != cpu_to_le32(REISERFS_XATTR_MAGIC)) {
599 unlock_page(page); 654 unlock_page(page);
600 reiserfs_put_page(page); 655 reiserfs_put_page(page);
601 reiserfs_warning(inode->i_sb, 656 reiserfs_warning(inode->i_sb, "jdm-20001",
602 "Invalid magic for xattr (%s) " 657 "Invalid magic for xattr (%s) "
603 "associated with %k", name, 658 "associated with %k", name,
604 INODE_PKEY(inode)); 659 INODE_PKEY(inode));
605 err = -EIO; 660 err = -EIO;
606 goto out_dput; 661 goto out_unlock;
607 } 662 }
608 hash = le32_to_cpu(rxh->h_hash); 663 hash = le32_to_cpu(rxh->h_hash);
609 } 664 }
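reiserfs_xattr_get() validates each attribute against the header stored at the front of the xattr file: a magic number plus a hash of the body, returning -EIO on either mismatch, as the two hunks above and below show. A compact sketch of that check; the magic value and the hash function are toy stand-ins for REISERFS_XATTR_MAGIC and xattr_hash():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define XATTR_MAGIC 0x52465841u /* arbitrary demo value */

struct xattr_header { uint32_t magic; uint32_t hash; };

static uint32_t demo_hash(const unsigned char *p, size_t len)
{
    uint32_t h = 0;
    while (len--)
        h = h * 31 + *p++;
    return h;
}

int main(void)
{
    unsigned char blob[64];
    const char *value = "hello";
    struct xattr_header h = {
        XATTR_MAGIC, demo_hash((const unsigned char *)value, 5)
    };

    /* write side: header first, body after it */
    memcpy(blob, &h, sizeof(h));
    memcpy(blob + sizeof(h), value, 5);

    /* read side: reject on bad magic or mismatched hash, as in -EIO above */
    struct xattr_header rh;
    memcpy(&rh, blob, sizeof(rh));
    if (rh.magic != XATTR_MAGIC ||
        rh.hash != demo_hash(blob + sizeof(rh), 5)) {
        fprintf(stderr, "corrupt xattr\n");
        return 1;
    }
    puts("xattr verified");
    return 0;
}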
@@ -618,256 +673,83 @@ reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
618 673
619 if (xattr_hash(buffer, isize - sizeof(struct reiserfs_xattr_header)) != 674 if (xattr_hash(buffer, isize - sizeof(struct reiserfs_xattr_header)) !=
620 hash) { 675 hash) {
621 reiserfs_warning(inode->i_sb, 676 reiserfs_warning(inode->i_sb, "jdm-20002",
622 "Invalid hash for xattr (%s) associated " 677 "Invalid hash for xattr (%s) associated "
623 "with %k", name, INODE_PKEY(inode)); 678 "with %k", name, INODE_PKEY(inode));
624 err = -EIO; 679 err = -EIO;
625 } 680 }
626 681
627 out_dput: 682out_unlock:
683 up_read(&REISERFS_I(inode)->i_xattr_sem);
628 dput(dentry); 684 dput(dentry);
629 685
630 out: 686out:
631 return err; 687 return err;
632} 688}
633 689
634static int 690/* Actual operations that are exported to VFS-land */
635__reiserfs_xattr_del(struct dentry *xadir, const char *name, int namelen) 691struct xattr_handler *reiserfs_xattr_handlers[] = {
636{ 692 &reiserfs_xattr_user_handler,
637 struct dentry *dentry; 693 &reiserfs_xattr_trusted_handler,
638 struct inode *dir = xadir->d_inode; 694#ifdef CONFIG_REISERFS_FS_SECURITY
639 int err = 0; 695 &reiserfs_xattr_security_handler,
640 696#endif
641 dentry = lookup_one_len(name, xadir, namelen); 697#ifdef CONFIG_REISERFS_FS_POSIX_ACL
642 if (IS_ERR(dentry)) { 698 &reiserfs_posix_acl_access_handler,
643 err = PTR_ERR(dentry); 699 &reiserfs_posix_acl_default_handler,
644 goto out; 700#endif
645 } else if (!dentry->d_inode) { 701 NULL
646 err = -ENODATA;
647 goto out_file;
648 }
649
650 /* Skip directories.. */
651 if (S_ISDIR(dentry->d_inode->i_mode))
652 goto out_file;
653
654 if (!is_reiserfs_priv_object(dentry->d_inode)) {
655 reiserfs_warning(dir->i_sb, "OID %08x [%.*s/%.*s] doesn't have "
656 "priv flag set [parent is %sset].",
657 le32_to_cpu(INODE_PKEY(dentry->d_inode)->
658 k_objectid), xadir->d_name.len,
659 xadir->d_name.name, namelen, name,
660 is_reiserfs_priv_object(xadir->
661 d_inode) ? "" :
662 "not ");
663 dput(dentry);
664 return -EIO;
665 }
666
667 err = dir->i_op->unlink(dir, dentry);
668 if (!err)
669 d_delete(dentry);
670
671 out_file:
672 dput(dentry);
673
674 out:
675 return err;
676}
677
678int reiserfs_xattr_del(struct inode *inode, const char *name)
679{
680 struct dentry *dir;
681 int err;
682
683 dir = open_xa_dir(inode, FL_READONLY);
684 if (IS_ERR(dir)) {
685 err = PTR_ERR(dir);
686 goto out;
687 }
688
689 err = __reiserfs_xattr_del(dir, name, strlen(name));
690 dput(dir);
691
692 if (!err) {
693 inode->i_ctime = CURRENT_TIME_SEC;
694 mark_inode_dirty(inode);
695 }
696
697 out:
698 return err;
699}
700
701/* The following are side effects of other operations that aren't explicitly
702 * modifying extended attributes. This includes operations such as permissions
703 * or ownership changes, object deletions, etc. */
704
705static int
706reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen,
707 loff_t offset, u64 ino, unsigned int d_type)
708{
709 struct dentry *xadir = (struct dentry *)buf;
710
711 return __reiserfs_xattr_del(xadir, name, namelen);
712
713}
714
715/* This is called w/ inode->i_mutex downed */
716int reiserfs_delete_xattrs(struct inode *inode)
717{
718 struct dentry *dir, *root;
719 int err = 0;
720
721 /* Skip out, an xattr has no xattrs associated with it */
722 if (is_reiserfs_priv_object(inode) ||
723 get_inode_sd_version(inode) == STAT_DATA_V1 ||
724 !reiserfs_xattrs(inode->i_sb)) {
725 return 0;
726 }
727 reiserfs_read_lock_xattrs(inode->i_sb);
728 dir = open_xa_dir(inode, FL_READONLY);
729 reiserfs_read_unlock_xattrs(inode->i_sb);
730 if (IS_ERR(dir)) {
731 err = PTR_ERR(dir);
732 goto out;
733 } else if (!dir->d_inode) {
734 dput(dir);
735 return 0;
736 }
737
738 lock_kernel();
739 err = xattr_readdir(dir->d_inode, reiserfs_delete_xattrs_filler, dir);
740 if (err) {
741 unlock_kernel();
742 goto out_dir;
743 }
744
745 /* Leftovers besides . and .. -- that's not good. */
746 if (dir->d_inode->i_nlink <= 2) {
747 root = get_xa_root(inode->i_sb, XATTR_REPLACE);
748 reiserfs_write_lock_xattrs(inode->i_sb);
749 err = vfs_rmdir(root->d_inode, dir);
750 reiserfs_write_unlock_xattrs(inode->i_sb);
751 dput(root);
752 } else {
753 reiserfs_warning(inode->i_sb,
754 "Couldn't remove all entries in directory");
755 }
756 unlock_kernel();
757
758 out_dir:
759 dput(dir);
760
761 out:
762 if (!err)
763 REISERFS_I(inode)->i_flags =
764 REISERFS_I(inode)->i_flags & ~i_has_xattr_dir;
765 return err;
766}
767
768struct reiserfs_chown_buf {
769 struct inode *inode;
770 struct dentry *xadir;
771 struct iattr *attrs;
772}; 702};
773 703
774/* XXX: If there is a better way to do this, I'd love to hear about it */ 704/*
775static int 705 * In order to implement different sets of xattr operations for each xattr
776reiserfs_chown_xattrs_filler(void *buf, const char *name, int namelen, 706 * prefix with the generic xattr API, a filesystem should create a
777 loff_t offset, u64 ino, unsigned int d_type) 707 * null-terminated array of struct xattr_handler (one for each prefix) and
778{ 708 * hang a pointer to it off of the s_xattr field of the superblock.
779 struct reiserfs_chown_buf *chown_buf = (struct reiserfs_chown_buf *)buf; 709 *
780 struct dentry *xafile, *xadir = chown_buf->xadir; 710 * The generic_fooxattr() functions will use this list to dispatch xattr
781 struct iattr *attrs = chown_buf->attrs; 711 * operations to the correct xattr_handler.
782 int err = 0; 712 */
783 713#define for_each_xattr_handler(handlers, handler) \
784 xafile = lookup_one_len(name, xadir, namelen); 714 for ((handler) = *(handlers)++; \
785 if (IS_ERR(xafile)) 715 (handler) != NULL; \
786 return PTR_ERR(xafile); 716 (handler) = *(handlers)++)
787 else if (!xafile->d_inode) {
788 dput(xafile);
789 return -ENODATA;
790 }
791
792 if (!S_ISDIR(xafile->d_inode->i_mode))
793 err = notify_change(xafile, attrs);
794 dput(xafile);
795
796 return err;
797}
798 717
799int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs) 718/* This is the implementation for the xattr plugin infrastructure */
719static inline struct xattr_handler *
720find_xattr_handler_prefix(struct xattr_handler **handlers,
721 const char *name)
800{ 722{
801 struct dentry *dir; 723 struct xattr_handler *xah;
802 int err = 0;
803 struct reiserfs_chown_buf buf;
804 unsigned int ia_valid = attrs->ia_valid;
805 724
806 /* Skip out, an xattr has no xattrs associated with it */ 725 if (!handlers)
807 if (is_reiserfs_priv_object(inode) || 726 return NULL;
808 get_inode_sd_version(inode) == STAT_DATA_V1 ||
809 !reiserfs_xattrs(inode->i_sb)) {
810 return 0;
811 }
812 reiserfs_read_lock_xattrs(inode->i_sb);
813 dir = open_xa_dir(inode, FL_READONLY);
814 reiserfs_read_unlock_xattrs(inode->i_sb);
815 if (IS_ERR(dir)) {
816 if (PTR_ERR(dir) != -ENODATA)
817 err = PTR_ERR(dir);
818 goto out;
819 } else if (!dir->d_inode) {
820 dput(dir);
821 goto out;
822 }
823 727
824 lock_kernel(); 728 for_each_xattr_handler(handlers, xah) {
825 729 if (strncmp(xah->prefix, name, strlen(xah->prefix)) == 0)
826 attrs->ia_valid &= (ATTR_UID | ATTR_GID | ATTR_CTIME); 730 break;
827 buf.xadir = dir;
828 buf.attrs = attrs;
829 buf.inode = inode;
830
831 err = xattr_readdir(dir->d_inode, reiserfs_chown_xattrs_filler, &buf);
832 if (err) {
833 unlock_kernel();
834 goto out_dir;
835 } 731 }
836 732
837 err = notify_change(dir, attrs); 733 return xah;
838 unlock_kernel();
839
840 out_dir:
841 dput(dir);
842
843 out:
844 attrs->ia_valid = ia_valid;
845 return err;
846} 734}
847 735
848/* Actual operations that are exported to VFS-land */
849 736
850/* 737/*
851 * Inode operation getxattr() 738 * Inode operation getxattr()
852 * Preliminary locking: we down dentry->d_inode->i_mutex
853 */ 739 */
854ssize_t 740ssize_t
855reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, 741reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
856 size_t size) 742 size_t size)
857{ 743{
858 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name); 744 struct inode *inode = dentry->d_inode;
859 int err; 745 struct xattr_handler *handler;
860 746
861 if (!xah || !reiserfs_xattrs(dentry->d_sb) || 747 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name);
862 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) 748
749 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1)
863 return -EOPNOTSUPP; 750 return -EOPNOTSUPP;
864 751
865 reiserfs_read_lock_xattr_i(dentry->d_inode); 752 return handler->get(inode, name, buffer, size);
866 reiserfs_read_lock_xattrs(dentry->d_sb);
867 err = xah->get(dentry->d_inode, name, buffer, size);
868 reiserfs_read_unlock_xattrs(dentry->d_sb);
869 reiserfs_read_unlock_xattr_i(dentry->d_inode);
870 return err;
871} 753}
872 754
873/* 755/*
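The dispatch above replaces the old registration list with a NULL-terminated handler table hung off s_xattr, walked by the for_each_xattr_handler() macro until a prefix matches. A runnable userspace reduction of the same lookup; the two handlers here are toy stand-ins:

#include <stdio.h>
#include <string.h>

struct xattr_handler {
    const char *prefix;
    int (*get)(const char *name);
};

static int user_get(const char *name)    { printf("user handler: %s\n", name);    return 0; }
static int trusted_get(const char *name) { printf("trusted handler: %s\n", name); return 0; }

static struct xattr_handler user_handler    = { "user.",    user_get };
static struct xattr_handler trusted_handler = { "trusted.", trusted_get };

static struct xattr_handler *handlers[] = {
    &user_handler, &trusted_handler, NULL
};

/* same shape as the kernel macro: advance through the table until NULL */
#define for_each_xattr_handler(handlers, handler)   \
    for ((handler) = *(handlers)++;                 \
         (handler) != NULL;                         \
         (handler) = *(handlers)++)

static struct xattr_handler *
find_xattr_handler_prefix(struct xattr_handler **hs, const char *name)
{
    struct xattr_handler *xah;

    if (!hs)
        return NULL;
    for_each_xattr_handler(hs, xah)
        if (strncmp(xah->prefix, name, strlen(xah->prefix)) == 0)
            break;
    return xah; /* NULL if the loop ran off the end of the table */
}

int main(void)
{
    struct xattr_handler *h =
        find_xattr_handler_prefix(handlers, "user.comment");

    if (h)
        h->get("user.comment");
    else
        puts("-EOPNOTSUPP"); /* no handler for this prefix */
    return 0;
}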
@@ -879,27 +761,15 @@ int
879reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, 761reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
880 size_t size, int flags) 762 size_t size, int flags)
881{ 763{
882 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name); 764 struct inode *inode = dentry->d_inode;
883 int err; 765 struct xattr_handler *handler;
884 int lock;
885 766
886 if (!xah || !reiserfs_xattrs(dentry->d_sb) || 767 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name);
887 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) 768
769 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1)
888 return -EOPNOTSUPP; 770 return -EOPNOTSUPP;
889 771
890 reiserfs_write_lock_xattr_i(dentry->d_inode); 772 return handler->set(inode, name, value, size, flags);
891 lock = !has_xattr_dir(dentry->d_inode);
892 if (lock)
893 reiserfs_write_lock_xattrs(dentry->d_sb);
894 else
895 reiserfs_read_lock_xattrs(dentry->d_sb);
896 err = xah->set(dentry->d_inode, name, value, size, flags);
897 if (lock)
898 reiserfs_write_unlock_xattrs(dentry->d_sb);
899 else
900 reiserfs_read_unlock_xattrs(dentry->d_sb);
901 reiserfs_write_unlock_xattr_i(dentry->d_inode);
902 return err;
903} 773}
904 774
905/* 775/*
@@ -909,86 +779,66 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
909 */ 779 */
910int reiserfs_removexattr(struct dentry *dentry, const char *name) 780int reiserfs_removexattr(struct dentry *dentry, const char *name)
911{ 781{
912 int err; 782 struct inode *inode = dentry->d_inode;
913 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name); 783 struct xattr_handler *handler;
784 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name);
914 785
915 if (!xah || !reiserfs_xattrs(dentry->d_sb) || 786 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1)
916 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
917 return -EOPNOTSUPP; 787 return -EOPNOTSUPP;
918 788
919 reiserfs_write_lock_xattr_i(dentry->d_inode); 789 return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
920 reiserfs_read_lock_xattrs(dentry->d_sb);
921
922 /* Deletion pre-operation */
923 if (xah->del) {
924 err = xah->del(dentry->d_inode, name);
925 if (err)
926 goto out;
927 }
928
929 err = reiserfs_xattr_del(dentry->d_inode, name);
930
931 dentry->d_inode->i_ctime = CURRENT_TIME_SEC;
932 mark_inode_dirty(dentry->d_inode);
933
934 out:
935 reiserfs_read_unlock_xattrs(dentry->d_sb);
936 reiserfs_write_unlock_xattr_i(dentry->d_inode);
937 return err;
938} 790}
939 791
940/* This is what filldir will use: 792struct listxattr_buf {
941 * r_pos will always contain the amount of space required for the entire 793 size_t size;
942 * list. If r_pos becomes larger than r_size, we need more space and we 794 size_t pos;
943 * return an error indicating this. If r_pos is less than r_size, then we've 795 char *buf;
944 * filled the buffer successfully and we return success */ 796 struct inode *inode;
945struct reiserfs_listxattr_buf {
946 int r_pos;
947 int r_size;
948 char *r_buf;
949 struct inode *r_inode;
950}; 797};
951 798
952static int 799static int listxattr_filler(void *buf, const char *name, int namelen,
953reiserfs_listxattr_filler(void *buf, const char *name, int namelen, 800 loff_t offset, u64 ino, unsigned int d_type)
954 loff_t offset, u64 ino, unsigned int d_type)
955{ 801{
956 struct reiserfs_listxattr_buf *b = (struct reiserfs_listxattr_buf *)buf; 802 struct listxattr_buf *b = (struct listxattr_buf *)buf;
957 int len = 0; 803 size_t size;
958 if (name[0] != '.' 804 if (name[0] != '.' ||
959 || (namelen != 1 && (name[1] != '.' || namelen != 2))) { 805 (namelen != 1 && (name[1] != '.' || namelen != 2))) {
960 struct reiserfs_xattr_handler *xah = 806 struct xattr_handler *handler;
961 find_xattr_handler_prefix(name); 807 handler = find_xattr_handler_prefix(b->inode->i_sb->s_xattr,
962 if (!xah) 808 name);
963 return 0; /* Unsupported xattr name, skip it */ 809 if (!handler) /* Unsupported xattr name */
964 810 return 0;
965 /* We call ->list() twice because the operation isn't required to just 811 if (b->buf) {
966 * return the name back - we want to make sure we have enough space */ 812 size = handler->list(b->inode, b->buf + b->pos,
967 len += xah->list(b->r_inode, name, namelen, NULL); 813 b->size, name, namelen);
968 814 if (size > b->size)
969 if (len) { 815 return -ERANGE;
970 if (b->r_pos + len + 1 <= b->r_size) { 816 } else {
971 char *p = b->r_buf + b->r_pos; 817 size = handler->list(b->inode, NULL, 0, name, namelen);
972 p += xah->list(b->r_inode, name, namelen, p);
973 *p++ = '\0';
974 }
975 b->r_pos += len + 1;
976 } 818 }
977 }
978 819
820 b->pos += size;
821 }
979 return 0; 822 return 0;
980} 823}
981 824
982/* 825/*
983 * Inode operation listxattr() 826 * Inode operation listxattr()
984 * 827 *
985 * Preliminary locking: we down dentry->d_inode->i_mutex 828 * We totally ignore the generic listxattr here because it would be stupid
829 * not to. Since the xattrs are organized in a directory, we can just
830 * readdir to find them.
986 */ 831 */
987ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) 832ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
988{ 833{
989 struct dentry *dir; 834 struct dentry *dir;
990 int err = 0; 835 int err = 0;
991 struct reiserfs_listxattr_buf buf; 836 loff_t pos = 0;
837 struct listxattr_buf buf = {
838 .inode = dentry->d_inode,
839 .buf = buffer,
840 .size = buffer ? size : 0,
841 };
992 842
993 if (!dentry->d_inode) 843 if (!dentry->d_inode)
994 return -EINVAL; 844 return -EINVAL;
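listxattr_filler() above implements the standard listxattr(2) sizing protocol: with a NULL buffer it only accumulates the space required, otherwise it packs each NUL-terminated name and fails with -ERANGE when the buffer is too small. A self-contained model of that two-pass protocol, with an illustrative name list:

#include <errno.h>
#include <stdio.h>
#include <string.h>

static const char *names[] = { "user.mime_type", "user.charset" };

/* mirrors the filler: each name is emitted NUL-terminated */
static long demo_listxattr(char *buf, size_t size)
{
    size_t pos = 0;

    for (unsigned i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
        size_t len = strlen(names[i]) + 1; /* include the NUL */

        if (buf) {
            if (pos + len > size)
                return -ERANGE;
            memcpy(buf + pos, names[i], len);
        }
        pos += len;
    }
    return (long)pos;
}

int main(void)
{
    long needed = demo_listxattr(NULL, 0); /* pass 1: size query */
    char buf[64];

    printf("need %ld bytes\n", needed);
    if (needed <= (long)sizeof(buf) && demo_listxattr(buf, sizeof(buf)) > 0) {
        /* names are packed back to back, each NUL-terminated */
        for (char *p = buf; p < buf + needed; p += strlen(p) + 1)
            printf("xattr: %s\n", p);
    }
    return 0;
}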
@@ -997,130 +847,104 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
 	    get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
 		return -EOPNOTSUPP;
 
-	reiserfs_read_lock_xattr_i(dentry->d_inode);
-	reiserfs_read_lock_xattrs(dentry->d_sb);
-	dir = open_xa_dir(dentry->d_inode, FL_READONLY);
-	reiserfs_read_unlock_xattrs(dentry->d_sb);
+	dir = open_xa_dir(dentry->d_inode, XATTR_REPLACE);
 	if (IS_ERR(dir)) {
 		err = PTR_ERR(dir);
 		if (err == -ENODATA)
 			err = 0;  /* Not an error if there aren't any xattrs */
 		goto out;
 	}
 
-	buf.r_buf = buffer;
-	buf.r_size = buffer ? size : 0;
-	buf.r_pos = 0;
-	buf.r_inode = dentry->d_inode;
+	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
+	err = reiserfs_readdir_dentry(dir, &buf, listxattr_filler, &pos);
+	mutex_unlock(&dir->d_inode->i_mutex);
 
-	REISERFS_I(dentry->d_inode)->i_flags |= i_has_xattr_dir;
-
-	err = xattr_readdir(dir->d_inode, reiserfs_listxattr_filler, &buf);
-	if (err)
-		goto out_dir;
-
-	if (buf.r_pos > buf.r_size && buffer != NULL)
-		err = -ERANGE;
-	else
-		err = buf.r_pos;
+	if (!err)
+		err = buf.pos;
 
-      out_dir:
 	dput(dir);
-
-      out:
-	reiserfs_read_unlock_xattr_i(dentry->d_inode);
+out:
 	return err;
 }
 
-/* This is the implementation for the xattr plugin infrastructure */
-static LIST_HEAD(xattr_handlers);
-static DEFINE_RWLOCK(handler_lock);
-
-static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char
-								*prefix)
+static int reiserfs_check_acl(struct inode *inode, int mask)
 {
-	struct reiserfs_xattr_handler *xah = NULL;
-	struct list_head *p;
+	struct posix_acl *acl;
+	int error = -EAGAIN; /* do regular unix permission checks by default */
 
-	read_lock(&handler_lock);
-	list_for_each(p, &xattr_handlers) {
-		xah = list_entry(p, struct reiserfs_xattr_handler, handlers);
-		if (strncmp(xah->prefix, prefix, strlen(xah->prefix)) == 0)
-			break;
-		xah = NULL;
+	acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
+
+	if (acl) {
+		if (!IS_ERR(acl)) {
+			error = posix_acl_permission(inode, acl, mask);
+			posix_acl_release(acl);
+		} else if (PTR_ERR(acl) != -ENODATA)
+			error = PTR_ERR(acl);
 	}
 
-	read_unlock(&handler_lock);
-	return xah;
+	return error;
 }
 
-static void __unregister_handlers(void)
+int reiserfs_permission(struct inode *inode, int mask)
 {
-	struct reiserfs_xattr_handler *xah;
-	struct list_head *p, *tmp;
-
-	list_for_each_safe(p, tmp, &xattr_handlers) {
-		xah = list_entry(p, struct reiserfs_xattr_handler, handlers);
-		if (xah->exit)
-			xah->exit();
-
-		list_del_init(p);
-	}
-	INIT_LIST_HEAD(&xattr_handlers);
+	/*
+	 * We don't do permission checks on the internal objects.
+	 * Permissions are determined by the "owning" object.
+	 */
+	if (IS_PRIVATE(inode))
+		return 0;
+	/*
+	 * Stat data v1 doesn't support ACLs.
+	 */
+	if (get_inode_sd_version(inode) == STAT_DATA_V1)
+		return generic_permission(inode, mask, NULL);
+	else
+		return generic_permission(inode, mask, reiserfs_check_acl);
 }
 
-int __init reiserfs_xattr_register_handlers(void)
+static int create_privroot(struct dentry *dentry)
 {
-	int err = 0;
-	struct reiserfs_xattr_handler *xah;
-	struct list_head *p;
-
-	write_lock(&handler_lock);
-
-	/* If we're already initialized, nothing to do */
-	if (!list_empty(&xattr_handlers)) {
-		write_unlock(&handler_lock);
-		return 0;
-	}
-
-	/* Add the handlers */
-	list_add_tail(&user_handler.handlers, &xattr_handlers);
-	list_add_tail(&trusted_handler.handlers, &xattr_handlers);
-#ifdef CONFIG_REISERFS_FS_SECURITY
-	list_add_tail(&security_handler.handlers, &xattr_handlers);
-#endif
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-	list_add_tail(&posix_acl_access_handler.handlers, &xattr_handlers);
-	list_add_tail(&posix_acl_default_handler.handlers, &xattr_handlers);
-#endif
-
-	/* Run initializers, if available */
-	list_for_each(p, &xattr_handlers) {
-		xah = list_entry(p, struct reiserfs_xattr_handler, handlers);
-		if (xah->init) {
-			err = xah->init();
-			if (err) {
-				list_del_init(p);
-				break;
-			}
-		}
+	int err;
+	struct inode *inode = dentry->d_parent->d_inode;
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR);
+	err = xattr_mkdir(inode, dentry, 0700);
+	mutex_unlock(&inode->i_mutex);
+	if (err) {
+		dput(dentry);
+		dentry = NULL;
 	}
 
-	/* Clean up other handlers, if any failed */
-	if (err)
-		__unregister_handlers();
+	if (dentry && dentry->d_inode)
+		reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr "
+			      "storage.\n", PRIVROOT_NAME);
 
-	write_unlock(&handler_lock);
 	return err;
 }
 
-void reiserfs_xattr_unregister_handlers(void)
+static int xattr_mount_check(struct super_block *s)
 {
-	write_lock(&handler_lock);
-	__unregister_handlers();
-	write_unlock(&handler_lock);
+	/* We need generation numbers to ensure that the oid mapping is correct
+	 * v3.5 filesystems don't have them. */
+	if (old_format_only(s)) {
+		if (reiserfs_xattrs_optional(s)) {
+			/* Old format filesystem, but optional xattrs have
+			 * been enabled. Error out. */
+			reiserfs_warning(s, "jdm-2005",
+					 "xattrs/ACLs not supported "
+					 "on pre-v3.6 format filesystems. "
+					 "Failing mount.");
+			return -EOPNOTSUPP;
+		}
+	}
+
+	return 0;
 }
 
+#else
+int __init reiserfs_xattr_register_handlers(void) { return 0; }
+void reiserfs_xattr_unregister_handlers(void) {}
+#endif
+
 /* This will catch lookups from the fs root to .reiserfs_priv */
 static int
 xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name)
@@ -1147,48 +971,23 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
 {
 	int err = 0;
 
-	/* We need generation numbers to ensure that the oid mapping is correct
-	 * v3.5 filesystems don't have them. */
-	if (!old_format_only(s)) {
-		set_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt));
-	} else if (reiserfs_xattrs_optional(s)) {
-		/* Old format filesystem, but optional xattrs have been enabled
-		 * at mount time. Error out. */
-		reiserfs_warning(s, "xattrs/ACLs not supported on pre v3.6 "
-				 "format filesystem. Failing mount.");
-		err = -EOPNOTSUPP;
+#ifdef CONFIG_REISERFS_FS_XATTR
+	err = xattr_mount_check(s);
+	if (err)
 		goto error;
-	} else {
-		/* Old format filesystem, but no optional xattrs have been enabled. This
-		 * means we silently disable xattrs on the filesystem. */
-		clear_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt));
-	}
+#endif
 
 	/* If we don't have the privroot located yet - go find it */
-	if (reiserfs_xattrs(s) && !REISERFS_SB(s)->priv_root) {
+	if (!REISERFS_SB(s)->priv_root) {
 		struct dentry *dentry;
 		dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
 					strlen(PRIVROOT_NAME));
 		if (!IS_ERR(dentry)) {
-			if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) {
-				struct inode *inode = dentry->d_parent->d_inode;
-				mutex_lock_nested(&inode->i_mutex,
-						  I_MUTEX_XATTR);
-				err = inode->i_op->mkdir(inode, dentry, 0700);
-				mutex_unlock(&inode->i_mutex);
-				if (err) {
-					dput(dentry);
-					dentry = NULL;
-				}
-
-				if (dentry && dentry->d_inode)
-					reiserfs_warning(s,
-							 "Created %s on %s - reserved for "
-							 "xattr storage.",
-							 PRIVROOT_NAME,
-							 reiserfs_bdevname
-							 (inode->i_sb));
-			} else if (!dentry->d_inode) {
+#ifdef CONFIG_REISERFS_FS_XATTR
+			if (!(mount_flags & MS_RDONLY) && !dentry->d_inode)
+				err = create_privroot(dentry);
+#endif
+			if (!dentry->d_inode) {
 				dput(dentry);
 				dentry = NULL;
 			}
@@ -1197,73 +996,41 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
 
 		if (!err && dentry) {
 			s->s_root->d_op = &xattr_lookup_poison_ops;
-			reiserfs_mark_inode_private(dentry->d_inode);
+			dentry->d_inode->i_flags |= S_PRIVATE;
 			REISERFS_SB(s)->priv_root = dentry;
-		} else if (!(mount_flags & MS_RDONLY)) {	/* xattrs are unavailable */
-			/* If we're read-only it just means that the dir hasn't been
-			 * created. Not an error -- just no xattrs on the fs. We'll
-			 * check again if we go read-write */
-			reiserfs_warning(s, "xattrs/ACLs enabled and couldn't "
-					 "find/create .reiserfs_priv. Failing mount.");
+#ifdef CONFIG_REISERFS_FS_XATTR
+		/* xattrs are unavailable */
+		} else if (!(mount_flags & MS_RDONLY)) {
+			/* If we're read-only it just means that the dir
+			 * hasn't been created. Not an error -- just no
+			 * xattrs on the fs. We'll check again if we
+			 * go read-write */
+			reiserfs_warning(s, "jdm-20006",
+					 "xattrs/ACLs enabled and couldn't "
+					 "find/create .reiserfs_priv. "
+					 "Failing mount.");
 			err = -EOPNOTSUPP;
+#endif
 		}
 	}
 
-      error:
-	/* This is only nonzero if there was an error initializing the xattr
-	 * directory or if there is a condition where we don't support them. */
+#ifdef CONFIG_REISERFS_FS_XATTR
+	if (!err)
+		s->s_xattr = reiserfs_xattr_handlers;
+
+error:
 	if (err) {
-		clear_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt));
 		clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt));
 		clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt));
 	}
+#endif
 
 	/* The super_block MS_POSIXACL must mirror the (no)acl mount option. */
 	s->s_flags = s->s_flags & ~MS_POSIXACL;
+#ifdef CONFIG_REISERFS_FS_POSIX_ACL
 	if (reiserfs_posixacl(s))
 		s->s_flags |= MS_POSIXACL;
+#endif
 
 	return err;
 }
-
-static int reiserfs_check_acl(struct inode *inode, int mask)
-{
-	struct posix_acl *acl;
-	int error = -EAGAIN; /* do regular unix permission checks by default */
-
-	reiserfs_read_lock_xattr_i(inode);
-	reiserfs_read_lock_xattrs(inode->i_sb);
-
-	acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
-
-	reiserfs_read_unlock_xattrs(inode->i_sb);
-	reiserfs_read_unlock_xattr_i(inode);
-
-	if (acl) {
-		if (!IS_ERR(acl)) {
-			error = posix_acl_permission(inode, acl, mask);
-			posix_acl_release(acl);
-		} else if (PTR_ERR(acl) != -ENODATA)
-			error = PTR_ERR(acl);
-	}
-
-	return error;
-}
-
-int reiserfs_permission(struct inode *inode, int mask)
-{
-	/*
-	 * We don't do permission checks on the internal objects.
-	 * Permissions are determined by the "owning" object.
-	 */
-	if (is_reiserfs_priv_object(inode))
-		return 0;
-
-	/*
-	 * Stat data v1 doesn't support ACLs.
-	 */
-	if (get_inode_sd_version(inode) == STAT_DATA_V1)
-		return generic_permission(inode, mask, NULL);
-	else
-		return generic_permission(inode, mask, reiserfs_check_acl);
-}
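
The filler above relies on the generic xattr_handler ->list() convention: called with a NULL buffer it reports how much space a name needs; called with a buffer it copies the nul-terminated name and returns the bytes written. A minimal sketch of a conforming callback, mirroring the handlers later in this diff (the demo_list name is illustrative, not part of this patch):

	static size_t demo_list(struct inode *inode, char *list, size_t list_size,
				const char *name, size_t name_len)
	{
		const size_t len = name_len + 1;	/* name plus trailing '\0' */

		if (list && len <= list_size) {		/* copy mode */
			memcpy(list, name, name_len);
			list[name_len] = '\0';
		}
		return len;	/* space needed, whether or not anything was copied */
	}

listxattr_filler accumulates these return values in buf.pos, so a single readdir pass answers both the size query (NULL user buffer) and the actual copy.
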
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index b7e4fa4539de..d423416d93d1 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -10,15 +10,17 @@
 #include <linux/reiserfs_acl.h>
 #include <asm/uaccess.h>
 
-static int reiserfs_set_acl(struct inode *inode, int type,
+static int reiserfs_set_acl(struct reiserfs_transaction_handle *th,
+			    struct inode *inode, int type,
 			    struct posix_acl *acl);
 
 static int
 xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
 {
 	struct posix_acl *acl;
-	int error;
-
+	int error, error2;
+	struct reiserfs_transaction_handle th;
+	size_t jcreate_blocks;
 	if (!reiserfs_posixacl(inode->i_sb))
 		return -EOPNOTSUPP;
 	if (!is_owner_or_cap(inode))
@@ -36,7 +38,21 @@ xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
 	} else
 		acl = NULL;
 
-	error = reiserfs_set_acl(inode, type, acl);
+	/* Pessimism: We can't assume that anything from the xattr root up
+	 * has been created. */
+
+	jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) +
+			 reiserfs_xattr_nblocks(inode, size) * 2;
+
+	reiserfs_write_lock(inode->i_sb);
+	error = journal_begin(&th, inode->i_sb, jcreate_blocks);
+	if (error == 0) {
+		error = reiserfs_set_acl(&th, inode, type, acl);
+		error2 = journal_end(&th, inode->i_sb, jcreate_blocks);
+		if (error2)
+			error = error2;
+	}
+	reiserfs_write_unlock(inode->i_sb);
 
       release_and_out:
 	posix_acl_release(acl);
@@ -172,6 +188,29 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
 	return ERR_PTR(-EINVAL);
 }
 
+static inline void iset_acl(struct inode *inode, struct posix_acl **i_acl,
+			    struct posix_acl *acl)
+{
+	spin_lock(&inode->i_lock);
+	if (*i_acl != ERR_PTR(-ENODATA))
+		posix_acl_release(*i_acl);
+	*i_acl = posix_acl_dup(acl);
+	spin_unlock(&inode->i_lock);
+}
+
+static inline struct posix_acl *iget_acl(struct inode *inode,
+					 struct posix_acl **i_acl)
+{
+	struct posix_acl *acl = ERR_PTR(-ENODATA);
+
+	spin_lock(&inode->i_lock);
+	if (*i_acl != ERR_PTR(-ENODATA))
+		acl = posix_acl_dup(*i_acl);
+	spin_unlock(&inode->i_lock);
+
+	return acl;
+}
+
 /*
  * Inode operation get_posix_acl().
  *
@@ -199,11 +238,11 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (IS_ERR(*p_acl)) {
-		if (PTR_ERR(*p_acl) == -ENODATA)
-			return NULL;
-	} else if (*p_acl != NULL)
-		return posix_acl_dup(*p_acl);
+	acl = iget_acl(inode, p_acl);
+	if (acl && !IS_ERR(acl))
+		return acl;
+	else if (PTR_ERR(acl) == -ENODATA)
+		return NULL;
 
 	size = reiserfs_xattr_get(inode, name, NULL, 0);
 	if (size < 0) {
@@ -229,7 +268,7 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 	} else {
 		acl = posix_acl_from_disk(value, retval);
 		if (!IS_ERR(acl))
-			*p_acl = posix_acl_dup(acl);
+			iset_acl(inode, p_acl, acl);
 	}
 
 	kfree(value);
@@ -243,12 +282,13 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
  * BKL held [before 2.5.x]
  */
 static int
-reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
+		 int type, struct posix_acl *acl)
 {
 	char *name;
 	void *value = NULL;
 	struct posix_acl **p_acl;
-	size_t size;
+	size_t size = 0;
 	int error;
 	struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
 
@@ -285,31 +325,28 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 		value = posix_acl_to_disk(acl, &size);
 		if (IS_ERR(value))
 			return (int)PTR_ERR(value);
-		error = reiserfs_xattr_set(inode, name, value, size, 0);
-	} else {
-		error = reiserfs_xattr_del(inode, name);
-		if (error == -ENODATA) {
-			/* This may seem odd here, but it means that the ACL was set
-			 * with a value representable with mode bits. If there was
-			 * an ACL before, reiserfs_xattr_del already dirtied the inode.
-			 */
+	}
+
+	error = reiserfs_xattr_set_handle(th, inode, name, value, size, 0);
+
+	/*
+	 * Ensure that the inode gets dirtied if we're only using
+	 * the mode bits and an old ACL didn't exist. We don't need
+	 * to check if the inode is hashed here since we won't get
+	 * called by reiserfs_inherit_default_acl().
+	 */
+	if (error == -ENODATA) {
+		error = 0;
+		if (type == ACL_TYPE_ACCESS) {
+			inode->i_ctime = CURRENT_TIME_SEC;
 			mark_inode_dirty(inode);
-			error = 0;
 		}
 	}
 
 	kfree(value);
 
-	if (!error) {
-		/* Release the old one */
-		if (!IS_ERR(*p_acl) && *p_acl)
-			posix_acl_release(*p_acl);
-
-		if (acl == NULL)
-			*p_acl = ERR_PTR(-ENODATA);
-		else
-			*p_acl = posix_acl_dup(acl);
-	}
+	if (!error)
+		iset_acl(inode, p_acl, acl);
 
 	return error;
 }
@@ -317,7 +354,8 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 /* dir->i_mutex: locked,
  * inode is new and not released into the wild yet */
 int
-reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
+reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
+			     struct inode *dir, struct dentry *dentry,
 			     struct inode *inode)
 {
 	struct posix_acl *acl;
@@ -335,8 +373,8 @@ reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
 	/* Don't apply ACLs to objects in the .reiserfs_priv tree.. This
 	 * would be useless since permissions are ignored, and a pain because
 	 * it introduces locking cycles */
-	if (is_reiserfs_priv_object(dir)) {
-		reiserfs_mark_inode_private(inode);
+	if (IS_PRIVATE(dir)) {
+		inode->i_flags |= S_PRIVATE;
 		goto apply_umask;
 	}
 
@@ -354,7 +392,8 @@ reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
 
 	/* Copy the default ACL to the default ACL of a new directory */
 	if (S_ISDIR(inode->i_mode)) {
-		err = reiserfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
+		err = reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT,
+				       acl);
 		if (err)
 			goto cleanup;
 	}
@@ -375,9 +414,9 @@ reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
 
 	/* If we need an ACL.. */
 	if (need_acl > 0) {
-		err =
-		    reiserfs_set_acl(inode, ACL_TYPE_ACCESS,
-				     acl_copy);
+		err = reiserfs_set_acl(th, inode,
+				       ACL_TYPE_ACCESS,
+				       acl_copy);
 		if (err)
 			goto cleanup_copy;
 	}
@@ -395,25 +434,45 @@ reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
 	return err;
 }
 
-/* Looks up and caches the result of the default ACL.
- * We do this so that we don't need to carry the xattr_sem into
- * reiserfs_new_inode if we don't need to */
+/* This is used to cache the default acl before a new object is created.
+ * The biggest reason for this is to get an idea of how many blocks will
+ * actually be required for the create operation if we must inherit an ACL.
+ * An ACL write can add up to 3 object creations and an additional file write
+ * so we'd prefer not to reserve that many blocks in the journal if we can.
+ * It also has the advantage of not loading the ACL with a transaction open,
+ * this may seem silly, but if the owner of the directory is doing the
+ * creation, the ACL may not be loaded since the permissions wouldn't require
+ * it.
+ * We return the number of blocks required for the transaction.
+ */
 int reiserfs_cache_default_acl(struct inode *inode)
 {
-	int ret = 0;
-	if (reiserfs_posixacl(inode->i_sb) && !is_reiserfs_priv_object(inode)) {
-		struct posix_acl *acl;
-		reiserfs_read_lock_xattr_i(inode);
-		reiserfs_read_lock_xattrs(inode->i_sb);
-		acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT);
-		reiserfs_read_unlock_xattrs(inode->i_sb);
-		reiserfs_read_unlock_xattr_i(inode);
-		ret = (acl && !IS_ERR(acl));
-		if (ret)
-			posix_acl_release(acl);
+	struct posix_acl *acl;
+	int nblocks = 0;
+
+	if (IS_PRIVATE(inode))
+		return 0;
+
+	acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT);
+
+	if (acl && !IS_ERR(acl)) {
+		int size = reiserfs_acl_size(acl->a_count);
+
+		/* Other xattrs can be created during inode creation. We don't
+		 * want to claim too many blocks, so we check to see if we
+		 * need to create the tree to the xattrs, and then we
+		 * just want two files. */
+		nblocks = reiserfs_xattr_jcreate_nblocks(inode);
+		nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
+
+		REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
+
+		/* We need to account for writes + bitmaps for two files */
+		nblocks += reiserfs_xattr_nblocks(inode, size) * 4;
+		posix_acl_release(acl);
 	}
 
-	return ret;
+	return nblocks;
 }
 
 int reiserfs_acl_chmod(struct inode *inode)
@@ -429,9 +488,7 @@ int reiserfs_acl_chmod(struct inode *inode)
 		return 0;
 	}
 
-	reiserfs_read_lock_xattrs(inode->i_sb);
 	acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
-	reiserfs_read_unlock_xattrs(inode->i_sb);
 	if (!acl)
 		return 0;
 	if (IS_ERR(acl))
@@ -442,18 +499,20 @@ int reiserfs_acl_chmod(struct inode *inode)
 		return -ENOMEM;
 	error = posix_acl_chmod_masq(clone, inode->i_mode);
 	if (!error) {
-		int lock = !has_xattr_dir(inode);
-		reiserfs_write_lock_xattr_i(inode);
-		if (lock)
-			reiserfs_write_lock_xattrs(inode->i_sb);
-		else
-			reiserfs_read_lock_xattrs(inode->i_sb);
-		error = reiserfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
-		if (lock)
-			reiserfs_write_unlock_xattrs(inode->i_sb);
-		else
-			reiserfs_read_unlock_xattrs(inode->i_sb);
-		reiserfs_write_unlock_xattr_i(inode);
+		struct reiserfs_transaction_handle th;
+		size_t size = reiserfs_xattr_nblocks(inode,
+				reiserfs_acl_size(clone->a_count));
+		reiserfs_write_lock(inode->i_sb);
+		error = journal_begin(&th, inode->i_sb, size * 2);
+		if (!error) {
+			int error2;
+			error = reiserfs_set_acl(&th, inode, ACL_TYPE_ACCESS,
+						 clone);
+			error2 = journal_end(&th, inode->i_sb, size * 2);
+			if (error2)
+				error = error2;
+		}
+		reiserfs_write_unlock(inode->i_sb);
 	}
 	posix_acl_release(clone);
 	return error;
@@ -477,38 +536,22 @@ posix_acl_access_set(struct inode *inode, const char *name,
 	return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
 }
 
-static int posix_acl_access_del(struct inode *inode, const char *name)
+static size_t posix_acl_access_list(struct inode *inode, char *list,
+				    size_t list_size, const char *name,
+				    size_t name_len)
 {
-	struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
-	struct posix_acl **acl = &reiserfs_i->i_acl_access;
-	if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
-		return -EINVAL;
-	if (!IS_ERR(*acl) && *acl) {
-		posix_acl_release(*acl);
-		*acl = ERR_PTR(-ENODATA);
-	}
-
-	return 0;
-}
-
-static int
-posix_acl_access_list(struct inode *inode, const char *name, int namelen,
-		      char *out)
-{
-	int len = namelen;
+	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
 	if (!reiserfs_posixacl(inode->i_sb))
 		return 0;
-	if (out)
-		memcpy(out, name, len);
-
-	return len;
+	if (list && size <= list_size)
+		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
+	return size;
 }
 
-struct reiserfs_xattr_handler posix_acl_access_handler = {
+struct xattr_handler reiserfs_posix_acl_access_handler = {
 	.prefix = POSIX_ACL_XATTR_ACCESS,
 	.get = posix_acl_access_get,
 	.set = posix_acl_access_set,
-	.del = posix_acl_access_del,
 	.list = posix_acl_access_list,
 };
 
@@ -530,37 +573,21 @@ posix_acl_default_set(struct inode *inode, const char *name,
 	return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
 }
 
-static int posix_acl_default_del(struct inode *inode, const char *name)
+static size_t posix_acl_default_list(struct inode *inode, char *list,
+				     size_t list_size, const char *name,
+				     size_t name_len)
 {
-	struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
-	struct posix_acl **acl = &reiserfs_i->i_acl_default;
-	if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
-		return -EINVAL;
-	if (!IS_ERR(*acl) && *acl) {
-		posix_acl_release(*acl);
-		*acl = ERR_PTR(-ENODATA);
-	}
-
-	return 0;
-}
-
-static int
-posix_acl_default_list(struct inode *inode, const char *name, int namelen,
-		       char *out)
-{
-	int len = namelen;
+	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
 	if (!reiserfs_posixacl(inode->i_sb))
 		return 0;
-	if (out)
-		memcpy(out, name, len);
-
-	return len;
+	if (list && size <= list_size)
+		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
+	return size;
 }
 
-struct reiserfs_xattr_handler posix_acl_default_handler = {
+struct xattr_handler reiserfs_posix_acl_default_handler = {
 	.prefix = POSIX_ACL_XATTR_DEFAULT,
 	.get = posix_acl_default_get,
 	.set = posix_acl_default_set,
-	.del = posix_acl_default_del,
 	.list = posix_acl_default_list,
 };
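
Both xattr_set_acl() and reiserfs_acl_chmod() above now follow the same journaling shape: estimate the blocks, journal_begin(), perform the ACL write through the handle, and let a failing journal_end() override a successful write. A condensed sketch of that shape, with an illustrative callback standing in for the ACL operation (the with_transaction name is not part of this patch):

	static int with_transaction(struct super_block *sb, int nblocks,
				    int (*op)(struct reiserfs_transaction_handle *th))
	{
		struct reiserfs_transaction_handle th;
		int error, error2;

		reiserfs_write_lock(sb);
		error = journal_begin(&th, sb, nblocks);
		if (!error) {
			error = op(&th);		/* e.g. the reiserfs_set_acl() call */
			error2 = journal_end(&th, sb, nblocks);
			if (error2)			/* a failed commit wins */
				error = error2;
		}
		reiserfs_write_unlock(sb);
		return error;
	}
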
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 056008db1377..4d3c20e787c3 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -4,6 +4,7 @@
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
 #include <linux/reiserfs_xattr.h>
+#include <linux/security.h>
 #include <asm/uaccess.h>
 
 static int
@@ -12,7 +13,7 @@ security_get(struct inode *inode, const char *name, void *buffer, size_t size)
 	if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
 		return -EINVAL;
 
-	if (is_reiserfs_priv_object(inode))
+	if (IS_PRIVATE(inode))
 		return -EPERM;
 
 	return reiserfs_xattr_get(inode, name, buffer, size);
@@ -25,41 +26,84 @@ security_set(struct inode *inode, const char *name, const void *buffer,
 	if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
 		return -EINVAL;
 
-	if (is_reiserfs_priv_object(inode))
+	if (IS_PRIVATE(inode))
 		return -EPERM;
 
 	return reiserfs_xattr_set(inode, name, buffer, size, flags);
 }
 
-static int security_del(struct inode *inode, const char *name)
+static size_t security_list(struct inode *inode, char *list, size_t list_len,
+			    const char *name, size_t namelen)
 {
-	if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
-		return -EINVAL;
+	const size_t len = namelen + 1;
 
-	if (is_reiserfs_priv_object(inode))
-		return -EPERM;
+	if (IS_PRIVATE(inode))
+		return 0;
+
+	if (list && len <= list_len) {
+		memcpy(list, name, namelen);
+		list[namelen] = '\0';
+	}
 
-	return 0;
+	return len;
 }
 
-static int
-security_list(struct inode *inode, const char *name, int namelen, char *out)
+/* Initializes the security context for a new inode and returns the number
+ * of blocks needed for the transaction. If successful, reiserfs_security
+ * must be released using reiserfs_security_free when the caller is done. */
+int reiserfs_security_init(struct inode *dir, struct inode *inode,
+			   struct reiserfs_security_handle *sec)
 {
-	int len = namelen;
+	int blocks = 0;
+	int error = security_inode_init_security(inode, dir, &sec->name,
+						 &sec->value, &sec->length);
+	if (error) {
+		if (error == -EOPNOTSUPP)
+			error = 0;
 
-	if (is_reiserfs_priv_object(inode))
-		return 0;
+		sec->name = NULL;
+		sec->value = NULL;
+		sec->length = 0;
+		return error;
+	}
 
-	if (out)
-		memcpy(out, name, len);
+	if (sec->length) {
+		blocks = reiserfs_xattr_jcreate_nblocks(inode) +
+			 reiserfs_xattr_nblocks(inode, sec->length);
+		/* We don't want to count the directories twice if we have
+		 * a default ACL. */
+		REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
+	}
+	return blocks;
+}
 
-	return len;
+int reiserfs_security_write(struct reiserfs_transaction_handle *th,
+			    struct inode *inode,
+			    struct reiserfs_security_handle *sec)
+{
+	int error;
+	if (strlen(sec->name) < sizeof(XATTR_SECURITY_PREFIX))
+		return -EINVAL;
+
+	error = reiserfs_xattr_set_handle(th, inode, sec->name, sec->value,
+					  sec->length, XATTR_CREATE);
+	if (error == -ENODATA || error == -EOPNOTSUPP)
+		error = 0;
+
+	return error;
+}
+
+void reiserfs_security_free(struct reiserfs_security_handle *sec)
+{
+	kfree(sec->name);
+	kfree(sec->value);
+	sec->name = NULL;
+	sec->value = NULL;
 }
 
-struct reiserfs_xattr_handler security_handler = {
+struct xattr_handler reiserfs_xattr_security_handler = {
 	.prefix = XATTR_SECURITY_PREFIX,
 	.get = security_get,
 	.set = security_set,
-	.del = security_del,
 	.list = security_list,
 };
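
The three new helpers form a lifecycle: reiserfs_security_init() asks the LSM for a context and returns the journal blocks that storing it will take, reiserfs_security_write() writes it under an open transaction handle, and reiserfs_security_free() releases the name/value pair. A hedged sketch of the intended caller sequence, with the surrounding transaction setup purely illustrative:

	struct reiserfs_security_handle sec;
	int blocks = reiserfs_security_init(dir, inode, &sec);

	if (blocks < 0)
		return blocks;			/* LSM refused */
	/* ... journal_begin(&th, sb, base_blocks + blocks); create the inode ... */
	err = reiserfs_security_write(&th, inode, &sec);
	/* ... journal_end(&th, sb, base_blocks + blocks); ... */
	reiserfs_security_free(&sec);
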
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 60abe2bb1f98..a865042f75e2 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -13,10 +13,7 @@ trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
 	if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
 		return -EINVAL;
 
-	if (!reiserfs_xattrs(inode->i_sb))
-		return -EOPNOTSUPP;
-
-	if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode)))
+	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))
 		return -EPERM;
 
 	return reiserfs_xattr_get(inode, name, buffer, size);
@@ -29,50 +26,30 @@ trusted_set(struct inode *inode, const char *name, const void *buffer,
 	if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
 		return -EINVAL;
 
-	if (!reiserfs_xattrs(inode->i_sb))
-		return -EOPNOTSUPP;
-
-	if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode)))
+	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))
 		return -EPERM;
 
 	return reiserfs_xattr_set(inode, name, buffer, size, flags);
 }
 
-static int trusted_del(struct inode *inode, const char *name)
+static size_t trusted_list(struct inode *inode, char *list, size_t list_size,
+			   const char *name, size_t name_len)
 {
-	if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
-		return -EINVAL;
+	const size_t len = name_len + 1;
 
-	if (!reiserfs_xattrs(inode->i_sb))
-		return -EOPNOTSUPP;
-
-	if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode)))
-		return -EPERM;
-
-	return 0;
-}
-
-static int
-trusted_list(struct inode *inode, const char *name, int namelen, char *out)
-{
-	int len = namelen;
-
-	if (!reiserfs_xattrs(inode->i_sb))
+	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))
 		return 0;
 
-	if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode)))
-		return 0;
-
-	if (out)
-		memcpy(out, name, len);
-
+	if (list && len <= list_size) {
+		memcpy(list, name, name_len);
+		list[name_len] = '\0';
+	}
 	return len;
 }
 
-struct reiserfs_xattr_handler trusted_handler = {
+struct xattr_handler reiserfs_xattr_trusted_handler = {
 	.prefix = XATTR_TRUSTED_PREFIX,
 	.get = trusted_get,
 	.set = trusted_set,
-	.del = trusted_del,
 	.list = trusted_list,
 };
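
One detail worth noting in the hunks above: the permission test is inverted, not merely renamed. The equivalence (by De Morgan) is:

	/* Old test:  !(capable(CAP_SYS_ADMIN) || priv)         == !capable && !priv
	 * New test:  !capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)
	 * so the new form additionally refuses private inodes even to
	 * CAP_SYS_ADMIN holders, matching the S_PRIVATE semantics used
	 * elsewhere in this series. */
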
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 1384efcb938e..e3238dc4f3db 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -6,10 +6,6 @@
 #include <linux/reiserfs_xattr.h>
 #include <asm/uaccess.h>
 
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-# include <linux/reiserfs_acl.h>
-#endif
-
 static int
 user_get(struct inode *inode, const char *name, void *buffer, size_t size)
 {
@@ -25,7 +21,6 @@ static int
 user_set(struct inode *inode, const char *name, const void *buffer,
 	 size_t size, int flags)
 {
-
 	if (strlen(name) < sizeof(XATTR_USER_PREFIX))
 		return -EINVAL;
 
@@ -34,33 +29,23 @@ user_set(struct inode *inode, const char *name, const void *buffer,
 	return reiserfs_xattr_set(inode, name, buffer, size, flags);
 }
 
-static int user_del(struct inode *inode, const char *name)
+static size_t user_list(struct inode *inode, char *list, size_t list_size,
+			const char *name, size_t name_len)
 {
-	if (strlen(name) < sizeof(XATTR_USER_PREFIX))
-		return -EINVAL;
-
-	if (!reiserfs_xattrs_user(inode->i_sb))
-		return -EOPNOTSUPP;
-	return 0;
-}
+	const size_t len = name_len + 1;
 
-static int
-user_list(struct inode *inode, const char *name, int namelen, char *out)
-{
-	int len = namelen;
 	if (!reiserfs_xattrs_user(inode->i_sb))
 		return 0;
-
-	if (out)
-		memcpy(out, name, len);
-
+	if (list && len <= list_size) {
+		memcpy(list, name, name_len);
+		list[name_len] = '\0';
+	}
 	return len;
 }
 
-struct reiserfs_xattr_handler user_handler = {
+struct xattr_handler reiserfs_xattr_user_handler = {
 	.prefix = XATTR_USER_PREFIX,
 	.get = user_get,
 	.set = user_set,
-	.del = user_del,
 	.list = user_list,
 };
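
With the per-handler ->del() callbacks gone, deletion is expressed as a ->set() with a NULL value through the generic xattr layer, and the handlers are wired up via s->s_xattr (see the `s->s_xattr = reiserfs_xattr_handlers` assignment in the xattr.c hunk above). The array itself is defined outside this diff; given the handler names renamed here, it plausibly looks like:

	struct xattr_handler *reiserfs_xattr_handlers[] = {
		&reiserfs_xattr_user_handler,
		&reiserfs_xattr_trusted_handler,
	#ifdef CONFIG_REISERFS_FS_SECURITY
		&reiserfs_xattr_security_handler,
	#endif
	#ifdef CONFIG_REISERFS_FS_POSIX_ACL
		&reiserfs_posix_acl_access_handler,
		&reiserfs_posix_acl_default_handler,
	#endif
		NULL	/* NULL-terminated, as the generic layer expects */
	};
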
diff --git a/fs/seq_file.c b/fs/seq_file.c
index a1a4cfe19210..7f40f30c55c5 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -513,7 +513,7 @@ int seq_bitmap(struct seq_file *m, const unsigned long *bits,
 }
 EXPORT_SYMBOL(seq_bitmap);
 
-int seq_bitmap_list(struct seq_file *m, unsigned long *bits,
+int seq_bitmap_list(struct seq_file *m, const unsigned long *bits,
 		    unsigned int nr_bits)
 {
 	if (m->count < m->size) {
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 07703d3ff4a1..93e0c0281d45 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -234,7 +234,7 @@ static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	return ret;
 }
 
-static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct file *file = vma->vm_file;
 	struct bin_buffer *bb = file->private_data;
@@ -242,15 +242,15 @@ static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 	int ret;
 
 	if (!bb->vm_ops)
-		return -EINVAL;
+		return VM_FAULT_SIGBUS;
 
 	if (!bb->vm_ops->page_mkwrite)
 		return 0;
 
 	if (!sysfs_get_active_two(attr_sd))
-		return -EINVAL;
+		return VM_FAULT_SIGBUS;
 
-	ret = bb->vm_ops->page_mkwrite(vma, page);
+	ret = bb->vm_ops->page_mkwrite(vma, vmf);
 
 	sysfs_put_active_two(attr_sd);
 	return ret;
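
The sysfs, UBIFS, and XFS hunks in this merge all track the same VM API change: ->page_mkwrite() now receives the struct vm_fault instead of the page, and callers expect VM_FAULT_* codes rather than -errno. A minimal conforming callback under the new signature (the budget_for_page() predicate is hypothetical, standing in for whatever per-filesystem check applies):

	static int demo_page_mkwrite(struct vm_area_struct *vma,
				     struct vm_fault *vmf)
	{
		struct page *page = vmf->page;	/* formerly passed directly */

		if (!budget_for_page(vma->vm_file, page))	/* hypothetical */
			return VM_FAULT_SIGBUS;		/* not -EINVAL/-EROFS */

		lock_page(page);
		/* ... make the page writable / reserve space ... */
		unlock_page(page);
		return 0;
	}
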
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 93b6de51f261..0ff89fe71e51 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1434,8 +1434,9 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
  * mmap()d file has taken write protection fault and is being made
  * writable. UBIFS must ensure page is budgeted for.
  */
-static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
+	struct page *page = vmf->page;
 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
 	struct timespec now = ubifs_current_time(inode);
@@ -1447,7 +1448,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 	ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
 
 	if (unlikely(c->ro_media))
-		return -EROFS;
+		return VM_FAULT_SIGBUS; /* -EROFS */
 
 	/*
 	 * We have not locked @page so far so we may budget for changing the
@@ -1480,7 +1481,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 		if (err == -ENOSPC)
 			ubifs_warn("out of space for mmapped file "
 				   "(inode number %lu)", inode->i_ino);
-		return err;
+		return VM_FAULT_SIGBUS;
 	}
 
 	lock_page(page);
@@ -1520,6 +1521,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 out_unlock:
 	unlock_page(page);
 	ubifs_release_budget(c, &req);
+	if (err)
+		err = VM_FAULT_SIGBUS;
 	return err;
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index e14c4e3aea0c..f4e255441574 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -234,9 +234,9 @@ xfs_file_mmap(
 STATIC int
 xfs_vm_page_mkwrite(
 	struct vm_area_struct	*vma,
-	struct page		*page)
+	struct vm_fault		*vmf)
 {
-	return block_page_mkwrite(vma, page, xfs_get_blocks);
+	return block_page_mkwrite(vma, vmf, xfs_get_blocks);
 }
 
 const struct file_operations xfs_file_operations = {
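
As the UBIFS hunk shows with its `if (err) err = VM_FAULT_SIGBUS;`, filesystems that still compute -errno internally translate it at the boundary of the new contract; a reusable form of that idiom might look like:

	static inline int mkwrite_status(int err)
	{
		/* new ->page_mkwrite() contract: VM_FAULT_* codes only */
		return err ? VM_FAULT_SIGBUS : 0;
	}
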