79 files changed, 1483 insertions, 1776 deletions
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index d41a75ed3dce..2d6d682c206a 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -35,6 +35,7 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 * zero-initialized data and COW.
 */
 struct page *empty_zero_page;
+EXPORT_SYMBOL(empty_zero_page);
 /*
 * The pmd table for the upper-most set of pages.
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index a2bb01f59642..d8fb9c5303cc 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -69,6 +69,7 @@ void __init m68k_setup_node(int node)
 */
 void *empty_zero_page;
+EXPORT_SYMBOL(empty_zero_page);
 void show_mem(void)
 {
diff --git a/arch/sparc/kernel/sparc_ksyms.c b/arch/sparc/kernel/sparc_ksyms.c
index 0bcf98a7ef38..aa8ee06cf488 100644
--- a/arch/sparc/kernel/sparc_ksyms.c
+++ b/arch/sparc/kernel/sparc_ksyms.c
@@ -282,3 +282,5 @@ EXPORT_SYMBOL(do_BUG);
 /* Sun Power Management Idle Handler */
 EXPORT_SYMBOL(pm_idle);
+EXPORT_SYMBOL(empty_zero_page);
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 8c2b50e8abc6..4cad0b32b0af 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -160,6 +160,7 @@ extern unsigned int sparc_ramdisk_image;
 extern unsigned int sparc_ramdisk_size;
 struct page *mem_map_zero __read_mostly;
+EXPORT_SYMBOL(mem_map_zero);
 unsigned int sparc64_highest_unlocked_tlb_ent __read_mostly;
diff --git a/drivers/edac/pasemi_edac.c b/drivers/edac/pasemi_edac.c
index 3fd65a563848..8e6b91bd2e99 100644
--- a/drivers/edac/pasemi_edac.c
+++ b/drivers/edac/pasemi_edac.c
@@ -26,6 +26,7 @@
 #include <linux/pci.h>
 #include <linux/pci_ids.h>
 #include <linux/slab.h>
+#include <linux/edac.h>
 #include "edac_core.h"
 #define MODULE_NAME "pasemi_edac"
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index cc47b76091bf..6ae4ecf3ce40 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1261,10 +1261,11 @@ static int ext3_ordered_write_end(struct file *file,
                new_i_size = pos + copied;
                if (new_i_size > EXT3_I(inode)->i_disksize)
                        EXT3_I(inode)->i_disksize = new_i_size;
-                copied = ext3_generic_write_end(file, mapping, pos, len, copied,
+                ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
                                                        page, fsdata);
-                if (copied < 0)
+                copied = ret2;
-                        ret = copied;
+                if (ret2 < 0)
+                        ret = ret2;
        }
        ret2 = ext3_journal_stop(handle);
        if (!ret)
@@ -1289,10 +1290,11 @@ static int ext3_writeback_write_end(struct file *file,
        if (new_i_size > EXT3_I(inode)->i_disksize)
                EXT3_I(inode)->i_disksize = new_i_size;
-        copied = ext3_generic_write_end(file, mapping, pos, len, copied,
+        ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
                                                        page, fsdata);
-        if (copied < 0)
+        copied = ret2;
-                ret = copied;
+        if (ret2 < 0)
+                ret = ret2;
        ret2 = ext3_journal_stop(handle);
        if (!ret)
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index a8bae8cd1d5d..3c8dab880d91 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -9,8 +9,8 @@
 #include <linux/slab.h>
 #include <linux/capability.h>
 #include <linux/fs.h>
-#include <linux/ext4_jbd2.h>
+#include "ext4_jbd2.h"
-#include <linux/ext4_fs.h>
+#include "ext4.h"
 #include "xattr.h"
 #include "acl.h"
@@ -37,7 +37,7 @@ ext4_acl_from_disk(const void *value, size_t size)
                return ERR_PTR(-EINVAL);
        if (count == 0)
                return NULL;
-        acl = posix_acl_alloc(count, GFP_KERNEL);
+        acl = posix_acl_alloc(count, GFP_NOFS);
        if (!acl)
                return ERR_PTR(-ENOMEM);
        for (n=0; n < count; n++) {
@@ -91,7 +91,7 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
        *size = ext4_acl_size(acl->a_count);
        ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count *
-                        sizeof(ext4_acl_entry), GFP_KERNEL);
+                        sizeof(ext4_acl_entry), GFP_NOFS);
        if (!ext_acl)
                return ERR_PTR(-ENOMEM);
        ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
@@ -187,7 +187,7 @@ ext4_get_acl(struct inode *inode, int type)
        }
        retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
        if (retval > 0) {
-                value = kmalloc(retval, GFP_KERNEL);
+                value = kmalloc(retval, GFP_NOFS);
                if (!value)
                        return ERR_PTR(-ENOMEM);
                retval = ext4_xattr_get(inode, name_index, "", value, retval);
@@ -335,7 +335,7 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
                        if (error)
                                goto cleanup;
                }
-                clone = posix_acl_clone(acl, GFP_KERNEL);
+                clone = posix_acl_clone(acl, GFP_NOFS);
                error = -ENOMEM;
                if (!clone)
                        goto cleanup;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 0737e05ba3dd..da994374ec3b 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -15,12 +15,12 @@
 #include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
-#include <linux/ext4_jbd2.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
+#include "ext4.h"
+#include "ext4_jbd2.h"
 #include "group.h"
 /*
 * balloc.c contains the blocks allocation and deallocation routines
 */
@@ -48,7 +48,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
                 ext4_group_t block_group, struct ext4_group_desc *gdp)
 {
-        unsigned long start;
        int bit, bit_max;
        unsigned free_blocks, group_blocks;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -59,7 +58,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
                /* If checksum is bad mark all blocks used to prevent allocation
                 * essentially implementing a per-group read-only flag. */
                if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
-                        ext4_error(sb, __FUNCTION__,
+                        ext4_error(sb, __func__,
                                  "Checksum bad for group %lu\n", block_group);
                        gdp->bg_free_blocks_count = 0;
                        gdp->bg_free_inodes_count = 0;
@@ -106,11 +105,12 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
        free_blocks = group_blocks - bit_max;
        if (bh) {
+                ext4_fsblk_t start;
                for (bit = 0; bit < bit_max; bit++)
                        ext4_set_bit(bit, bh->b_data);
-                start = block_group * EXT4_BLOCKS_PER_GROUP(sb) +
+                start = ext4_group_first_block_no(sb, block_group);
-                        le32_to_cpu(sbi->s_es->s_first_data_block);
                /* Set bits for block and inode bitmaps, and inode table */
                ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
@@ -235,7 +235,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
                return 1;
 err_out:
-        ext4_error(sb, __FUNCTION__,
+        ext4_error(sb, __func__,
                        "Invalid block bitmap - "
                        "block_group = %d, block = %llu",
                        block_group, bitmap_blk);
@@ -264,7 +264,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
        bitmap_blk = ext4_block_bitmap(sb, desc);
        bh = sb_getblk(sb, bitmap_blk);
        if (unlikely(!bh)) {
-                ext4_error(sb, __FUNCTION__,
+                ext4_error(sb, __func__,
                            "Cannot read block bitmap - "
                            "block_group = %d, block_bitmap = %llu",
                            (int)block_group, (unsigned long long)bitmap_blk);
@@ -281,7 +281,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
        }
        if (bh_submit_read(bh) < 0) {
                put_bh(bh);
-                ext4_error(sb, __FUNCTION__,
+                ext4_error(sb, __func__,
                            "Cannot read block bitmap - "
                            "block_group = %d, block_bitmap = %llu",
                            (int)block_group, (unsigned long long)bitmap_blk);
@@ -360,7 +360,7 @@ restart:
                BUG();
 }
 #define rsv_window_dump(root, verbose) \
-        __rsv_window_dump((root), (verbose), __FUNCTION__)
+        __rsv_window_dump((root), (verbose), __func__)
 #else
 #define rsv_window_dump(root, verbose) do {} while (0)
 #endif
@@ -740,7 +740,7 @@ do_more:
                if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
                                                bit + i, bitmap_bh->b_data)) {
                        jbd_unlock_bh_state(bitmap_bh);
-                        ext4_error(sb, __FUNCTION__,
+                        ext4_error(sb, __func__,
                                   "bit already cleared for block %llu",
                                   (ext4_fsblk_t)(block + i));
                        jbd_lock_bh_state(bitmap_bh);
@@ -752,9 +752,7 @@ do_more:
        jbd_unlock_bh_state(bitmap_bh);
        spin_lock(sb_bgl_lock(sbi, block_group));
-        desc->bg_free_blocks_count =
+        le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
-                cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
-                        group_freed);
        desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
        spin_unlock(sb_bgl_lock(sbi, block_group));
        percpu_counter_add(&sbi->s_freeblocks_counter, count);
@@ -1798,7 +1796,7 @@ allocated:
                        if (ext4_test_bit(grp_alloc_blk+i,
                                        bh2jh(bitmap_bh)->b_committed_data)) {
                                printk("%s: block was unexpectedly set in "
-                                        "b_committed_data\n", __FUNCTION__);
+                                        "b_committed_data\n", __func__);
                        }
                }
        }
@@ -1823,8 +1821,7 @@ allocated:
        spin_lock(sb_bgl_lock(sbi, group_no));
        if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
                gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
-        gdp->bg_free_blocks_count =
+        le16_add_cpu(&gdp->bg_free_blocks_count, -num);
-                        cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
        spin_unlock(sb_bgl_lock(sbi, group_no));
        percpu_counter_sub(&sbi->s_freeblocks_counter, num);
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 420554f8f79d..d37ea6750454 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -9,7 +9,7 @@
 #include <linux/buffer_head.h>
 #include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
+#include "ext4.h"
 #ifdef EXT4FS_DEBUG
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 2c23bade9aa6..2bf0331ea194 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -23,10 +23,10 @@
 #include <linux/fs.h>
 #include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
 #include <linux/buffer_head.h>
 #include <linux/slab.h>
 #include <linux/rbtree.h>
+#include "ext4.h"
 static unsigned char ext4_filetype_table[] = {
        DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
@@ -42,7 +42,7 @@ const struct file_operations ext4_dir_operations = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
        .readdir        = ext4_readdir,         /* we take BKL. needed?*/
-        .ioctl          = ext4_ioctl,           /* BKL held */
+        .unlocked_ioctl = ext4_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = ext4_compat_ioctl,
 #endif
diff --git a/include/linux/ext4_fs.h b/fs/ext4/ext4.h
index 250032548597..8158083f7ac0 100644
--- a/include/linux/ext4_fs.h
+++ b/fs/ext4/ext4.h
@@ -1,5 +1,5 @@
 /*
- *  linux/include/linux/ext4_fs.h
+ *  ext4.h
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
@@ -13,14 +13,13 @@
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */
-#ifndef _LINUX_EXT4_FS_H
+#ifndef _EXT4_H
-#define _LINUX_EXT4_FS_H
+#define _EXT4_H
 #include <linux/types.h>
 #include <linux/blkdev.h>
 #include <linux/magic.h>
+#include "ext4_i.h"
-#include <linux/ext4_fs_i.h>
 /*
 * The second extended filesystem constants/structures
@@ -176,8 +175,7 @@ struct ext4_group_desc
 #define EXT4_BG_INODE_ZEROED    0x0004 /* On-disk itable initialized to zero */
 #ifdef __KERNEL__
-#include <linux/ext4_fs_i.h>
+#include "ext4_sb.h"
-#include <linux/ext4_fs_sb.h>
 #endif
 /*
 * Macro-instructions used to manage group descriptors
@@ -231,6 +229,7 @@ struct ext4_group_desc
 #define EXT4_TOPDIR_FL                  0x00020000 /* Top of directory hierarchies*/
 #define EXT4_HUGE_FILE_FL               0x00040000 /* Set to each huge file */
 #define EXT4_EXTENTS_FL                 0x00080000 /* Inode uses extents */
+#define EXT4_EXT_MIGRATE                0x00100000 /* Inode is migrating */
 #define EXT4_RESERVED_FL                0x80000000 /* reserved for ext4 lib */
 #define EXT4_FL_USER_VISIBLE            0x000BDFFF /* User visible flags */
@@ -1049,8 +1048,7 @@ extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
                struct address_space *mapping, loff_t from);
 /* ioctl.c */
-extern int ext4_ioctl (struct inode *, struct file *, unsigned int,
+extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
-                       unsigned long);
 extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
 /* migrate.c */
@@ -1204,4 +1202,4 @@ extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
                        int extend_disksize);
 #endif  /* __KERNEL__ */
-#endif  /* _LINUX_EXT4_FS_H */
+#endif  /* _EXT4_H */
diff --git a/include/linux/ext4_fs_extents.h b/fs/ext4/ext4_extents.h
index 1285c583b2d8..75333b595fab 100644
--- a/include/linux/ext4_fs_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -16,10 +16,10 @@
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-
 */
-#ifndef _LINUX_EXT4_EXTENTS
+#ifndef _EXT4_EXTENTS
-#define _LINUX_EXT4_EXTENTS
+#define _EXT4_EXTENTS
-#include <linux/ext4_fs.h>
+#include "ext4.h"
 /*
 * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks
@@ -228,5 +228,5 @@ extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
 extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
                                                ext4_lblk_t *, ext4_fsblk_t *);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
-#endif /* _LINUX_EXT4_EXTENTS */
+#endif /* _EXT4_EXTENTS */
diff --git a/include/linux/ext4_fs_i.h b/fs/ext4/ext4_i.h
index d5508d3cf290..26a4ae255d79 100644
--- a/include/linux/ext4_fs_i.h
+++ b/fs/ext4/ext4_i.h
@@ -1,5 +1,5 @@
 /*
- *  linux/include/linux/ext4_fs_i.h
+ *  ext4_i.h
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
@@ -13,8 +13,8 @@
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */
-#ifndef _LINUX_EXT4_FS_I
+#ifndef _EXT4_I
-#define _LINUX_EXT4_FS_I
+#define _EXT4_I
 #include <linux/rwsem.h>
 #include <linux/rbtree.h>
@@ -164,4 +164,4 @@ struct ext4_inode_info {
        spinlock_t i_prealloc_lock;
 };
-#endif  /* _LINUX_EXT4_FS_I */
+#endif  /* _EXT4_I */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index d6afe4e27340..c75384b34f2c 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -2,14 +2,14 @@
 * Interface between ext4 and JBD
 */
-#include <linux/ext4_jbd2.h>
+#include "ext4_jbd2.h"
 int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
                                struct buffer_head *bh)
 {
        int err = jbd2_journal_get_undo_access(handle, bh);
        if (err)
-                ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+                ext4_journal_abort_handle(where, __func__, bh, handle, err);
        return err;
 }
@@ -18,7 +18,7 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
 {
        int err = jbd2_journal_get_write_access(handle, bh);
        if (err)
-                ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+                ext4_journal_abort_handle(where, __func__, bh, handle, err);
        return err;
 }
@@ -27,7 +27,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
 {
        int err = jbd2_journal_forget(handle, bh);
        if (err)
-                ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+                ext4_journal_abort_handle(where, __func__, bh, handle, err);
        return err;
 }
@@ -36,7 +36,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
 {
        int err = jbd2_journal_revoke(handle, blocknr, bh);
        if (err)
-                ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+                ext4_journal_abort_handle(where, __func__, bh, handle, err);
        return err;
 }
@@ -45,7 +45,7 @@ int __ext4_journal_get_create_access(const char *where,
 {
        int err = jbd2_journal_get_create_access(handle, bh);
        if (err)
-                ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+                ext4_journal_abort_handle(where, __func__, bh, handle, err);
        return err;
 }
@@ -54,6 +54,6 @@ int __ext4_journal_dirty_metadata(const char *where,
 {
        int err = jbd2_journal_dirty_metadata(handle, bh);
        if (err)
-                ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+                ext4_journal_abort_handle(where, __func__, bh, handle, err);
        return err;
 }
diff --git a/include/linux/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 38c71d3c8dbf..9255a7d28b24 100644
--- a/include/linux/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -1,5 +1,5 @@
 /*
- * linux/include/linux/ext4_jbd2.h
+ * ext4_jbd2.h
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
 *
@@ -12,12 +12,12 @@
 * Ext4-specific journaling extensions.
 */
-#ifndef _LINUX_EXT4_JBD2_H
+#ifndef _EXT4_JBD2_H
-#define _LINUX_EXT4_JBD2_H
+#define _EXT4_JBD2_H
 #include <linux/fs.h>
 #include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
+#include "ext4.h"
 #define EXT4_JOURNAL(inode)     (EXT4_SB((inode)->i_sb)->s_journal)
@@ -228,4 +228,4 @@ static inline int ext4_should_writeback_data(struct inode *inode)
        return 0;
 }
-#endif  /* _LINUX_EXT4_JBD2_H */
+#endif  /* _EXT4_JBD2_H */
diff --git a/include/linux/ext4_fs_sb.h b/fs/ext4/ext4_sb.h
index abaae2c8cccf..5802e69f2191 100644
--- a/include/linux/ext4_fs_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -1,5 +1,5 @@
 /*
- *  linux/include/linux/ext4_fs_sb.h
+ *  ext4_sb.h
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
@@ -13,8 +13,8 @@
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */
-#ifndef _LINUX_EXT4_FS_SB
+#ifndef _EXT4_SB
-#define _LINUX_EXT4_FS_SB
+#define _EXT4_SB
 #ifdef __KERNEL__
 #include <linux/timer.h>
@@ -145,4 +145,4 @@ struct ext4_sb_info {
        struct ext4_locality_group *s_locality_groups;
 };
-#endif  /* _LINUX_EXT4_FS_SB */
+#endif  /* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9ae6e67090cd..47929c4e3dae 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -32,7 +32,6 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/time.h>
-#include <linux/ext4_jbd2.h>
 #include <linux/jbd2.h>
 #include <linux/highuid.h>
 #include <linux/pagemap.h>
@@ -40,8 +39,9 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/falloc.h>
-#include <linux/ext4_fs_extents.h>
 #include <asm/uaccess.h>
+#include "ext4_jbd2.h"
+#include "ext4_extents.h"
 /*
@@ -308,7 +308,7 @@ corrupted:
 }
 #define ext4_ext_check_header(inode, eh, depth) \
-        __ext4_ext_check_header(__FUNCTION__, inode, eh, depth)
+        __ext4_ext_check_header(__func__, inode, eh, depth)
 #ifdef EXT_DEBUG
 static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -614,7 +614,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
        ix->ei_block = cpu_to_le32(logical);
        ext4_idx_store_pblock(ix, ptr);
-        curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1);
+        le16_add_cpu(&curp->p_hdr->eh_entries, 1);
        BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
                             > le16_to_cpu(curp->p_hdr->eh_max));
@@ -736,7 +736,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
        }
        if (m) {
                memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
-                neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m);
+                le16_add_cpu(&neh->eh_entries, m);
        }
        set_buffer_uptodate(bh);
@@ -753,8 +753,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
                err = ext4_ext_get_access(handle, inode, path + depth);
                if (err)
                        goto cleanup;
-                path[depth].p_hdr->eh_entries =
+                le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
-                     cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m);
                err = ext4_ext_dirty(handle, inode, path + depth);
                if (err)
                        goto cleanup;
@@ -817,8 +816,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
                if (m) {
                        memmove(++fidx, path[i].p_idx - m,
                                sizeof(struct ext4_extent_idx) * m);
-                        neh->eh_entries =
+                        le16_add_cpu(&neh->eh_entries, m);
-                                cpu_to_le16(le16_to_cpu(neh->eh_entries) + m);
                }
                set_buffer_uptodate(bh);
                unlock_buffer(bh);
@@ -834,7 +832,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
                        err = ext4_ext_get_access(handle, inode, path + i);
                        if (err)
                                goto cleanup;
-                        path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m);
+                        le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
                        err = ext4_ext_dirty(handle, inode, path + i);
                        if (err)
                                goto cleanup;
@@ -1369,7 +1367,7 @@ int ext4_ext_try_to_merge(struct inode *inode,
                                * sizeof(struct ext4_extent);
                        memmove(ex + 1, ex + 2, len);
                }
-                eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1);
+                le16_add_cpu(&eh->eh_entries, -1);
                merge_done = 1;
                WARN_ON(eh->eh_entries == 0);
                if (!eh->eh_entries)
@@ -1560,7 +1558,7 @@ has_space:
                path[depth].p_ext = nearex;
        }
-        eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1);
+        le16_add_cpu(&eh->eh_entries, 1);
        nearex = path[depth].p_ext;
        nearex->ee_block = newext->ee_block;
        ext4_ext_store_pblock(nearex, ext_pblock(newext));
@@ -1699,7 +1697,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
        err = ext4_ext_get_access(handle, inode, path);
        if (err)
                return err;
-        path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1);
+        le16_add_cpu(&path->p_hdr->eh_entries, -1);
        err = ext4_ext_dirty(handle, inode, path);
        if (err)
                return err;
@@ -1902,7 +1900,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                if (num == 0) {
                        /* this extent is removed; mark slot entirely unused */
                        ext4_ext_store_pblock(ex, 0);
-                        eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
+                        le16_add_cpu(&eh->eh_entries, -1);
                }
                ex->ee_block = cpu_to_le32(block);
@@ -1979,7 +1977,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
         * We start scanning from right side, freeing all the blocks
         * after i_size and walking into the tree depth-wise.
         */
-        path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL);
+        path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
        if (path == NULL) {
                ext4_journal_stop(handle);
                return -ENOMEM;
@@ -2138,6 +2136,82 @@ void ext4_ext_release(struct super_block *sb)
 #endif
 }
+/*
+ * Completion handler installed as bi_end_io by ext4_ext_zeroout(): wakes up
+ * whoever is waiting on the completion stored in bio->bi_private.  The
+ * error argument is not examined here.
+ */
+static void bi_complete(struct bio *bio, int error)
+{
+        struct completion *waiter = (struct completion *)bio->bi_private;
+        complete(waiter);
+}
+/*
+ * Write zeroes over every on-disk block covered by @ex.
+ *
+ * The range is handled in chunks of at most BIO_MAX_PAGES blocks.  Each
+ * chunk becomes one WRITE bio filled from ZERO_PAGE(0), and we wait for it
+ * to finish before starting the next chunk.
+ *
+ * Returns 0 when every chunk completed up to date, -ENOMEM when a bio could
+ * not be allocated and -EIO when a write failed.  A zero-length extent also
+ * yields -EIO because no write is issued.
+ *
+ * FIXME!! we need to try to merge to left or right after zero-out
+ */
+static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
+{
+        int ret = -EIO;
+        struct bio *bio;
+        int blkbits, blocksize;
+        sector_t ee_pblock;
+        struct completion event;
+        unsigned int ee_len, len, done, offset;
+        blkbits   = inode->i_blkbits;
+        blocksize = inode->i_sb->s_blocksize;
+        ee_len    = ext4_ext_get_actual_len(ex);
+        ee_pblock = ext_pblock(ex);
+        /* convert ee_pblock to 512 byte sectors */
+        ee_pblock = ee_pblock << (blkbits - 9);
+        while (ee_len > 0) {
+                if (ee_len > BIO_MAX_PAGES)
+                        len = BIO_MAX_PAGES;
+                else
+                        len = ee_len;
+                bio = bio_alloc(GFP_NOIO, len);
+                if (!bio)
+                        return -ENOMEM;
+                bio->bi_sector = ee_pblock;
+                bio->bi_bdev   = inode->i_sb->s_bdev;
+                done = 0;
+                offset = 0;
+                while (done < len) {
+                        ret = bio_add_page(bio, ZERO_PAGE(0),
+                                                        blocksize, offset);
+                        if (ret != blocksize) {
+                                /*
+                                 * We can't add any more pages because of
+                                 * hardware limitations.  Start a new bio.
+                                 */
+                                break;
+                        }
+                        done++;
+                        offset += blocksize;
+                        if (offset >= PAGE_CACHE_SIZE)
+                                offset = 0;
+                }
+                init_completion(&event);
+                bio->bi_private = &event;
+                bio->bi_end_io = bi_complete;
+                submit_bio(WRITE, bio);
+                wait_for_completion(&event);
+                ret = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : -EIO;
+                /*
+                 * Drop our reference before acting on the result so the
+                 * bio is released on the error path as well.
+                 */
+                bio_put(bio);
+                if (ret)
+                        break;
+                ee_len    -= done;
+                ee_pblock += done  << (blkbits - 9);
+        }
+        return ret;
+}
+/*
+ * Length threshold used by ext4_ext_convert_to_initialized() when deciding
+ * whether to zero out an uninitialized extent (or the tail of one) on disk
+ * directly.
+ */
+#define EXT4_EXT_ZERO_LEN 7
 /*
 * This function is called by ext4_ext_get_blocks() if someone tries to write
 * to an uninitialized extent. It may result in splitting the uninitialized
@@ -2154,7 +2228,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                                ext4_lblk_t iblock,
                                                unsigned long max_blocks)
 {
-        struct ext4_extent *ex, newex;
+        struct ext4_extent *ex, newex, orig_ex;
        struct ext4_extent *ex1 = NULL;
        struct ext4_extent *ex2 = NULL;
        struct ext4_extent *ex3 = NULL;
@@ -2173,10 +2247,26 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
        allocated = ee_len - (iblock - ee_block);
        newblock = iblock - ee_block + ext_pblock(ex);
        ex2 = ex;
+        orig_ex.ee_block = ex->ee_block;
+        orig_ex.ee_len   = cpu_to_le16(ee_len);
+        ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
        err = ext4_ext_get_access(handle, inode, path + depth);
        if (err)
                goto out;
+        /* If extent is at most 2*EXT4_EXT_ZERO_LEN long, zero out directly */
+        if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
+                err =  ext4_ext_zeroout(inode, &orig_ex);
+                if (err)
+                        goto fix_extent_len;
+                /* update the extent length and mark as initialized */
+                ex->ee_block = orig_ex.ee_block;
+                ex->ee_len   = orig_ex.ee_len;
+                ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+                ext4_ext_dirty(handle, inode, path + depth);
+                /* zeroed the full extent */
+                return allocated;
+        }
        /* ex1: ee_block to iblock - 1 : uninitialized */
        if (iblock > ee_block) {
@@ -2195,19 +2285,103 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
        /* ex3: to ee_block + ee_len : uninitialised */
        if (allocated > max_blocks) {
                unsigned int newdepth;
+                /* If allocated is at most EXT4_EXT_ZERO_LEN, zero out directly */
+                if (allocated <= EXT4_EXT_ZERO_LEN) {
+                        /* Mark first half uninitialized.
+                         * Mark second half initialized and zero out the
+                         * initialized extent
+                         */
+                        ex->ee_block = orig_ex.ee_block;
+                        ex->ee_len   = cpu_to_le16(ee_len - allocated);
+                        ext4_ext_mark_uninitialized(ex);
+                        ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+                        ext4_ext_dirty(handle, inode, path + depth);
+                        ex3 = &newex;
+                        ex3->ee_block = cpu_to_le32(iblock);
+                        ext4_ext_store_pblock(ex3, newblock);
+                        ex3->ee_len = cpu_to_le16(allocated);
+                        err = ext4_ext_insert_extent(handle, inode, path, ex3);
+                        if (err == -ENOSPC) {
+                                err =  ext4_ext_zeroout(inode, &orig_ex);
+                                if (err)
+                                        goto fix_extent_len;
+                                ex->ee_block = orig_ex.ee_block;
+                                ex->ee_len   = orig_ex.ee_len;
+                                ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+                                ext4_ext_dirty(handle, inode, path + depth);
+                                /* zeroed the full extent */
+                                return allocated;
+                        } else if (err)
+                                goto fix_extent_len;
+                        /*
+                         * We need to zero out the second half because
+                         * an fallocate request can update file size and
+                         * converting the second half to initialized extent
+                         * implies that we can leak some junk data to user
+                         * space.
+                         */
+                        err =  ext4_ext_zeroout(inode, ex3);
+                        if (err) {
+                                /*
+                                 * We should actually mark the
+                                 * second half as uninit and return error.
+                                 * Insert would have changed the extent.
+                                 */
+                                depth = ext_depth(inode);
+                                ext4_ext_drop_refs(path);
+                                path = ext4_ext_find_extent(inode,
+                                                                iblock, path);
+                                if (IS_ERR(path)) {
+                                        err = PTR_ERR(path);
+                                        return err;
+                                }
+                                ex = path[depth].p_ext;
+                                err = ext4_ext_get_access(handle, inode,
+                                                                path + depth);
+                                if (err)
+                                        return err;
+                                ext4_ext_mark_uninitialized(ex);
+                                ext4_ext_dirty(handle, inode, path + depth);
+                                return err;
+                        }
+                        /* zeroed the second half */
+                        return allocated;
+                }
                ex3 = &newex;
                ex3->ee_block = cpu_to_le32(iblock + max_blocks);
                ext4_ext_store_pblock(ex3, newblock + max_blocks);
                ex3->ee_len = cpu_to_le16(allocated - max_blocks);
                ext4_ext_mark_uninitialized(ex3);
                err = ext4_ext_insert_extent(handle, inode, path, ex3);
-                if (err)
+                if (err == -ENOSPC) {
-                        goto out;
+                        err =  ext4_ext_zeroout(inode, &orig_ex);
+                        if (err)
+                                goto fix_extent_len;
+                        /* update the extent length and mark as initialized */
+                        ex->ee_block = orig_ex.ee_block;
+                        ex->ee_len   = orig_ex.ee_len;
+                        ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+                        ext4_ext_dirty(handle, inode, path + depth);
+                        /* zeroed the full extent */
+                        return allocated;
+                } else if (err)
+                        goto fix_extent_len;
                /*
                 * The depth, and hence eh & ex might change
                 * as part of the insert above.
                 */
                newdepth = ext_depth(inode);
+                /*
+                 * update the extent length after successful insert of the
+                 * split extent
+                 */
+                orig_ex.ee_len = cpu_to_le16(ee_len -
+                                                ext4_ext_get_actual_len(ex3));
                if (newdepth != depth) {
                        depth = newdepth;
                        ext4_ext_drop_refs(path);
@@ -2226,6 +2400,24 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                goto out;
                }
                allocated = max_blocks;
+                /* If the extent has at most EXT4_EXT_ZERO_LEN blocks and we are
+                 * trying to insert an extent in the middle, zero out directly;
+                 * otherwise give the extent a chance to merge to the left.
+                 */
+                if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
+                                                        iblock != ee_block) {
+                        err =  ext4_ext_zeroout(inode, &orig_ex);
+                        if (err)
+                                goto fix_extent_len;
+                        /* update the extent length and mark as initialized */
+                        ex->ee_block = orig_ex.ee_block;
+                        ex->ee_len   = orig_ex.ee_len;
+                        ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+                        ext4_ext_dirty(handle, inode, path + depth);
+                        /* zero out the first half */
+                        return allocated;
+                }
        }
        /*
         * If there was a change of depth as part of the
@@ -2282,8 +2474,29 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
        goto out;
 insert:
        err = ext4_ext_insert_extent(handle, inode, path, &newex);
+        if (err == -ENOSPC) {
+                err =  ext4_ext_zeroout(inode, &orig_ex);
+                if (err)
+                        goto fix_extent_len;
+                /* update the extent length and mark as initialized */
+                ex->ee_block = orig_ex.ee_block;
+                ex->ee_len   = orig_ex.ee_len;
+                ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+                ext4_ext_dirty(handle, inode, path + depth);
+                /* zero out the first half */
+                return allocated;
+        } else if (err)
+                goto fix_extent_len;
 out:
        return err ? err : allocated;
+fix_extent_len:
+        ex->ee_block = orig_ex.ee_block;
+        ex->ee_len   = orig_ex.ee_len;
+        ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+        ext4_ext_mark_uninitialized(ex);
+        ext4_ext_dirty(handle, inode, path + depth);
+        return err;
 }
 /*
@@ -2393,8 +2606,20 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                        }
                        if (create == EXT4_CREATE_UNINITIALIZED_EXT)
                                goto out;
-                        if (!create)
+                        if (!create) {
+                                /*
+                                 * We have blocks reserved already.  We
+                                 * return allocated blocks so that delalloc
+                                 * won't do block reservation for us.  But
+                                 * the buffer head will be unmapped so that
+                                 * a read from the block returns 0s.
+                                 */
+                                if (allocated > max_blocks)
+                                        allocated = max_blocks;
+                                /* mark the buffer unwritten */
+                                __set_bit(BH_Unwritten, &bh_result->b_state);
                                goto out2;
+                        }
                        ret = ext4_ext_convert_to_initialized(handle, inode,
                                                                path, iblock,
@@ -2584,6 +2809,8 @@ out_stop:
                ext4_orphan_del(handle, inode);
        up_write(&EXT4_I(inode)->i_data_sem);
+        inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+        ext4_mark_inode_dirty(handle, inode);
        ext4_journal_stop(handle);
 }
@@ -2608,6 +2835,28 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
        return needed;
 }
+static void ext4_falloc_update_inode(struct inode *inode,
+                                int mode, loff_t new_size, int update_ctime)
+{
+        struct timespec now;
+        if (update_ctime) {
+                now = current_fs_time(inode->i_sb);
+                if (!timespec_equal(&inode->i_ctime, &now))
+                        inode->i_ctime = now;
+        }
+        /*
+         * Update only when preallocation was requested beyond
+         * the file size.
+         */
+        if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+                                new_size > i_size_read(inode)) {
+                i_size_write(inode, new_size);
+                EXT4_I(inode)->i_disksize = new_size;
+        }
+}
 /*
 * preallocate space for a file. This implements ext4's fallocate inode
 * operation, which gets called from sys_fallocate system call.
@@ -2619,8 +2868,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 {
        handle_t *handle;
        ext4_lblk_t block;
+        loff_t new_size;
        unsigned long max_blocks;
-        ext4_fsblk_t nblocks = 0;
        int ret = 0;
        int ret2 = 0;
        int retries = 0;
@@ -2639,9 +2888,12 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
                return -ENODEV;
        block = offset >> blkbits;
+        /*
+         * We can't just convert len to max_blocks: with blocksize = 4096,
+         * offset = 3072 and len = 2048 the request spans two blocks.
+         */
        max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
-                        - block;
+                                                        - block;
        /*
         * credits to insert 1 extent into extent tree + buffers to be able to
         * modify 1 super block, 1 block bitmap and 1 group descriptor.
@@ -2657,7 +2909,6 @@ retry:
                        ret = PTR_ERR(handle);
                        break;
                }
                ret = ext4_get_blocks_wrap(handle, inode, block,
                                          max_blocks, &map_bh,
                                          EXT4_CREATE_UNINITIALIZED_EXT, 0);
@@ -2673,61 +2924,24 @@ retry:
                        ret2 = ext4_journal_stop(handle);
                        break;
                }
-                if (ret > 0) {
+                if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
-                        /* check wrap through sign-bit/zero here */
+                                                blkbits) >> blkbits))
-                        if ((block + ret) < 0 || (block + ret) < block) {
+                        new_size = offset + len;
-                                ret = -EIO;
+                else
-                                ext4_mark_inode_dirty(handle, inode);
+                        new_size = (block + ret) << blkbits;
-                                ret2 = ext4_journal_stop(handle);
-                                break;
-                        }
-                        if (buffer_new(&map_bh) && ((block + ret) >
-                            (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
-                            >> blkbits)))
-                                        nblocks = nblocks + ret;
-                }
-                /* Update ctime if new blocks get allocated */
-                if (nblocks) {
-                        struct timespec now;
-                        now = current_fs_time(inode->i_sb);
-                        if (!timespec_equal(&inode->i_ctime, &now))
-                                inode->i_ctime = now;
-                }
+                ext4_falloc_update_inode(inode, mode, new_size,
+                                                buffer_new(&map_bh));
                ext4_mark_inode_dirty(handle, inode);
                ret2 = ext4_journal_stop(handle);
                if (ret2)
                        break;
        }
+        if (ret == -ENOSPC &&
-        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+                        ext4_should_retry_alloc(inode->i_sb, &retries)) {
+                ret = 0;
                goto retry;
-        /*
-         * Time to update the file size.
-         * Update only when preallocation was requested beyond the file size.
-         */
-        if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-            (offset + len) > i_size_read(inode)) {
-                if (ret > 0) {
-                        /*
-                         * if no error, we assume preallocation succeeded
-                         * completely
-                         */
-                        i_size_write(inode, offset + len);
-                        EXT4_I(inode)->i_disksize = i_size_read(inode);
-                } else if (ret < 0 && nblocks) {
-                        /* Handle partial allocation scenario */
-                        loff_t newsize;
-                        newsize  = (nblocks << blkbits) + i_size_read(inode);
-                        i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
-                        EXT4_I(inode)->i_disksize = i_size_read(inode);
-                }
        }
        mutex_unlock(&inode->i_mutex);
        return ret > 0 ? ret2 : ret;
 }
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ac35ec58db55..4159be6366ab 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -21,8 +21,8 @@
 #include <linux/time.h>
 #include <linux/fs.h>
 #include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
+#include "ext4.h"
-#include <linux/ext4_jbd2.h>
+#include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -129,7 +129,7 @@ const struct file_operations ext4_file_operations = {
        .write          = do_sync_write,
        .aio_read       = generic_file_aio_read,
        .aio_write      = ext4_file_write,
-        .ioctl          = ext4_ioctl,
+        .unlocked_ioctl = ext4_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = ext4_compat_ioctl,
 #endif
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 8d50879d1c2c..1c8ba48d4f8d 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -27,8 +27,8 @@
 #include <linux/sched.h>
 #include <linux/writeback.h>
 #include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
+#include "ext4.h"
-#include <linux/ext4_jbd2.h>
+#include "ext4_jbd2.h"
 /*
 * akpm: A new design for ext4_sync_file().
@@ -72,6 +72,9 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
                goto out;
        }
+        if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+                goto out;
        /*
         * The VFS has written the file data.  If the inode is unaltered
         * then we need not start a commit.
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 1555024e3b36..1d6329dbe390 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -11,8 +11,8 @@
 #include <linux/fs.h>
 #include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
 #include <linux/cryptohash.h>
+#include "ext4.h"
 #define DELTA 0x9E3779B9
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 486e46a3918d..c6efbab0c801 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -15,8 +15,6 @@
 #include <linux/time.h>
 #include <linux/fs.h>
 #include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
-#include <linux/ext4_jbd2.h>
 #include <linux/stat.h>
 #include <linux/string.h>
 #include <linux/quotaops.h>
@@ -25,7 +23,8 @@
 #include <linux/bitops.h>
 #include <linux/blkdev.h>
 #include <asm/byteorder.h>
+#include "ext4.h"
+#include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
 #include "group.h"
@@ -75,7 +74,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
        /* If checksum is bad mark all blocks and inodes use to prevent
         * allocation, essentially implementing a per-group read-only flag. */
        if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
-                ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n",
+                ext4_error(sb, __func__, "Checksum bad for group %lu\n",
                           block_group);
                gdp->bg_free_blocks_count = 0;
                gdp->bg_free_inodes_count = 0;
@@ -223,11 +222,9 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
                if (gdp) {
                        spin_lock(sb_bgl_lock(sbi, block_group));
-                        gdp->bg_free_inodes_count = cpu_to_le16(
+                        le16_add_cpu(&gdp->bg_free_inodes_count, 1);
-                                le16_to_cpu(gdp->bg_free_inodes_count) + 1);
                        if (is_directory)
-                                gdp->bg_used_dirs_count = cpu_to_le16(
+                                le16_add_cpu(&gdp->bg_used_dirs_count, -1);
-                                  le16_to_cpu(gdp->bg_used_dirs_count) - 1);
                        gdp->bg_checksum = ext4_group_desc_csum(sbi,
                                                        block_group, gdp);
                        spin_unlock(sb_bgl_lock(sbi, block_group));
@@ -588,7 +585,7 @@ got:
        ino++;
        if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
            ino > EXT4_INODES_PER_GROUP(sb)) {
-                ext4_error(sb, __FUNCTION__,
+                ext4_error(sb, __func__,
                           "reserved inode or inode > inodes count - "
                           "block_group = %lu, inode=%lu", group,
                           ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -664,11 +661,9 @@ got:
                                cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
        }
-        gdp->bg_free_inodes_count =
+        le16_add_cpu(&gdp->bg_free_inodes_count, -1);
-                cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
        if (S_ISDIR(mode)) {
-                gdp->bg_used_dirs_count =
+                le16_add_cpu(&gdp->bg_used_dirs_count, 1);
-                        cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
        }
        gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
        spin_unlock(sb_bgl_lock(sbi, group));
@@ -744,23 +739,24 @@ got:
        if (err)
                goto fail_free_drop;
-        err = ext4_mark_inode_dirty(handle, inode);
-        if (err) {
-                ext4_std_error(sb, err);
-                goto fail_free_drop;
-        }
        if (test_opt(sb, EXTENTS)) {
-                /* set extent flag only for directory and file */
+                /* set extent flag only for directory, file and normal symlink */
-                if (S_ISDIR(mode) || S_ISREG(mode)) {
+                if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
                        EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
                        ext4_ext_tree_init(handle, inode);
                        err = ext4_update_incompat_feature(handle, sb,
                                        EXT4_FEATURE_INCOMPAT_EXTENTS);
                        if (err)
-                                goto fail;
+                                goto fail_free_drop;
                }
        }
+        err = ext4_mark_inode_dirty(handle, inode);
+        if (err) {
+                ext4_std_error(sb, err);
+                goto fail_free_drop;
+        }
        ext4_debug("allocating inode %lu\n", inode->i_ino);
        goto really_out;
 fail:
@@ -796,7 +792,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
        /* Error cases - e2fsck has already cleaned up for us */
        if (ino > max_ino) {
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "bad orphan ino %lu!  e2fsck was run?", ino);
                goto error;
        }
@@ -805,7 +801,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
        bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
        bitmap_bh = read_inode_bitmap(sb, block_group);
        if (!bitmap_bh) {
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "inode bitmap error for orphan %lu", ino);
                goto error;
        }
@@ -830,7 +826,7 @@ iget_failed:
        err = PTR_ERR(inode);
        inode = NULL;
 bad_orphan:
-        ext4_warning(sb, __FUNCTION__,
+        ext4_warning(sb, __func__,
                     "bad orphan inode %lu!  e2fsck was run?", ino);
        printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
               bit, (unsigned long long)bitmap_bh->b_blocknr,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8fab233cb05f..8d9707746413 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -25,7 +25,6 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/time.h>
-#include <linux/ext4_jbd2.h>
 #include <linux/jbd2.h>
 #include <linux/highuid.h>
 #include <linux/pagemap.h>
@@ -36,6 +35,7 @@
 #include <linux/mpage.h>
 #include <linux/uio.h>
 #include <linux/bio.h>
+#include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -93,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
        BUFFER_TRACE(bh, "call ext4_journal_revoke");
        err = ext4_journal_revoke(handle, blocknr, bh);
        if (err)
-                ext4_abort(inode->i_sb, __FUNCTION__,
+                ext4_abort(inode->i_sb, __func__,
                           "error %d when attempting revoke", err);
        BUFFER_TRACE(bh, "exit");
        return err;
@@ -985,6 +985,16 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
        } else {
                retval = ext4_get_blocks_handle(handle, inode, block,
                                max_blocks, bh, create, extend_disksize);
+                if (retval > 0 && buffer_new(bh)) {
+                        /*
+                         * We allocated new blocks which will result in
+                         * i_data's format changing.  Force the migrate
+                         * to fail by clearing migrate flags
+                         */
+                        EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
+                                                        ~EXT4_EXT_MIGRATE;
+                }
        }
        up_write((&EXT4_I(inode)->i_data_sem));
        return retval;
@@ -1230,7 +1240,7 @@ int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 {
        int err = jbd2_journal_dirty_data(handle, bh);
        if (err)
-                ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__,
+                ext4_journal_abort_handle(__func__, __func__,
                                                bh, handle, err);
        return err;
 }
@@ -1301,10 +1311,11 @@ static int ext4_ordered_write_end(struct file *file,
                new_i_size = pos + copied;
                if (new_i_size > EXT4_I(inode)->i_disksize)
                        EXT4_I(inode)->i_disksize = new_i_size;
-                copied = ext4_generic_write_end(file, mapping, pos, len, copied,
+                ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
                                                        page, fsdata);
-                if (copied < 0)
+                copied = ret2;
-                        ret = copied;
+                if (ret2 < 0)
+                        ret = ret2;
        }
        ret2 = ext4_journal_stop(handle);
        if (!ret)
@@ -1329,10 +1340,11 @@ static int ext4_writeback_write_end(struct file *file,
        if (new_i_size > EXT4_I(inode)->i_disksize)
                EXT4_I(inode)->i_disksize = new_i_size;
-        copied = ext4_generic_write_end(file, mapping, pos, len, copied,
+        ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
                                                        page, fsdata);
-        if (copied < 0)
+        copied = ret2;
-                ret = copied;
+        if (ret2 < 0)
+                ret = ret2;
        ret2 = ext4_journal_stop(handle);
        if (!ret)
@@ -2501,12 +2513,10 @@ out_stop:
 static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
                unsigned long ino, struct ext4_iloc *iloc)
 {
-        unsigned long desc, group_desc;
        ext4_group_t block_group;
        unsigned long offset;
        ext4_fsblk_t block;
-        struct buffer_head *bh;
+        struct ext4_group_desc *gdp;
-        struct ext4_group_desc * gdp;
        if (!ext4_valid_inum(sb, ino)) {
                /*
@@ -2518,22 +2528,10 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
        }
        block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
-        if (block_group >= EXT4_SB(sb)->s_groups_count) {
+        gdp = ext4_get_group_desc(sb, block_group, NULL);
-                ext4_error(sb,"ext4_get_inode_block","group >= groups count");
+        if (!gdp)
                return 0;
-        }
-        smp_rmb();
-        group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
-        desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
-        bh = EXT4_SB(sb)->s_group_desc[group_desc];
-        if (!bh) {
-                ext4_error (sb, "ext4_get_inode_block",
-                            "Descriptor not loaded");
-                return 0;
-        }
-        gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data +
-                desc * EXT4_DESC_SIZE(sb));
        /*
         * Figure out the offset within the block group inode table
         */
@@ -2976,7 +2974,8 @@ static int ext4_do_update_inode(handle_t *handle,
        if (ext4_inode_blocks_set(handle, raw_inode, ei))
                goto out_brelse;
        raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
-        raw_inode->i_flags = cpu_to_le32(ei->i_flags);
+        /* clear the migrate flag in the raw_inode */
+        raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE);
        if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
            cpu_to_le32(EXT4_OS_HURD))
                raw_inode->i_file_acl_high =
@@ -3374,7 +3373,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
                                EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
                                if (mnt_count !=
                                        le16_to_cpu(sbi->s_es->s_mnt_count)) {
-                                        ext4_warning(inode->i_sb, __FUNCTION__,
+                                        ext4_warning(inode->i_sb, __func__,
                                        "Unable to expand inode %lu. Delete"
                                        " some EAs or run e2fsck.",
                                        inode->i_ino);
@@ -3415,7 +3414,7 @@ void ext4_dirty_inode(struct inode *inode)
                current_handle->h_transaction != handle->h_transaction) {
                /* This task has a transaction open against a different fs */
                printk(KERN_EMERG "%s: transactions do not match!\n",
-                       __FUNCTION__);
+                       __func__);
        } else {
                jbd_debug(5, "marking dirty.  outer handle=%p\n",
                                current_handle);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 25b13ede8086..7a6c2f1faba6 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -10,17 +10,17 @@
 #include <linux/fs.h>
 #include <linux/jbd2.h>
 #include <linux/capability.h>
-#include <linux/ext4_fs.h>
-#include <linux/ext4_jbd2.h>
 #include <linux/time.h>
 #include <linux/compat.h>
 #include <linux/smp_lock.h>
 #include <linux/mount.h>
 #include <asm/uaccess.h>
+#include "ext4_jbd2.h"
+#include "ext4.h"
-int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
+long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-                unsigned long arg)
 {
+        struct inode *inode = filp->f_dentry->d_inode;
        struct ext4_inode_info *ei = EXT4_I(inode);
        unsigned int flags;
        unsigned short rsv_window_size;
@@ -277,9 +277,6 @@ setversion_out:
 #ifdef CONFIG_COMPAT
 long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-        struct inode *inode = file->f_path.dentry->d_inode;
-        int ret;
        /* These are just misnamed, they actually get/put from/to user an int */
        switch (cmd) {
        case EXT4_IOC32_GETFLAGS:
@@ -319,9 +316,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        default:
                return -ENOIOCTLCMD;
        }
-        lock_kernel();
+        return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
-        ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
-        unlock_kernel();
-        return ret;
 }
 #endif
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 9d57695de746..fbec2ef93797 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -21,21 +21,7 @@
 * mballoc.c contains the multiblocks allocation routines
 */
-#include <linux/time.h>
+#include "mballoc.h"
-#include <linux/fs.h>
-#include <linux/namei.h>
-#include <linux/ext4_jbd2.h>
-#include <linux/ext4_fs.h>
-#include <linux/quotaops.h>
-#include <linux/buffer_head.h>
-#include <linux/module.h>
-#include <linux/swap.h>
-#include <linux/proc_fs.h>
-#include <linux/pagemap.h>
-#include <linux/seq_file.h>
-#include <linux/version.h>
-#include "group.h"
 /*
 * MUSTDO:
 *   - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -345,288 +331,6 @@
 *
 */
-/*
- * with AGGRESSIVE_CHECK allocator runs consistency checks over
- * structures. these checks slow things down a lot
- */
-#define AGGRESSIVE_CHECK__
-/*
- * with DOUBLE_CHECK defined mballoc creates persistent in-core
- * bitmaps, maintains and uses them to check for double allocations
- */
-#define DOUBLE_CHECK__
-/*
- */
-#define MB_DEBUG__
-#ifdef MB_DEBUG
-#define mb_debug(fmt, a...)     printk(fmt, ##a)
-#else
-#define mb_debug(fmt, a...)
-#endif
-/*
- * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
- * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
- */
-#define EXT4_MB_HISTORY
-#define EXT4_MB_HISTORY_ALLOC           1       /* allocation */
-#define EXT4_MB_HISTORY_PREALLOC        2       /* preallocated blocks used */
-#define EXT4_MB_HISTORY_DISCARD         4       /* preallocation discarded */
-#define EXT4_MB_HISTORY_FREE            8       /* free */
-#define EXT4_MB_HISTORY_DEFAULT         (EXT4_MB_HISTORY_ALLOC | \
-                                         EXT4_MB_HISTORY_PREALLOC)
-/*
- * How long mballoc can look for a best extent (in found extents)
- */
-#define MB_DEFAULT_MAX_TO_SCAN          200
-/*
- * How long mballoc must look for a best extent
- */
-#define MB_DEFAULT_MIN_TO_SCAN          10
-/*
- * How many groups mballoc will scan looking for the best chunk
- */
-#define MB_DEFAULT_MAX_GROUPS_TO_SCAN   5
-/*
- * with 'ext4_mb_stats' allocator will collect stats that will be
- * shown at umount. The collecting costs though!
- */
-#define MB_DEFAULT_STATS                1
-/*
- * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
- * by the stream allocator, which purpose is to pack requests
- * as close each to other as possible to produce smooth I/O traffic
- * We use locality group prealloc space for stream request.
- * We can tune the same via /proc/fs/ext4/<parition>/stream_req
- */
-#define MB_DEFAULT_STREAM_THRESHOLD     16      /* 64K */
-/*
- * for which requests use 2^N search using buddies
- */
-#define MB_DEFAULT_ORDER2_REQS          2
-/*
- * default group prealloc size 512 blocks
- */
-#define MB_DEFAULT_GROUP_PREALLOC       512
-static struct kmem_cache *ext4_pspace_cachep;
-static struct kmem_cache *ext4_ac_cachep;
-#ifdef EXT4_BB_MAX_BLOCKS
-#undef EXT4_BB_MAX_BLOCKS
-#endif
-#define EXT4_BB_MAX_BLOCKS      30
-struct ext4_free_metadata {
-        ext4_group_t group;
-        unsigned short num;
-        ext4_grpblk_t  blocks[EXT4_BB_MAX_BLOCKS];
-        struct list_head list;
-};
-struct ext4_group_info {
-        unsigned long   bb_state;
-        unsigned long   bb_tid;
-        struct ext4_free_metadata *bb_md_cur;
-        unsigned short  bb_first_free;
-        unsigned short  bb_free;
-        unsigned short  bb_fragments;
-        struct          list_head bb_prealloc_list;
-#ifdef DOUBLE_CHECK
-        void            *bb_bitmap;
-#endif
-        unsigned short  bb_counters[];
-};
-#define EXT4_GROUP_INFO_NEED_INIT_BIT   0
-#define EXT4_GROUP_INFO_LOCKED_BIT      1
-#define EXT4_MB_GRP_NEED_INIT(grp)      \
-        (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
-struct ext4_prealloc_space {
-        struct list_head        pa_inode_list;
-        struct list_head        pa_group_list;
-        union {
-                struct list_head pa_tmp_list;
-                struct rcu_head pa_rcu;
-        } u;
-        spinlock_t              pa_lock;
-        atomic_t                pa_count;
-        unsigned                pa_deleted;
-        ext4_fsblk_t            pa_pstart;      /* phys. block */
-        ext4_lblk_t             pa_lstart;      /* log. block */
-        unsigned short          pa_len;         /* len of preallocated chunk */
-        unsigned short          pa_free;        /* how many blocks are free */
-        unsigned short          pa_linear;      /* consumed in one direction
-                                                 * strictly, for grp prealloc */
-        spinlock_t              *pa_obj_lock;
-        struct inode            *pa_inode;      /* hack, for history only */
-};
-struct ext4_free_extent {
-        ext4_lblk_t fe_logical;
-        ext4_grpblk_t fe_start;
-        ext4_group_t fe_group;
-        int fe_len;
-};
-/*
- * Locality group:
- *   we try to group all related changes together
- *   so that writeback can flush/allocate them together as well
- */
-struct ext4_locality_group {
-        /* for allocator */
-        struct mutex            lg_mutex;       /* to serialize allocates */
-        struct list_head        lg_prealloc_list;/* list of preallocations */
-        spinlock_t              lg_prealloc_lock;
-};
-struct ext4_allocation_context {
-        struct inode *ac_inode;
-        struct super_block *ac_sb;
-        /* original request */
-        struct ext4_free_extent ac_o_ex;
-        /* goal request (after normalization) */
-        struct ext4_free_extent ac_g_ex;
-        /* the best found extent */
-        struct ext4_free_extent ac_b_ex;
-        /* copy of the bext found extent taken before preallocation efforts */
-        struct ext4_free_extent ac_f_ex;
-        /* number of iterations done. we have to track to limit searching */
-        unsigned long ac_ex_scanned;
-        __u16 ac_groups_scanned;
-        __u16 ac_found;
-        __u16 ac_tail;
-        __u16 ac_buddy;
-        __u16 ac_flags;         /* allocation hints */
-        __u8 ac_status;
-        __u8 ac_criteria;
-        __u8 ac_repeats;
-        __u8 ac_2order;         /* if request is to allocate 2^N blocks and
-                                 * N > 0, the field stores N, otherwise 0 */
-        __u8 ac_op;             /* operation, for history only */
-        struct page *ac_bitmap_page;
-        struct page *ac_buddy_page;
-        struct ext4_prealloc_space *ac_pa;
-        struct ext4_locality_group *ac_lg;
-};
-#define AC_STATUS_CONTINUE      1
-#define AC_STATUS_FOUND         2
-#define AC_STATUS_BREAK         3
-struct ext4_mb_history {
-        struct ext4_free_extent orig;   /* orig allocation */
-        struct ext4_free_extent goal;   /* goal allocation */
-        struct ext4_free_extent result; /* result allocation */
-        unsigned pid;
-        unsigned ino;
-        __u16 found;    /* how many extents have been found */
-        __u16 groups;   /* how many groups have been scanned */
-        __u16 tail;     /* what tail broke some buddy */
-        __u16 buddy;    /* buddy the tail ^^^ broke */
-        __u16 flags;
-        __u8 cr:3;      /* which phase the result extent was found at */
-        __u8 op:4;
-        __u8 merged:1;
-};
-struct ext4_buddy {
-        struct page *bd_buddy_page;
-        void *bd_buddy;
-        struct page *bd_bitmap_page;
-        void *bd_bitmap;
-        struct ext4_group_info *bd_info;
-        struct super_block *bd_sb;
-        __u16 bd_blkbits;
-        ext4_group_t bd_group;
-};
-#define EXT4_MB_BITMAP(e4b)     ((e4b)->bd_bitmap)
-#define EXT4_MB_BUDDY(e4b)      ((e4b)->bd_buddy)
-#ifndef EXT4_MB_HISTORY
-static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
-{
-        return;
-}
-#else
-static void ext4_mb_store_history(struct ext4_allocation_context *ac);
-#endif
-#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
-static struct proc_dir_entry *proc_root_ext4;
-struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
-ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
-                        ext4_fsblk_t goal, unsigned long *count, int *errp);
-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
-                                        ext4_group_t group);
-static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
-static void ext4_mb_free_committed_blocks(struct super_block *);
-static void ext4_mb_return_to_preallocation(struct inode *inode,
-                                        struct ext4_buddy *e4b, sector_t block,
-                                        int count);
-static void ext4_mb_put_pa(struct ext4_allocation_context *,
-                        struct super_block *, struct ext4_prealloc_space *pa);
-static int ext4_mb_init_per_dev_proc(struct super_block *sb);
-static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
-static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
-{
-        struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-        bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
-}
-static inline void ext4_unlock_group(struct super_block *sb,
-                                        ext4_group_t group)
-{
-        struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-        bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
-}
-static inline int ext4_is_group_locked(struct super_block *sb,
-                                        ext4_group_t group)
-{
-        struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-        return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
-                                                &(grinfo->bb_state));
-}
-static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
-                                        struct ext4_free_extent *fex)
-{
-        ext4_fsblk_t block;
-        block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
-                        + fex->fe_start
-                        + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
-        return block;
-}
 static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
 {
 #if BITS_PER_LONG == 64
@@ -736,7 +440,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
                        blocknr +=
                            le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
-                        ext4_error(sb, __FUNCTION__, "double-free of inode"
+                        ext4_error(sb, __func__, "double-free of inode"
                                   " %lu's block %llu(bit %u in group %lu)\n",
                                   inode ? inode->i_ino : 0, blocknr,
                                   first + i, e4b->bd_group);
@@ -898,17 +602,17 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
        list_for_each(cur, &grp->bb_prealloc_list) {
                ext4_group_t groupnr;
                struct ext4_prealloc_space *pa;
-                pa = list_entry(cur, struct ext4_prealloc_space, group_list);
+                pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
-                ext4_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k);
+                ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
                MB_CHECK_ASSERT(groupnr == e4b->bd_group);
-                for (i = 0; i < pa->len; i++)
+                for (i = 0; i < pa->pa_len; i++)
                        MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
        }
        return 0;
 }
 #undef MB_CHECK_ASSERT
 #define mb_check_buddy(e4b) __mb_check_buddy(e4b,       \
-                                        __FILE__, __FUNCTION__, __LINE__)
+                                        __FILE__, __func__, __LINE__)
 #else
 #define mb_check_buddy(e4b)
 #endif
@@ -982,7 +686,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
        grp->bb_fragments = fragments;
        if (free != grp->bb_free) {
-                ext4_error(sb, __FUNCTION__,
+                ext4_error(sb, __func__,
                        "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
                        group, free, grp->bb_free);
                /*
@@ -1168,8 +872,9 @@ out:
        return err;
 }
-static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+static noinline_for_stack int
-                struct ext4_buddy *e4b)
+ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+                                        struct ext4_buddy *e4b)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct inode *inode = sbi->s_buddy_cache;
@@ -1367,7 +1072,7 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                        blocknr +=
                            le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
-                        ext4_error(sb, __FUNCTION__, "double-free of inode"
+                        ext4_error(sb, __func__, "double-free of inode"
                                   " %lu's block %llu(bit %u in group %lu)\n",
                                   inode ? inode->i_ino : 0, blocknr, block,
                                   e4b->bd_group);
@@ -1848,7 +1553,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
                         * free blocks even though group info says we
                         * we have free blocks
                         */
-                        ext4_error(sb, __FUNCTION__, "%d free blocks as per "
+                        ext4_error(sb, __func__, "%d free blocks as per "
                                        "group info. But bitmap says 0\n",
                                        free);
                        break;
@@ -1857,7 +1562,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
                mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
                BUG_ON(ex.fe_len <= 0);
                if (free < ex.fe_len) {
-                        ext4_error(sb, __FUNCTION__, "%d free blocks as per "
+                        ext4_error(sb, __func__, "%d free blocks as per "
                                        "group info. But got %d blocks\n",
                                        free, ex.fe_len);
                        /*
@@ -1965,7 +1670,8 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
        return 0;
 }
-static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
+static noinline_for_stack int
+ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
        ext4_group_t group;
        ext4_group_t i;
@@ -2465,7 +2171,8 @@ static void ext4_mb_history_init(struct super_block *sb)
        /* if we can't allocate history, then we simple won't use it */
 }
-static void ext4_mb_store_history(struct ext4_allocation_context *ac)
+static noinline_for_stack void
+ext4_mb_store_history(struct ext4_allocation_context *ac)
 {
        struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
        struct ext4_mb_history h;
@@ -2565,13 +2272,13 @@ static int ext4_mb_init_backend(struct super_block *sb)
                meta_group_info[j] = kzalloc(len, GFP_KERNEL);
                if (meta_group_info[j] == NULL) {
                        printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
-                        i--;
                        goto err_freebuddy;
                }
                desc = ext4_get_group_desc(sb, i, NULL);
                if (desc == NULL) {
                        printk(KERN_ERR
                                "EXT4-fs: can't read descriptor %lu\n", i);
+                        i++;
                        goto err_freebuddy;
                }
                memset(meta_group_info[j], 0, len);
@@ -2611,13 +2318,11 @@ static int ext4_mb_init_backend(struct super_block *sb)
        return 0;
 err_freebuddy:
-        while (i >= 0) {
+        while (i-- > 0)
                kfree(ext4_get_group_info(sb, i));
-                i--;
-        }
        i = num_meta_group_infos;
 err_freemeta:
-        while (--i >= 0)
+        while (i-- > 0)
                kfree(sbi->s_group_info[i]);
        iput(sbi->s_buddy_cache);
 err_freesgi:
@@ -2801,7 +2506,8 @@ int ext4_mb_release(struct super_block *sb)
        return 0;
 }
-static void ext4_mb_free_committed_blocks(struct super_block *sb)
+static noinline_for_stack void
+ext4_mb_free_committed_blocks(struct super_block *sb)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        int err;
@@ -3021,7 +2727,8 @@ void exit_ext4_mballoc(void)
 * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps
 * Returns 0 if success or error code
 */
-static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+static noinline_for_stack int
+ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                                handle_t *handle)
 {
        struct buffer_head *bitmap_bh = NULL;
@@ -3070,7 +2777,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                        in_range(block, ext4_inode_table(sb, gdp),
                                EXT4_SB(sb)->s_itb_per_group)) {
-                ext4_error(sb, __FUNCTION__,
+                ext4_error(sb, __func__,
                           "Allocating block in system zone - block = %llu",
                           block);
        }
@@ -3094,9 +2801,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                                                ac->ac_b_ex.fe_group,
                                                gdp));
        }
-        gdp->bg_free_blocks_count =
+        le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
-                cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
-                                - ac->ac_b_ex.fe_len);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
        spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
        percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -3130,7 +2835,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
                ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
        else
                ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
-        mb_debug("#%u: goal %lu blocks for locality group\n",
+        mb_debug("#%u: goal %u blocks for locality group\n",
                current->pid, ac->ac_g_ex.fe_len);
 }
@@ -3138,15 +2843,16 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
 * Normalization means making request better in terms of
 * size and alignment
 */
-static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
+static noinline_for_stack void
+ext4_mb_normalize_request(struct ext4_allocation_context *ac,
                                struct ext4_allocation_request *ar)
 {
        int bsbits, max;
        ext4_lblk_t end;
-        struct list_head *cur;
        loff_t size, orig_size, start_off;
        ext4_lblk_t start, orig_start;
        struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
+        struct ext4_prealloc_space *pa;
        /* do normalize only data requests, metadata requests
           do not need preallocation */
@@ -3232,12 +2938,9 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
        /* check we don't cross already preallocated blocks */
        rcu_read_lock();
-        list_for_each_rcu(cur, &ei->i_prealloc_list) {
+        list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
-                struct ext4_prealloc_space *pa;
                unsigned long pa_end;
-                pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
                if (pa->pa_deleted)
                        continue;
                spin_lock(&pa->pa_lock);
@@ -3279,10 +2982,8 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
        /* XXX: extra loop to check we really don't overlap preallocations */
        rcu_read_lock();
-        list_for_each_rcu(cur, &ei->i_prealloc_list) {
+        list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
-                struct ext4_prealloc_space *pa;
                unsigned long pa_end;
-                pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
                spin_lock(&pa->pa_lock);
                if (pa->pa_deleted == 0) {
                        pa_end = pa->pa_lstart + pa->pa_len;
@@ -3374,7 +3075,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
        BUG_ON(pa->pa_free < len);
        pa->pa_free -= len;
-        mb_debug("use %llu/%lu from inode pa %p\n", start, len, pa);
+        mb_debug("use %llu/%u from inode pa %p\n", start, len, pa);
 }
 /*
@@ -3404,12 +3105,12 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
 /*
 * search goal blocks in preallocated space
 */
-static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
+static noinline_for_stack int
+ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 {
        struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
        struct ext4_locality_group *lg;
        struct ext4_prealloc_space *pa;
-        struct list_head *cur;
        /* only data can be preallocated */
        if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3417,8 +3118,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
        /* first, try per-file preallocation */
        rcu_read_lock();
-        list_for_each_rcu(cur, &ei->i_prealloc_list) {
+        list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
-                pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
                /* all fields in this condition don't change,
                 * so we can skip locking for them */
@@ -3450,8 +3150,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
                return 0;
        rcu_read_lock();
-        list_for_each_rcu(cur, &lg->lg_prealloc_list) {
+        list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
-                pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
                spin_lock(&pa->pa_lock);
                if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
                        atomic_inc(&pa->pa_count);
@@ -3571,7 +3270,8 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
 /*
 * creates new preallocated space for given inode
 */
-static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
+static noinline_for_stack int
+ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 {
        struct super_block *sb = ac->ac_sb;
        struct ext4_prealloc_space *pa;
@@ -3658,7 +3358,8 @@ static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 /*
 * creates new preallocated space for locality group inodes belongs to
 */
-static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
+static noinline_for_stack int
+ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
 {
        struct super_block *sb = ac->ac_sb;
        struct ext4_locality_group *lg;
@@ -3731,11 +3432,11 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
 * the caller MUST hold group/inode locks.
 * TODO: optimize the case when there are no in-core structures yet
 */
-static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
+static noinline_for_stack int
-                                struct buffer_head *bitmap_bh,
+ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
-                                struct ext4_prealloc_space *pa)
+                        struct ext4_prealloc_space *pa,
+                        struct ext4_allocation_context *ac)
 {
-        struct ext4_allocation_context *ac;
        struct super_block *sb = e4b->bd_sb;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        unsigned long end;
@@ -3751,8 +3452,6 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
        BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
        end = bit + pa->pa_len;
-        ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
        if (ac) {
                ac->ac_sb = sb;
                ac->ac_inode = pa->pa_inode;
@@ -3789,7 +3488,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
                        pa, (unsigned long) pa->pa_lstart,
                        (unsigned long) pa->pa_pstart,
                        (unsigned long) pa->pa_len);
-                ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n",
+                ext4_error(sb, __func__, "free %u, pa_free %u\n",
                                                free, pa->pa_free);
                /*
                 * pa is already deleted so we use the value obtained
@@ -3797,22 +3496,19 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
                 */
        }
        atomic_add(free, &sbi->s_mb_discarded);
-        if (ac)
-                kmem_cache_free(ext4_ac_cachep, ac);
        return err;
 }
-static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
+static noinline_for_stack int
-                                struct ext4_prealloc_space *pa)
+ext4_mb_release_group_pa(struct ext4_buddy *e4b,
+                                struct ext4_prealloc_space *pa,
+                                struct ext4_allocation_context *ac)
 {
-        struct ext4_allocation_context *ac;
        struct super_block *sb = e4b->bd_sb;
        ext4_group_t group;
        ext4_grpblk_t bit;
-        ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
        if (ac)
                ac->ac_op = EXT4_MB_HISTORY_DISCARD;
@@ -3830,7 +3526,6 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
                ac->ac_b_ex.fe_len = pa->pa_len;
                ac->ac_b_ex.fe_logical = 0;
                ext4_mb_store_history(ac);
-                kmem_cache_free(ext4_ac_cachep, ac);
        }
        return 0;
@@ -3845,12 +3540,14 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
 * - how many do we discard
 *   1) how many requested
 */
-static int ext4_mb_discard_group_preallocations(struct super_block *sb,
+static noinline_for_stack int
+ext4_mb_discard_group_preallocations(struct super_block *sb,
                                        ext4_group_t group, int needed)
 {
        struct ext4_group_info *grp = ext4_get_group_info(sb, group);
        struct buffer_head *bitmap_bh = NULL;
        struct ext4_prealloc_space *pa, *tmp;
+        struct ext4_allocation_context *ac;
        struct list_head list;
        struct ext4_buddy e4b;
        int err;
@@ -3878,6 +3575,7 @@ static int ext4_mb_discard_group_preallocations(struct super_block *sb,
        grp = ext4_get_group_info(sb, group);
        INIT_LIST_HEAD(&list);
+        ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
 repeat:
        ext4_lock_group(sb, group);
        list_for_each_entry_safe(pa, tmp,
@@ -3932,9 +3630,9 @@ repeat:
                spin_unlock(pa->pa_obj_lock);
                if (pa->pa_linear)
-                        ext4_mb_release_group_pa(&e4b, pa);
+                        ext4_mb_release_group_pa(&e4b, pa, ac);
                else
-                        ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
+                        ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
                list_del(&pa->u.pa_tmp_list);
                call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3942,6 +3640,8 @@ repeat:
 out:
        ext4_unlock_group(sb, group);
+        if (ac)
+                kmem_cache_free(ext4_ac_cachep, ac);
        ext4_mb_release_desc(&e4b);
        put_bh(bitmap_bh);
        return free;
@@ -3962,6 +3662,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
        struct super_block *sb = inode->i_sb;
        struct buffer_head *bitmap_bh = NULL;
        struct ext4_prealloc_space *pa, *tmp;
+        struct ext4_allocation_context *ac;
        ext4_group_t group = 0;
        struct list_head list;
        struct ext4_buddy e4b;
@@ -3976,6 +3677,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
        INIT_LIST_HEAD(&list);
+        ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
 repeat:
        /* first, collect all pa's in the inode */
        spin_lock(&ei->i_prealloc_lock);
@@ -4040,7 +3742,7 @@ repeat:
                ext4_lock_group(sb, group);
                list_del(&pa->pa_group_list);
-                ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
+                ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
                ext4_unlock_group(sb, group);
                ext4_mb_release_desc(&e4b);
@@ -4049,6 +3751,8 @@ repeat:
                list_del(&pa->u.pa_tmp_list);
                call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
        }
+        if (ac)
+                kmem_cache_free(ext4_ac_cachep, ac);
 }
 /*
@@ -4108,7 +3812,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
                        printk(KERN_ERR "PA:%lu:%d:%u \n", i,
                                                        start, pa->pa_len);
                }
-                ext4_lock_group(sb, i);
+                ext4_unlock_group(sb, i);
                if (grp->bb_free == 0)
                        continue;
@@ -4167,7 +3871,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
        mutex_lock(&ac->ac_lg->lg_mutex);
 }
-static int ext4_mb_initialize_context(struct ext4_allocation_context *ac,
+static noinline_for_stack int
+ext4_mb_initialize_context(struct ext4_allocation_context *ac,
                                struct ext4_allocation_request *ar)
 {
        struct super_block *sb = ar->inode->i_sb;
@@ -4398,7 +4103,8 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb,
        ext4_mb_free_committed_blocks(sb);
 }
-static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
+static noinline_for_stack int
+ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
                          ext4_group_t group, ext4_grpblk_t block, int count)
 {
        struct ext4_group_info *db = e4b->bd_info;
@@ -4489,7 +4195,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
        if (block < le32_to_cpu(es->s_first_data_block) ||
            block + count < block ||
            block + count > ext4_blocks_count(es)) {
-                ext4_error(sb, __FUNCTION__,
+                ext4_error(sb, __func__,
                            "Freeing blocks not in datazone - "
                            "block = %lu, count = %lu", block, count);
                goto error_return;
@@ -4530,7 +4236,7 @@ do_more:
            in_range(block + count - 1, ext4_inode_table(sb, gdp),
                      EXT4_SB(sb)->s_itb_per_group)) {
-                ext4_error(sb, __FUNCTION__,
+                ext4_error(sb, __func__,
                           "Freeing blocks in system zone - "
                           "Block = %lu, count = %lu", block, count);
        }
@@ -4588,8 +4294,7 @@ do_more:
        }
        spin_lock(sb_bgl_lock(sbi, block_group));
-        gdp->bg_free_blocks_count =
+        le16_add_cpu(&gdp->bg_free_blocks_count, count);
-                cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
        spin_unlock(sb_bgl_lock(sbi, block_group));
        percpu_counter_add(&sbi->s_freeblocks_counter, count);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
new file mode 100644
index 000000000000..bfe6add46bcf
--- /dev/null
+++ b/fs/ext4/mballoc.h
@@ -0,0 +1,304 @@
+/*
+ *  fs/ext4/mballoc.h
+ *
+ *  Written by: Alex Tomas <alex@clusterfs.com>
+ *
+ */
+#ifndef _EXT4_MBALLOC_H
+#define _EXT4_MBALLOC_H
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
+#include <linux/module.h>
+#include <linux/swap.h>
+#include <linux/proc_fs.h>
+#include <linux/pagemap.h>
+#include <linux/seq_file.h>
+#include <linux/version.h>
+#include "ext4_jbd2.h"
+#include "ext4.h"
+#include "group.h"
+/*
+ * with AGGRESSIVE_CHECK allocator runs consistency checks over
+ * structures. these checks slow things down a lot
+ */
+#define AGGRESSIVE_CHECK__
+/*
+ * with DOUBLE_CHECK defined mballoc creates persistent in-core
+ * bitmaps, maintains and uses them to check for double allocations
+ */
+#define DOUBLE_CHECK__
+/*
+ * with MB_DEBUG defined mb_debug() forwards its arguments to printk(),
+ * otherwise mb_debug() expands to nothing. Only MB_DEBUG__ (with trailing
+ * underscores) is defined below, so the #else branch is taken unless
+ * MB_DEBUG gets defined elsewhere.
+ */
+#define MB_DEBUG__
+#ifdef MB_DEBUG
+#define mb_debug(fmt, a...)     printk(fmt, ##a)
+#else
+#define mb_debug(fmt, a...)
+#endif
+/*
+ * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
+ * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
+ */
+#define EXT4_MB_HISTORY
+#define EXT4_MB_HISTORY_ALLOC           1       /* allocation */
+#define EXT4_MB_HISTORY_PREALLOC        2       /* preallocated blocks used */
+#define EXT4_MB_HISTORY_DISCARD         4       /* preallocation discarded */
+#define EXT4_MB_HISTORY_FREE            8       /* free */
+#define EXT4_MB_HISTORY_DEFAULT         (EXT4_MB_HISTORY_ALLOC | \
+                                         EXT4_MB_HISTORY_PREALLOC)
+/*
+ * How long mballoc can look for a best extent (in found extents)
+ */
+#define MB_DEFAULT_MAX_TO_SCAN          200
+/*
+ * How long mballoc must look for a best extent
+ */
+#define MB_DEFAULT_MIN_TO_SCAN          10
+/*
+ * How many groups mballoc will scan looking for the best chunk
+ */
+#define MB_DEFAULT_MAX_GROUPS_TO_SCAN   5
+/*
+ * with 'ext4_mb_stats' allocator will collect stats that will be
+ * shown at umount. The collecting costs though!
+ */
+#define MB_DEFAULT_STATS                1
+/*
+ * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
+ * by the stream allocator, whose purpose is to pack requests
+ * as close to each other as possible to produce smooth I/O traffic.
+ * We use locality group prealloc space for stream requests.
+ * We can tune the same via /proc/fs/ext4/<partition>/stream_req
+ */
+#define MB_DEFAULT_STREAM_THRESHOLD     16      /* 64K */
+/*
+ * for which requests use 2^N search using buddies
+ */
+#define MB_DEFAULT_ORDER2_REQS          2
+/*
+ * default group prealloc size 512 blocks
+ */
+#define MB_DEFAULT_GROUP_PREALLOC       512
+static struct kmem_cache *ext4_pspace_cachep;
+static struct kmem_cache *ext4_ac_cachep;
+#ifdef EXT4_BB_MAX_BLOCKS
+#undef EXT4_BB_MAX_BLOCKS
+#endif
+#define EXT4_BB_MAX_BLOCKS      30
+struct ext4_free_metadata {     /* NOTE(review): looks like a per-group batch
+                                 * of freed block numbers; confirm against
+                                 * ext4_mb_free_metadata() in mballoc.c */
+        ext4_group_t group;
+        unsigned short num;     /* presumably the number of used blocks[]
+                                 * slots -- TODO confirm */
+        ext4_grpblk_t  blocks[EXT4_BB_MAX_BLOCKS];      /* fixed capacity:
+                                 * EXT4_BB_MAX_BLOCKS entries */
+        struct list_head list;
+};
+struct ext4_group_info {
+        unsigned long   bb_state;       /* bit flags, indexed by the
+                                         * EXT4_GROUP_INFO_*_BIT constants */
+        unsigned long   bb_tid;
+        struct ext4_free_metadata *bb_md_cur;
+        unsigned short  bb_first_free;
+        unsigned short  bb_free;
+        unsigned short  bb_fragments;
+        struct          list_head bb_prealloc_list;
+#ifdef DOUBLE_CHECK
+        void            *bb_bitmap;     /* exists only in DOUBLE_CHECK builds */
+#endif
+        unsigned short  bb_counters[];  /* flexible array member, so it has
+                                         * to stay the last field */
+};
+#define EXT4_GROUP_INFO_NEED_INIT_BIT   0
+#define EXT4_GROUP_INFO_LOCKED_BIT      1
+#define EXT4_MB_GRP_NEED_INIT(grp)      \
+        (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
+struct ext4_prealloc_space {
+        struct list_head        pa_inode_list;
+        struct list_head        pa_group_list;
+        union {
+                struct list_head pa_tmp_list;
+                struct rcu_head pa_rcu;
+        } u;                                    /* both members share the
+                                                 * same storage */
+        spinlock_t              pa_lock;
+        atomic_t                pa_count;       /* presumably a reference
+                                                 * count -- TODO confirm */
+        unsigned                pa_deleted;
+        ext4_fsblk_t            pa_pstart;      /* phys. block */
+        ext4_lblk_t             pa_lstart;      /* log. block */
+        unsigned short          pa_len;         /* len of preallocated chunk */
+        unsigned short          pa_free;        /* how many blocks are free */
+        unsigned short          pa_linear;      /* consumed in one direction
+                                                 * strictly, for grp prealloc */
+        spinlock_t              *pa_obj_lock;   /* points at a lock that is
+                                                 * stored elsewhere; owner
+                                                 * not visible here */
+        struct inode            *pa_inode;      /* hack, for history only */
+};
+struct ext4_free_extent {
+        ext4_lblk_t fe_logical;         /* presumably the logical block the
+                                         * extent maps -- TODO confirm */
+        ext4_grpblk_t fe_start;         /* block offset inside group fe_group,
+                                         * see ext4_grp_offs_to_block() */
+        ext4_group_t fe_group;          /* block group number */
+        int fe_len;                     /* presumably the length in blocks
+                                         * -- TODO confirm */
+};
+/*
+ * Locality group:
+ *   we try to group all related changes together
+ *   so that writeback can flush/allocate them together as well
+ *   (an allocation context refers to its group through ac_lg)
+ */
+struct ext4_locality_group {
+        /* for allocator */
+        struct mutex            lg_mutex;       /* to serialize allocations */
+        struct list_head        lg_prealloc_list;/* list of preallocations */
+        spinlock_t              lg_prealloc_lock;       /* presumably protects
+                                                 * lg_prealloc_list --
+                                                 * TODO confirm */
+};
+struct ext4_allocation_context {
+        struct inode *ac_inode;
+        struct super_block *ac_sb;
+        /* original request */
+        struct ext4_free_extent ac_o_ex;
+        /* goal request (after normalization) */
+        struct ext4_free_extent ac_g_ex;
+        /* the best found extent */
+        struct ext4_free_extent ac_b_ex;
+        /* copy of the best found extent taken before preallocation efforts */
+        struct ext4_free_extent ac_f_ex;
+        /* number of iterations done. we have to track to limit searching */
+        unsigned long ac_ex_scanned;
+        __u16 ac_groups_scanned;
+        __u16 ac_found;
+        __u16 ac_tail;
+        __u16 ac_buddy;
+        __u16 ac_flags;         /* allocation hints */
+        __u8 ac_status;         /* presumably one of AC_STATUS_* --
+                                 * TODO confirm */
+        __u8 ac_criteria;
+        __u8 ac_repeats;
+        __u8 ac_2order;         /* if request is to allocate 2^N blocks and
+                                 * N > 0, the field stores N, otherwise 0 */
+        __u8 ac_op;             /* operation, for history only */
+        struct page *ac_bitmap_page;
+        struct page *ac_buddy_page;
+        struct ext4_prealloc_space *ac_pa;
+        struct ext4_locality_group *ac_lg;
+};
+#define AC_STATUS_CONTINUE      1
+#define AC_STATUS_FOUND         2
+#define AC_STATUS_BREAK         3
+struct ext4_mb_history {
+        struct ext4_free_extent orig;   /* original allocation request */
+        struct ext4_free_extent goal;   /* goal allocation */
+        struct ext4_free_extent result; /* result allocation */
+        unsigned pid;
+        unsigned ino;
+        __u16 found;    /* how many extents have been found */
+        __u16 groups;   /* how many groups have been scanned */
+        __u16 tail;     /* what tail broke some buddy */
+        __u16 buddy;    /* buddy the tail ^^^ broke */
+        __u16 flags;
+        __u8 cr:3;      /* which phase the result extent was found at */
+        __u8 op:4;      /* 4 bits: enough for the largest EXT4_MB_HISTORY_*
+                         * code (8), presumably what is stored -- confirm */
+        __u8 merged:1;  /* cr + op + merged use 3 + 4 + 1 = 8 bits */
+};
+struct ext4_buddy {
+        struct page *bd_buddy_page;
+        void *bd_buddy;                 /* read through EXT4_MB_BUDDY() */
+        struct page *bd_bitmap_page;
+        void *bd_bitmap;                /* read through EXT4_MB_BITMAP() */
+        struct ext4_group_info *bd_info;
+        struct super_block *bd_sb;
+        __u16 bd_blkbits;
+        ext4_group_t bd_group;
+};
+#define EXT4_MB_BITMAP(e4b)     ((e4b)->bd_bitmap)      /* e4b: struct ext4_buddy * */
+#define EXT4_MB_BUDDY(e4b)      ((e4b)->bd_buddy)       /* e4b: struct ext4_buddy * */
+/*
+ * ext4_mb_store_history() is only declared here when EXT4_MB_HISTORY is
+ * defined; without it the call collapses to an empty inline stub.
+ */
+#ifdef EXT4_MB_HISTORY
+static void ext4_mb_store_history(struct ext4_allocation_context *ac);
+#else
+static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
+{
+}
+#endif
+#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) /* true
+                                 * when b lies in [first, first + len - 1];
+                                 * note that "first" is expanded twice */
+static struct proc_dir_entry *proc_root_ext4;
+struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
+static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+                                        ext4_group_t group);
+static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
+static void ext4_mb_free_committed_blocks(struct super_block *);
+static void ext4_mb_return_to_preallocation(struct inode *inode,
+                                        struct ext4_buddy *e4b, sector_t block,
+                                        int count);
+static void ext4_mb_put_pa(struct ext4_allocation_context *,
+                        struct super_block *, struct ext4_prealloc_space *pa);
+static int ext4_mb_init_per_dev_proc(struct super_block *sb);
+static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
+/*
+ * Acquire the bit spinlock of a block group: EXT4_GROUP_INFO_LOCKED_BIT
+ * in the bb_state word of the group's ext4_group_info.
+ */
+static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
+{
+        bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT,
+                        &ext4_get_group_info(sb, group)->bb_state);
+}
+/*
+ * Release the bit spinlock of a block group (EXT4_GROUP_INFO_LOCKED_BIT in
+ * the bb_state word of the group's ext4_group_info).
+ */
+static inline void ext4_unlock_group(struct super_block *sb,
+                                        ext4_group_t group)
+{
+        struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+        unsigned long *state = &grp->bb_state;
+        bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, state);
+}
+/*
+ * Report whether the bit spinlock of a block group is currently held;
+ * returns the result of bit_spin_is_locked() on EXT4_GROUP_INFO_LOCKED_BIT
+ * of the group's bb_state word.
+ */
+static inline int ext4_is_group_locked(struct super_block *sb,
+                                        ext4_group_t group)
+{
+        unsigned long *state = &ext4_get_group_info(sb, group)->bb_state;
+        return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, state);
+}
+/*
+ * Convert the (group, offset-in-group) position of a free extent into an
+ * absolute filesystem block number:
+ *   fe_group * EXT4_BLOCKS_PER_GROUP(sb) + fe_start + s_first_data_block
+ * The group number is cast to ext4_fsblk_t first so the multiplication is
+ * carried out in the filesystem block type.
+ * Declared inline like the other helpers in this header, so that a user of
+ * the header that never calls it is not left with an unused static function.
+ */
+static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
+                                        struct ext4_free_extent *fex)
+{
+        return (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
+                        + fex->fe_start
+                        + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+}
+#endif
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 5c1e27de7755..b9e077ba07e9 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -13,8 +13,8 @@
 */
 #include <linux/module.h>
-#include <linux/ext4_jbd2.h>
+#include "ext4_jbd2.h"
-#include <linux/ext4_fs_extents.h>
+#include "ext4_extents.h"
 /*
 * The contiguous blocks details which can be
@@ -327,7 +327,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
 }
 static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
-                                struct inode *tmp_inode)
+                                                struct inode *tmp_inode)
 {
        int retval;
        __le32  i_data[3];
@@ -339,7 +339,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
         * i_data field of the original inode
         */
        retval = ext4_journal_extend(handle, 1);
-        if (retval != 0) {
+        if (retval) {
                retval = ext4_journal_restart(handle, 1);
                if (retval)
                        goto err_out;
@@ -351,6 +351,18 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
        down_write(&EXT4_I(inode)->i_data_sem);
        /*
+         * if EXT4_EXT_MIGRATE is cleared a block allocation
+         * happened after we started the migrate. We need to
+         * fail the migrate
+         */
+        if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) {
+                retval = -EAGAIN;
+                up_write(&EXT4_I(inode)->i_data_sem);
+                goto err_out;
+        } else
+                EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
+                                                        ~EXT4_EXT_MIGRATE;
+        /*
         * We have the extent map build with the tmp inode.
         * Now copy the i_data across
         */
@@ -508,6 +520,17 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
         * switch the inode format to prevent read.
         */
        mutex_lock(&(inode->i_mutex));
+        /*
+         * Even though we take i_mutex we can still cause block allocation
+         * via mmap write to holes. If we have allocated new blocks we fail
+         * migrate.  New block allocation will clear EXT4_EXT_MIGRATE flag.
+         * The flag is updated with i_data_sem held to prevent racing with
+         * block allocation.
+         */
+        down_read((&EXT4_I(inode)->i_data_sem));
+        EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE;
+        up_read((&EXT4_I(inode)->i_data_sem));
        handle = ext4_journal_start(inode, 1);
        ei = EXT4_I(inode);
@@ -559,9 +582,15 @@ err_out:
                 * tmp_inode
                 */
                free_ext_block(handle, tmp_inode);
-        else
+        else {
-                retval = ext4_ext_swap_inode_data(handle, inode,
+                retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
-                                                        tmp_inode);
+                if (retval)
+                        /*
+                         * if we fail to swap inode data free the extent
+                         * details of the tmp inode
+                         */
+                        free_ext_block(handle, tmp_inode);
+        }
        /* We mark the tmp_inode dirty via ext4_ext_tree_init. */
        if (ext4_journal_extend(handle, 1) != 0)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 28aa2ed4297e..ab16beaa830d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -28,14 +28,14 @@
 #include <linux/pagemap.h>
 #include <linux/jbd2.h>
 #include <linux/time.h>
-#include <linux/ext4_fs.h>
-#include <linux/ext4_jbd2.h>
 #include <linux/fcntl.h>
 #include <linux/stat.h>
 #include <linux/string.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
 #include <linux/bio.h>
+#include "ext4.h"
+#include "ext4_jbd2.h"
 #include "namei.h"
 #include "xattr.h"
@@ -57,10 +57,15 @@ static struct buffer_head *ext4_append(handle_t *handle,
        *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
-        if ((bh = ext4_bread(handle, inode, *block, 1, err))) {
+        bh = ext4_bread(handle, inode, *block, 1, err);
+        if (bh) {
                inode->i_size += inode->i_sb->s_blocksize;
                EXT4_I(inode)->i_disksize = inode->i_size;
-                ext4_journal_get_write_access(handle,bh);
+                *err = ext4_journal_get_write_access(handle, bh);
+                if (*err) {
+                        brelse(bh);
+                        bh = NULL;
+                }
        }
        return bh;
 }
@@ -348,7 +353,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
        if (root->info.hash_version != DX_HASH_TEA &&
            root->info.hash_version != DX_HASH_HALF_MD4 &&
            root->info.hash_version != DX_HASH_LEGACY) {
-                ext4_warning(dir->i_sb, __FUNCTION__,
+                ext4_warning(dir->i_sb, __func__,
                             "Unrecognised inode hash code %d",
                             root->info.hash_version);
                brelse(bh);
@@ -362,7 +367,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
        hash = hinfo->hash;
        if (root->info.unused_flags & 1) {
-                ext4_warning(dir->i_sb, __FUNCTION__,
+                ext4_warning(dir->i_sb, __func__,
                             "Unimplemented inode hash flags: %#06x",
                             root->info.unused_flags);
                brelse(bh);
@@ -371,7 +376,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
        }
        if ((indirect = root->info.indirect_levels) > 1) {
-                ext4_warning(dir->i_sb, __FUNCTION__,
+                ext4_warning(dir->i_sb, __func__,
                             "Unimplemented inode hash depth: %#06x",
                             root->info.indirect_levels);
                brelse(bh);
@@ -384,7 +389,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
        if (dx_get_limit(entries) != dx_root_limit(dir,
                                                   root->info.info_length)) {
-                ext4_warning(dir->i_sb, __FUNCTION__,
+                ext4_warning(dir->i_sb, __func__,
                             "dx entry: limit != root limit");
                brelse(bh);
                *err = ERR_BAD_DX_DIR;
@@ -396,7 +401,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
        {
                count = dx_get_count(entries);
                if (!count || count > dx_get_limit(entries)) {
-                        ext4_warning(dir->i_sb, __FUNCTION__,
+                        ext4_warning(dir->i_sb, __func__,
                                     "dx entry: no count or count > limit");
                        brelse(bh);
                        *err = ERR_BAD_DX_DIR;
@@ -441,7 +446,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
                        goto fail2;
                at = entries = ((struct dx_node *) bh->b_data)->entries;
                if (dx_get_limit(entries) != dx_node_limit (dir)) {
-                        ext4_warning(dir->i_sb, __FUNCTION__,
+                        ext4_warning(dir->i_sb, __func__,
                                     "dx entry: limit != node limit");
                        brelse(bh);
                        *err = ERR_BAD_DX_DIR;
@@ -457,7 +462,7 @@ fail2:
        }
 fail:
        if (*err == ERR_BAD_DX_DIR)
-                ext4_warning(dir->i_sb, __FUNCTION__,
+                ext4_warning(dir->i_sb, __func__,
                             "Corrupt dir inode %ld, running e2fsck is "
                             "recommended.", dir->i_ino);
        return NULL;
@@ -914,7 +919,7 @@ restart:
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh)) {
                        /* read error, skip block & hope for the best */
-                        ext4_error(sb, __FUNCTION__, "reading directory #%lu "
+                        ext4_error(sb, __func__, "reading directory #%lu "
                                   "offset %lu", dir->i_ino,
                                   (unsigned long)block);
                        brelse(bh);
@@ -1007,7 +1012,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
                retval = ext4_htree_next_block(dir, hash, frame,
                                               frames, NULL);
                if (retval < 0) {
-                        ext4_warning(sb, __FUNCTION__,
+                        ext4_warning(sb, __func__,
                             "error reading index page in directory #%lu",
                             dir->i_ino);
                        *err = retval;
@@ -1532,7 +1537,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
                if (levels && (dx_get_count(frames->entries) ==
                               dx_get_limit(frames->entries))) {
-                        ext4_warning(sb, __FUNCTION__,
+                        ext4_warning(sb, __func__,
                                     "Directory index full!");
                        err = -ENOSPC;
                        goto cleanup;
@@ -1860,11 +1865,11 @@ static int empty_dir (struct inode * inode)
        if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
            !(bh = ext4_bread (NULL, inode, 0, 0, &err))) {
                if (err)
-                        ext4_error(inode->i_sb, __FUNCTION__,
+                        ext4_error(inode->i_sb, __func__,
                                   "error %d reading directory #%lu offset 0",
                                   err, inode->i_ino);
                else
-                        ext4_warning(inode->i_sb, __FUNCTION__,
+                        ext4_warning(inode->i_sb, __func__,
                                     "bad directory (dir #%lu) - no data block",
                                     inode->i_ino);
                return 1;
@@ -1893,7 +1898,7 @@ static int empty_dir (struct inode * inode)
                                offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
                        if (!bh) {
                                if (err)
-                                        ext4_error(sb, __FUNCTION__,
+                                        ext4_error(sb, __func__,
                                                   "error %d reading directory"
                                                   " #%lu offset %lu",
                                                   err, inode->i_ino, offset);
@@ -2217,6 +2222,8 @@ retry:
                        goto out_stop;
                }
        } else {
+                /* clear the extent format for fast symlink */
+                EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
                inode->i_op = &ext4_fast_symlink_inode_operations;
                memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
                inode->i_size = l-1;
@@ -2347,6 +2354,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
                                              EXT4_FEATURE_INCOMPAT_FILETYPE))
                        new_de->file_type = old_de->file_type;
                new_dir->i_version++;
+                new_dir->i_ctime = new_dir->i_mtime =
+                                        ext4_current_time(new_dir);
+                ext4_mark_inode_dirty(handle, new_dir);
                BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata");
                ext4_journal_dirty_metadata(handle, new_bh);
                brelse(new_bh);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index e29efa0f9d62..9f086a6a472b 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -11,11 +11,10 @@
 #define EXT4FS_DEBUG
-#include <linux/ext4_jbd2.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
+#include "ext4_jbd2.h"
 #include "group.h"
 #define outside(b, first, last) ((b) < (first) || (b) >= (last))
@@ -50,63 +49,63 @@ static int verify_group_input(struct super_block *sb,
        ext4_get_group_no_and_offset(sb, start, NULL, &offset);
        if (group != sbi->s_groups_count)
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "Cannot add at group %u (only %lu groups)",
                             input->group, sbi->s_groups_count);
        else if (offset != 0)
-                        ext4_warning(sb, __FUNCTION__, "Last group not full");
+                        ext4_warning(sb, __func__, "Last group not full");
        else if (input->reserved_blocks > input->blocks_count / 5)
-                ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)",
+                ext4_warning(sb, __func__, "Reserved blocks too high (%u)",
                             input->reserved_blocks);
        else if (free_blocks_count < 0)
-                ext4_warning(sb, __FUNCTION__, "Bad blocks count %u",
+                ext4_warning(sb, __func__, "Bad blocks count %u",
                             input->blocks_count);
        else if (!(bh = sb_bread(sb, end - 1)))
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "Cannot read last block (%llu)",
                             end - 1);
        else if (outside(input->block_bitmap, start, end))
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "Block bitmap not in group (block %llu)",
                             (unsigned long long)input->block_bitmap);
        else if (outside(input->inode_bitmap, start, end))
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "Inode bitmap not in group (block %llu)",
                             (unsigned long long)input->inode_bitmap);
        else if (outside(input->inode_table, start, end) ||
                 outside(itend - 1, start, end))
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "Inode table not in group (blocks %llu-%llu)",
                             (unsigned long long)input->inode_table, itend - 1);
        else if (input->inode_bitmap == input->block_bitmap)
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "Block bitmap same as inode bitmap (%llu)",
                             (unsigned long long)input->block_bitmap);
        else if (inside(input->block_bitmap, input->inode_table, itend))
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "Block bitmap (%llu) in inode table (%llu-%llu)",
                             (unsigned long long)input->block_bitmap,
                             (unsigned long long)input->inode_table, itend - 1);
        else if (inside(input->inode_bitmap, input->inode_table, itend))
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "Inode bitmap (%llu) in inode table (%llu-%llu)",
                             (unsigned long long)input->inode_bitmap,
                             (unsigned long long)input->inode_table, itend - 1);
        else if (inside(input->block_bitmap, start, metaend))
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "Block bitmap (%llu) in GDT table"
                             " (%llu-%llu)",
                             (unsigned long long)input->block_bitmap,
                             start, metaend - 1);
        else if (inside(input->inode_bitmap, start, metaend))
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "Inode bitmap (%llu) in GDT table"
                             " (%llu-%llu)",
                             (unsigned long long)input->inode_bitmap,
                             start, metaend - 1);
        else if (inside(input->inode_table, start, metaend) ||
                 inside(itend - 1, start, metaend))
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "Inode table (%llu-%llu) overlaps"
                             "GDT table (%llu-%llu)",
                             (unsigned long long)input->inode_table,
@@ -368,7 +367,7 @@ static int verify_reserved_gdb(struct super_block *sb,
        while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
                if (le32_to_cpu(*p++) !=
                    grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
-                        ext4_warning(sb, __FUNCTION__,
+                        ext4_warning(sb, __func__,
                                     "reserved GDT %llu"
                                     " missing grp %d (%llu)",
                                     blk, grp,
@@ -424,7 +423,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
         */
        if (EXT4_SB(sb)->s_sbh->b_blocknr !=
            le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                        "won't resize using backup superblock at %llu",
                        (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
                return -EPERM;
@@ -448,7 +447,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
        data = (__le32 *)dind->b_data;
        if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "new group %u GDT block %llu not reserved",
                             input->group, gdblock);
                err = -EINVAL;
@@ -469,10 +468,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
                goto exit_dindj;
        n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
-                        GFP_KERNEL);
+                        GFP_NOFS);
        if (!n_group_desc) {
                err = -ENOMEM;
-                ext4_warning (sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                              "not enough memory for %lu groups", gdb_num + 1);
                goto exit_inode;
        }
@@ -502,8 +501,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
        EXT4_SB(sb)->s_gdb_count++;
        kfree(o_group_desc);
-        es->s_reserved_gdt_blocks =
+        le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
-                cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1);
        ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
        return 0;
@@ -553,7 +551,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
        int res, i;
        int err;
-        primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL);
+        primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
        if (!primary)
                return -ENOMEM;
@@ -571,7 +569,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
        /* Get each reserved primary GDT block and verify it holds backups */
        for (res = 0; res < reserved_gdb; res++, blk++) {
                if (le32_to_cpu(*data) != blk) {
-                        ext4_warning(sb, __FUNCTION__,
+                        ext4_warning(sb, __func__,
                                     "reserved block %llu"
                                     " not at offset %ld",
                                     blk,
@@ -715,7 +713,7 @@ static void update_backups(struct super_block *sb,
         */
 exit_err:
        if (err) {
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "can't update backup for group %lu (err %d), "
                             "forcing fsck on next reboot", group, err);
                sbi->s_mount_state &= ~EXT4_VALID_FS;
@@ -755,33 +753,33 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                        EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "Can't resize non-sparse filesystem further");
                return -EPERM;
        }
        if (ext4_blocks_count(es) + input->blocks_count <
            ext4_blocks_count(es)) {
-                ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n");
+                ext4_warning(sb, __func__, "blocks_count overflow\n");
                return -EINVAL;
        }
        if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
            le32_to_cpu(es->s_inodes_count)) {
-                ext4_warning(sb, __FUNCTION__, "inodes_count overflow\n");
+                ext4_warning(sb, __func__, "inodes_count overflow\n");
                return -EINVAL;
        }
        if (reserved_gdb || gdb_off == 0) {
                if (!EXT4_HAS_COMPAT_FEATURE(sb,
                                             EXT4_FEATURE_COMPAT_RESIZE_INODE)){
-                        ext4_warning(sb, __FUNCTION__,
+                        ext4_warning(sb, __func__,
                                     "No reserved GDT blocks, can't resize");
                        return -EPERM;
                }
                inode = ext4_iget(sb, EXT4_RESIZE_INO);
                if (IS_ERR(inode)) {
-                        ext4_warning(sb, __FUNCTION__,
+                        ext4_warning(sb, __func__,
                                     "Error opening resize inode");
                        return PTR_ERR(inode);
                }
@@ -810,7 +808,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        lock_super(sb);
        if (input->group != sbi->s_groups_count) {
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "multiple resizers run on filesystem!");
                err = -EBUSY;
                goto exit_journal;
@@ -877,8 +875,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
         */
        ext4_blocks_count_set(es, ext4_blocks_count(es) +
                input->blocks_count);
-        es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) +
+        le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb));
-                EXT4_INODES_PER_GROUP(sb));
        /*
         * We need to protect s_groups_count against other CPUs seeing
@@ -977,13 +974,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
                        " too large to resize to %llu blocks safely\n",
                        sb->s_id, n_blocks_count);
                if (sizeof(sector_t) < 8)
-                        ext4_warning(sb, __FUNCTION__,
+                        ext4_warning(sb, __func__,
                        "CONFIG_LBD not enabled\n");
                return -EINVAL;
        }
        if (n_blocks_count < o_blocks_count) {
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "can't shrink FS - resize aborted");
                return -EBUSY;
        }
@@ -992,7 +989,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
        ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last);
        if (last == 0) {
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "need to use ext2online to resize further");
                return -EPERM;
        }
@@ -1000,7 +997,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
        add = EXT4_BLOCKS_PER_GROUP(sb) - last;
        if (o_blocks_count + add < o_blocks_count) {
-                ext4_warning(sb, __FUNCTION__, "blocks_count overflow");
+                ext4_warning(sb, __func__, "blocks_count overflow");
                return -EINVAL;
        }
@@ -1008,7 +1005,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
                add = n_blocks_count - o_blocks_count;
        if (o_blocks_count + add < n_blocks_count)
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "will only finish group (%llu"
                             " blocks, %u new)",
                             o_blocks_count + add, add);
@@ -1016,7 +1013,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
        /* See if the device is actually as big as what was requested */
        bh = sb_bread(sb, o_blocks_count + add -1);
        if (!bh) {
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "can't read last block, resize aborted");
                return -ENOSPC;
        }
@@ -1028,13 +1025,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
        handle = ext4_journal_start_sb(sb, 3);
        if (IS_ERR(handle)) {
                err = PTR_ERR(handle);
-                ext4_warning(sb, __FUNCTION__, "error %d on journal start",err);
+                ext4_warning(sb, __func__, "error %d on journal start", err);
                goto exit_put;
        }
        lock_super(sb);
        if (o_blocks_count != ext4_blocks_count(es)) {
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "multiple resizers run on filesystem!");
                unlock_super(sb);
                ext4_journal_stop(handle);
@@ -1044,7 +1041,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
        if ((err = ext4_journal_get_write_access(handle,
                                                 EXT4_SB(sb)->s_sbh))) {
-                ext4_warning(sb, __FUNCTION__,
+                ext4_warning(sb, __func__,
                             "error %d on journal write access", err);
                unlock_super(sb);
                ext4_journal_stop(handle);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c81a8e759bad..52dd0679a4e2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -21,8 +21,6 @@
 #include <linux/fs.h>
 #include <linux/time.h>
 #include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
-#include <linux/ext4_jbd2.h>
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/blkdev.h>
@@ -38,9 +36,10 @@
 #include <linux/seq_file.h>
 #include <linux/log2.h>
 #include <linux/crc16.h>
 #include <asm/uaccess.h>
+#include "ext4.h"
+#include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
 #include "namei.h"
@@ -135,7 +134,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
         * take the FS itself readonly cleanly. */
        journal = EXT4_SB(sb)->s_journal;
        if (is_journal_aborted(journal)) {
-                ext4_abort(sb, __FUNCTION__,
+                ext4_abort(sb, __func__,
                           "Detected aborted journal");
                return ERR_PTR(-EROFS);
        }
@@ -355,7 +354,7 @@ void ext4_update_dynamic_rev(struct super_block *sb)
        if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
                return;
-        ext4_warning(sb, __FUNCTION__,
+        ext4_warning(sb, __func__,
                     "updating to rev %d because of new feature flag, "
                     "running e2fsck is recommended",
                     EXT4_DYNAMIC_REV);
@@ -945,8 +944,8 @@ static match_table_t tokens = {
        {Opt_mballoc, "mballoc"},
        {Opt_nomballoc, "nomballoc"},
        {Opt_stripe, "stripe=%u"},
-        {Opt_err, NULL},
        {Opt_resize, "resize"},
+        {Opt_err, NULL},
 };
 static ext4_fsblk_t get_sb_block(void **data)
@@ -1388,11 +1387,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
                 * a plain journaled filesystem we can keep it set as
                 * valid forever! :)
                 */
-        es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS);
+        es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
 #endif
        if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
                es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
-        es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1);
+        le16_add_cpu(&es->s_mnt_count, 1);
        es->s_mtime = cpu_to_le32(get_seconds());
        ext4_update_dynamic_rev(sb);
        EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -1485,36 +1484,33 @@ static int ext4_check_descriptors(struct super_block *sb)
                block_bitmap = ext4_block_bitmap(sb, gdp);
                if (block_bitmap < first_block || block_bitmap > last_block)
                {
-                        ext4_error (sb, "ext4_check_descriptors",
+                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
-                                    "Block bitmap for group %lu"
+                               "Block bitmap for group %lu not in group "
-                                    " not in group (block %llu)!",
+                               "(block %llu)!\n", i, block_bitmap);
-                                    i, block_bitmap);
                        return 0;
                }
                inode_bitmap = ext4_inode_bitmap(sb, gdp);
                if (inode_bitmap < first_block || inode_bitmap > last_block)
                {
-                        ext4_error (sb, "ext4_check_descriptors",
+                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
-                                    "Inode bitmap for group %lu"
+                               "Inode bitmap for group %lu not in group "
-                                    " not in group (block %llu)!",
+                               "(block %llu)!\n", i, inode_bitmap);
-                                    i, inode_bitmap);
                        return 0;
                }
                inode_table = ext4_inode_table(sb, gdp);
                if (inode_table < first_block ||
                    inode_table + sbi->s_itb_per_group - 1 > last_block)
                {
-                        ext4_error (sb, "ext4_check_descriptors",
+                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
-                                    "Inode table for group %lu"
+                               "Inode table for group %lu not in group "
-                                    " not in group (block %llu)!",
+                               "(block %llu)!\n", i, inode_table);
-                                    i, inode_table);
                        return 0;
                }
                if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
-                        ext4_error(sb, __FUNCTION__,
+                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
-                                   "Checksum for group %lu failed (%u!=%u)\n",
+                               "Checksum for group %lu failed (%u!=%u)\n",
-                                    i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
+                               i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
-                                    gdp)), le16_to_cpu(gdp->bg_checksum));
+                               gdp)), le16_to_cpu(gdp->bg_checksum));
                        return 0;
                }
                if (!flexbg_flag)
@@ -1594,8 +1590,8 @@ static void ext4_orphan_cleanup (struct super_block * sb,
        while (es->s_last_orphan) {
                struct inode *inode;
-                if (!(inode =
+                inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
-                      ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) {
+                if (IS_ERR(inode)) {
                        es->s_last_orphan = 0;
                        break;
                }
@@ -1605,7 +1601,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
                if (inode->i_nlink) {
                        printk(KERN_DEBUG
                                "%s: truncating inode %lu to %Ld bytes\n",
-                                __FUNCTION__, inode->i_ino, inode->i_size);
+                                __func__, inode->i_ino, inode->i_size);
                        jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
                                  inode->i_ino, inode->i_size);
                        ext4_truncate(inode);
@@ -1613,7 +1609,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
                } else {
                        printk(KERN_DEBUG
                                "%s: deleting unreferenced inode %lu\n",
-                                __FUNCTION__, inode->i_ino);
+                                __func__, inode->i_ino);
                        jbd_debug(2, "deleting unreferenced inode %lu\n",
                                  inode->i_ino);
                        nr_orphans++;
@@ -2699,9 +2695,9 @@ static void ext4_clear_journal_err(struct super_block * sb,
                char nbuf[16];
                errstr = ext4_decode_error(sb, j_errno, nbuf);
-                ext4_warning(sb, __FUNCTION__, "Filesystem error recorded "
+                ext4_warning(sb, __func__, "Filesystem error recorded "
                             "from previous mount: %s", errstr);
-                ext4_warning(sb, __FUNCTION__, "Marking fs in need of "
+                ext4_warning(sb, __func__, "Marking fs in need of "
                             "filesystem check.");
                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
@@ -2828,7 +2824,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
        }
        if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
-                ext4_abort(sb, __FUNCTION__, "Abort forced by user");
+                ext4_abort(sb, __func__, "Abort forced by user");
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
                ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
@@ -3040,8 +3036,14 @@ static int ext4_dquot_drop(struct inode *inode)
        /* We may delete quota structure so we need to reserve enough blocks */
        handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb));
-        if (IS_ERR(handle))
+        if (IS_ERR(handle)) {
+                /*
+                 * We call dquot_drop() anyway to at least release references
+                 * to quota structures so that umount does not hang.
+                 */
+                dquot_drop(inode);
                return PTR_ERR(handle);
+        }
        ret = dquot_drop(inode);
        err = ext4_journal_stop(handle);
        if (!ret)
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index e6f9da4287c4..e9178643dc01 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -19,8 +19,8 @@
 #include <linux/fs.h>
 #include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
 #include <linux/namei.h>
+#include "ext4.h"
 #include "xattr.h"
 static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e9054c1c7d93..3fbc2c6c3d0e 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -53,11 +53,11 @@
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
-#include <linux/ext4_jbd2.h>
-#include <linux/ext4_fs.h>
 #include <linux/mbcache.h>
 #include <linux/quotaops.h>
 #include <linux/rwsem.h>
+#include "ext4_jbd2.h"
+#include "ext4.h"
 #include "xattr.h"
 #include "acl.h"
@@ -92,6 +92,8 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
                                                 struct mb_cache_entry **);
 static void ext4_xattr_rehash(struct ext4_xattr_header *,
                              struct ext4_xattr_entry *);
+static int ext4_xattr_list(struct inode *inode, char *buffer,
+                           size_t buffer_size);
 static struct mb_cache *ext4_xattr_cache;
@@ -225,7 +227,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
        ea_bdebug(bh, "b_count=%d, refcount=%d",
                atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
        if (ext4_xattr_check_block(bh)) {
-bad_block:      ext4_error(inode->i_sb, __FUNCTION__,
+bad_block:      ext4_error(inode->i_sb, __func__,
                           "inode %lu: bad block %llu", inode->i_ino,
                           EXT4_I(inode)->i_file_acl);
                error = -EIO;
@@ -367,7 +369,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
        ea_bdebug(bh, "b_count=%d, refcount=%d",
                atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
        if (ext4_xattr_check_block(bh)) {
-                ext4_error(inode->i_sb, __FUNCTION__,
+                ext4_error(inode->i_sb, __func__,
                           "inode %lu: bad block %llu", inode->i_ino,
                           EXT4_I(inode)->i_file_acl);
                error = -EIO;
@@ -420,7 +422,7 @@ cleanup:
 * Returns a negative error number on failure, or the number of bytes
 * used / required on success.
 */
-int
+static int
 ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
 {
        int i_error, b_error;
@@ -484,8 +486,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
                get_bh(bh);
                ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
        } else {
-                BHDR(bh)->h_refcount = cpu_to_le32(
+                le32_add_cpu(&BHDR(bh)->h_refcount, -1);
-                                le32_to_cpu(BHDR(bh)->h_refcount) - 1);
                error = ext4_journal_dirty_metadata(handle, bh);
                if (IS_SYNC(inode))
                        handle->h_sync = 1;
@@ -660,7 +661,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
                        atomic_read(&(bs->bh->b_count)),
                        le32_to_cpu(BHDR(bs->bh)->h_refcount));
                if (ext4_xattr_check_block(bs->bh)) {
-                        ext4_error(sb, __FUNCTION__,
+                        ext4_error(sb, __func__,
                                "inode %lu: bad block %llu", inode->i_ino,
                                EXT4_I(inode)->i_file_acl);
                        error = -EIO;
@@ -738,7 +739,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
                                ce = NULL;
                        }
                        ea_bdebug(bs->bh, "cloning");
-                        s->base = kmalloc(bs->bh->b_size, GFP_KERNEL);
+                        s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
                        error = -ENOMEM;
                        if (s->base == NULL)
                                goto cleanup;
@@ -750,7 +751,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
                }
        } else {
                /* Allocate a buffer where we construct the new block. */
-                s->base = kzalloc(sb->s_blocksize, GFP_KERNEL);
+                s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
                /* assert(header == s->base) */
                error = -ENOMEM;
                if (s->base == NULL)
@@ -789,8 +790,7 @@ inserted:
                                if (error)
                                        goto cleanup_dquot;
                                lock_buffer(new_bh);
-                                BHDR(new_bh)->h_refcount = cpu_to_le32(1 +
+                                le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
-                                        le32_to_cpu(BHDR(new_bh)->h_refcount));
                                ea_bdebug(new_bh, "reusing; refcount now=%d",
                                        le32_to_cpu(BHDR(new_bh)->h_refcount));
                                unlock_buffer(new_bh);
@@ -808,10 +808,8 @@ inserted:
                        get_bh(new_bh);
                } else {
                        /* We need to allocate a new block */
-                        ext4_fsblk_t goal = le32_to_cpu(
+                        ext4_fsblk_t goal = ext4_group_first_block_no(sb,
-                                        EXT4_SB(sb)->s_es->s_first_data_block) +
+                                                EXT4_I(inode)->i_block_group);
-                                (ext4_fsblk_t)EXT4_I(inode)->i_block_group *
-                                EXT4_BLOCKS_PER_GROUP(sb);
                        ext4_fsblk_t block = ext4_new_block(handle, inode,
                                                        goal, &error);
                        if (error)
@@ -863,7 +861,7 @@ cleanup_dquot:
        goto cleanup;
 bad_block:
-        ext4_error(inode->i_sb, __FUNCTION__,
+        ext4_error(inode->i_sb, __func__,
                   "inode %lu: bad block %llu", inode->i_ino,
                   EXT4_I(inode)->i_file_acl);
        goto cleanup;
@@ -1166,7 +1164,7 @@ retry:
                if (!bh)
                        goto cleanup;
                if (ext4_xattr_check_block(bh)) {
-                        ext4_error(inode->i_sb, __FUNCTION__,
+                        ext4_error(inode->i_sb, __func__,
                                "inode %lu: bad block %llu", inode->i_ino,
                                EXT4_I(inode)->i_file_acl);
                        error = -EIO;
@@ -1341,14 +1339,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
                goto cleanup;
        bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
        if (!bh) {
-                ext4_error(inode->i_sb, __FUNCTION__,
+                ext4_error(inode->i_sb, __func__,
                        "inode %lu: block %llu read error", inode->i_ino,
                        EXT4_I(inode)->i_file_acl);
                goto cleanup;
        }
        if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
            BHDR(bh)->h_blocks != cpu_to_le32(1)) {
-                ext4_error(inode->i_sb, __FUNCTION__,
+                ext4_error(inode->i_sb, __func__,
                        "inode %lu: bad block %llu", inode->i_ino,
                        EXT4_I(inode)->i_file_acl);
                goto cleanup;
@@ -1475,7 +1473,7 @@ again:
                }
                bh = sb_bread(inode->i_sb, ce->e_block);
                if (!bh) {
-                        ext4_error(inode->i_sb, __FUNCTION__,
+                        ext4_error(inode->i_sb, __func__,
                                "inode %lu: block %lu read error",
                                inode->i_ino, (unsigned long) ce->e_block);
                } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index d7f5d6a12651..5992fe979bb9 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -74,7 +74,6 @@ extern struct xattr_handler ext4_xattr_security_handler;
 extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
 extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
-extern int ext4_xattr_list(struct inode *, char *, size_t);
 extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
 extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
@@ -99,12 +98,6 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
 }
 static inline int
-ext4_xattr_list(struct inode *inode, void *buffer, size_t size)
-{
-        return -EOPNOTSUPP;
-}
-static inline int
 ext4_xattr_set(struct inode *inode, int name_index, const char *name,
               const void *value, size_t size, int flags)
 {
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index f17eaf2321b9..ca5f89fc6cae 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -6,9 +6,9 @@
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/fs.h>
-#include <linux/ext4_jbd2.h>
-#include <linux/ext4_fs.h>
 #include <linux/security.h>
+#include "ext4_jbd2.h"
+#include "ext4.h"
 #include "xattr.h"
 static size_t
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index e0f05acdafec..fff33382cadc 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -9,8 +9,8 @@
 #include <linux/string.h>
 #include <linux/capability.h>
 #include <linux/fs.h>
-#include <linux/ext4_jbd2.h>
+#include "ext4_jbd2.h"
-#include <linux/ext4_fs.h>
+#include "ext4.h"
 #include "xattr.h"
 #define XATTR_TRUSTED_PREFIX "trusted."
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index 7ed3d8ebf096..67be723fcc4e 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -8,8 +8,8 @@
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/fs.h>
-#include <linux/ext4_jbd2.h>
+#include "ext4_jbd2.h"
-#include <linux/ext4_fs.h>
+#include "ext4.h"
 #include "xattr.h"
 #define XATTR_USER_PREFIX "user."
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index a8173081f831..e0139786f717 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -520,22 +520,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        jbd_debug (3, "JBD: commit phase 2\n");
        /*
-         * First, drop modified flag: all accesses to the buffers
-         * will be tracked for a new trasaction only -bzzz
-         */
-        spin_lock(&journal->j_list_lock);
-        if (commit_transaction->t_buffers) {
-                new_jh = jh = commit_transaction->t_buffers->b_tnext;
-                do {
-                        J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
-                                        new_jh->b_modified == 0);
-                        new_jh->b_modified = 0;
-                        new_jh = new_jh->b_tnext;
-                } while (new_jh != jh);
-        }
-        spin_unlock(&journal->j_list_lock);
-        /*
         * Now start flushing things to disk, in the order they appear
         * on the transaction lists.  Data blocks go first.
         */
@@ -584,6 +568,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits;
        stats.u.run.rs_blocks_logged = 0;
+        J_ASSERT(commit_transaction->t_nr_buffers <=
+                 commit_transaction->t_outstanding_credits);
        descriptor = NULL;
        bufs = 0;
        while (commit_transaction->t_buffers) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index eb7eb6c27bcb..53632e3e8457 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -534,7 +534,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
        if (!tid_geq(journal->j_commit_request, tid)) {
                printk(KERN_EMERG
                       "%s: error: j_commit_request=%d, tid=%d\n",
-                       __FUNCTION__, journal->j_commit_request, tid);
+                       __func__, journal->j_commit_request, tid);
        }
        spin_unlock(&journal->j_state_lock);
 #endif
@@ -599,7 +599,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
                        printk(KERN_ALERT "%s: journal block not found "
                                        "at offset %lu on %s\n",
-                                __FUNCTION__,
+                                __func__,
                                blocknr,
                                bdevname(journal->j_dev, b));
                        err = -EIO;
@@ -997,13 +997,14 @@ fail:
 */
 /**
- *  journal_t * jbd2_journal_init_dev() - creates an initialises a journal structure
+ *  journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure
 *  @bdev: Block device on which to create the journal
 *  @fs_dev: Device which hold journalled filesystem for this journal.
 *  @start: Block nr Start of journal.
 *  @len:  Length of the journal in blocks.
 *  @blocksize: blocksize of journalling device
- *  @returns: a newly created journal_t *
+ *
+ *  Returns: a newly created journal_t *
 *
 *  jbd2_journal_init_dev creates a journal which maps a fixed contiguous
 *  range of blocks on an arbitrary block device.
@@ -1027,7 +1028,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
        journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
        if (!journal->j_wbuf) {
                printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
-                        __FUNCTION__);
+                        __func__);
                kfree(journal);
                journal = NULL;
                goto out;
@@ -1083,7 +1084,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
        journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
        if (!journal->j_wbuf) {
                printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
-                        __FUNCTION__);
+                        __func__);
                kfree(journal);
                return NULL;
        }
@@ -1092,7 +1093,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
        /* If that failed, give up */
        if (err) {
                printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
-                       __FUNCTION__);
+                       __func__);
                kfree(journal);
                return NULL;
        }
@@ -1178,7 +1179,7 @@ int jbd2_journal_create(journal_t *journal)
                 */
                printk(KERN_EMERG
                       "%s: creation of journal on external device!\n",
-                       __FUNCTION__);
+                       __func__);
                BUG();
        }
@@ -1976,9 +1977,10 @@ static int journal_init_jbd2_journal_head_cache(void)
 static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
 {
-        J_ASSERT(jbd2_journal_head_cache != NULL);
+        if (jbd2_journal_head_cache) {
-        kmem_cache_destroy(jbd2_journal_head_cache);
+                kmem_cache_destroy(jbd2_journal_head_cache);
-        jbd2_journal_head_cache = NULL;
+                jbd2_journal_head_cache = NULL;
+        }
 }
 /*
@@ -1997,7 +1999,7 @@ static struct journal_head *journal_alloc_journal_head(void)
                jbd_debug(1, "out of memory for journal_head\n");
                if (time_after(jiffies, last_warning + 5*HZ)) {
                        printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
-                               __FUNCTION__);
+                               __func__);
                        last_warning = jiffies;
                }
                while (!ret) {
@@ -2134,13 +2136,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
                        if (jh->b_frozen_data) {
                                printk(KERN_WARNING "%s: freeing "
                                                "b_frozen_data\n",
-                                                __FUNCTION__);
+                                                __func__);
                                jbd2_free(jh->b_frozen_data, bh->b_size);
                        }
                        if (jh->b_committed_data) {
                                printk(KERN_WARNING "%s: freeing "
                                                "b_committed_data\n",
-                                                __FUNCTION__);
+                                                __func__);
                                jbd2_free(jh->b_committed_data, bh->b_size);
                        }
                        bh->b_private = NULL;
@@ -2305,10 +2307,12 @@ static int __init journal_init(void)
        BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
        ret = journal_init_caches();
-        if (ret != 0)
+        if (ret == 0) {
+                jbd2_create_debugfs_entry();
+                jbd2_create_jbd_stats_proc_entry();
+        } else {
                jbd2_journal_destroy_caches();
-        jbd2_create_debugfs_entry();
+        }
-        jbd2_create_jbd_stats_proc_entry();
        return ret;
 }
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 2e1453a5e998..257ff2625765 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -139,7 +139,7 @@ repeat:
 oom:
        if (!journal_oom_retry)
                return -ENOMEM;
-        jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__);
+        jbd_debug(1, "ENOMEM in %s, retrying\n", __func__);
        yield();
        goto repeat;
 }
@@ -167,138 +167,121 @@ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal,
        return NULL;
 }
+void jbd2_journal_destroy_revoke_caches(void)
+{
+        if (jbd2_revoke_record_cache) {
+                kmem_cache_destroy(jbd2_revoke_record_cache);
+                jbd2_revoke_record_cache = NULL;
+        }
+        if (jbd2_revoke_table_cache) {
+                kmem_cache_destroy(jbd2_revoke_table_cache);
+                jbd2_revoke_table_cache = NULL;
+        }
+}
 int __init jbd2_journal_init_revoke_caches(void)
 {
+        J_ASSERT(!jbd2_revoke_record_cache);
+        J_ASSERT(!jbd2_revoke_table_cache);
        jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
                                           sizeof(struct jbd2_revoke_record_s),
                                           0,
                                           SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
                                           NULL);
        if (!jbd2_revoke_record_cache)
-                return -ENOMEM;
+                goto record_cache_failure;
        jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
                                           sizeof(struct jbd2_revoke_table_s),
                                           0, SLAB_TEMPORARY, NULL);
-        if (!jbd2_revoke_table_cache) {
+        if (!jbd2_revoke_table_cache)
-                kmem_cache_destroy(jbd2_revoke_record_cache);
+                goto table_cache_failure;
-                jbd2_revoke_record_cache = NULL;
-                return -ENOMEM;
-        }
        return 0;
+table_cache_failure:
+        jbd2_journal_destroy_revoke_caches();
+record_cache_failure:
+                return -ENOMEM;
 }
-void jbd2_journal_destroy_revoke_caches(void)
+static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
 {
-        kmem_cache_destroy(jbd2_revoke_record_cache);
+        int shift = 0;
-        jbd2_revoke_record_cache = NULL;
+        int tmp = hash_size;
-        kmem_cache_destroy(jbd2_revoke_table_cache);
+        struct jbd2_revoke_table_s *table;
-        jbd2_revoke_table_cache = NULL;
-}
-/* Initialise the revoke table for a given journal to a given size. */
-int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
-{
-        int shift, tmp;
-        J_ASSERT (journal->j_revoke_table[0] == NULL);
+        table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
+        if (!table)
+                goto out;
-        shift = 0;
-        tmp = hash_size;
        while((tmp >>= 1UL) != 0UL)
                shift++;
-        journal->j_revoke_table[0] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
+        table->hash_size = hash_size;
-        if (!journal->j_revoke_table[0])
+        table->hash_shift = shift;
-                return -ENOMEM;
+        table->hash_table =
-        journal->j_revoke = journal->j_revoke_table[0];
-        /* Check that the hash_size is a power of two */
-        J_ASSERT(is_power_of_2(hash_size));
-        journal->j_revoke->hash_size = hash_size;
-        journal->j_revoke->hash_shift = shift;
-        journal->j_revoke->hash_table =
                kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
-        if (!journal->j_revoke->hash_table) {
+        if (!table->hash_table) {
-                kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]);
+                kmem_cache_free(jbd2_revoke_table_cache, table);
-                journal->j_revoke = NULL;
+                table = NULL;
-                return -ENOMEM;
+                goto out;
        }
        for (tmp = 0; tmp < hash_size; tmp++)
-                INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+                INIT_LIST_HEAD(&table->hash_table[tmp]);
-        journal->j_revoke_table[1] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
+out:
-        if (!journal->j_revoke_table[1]) {
+        return table;
-                kfree(journal->j_revoke_table[0]->hash_table);
+}
-                kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]);
-                return -ENOMEM;
+static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
+{
+        int i;
+        struct list_head *hash_list;
+        for (i = 0; i < table->hash_size; i++) {
+                hash_list = &table->hash_table[i];
+                J_ASSERT(list_empty(hash_list));
        }
-        journal->j_revoke = journal->j_revoke_table[1];
+        kfree(table->hash_table);
+        kmem_cache_free(jbd2_revoke_table_cache, table);
+}
-        /* Check that the hash_size is a power of two */
+/* Initialise the revoke table for a given journal to a given size. */
+int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
+{
+        J_ASSERT(journal->j_revoke_table[0] == NULL);
        J_ASSERT(is_power_of_2(hash_size));
-        journal->j_revoke->hash_size = hash_size;
+        journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size);
+        if (!journal->j_revoke_table[0])
-        journal->j_revoke->hash_shift = shift;
+                goto fail0;
-        journal->j_revoke->hash_table =
+        journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size);
-                kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
+        if (!journal->j_revoke_table[1])
-        if (!journal->j_revoke->hash_table) {
+                goto fail1;
-                kfree(journal->j_revoke_table[0]->hash_table);
-                kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]);
-                kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[1]);
-                journal->j_revoke = NULL;
-                return -ENOMEM;
-        }
-        for (tmp = 0; tmp < hash_size; tmp++)
+        journal->j_revoke = journal->j_revoke_table[1];
-                INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
        spin_lock_init(&journal->j_revoke_lock);
        return 0;
-}
-/* Destoy a journal's revoke table.  The table must already be empty! */
+fail1:
+        jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
+fail0:
+        return -ENOMEM;
+}
+/* Destroy a journal's revoke table.  The table must already be empty! */
 void jbd2_journal_destroy_revoke(journal_t *journal)
 {
-        struct jbd2_revoke_table_s *table;
-        struct list_head *hash_list;
-        int i;
-        table = journal->j_revoke_table[0];
-        if (!table)
-                return;
-        for (i=0; i<table->hash_size; i++) {
-                hash_list = &table->hash_table[i];
-                J_ASSERT (list_empty(hash_list));
-        }
-        kfree(table->hash_table);
-        kmem_cache_free(jbd2_revoke_table_cache, table);
-        journal->j_revoke = NULL;
-        table = journal->j_revoke_table[1];
-        if (!table)
-                return;
-        for (i=0; i<table->hash_size; i++) {
-                hash_list = &table->hash_table[i];
-                J_ASSERT (list_empty(hash_list));
-        }
-        kfree(table->hash_table);
-        kmem_cache_free(jbd2_revoke_table_cache, table);
        journal->j_revoke = NULL;
+        if (journal->j_revoke_table[0])
+                jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
+        if (journal->j_revoke_table[1])
+                jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]);
 }
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b9b0b6f899b9..d6e006e67804 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -618,6 +618,12 @@ repeat:
                goto done;
        /*
+         * this is the first time this transaction is touching this buffer,
+         * reset the modified flag
+         */
+       jh->b_modified = 0;
+        /*
         * If there is already a copy-out version of this buffer, then we don't
         * need to make another one
         */
@@ -690,7 +696,7 @@ repeat:
                                if (!frozen_buffer) {
                                        printk(KERN_EMERG
                                               "%s: OOM for frozen_buffer\n",
-                                               __FUNCTION__);
+                                               __func__);
                                        JBUFFER_TRACE(jh, "oom!");
                                        error = -ENOMEM;
                                        jbd_lock_bh_state(bh);
@@ -829,9 +835,16 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
        if (jh->b_transaction == NULL) {
                jh->b_transaction = transaction;
+                /* first access by this transaction */
+                jh->b_modified = 0;
                JBUFFER_TRACE(jh, "file as BJ_Reserved");
                __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
        } else if (jh->b_transaction == journal->j_committing_transaction) {
+                /* first access by this transaction */
+                jh->b_modified = 0;
                JBUFFER_TRACE(jh, "set next transaction");
                jh->b_next_transaction = transaction;
        }
@@ -901,7 +914,7 @@ repeat:
                committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
                if (!committed_data) {
                        printk(KERN_EMERG "%s: No memory for committed data\n",
-                                __FUNCTION__);
+                                __func__);
                        err = -ENOMEM;
                        goto out;
                }
@@ -1230,6 +1243,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
        struct journal_head *jh;
        int drop_reserve = 0;
        int err = 0;
+        int was_modified = 0;
        BUFFER_TRACE(bh, "entry");
@@ -1248,6 +1262,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
                goto not_jbd;
        }
+        /* keep track of whether or not this transaction modified us */
+        was_modified = jh->b_modified;
        /*
         * The buffer's going from the transaction, we must drop
         * all references -bzzz
@@ -1265,7 +1282,12 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
                JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
-                drop_reserve = 1;
+                /*
+                 * we only want to drop a reference if this transaction
+                 * modified the buffer
+                 */
+                if (was_modified)
+                        drop_reserve = 1;
                /*
                 * We are no longer going to journal this buffer.
@@ -1305,7 +1327,13 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
                if (jh->b_next_transaction) {
                        J_ASSERT(jh->b_next_transaction == transaction);
                        jh->b_next_transaction = NULL;
-                        drop_reserve = 1;
+                        /*
+                         * only drop a reference if this transaction modified
+                         * the buffer
+                         */
+                        if (was_modified)
+                                drop_reserve = 1;
                }
        }
@@ -1434,7 +1462,8 @@ int jbd2_journal_stop(handle_t *handle)
        return err;
 }
-/**int jbd2_journal_force_commit() - force any uncommitted transactions
+/**
+ * int jbd2_journal_force_commit() - force any uncommitted transactions
 * @journal: journal to force
 *
 * For synchronous operations: force any uncommitted transactions
@@ -2077,7 +2106,7 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
        jh->b_transaction = jh->b_next_transaction;
        jh->b_next_transaction = NULL;
        __jbd2_journal_file_buffer(jh, jh->b_transaction,
-                                was_dirty ? BJ_Metadata : BJ_Reserved);
+                                jh->b_modified ? BJ_Metadata : BJ_Reserved);
        J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
        if (was_dirty)
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 524021ff5436..3f53dd101f99 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -64,3 +64,16 @@ config XFS_RT
          See the xfs man page in section 5 for additional information.
          If unsure, say N.
+config XFS_DEBUG
+        bool "XFS Debugging support (EXPERIMENTAL)"
+        depends on XFS_FS && EXPERIMENTAL
+        help
+          Say Y here to get an XFS build with many debugging features,
+          including ASSERT checks, function wrappers around macros,
+          and extra sanity-checking functions in various code paths.
+          Note that the resulting code will be HUGE and SLOW, and probably
+          not useful unless you are debugging a particular problem.
+          Say N unless you are an XFS developer, or you play one on TV.
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
index c110bb002665..ff6a19873e5c 100644
--- a/fs/xfs/linux-2.6/mrlock.h
+++ b/fs/xfs/linux-2.6/mrlock.h
@@ -20,29 +20,24 @@
 #include <linux/rwsem.h>
-enum { MR_NONE, MR_ACCESS, MR_UPDATE };
 typedef struct {
        struct rw_semaphore     mr_lock;
+#ifdef DEBUG
        int                     mr_writer;
+#endif
 } mrlock_t;
+#ifdef DEBUG
 #define mrinit(mrp, name)       \
        do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
+#else
+#define mrinit(mrp, name)       \
+        do { init_rwsem(&(mrp)->mr_lock); } while (0)
+#endif
 #define mrlock_init(mrp, t,n,s) mrinit(mrp, n)
 #define mrfree(mrp)             do { } while (0)
-static inline void mraccess(mrlock_t *mrp)
-{
-        down_read(&mrp->mr_lock);
-}
-static inline void mrupdate(mrlock_t *mrp)
-{
-        down_write(&mrp->mr_lock);
-        mrp->mr_writer = 1;
-}
 static inline void mraccess_nested(mrlock_t *mrp, int subclass)
 {
        down_read_nested(&mrp->mr_lock, subclass);
@@ -51,10 +46,11 @@ static inline void mraccess_nested(mrlock_t *mrp, int subclass)
 static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
 {
        down_write_nested(&mrp->mr_lock, subclass);
+#ifdef DEBUG
        mrp->mr_writer = 1;
+#endif
 }
 static inline int mrtryaccess(mrlock_t *mrp)
 {
        return down_read_trylock(&mrp->mr_lock);
@@ -64,39 +60,31 @@ static inline int mrtryupdate(mrlock_t *mrp)
 {
        if (!down_write_trylock(&mrp->mr_lock))
                return 0;
+#ifdef DEBUG
        mrp->mr_writer = 1;
+#endif
        return 1;
 }
-static inline void mrunlock(mrlock_t *mrp)
+static inline void mrunlock_excl(mrlock_t *mrp)
 {
-        if (mrp->mr_writer) {
+#ifdef DEBUG
-                mrp->mr_writer = 0;
+        mrp->mr_writer = 0;
-                up_write(&mrp->mr_lock);
+#endif
-        } else {
+        up_write(&mrp->mr_lock);
-                up_read(&mrp->mr_lock);
-        }
 }
-static inline void mrdemote(mrlock_t *mrp)
+static inline void mrunlock_shared(mrlock_t *mrp)
 {
-        mrp->mr_writer = 0;
+        up_read(&mrp->mr_lock);
-        downgrade_write(&mrp->mr_lock);
 }
-#ifdef DEBUG
+static inline void mrdemote(mrlock_t *mrp)
-/*
- * Debug-only routine, without some platform-specific asm code, we can
- * now only answer requests regarding whether we hold the lock for write
- * (reader state is outside our visibility, we only track writer state).
- * Note: means !ismrlocked would give false positives, so don't do that.
- */
-static inline int ismrlocked(mrlock_t *mrp, int type)
 {
-        if (mrp && type == MR_UPDATE)
+#ifdef DEBUG
-                return mrp->mr_writer;
+        mrp->mr_writer = 0;
-        return 1;
-}
 #endif
+        downgrade_write(&mrp->mr_lock);
+}
 #endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 52f6846101d5..5105015a75ad 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -886,7 +886,7 @@ int
 xfs_buf_lock_value(
        xfs_buf_t               *bp)
 {
-        return atomic_read(&bp->b_sema.count);
+        return bp->b_sema.count;
 }
 #endif
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 265f0168ab76..c672b3238b14 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -133,7 +133,7 @@ xfs_nfs_get_inode(
        if (!ip)
                return ERR_PTR(-EIO);
-        if (!ip->i_d.di_mode || ip->i_d.di_gen != generation) {
+        if (ip->i_d.di_gen != generation) {
                xfs_iput_new(ip, XFS_ILOCK_SHARED);
                return ERR_PTR(-ENOENT);
        }
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 05905246434d..65e78c13d4ae 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -43,9 +43,6 @@
 #include <linux/smp_lock.h>
 static struct vm_operations_struct xfs_file_vm_ops;
-#ifdef CONFIG_XFS_DMAPI
-static struct vm_operations_struct xfs_dmapi_file_vm_ops;
-#endif
 STATIC_INLINE ssize_t
 __xfs_file_read(
@@ -202,22 +199,6 @@ xfs_file_fsync(
                        (xfs_off_t)0, (xfs_off_t)-1);
 }
-#ifdef CONFIG_XFS_DMAPI
-STATIC int
-xfs_vm_fault(
-        struct vm_area_struct   *vma,
-        struct vm_fault *vmf)
-{
-        struct inode    *inode = vma->vm_file->f_path.dentry->d_inode;
-        bhv_vnode_t     *vp = vn_from_inode(inode);
-        ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
-        if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0))
-                return VM_FAULT_SIGBUS;
-        return filemap_fault(vma, vmf);
-}
-#endif /* CONFIG_XFS_DMAPI */
 /*
 * Unfortunately we can't just use the clean and simple readdir implementation
 * below, because nfs might call back into ->lookup from the filldir callback
@@ -386,11 +367,6 @@ xfs_file_mmap(
        vma->vm_ops = &xfs_file_vm_ops;
        vma->vm_flags |= VM_CAN_NONLINEAR;
-#ifdef CONFIG_XFS_DMAPI
-        if (XFS_M(filp->f_path.dentry->d_inode->i_sb)->m_flags & XFS_MOUNT_DMAPI)
-                vma->vm_ops = &xfs_dmapi_file_vm_ops;
-#endif /* CONFIG_XFS_DMAPI */
        file_accessed(filp);
        return 0;
 }
@@ -437,47 +413,6 @@ xfs_file_ioctl_invis(
        return error;
 }
-#ifdef CONFIG_XFS_DMAPI
-#ifdef HAVE_VMOP_MPROTECT
-STATIC int
-xfs_vm_mprotect(
-        struct vm_area_struct *vma,
-        unsigned int    newflags)
-{
-        struct inode    *inode = vma->vm_file->f_path.dentry->d_inode;
-        struct xfs_mount *mp = XFS_M(inode->i_sb);
-        int             error = 0;
-        if (mp->m_flags & XFS_MOUNT_DMAPI) {
-                if ((vma->vm_flags & VM_MAYSHARE) &&
-                    (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE))
-                        error = XFS_SEND_MMAP(mp, vma, VM_WRITE);
-        }
-        return error;
-}
-#endif /* HAVE_VMOP_MPROTECT */
-#endif /* CONFIG_XFS_DMAPI */
-#ifdef HAVE_FOP_OPEN_EXEC
-/* If the user is attempting to execute a file that is offline then
- * we have to trigger a DMAPI READ event before the file is marked as busy
- * otherwise the invisible I/O will not be able to write to the file to bring
- * it back online.
- */
-STATIC int
-xfs_file_open_exec(
-        struct inode    *inode)
-{
-        struct xfs_mount *mp = XFS_M(inode->i_sb);
-        struct xfs_inode *ip = XFS_I(inode);
-        if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) &&
-                     DM_EVENT_ENABLED(ip, DM_EVENT_READ))
-                return -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
-        return 0;
-}
-#endif /* HAVE_FOP_OPEN_EXEC */
 /*
 * mmap()d file has taken write protection fault and is being made
 * writable. We can set the page state up correctly for a writable
@@ -546,13 +481,3 @@ static struct vm_operations_struct xfs_file_vm_ops = {
        .fault          = filemap_fault,
        .page_mkwrite   = xfs_vm_page_mkwrite,
 };
-#ifdef CONFIG_XFS_DMAPI
-static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
-        .fault          = xfs_vm_fault,
-        .page_mkwrite   = xfs_vm_page_mkwrite,
-#ifdef HAVE_VMOP_MPROTECT
-        .mprotect       = xfs_vm_mprotect,
-#endif
-};
-#endif /* CONFIG_XFS_DMAPI */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 4ddb86b73c6b..a42ba9d71156 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -238,7 +238,7 @@ xfs_vget_fsop_handlereq(
                return error;
        if (ip == NULL)
                return XFS_ERROR(EIO);
-        if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) {
+        if (ip->i_d.di_gen != igen) {
                xfs_iput_new(ip, XFS_ILOCK_SHARED);
                return XFS_ERROR(ENOENT);
        }
@@ -505,14 +505,14 @@ xfs_attrmulti_attr_get(
 {
        char                    *kbuf;
        int                     error = EFAULT;
-        
        if (*len > XATTR_SIZE_MAX)
                return EINVAL;
        kbuf = kmalloc(*len, GFP_KERNEL);
        if (!kbuf)
                return ENOMEM;
-        error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags, NULL);
+        error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
        if (error)
                goto out_kfree;
@@ -546,7 +546,7 @@ xfs_attrmulti_attr_set(
        if (copy_from_user(kbuf, ubuf, len))
                goto out_kfree;
-                        
        error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
 out_kfree:
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index a1237dad6430..2bf287ef5489 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -511,7 +511,8 @@ xfs_vn_rename(
        xfs_dentry_to_name(&nname, ndentry);
        error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
-                                                        XFS_I(ndir), &nname);
+                           XFS_I(ndir), &nname, new_inode ?
+                                                XFS_I(new_inode) : NULL);
        if (likely(!error)) {
                if (new_inode)
                        xfs_validate_fields(new_inode);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index e5143323e71f..1bc9f600365f 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -99,7 +99,6 @@
 /*
 * Feature macros (disable/enable)
 */
-#define HAVE_SPLICE     /* a splice(2) exists in 2.6, but not in 2.4 */
 #ifdef CONFIG_SMP
 #define HAVE_PERCPU_SB  /* per cpu superblock counters are a 2.6 feature */
 #else
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 1ebd8004469c..5e3b57516ec7 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -394,7 +394,7 @@ xfs_zero_last_block(
        int             error = 0;
        xfs_bmbt_irec_t imap;
-        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        zero_offset = XFS_B_FSB_OFFSET(mp, isize);
        if (zero_offset == 0) {
@@ -425,14 +425,14 @@ xfs_zero_last_block(
         * out sync.  We need to drop the ilock while we do this so we
         * don't deadlock when the buffer cache calls back to us.
         */
-        xfs_iunlock(ip, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
+        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        zero_len = mp->m_sb.sb_blocksize - zero_offset;
        if (isize + zero_len > offset)
                zero_len = offset - isize;
        error = xfs_iozero(ip, isize, zero_len);
-        xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+        xfs_ilock(ip, XFS_ILOCK_EXCL);
        ASSERT(error >= 0);
        return error;
 }
@@ -465,8 +465,7 @@ xfs_zero_eof(
        int             error = 0;
        xfs_bmbt_irec_t imap;
-        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-        ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
        ASSERT(offset > isize);
        /*
@@ -475,8 +474,7 @@ xfs_zero_eof(
         */
        error = xfs_zero_last_block(ip, offset, isize);
        if (error) {
-                ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+                ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-                ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
                return error;
        }
@@ -507,8 +505,7 @@ xfs_zero_eof(
                error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
                                  0, NULL, 0, &imap, &nimaps, NULL, NULL);
                if (error) {
-                        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+                        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-                        ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
                        return error;
                }
                ASSERT(nimaps > 0);
@@ -532,7 +529,7 @@ xfs_zero_eof(
                 * Drop the inode lock while we're doing the I/O.
                 * We'll still have the iolock to protect us.
                 */
-                xfs_iunlock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
                zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
@@ -548,13 +545,13 @@ xfs_zero_eof(
                start_zero_fsb = imap.br_startoff + imap.br_blockcount;
                ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-                xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+                xfs_ilock(ip, XFS_ILOCK_EXCL);
        }
        return 0;
 out_lock:
-        xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+        xfs_ilock(ip, XFS_ILOCK_EXCL);
        ASSERT(error >= 0);
        return error;
 }
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index e1d498b4ba7a..e6be37dbd0e9 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -50,7 +50,6 @@ struct xfs_iomap;
 #define XFS_INVAL_CACHED        18
 #define XFS_DIORD_ENTER         19
 #define XFS_DIOWR_ENTER         20
-#define XFS_SENDFILE_ENTER      21
 #define XFS_WRITEPAGE_ENTER     22
 #define XFS_RELEASEPAGE_ENTER   23
 #define XFS_INVALIDPAGE_ENTER   24
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 865eb708aa95..742b2c7852c1 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1181,7 +1181,7 @@ xfs_fs_statfs(
        statp->f_fsid.val[0] = (u32)id;
        statp->f_fsid.val[1] = (u32)(id >> 32);
-        xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT);
+        xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
        spin_lock(&mp->m_sb_lock);
        statp->f_bsize = sbp->sb_blocksize;
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 8b4d63ce8694..9d73cb5c0fc7 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -25,12 +25,6 @@ struct attrlist_cursor_kern;
 typedef struct inode    bhv_vnode_t;
-#define VN_ISLNK(vp)    S_ISLNK((vp)->i_mode)
-#define VN_ISREG(vp)    S_ISREG((vp)->i_mode)
-#define VN_ISDIR(vp)    S_ISDIR((vp)->i_mode)
-#define VN_ISCHR(vp)    S_ISCHR((vp)->i_mode)
-#define VN_ISBLK(vp)    S_ISBLK((vp)->i_mode)
 /*
 * Vnode to Linux inode mapping.
 */
@@ -151,24 +145,6 @@ typedef struct bhv_vattr {
                XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
                XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)
-/*
- *  Modes.
- */
-#define VSUID   S_ISUID         /* set user id on execution */
-#define VSGID   S_ISGID         /* set group id on execution */
-#define VSVTX   S_ISVTX         /* save swapped text even after use */
-#define VREAD   S_IRUSR         /* read, write, execute permissions */
-#define VWRITE  S_IWUSR
-#define VEXEC   S_IXUSR
-#define MODEMASK S_IALLUGO      /* mode bits plus permission bits */
-/*
- * Check whether mandatory file locking is enabled.
- */
-#define MANDLOCK(vp, mode)      \
-        (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
 extern void     vn_init(void);
 extern int      vn_revalidate(bhv_vnode_t *);
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 631ebb31b295..85df3288efd5 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -933,7 +933,7 @@ xfs_qm_dqget(
               type == XFS_DQ_PROJ ||
               type == XFS_DQ_GROUP);
        if (ip) {
-                ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+                ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
                if (type == XFS_DQ_USER)
                        ASSERT(ip->i_udquot == NULL);
                else
@@ -1088,7 +1088,7 @@ xfs_qm_dqget(
        xfs_qm_mplist_unlock(mp);
        XFS_DQ_HASH_UNLOCK(h);
 dqret:
-        ASSERT((ip == NULL) || XFS_ISLOCKED_INODE_EXCL(ip));
+        ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
        xfs_dqtrace_entry(dqp, "DQGET DONE");
        *O_dqpp = dqp;
        return (0);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 40ea56409561..d31cce1165c5 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -670,7 +670,7 @@ xfs_qm_dqattach_one(
        xfs_dquot_t     *dqp;
        int             error;
-        ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        error = 0;
        /*
         * See if we already have it in the inode itself. IO_idqpp is
@@ -874,7 +874,7 @@ xfs_qm_dqattach(
                return 0;
        ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
-               XFS_ISLOCKED_INODE_EXCL(ip));
+               xfs_isilocked(ip, XFS_ILOCK_EXCL));
        if (! (flags & XFS_QMOPT_ILOCKED))
                xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -888,7 +888,8 @@ xfs_qm_dqattach(
                        goto done;
                nquotas++;
        }
-        ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        if (XFS_IS_OQUOTA_ON(mp)) {
                error = XFS_IS_GQUOTA_ON(mp) ?
                        xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
@@ -913,7 +914,7 @@ xfs_qm_dqattach(
         * This WON'T, in general, result in a thrash.
         */
        if (nquotas == 2) {
-                ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+                ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
                ASSERT(ip->i_udquot);
                ASSERT(ip->i_gdquot);
@@ -956,7 +957,7 @@ xfs_qm_dqattach(
 #ifdef QUOTADEBUG
        else
-                ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+                ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 #endif
        return error;
 }
@@ -1291,7 +1292,7 @@ xfs_qm_dqget_noattach(
        xfs_mount_t     *mp;
        xfs_dquot_t     *udqp, *gdqp;
-        ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        mp = ip->i_mount;
        udqp = NULL;
        gdqp = NULL;
@@ -1392,7 +1393,7 @@ xfs_qm_qino_alloc(
         * Keep an extra reference to this quota inode. This inode is
         * locked exclusively and joined to the transaction already.
         */
-        ASSERT(XFS_ISLOCKED_INODE_EXCL(*ip));
+        ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
        VN_HOLD(XFS_ITOV((*ip)));
        /*
@@ -1737,12 +1738,6 @@ xfs_qm_dqusage_adjust(
                return error;
        }
-        if (ip->i_d.di_mode == 0) {
-                xfs_iput_new(ip, XFS_ILOCK_EXCL);
-                *res = BULKSTAT_RV_NOTHING;
-                return XFS_ERROR(ENOENT);
-        }
        /*
         * Obtain the locked dquots. In case of an error (eg. allocation
         * fails for ENOSPC), we return the negative of the error number
@@ -2563,7 +2558,7 @@ xfs_qm_vop_chown(
        uint            bfield = XFS_IS_REALTIME_INODE(ip) ?
                                 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
-        ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
        /* old dquot */
@@ -2607,7 +2602,7 @@ xfs_qm_vop_chown_reserve(
        uint            delblks, blkflags, prjflags = 0;
        xfs_dquot_t     *unresudq, *unresgdq, *delblksudq, *delblksgdq;
-        ASSERT(XFS_ISLOCKED_INODE(ip));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
        mp = ip->i_mount;
        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -2717,7 +2712,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
        if (!XFS_IS_QUOTA_ON(tp->t_mountp))
                return;
-        ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
        if (udqp) {
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 8342823dbdc3..768a3b27d2b6 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -1366,12 +1366,6 @@ xfs_qm_internalqcheck_adjust(
                return (error);
        }
-        if (ip->i_d.di_mode == 0) {
-                xfs_iput_new(ip, lock_flags);
-                *res = BULKSTAT_RV_NOTHING;
-                return XFS_ERROR(ENOENT);
-        }
        /*
         * This inode can have blocks after eof which can get released
         * when we send it to inactive. Since we don't check the dquot
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index a8b85e2be9d5..5e4a40b1c565 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -27,11 +27,6 @@
 /* Number of dquots that fit in to a dquot block */
 #define XFS_QM_DQPERBLK(mp)     ((mp)->m_quotainfo->qi_dqperchunk)
-#define XFS_ISLOCKED_INODE(ip)          (ismrlocked(&(ip)->i_lock, \
-                                            MR_UPDATE | MR_ACCESS) != 0)
-#define XFS_ISLOCKED_INODE_EXCL(ip)     (ismrlocked(&(ip)->i_lock, \
-                                            MR_UPDATE) != 0)
 #define XFS_DQ_IS_ADDEDTO_TRX(t, d)     ((d)->q_transp == (t))
 #define XFS_QI_MPLRECLAIMS(mp)  ((mp)->m_quotainfo->qi_dqreclaims)
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index f441f836ca8b..99611381e740 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -834,7 +834,7 @@ xfs_trans_reserve_quota_nblks(
        ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
        ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
-        ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
        ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
                                XFS_TRANS_DQ_RES_RTBLKS ||
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 765aaf65e2d3..540e4c989825 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -22,7 +22,7 @@
 #define STATIC
 #define DEBUG 1
 #define XFS_BUF_LOCK_TRACKING 1
-#define QUOTADEBUG 1
+/* #define QUOTADEBUG 1 */
 #endif
 #ifdef CONFIG_XFS_TRACE
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 8e130b9720ae..ebee3a4f703a 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -72,7 +72,7 @@ xfs_acl_vhasacl_default(
 {
        int             error;
-        if (!VN_ISDIR(vp))
+        if (!S_ISDIR(vp->i_mode))
                return 0;
        xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
        return (error == 0);
@@ -238,15 +238,8 @@ xfs_acl_vget(
                        error = EINVAL;
                        goto out;
                }
-                if (kind == _ACL_TYPE_ACCESS) {
+                if (kind == _ACL_TYPE_ACCESS)
-                        bhv_vattr_t     va;
+                        xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, xfs_acl);
-                        va.va_mask = XFS_AT_MODE;
-                        error = xfs_getattr(xfs_vtoi(vp), &va, 0);
-                        if (error)
-                                goto out;
-                        xfs_acl_sync_mode(va.va_mode, xfs_acl);
-                }
                error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
        }
 out:
@@ -341,14 +334,15 @@ xfs_acl_iaccess(
 {
        xfs_acl_t       *acl;
        int             rval;
+        struct xfs_name acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE};
        if (!(_ACL_ALLOC(acl)))
                return -1;
        /* If the file has no ACL return -1. */
        rval = sizeof(xfs_acl_t);
-        if (xfs_attr_fetch(ip, SGI_ACL_FILE, SGI_ACL_FILE_SIZE,
+        if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval,
-                        (char *)acl, &rval, ATTR_ROOT | ATTR_KERNACCESS, cr)) {
+                                        ATTR_ROOT | ATTR_KERNACCESS)) {
                _ACL_FREE(acl);
                return -1;
        }
@@ -373,23 +367,15 @@ xfs_acl_allow_set(
        bhv_vnode_t     *vp,
        int             kind)
 {
-        xfs_inode_t     *ip = xfs_vtoi(vp);
-        bhv_vattr_t     va;
-        int             error;
        if (vp->i_flags & (S_IMMUTABLE|S_APPEND))
                return EPERM;
-        if (kind == _ACL_TYPE_DEFAULT && !VN_ISDIR(vp))
+        if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode))
                return ENOTDIR;
        if (vp->i_sb->s_flags & MS_RDONLY)
                return EROFS;
-        va.va_mask = XFS_AT_UID;
+        if (xfs_vtoi(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER))
-        error = xfs_getattr(ip, &va, 0);
-        if (error)
-                return error;
-        if (va.va_uid != current->fsuid && !capable(CAP_FOWNER))
                return EPERM;
-        return error;
+        return 0;
 }
 /*
@@ -594,7 +580,7 @@ xfs_acl_get_attr(
        *error = xfs_attr_get(xfs_vtoi(vp),
                                        kind == _ACL_TYPE_ACCESS ?
                                        SGI_ACL_FILE : SGI_ACL_DEFAULT,
-                                        (char *)aclp, &len, flags, sys_cred);
+                                        (char *)aclp, &len, flags);
        if (*error || (flags & ATTR_KERNOVAL))
                return;
        xfs_acl_get_endian(aclp);
@@ -643,7 +629,6 @@ xfs_acl_vtoacl(
        xfs_acl_t       *access_acl,
        xfs_acl_t       *default_acl)
 {
-        bhv_vattr_t     va;
        int             error = 0;
        if (access_acl) {
@@ -652,16 +637,10 @@ xfs_acl_vtoacl(
                 * be obtained for some reason, invalidate the access ACL.
                 */
                xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error);
-                if (!error) {
-                        /* Got the ACL, need the mode... */
-                        va.va_mask = XFS_AT_MODE;
-                        error = xfs_getattr(xfs_vtoi(vp), &va, 0);
-                }
                if (error)
                        access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
                else /* We have a good ACL and the file mode, synchronize. */
-                        xfs_acl_sync_mode(va.va_mode, access_acl);
+                        xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, access_acl);
        }
        if (default_acl) {
@@ -719,7 +698,7 @@ xfs_acl_inherit(
         * If the new file is a directory, its default ACL is a copy of
         * the containing directory's default ACL.
         */
-        if (VN_ISDIR(vp))
+        if (S_ISDIR(vp->i_mode))
                xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
        if (!error && !basicperms)
                xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
@@ -744,7 +723,7 @@ xfs_acl_setmode(
        bhv_vattr_t     va;
        xfs_acl_entry_t *ap;
        xfs_acl_entry_t *gap = NULL;
-        int             i, error, nomask = 1;
+        int             i, nomask = 1;
        *basicperms = 1;
@@ -756,11 +735,7 @@ xfs_acl_setmode(
         * mode.  The m:: bits take precedence over the g:: bits.
         */
        va.va_mask = XFS_AT_MODE;
-        error = xfs_getattr(xfs_vtoi(vp), &va, 0);
+        va.va_mode = xfs_vtoi(vp)->i_d.di_mode;
-        if (error)
-                return error;
-        va.va_mask = XFS_AT_MODE;
        va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
        ap = acl->acl_entry;
        for (i = 0; i < acl->acl_cnt; ++i) {
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 36d781ee5fcc..df151a859186 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -101,14 +101,28 @@ STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
 ktrace_t *xfs_attr_trace_buf;
 #endif
+STATIC int
+xfs_attr_name_to_xname(
+        struct xfs_name *xname,
+        const char      *aname)
+{
+        if (!aname)
+                return EINVAL;
+        xname->name = aname;
+        xname->len = strlen(aname);
+        if (xname->len >= MAXNAMELEN)
+                return EFAULT;          /* match IRIX behaviour */
+        return 0;
+}
 /*========================================================================
 * Overall external interface routines.
 *========================================================================*/
 int
-xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen,
+xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
-               char *value, int *valuelenp, int flags, struct cred *cred)
+                char *value, int *valuelenp, int flags)
 {
        xfs_da_args_t   args;
        int             error;
@@ -122,8 +136,8 @@ xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen,
         * Fill in the arg structure for this request.
         */
        memset((char *)&args, 0, sizeof(args));
-        args.name = name;
+        args.name = name->name;
-        args.namelen = namelen;
+        args.namelen = name->len;
        args.value = value;
        args.valuelen = *valuelenp;
        args.flags = flags;
@@ -162,31 +176,29 @@ xfs_attr_get(
        const char      *name,
        char            *value,
        int             *valuelenp,
-        int             flags,
+        int             flags)
-        cred_t          *cred)
 {
-        int             error, namelen;
+        int             error;
+        struct xfs_name xname;
        XFS_STATS_INC(xs_attr_get);
-        if (!name)
-                return(EINVAL);
-        namelen = strlen(name);
-        if (namelen >= MAXNAMELEN)
-                return(EFAULT);         /* match IRIX behaviour */
        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
                return(EIO);
+        error = xfs_attr_name_to_xname(&xname, name);
+        if (error)
+                return error;
        xfs_ilock(ip, XFS_ILOCK_SHARED);
-        error = xfs_attr_fetch(ip, name, namelen, value, valuelenp, flags, cred);
+        error = xfs_attr_fetch(ip, &xname, value, valuelenp, flags);
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
        return(error);
 }
-int
+STATIC int
-xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
+xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
-                 char *value, int valuelen, int flags)
+                char *value, int valuelen, int flags)
 {
        xfs_da_args_t   args;
        xfs_fsblock_t   firstblock;
@@ -209,7 +221,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
         */
        if (XFS_IFORK_Q(dp) == 0) {
                int sf_size = sizeof(xfs_attr_sf_hdr_t) +
-                              XFS_ATTR_SF_ENTSIZE_BYNAME(namelen, valuelen);
+                              XFS_ATTR_SF_ENTSIZE_BYNAME(name->len, valuelen);
                if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd)))
                        return(error);
@@ -219,8 +231,8 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
         * Fill in the arg structure for this request.
         */
        memset((char *)&args, 0, sizeof(args));
-        args.name = name;
+        args.name = name->name;
-        args.namelen = namelen;
+        args.namelen = name->len;
        args.value = value;
        args.valuelen = valuelen;
        args.flags = flags;
@@ -236,7 +248,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
         * Determine space new attribute will use, and if it would be
         * "local" or "remote" (note: local != inline).
         */
-        size = xfs_attr_leaf_newentsize(namelen, valuelen,
+        size = xfs_attr_leaf_newentsize(name->len, valuelen,
                                        mp->m_sb.sb_blocksize, &local);
        nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
@@ -429,26 +441,27 @@ xfs_attr_set(
        int             valuelen,
        int             flags)
 {
-        int             namelen;
+        int             error;
+        struct xfs_name xname;
-        namelen = strlen(name);
-        if (namelen >= MAXNAMELEN)
-                return EFAULT;          /* match IRIX behaviour */
        XFS_STATS_INC(xs_attr_set);
        if (XFS_FORCED_SHUTDOWN(dp->i_mount))
                return (EIO);
-        return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags);
+        error = xfs_attr_name_to_xname(&xname, name);
+        if (error)
+                return error;
+        return xfs_attr_set_int(dp, &xname, value, valuelen, flags);
 }
 /*
 * Generic handler routine to remove a name from an attribute list.
 * Transitions attribute list from Btree to shortform as necessary.
 */
-int
+STATIC int
-xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags)
+xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
 {
        xfs_da_args_t   args;
        xfs_fsblock_t   firstblock;
@@ -460,8 +473,8 @@ xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags)
         * Fill in the arg structure for this request.
         */
        memset((char *)&args, 0, sizeof(args));
-        args.name = name;
+        args.name = name->name;
-        args.namelen = namelen;
+        args.namelen = name->len;
        args.flags = flags;
        args.hashval = xfs_da_hashname(args.name, args.namelen);
        args.dp = dp;
@@ -575,17 +588,18 @@ xfs_attr_remove(
        const char      *name,
        int             flags)
 {
-        int             namelen;
+        int             error;
+        struct xfs_name xname;
-        namelen = strlen(name);
-        if (namelen >= MAXNAMELEN)
-                return EFAULT;          /* match IRIX behaviour */
        XFS_STATS_INC(xs_attr_remove);
        if (XFS_FORCED_SHUTDOWN(dp->i_mount))
                return (EIO);
+        error = xfs_attr_name_to_xname(&xname, name);
+        if (error)
+                return error;
        xfs_ilock(dp, XFS_ILOCK_SHARED);
        if (XFS_IFORK_Q(dp) == 0 ||
                   (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
@@ -595,10 +609,10 @@ xfs_attr_remove(
        }
        xfs_iunlock(dp, XFS_ILOCK_SHARED);
-        return xfs_attr_remove_int(dp, name, namelen, flags);
+        return xfs_attr_remove_int(dp, &xname, flags);
 }
-int                                                             /* error */
+STATIC int
 xfs_attr_list_int(xfs_attr_list_context_t *context)
 {
        int error;
@@ -2522,8 +2536,7 @@ attr_generic_get(
 {
        int     error, asize = size;
-        error = xfs_attr_get(xfs_vtoi(vp), name, data,
+        error = xfs_attr_get(xfs_vtoi(vp), name, data, &asize, xflags);
-                                    &asize, xflags, NULL);
        if (!error)
                return asize;
        return -error;
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 786eba3121c4..6cfc9384fe35 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -158,14 +158,10 @@ struct xfs_da_args;
 /*
 * Overall external interface routines.
 */
-int xfs_attr_set_int(struct xfs_inode *, const char *, int, char *, int, int);
-int xfs_attr_remove_int(struct xfs_inode *, const char *, int, int);
-int xfs_attr_list_int(struct xfs_attr_list_context *);
 int xfs_attr_inactive(struct xfs_inode *dp);
 int xfs_attr_shortform_getvalue(struct xfs_da_args *);
-int xfs_attr_fetch(struct xfs_inode *, const char *, int,
+int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
-                        char *, int *, int, struct cred *);
 int xfs_attr_rmtval_get(struct xfs_da_args *args);
 #endif  /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index eb198c01c35d..53c259f5a5af 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4074,7 +4074,6 @@ xfs_bmap_add_attrfork(
 error2:
        xfs_bmap_cancel(&flist);
 error1:
-        ASSERT(ismrlocked(&ip->i_lock,MR_UPDATE));
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
 error0:
        xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 3f53fad356a3..5f3647cb9885 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -162,7 +162,7 @@ xfs_swap_extents(
                ips[1] = ip;
        }
-        xfs_lock_inodes(ips, 2, 0, lock_flags);
+        xfs_lock_inodes(ips, 2, lock_flags);
        locked = 1;
        /* Verify that both files have the same format */
@@ -265,7 +265,7 @@ xfs_swap_extents(
                locked = 0;
                goto error0;
        }
-        xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);
+        xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
        /*
         * Count the number of extended attribute blocks
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index d3a0f538d6a6..381ebda4f7bc 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -462,7 +462,7 @@ xfs_fs_counts(
        xfs_mount_t             *mp,
        xfs_fsop_counts_t       *cnt)
 {
-        xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT);
+        xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
        spin_lock(&mp->m_sb_lock);
        cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
        cnt->freertx = mp->m_sb.sb_frextents;
@@ -524,7 +524,7 @@ xfs_reserve_blocks(
         */
 retry:
        spin_lock(&mp->m_sb_lock);
-        xfs_icsb_sync_counters_flags(mp, XFS_ICSB_SB_LOCKED);
+        xfs_icsb_sync_counters_locked(mp, 0);
        /*
         * If our previous reservation was larger than the current value,
@@ -552,11 +552,8 @@ retry:
                        mp->m_resblks += free;
                        mp->m_resblks_avail += free;
                        fdblks_delta = -free;
-                        mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp);
                } else {
                        fdblks_delta = -delta;
-                        mp->m_sb.sb_fdblocks =
-                                lcounter + XFS_ALLOC_SET_ASIDE(mp);
                        mp->m_resblks = request;
                        mp->m_resblks_avail += delta;
                }
@@ -587,7 +584,6 @@ out:
                if (error == ENOSPC)
                        goto retry;
        }
        return 0;
 }
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index a64dfbd565a5..aad8c5da38af 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -147,6 +147,7 @@ xfs_ialloc_ag_alloc(
        int             version;        /* inode version number to use */
        int             isaligned = 0;  /* inode allocation at stripe unit */
                                        /* boundary */
+        unsigned int    gen;
        args.tp = tp;
        args.mp = tp->t_mountp;
@@ -290,6 +291,14 @@ xfs_ialloc_ag_alloc(
        else
                version = XFS_DINODE_VERSION_1;
+        /*
+         * Seed the new inode cluster with a random generation number. This
+         * prevents short-term reuse of generation numbers if a chunk is
+         * freed and then immediately reallocated. We use random numbers
+         * rather than a linear progression to prevent the next generation
+         * number from being easily guessable.
+         */
+        gen = random32();
        for (j = 0; j < nbufs; j++) {
                /*
                 * Get the block.
@@ -309,6 +318,7 @@ xfs_ialloc_ag_alloc(
                        free = XFS_MAKE_IPTR(args.mp, fbuf, i);
                        free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
                        free->di_core.di_version = version;
+                        free->di_core.di_gen = cpu_to_be32(gen);
                        free->di_next_unlinked = cpu_to_be32(NULLAGINO);
                        xfs_ialloc_log_di(tp, fbuf, i,
                                XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index e657c5128460..b07604b94d9f 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -593,8 +593,9 @@ xfs_iunlock_map_shared(
 *              XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
 */
 void
-xfs_ilock(xfs_inode_t   *ip,
+xfs_ilock(
-          uint          lock_flags)
+        xfs_inode_t             *ip,
+        uint                    lock_flags)
 {
        /*
         * You can't set both SHARED and EXCL for the same lock,
@@ -607,16 +608,16 @@ xfs_ilock(xfs_inode_t	*ip,
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
-        if (lock_flags & XFS_IOLOCK_EXCL) {
+        if (lock_flags & XFS_IOLOCK_EXCL)
                mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
-        } else if (lock_flags & XFS_IOLOCK_SHARED) {
+        else if (lock_flags & XFS_IOLOCK_SHARED)
                mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
-        }
-        if (lock_flags & XFS_ILOCK_EXCL) {
+        if (lock_flags & XFS_ILOCK_EXCL)
                mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
-        } else if (lock_flags & XFS_ILOCK_SHARED) {
+        else if (lock_flags & XFS_ILOCK_SHARED)
                mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
-        }
        xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address);
 }
@@ -631,15 +632,12 @@ xfs_ilock(xfs_inode_t	*ip,
 * lock_flags -- this parameter indicates the inode's locks to be
 *       to be locked.  See the comment for xfs_ilock() for a list
 *       of valid values.
- *
 */
 int
-xfs_ilock_nowait(xfs_inode_t    *ip,
+xfs_ilock_nowait(
-                 uint           lock_flags)
+        xfs_inode_t             *ip,
+        uint                    lock_flags)
 {
-        int     iolocked;
-        int     ilocked;
        /*
         * You can't set both SHARED and EXCL for the same lock,
         * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
@@ -651,37 +649,30 @@ xfs_ilock_nowait(xfs_inode_t	*ip,
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
-        iolocked = 0;
        if (lock_flags & XFS_IOLOCK_EXCL) {
-                iolocked = mrtryupdate(&ip->i_iolock);
+                if (!mrtryupdate(&ip->i_iolock))
-                if (!iolocked) {
+                        goto out;
-                        return 0;
-                }
        } else if (lock_flags & XFS_IOLOCK_SHARED) {
-                iolocked = mrtryaccess(&ip->i_iolock);
+                if (!mrtryaccess(&ip->i_iolock))
-                if (!iolocked) {
+                        goto out;
-                        return 0;
-                }
        }
        if (lock_flags & XFS_ILOCK_EXCL) {
-                ilocked = mrtryupdate(&ip->i_lock);
+                if (!mrtryupdate(&ip->i_lock))
-                if (!ilocked) {
+                        goto out_undo_iolock;
-                        if (iolocked) {
-                                mrunlock(&ip->i_iolock);
-                        }
-                        return 0;
-                }
        } else if (lock_flags & XFS_ILOCK_SHARED) {
-                ilocked = mrtryaccess(&ip->i_lock);
+                if (!mrtryaccess(&ip->i_lock))
-                if (!ilocked) {
+                        goto out_undo_iolock;
-                        if (iolocked) {
-                                mrunlock(&ip->i_iolock);
-                        }
-                        return 0;
-                }
        }
        xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address);
        return 1;
+ out_undo_iolock:
+        if (lock_flags & XFS_IOLOCK_EXCL)
+                mrunlock_excl(&ip->i_iolock);
+        else if (lock_flags & XFS_IOLOCK_SHARED)
+                mrunlock_shared(&ip->i_iolock);
+ out:
+        return 0;
 }
 /*
@@ -697,8 +688,9 @@ xfs_ilock_nowait(xfs_inode_t	*ip,
 *
 */
 void
-xfs_iunlock(xfs_inode_t *ip,
+xfs_iunlock(
-            uint        lock_flags)
+        xfs_inode_t             *ip,
+        uint                    lock_flags)
 {
        /*
         * You can't set both SHARED and EXCL for the same lock,
@@ -713,31 +705,25 @@ xfs_iunlock(xfs_inode_t	*ip,
                        XFS_LOCK_DEP_MASK)) == 0);
        ASSERT(lock_flags != 0);
-        if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) {
+        if (lock_flags & XFS_IOLOCK_EXCL)
-                ASSERT(!(lock_flags & XFS_IOLOCK_SHARED) ||
+                mrunlock_excl(&ip->i_iolock);
-                       (ismrlocked(&ip->i_iolock, MR_ACCESS)));
+        else if (lock_flags & XFS_IOLOCK_SHARED)
-                ASSERT(!(lock_flags & XFS_IOLOCK_EXCL) ||
+                mrunlock_shared(&ip->i_iolock);
-                       (ismrlocked(&ip->i_iolock, MR_UPDATE)));
-                mrunlock(&ip->i_iolock);
-        }
-        if (lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) {
+        if (lock_flags & XFS_ILOCK_EXCL)
-                ASSERT(!(lock_flags & XFS_ILOCK_SHARED) ||
+                mrunlock_excl(&ip->i_lock);
-                       (ismrlocked(&ip->i_lock, MR_ACCESS)));
+        else if (lock_flags & XFS_ILOCK_SHARED)
-                ASSERT(!(lock_flags & XFS_ILOCK_EXCL) ||
+                mrunlock_shared(&ip->i_lock);
-                       (ismrlocked(&ip->i_lock, MR_UPDATE)));
-                mrunlock(&ip->i_lock);
+        if ((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) &&
+            !(lock_flags & XFS_IUNLOCK_NONOTIFY) && ip->i_itemp) {
                /*
                 * Let the AIL know that this item has been unlocked in case
                 * it is in the AIL and anyone is waiting on it.  Don't do
                 * this if the caller has asked us not to.
                 */
-                if (!(lock_flags & XFS_IUNLOCK_NONOTIFY) &&
+                xfs_trans_unlocked_item(ip->i_mount,
-                     ip->i_itemp != NULL) {
+                                        (xfs_log_item_t*)(ip->i_itemp));
-                        xfs_trans_unlocked_item(ip->i_mount,
-                                                (xfs_log_item_t*)(ip->i_itemp));
-                }
        }
        xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);
 }
@@ -747,21 +733,47 @@ xfs_iunlock(xfs_inode_t	*ip,
 * if it is being demoted.
 */
 void
-xfs_ilock_demote(xfs_inode_t    *ip,
+xfs_ilock_demote(
-                 uint           lock_flags)
+        xfs_inode_t             *ip,
+        uint                    lock_flags)
 {
        ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
        ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
-        if (lock_flags & XFS_ILOCK_EXCL) {
+        if (lock_flags & XFS_ILOCK_EXCL)
-                ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
                mrdemote(&ip->i_lock);
-        }
+        if (lock_flags & XFS_IOLOCK_EXCL)
-        if (lock_flags & XFS_IOLOCK_EXCL) {
-                ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
                mrdemote(&ip->i_iolock);
+}
+#ifdef DEBUG
+/*
+ * Debug-only routine.  Without additional rw_semaphore APIs, we can
+ * now only answer requests regarding whether we hold the lock for write
+ * (reader state is outside our visibility; we only track writer state).
+ *
+ * Note: this means !xfs_isilocked() would give false positives; don't do that.
+ */
+int
+xfs_isilocked(
+        xfs_inode_t             *ip,
+        uint                    lock_flags)
+{
+        if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) ==
+                        XFS_ILOCK_EXCL) {
+                if (!ip->i_lock.mr_writer)
+                        return 0;
        }
+        if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) ==
+                        XFS_IOLOCK_EXCL) {
+                if (!ip->i_iolock.mr_writer)
+                        return 0;
+        }
+        return 1;
 }
+#endif
 /*
 * The following three routines simply manage the i_flock
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ca12acb90394..cf0bb9c1d621 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1291,7 +1291,7 @@ xfs_file_last_byte(
        xfs_fileoff_t   size_last_block;
        int             error;
-        ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE | MR_ACCESS));
+        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
        mp = ip->i_mount;
        /*
@@ -1402,7 +1402,7 @@ xfs_itruncate_start(
        bhv_vnode_t     *vp;
        int             error = 0;
-        ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0);
+        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
        ASSERT((new_size == 0) || (new_size <= ip->i_size));
        ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
               (flags == XFS_ITRUNC_MAYBE));
@@ -1528,8 +1528,7 @@ xfs_itruncate_finish(
        xfs_bmap_free_t free_list;
        int             error;
-        ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0);
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
        ASSERT((new_size == 0) || (new_size <= ip->i_size));
        ASSERT(*tp != NULL);
        ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -1780,8 +1779,7 @@ xfs_igrow_start(
        xfs_fsize_t     new_size,
        cred_t          *credp)
 {
-        ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-        ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
        ASSERT(new_size > ip->i_size);
        /*
@@ -1809,8 +1807,7 @@ xfs_igrow_finish(
        xfs_fsize_t     new_size,
        int             change_flag)
 {
-        ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-        ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
        ASSERT(ip->i_transp == tp);
        ASSERT(new_size > ip->i_size);
@@ -2287,7 +2284,7 @@ xfs_ifree(
        xfs_dinode_t            *dip;
        xfs_buf_t               *ibp;
-        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        ASSERT(ip->i_transp == tp);
        ASSERT(ip->i_d.di_nlink == 0);
        ASSERT(ip->i_d.di_nextents == 0);
@@ -2746,7 +2743,7 @@ void
 xfs_ipin(
        xfs_inode_t     *ip)
 {
-        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        atomic_inc(&ip->i_pincount);
 }
@@ -2779,7 +2776,7 @@ __xfs_iunpin_wait(
 {
        xfs_inode_log_item_t    *iip = ip->i_itemp;
-        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
        if (atomic_read(&ip->i_pincount) == 0)
                return;
@@ -2829,7 +2826,7 @@ xfs_iextents_copy(
        xfs_fsblock_t           start_block;
        ifp = XFS_IFORK_PTR(ip, whichfork);
-        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
        ASSERT(ifp->if_bytes > 0);
        nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
@@ -3132,7 +3129,7 @@ xfs_iflush(
        XFS_STATS_INC(xs_iflush_count);
-        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
        ASSERT(issemalocked(&(ip->i_flock)));
        ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
               ip->i_d.di_nextents > ip->i_df.if_ext_max);
@@ -3297,7 +3294,7 @@ xfs_iflush_int(
        int                     first;
 #endif
-        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
        ASSERT(issemalocked(&(ip->i_flock)));
        ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
               ip->i_d.di_nextents > ip->i_df.if_ext_max);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 93c37697a72c..0a999fee4f03 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -386,20 +386,9 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
 #define XFS_ILOCK_EXCL          (1<<2)
 #define XFS_ILOCK_SHARED        (1<<3)
 #define XFS_IUNLOCK_NONOTIFY    (1<<4)
-/*      #define XFS_IOLOCK_NESTED       (1<<5)  */
-#define XFS_EXTENT_TOKEN_RD     (1<<6)
-#define XFS_SIZE_TOKEN_RD       (1<<7)
-#define XFS_EXTSIZE_RD          (XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD)
-#define XFS_WILLLEND            (1<<8)  /* Always acquire tokens for lending */
-#define XFS_EXTENT_TOKEN_WR     (XFS_EXTENT_TOKEN_RD | XFS_WILLLEND)
-#define XFS_SIZE_TOKEN_WR       (XFS_SIZE_TOKEN_RD | XFS_WILLLEND)
-#define XFS_EXTSIZE_WR          (XFS_EXTSIZE_RD | XFS_WILLLEND)
-/* TODO:XFS_SIZE_TOKEN_WANT     (1<<9) */
 #define XFS_LOCK_MASK           (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
-                                | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
+                                | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
-                                | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD \
-                                | XFS_WILLLEND)
 /*
 * Flags for lockdep annotations.
@@ -483,6 +472,7 @@ void		xfs_ilock(xfs_inode_t *, uint);
 int             xfs_ilock_nowait(xfs_inode_t *, uint);
 void            xfs_iunlock(xfs_inode_t *, uint);
 void            xfs_ilock_demote(xfs_inode_t *, uint);
+int             xfs_isilocked(xfs_inode_t *, uint);
 void            xfs_iflock(xfs_inode_t *);
 int             xfs_iflock_nowait(xfs_inode_t *);
 uint            xfs_ilock_map_shared(xfs_inode_t *);
@@ -534,7 +524,7 @@ int		xfs_iflush(xfs_inode_t *, uint);
 void            xfs_iflush_all(struct xfs_mount *);
 void            xfs_ichgtime(xfs_inode_t *, int);
 xfs_fsize_t     xfs_file_last_byte(xfs_inode_t *);
-void            xfs_lock_inodes(xfs_inode_t **, int, int, uint);
+void            xfs_lock_inodes(xfs_inode_t **, int, uint);
 void            xfs_synchronize_atime(xfs_inode_t *);
 void            xfs_mark_inode_dirty_sync(xfs_inode_t *);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 93b5db453ea2..167b33f15772 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -547,7 +547,7 @@ STATIC void
 xfs_inode_item_pin(
        xfs_inode_log_item_t    *iip)
 {
-        ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE));
+        ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
        xfs_ipin(iip->ili_inode);
 }
@@ -664,13 +664,13 @@ xfs_inode_item_unlock(
        ASSERT(iip != NULL);
        ASSERT(iip->ili_inode->i_itemp != NULL);
-        ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE));
+        ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
        ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
                  XFS_ILI_IOLOCKED_EXCL)) ||
-               ismrlocked(&(iip->ili_inode->i_iolock), MR_UPDATE));
+               xfs_isilocked(iip->ili_inode, XFS_IOLOCK_EXCL));
        ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
                  XFS_ILI_IOLOCKED_SHARED)) ||
-               ismrlocked(&(iip->ili_inode->i_iolock), MR_ACCESS));
+               xfs_isilocked(iip->ili_inode, XFS_IOLOCK_SHARED));
        /*
         * Clear the transaction pointer in the inode.
         */
@@ -769,7 +769,7 @@ xfs_inode_item_pushbuf(
        ip = iip->ili_inode;
-        ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
        /*
         * The ili_pushbuf_flag keeps others from
@@ -857,7 +857,7 @@ xfs_inode_item_push(
        ip = iip->ili_inode;
-        ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
        ASSERT(issemalocked(&(ip->i_flock)));
        /*
         * Since we were able to lock the inode's flush lock and
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index fb3cf1191419..7edcde691d1a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -196,14 +196,14 @@ xfs_iomap(
                break;
        case BMAPI_WRITE:
                xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count);
-                lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
+                lockmode = XFS_ILOCK_EXCL;
                if (flags & BMAPI_IGNSTATE)
                        bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
                xfs_ilock(ip, lockmode);
                break;
        case BMAPI_ALLOCATE:
                xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count);
-                lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD;
+                lockmode = XFS_ILOCK_SHARED;
                bmapi_flags = XFS_BMAPI_ENTIRE;
                /* Attempt non-blocking lock */
@@ -523,8 +523,7 @@ xfs_iomap_write_direct(
                goto error_out;
        }
-        if (unlikely(!imap.br_startblock &&
+        if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) {
-                     !(XFS_IS_REALTIME_INODE(ip)))) {
                error = xfs_cmn_err_fsblock_zero(ip, &imap);
                goto error_out;
        }
@@ -624,7 +623,7 @@ xfs_iomap_write_delay(
        int             prealloc, fsynced = 0;
        int             error;
-        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        /*
         * Make sure that the dquots are there. This doesn't hold
@@ -686,8 +685,7 @@ retry:
                goto retry;
        }
-        if (unlikely(!imap[0].br_startblock &&
+        if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
-                     !(XFS_IS_REALTIME_INODE(ip))))
                return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
        *ret_imap = imap[0];
@@ -838,9 +836,9 @@ xfs_iomap_write_allocate(
                 * See if we were able to allocate an extent that
                 * covers at least part of the callers request
                 */
-                if (unlikely(!imap.br_startblock &&
+                if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
-                             XFS_IS_REALTIME_INODE(ip)))
                        return xfs_cmn_err_fsblock_zero(ip, &imap);
                if ((offset_fsb >= imap.br_startoff) &&
                    (offset_fsb < (imap.br_startoff +
                                   imap.br_blockcount))) {
@@ -934,8 +932,7 @@ xfs_iomap_write_unwritten(
                if (error)
                        return XFS_ERROR(error);
-                if (unlikely(!imap.br_startblock &&
+                if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
-                             !(XFS_IS_REALTIME_INODE(ip))))
                        return xfs_cmn_err_fsblock_zero(ip, &imap);
                if ((numblks_fsb = imap.br_blockcount) == 0) {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index eb85bdedad0c..419de15aeb43 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -71,11 +71,6 @@ xfs_bulkstat_one_iget(
        ASSERT(ip != NULL);
        ASSERT(ip->i_blkno != (xfs_daddr_t)0);
-        if (ip->i_d.di_mode == 0) {
-                *stat = BULKSTAT_RV_NOTHING;
-                error = XFS_ERROR(ENOENT);
-                goto out_iput;
-        }
        vp = XFS_ITOV(ip);
        dic = &ip->i_d;
@@ -124,7 +119,6 @@ xfs_bulkstat_one_iget(
                break;
        }
- out_iput:
        xfs_iput(ip, XFS_ILOCK_SHARED);
        return error;
 }
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2fec452afbcc..da3988453b71 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -54,8 +54,9 @@ STATIC void	xfs_unmountfs_wait(xfs_mount_t *);
 #ifdef HAVE_PERCPU_SB
 STATIC void     xfs_icsb_destroy_counters(xfs_mount_t *);
 STATIC void     xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
-                                                int, int);
+                                                int);
-STATIC void     xfs_icsb_sync_counters(xfs_mount_t *);
+STATIC void     xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
+                                                int);
 STATIC int      xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
                                                int64_t, int);
 STATIC void     xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
@@ -63,8 +64,8 @@ STATIC void	xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
 #else
 #define xfs_icsb_destroy_counters(mp)                   do { } while (0)
-#define xfs_icsb_balance_counter(mp, a, b, c)           do { } while (0)
+#define xfs_icsb_balance_counter(mp, a, b)              do { } while (0)
-#define xfs_icsb_sync_counters(mp)                      do { } while (0)
+#define xfs_icsb_balance_counter_locked(mp, a, b)       do { } while (0)
 #define xfs_icsb_modify_counters(mp, a, b, c)           do { } while (0)
 #endif
@@ -1400,7 +1401,7 @@ xfs_log_sbcount(
        if (!xfs_fs_writable(mp))
                return 0;
-        xfs_icsb_sync_counters(mp);
+        xfs_icsb_sync_counters(mp, 0);
        /*
         * we don't need to do this if we are updating the superblock
@@ -2026,9 +2027,9 @@ xfs_icsb_cpu_notify(
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                xfs_icsb_lock(mp);
-                xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
+                xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
-                xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
+                xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
-                xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0);
+                xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
                xfs_icsb_unlock(mp);
                break;
        case CPU_DEAD:
@@ -2048,12 +2049,9 @@ xfs_icsb_cpu_notify(
                memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-                xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT,
+                xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
-                                         XFS_ICSB_SB_LOCKED, 0);
+                xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
-                xfs_icsb_balance_counter(mp, XFS_SBS_IFREE,
+                xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
-                                         XFS_ICSB_SB_LOCKED, 0);
-                xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS,
-                                         XFS_ICSB_SB_LOCKED, 0);
                spin_unlock(&mp->m_sb_lock);
                xfs_icsb_unlock(mp);
                break;
@@ -2105,9 +2103,9 @@ xfs_icsb_reinit_counters(
         * initial balance kicks us off correctly
         */
        mp->m_icsb_counters = -1;
-        xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
+        xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
-        xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
+        xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
-        xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0);
+        xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
        xfs_icsb_unlock(mp);
 }
@@ -2223,7 +2221,7 @@ xfs_icsb_disable_counter(
        if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
                /* drain back to superblock */
-                xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT);
+                xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
                switch(field) {
                case XFS_SBS_ICOUNT:
                        mp->m_sb.sb_icount = cnt.icsb_icount;
@@ -2278,38 +2276,33 @@ xfs_icsb_enable_counter(
 }
 void
-xfs_icsb_sync_counters_flags(
+xfs_icsb_sync_counters_locked(
        xfs_mount_t     *mp,
        int             flags)
 {
        xfs_icsb_cnts_t cnt;
-        /* Pass 1: lock all counters */
-        if ((flags & XFS_ICSB_SB_LOCKED) == 0)
-                spin_lock(&mp->m_sb_lock);
        xfs_icsb_count(mp, &cnt, flags);
-        /* Step 3: update mp->m_sb fields */
        if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
                mp->m_sb.sb_icount = cnt.icsb_icount;
        if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
                mp->m_sb.sb_ifree = cnt.icsb_ifree;
        if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
                mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
-        if ((flags & XFS_ICSB_SB_LOCKED) == 0)
-                spin_unlock(&mp->m_sb_lock);
 }
 /*
 * Accurate update of per-cpu counters to incore superblock
 */
-STATIC void
+void
 xfs_icsb_sync_counters(
-        xfs_mount_t     *mp)
+        xfs_mount_t     *mp,
+        int             flags)
 {
-        xfs_icsb_sync_counters_flags(mp, 0);
+        spin_lock(&mp->m_sb_lock);
+        xfs_icsb_sync_counters_locked(mp, flags);
+        spin_unlock(&mp->m_sb_lock);
 }
 /*
@@ -2332,19 +2325,15 @@ xfs_icsb_sync_counters(
 #define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
                (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
 STATIC void
-xfs_icsb_balance_counter(
+xfs_icsb_balance_counter_locked(
        xfs_mount_t     *mp,
        xfs_sb_field_t  field,
-        int             flags,
        int             min_per_cpu)
 {
        uint64_t        count, resid;
        int             weight = num_online_cpus();
        uint64_t        min = (uint64_t)min_per_cpu;
-        if (!(flags & XFS_ICSB_SB_LOCKED))
-                spin_lock(&mp->m_sb_lock);
        /* disable counter and sync counter */
        xfs_icsb_disable_counter(mp, field);
@@ -2354,19 +2343,19 @@ xfs_icsb_balance_counter(
                count = mp->m_sb.sb_icount;
                resid = do_div(count, weight);
                if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
-                        goto out;
+                        return;
                break;
        case XFS_SBS_IFREE:
                count = mp->m_sb.sb_ifree;
                resid = do_div(count, weight);
                if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
-                        goto out;
+                        return;
                break;
        case XFS_SBS_FDBLOCKS:
                count = mp->m_sb.sb_fdblocks;
                resid = do_div(count, weight);
                if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
-                        goto out;
+                        return;
                break;
        default:
                BUG();
@@ -2375,9 +2364,17 @@ xfs_icsb_balance_counter(
        }
        xfs_icsb_enable_counter(mp, field, count, resid);
-out:
+}
-        if (!(flags & XFS_ICSB_SB_LOCKED))
-                spin_unlock(&mp->m_sb_lock);
+STATIC void
+xfs_icsb_balance_counter(
+        xfs_mount_t     *mp,
+        xfs_sb_field_t  fields,
+        int             min_per_cpu)
+{
+        spin_lock(&mp->m_sb_lock);
+        xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
+        spin_unlock(&mp->m_sb_lock);
 }
 STATIC int
@@ -2484,7 +2481,7 @@ slow_path:
         * we are done.
         */
        if (ret != ENOSPC)
-                xfs_icsb_balance_counter(mp, field, 0, 0);
+                xfs_icsb_balance_counter(mp, field, 0);
        xfs_icsb_unlock(mp);
        return ret;
@@ -2508,7 +2505,7 @@ balance_counter:
         * will either succeed through the fast path or slow path without
         * another balance operation being required.
         */
-        xfs_icsb_balance_counter(mp, field, 0, delta);
+        xfs_icsb_balance_counter(mp, field, delta);
        xfs_icsb_unlock(mp);
        goto again;
 }
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1ed575110ff0..63e0693a358a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -206,17 +206,18 @@ typedef struct xfs_icsb_cnts {
 #define XFS_ICSB_FLAG_LOCK      (1 << 0)        /* counter lock bit */
-#define XFS_ICSB_SB_LOCKED      (1 << 0)        /* sb already locked */
 #define XFS_ICSB_LAZY_COUNT     (1 << 1)        /* accuracy not needed */
 extern int      xfs_icsb_init_counters(struct xfs_mount *);
 extern void     xfs_icsb_reinit_counters(struct xfs_mount *);
-extern void     xfs_icsb_sync_counters_flags(struct xfs_mount *, int);
+extern void     xfs_icsb_sync_counters(struct xfs_mount *, int);
+extern void     xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
 #else
 #define xfs_icsb_init_counters(mp)      (0)
 #define xfs_icsb_reinit_counters(mp)    do { } while (0)
-#define xfs_icsb_sync_counters_flags(mp, flags) do { } while (0)
+#define xfs_icsb_sync_counters(mp, flags)       do { } while (0)
+#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
 #endif
 typedef struct xfs_ail {
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index ee371890d85d..d8063e1ad298 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -55,85 +55,32 @@ xfs_rename_unlock4(
        xfs_iunlock(i_tab[0], lock_mode);
        for (i = 1; i < 4; i++) {
-                if (i_tab[i] == NULL) {
+                if (i_tab[i] == NULL)
                        break;
-                }
                /*
                 * Watch out for duplicate entries in the table.
                 */
-                if (i_tab[i] != i_tab[i-1]) {
+                if (i_tab[i] != i_tab[i-1])
                        xfs_iunlock(i_tab[i], lock_mode);
-                }
        }
 }
-#ifdef DEBUG
-int xfs_rename_skip, xfs_rename_nskip;
-#endif
 /*
- * The following routine will acquire the locks required for a rename
+ * Enter all inodes for a rename transaction into a sorted array.
- * operation. The code understands the semantics of renames and will
- * validate that name1 exists under dp1 & that name2 may or may not
- * exist under dp2.
- *
- * We are renaming dp1/name1 to dp2/name2.
- *
- * Return ENOENT if dp1 does not exist, other lookup errors, or 0 for success.
 */
-STATIC int
+STATIC void
-xfs_lock_for_rename(
+xfs_sort_for_rename(
        xfs_inode_t     *dp1,   /* in: old (source) directory inode */
        xfs_inode_t     *dp2,   /* in: new (target) directory inode */
        xfs_inode_t     *ip1,   /* in: inode of old entry */
-        struct xfs_name *name2, /* in: new entry name */
+        xfs_inode_t     *ip2,   /* in: inode of new entry, if it
-        xfs_inode_t     **ipp2, /* out: inode of new entry, if it
                                   already exists, NULL otherwise. */
        xfs_inode_t     **i_tab,/* out: array of inode returned, sorted */
        int             *num_inodes)  /* out: number of inodes in array */
 {
-        xfs_inode_t             *ip2 = NULL;
        xfs_inode_t             *temp;
-        xfs_ino_t               inum1, inum2;
-        int                     error;
        int                     i, j;
-        uint                    lock_mode;
-        int                     diff_dirs = (dp1 != dp2);
-        /*
-         * First, find out the current inums of the entries so that we
-         * can determine the initial locking order.  We'll have to
-         * sanity check stuff after all the locks have been acquired
-         * to see if we still have the right inodes, directories, etc.
-         */
-        lock_mode = xfs_ilock_map_shared(dp1);
-        IHOLD(ip1);
-        xfs_itrace_ref(ip1);
-        inum1 = ip1->i_ino;
-        /*
-         * Unlock dp1 and lock dp2 if they are different.
-         */
-        if (diff_dirs) {
-                xfs_iunlock_map_shared(dp1, lock_mode);
-                lock_mode = xfs_ilock_map_shared(dp2);
-        }
-        error = xfs_dir_lookup_int(dp2, lock_mode, name2, &inum2, &ip2);
-        if (error == ENOENT) {          /* target does not need to exist. */
-                inum2 = 0;
-        } else if (error) {
-                /*
-                 * If dp2 and dp1 are the same, the next line unlocks dp1.
-                 * Got it?
-                 */
-                xfs_iunlock_map_shared(dp2, lock_mode);
-                IRELE (ip1);
-                return error;
-        } else {
-                xfs_itrace_ref(ip2);
-        }
        /*
         * i_tab contains a list of pointers to inodes.  We initialize
@@ -145,21 +92,20 @@ xfs_lock_for_rename(
        i_tab[0] = dp1;
        i_tab[1] = dp2;
        i_tab[2] = ip1;
-        if (inum2 == 0) {
+        if (ip2) {
-                *num_inodes = 3;
-                i_tab[3] = NULL;
-        } else {
                *num_inodes = 4;
                i_tab[3] = ip2;
+        } else {
+                *num_inodes = 3;
+                i_tab[3] = NULL;
        }
-        *ipp2 = i_tab[3];
        /*
         * Sort the elements via bubble sort.  (Remember, there are at
         * most 4 elements to sort, so this is adequate.)
         */
-        for (i=0; i < *num_inodes; i++) {
+        for (i = 0; i < *num_inodes; i++) {
-                for (j=1; j < *num_inodes; j++) {
+                for (j = 1; j < *num_inodes; j++) {
                        if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
                                temp = i_tab[j];
                                i_tab[j] = i_tab[j-1];
@@ -167,30 +113,6 @@ xfs_lock_for_rename(
                        }
                }
        }
-        /*
-         * We have dp2 locked. If it isn't first, unlock it.
-         * If it is first, tell xfs_lock_inodes so it can skip it
-         * when locking. if dp1 == dp2, xfs_lock_inodes will skip both
-         * since they are equal. xfs_lock_inodes needs all these inodes
-         * so that it can unlock and retry if there might be a dead-lock
-         * potential with the log.
-         */
-        if (i_tab[0] == dp2 && lock_mode == XFS_ILOCK_SHARED) {
-#ifdef DEBUG
-                xfs_rename_skip++;
-#endif
-                xfs_lock_inodes(i_tab, *num_inodes, 1, XFS_ILOCK_SHARED);
-        } else {
-#ifdef DEBUG
-                xfs_rename_nskip++;
-#endif
-                xfs_iunlock_map_shared(dp2, lock_mode);
-                xfs_lock_inodes(i_tab, *num_inodes, 0, XFS_ILOCK_SHARED);
-        }
-        return 0;
 }
 /*
@@ -202,10 +124,10 @@ xfs_rename(
        struct xfs_name *src_name,
        xfs_inode_t     *src_ip,
        xfs_inode_t     *target_dp,
-        struct xfs_name *target_name)
+        struct xfs_name *target_name,
+        xfs_inode_t     *target_ip)
 {
-        xfs_trans_t     *tp;
+        xfs_trans_t     *tp = NULL;
-        xfs_inode_t     *target_ip;
        xfs_mount_t     *mp = src_dp->i_mount;
        int             new_parent;             /* moving to a new dir */
        int             src_is_directory;       /* src_name is a directory */
@@ -215,9 +137,7 @@ xfs_rename(
        int             cancel_flags;
        int             committed;
        xfs_inode_t     *inodes[4];
-        int             target_ip_dropped = 0;  /* dropped target_ip link? */
        int             spaceres;
-        int             target_link_zero = 0;
        int             num_inodes;
        xfs_itrace_entry(src_dp);
@@ -230,64 +150,27 @@ xfs_rename(
                                        target_dp, DM_RIGHT_NULL,
                                        src_name->name, target_name->name,
                                        0, 0, 0);
-                if (error) {
+                if (error)
                        return error;
-                }
        }
        /* Return through std_return after this point. */
-        /*
+        new_parent = (src_dp != target_dp);
-         * Lock all the participating inodes. Depending upon whether
+        src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
-         * the target_name exists in the target directory, and
-         * whether the target directory is the same as the source
-         * directory, we can lock from 2 to 4 inodes.
-         * xfs_lock_for_rename() will return ENOENT if src_name
-         * does not exist in the source directory.
-         */
-        tp = NULL;
-        error = xfs_lock_for_rename(src_dp, target_dp, src_ip, target_name,
-                                        &target_ip, inodes, &num_inodes);
-        if (error) {
-                /*
-                 * We have nothing locked, no inode references, and
-                 * no transaction, so just get out.
-                 */
-                goto std_return;
-        }
-        ASSERT(src_ip != NULL);
-        if ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+        if (src_is_directory) {
                /*
                 * Check for link count overflow on target_dp
                 */
-                if (target_ip == NULL && (src_dp != target_dp) &&
+                if (target_ip == NULL && new_parent &&
                    target_dp->i_d.di_nlink >= XFS_MAXLINK) {
                        error = XFS_ERROR(EMLINK);
-                        xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
+                        goto std_return;
-                        goto rele_return;
                }
        }
-        /*
+        xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
-         * If we are using project inheritance, we only allow renames
+                                inodes, &num_inodes);
-         * into our tree when the project IDs are the same; else the
-         * tree quota mechanism would be circumvented.
-         */
-        if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
-                     (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
-                error = XFS_ERROR(EXDEV);
-                xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
-                goto rele_return;
-        }
-        new_parent = (src_dp != target_dp);
-        src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
-        /*
-         * Drop the locks on our inodes so that we can start the transaction.
-         */
-        xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
        XFS_BMAP_INIT(&free_list, &first_block);
        tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
@@ -302,7 +185,7 @@ xfs_rename(
        }
        if (error) {
                xfs_trans_cancel(tp, 0);
-                goto rele_return;
+                goto std_return;
        }
        /*
@@ -310,13 +193,29 @@ xfs_rename(
         */
        if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) {
                xfs_trans_cancel(tp, cancel_flags);
-                goto rele_return;
+                goto std_return;
        }
        /*
-         * Reacquire the inode locks we dropped above.
+         * Lock all the participating inodes. Depending upon whether
+         * the target_name exists in the target directory, and
+         * whether the target directory is the same as the source
+         * directory, we can lock from 2 to 4 inodes.
+         */
+        xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
+        /*
+         * If we are using project inheritance, we only allow renames
+         * into our tree when the project IDs are the same; else the
+         * tree quota mechanism would be circumvented.
         */
-        xfs_lock_inodes(inodes, num_inodes, 0, XFS_ILOCK_EXCL);
+        if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
+                     (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
+                error = XFS_ERROR(EXDEV);
+                xfs_rename_unlock4(inodes, XFS_ILOCK_EXCL);
+                xfs_trans_cancel(tp, cancel_flags);
+                goto std_return;
+        }
        /*
         * Join all the inodes to the transaction. From this point on,
@@ -328,17 +227,17 @@ xfs_rename(
         */
        IHOLD(src_dp);
        xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
        if (new_parent) {
                IHOLD(target_dp);
                xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
        }
-        if ((src_ip != src_dp) && (src_ip != target_dp)) {
-                xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
+        IHOLD(src_ip);
-        }
+        xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
-        if ((target_ip != NULL) &&
-            (target_ip != src_ip) &&
+        if (target_ip) {
-            (target_ip != src_dp) &&
+                IHOLD(target_ip);
-            (target_ip != target_dp)) {
                xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
        }
@@ -412,7 +311,6 @@ xfs_rename(
                error = xfs_droplink(tp, target_ip);
                if (error)
                        goto abort_return;
-                target_ip_dropped = 1;
                if (src_is_directory) {
                        /*
@@ -422,10 +320,6 @@ xfs_rename(
                        if (error)
                                goto abort_return;
                }
-                /* Do this test while we still hold the locks */
-                target_link_zero = (target_ip)->i_d.di_nlink==0;
        } /* target_ip != NULL */
        /*
@@ -492,15 +386,6 @@ xfs_rename(
        }
        /*
-         * If there was a target inode, take an extra reference on
-         * it here so that it doesn't go to xfs_inactive() from
-         * within the commit.
-         */
-        if (target_ip != NULL) {
-                IHOLD(target_ip);
-        }
-        /*
         * If this is a synchronous mount, make sure that the
         * rename transaction goes to disk before returning to
         * the user.
@@ -509,30 +394,11 @@ xfs_rename(
                xfs_trans_set_sync(tp);
        }
-        /*
-         * Take refs. for vop_link_removed calls below.  No need to worry
-         * about directory refs. because the caller holds them.
-         *
-         * Do holds before the xfs_bmap_finish since it might rele them down
-         * to zero.
-         */
-        if (target_ip_dropped)
-                IHOLD(target_ip);
-        IHOLD(src_ip);
        error = xfs_bmap_finish(&tp, &free_list, &committed);
        if (error) {
                xfs_bmap_cancel(&free_list);
                xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
                                 XFS_TRANS_ABORT));
-                if (target_ip != NULL) {
-                        IRELE(target_ip);
-                }
-                if (target_ip_dropped) {
-                        IRELE(target_ip);
-                }
-                IRELE(src_ip);
                goto std_return;
        }
@@ -541,15 +407,6 @@ xfs_rename(
         * the vnode references.
         */
        error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-        if (target_ip != NULL)
-                IRELE(target_ip);
-        /*
-         * Let interposed file systems know about removed links.
-         */
-        if (target_ip_dropped)
-                IRELE(target_ip);
-        IRELE(src_ip);
        /* Fall through to std_return with error = 0 or errno from
         * xfs_trans_commit      */
@@ -571,11 +428,4 @@ std_return:
        xfs_bmap_cancel(&free_list);
        xfs_trans_cancel(tp, cancel_flags);
        goto std_return;
- rele_return:
-        IRELE(src_ip);
-        if (target_ip != NULL) {
-                IRELE(target_ip);
-        }
-        goto std_return;
 }
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index b8db1d5cde5a..4c70bf5e9985 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -111,13 +111,13 @@ xfs_trans_iget(
                 */
                ASSERT(ip->i_itemp != NULL);
                ASSERT(lock_flags & XFS_ILOCK_EXCL);
-                ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+                ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
                ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
-                       ismrlocked(&ip->i_iolock, MR_UPDATE));
+                       xfs_isilocked(ip, XFS_IOLOCK_EXCL));
                ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
                       (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL));
                ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
-                       ismrlocked(&ip->i_iolock, (MR_UPDATE | MR_ACCESS)));
+                       xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
                ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
                       (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY));
@@ -185,7 +185,7 @@ xfs_trans_ijoin(
        xfs_inode_log_item_t    *iip;
        ASSERT(ip->i_transp == NULL);
-        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        ASSERT(lock_flags & XFS_ILOCK_EXCL);
        if (ip->i_itemp == NULL)
                xfs_inode_item_init(ip, ip->i_mount);
@@ -232,7 +232,7 @@ xfs_trans_ihold(
 {
        ASSERT(ip->i_transp == tp);
        ASSERT(ip->i_itemp != NULL);
-        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        ip->i_itemp->ili_flags |= XFS_ILI_HOLD;
 }
@@ -257,7 +257,7 @@ xfs_trans_log_inode(
        ASSERT(ip->i_transp == tp);
        ASSERT(ip->i_itemp != NULL);
-        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp));
        ASSERT(lidp != NULL);
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 2b8dc7e40772..98e5f110ba5f 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -41,49 +41,6 @@
 #include "xfs_utils.h"
-int
-xfs_dir_lookup_int(
-        xfs_inode_t     *dp,
-        uint            lock_mode,
-        struct xfs_name *name,
-        xfs_ino_t       *inum,
-        xfs_inode_t     **ipp)
-{
-        int             error;
-        xfs_itrace_entry(dp);
-        error = xfs_dir_lookup(NULL, dp, name, inum);
-        if (!error) {
-                /*
-                 * Unlock the directory. We do this because we can't
-                 * hold the directory lock while doing the vn_get()
-                 * in xfs_iget().  Doing so could cause us to hold
-                 * a lock while waiting for the inode to finish
-                 * being inactive while it's waiting for a log
-                 * reservation in the inactive routine.
-                 */
-                xfs_iunlock(dp, lock_mode);
-                error = xfs_iget(dp->i_mount, NULL, *inum, 0, 0, ipp, 0);
-                xfs_ilock(dp, lock_mode);
-                if (error) {
-                        *ipp = NULL;
-                } else if ((*ipp)->i_d.di_mode == 0) {
-                        /*
-                         * The inode has been freed.  Something is
-                         * wrong so just get out of here.
-                         */
-                        xfs_iunlock(dp, lock_mode);
-                        xfs_iput_new(*ipp, 0);
-                        *ipp = NULL;
-                        xfs_ilock(dp, lock_mode);
-                        error = XFS_ERROR(ENOENT);
-                }
-        }
-        return error;
-}
 /*
 * Allocates a new inode from disk and return a pointer to the
 * incore copy. This routine will internally commit the current
@@ -310,7 +267,7 @@ xfs_bump_ino_vers2(
 {
        xfs_mount_t     *mp;
-        ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1);
        ip->i_d.di_version = XFS_DINODE_VERSION_2;
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index 175b126d2cab..f316cb85d8e2 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -21,8 +21,6 @@
 #define IRELE(ip)       VN_RELE(XFS_ITOV(ip))
 #define IHOLD(ip)       VN_HOLD(XFS_ITOV(ip))
-extern int xfs_dir_lookup_int(xfs_inode_t *, uint, struct xfs_name *,
-                                xfs_ino_t *, xfs_inode_t **);
 extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *);
 extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
                                xfs_dev_t, cred_t *, prid_t, int,
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index fc48158fe479..30bacd8bb0e5 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -186,6 +186,7 @@ xfs_cleanup(void)
        kmem_zone_destroy(xfs_efi_zone);
        kmem_zone_destroy(xfs_ifork_zone);
        kmem_zone_destroy(xfs_ili_zone);
+        kmem_zone_destroy(xfs_log_ticket_zone);
 }
 /*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6650601c64f7..70702a60b4bb 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -76,132 +76,6 @@ xfs_open(
 }
 /*
- * xfs_getattr
- */
-int
-xfs_getattr(
-        xfs_inode_t     *ip,
-        bhv_vattr_t     *vap,
-        int             flags)
-{
-        bhv_vnode_t     *vp = XFS_ITOV(ip);
-        xfs_mount_t     *mp = ip->i_mount;
-        xfs_itrace_entry(ip);
-        if (XFS_FORCED_SHUTDOWN(mp))
-                return XFS_ERROR(EIO);
-        if (!(flags & ATTR_LAZY))
-                xfs_ilock(ip, XFS_ILOCK_SHARED);
-        vap->va_size = XFS_ISIZE(ip);
-        if (vap->va_mask == XFS_AT_SIZE)
-                goto all_done;
-        vap->va_nblocks =
-                XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
-        vap->va_nodeid = ip->i_ino;
-#if XFS_BIG_INUMS
-        vap->va_nodeid += mp->m_inoadd;
-#endif
-        vap->va_nlink = ip->i_d.di_nlink;
-        /*
-         * Quick exit for non-stat callers
-         */
-        if ((vap->va_mask &
-            ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID|
-              XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 0)
-                goto all_done;
-        /*
-         * Copy from in-core inode.
-         */
-        vap->va_mode = ip->i_d.di_mode;
-        vap->va_uid = ip->i_d.di_uid;
-        vap->va_gid = ip->i_d.di_gid;
-        vap->va_projid = ip->i_d.di_projid;
-        /*
-         * Check vnode type block/char vs. everything else.
-         */
-        switch (ip->i_d.di_mode & S_IFMT) {
-        case S_IFBLK:
-        case S_IFCHR:
-                vap->va_rdev = ip->i_df.if_u2.if_rdev;
-                vap->va_blocksize = BLKDEV_IOSIZE;
-                break;
-        default:
-                vap->va_rdev = 0;
-                if (!(XFS_IS_REALTIME_INODE(ip))) {
-                        vap->va_blocksize = xfs_preferred_iosize(mp);
-                } else {
-                        /*
-                         * If the file blocks are being allocated from a
-                         * realtime partition, then return the inode's
-                         * realtime extent size or the realtime volume's
-                         * extent size.
-                         */
-                        vap->va_blocksize =
-                                xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
-                }
-                break;
-        }
-        vn_atime_to_timespec(vp, &vap->va_atime);
-        vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
-        vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
-        vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
-        vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
-        /*
-         * Exit for stat callers.  See if any of the rest of the fields
-         * to be filled in are needed.
-         */
-        if ((vap->va_mask &
-             (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
-              XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
-                goto all_done;
-        /*
-         * Convert di_flags to xflags.
-         */
-        vap->va_xflags = xfs_ip2xflags(ip);
-        /*
-         * Exit for inode revalidate.  See if any of the rest of
-         * the fields to be filled in are needed.
-         */
-        if ((vap->va_mask &
-             (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
-              XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
-                goto all_done;
-        vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog;
-        vap->va_nextents =
-                (ip->i_df.if_flags & XFS_IFEXTENTS) ?
-                        ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) :
-                        ip->i_d.di_nextents;
-        if (ip->i_afp)
-                vap->va_anextents =
-                        (ip->i_afp->if_flags & XFS_IFEXTENTS) ?
-                                ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) :
-                                 ip->i_d.di_anextents;
-        else
-                vap->va_anextents = 0;
-        vap->va_gen = ip->i_d.di_gen;
- all_done:
-        if (!(flags & ATTR_LAZY))
-                xfs_iunlock(ip, XFS_ILOCK_SHARED);
-        return 0;
-}
-/*
 * xfs_setattr
 */
 int
@@ -211,7 +85,6 @@ xfs_setattr(
        int                     flags,
        cred_t                  *credp)
 {
-        bhv_vnode_t             *vp = XFS_ITOV(ip);
        xfs_mount_t             *mp = ip->i_mount;
        xfs_trans_t             *tp;
        int                     mask;
@@ -222,7 +95,6 @@ xfs_setattr(
        gid_t                   gid=0, igid=0;
        int                     timeflags = 0;
        xfs_prid_t              projid=0, iprojid=0;
-        int                     mandlock_before, mandlock_after;
        struct xfs_dquot        *udqp, *gdqp, *olddquot1, *olddquot2;
        int                     file_owner;
        int                     need_iolock = 1;
@@ -383,7 +255,7 @@ xfs_setattr(
                                m |= S_ISGID;
 #if 0
                        /* Linux allows this, Irix doesn't. */
-                        if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp))
+                        if ((vap->va_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
                                m |= S_ISVTX;
 #endif
                        if (m && !capable(CAP_FSETID))
@@ -461,10 +333,10 @@ xfs_setattr(
                        goto error_return;
                }
-                if (VN_ISDIR(vp)) {
+                if (S_ISDIR(ip->i_d.di_mode)) {
                        code = XFS_ERROR(EISDIR);
                        goto error_return;
-                } else if (!VN_ISREG(vp)) {
+                } else if (!S_ISREG(ip->i_d.di_mode)) {
                        code = XFS_ERROR(EINVAL);
                        goto error_return;
                }
@@ -626,9 +498,6 @@ xfs_setattr(
                xfs_trans_ihold(tp, ip);
        }
-        /* determine whether mandatory locking mode changes */
-        mandlock_before = MANDLOCK(vp, ip->i_d.di_mode);
        /*
         * Truncate file.  Must have write permission and not be a directory.
         */
@@ -858,13 +727,6 @@ xfs_setattr(
                code = xfs_trans_commit(tp, commit_flags);
        }
-        /*
-         * If the (regular) file's mandatory locking mode changed, then
-         * notify the vnode.  We do this under the inode lock to prevent
-         * racing calls to vop_vnode_change.
-         */
-        mandlock_after = MANDLOCK(vp, ip->i_d.di_mode);
        xfs_iunlock(ip, lock_flags);
        /*
@@ -1443,7 +1305,7 @@ xfs_inactive_attrs(
        int             error;
        xfs_mount_t     *mp;
-        ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
+        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
        tp = *tpp;
        mp = ip->i_mount;
        ASSERT(ip->i_d.di_forkoff != 0);
@@ -1491,7 +1353,7 @@ xfs_release(
        xfs_mount_t     *mp = ip->i_mount;
        int             error;
-        if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0))
+        if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
                return 0;
        /* If this is a read-only mount, don't do this (would generate I/O) */
@@ -1774,8 +1636,7 @@ xfs_lookup(
        struct xfs_name         *name,
        xfs_inode_t             **ipp)
 {
-        xfs_inode_t             *ip;
+        xfs_ino_t               inum;
-        xfs_ino_t               e_inum;
        int                     error;
        uint                    lock_mode;
@@ -1785,12 +1646,21 @@ xfs_lookup(
                return XFS_ERROR(EIO);
        lock_mode = xfs_ilock_map_shared(dp);
-        error = xfs_dir_lookup_int(dp, lock_mode, name, &e_inum, &ip);
+        error = xfs_dir_lookup(NULL, dp, name, &inum);
-        if (!error) {
-                *ipp = ip;
-                xfs_itrace_ref(ip);
-        }
        xfs_iunlock_map_shared(dp, lock_mode);
+        if (error)
+                goto out;
+        error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
+        if (error)
+                goto out;
+        xfs_itrace_ref(*ipp);
+        return 0;
+ out:
+        *ipp = NULL;
        return error;
 }
@@ -1906,7 +1776,7 @@ xfs_create(
         * It is locked (and joined to the transaction).
         */
-        ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE));
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        /*
         * Now we join the directory inode to the transaction.  We do not do it
@@ -2112,7 +1982,7 @@ again:
                ips[0] = ip;
                ips[1] = dp;
-                xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);
+                xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
        }
        /* else  e_inum == dp->i_ino */
        /*     This can happen if we're asked to lock /x/..
@@ -2160,7 +2030,6 @@ void
 xfs_lock_inodes(
        xfs_inode_t     **ips,
        int             inodes,
-        int             first_locked,
        uint            lock_mode)
 {
        int             attempts = 0, i, j, try_lock;
@@ -2168,13 +2037,8 @@ xfs_lock_inodes(
        ASSERT(ips && (inodes >= 2)); /* we need at least two */
-        if (first_locked) {
+        try_lock = 0;
-                try_lock = 1;
+        i = 0;
-                i = 1;
-        } else {
-                try_lock = 0;
-                i = 0;
-        }
 again:
        for (; i < inodes; i++) {
@@ -2298,29 +2162,14 @@ xfs_remove(
                        return error;
        }
-        /*
-         * We need to get a reference to ip before we get our log
-         * reservation. The reason for this is that we cannot call
-         * xfs_iget for an inode for which we do not have a reference
-         * once we've acquired a log reservation. This is because the
-         * inode we are trying to get might be in xfs_inactive going
-         * for a log reservation. Since we'll have to wait for the
-         * inactive code to complete before returning from xfs_iget,
-         * we need to make sure that we don't have log space reserved
-         * when we call xfs_iget.  Instead we get an unlocked reference
-         * to the inode before getting our log reservation.
-         */
-        IHOLD(ip);
        xfs_itrace_entry(ip);
        xfs_itrace_ref(ip);
        error = XFS_QM_DQATTACH(mp, dp, 0);
-        if (!error && dp != ip)
+        if (!error)
                error = XFS_QM_DQATTACH(mp, ip, 0);
        if (error) {
                REMOVE_DEBUG_TRACE(__LINE__);
-                IRELE(ip);
                goto std_return;
        }
@@ -2347,7 +2196,6 @@ xfs_remove(
                ASSERT(error != ENOSPC);
                REMOVE_DEBUG_TRACE(__LINE__);
                xfs_trans_cancel(tp, 0);
-                IRELE(ip);
                return error;
        }
@@ -2355,7 +2203,6 @@ xfs_remove(
        if (error) {
                REMOVE_DEBUG_TRACE(__LINE__);
                xfs_trans_cancel(tp, cancel_flags);
-                IRELE(ip);
                goto std_return;
        }
@@ -2363,23 +2210,18 @@ xfs_remove(
         * At this point, we've gotten both the directory and the entry
         * inodes locked.
         */
+        IHOLD(ip);
        xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
-        if (dp != ip) {
-                /*
+        IHOLD(dp);
-                 * Increment vnode ref count only in this case since
+        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-                 * there's an extra vnode reference in the case where
-                 * dp == ip.
-                 */
-                IHOLD(dp);
-                xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-        }
        /*
         * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
         */
        XFS_BMAP_INIT(&free_list, &first_block);
        error = xfs_dir_removename(tp, dp, name, ip->i_ino,
-                                        &first_block, &free_list, 0);
+                                        &first_block, &free_list, resblks);
        if (error) {
                ASSERT(error != ENOENT);
                REMOVE_DEBUG_TRACE(__LINE__);
@@ -2402,12 +2244,6 @@ xfs_remove(
        link_zero = (ip)->i_d.di_nlink==0;
        /*
-         * Take an extra ref on the inode so that it doesn't
-         * go to xfs_inactive() from within the commit.
-         */
-        IHOLD(ip);
-        /*
         * If this is a synchronous mount, make sure that the
         * remove transaction goes to disk before returning to
         * the user.
@@ -2423,10 +2259,8 @@ xfs_remove(
        }
        error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-        if (error) {
+        if (error)
-                IRELE(ip);
                goto std_return;
-        }
        /*
         * If we are using filestreams, kill the stream association.
@@ -2438,7 +2272,6 @@ xfs_remove(
                xfs_filestream_deassociate(ip);
        xfs_itrace_exit(ip);
-        IRELE(ip);
 /*      Fall through to std_return with error = 0 */
 std_return:
@@ -2467,8 +2300,6 @@ xfs_remove(
        cancel_flags |= XFS_TRANS_ABORT;
        xfs_trans_cancel(tp, cancel_flags);
-        IRELE(ip);
        goto std_return;
 }
@@ -2536,7 +2367,7 @@ xfs_link(
                ips[1] = sip;
        }
-        xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);
+        xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
        /*
         * Increment vnode ref counts since xfs_trans_commit &
@@ -2840,7 +2671,6 @@ xfs_rmdir(
        struct xfs_name         *name,
        xfs_inode_t             *cdp)
 {
-        bhv_vnode_t             *dir_vp = XFS_ITOV(dp);
        xfs_mount_t             *mp = dp->i_mount;
        xfs_trans_t             *tp;
        int                     error;
@@ -2866,27 +2696,12 @@ xfs_rmdir(
        }
        /*
-         * We need to get a reference to cdp before we get our log
-         * reservation.  The reason for this is that we cannot call
-         * xfs_iget for an inode for which we do not have a reference
-         * once we've acquired a log reservation.  This is because the
-         * inode we are trying to get might be in xfs_inactive going
-         * for a log reservation.  Since we'll have to wait for the
-         * inactive code to complete before returning from xfs_iget,
-         * we need to make sure that we don't have log space reserved
-         * when we call xfs_iget.  Instead we get an unlocked reference
-         * to the inode before getting our log reservation.
-         */
-        IHOLD(cdp);
-        /*
         * Get the dquots for the inodes.
         */
        error = XFS_QM_DQATTACH(mp, dp, 0);
-        if (!error && dp != cdp)
+        if (!error)
                error = XFS_QM_DQATTACH(mp, cdp, 0);
        if (error) {
-                IRELE(cdp);
                REMOVE_DEBUG_TRACE(__LINE__);
                goto std_return;
        }
@@ -2913,7 +2728,6 @@ xfs_rmdir(
        if (error) {
                ASSERT(error != ENOSPC);
                cancel_flags = 0;
-                IRELE(cdp);
                goto error_return;
        }
        XFS_BMAP_INIT(&free_list, &first_block);
@@ -2927,21 +2741,13 @@ xfs_rmdir(
        error = xfs_lock_dir_and_entry(dp, cdp);
        if (error) {
                xfs_trans_cancel(tp, cancel_flags);
-                IRELE(cdp);
                goto std_return;
        }
+        IHOLD(dp);
        xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
-        if (dp != cdp) {
-                /*
-                 * Only increment the parent directory vnode count if
-                 * we didn't bump it in looking up cdp.  The only time
-                 * we don't bump it is when we're looking up ".".
-                 */
-                VN_HOLD(dir_vp);
-        }
-        xfs_itrace_ref(cdp);
+        IHOLD(cdp);
        xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);
        ASSERT(cdp->i_d.di_nlink >= 2);
@@ -2995,12 +2801,6 @@ xfs_rmdir(
        last_cdp_link = (cdp)->i_d.di_nlink==0;
        /*
-         * Take an extra ref on the child vnode so that it
-         * does not go to xfs_inactive() from within the commit.
-         */
-        IHOLD(cdp);
-        /*
         * If this is a synchronous mount, make sure that the
         * rmdir transaction goes to disk before returning to
         * the user.
@@ -3014,19 +2814,15 @@ xfs_rmdir(
                xfs_bmap_cancel(&free_list);
                xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
                                 XFS_TRANS_ABORT));
-                IRELE(cdp);
                goto std_return;
        }
        error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
        if (error) {
-                IRELE(cdp);
                goto std_return;
        }
-        IRELE(cdp);
        /* Fall through to std_return with error = 0 or the errno
         * from xfs_trans_commit. */
 std_return:
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 24c53923dc2c..8abe8f186e20 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -15,7 +15,6 @@ struct xfs_iomap;
 int xfs_open(struct xfs_inode *ip);
-int xfs_getattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags);
 int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags,
                struct cred *credp);
 int xfs_readlink(struct xfs_inode *ip, char *link);
@@ -48,9 +47,9 @@ int xfs_change_file_space(struct xfs_inode *ip, int cmd,
                struct cred *credp, int attr_flags);
 int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
                struct xfs_inode *src_ip, struct xfs_inode *target_dp,
-                struct xfs_name *target_name);
+                struct xfs_name *target_name, struct xfs_inode *target_ip);
 int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value,
-                int *valuelenp, int flags, cred_t *cred);
+                int *valuelenp, int flags);
 int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value,
                int valuelen, int flags);
 int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags);
@@ -61,9 +60,6 @@ int xfs_ioctl(struct xfs_inode *ip, struct file *filp,
 ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb,
                const struct iovec *iovp, unsigned int segs,
                loff_t *offset, int ioflags);
-ssize_t xfs_sendfile(struct xfs_inode *ip, struct file *filp,
-                loff_t *offset, int ioflags, size_t count,
-                read_actor_t actor, void *target);
 ssize_t xfs_splice_read(struct xfs_inode *ip, struct file *infilp,
                loff_t *ppos, struct pipe_inode_info *pipe, size_t count,
                int flags, int ioflags);