18 files changed, 1583 insertions, 1231 deletions
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index a8ff003a00f7..8a34710ecf40 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -5,8 +5,8 @@
 obj-$(CONFIG_EXT4_FS) += ext4.o
 ext4-y  := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-                   ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
+                ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-                   ext4_jbd2.o migrate.o mballoc.o
+                ext4_jbd2.o migrate.o mballoc.o block_validity.o
 ext4-$(CONFIG_EXT4_FS_XATTR)            += xattr.o xattr_user.o xattr_trusted.o
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)        += acl.o
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 647e0d65a284..605aeed96d68 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -129,12 +129,15 @@ fail:
 static inline struct posix_acl *
 ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl)
 {
-        struct posix_acl *acl = EXT4_ACL_NOT_CACHED;
+        struct posix_acl *acl = ACCESS_ONCE(*i_acl);
-        spin_lock(&inode->i_lock);
+        if (acl) {
-        if (*i_acl != EXT4_ACL_NOT_CACHED)
+                spin_lock(&inode->i_lock);
-                acl = posix_acl_dup(*i_acl);
+                acl = *i_acl;
-        spin_unlock(&inode->i_lock);
+                if (acl != EXT4_ACL_NOT_CACHED)
+                        acl = posix_acl_dup(acl);
+                spin_unlock(&inode->i_lock);
+        }
        return acl;
 }
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 53c72ad85877..e2126d70dff5 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -19,7 +19,6 @@
 #include <linux/buffer_head.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
-#include "group.h"
 #include "mballoc.h"
 /*
@@ -88,6 +87,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
                 ext4_group_t block_group, struct ext4_group_desc *gdp)
 {
        int bit, bit_max;
+        ext4_group_t ngroups = ext4_get_groups_count(sb);
        unsigned free_blocks, group_blocks;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -123,7 +123,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
                bit_max += ext4_bg_num_gdb(sb, block_group);
        }
-        if (block_group == sbi->s_groups_count - 1) {
+        if (block_group == ngroups - 1) {
                /*
                 * Even though mke2fs always initialize first and last group
                 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
@@ -131,7 +131,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
                 */
                group_blocks = ext4_blocks_count(sbi->s_es) -
                        le32_to_cpu(sbi->s_es->s_first_data_block) -
-                        (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1));
+                        (EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1));
        } else {
                group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
        }
@@ -205,18 +205,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
 {
        unsigned int group_desc;
        unsigned int offset;
+        ext4_group_t ngroups = ext4_get_groups_count(sb);
        struct ext4_group_desc *desc;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-        if (block_group >= sbi->s_groups_count) {
+        if (block_group >= ngroups) {
                ext4_error(sb, "ext4_get_group_desc",
                           "block_group >= groups_count - "
                           "block_group = %u, groups_count = %u",
-                           block_group, sbi->s_groups_count);
+                           block_group, ngroups);
                return NULL;
        }
-        smp_rmb();
        group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
        offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
@@ -326,16 +326,16 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
                unlock_buffer(bh);
                return bh;
        }
-        spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+        ext4_lock_group(sb, block_group);
        if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
                ext4_init_block_bitmap(sb, bh, block_group, desc);
                set_bitmap_uptodate(bh);
                set_buffer_uptodate(bh);
-                spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+                ext4_unlock_group(sb, block_group);
                unlock_buffer(bh);
                return bh;
        }
-        spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+        ext4_unlock_group(sb, block_group);
        if (buffer_uptodate(bh)) {
                /*
                 * if not uninit if bh is uptodate,
@@ -451,7 +451,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
        down_write(&grp->alloc_sem);
        for (i = 0, blocks_freed = 0; i < count; i++) {
                BUFFER_TRACE(bitmap_bh, "clear bit");
-                if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+                if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
                                                bit + i, bitmap_bh->b_data)) {
                        ext4_error(sb, __func__,
                                   "bit already cleared for block %llu",
@@ -461,11 +461,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
                        blocks_freed++;
                }
        }
-        spin_lock(sb_bgl_lock(sbi, block_group));
+        ext4_lock_group(sb, block_group);
        blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
        ext4_free_blks_set(sb, desc, blk_free_count);
        desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
-        spin_unlock(sb_bgl_lock(sbi, block_group));
+        ext4_unlock_group(sb, block_group);
        percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
        if (sbi->s_log_groups_per_flex) {
@@ -665,7 +665,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
        ext4_fsblk_t desc_count;
        struct ext4_group_desc *gdp;
        ext4_group_t i;
-        ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+        ext4_group_t ngroups = ext4_get_groups_count(sb);
 #ifdef EXT4FS_DEBUG
        struct ext4_super_block *es;
        ext4_fsblk_t bitmap_count;
@@ -677,7 +677,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
        bitmap_count = 0;
        gdp = NULL;
-        smp_rmb();
        for (i = 0; i < ngroups; i++) {
                gdp = ext4_get_group_desc(sb, i, NULL);
                if (!gdp)
@@ -700,7 +699,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
        return bitmap_count;
 #else
        desc_count = 0;
-        smp_rmb();
        for (i = 0; i < ngroups; i++) {
                gdp = ext4_get_group_desc(sb, i, NULL);
                if (!gdp)
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
new file mode 100644
index 000000000000..50784ef07563
--- /dev/null
+++ b/fs/ext4/block_validity.c
@@ -0,0 +1,244 @@
+/*
+ *  linux/fs/ext4/block_validity.c
+ *
+ * Copyright (C) 2009
+ * Theodore Ts'o (tytso@mit.edu)
+ *
+ * Track which blocks in the filesystem are metadata blocks that
+ * should never be used as data blocks by files or directories.
+ */
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
+#include <linux/module.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/mutex.h>
+#include "ext4.h"
+struct ext4_system_zone {               /* one contiguous run of metadata blocks */
+        struct rb_node  node;           /* linkage in the sbi->system_blks rbtree */
+        ext4_fsblk_t    start_blk;      /* first block of the run */
+        unsigned int    count;          /* number of blocks in the run */
+};
+static struct kmem_cache *ext4_system_zone_cachep;     /* slab cache all zones are allocated from */
+/*
+ * Create the slab cache that struct ext4_system_zone objects are
+ * allocated from.  Returns 0 on success, -ENOMEM if the cache could
+ * not be created.
+ */
+int __init init_ext4_system_zone(void)
+{
+        ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
+                                             SLAB_RECLAIM_ACCOUNT);
+        return ext4_system_zone_cachep ? 0 : -ENOMEM;
+}
+/* Destroy the slab cache set up by init_ext4_system_zone(). */
+void exit_ext4_system_zone(void)
+{
+        kmem_cache_destroy(ext4_system_zone_cachep);
+}
+/*
+ * Returns 1 when entry2 starts at the block immediately following the
+ * last block of entry1 (so the two ranges can be coalesced), else 0.
+ */
+static inline int can_merge(struct ext4_system_zone *entry1,
+                     struct ext4_system_zone *entry2)
+{
+        return (entry1->start_blk + entry1->count) == entry2->start_blk;
+}
+/*
+ * Record the block range [start_blk, start_blk + count) as part of
+ * the "system zone" --- filesystem metadata blocks which should never
+ * be used by inodes.
+ *
+ * If start_blk falls inside a range that is already recorded, that
+ * range is grown (when necessary) to cover the new blocks; otherwise
+ * a new tree node is allocated and inserted.  The resulting range is
+ * then coalesced with its immediate predecessor and successor when
+ * they are exactly adjacent.
+ *
+ * Returns 0 on success, or -ENOMEM if a new node could not be
+ * allocated.
+ */
+static int add_system_zone(struct ext4_sb_info *sbi,
+                           ext4_fsblk_t start_blk,
+                           unsigned int count)
+{
+        struct rb_node **link = &sbi->system_blks.rb_node;
+        struct rb_node *parent = NULL, *zone_node = NULL, *neighbour;
+        struct ext4_system_zone *zone = NULL, *cur;
+        ext4_fsblk_t end_blk = start_blk + count;
+        /* Walk down to the zone containing start_blk, or to a free slot. */
+        while (*link) {
+                parent = *link;
+                cur = rb_entry(parent, struct ext4_system_zone, node);
+                if (start_blk < cur->start_blk) {
+                        link = &parent->rb_left;
+                        continue;
+                }
+                if (start_blk >= cur->start_blk + cur->count) {
+                        link = &parent->rb_right;
+                        continue;
+                }
+                /* start_blk lies inside cur; stretch cur if we run past it. */
+                if (end_blk > cur->start_blk + cur->count)
+                        cur->count = end_blk - cur->start_blk;
+                zone = cur;
+                zone_node = parent;
+                break;
+        }
+        if (zone == NULL) {
+                zone = kmem_cache_alloc(ext4_system_zone_cachep, GFP_KERNEL);
+                if (zone == NULL)
+                        return -ENOMEM;
+                zone->start_blk = start_blk;
+                zone->count = count;
+                zone_node = &zone->node;
+                rb_link_node(zone_node, parent, link);
+                rb_insert_color(zone_node, &sbi->system_blks);
+        }
+        /* Absorb the left neighbour if it ends exactly where we begin. */
+        neighbour = rb_prev(zone_node);
+        if (neighbour) {
+                cur = rb_entry(neighbour, struct ext4_system_zone, node);
+                if (can_merge(cur, zone)) {
+                        zone->start_blk = cur->start_blk;
+                        zone->count += cur->count;
+                        rb_erase(neighbour, &sbi->system_blks);
+                        kmem_cache_free(ext4_system_zone_cachep, cur);
+                }
+        }
+        /* Absorb the right neighbour if it begins exactly where we end. */
+        neighbour = rb_next(zone_node);
+        if (neighbour) {
+                cur = rb_entry(neighbour, struct ext4_system_zone, node);
+                if (can_merge(zone, cur)) {
+                        zone->count += cur->count;
+                        rb_erase(neighbour, &sbi->system_blks);
+                        kmem_cache_free(ext4_system_zone_cachep, cur);
+                }
+        }
+        return 0;
+}
+/*
+ * Log every recorded system zone as a comma-separated list of
+ * "first-last" block numbers, walking the tree in sorted order.
+ */
+static void debug_print_tree(struct ext4_sb_info *sbi)
+{
+        struct rb_node *cursor;
+        struct ext4_system_zone *zone;
+        const char *sep = "";
+        printk(KERN_INFO "System zones: ");
+        for (cursor = rb_first(&sbi->system_blks); cursor;
+             cursor = rb_next(cursor)) {
+                zone = rb_entry(cursor, struct ext4_system_zone, node);
+                printk("%s%llu-%llu", sep,
+                       zone->start_blk, zone->start_blk + zone->count - 1);
+                /* every entry after the first is preceded by a separator */
+                sep = ", ";
+        }
+        printk("\n");
+}
+/*
+ * Build the tree of system-zone (metadata) block ranges for this
+ * filesystem: for each block group, the superblock/group-descriptor
+ * area (when the group carries one and is selected by the i < 5 /
+ * flex_size test below), the block bitmap, the inode bitmap and the
+ * inode table.
+ *
+ * If the BLOCK_VALIDITY mount option is off, any existing tree is
+ * released and nothing is built.  If a tree already exists it is left
+ * untouched.
+ *
+ * Returns 0 on success or the error from add_system_zone().  On
+ * failure the partially built tree is released, so that a later call
+ * does not mistake it for a complete one.
+ */
+int ext4_setup_system_zone(struct super_block *sb)
+{
+        ext4_group_t ngroups = ext4_get_groups_count(sb);
+        struct ext4_sb_info *sbi = EXT4_SB(sb);
+        struct ext4_group_desc *gdp;
+        ext4_group_t i;
+        int flex_size = ext4_flex_bg_size(sbi);
+        int ret;
+        if (!test_opt(sb, BLOCK_VALIDITY)) {
+                if (sbi->system_blks.rb_node)
+                        ext4_release_system_zone(sb);
+                return 0;
+        }
+        if (sbi->system_blks.rb_node)
+                return 0;
+        for (i = 0; i < ngroups; i++) {
+                if (ext4_bg_has_super(sb, i) &&
+                    ((i < 5) || ((i % flex_size) == 0))) {
+                        ret = add_system_zone(sbi,
+                                        ext4_group_first_block_no(sb, i),
+                                        sbi->s_gdb_count + 1);
+                        if (ret)
+                                goto fail;
+                }
+                gdp = ext4_get_group_desc(sb, i, NULL);
+                /*
+                 * ext4_get_group_desc() can return NULL; skip the group
+                 * rather than dereference it, as ext4_count_free_blocks()
+                 * does.
+                 */
+                if (!gdp)
+                        continue;
+                ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
+                if (ret)
+                        goto fail;
+                ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
+                if (ret)
+                        goto fail;
+                ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
+                                sbi->s_itb_per_group);
+                if (ret)
+                        goto fail;
+        }
+        if (test_opt(sb, DEBUG))
+                debug_print_tree(sbi);
+        return 0;
+fail:
+        /* Drop whatever was built so the tree is never half-populated. */
+        ext4_release_system_zone(sb);
+        return ret;
+}
+/*
+ * Called when the filesystem is unmounted (and from
+ * ext4_setup_system_zone() when block validity checking is turned
+ * off): free every node of the system-zone tree and leave the tree
+ * empty.
+ */
+void ext4_release_system_zone(struct super_block *sb)
+{
+        struct ext4_sb_info *sbi = EXT4_SB(sb);
+        struct rb_node *cur = sbi->system_blks.rb_node;
+        struct rb_node *up;
+        while (cur) {
+                /* Descend until we reach a node with no children. */
+                if (cur->rb_left) {
+                        cur = cur->rb_left;
+                        continue;
+                }
+                if (cur->rb_right) {
+                        cur = cur->rb_right;
+                        continue;
+                }
+                /*
+                 * Leaf: detach it from its parent, free it, then
+                 * resume from the parent, which may now be a leaf
+                 * itself.
+                 */
+                up = rb_parent(cur);
+                if (up) {
+                        if (up->rb_left == cur)
+                                up->rb_left = NULL;
+                        else if (up->rb_right == cur)
+                                up->rb_right = NULL;
+                }
+                kmem_cache_free(ext4_system_zone_cachep,
+                                rb_entry(cur, struct ext4_system_zone, node));
+                cur = up;
+        }
+        sbi->system_blks.rb_node = NULL;
+}
+/*
+ * Check whether the block range [start_blk, start_blk + count) may be
+ * used for file data.
+ *
+ * Returns 1 if the range is valid; 0 if it starts at or before the
+ * filesystem's first data block, wraps around the block number space,
+ * runs past the end of the filesystem, or overlaps any recorded
+ * system zone (filesystem metadata blocks).
+ */
+int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
+                          unsigned int count)
+{
+        struct ext4_system_zone *entry;
+        struct rb_node *n = sbi->system_blks.rb_node;
+        /*
+         * The middle test rejects ranges whose end wraps around; without
+         * it a huge start_blk could pass the upper-bound comparison.
+         */
+        if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
+            (start_blk + count < start_blk) ||
+            (start_blk + count > ext4_blocks_count(sbi->s_es)))
+                return 0;
+        while (n) {
+                entry = rb_entry(n, struct ext4_system_zone, node);
+                if (start_blk + count - 1 < entry->start_blk)
+                        n = n->rb_left;         /* range ends before this zone */
+                else if (start_blk >= (entry->start_blk + entry->count))
+                        n = n->rb_right;        /* range starts after this zone */
+                else
+                        return 0;               /* overlap with metadata */
+        }
+        return 1;
+}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index b64789929a65..9dc93168e262 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -131,8 +131,7 @@ static int ext4_readdir(struct file *filp,
                struct buffer_head *bh = NULL;
                map_bh.b_state = 0;
-                err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh,
+                err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0);
-                                                0, 0, 0);
                if (err > 0) {
                        pgoff_t index = map_bh.b_blocknr >>
                                        (PAGE_CACHE_SHIFT - inode->i_blkbits);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d0f15ef56de1..cc7d5edc38c9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -21,7 +21,14 @@
 #include <linux/magic.h>
 #include <linux/jbd2.h>
 #include <linux/quota.h>
-#include "ext4_i.h"
+#include <linux/rwsem.h>
+#include <linux/rbtree.h>
+#include <linux/seqlock.h>
+#include <linux/mutex.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+#include <linux/blockgroup_lock.h>
+#include <linux/percpu_counter.h>
 /*
 * The fourth extended filesystem constants/structures
@@ -46,6 +53,19 @@
 #define ext4_debug(f, a...)     do {} while (0)
 #endif
+/* data type for block offset of block group */
+typedef int ext4_grpblk_t;
+/* data type for filesystem-wide blocks number */
+typedef unsigned long long ext4_fsblk_t;
+/* data type for file logical block number */
+typedef __u32 ext4_lblk_t;
+/* data type for block group number */
+typedef unsigned int ext4_group_t;
 /* prefer goal again. length */
 #define EXT4_MB_HINT_MERGE              1
 /* blocks already reserved */
@@ -179,9 +199,6 @@ struct flex_groups {
 #define EXT4_BG_BLOCK_UNINIT    0x0002 /* Block bitmap not in use */
 #define EXT4_BG_INODE_ZEROED    0x0004 /* On-disk itable initialized to zero */
-#ifdef __KERNEL__
-#include "ext4_sb.h"
-#endif
 /*
 * Macro-instructions used to manage group descriptors
 */
@@ -297,10 +314,23 @@ struct ext4_new_group_data {
 };
 /*
- * Following is used by preallocation code to tell get_blocks() that we
+ * Flags used by ext4_get_blocks()
- * want uninitialzed extents.
 */
-#define EXT4_CREATE_UNINITIALIZED_EXT           2
+        /* Allocate any needed blocks and/or convert an uninitialized
+           extent to be an initialized extent */
+#define EXT4_GET_BLOCKS_CREATE                  0x0001
+        /* Request the creation of an uninitialized extent */
+#define EXT4_GET_BLOCKS_UNINIT_EXT              0x0002
+#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT       (EXT4_GET_BLOCKS_UNINIT_EXT|\
+                                                 EXT4_GET_BLOCKS_CREATE)
+        /* Caller is from the delayed allocation writeout path,
+           so set the magic i_delalloc_reserved_flag after taking the
+           inode allocation semaphore */
+#define EXT4_GET_BLOCKS_DELALLOC_RESERVE        0x0004
+        /* Call ext4_da_update_reserve_space() after successfully 
+           allocating the blocks */
+#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE    0x0008
 /*
 * ioctl commands
@@ -516,6 +546,110 @@ do {									       \
 #endif /* defined(__KERNEL__) || defined(__linux__) */
 /*
+ * storage for cached extent
+ */
+struct ext4_ext_cache {
+        ext4_fsblk_t    ec_start;
+        ext4_lblk_t     ec_block;
+        __u32           ec_len; /* must be 32bit to return holes */
+        __u32           ec_type;
+};
+/*
+ * fourth extended file system inode data in memory
+ */
+struct ext4_inode_info {
+        __le32  i_data[15];     /* unconverted */
+        __u32   i_flags;
+        ext4_fsblk_t    i_file_acl;
+        __u32   i_dtime;
+        /*
+         * i_block_group is the number of the block group which contains
+         * this file's inode.  Constant across the lifetime of the inode,
+         * it is used for making block allocation decisions - we try to
+         * place a file's data blocks near its inode block, and new inodes
+         * near to their parent directory's inode.
+         */
+        ext4_group_t    i_block_group;
+        __u32   i_state;                /* Dynamic state flags for ext4 */
+        ext4_lblk_t             i_dir_start_lookup;
+#ifdef CONFIG_EXT4_FS_XATTR
+        /*
+         * Extended attributes can be read independently of the main file
+         * data. Taking i_mutex even when reading would cause contention
+         * between readers of EAs and writers of regular file data, so
+         * instead we synchronize on xattr_sem when reading or changing
+         * EAs.
+         */
+        struct rw_semaphore xattr_sem;
+#endif
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
+        struct posix_acl        *i_acl;
+        struct posix_acl        *i_default_acl;
+#endif
+        struct list_head i_orphan;      /* unlinked but open inodes */
+        /*
+         * i_disksize keeps track of what the inode size is ON DISK, not
+         * in memory.  During truncate, i_size is set to the new size by
+         * the VFS prior to calling ext4_truncate(), but the filesystem won't
+         * set i_disksize to 0 until the truncate is actually under way.
+         *
+         * The intent is that i_disksize always represents the blocks which
+         * are used by this file.  This allows recovery to restart truncate
+         * on orphans if we crash during truncate.  We actually write i_disksize
+         * into the on-disk inode when writing inodes out, instead of i_size.
+         *
+         * The only time when i_disksize and i_size may be different is when
+         * a truncate is in progress.  The only things which change i_disksize
+         * are ext4_get_block (growth) and ext4_truncate (shrinkth).
+         */
+        loff_t  i_disksize;
+        /*
+         * i_data_sem is for serialising ext4_truncate() against
+         * ext4_getblock().  In the 2.4 ext2 design, great chunks of inode's
+         * data tree are chopped off during truncate. We can't do that in
+         * ext4 because whenever we perform intermediate commits during
+         * truncate, the inode and all the metadata blocks *must* be in a
+         * consistent state which allows truncation of the orphans to restart
+         * during recovery.  Hence we must fix the get_block-vs-truncate race
+         * by other means, so we have i_data_sem.
+         */
+        struct rw_semaphore i_data_sem;
+        struct inode vfs_inode;
+        struct jbd2_inode jinode;
+        struct ext4_ext_cache i_cached_extent;
+        /*
+         * File creation time. Its function is same as that of
+         * struct timespec i_{a,c,m}time in the generic inode.
+         */
+        struct timespec i_crtime;
+        /* mballoc */
+        struct list_head i_prealloc_list;
+        spinlock_t i_prealloc_lock;
+        /* ialloc */
+        ext4_group_t    i_last_alloc_group;
+        /* allocation reservation info for delalloc */
+        unsigned int i_reserved_data_blocks;
+        unsigned int i_reserved_meta_blocks;
+        unsigned int i_allocated_meta_blocks;
+        unsigned short i_delalloc_reserved_flag;
+        /* on-disk additional length */
+        __u16 i_extra_isize;
+        spinlock_t i_block_reservation_lock;
+};
+/*
 * File system states
 */
 #define EXT4_VALID_FS                   0x0001  /* Unmounted cleanly */
@@ -560,6 +694,7 @@ do {									       \
 #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
 #define EXT4_MOUNT_DELALLOC             0x8000000 /* Delalloc support */
 #define EXT4_MOUNT_DATA_ERR_ABORT       0x10000000 /* Abort on file data write */
+#define EXT4_MOUNT_BLOCK_VALIDITY       0x20000000 /* Block validity checking */
 /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
@@ -689,6 +824,137 @@ struct ext4_super_block {
 };
 #ifdef __KERNEL__
+/*
+ * fourth extended-fs super-block data in memory
+ */
+struct ext4_sb_info {
+        unsigned long s_desc_size;      /* Size of a group descriptor in bytes */
+        unsigned long s_inodes_per_block;/* Number of inodes per block */
+        unsigned long s_blocks_per_group;/* Number of blocks in a group */
+        unsigned long s_inodes_per_group;/* Number of inodes in a group */
+        unsigned long s_itb_per_group;  /* Number of inode table blocks per group */
+        unsigned long s_gdb_count;      /* Number of group descriptor blocks */
+        unsigned long s_desc_per_block; /* Number of group descriptors per block */
+        ext4_group_t s_groups_count;    /* Number of groups in the fs */
+        unsigned long s_overhead_last;  /* Last calculated overhead */
+        unsigned long s_blocks_last;    /* Last seen block count */
+        loff_t s_bitmap_maxbytes;       /* max bytes for bitmap files */
+        struct buffer_head * s_sbh;     /* Buffer containing the super block */
+        struct ext4_super_block *s_es;  /* Pointer to the super block in the buffer */
+        struct buffer_head **s_group_desc;
+        unsigned long  s_mount_opt;
+        ext4_fsblk_t s_sb_block;
+        uid_t s_resuid;
+        gid_t s_resgid;
+        unsigned short s_mount_state;
+        unsigned short s_pad;
+        int s_addr_per_block_bits;
+        int s_desc_per_block_bits;
+        int s_inode_size;
+        int s_first_ino;
+        unsigned int s_inode_readahead_blks;
+        spinlock_t s_next_gen_lock;
+        u32 s_next_generation;
+        u32 s_hash_seed[4];
+        int s_def_hash_version;
+        int s_hash_unsigned;    /* 3 if hash should be signed, 0 if not */
+        struct percpu_counter s_freeblocks_counter;
+        struct percpu_counter s_freeinodes_counter;
+        struct percpu_counter s_dirs_counter;
+        struct percpu_counter s_dirtyblocks_counter;
+        struct blockgroup_lock *s_blockgroup_lock;
+        struct proc_dir_entry *s_proc;
+        struct kobject s_kobj;
+        struct completion s_kobj_unregister;
+        /* Journaling */
+        struct inode *s_journal_inode;
+        struct journal_s *s_journal;
+        struct list_head s_orphan;
+        struct mutex s_orphan_lock;
+        struct mutex s_resize_lock;
+        unsigned long s_commit_interval;
+        u32 s_max_batch_time;
+        u32 s_min_batch_time;
+        struct block_device *journal_bdev;
+#ifdef CONFIG_JBD2_DEBUG
+        struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
+        wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
+#endif
+#ifdef CONFIG_QUOTA
+        char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
+        int s_jquota_fmt;                       /* Format of quota to use */
+#endif
+        unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
+        struct rb_root system_blks;
+#ifdef EXTENTS_STATS
+        /* ext4 extents stats */
+        unsigned long s_ext_min;
+        unsigned long s_ext_max;
+        unsigned long s_depth_max;
+        spinlock_t s_ext_stats_lock;
+        unsigned long s_ext_blocks;
+        unsigned long s_ext_extents;
+#endif
+        /* for buddy allocator */
+        struct ext4_group_info ***s_group_info;
+        struct inode *s_buddy_cache;
+        long s_blocks_reserved;
+        spinlock_t s_reserve_lock;
+        spinlock_t s_md_lock;
+        tid_t s_last_transaction;
+        unsigned short *s_mb_offsets;
+        unsigned int *s_mb_maxs;
+        /* tunables */
+        unsigned long s_stripe;
+        unsigned int s_mb_stream_request;
+        unsigned int s_mb_max_to_scan;
+        unsigned int s_mb_min_to_scan;
+        unsigned int s_mb_stats;
+        unsigned int s_mb_order2_reqs;
+        unsigned int s_mb_group_prealloc;
+        /* where last allocation was done - for stream allocation */
+        unsigned long s_mb_last_group;
+        unsigned long s_mb_last_start;
+        /* history to debug policy */
+        struct ext4_mb_history *s_mb_history;
+        int s_mb_history_cur;
+        int s_mb_history_max;
+        int s_mb_history_num;
+        spinlock_t s_mb_history_lock;
+        int s_mb_history_filter;
+        /* stats for buddy allocator */
+        spinlock_t s_mb_pa_lock;
+        atomic_t s_bal_reqs;    /* number of reqs with len > 1 */
+        atomic_t s_bal_success; /* we found long enough chunks */
+        atomic_t s_bal_allocated;       /* in blocks */
+        atomic_t s_bal_ex_scanned;      /* total extents scanned */
+        atomic_t s_bal_goals;   /* goal hits */
+        atomic_t s_bal_breaks;  /* too long searches */
+        atomic_t s_bal_2orders; /* 2^order hits */
+        spinlock_t s_bal_lock;
+        unsigned long s_mb_buddies_generated;
+        unsigned long long s_mb_generation_time;
+        atomic_t s_mb_lost_chunks;
+        atomic_t s_mb_preallocated;
+        atomic_t s_mb_discarded;
+        /* locality groups */
+        struct ext4_locality_group *s_locality_groups;
+        /* for write statistics */
+        unsigned long s_sectors_written_start;
+        u64 s_kbytes_written;
+        unsigned int s_log_groups_per_flex;
+        struct flex_groups *s_flex_groups;
+};
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
 {
        return sb->s_fs_info;
@@ -704,7 +970,6 @@ static inline struct timespec ext4_current_time(struct inode *inode)
                current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
 }
 static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 {
        return ino == EXT4_ROOT_INO ||
@@ -1014,6 +1279,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
                                                    ext4_group_t block_group,
                                                    struct buffer_head ** bh);
 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
+struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
+                                      ext4_group_t block_group);
+extern unsigned ext4_init_block_bitmap(struct super_block *sb,
+                                       struct buffer_head *bh,
+                                       ext4_group_t group,
+                                       struct ext4_group_desc *desc);
+#define ext4_free_blocks_after_init(sb, group, desc)                    \
+                ext4_init_block_bitmap(sb, NULL, group, desc)
 /* dir.c */
 extern int ext4_check_dir_entry(const char *, struct inode *,
@@ -1038,6 +1311,11 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
 extern unsigned long ext4_count_dirs(struct super_block *);
 extern void ext4_check_inodes_bitmap(struct super_block *);
+extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
+                                       struct buffer_head *bh,
+                                       ext4_group_t group,
+                                       struct ext4_group_desc *desc);
+extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
 /* mballoc.c */
 extern long ext4_mb_stats;
@@ -1123,6 +1401,8 @@ extern void ext4_abort(struct super_block *, const char *, const char *, ...)
        __attribute__ ((format (printf, 3, 4)));
 extern void ext4_warning(struct super_block *, const char *, const char *, ...)
        __attribute__ ((format (printf, 3, 4)));
+extern void ext4_msg(struct super_block *, const char *, const char *, ...)
+        __attribute__ ((format (printf, 3, 4)));
 extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
                                const char *, const char *, ...)
        __attribute__ ((format (printf, 4, 5)));
@@ -1161,6 +1441,10 @@ extern void ext4_used_dirs_set(struct super_block *sb,
                                struct ext4_group_desc *bg, __u32 count);
 extern void ext4_itable_unused_set(struct super_block *sb,
                                   struct ext4_group_desc *bg, __u32 count);
+extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
+                                   struct ext4_group_desc *gdp);
+extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
+                                       struct ext4_group_desc *gdp);
 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
 {
@@ -1228,6 +1512,18 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
         return grp_info[indexv][indexh];
 }
+/*
+ * Returns the number of groups in the fs.  Reading s_groups_count requires
+ * using smp_rmb() afterwards, so the barrier is issued here for the caller;
+ * see the locking protocol in the comments of ext4_group_add() in resize.c.
+ */
+static inline ext4_group_t ext4_get_groups_count(struct super_block *sb)
+{
+        ext4_group_t    ngroups = EXT4_SB(sb)->s_groups_count;
+        smp_rmb();
+        return ngroups;
+}
 static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi,
                                             ext4_group_t block_group)
@@ -1283,33 +1579,25 @@ struct ext4_group_info {
 };
 #define EXT4_GROUP_INFO_NEED_INIT_BIT   0
-#define EXT4_GROUP_INFO_LOCKED_BIT      1
 #define EXT4_MB_GRP_NEED_INIT(grp)      \
        (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
-static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
+static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
+                                              ext4_group_t group)
 {
-        struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+        return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group);
-        bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
 }
-static inline void ext4_unlock_group(struct super_block *sb,
+static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
-                                        ext4_group_t group)
 {
-        struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+        spin_lock(ext4_group_lock_ptr(sb, group));
-        bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
 }
-static inline int ext4_is_group_locked(struct super_block *sb,
+static inline void ext4_unlock_group(struct super_block *sb,
                                        ext4_group_t group)
 {
-        struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+        spin_unlock(ext4_group_lock_ptr(sb, group));
-        return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
-                                                &(grinfo->bb_state));
 }
 /*
@@ -1326,11 +1614,21 @@ extern const struct file_operations ext4_file_operations;
 /* namei.c */
 extern const struct inode_operations ext4_dir_inode_operations;
 extern const struct inode_operations ext4_special_inode_operations;
+extern struct dentry *ext4_get_parent(struct dentry *child);
 /* symlink.c */
 extern const struct inode_operations ext4_symlink_inode_operations;
 extern const struct inode_operations ext4_fast_symlink_inode_operations;
+/* block_validity.c */
+extern void ext4_release_system_zone(struct super_block *sb);
+extern int ext4_setup_system_zone(struct super_block *sb);
+extern int __init init_ext4_system_zone(void);
+extern void exit_ext4_system_zone(void);
+extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
+                                 ext4_fsblk_t start_blk,
+                                 unsigned int count);
 /* extents.c */
 extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
 extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
@@ -1338,17 +1636,15 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
                                       int chunk);
 extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                               ext4_lblk_t iblock, unsigned int max_blocks,
-                               struct buffer_head *bh_result,
+                               struct buffer_head *bh_result, int flags);
-                               int create, int extend_disksize);
 extern void ext4_ext_truncate(struct inode *);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
 extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
                          loff_t len);
-extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
+extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
-                        sector_t block, unsigned int max_blocks,
+                           sector_t block, unsigned int max_blocks,
-                        struct buffer_head *bh, int create,
+                           struct buffer_head *bh, int flags);
-                        int extend_disksize, int flag);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        __u64 start, __u64 len);
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
deleted file mode 100644
index 4ce2187123aa..000000000000
--- a/fs/ext4/ext4_i.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- *  ext4_i.h
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- *  from
- *
- *  linux/include/linux/minix_fs_i.h
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- */
-#ifndef _EXT4_I
-#define _EXT4_I
-#include <linux/rwsem.h>
-#include <linux/rbtree.h>
-#include <linux/seqlock.h>
-#include <linux/mutex.h>
-/* data type for block offset of block group */
-typedef int ext4_grpblk_t;
-/* data type for filesystem-wide blocks number */
-typedef unsigned long long ext4_fsblk_t;
-/* data type for file logical block number */
-typedef __u32 ext4_lblk_t;
-/* data type for block group number */
-typedef unsigned int ext4_group_t;
-/*
- * storage for cached extent
- */
-struct ext4_ext_cache {
-        ext4_fsblk_t    ec_start;
-        ext4_lblk_t     ec_block;
-        __u32           ec_len; /* must be 32bit to return holes */
-        __u32           ec_type;
-};
-/*
- * fourth extended file system inode data in memory
- */
-struct ext4_inode_info {
-        __le32  i_data[15];     /* unconverted */
-        __u32   i_flags;
-        ext4_fsblk_t    i_file_acl;
-        __u32   i_dtime;
-        /*
-         * i_block_group is the number of the block group which contains
-         * this file's inode.  Constant across the lifetime of the inode,
-         * it is ued for making block allocation decisions - we try to
-         * place a file's data blocks near its inode block, and new inodes
-         * near to their parent directory's inode.
-         */
-        ext4_group_t    i_block_group;
-        __u32   i_state;                /* Dynamic state flags for ext4 */
-        ext4_lblk_t             i_dir_start_lookup;
-#ifdef CONFIG_EXT4_FS_XATTR
-        /*
-         * Extended attributes can be read independently of the main file
-         * data. Taking i_mutex even when reading would cause contention
-         * between readers of EAs and writers of regular file data, so
-         * instead we synchronize on xattr_sem when reading or changing
-         * EAs.
-         */
-        struct rw_semaphore xattr_sem;
-#endif
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-        struct posix_acl        *i_acl;
-        struct posix_acl        *i_default_acl;
-#endif
-        struct list_head i_orphan;      /* unlinked but open inodes */
-        /*
-         * i_disksize keeps track of what the inode size is ON DISK, not
-         * in memory.  During truncate, i_size is set to the new size by
-         * the VFS prior to calling ext4_truncate(), but the filesystem won't
-         * set i_disksize to 0 until the truncate is actually under way.
-         *
-         * The intent is that i_disksize always represents the blocks which
-         * are used by this file.  This allows recovery to restart truncate
-         * on orphans if we crash during truncate.  We actually write i_disksize
-         * into the on-disk inode when writing inodes out, instead of i_size.
-         *
-         * The only time when i_disksize and i_size may be different is when
-         * a truncate is in progress.  The only things which change i_disksize
-         * are ext4_get_block (growth) and ext4_truncate (shrinkth).
-         */
-        loff_t  i_disksize;
-        /*
-         * i_data_sem is for serialising ext4_truncate() against
-         * ext4_getblock().  In the 2.4 ext2 design, great chunks of inode's
-         * data tree are chopped off during truncate. We can't do that in
-         * ext4 because whenever we perform intermediate commits during
-         * truncate, the inode and all the metadata blocks *must* be in a
-         * consistent state which allows truncation of the orphans to restart
-         * during recovery.  Hence we must fix the get_block-vs-truncate race
-         * by other means, so we have i_data_sem.
-         */
-        struct rw_semaphore i_data_sem;
-        struct inode vfs_inode;
-        struct jbd2_inode jinode;
-        struct ext4_ext_cache i_cached_extent;
-        /*
-         * File creation time. Its function is same as that of
-         * struct timespec i_{a,c,m}time in the generic inode.
-         */
-        struct timespec i_crtime;
-        /* mballoc */
-        struct list_head i_prealloc_list;
-        spinlock_t i_prealloc_lock;
-        /* ialloc */
-        ext4_group_t    i_last_alloc_group;
-        /* allocation reservation info for delalloc */
-        unsigned int i_reserved_data_blocks;
-        unsigned int i_reserved_meta_blocks;
-        unsigned int i_allocated_meta_blocks;
-        unsigned short i_delalloc_reserved_flag;
-        /* on-disk additional length */
-        __u16 i_extra_isize;
-        spinlock_t i_block_reservation_lock;
-};
-#endif  /* _EXT4_I */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
deleted file mode 100644
index 57b71fefbccf..000000000000
--- a/fs/ext4/ext4_sb.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- *  ext4_sb.h
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- *  from
- *
- *  linux/include/linux/minix_fs_sb.h
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- */
-#ifndef _EXT4_SB
-#define _EXT4_SB
-#ifdef __KERNEL__
-#include <linux/timer.h>
-#include <linux/wait.h>
-#include <linux/blockgroup_lock.h>
-#include <linux/percpu_counter.h>
-#endif
-#include <linux/rbtree.h>
-/*
- * fourth extended-fs super-block data in memory
- */
-struct ext4_sb_info {
-        unsigned long s_desc_size;      /* Size of a group descriptor in bytes */
-        unsigned long s_inodes_per_block;/* Number of inodes per block */
-        unsigned long s_blocks_per_group;/* Number of blocks in a group */
-        unsigned long s_inodes_per_group;/* Number of inodes in a group */
-        unsigned long s_itb_per_group;  /* Number of inode table blocks per group */
-        unsigned long s_gdb_count;      /* Number of group descriptor blocks */
-        unsigned long s_desc_per_block; /* Number of group descriptors per block */
-        ext4_group_t s_groups_count;    /* Number of groups in the fs */
-        unsigned long s_overhead_last;  /* Last calculated overhead */
-        unsigned long s_blocks_last;    /* Last seen block count */
-        loff_t s_bitmap_maxbytes;       /* max bytes for bitmap files */
-        struct buffer_head * s_sbh;     /* Buffer containing the super block */
-        struct ext4_super_block *s_es;  /* Pointer to the super block in the buffer */
-        struct buffer_head **s_group_desc;
-        unsigned long  s_mount_opt;
-        ext4_fsblk_t s_sb_block;
-        uid_t s_resuid;
-        gid_t s_resgid;
-        unsigned short s_mount_state;
-        unsigned short s_pad;
-        int s_addr_per_block_bits;
-        int s_desc_per_block_bits;
-        int s_inode_size;
-        int s_first_ino;
-        unsigned int s_inode_readahead_blks;
-        spinlock_t s_next_gen_lock;
-        u32 s_next_generation;
-        u32 s_hash_seed[4];
-        int s_def_hash_version;
-        int s_hash_unsigned;    /* 3 if hash should be signed, 0 if not */
-        struct percpu_counter s_freeblocks_counter;
-        struct percpu_counter s_freeinodes_counter;
-        struct percpu_counter s_dirs_counter;
-        struct percpu_counter s_dirtyblocks_counter;
-        struct blockgroup_lock *s_blockgroup_lock;
-        struct proc_dir_entry *s_proc;
-        struct kobject s_kobj;
-        struct completion s_kobj_unregister;
-        /* Journaling */
-        struct inode *s_journal_inode;
-        struct journal_s *s_journal;
-        struct list_head s_orphan;
-        unsigned long s_commit_interval;
-        u32 s_max_batch_time;
-        u32 s_min_batch_time;
-        struct block_device *journal_bdev;
-#ifdef CONFIG_JBD2_DEBUG
-        struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
-        wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
-#endif
-#ifdef CONFIG_QUOTA
-        char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
-        int s_jquota_fmt;                       /* Format of quota to use */
-#endif
-        unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
-#ifdef EXTENTS_STATS
-        /* ext4 extents stats */
-        unsigned long s_ext_min;
-        unsigned long s_ext_max;
-        unsigned long s_depth_max;
-        spinlock_t s_ext_stats_lock;
-        unsigned long s_ext_blocks;
-        unsigned long s_ext_extents;
-#endif
-        /* for buddy allocator */
-        struct ext4_group_info ***s_group_info;
-        struct inode *s_buddy_cache;
-        long s_blocks_reserved;
-        spinlock_t s_reserve_lock;
-        spinlock_t s_md_lock;
-        tid_t s_last_transaction;
-        unsigned short *s_mb_offsets;
-        unsigned int *s_mb_maxs;
-        /* tunables */
-        unsigned long s_stripe;
-        unsigned int s_mb_stream_request;
-        unsigned int s_mb_max_to_scan;
-        unsigned int s_mb_min_to_scan;
-        unsigned int s_mb_stats;
-        unsigned int s_mb_order2_reqs;
-        unsigned int s_mb_group_prealloc;
-        /* where last allocation was done - for stream allocation */
-        unsigned long s_mb_last_group;
-        unsigned long s_mb_last_start;
-        /* history to debug policy */
-        struct ext4_mb_history *s_mb_history;
-        int s_mb_history_cur;
-        int s_mb_history_max;
-        int s_mb_history_num;
-        spinlock_t s_mb_history_lock;
-        int s_mb_history_filter;
-        /* stats for buddy allocator */
-        spinlock_t s_mb_pa_lock;
-        atomic_t s_bal_reqs;    /* number of reqs with len > 1 */
-        atomic_t s_bal_success; /* we found long enough chunks */
-        atomic_t s_bal_allocated;       /* in blocks */
-        atomic_t s_bal_ex_scanned;      /* total extents scanned */
-        atomic_t s_bal_goals;   /* goal hits */
-        atomic_t s_bal_breaks;  /* too long searches */
-        atomic_t s_bal_2orders; /* 2^order hits */
-        spinlock_t s_bal_lock;
-        unsigned long s_mb_buddies_generated;
-        unsigned long long s_mb_generation_time;
-        atomic_t s_mb_lost_chunks;
-        atomic_t s_mb_preallocated;
-        atomic_t s_mb_discarded;
-        /* locality groups */
-        struct ext4_locality_group *s_locality_groups;
-        /* for write statistics */
-        unsigned long s_sectors_written_start;
-        u64 s_kbytes_written;
-        unsigned int s_log_groups_per_flex;
-        struct flex_groups *s_flex_groups;
-};
-static inline spinlock_t *
-sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
-{
-        return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
-}
-#endif  /* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e3a55eb8b26a..2593f748c3a4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -326,32 +326,18 @@ ext4_ext_max_entries(struct inode *inode, int depth)
 static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
 {
-        ext4_fsblk_t block = ext_pblock(ext), valid_block;
+        ext4_fsblk_t block = ext_pblock(ext);
        int len = ext4_ext_get_actual_len(ext);
-        struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
-        valid_block = le32_to_cpu(es->s_first_data_block) +
+        return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
-                EXT4_SB(inode->i_sb)->s_gdb_count;
-        if (unlikely(block <= valid_block ||
-                     ((block + len) > ext4_blocks_count(es))))
-                return 0;
-        else
-                return 1;
 }
 static int ext4_valid_extent_idx(struct inode *inode,
                                struct ext4_extent_idx *ext_idx)
 {
-        ext4_fsblk_t block = idx_pblock(ext_idx), valid_block;
+        ext4_fsblk_t block = idx_pblock(ext_idx);
-        struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
-        valid_block = le32_to_cpu(es->s_first_data_block) +
+        return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
-                EXT4_SB(inode->i_sb)->s_gdb_count;
-        if (unlikely(block <= valid_block ||
-                     (block >= ext4_blocks_count(es))))
-                return 0;
-        else
-                return 1;
 }
 static int ext4_valid_extent_entries(struct inode *inode,
@@ -2097,12 +2083,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
        ex = EXT_LAST_EXTENT(eh);
        ex_ee_block = le32_to_cpu(ex->ee_block);
-        if (ext4_ext_is_uninitialized(ex))
-                uninitialized = 1;
        ex_ee_len = ext4_ext_get_actual_len(ex);
        while (ex >= EXT_FIRST_EXTENT(eh) &&
                        ex_ee_block + ex_ee_len > start) {
+                if (ext4_ext_is_uninitialized(ex))
+                        uninitialized = 1;
+                else
+                        uninitialized = 0;
                ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len);
                path[depth].p_ext = ex;
@@ -2784,7 +2774,7 @@ fix_extent_len:
 int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                        ext4_lblk_t iblock,
                        unsigned int max_blocks, struct buffer_head *bh_result,
-                        int create, int extend_disksize)
+                        int flags)
 {
        struct ext4_ext_path *path = NULL;
        struct ext4_extent_header *eh;
@@ -2793,7 +2783,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        int err = 0, depth, ret, cache_type;
        unsigned int allocated = 0;
        struct ext4_allocation_request ar;
-        loff_t disksize;
        __clear_bit(BH_New, &bh_result->b_state);
        ext_debug("blocks %u/%u requested for inode %u\n",
@@ -2803,7 +2792,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        cache_type = ext4_ext_in_cache(inode, iblock, &newex);
        if (cache_type) {
                if (cache_type == EXT4_EXT_CACHE_GAP) {
-                        if (!create) {
+                        if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
                                /*
                                 * block isn't allocated yet and
                                 * user doesn't want to allocate it
@@ -2869,9 +2858,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                                                        EXT4_EXT_CACHE_EXTENT);
                                goto out;
                        }
-                        if (create == EXT4_CREATE_UNINITIALIZED_EXT)
+                        if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
                                goto out;
-                        if (!create) {
+                        if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
+                                if (allocated > max_blocks)
+                                        allocated = max_blocks;
                                /*
                                 * We have blocks reserved already.  We
                                 * return allocated blocks so that delalloc
@@ -2879,8 +2870,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                                 * the buffer head will be unmapped so that
                                 * a read from the block returns 0s.
                                 */
-                                if (allocated > max_blocks)
-                                        allocated = max_blocks;
                                set_buffer_unwritten(bh_result);
                                bh_result->b_bdev = inode->i_sb->s_bdev;
                                bh_result->b_blocknr = newblock;
@@ -2903,7 +2892,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
         * requested block isn't allocated yet;
         * we couldn't try to create block if create flag is zero
         */
-        if (!create) {
+        if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
                /*
                 * put just found gap into cache to speed up
                 * subsequent requests
@@ -2932,10 +2921,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
         * EXT_UNINIT_MAX_LEN.
         */
        if (max_blocks > EXT_INIT_MAX_LEN &&
-            create != EXT4_CREATE_UNINITIALIZED_EXT)
+            !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
                max_blocks = EXT_INIT_MAX_LEN;
        else if (max_blocks > EXT_UNINIT_MAX_LEN &&
-                 create == EXT4_CREATE_UNINITIALIZED_EXT)
+                 (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
                max_blocks = EXT_UNINIT_MAX_LEN;
        /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
@@ -2966,7 +2955,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        /* try to insert new extent into found leaf and return */
        ext4_ext_store_pblock(&newex, newblock);
        newex.ee_len = cpu_to_le16(ar.len);
-        if (create == EXT4_CREATE_UNINITIALIZED_EXT)  /* Mark uninitialized */
+        if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)  /* Mark uninitialized */
                ext4_ext_mark_uninitialized(&newex);
        err = ext4_ext_insert_extent(handle, inode, path, &newex);
        if (err) {
@@ -2983,18 +2972,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        newblock = ext_pblock(&newex);
        allocated = ext4_ext_get_actual_len(&newex);
 outnew:
-        if (extend_disksize) {
-                disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
-                if (disksize > i_size_read(inode))
-                        disksize = i_size_read(inode);
-                if (disksize > EXT4_I(inode)->i_disksize)
-                        EXT4_I(inode)->i_disksize = disksize;
-        }
        set_buffer_new(bh_result);
        /* Cache only when it is _not_ an uninitialized extent */
-        if (create != EXT4_CREATE_UNINITIALIZED_EXT)
+        if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
                ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
                                                EXT4_EXT_CACHE_EXTENT);
 out:
@@ -3150,9 +3131,10 @@ retry:
                        ret = PTR_ERR(handle);
                        break;
                }
-                ret = ext4_get_blocks_wrap(handle, inode, block,
+                map_bh.b_state = 0;
-                                          max_blocks, &map_bh,
+                ret = ext4_get_blocks(handle, inode, block,
-                                          EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
+                                      max_blocks, &map_bh,
+                                      EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);
                if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
                        WARN_ON(ret <= 0);
@@ -3195,7 +3177,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
                       void *data)
 {
        struct fiemap_extent_info *fieinfo = data;
-        unsigned long blksize_bits = inode->i_sb->s_blocksize_bits;
+        unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
        __u64   logical;
        __u64   physical;
        __u64   length;
@@ -3242,9 +3224,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
         *
         * XXX this might miss a single-block extent at EXT_MAX_BLOCK
         */
-        if (logical + length - 1 == EXT_MAX_BLOCK ||
+        if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
-            ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK)
+            newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
+                loff_t size = i_size_read(inode);
+                loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
                flags |= FIEMAP_EXTENT_LAST;
+                if ((flags & FIEMAP_EXTENT_DELALLOC) &&
+                    logical+length > size)
+                        length = (size - logical + bs - 1) & ~(bs-1);
+        }
        error = fiemap_fill_next_extent(fieinfo, logical, physical,
                                        length, flags);
@@ -3318,10 +3307,10 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                 * Walk the extent tree gathering extent information.
                 * ext4_ext_fiemap_cb will push extents back to user.
                 */
-                down_write(&EXT4_I(inode)->i_data_sem);
+                down_read(&EXT4_I(inode)->i_data_sem);
                error = ext4_ext_walk_space(inode, start_blk, len_blks,
                                          ext4_ext_fiemap_cb, fieinfo);
-                up_write(&EXT4_I(inode)->i_data_sem);
+                up_read(&EXT4_I(inode)->i_data_sem);
        }
        return error;
diff --git a/fs/ext4/group.h b/fs/ext4/group.h
deleted file mode 100644
index c2c0a8d06d0e..000000000000
--- a/fs/ext4/group.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- *  linux/fs/ext4/group.h
- *
- * Copyright (C) 2007 Cluster File Systems, Inc
- *
- * Author: Andreas Dilger <adilger@clusterfs.com>
- */
-#ifndef _LINUX_EXT4_GROUP_H
-#define _LINUX_EXT4_GROUP_H
-extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
-                                   struct ext4_group_desc *gdp);
-extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
-                                       struct ext4_group_desc *gdp);
-struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
-                                      ext4_group_t block_group);
-extern unsigned ext4_init_block_bitmap(struct super_block *sb,
-                                       struct buffer_head *bh,
-                                       ext4_group_t group,
-                                       struct ext4_group_desc *desc);
-#define ext4_free_blocks_after_init(sb, group, desc)                    \
-                ext4_init_block_bitmap(sb, NULL, group, desc)
-extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
-                                       struct buffer_head *bh,
-                                       ext4_group_t group,
-                                       struct ext4_group_desc *desc);
-extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
-#endif /* _LINUX_EXT4_GROUP_H */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f18e0a08a6b5..3743bd849bce 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -27,7 +27,6 @@
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
-#include "group.h"
 /*
 * ialloc.c contains the inodes allocation and deallocation routines
@@ -123,16 +122,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
                unlock_buffer(bh);
                return bh;
        }
-        spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+        ext4_lock_group(sb, block_group);
        if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
                ext4_init_inode_bitmap(sb, bh, block_group, desc);
                set_bitmap_uptodate(bh);
                set_buffer_uptodate(bh);
-                spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+                ext4_unlock_group(sb, block_group);
                unlock_buffer(bh);
                return bh;
        }
-        spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+        ext4_unlock_group(sb, block_group);
        if (buffer_uptodate(bh)) {
                /*
                 * if not uninit if bh is uptodate,
@@ -247,9 +246,8 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
                goto error_return;
        /* Ok, now we can actually update the inode bitmaps.. */
-        spin_lock(sb_bgl_lock(sbi, block_group));
+        cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
-        cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
+                                        bit, bitmap_bh->b_data);
-        spin_unlock(sb_bgl_lock(sbi, block_group));
        if (!cleared)
                ext4_error(sb, "ext4_free_inode",
                           "bit already cleared for inode %lu", ino);
@@ -261,7 +259,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
                if (fatal) goto error_return;
                if (gdp) {
-                        spin_lock(sb_bgl_lock(sbi, block_group));
+                        ext4_lock_group(sb, block_group);
                        count = ext4_free_inodes_count(sb, gdp) + 1;
                        ext4_free_inodes_set(sb, gdp, count);
                        if (is_directory) {
@@ -277,7 +275,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
                        }
                        gdp->bg_checksum = ext4_group_desc_csum(sbi,
                                                        block_group, gdp);
-                        spin_unlock(sb_bgl_lock(sbi, block_group));
+                        ext4_unlock_group(sb, block_group);
                        percpu_counter_inc(&sbi->s_freeinodes_counter);
                        if (is_directory)
                                percpu_counter_dec(&sbi->s_dirs_counter);
@@ -316,7 +314,7 @@ error_return:
 static int find_group_dir(struct super_block *sb, struct inode *parent,
                                ext4_group_t *best_group)
 {
-        ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+        ext4_group_t ngroups = ext4_get_groups_count(sb);
        unsigned int freei, avefreei;
        struct ext4_group_desc *desc, *best_desc = NULL;
        ext4_group_t group;
@@ -349,11 +347,10 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_group_desc *desc;
-        struct buffer_head *bh;
        struct flex_groups *flex_group = sbi->s_flex_groups;
        ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
        ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
-        ext4_group_t ngroups = sbi->s_groups_count;
+        ext4_group_t ngroups = ext4_get_groups_count(sb);
        int flex_size = ext4_flex_bg_size(sbi);
        ext4_group_t best_flex = parent_fbg_group;
        int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
@@ -362,7 +359,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
        ext4_group_t n_fbg_groups;
        ext4_group_t i;
-        n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >>
+        n_fbg_groups = (ngroups + flex_size - 1) >>
                sbi->s_log_groups_per_flex;
 find_close_to_parent:
@@ -404,7 +401,7 @@ find_close_to_parent:
 found_flexbg:
        for (i = best_flex * flex_size; i < ngroups &&
                     i < (best_flex + 1) * flex_size; i++) {
-                desc = ext4_get_group_desc(sb, i, &bh);
+                desc = ext4_get_group_desc(sb, i, NULL);
                if (ext4_free_inodes_count(sb, desc)) {
                        *best_group = i;
                        goto out;
@@ -478,20 +475,21 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 {
        ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-        ext4_group_t ngroups = sbi->s_groups_count;
+        ext4_group_t real_ngroups = ext4_get_groups_count(sb);
        int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
        unsigned int freei, avefreei;
        ext4_fsblk_t freeb, avefreeb;
        unsigned int ndirs;
        int max_dirs, min_inodes;
        ext4_grpblk_t min_blocks;
-        ext4_group_t i, grp, g;
+        ext4_group_t i, grp, g, ngroups;
        struct ext4_group_desc *desc;
        struct orlov_stats stats;
        int flex_size = ext4_flex_bg_size(sbi);
+        ngroups = real_ngroups;
        if (flex_size > 1) {
-                ngroups = (ngroups + flex_size - 1) >>
+                ngroups = (real_ngroups + flex_size - 1) >>
                        sbi->s_log_groups_per_flex;
                parent_group >>= sbi->s_log_groups_per_flex;
        }
@@ -543,7 +541,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
                 */
                grp *= flex_size;
                for (i = 0; i < flex_size; i++) {
-                        if (grp+i >= sbi->s_groups_count)
+                        if (grp+i >= real_ngroups)
                                break;
                        desc = ext4_get_group_desc(sb, grp+i, NULL);
                        if (desc && ext4_free_inodes_count(sb, desc)) {
@@ -583,7 +581,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
        }
 fallback:
-        ngroups = sbi->s_groups_count;
+        ngroups = real_ngroups;
        avefreei = freei / ngroups;
 fallback_retry:
        parent_group = EXT4_I(parent)->i_block_group;
@@ -613,9 +611,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
                            ext4_group_t *group, int mode)
 {
        ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
-        ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+        ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
        struct ext4_group_desc *desc;
-        ext4_group_t i, last;
        int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
        /*
@@ -708,10 +705,10 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 /*
 * claim the inode from the inode bitmap. If the group
- * is uninit we need to take the groups's sb_bgl_lock
+ * is uninit we need to take the group's ext4_group_lock
 * and clear the uninit flag. The inode bitmap update
 * and group desc uninit flag clear should be done
- * after holding sb_bgl_lock so that ext4_read_inode_bitmap
+ * after holding ext4_group_lock so that ext4_read_inode_bitmap
 * doesn't race with the ext4_claim_inode
 */
 static int ext4_claim_inode(struct super_block *sb,
@@ -722,7 +719,7 @@ static int ext4_claim_inode(struct super_block *sb,
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
-        spin_lock(sb_bgl_lock(sbi, group));
+        ext4_lock_group(sb, group);
        if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
                /* not a free inode */
                retval = 1;
@@ -731,7 +728,7 @@ static int ext4_claim_inode(struct super_block *sb,
        ino++;
        if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
                        ino > EXT4_INODES_PER_GROUP(sb)) {
-                spin_unlock(sb_bgl_lock(sbi, group));
+                ext4_unlock_group(sb, group);
                ext4_error(sb, __func__,
                           "reserved inode or inode > inodes count - "
                           "block_group = %u, inode=%lu", group,
@@ -780,7 +777,7 @@ static int ext4_claim_inode(struct super_block *sb,
        }
        gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
 err_ret:
-        spin_unlock(sb_bgl_lock(sbi, group));
+        ext4_unlock_group(sb, group);
        return retval;
 }
@@ -799,11 +796,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
        struct super_block *sb;
        struct buffer_head *inode_bitmap_bh = NULL;
        struct buffer_head *group_desc_bh;
-        ext4_group_t group = 0;
+        ext4_group_t ngroups, group = 0;
        unsigned long ino = 0;
        struct inode *inode;
        struct ext4_group_desc *gdp = NULL;
-        struct ext4_super_block *es;
        struct ext4_inode_info *ei;
        struct ext4_sb_info *sbi;
        int ret2, err = 0;
@@ -818,15 +814,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
                return ERR_PTR(-EPERM);
        sb = dir->i_sb;
+        ngroups = ext4_get_groups_count(sb);
        trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
                   dir->i_ino, mode);
        inode = new_inode(sb);
        if (!inode)
                return ERR_PTR(-ENOMEM);
        ei = EXT4_I(inode);
        sbi = EXT4_SB(sb);
-        es = sbi->s_es;
        if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
                ret2 = find_group_flex(sb, dir, &group);
@@ -856,7 +851,7 @@ got_group:
        if (ret2 == -1)
                goto out;
-        for (i = 0; i < sbi->s_groups_count; i++) {
+        for (i = 0; i < ngroups; i++) {
                err = -EIO;
                gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@@ -917,7 +912,7 @@ repeat_in_this_group:
                 * group descriptor metadata has not yet been updated.
                 * So we just go onto the next blockgroup.
                 */
-                if (++group == sbi->s_groups_count)
+                if (++group == ngroups)
                        group = 0;
        }
        err = -ENOSPC;
@@ -938,7 +933,7 @@ got:
                }
                free = 0;
-                spin_lock(sb_bgl_lock(sbi, group));
+                ext4_lock_group(sb, group);
                /* recheck and clear flag under lock if we still need to */
                if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
                        free = ext4_free_blocks_after_init(sb, group, gdp);
@@ -947,7 +942,7 @@ got:
                        gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
                                                                gdp);
                }
-                spin_unlock(sb_bgl_lock(sbi, group));
+                ext4_unlock_group(sb, group);
                /* Don't need to dirty bitmap block if we didn't change it */
                if (free) {
@@ -1158,7 +1153,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
 {
        unsigned long desc_count;
        struct ext4_group_desc *gdp;
-        ext4_group_t i;
+        ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 #ifdef EXT4FS_DEBUG
        struct ext4_super_block *es;
        unsigned long bitmap_count, x;
@@ -1168,7 +1163,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
        desc_count = 0;
        bitmap_count = 0;
        gdp = NULL;
-        for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+        for (i = 0; i < ngroups; i++) {
                gdp = ext4_get_group_desc(sb, i, NULL);
                if (!gdp)
                        continue;
@@ -1190,7 +1185,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
        return desc_count;
 #else
        desc_count = 0;
-        for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+        for (i = 0; i < ngroups; i++) {
                gdp = ext4_get_group_desc(sb, i, NULL);
                if (!gdp)
                        continue;
@@ -1205,9 +1200,9 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
 unsigned long ext4_count_dirs(struct super_block * sb)
 {
        unsigned long count = 0;
-        ext4_group_t i;
+        ext4_group_t i, ngroups = ext4_get_groups_count(sb);
-        for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+        for (i = 0; i < ngroups; i++) {
                struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
                if (!gdp)
                        continue;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2a9ffd528dd1..875db944b22f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -372,20 +372,21 @@ static int ext4_block_to_path(struct inode *inode,
 }
 static int __ext4_check_blockref(const char *function, struct inode *inode,
-                                 __le32 *p, unsigned int max) {
+                                 __le32 *p, unsigned int max)
+{
-        unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es);
        __le32 *bref = p;
+        unsigned int blk;
        while (bref < p+max) {
-                if (unlikely(le32_to_cpu(*bref) >= maxblocks)) {
+                blk = le32_to_cpu(*bref++);
+                if (blk && 
+                    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), 
+                                                    blk, 1))) {
                        ext4_error(inode->i_sb, function,
-                                   "block reference %u >= max (%u) "
+                                   "invalid block reference %u "
-                                   "in inode #%lu, offset=%d",
+                                   "in inode #%lu", blk, inode->i_ino);
-                                   le32_to_cpu(*bref), maxblocks,
-                                   inode->i_ino, (int)(bref-p));
                        return -EIO;
                }
-                bref++;
        }
        return 0;
 }
@@ -892,6 +893,10 @@ err_out:
 }
 /*
+ * The ext4_ind_get_blocks() function handles non-extents inodes
+ * (i.e., using the traditional indirect/double-indirect i_blocks
+ * scheme) for ext4_get_blocks().
+ *
 * Allocation strategy is simple: if we have to allocate something, we will
 * have to go the whole way to leaf. So let's do it before attaching anything
 * to tree, set linkage between the newborn blocks, write them if sync is
@@ -909,15 +914,16 @@ err_out:
 * return = 0, if plain lookup failed.
 * return < 0, error case.
 *
- *
+ * The ext4_ind_get_blocks() function should be called with
- * Need to be called with
+ * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem
- * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
+ * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or
- * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
+ * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
+ * blocks.
 */
-static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
+static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
                                  ext4_lblk_t iblock, unsigned int maxblocks,
                                  struct buffer_head *bh_result,
-                                  int create, int extend_disksize)
+                                  int flags)
 {
        int err = -EIO;
        ext4_lblk_t offsets[4];
@@ -927,14 +933,11 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
        int indirect_blks;
        int blocks_to_boundary = 0;
        int depth;
-        struct ext4_inode_info *ei = EXT4_I(inode);
        int count = 0;
        ext4_fsblk_t first_block = 0;
-        loff_t disksize;
        J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
-        J_ASSERT(handle != NULL || create == 0);
+        J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
        depth = ext4_block_to_path(inode, iblock, offsets,
                                        &blocks_to_boundary);
@@ -963,7 +966,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
        }
        /* Next simple case - plain lookup or failed read of indirect block */
-        if (!create || err == -EIO)
+        if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
                goto cleanup;
        /*
@@ -997,19 +1000,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
        if (!err)
                err = ext4_splice_branch(handle, inode, iblock,
                                        partial, indirect_blks, count);
-        /*
+        else 
-         * i_disksize growing is protected by i_data_sem.  Don't forget to
-         * protect it if you're about to implement concurrent
-         * ext4_get_block() -bzzz
-        */
-        if (!err && extend_disksize) {
-                disksize = ((loff_t) iblock + count) << inode->i_blkbits;
-                if (disksize > i_size_read(inode))
-                        disksize = i_size_read(inode);
-                if (disksize > ei->i_disksize)
-                        ei->i_disksize = disksize;
-        }
-        if (err)
                goto cleanup;
        set_buffer_new(bh_result);
@@ -1120,8 +1111,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
                ext4_discard_preallocations(inode);
 }
+static int check_block_validity(struct inode *inode, sector_t logical,
+                                sector_t phys, int len)
+{
+        if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
+                ext4_error(inode->i_sb, "check_block_validity",
+                           "inode #%lu logical block %llu mapped to %llu "
+                           "(size %d)", inode->i_ino,
+                           (unsigned long long) logical,
+                           (unsigned long long) phys, len);
+                WARN_ON(1);
+                return -EIO;
+        }
+        return 0;
+}
 /*
- * The ext4_get_blocks_wrap() function try to look up the requested blocks,
+ * The ext4_get_blocks() function tries to look up the requested blocks,
 * and returns if the blocks are already mapped.
 *
 * Otherwise it takes the write lock of the i_data_sem and allocate blocks
@@ -1129,7 +1135,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 * mapped.
 *
 * If file type is extents based, it will call ext4_ext_get_blocks(),
- * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping
+ * Otherwise, it calls ext4_ind_get_blocks() to handle indirect mapping
 * based files
 *
 * On success, it returns the number of blocks being mapped or allocate.
@@ -1142,9 +1148,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 *
 * It returns the error in case of allocation failure.
 */
-int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
+int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
-                        unsigned int max_blocks, struct buffer_head *bh,
+                    unsigned int max_blocks, struct buffer_head *bh,
-                        int create, int extend_disksize, int flag)
+                    int flags)
 {
        int retval;
@@ -1152,21 +1158,28 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
        clear_buffer_unwritten(bh);
        /*
-         * Try to see if we can get  the block without requesting
+         * Try to see if we can get the block without requesting a new
-         * for new file system block.
+         * file system block.
         */
        down_read((&EXT4_I(inode)->i_data_sem));
        if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
                retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
-                                bh, 0, 0);
+                                bh, 0);
        } else {
-                retval = ext4_get_blocks_handle(handle,
+                retval = ext4_ind_get_blocks(handle, inode, block, max_blocks,
-                                inode, block, max_blocks, bh, 0, 0);
+                                             bh, 0);
        }
        up_read((&EXT4_I(inode)->i_data_sem));
+        if (retval > 0 && buffer_mapped(bh)) {
+                int ret = check_block_validity(inode, block, 
+                                               bh->b_blocknr, retval);
+                if (ret != 0)
+                        return ret;
+        }
        /* If it is only a block(s) look up */
-        if (!create)
+        if ((flags & EXT4_GET_BLOCKS_CREATE) == 0)
                return retval;
        /*
@@ -1205,7 +1218,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
         * let the underlying get_block() function know to
         * avoid double accounting
         */
-        if (flag)
+        if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
                EXT4_I(inode)->i_delalloc_reserved_flag = 1;
        /*
         * We need to check for EXT4 here because migrate
@@ -1213,10 +1226,10 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
         */
        if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
                retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
-                                bh, create, extend_disksize);
+                                              bh, flags);
        } else {
-                retval = ext4_get_blocks_handle(handle, inode, block,
+                retval = ext4_ind_get_blocks(handle, inode, block,
-                                max_blocks, bh, create, extend_disksize);
+                                             max_blocks, bh, flags);
                if (retval > 0 && buffer_new(bh)) {
                        /*
@@ -1229,18 +1242,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
                }
        }
-        if (flag) {
+        if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
                EXT4_I(inode)->i_delalloc_reserved_flag = 0;
-                /*
-                 * Update reserved blocks/metadata blocks
+        /*
-                 * after successful block allocation
+         * Update reserved blocks/metadata blocks after successful
-                 * which were deferred till now
+         * block allocation which had been deferred till now.
-                 */
+         */
-                if ((retval > 0) && buffer_delay(bh))
+        if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
-                        ext4_da_update_reserve_space(inode, retval);
+                ext4_da_update_reserve_space(inode, retval);
-        }
        up_write((&EXT4_I(inode)->i_data_sem));
+        if (retval > 0 && buffer_mapped(bh)) {
+                int ret = check_block_validity(inode, block, 
+                                               bh->b_blocknr, retval);
+                if (ret != 0)
+                        return ret;
+        }
        return retval;
 }
@@ -1268,8 +1286,8 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
                started = 1;
        }
-        ret = ext4_get_blocks_wrap(handle, inode, iblock,
+        ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
-                                        max_blocks, bh_result, create, 0, 0);
+                              create ? EXT4_GET_BLOCKS_CREATE : 0);
        if (ret > 0) {
                bh_result->b_size = (ret << inode->i_blkbits);
                ret = 0;
@@ -1288,17 +1306,19 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 {
        struct buffer_head dummy;
        int fatal = 0, err;
+        int flags = 0;
        J_ASSERT(handle != NULL || create == 0);
        dummy.b_state = 0;
        dummy.b_blocknr = -1000;
        buffer_trace_init(&dummy.b_history);
-        err = ext4_get_blocks_wrap(handle, inode, block, 1,
+        if (create)
-                                        &dummy, create, 1, 0);
+                flags |= EXT4_GET_BLOCKS_CREATE;
+        err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags);
        /*
-         * ext4_get_blocks_handle() returns number of blocks
+         * ext4_get_blocks() returns number of blocks mapped. 0 in
-         * mapped. 0 in case of a HOLE.
+         * case of a HOLE.
         */
        if (err > 0) {
                if (err > 1)
@@ -1439,7 +1459,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
                                struct page **pagep, void **fsdata)
 {
        struct inode *inode = mapping->host;
-        int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
+        int ret, needed_blocks;
        handle_t *handle;
        int retries = 0;
        struct page *page;
@@ -1450,6 +1470,11 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
                   "dev %s ino %lu pos %llu len %u flags %u",
                   inode->i_sb->s_id, inode->i_ino,
                   (unsigned long long) pos, len, flags);
+        /*
+         * Reserve one block more for addition to orphan list in case
+         * we allocate blocks but write fails for some reason
+         */
+        needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
        index = pos >> PAGE_CACHE_SHIFT;
        from = pos & (PAGE_CACHE_SIZE - 1);
        to = from + len;
@@ -1483,15 +1508,30 @@ retry:
        if (ret) {
                unlock_page(page);
-                ext4_journal_stop(handle);
                page_cache_release(page);
                /*
                 * block_write_begin may have instantiated a few blocks
                 * outside i_size.  Trim these off again. Don't need
                 * i_size_read because we hold i_mutex.
+                 *
+                 * Add inode to orphan list in case we crash before
+                 * truncate finishes
                 */
                if (pos + len > inode->i_size)
+                        ext4_orphan_add(handle, inode);
+                ext4_journal_stop(handle);
+                if (pos + len > inode->i_size) {
                        vmtruncate(inode, inode->i_size);
+                        /* 
+                         * If vmtruncate failed early the inode might
+                         * still be on the orphan list; we need to
+                         * make sure the inode is removed from the
+                         * orphan list in that case.
+                         */
+                        if (inode->i_nlink)
+                                ext4_orphan_del(NULL, inode);
+                }
        }
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -1509,6 +1549,52 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
        return ext4_handle_dirty_metadata(handle, NULL, bh);
 }
+static int ext4_generic_write_end(struct file *file,
+                                struct address_space *mapping,
+                                loff_t pos, unsigned len, unsigned copied,
+                                struct page *page, void *fsdata)
+{
+        int i_size_changed = 0;
+        struct inode *inode = mapping->host;
+        handle_t *handle = ext4_journal_current_handle();
+        copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+        /*
+         * No need to use i_size_read() here, the i_size
+         * cannot change under us because we hold i_mutex.
+         *
+         * But it's important to update i_size while still holding page lock:
+         * page writeout could otherwise come in and zero beyond i_size.
+         */
+        if (pos + copied > inode->i_size) {
+                i_size_write(inode, pos + copied);
+                i_size_changed = 1;
+        }
+        if (pos + copied >  EXT4_I(inode)->i_disksize) {
+                /* We need to mark inode dirty even if
+                 * pos + copied is less than inode->i_size
+                 * but greater than i_disksize. (hint delalloc)
+                 */
+                ext4_update_i_disksize(inode, (pos + copied));
+                i_size_changed = 1;
+        }
+        unlock_page(page);
+        page_cache_release(page);
+        /*
+         * Don't mark the inode dirty under page lock. First, it unnecessarily
+         * makes the holding time of page lock longer. Second, it forces lock
+         * ordering of page lock and transaction start for journaling
+         * filesystems.
+         */
+        if (i_size_changed)
+                ext4_mark_inode_dirty(handle, inode);
+        return copied;
+}
 /*
 * We need to pick up the new inode size which generic_commit_write gave us
 * `file' can be NULL - eg, when called from page_symlink().
@@ -1532,21 +1618,15 @@ static int ext4_ordered_write_end(struct file *file,
        ret = ext4_jbd2_file_inode(handle, inode);
        if (ret == 0) {
-                loff_t new_i_size;
+                ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
-                new_i_size = pos + copied;
-                if (new_i_size > EXT4_I(inode)->i_disksize) {
-                        ext4_update_i_disksize(inode, new_i_size);
-                        /* We need to mark inode dirty even if
-                         * new_i_size is less that inode->i_size
-                         * bu greater than i_disksize.(hint delalloc)
-                         */
-                        ext4_mark_inode_dirty(handle, inode);
-                }
-                ret2 = generic_write_end(file, mapping, pos, len, copied,
                                                        page, fsdata);
                copied = ret2;
+                if (pos + len > inode->i_size)
+                        /* If we allocated more blocks than we ended up
+                         * copying, there are blocks allocated outside
+                         * inode->i_size, so truncate them.
+                         */
+                        ext4_orphan_add(handle, inode);
                if (ret2 < 0)
                        ret = ret2;
        }
@@ -1554,6 +1634,18 @@ static int ext4_ordered_write_end(struct file *file,
        if (!ret)
                ret = ret2;
+        if (pos + len > inode->i_size) {
+                vmtruncate(inode, inode->i_size);
+                /* 
+                 * If vmtruncate failed early the inode might still be
+                 * on the orphan list; we need to make sure the inode
+                 * is removed from the orphan list in that case.
+                 */
+                if (inode->i_nlink)
+                        ext4_orphan_del(NULL, inode);
+        }
        return ret ? ret : copied;
 }
@@ -1565,25 +1657,21 @@ static int ext4_writeback_write_end(struct file *file,
        handle_t *handle = ext4_journal_current_handle();
        struct inode *inode = mapping->host;
        int ret = 0, ret2;
-        loff_t new_i_size;
        trace_mark(ext4_writeback_write_end,
                   "dev %s ino %lu pos %llu len %u copied %u",
                   inode->i_sb->s_id, inode->i_ino,
                   (unsigned long long) pos, len, copied);
-        new_i_size = pos + copied;
+        ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
-        if (new_i_size > EXT4_I(inode)->i_disksize) {
-                ext4_update_i_disksize(inode, new_i_size);
-                /* We need to mark inode dirty even if
-                 * new_i_size is less that inode->i_size
-                 * bu greater than i_disksize.(hint delalloc)
-                 */
-                ext4_mark_inode_dirty(handle, inode);
-        }
-        ret2 = generic_write_end(file, mapping, pos, len, copied,
                                                        page, fsdata);
        copied = ret2;
+        if (pos + len > inode->i_size)
+                /* If we allocated more blocks than we ended up
+                 * copying, there are blocks allocated outside
+                 * inode->i_size, so truncate them.
+                 */
+                ext4_orphan_add(handle, inode);
        if (ret2 < 0)
                ret = ret2;
@@ -1591,6 +1679,17 @@ static int ext4_writeback_write_end(struct file *file,
        if (!ret)
                ret = ret2;
+        if (pos + len > inode->i_size) {
+                vmtruncate(inode, inode->i_size);
+                /* 
+                 * If vmtruncate failed early the inode might still be
+                 * on the orphan list; we need to make sure the inode
+                 * is removed from the orphan list in that case.
+                 */
+                if (inode->i_nlink)
+                        ext4_orphan_del(NULL, inode);
+        }
        return ret ? ret : copied;
 }
@@ -1635,10 +1734,27 @@ static int ext4_journalled_write_end(struct file *file,
        }
        unlock_page(page);
+        page_cache_release(page);
+        if (pos + len > inode->i_size)
+                /* If we allocated more blocks than we ended up
+                 * copying, there are blocks allocated outside
+                 * inode->i_size, so truncate them.
+                 */
+                ext4_orphan_add(handle, inode);
        ret2 = ext4_journal_stop(handle);
        if (!ret)
                ret = ret2;
-        page_cache_release(page);
+        if (pos + len > inode->i_size) {
+                vmtruncate(inode, inode->i_size);
+                /* 
+                 * If vmtruncate failed early the inode might still be
+                 * on the orphan list; we need to make sure the inode
+                 * is removed from the orphan list in that case.
+                 */
+                if (inode->i_nlink)
+                        ext4_orphan_del(NULL, inode);
+        }
        return ret ? ret : copied;
 }
@@ -1852,7 +1968,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
 * @logical - first logical block to start assignment with
 *
 * the function goes through all passed space and put actual disk
- * block numbers into buffer heads, dropping BH_Delay
+ * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
 */
 static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
                                 struct buffer_head *exbh)
@@ -1902,16 +2018,24 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
                        do {
                                if (cur_logical >= logical + blocks)
                                        break;
-                                if (buffer_delay(bh)) {
-                                        bh->b_blocknr = pblock;
+                                if (buffer_delay(bh) ||
-                                        clear_buffer_delay(bh);
+                                                buffer_unwritten(bh)) {
-                                        bh->b_bdev = inode->i_sb->s_bdev;
-                                } else if (buffer_unwritten(bh)) {
+                                        BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
-                                        bh->b_blocknr = pblock;
-                                        clear_buffer_unwritten(bh);
+                                        if (buffer_delay(bh)) {
-                                        set_buffer_mapped(bh);
+                                                clear_buffer_delay(bh);
-                                        set_buffer_new(bh);
+                                                bh->b_blocknr = pblock;
-                                        bh->b_bdev = inode->i_sb->s_bdev;
+                                        } else {
+                                                /*
+                                                 * unwritten already should have
+                                                 * blocknr assigned. Verify that
+                                                 */
+                                                clear_buffer_unwritten(bh);
+                                                BUG_ON(bh->b_blocknr != pblock);
+                                        }
                                } else if (buffer_mapped(bh))
                                        BUG_ON(bh->b_blocknr != pblock);
@@ -1990,51 +2114,6 @@ static void ext4_print_free_blocks(struct inode *inode)
        return;
 }
-#define         EXT4_DELALLOC_RSVED     1
-static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
-                                   struct buffer_head *bh_result, int create)
-{
-        int ret;
-        unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-        loff_t disksize = EXT4_I(inode)->i_disksize;
-        handle_t *handle = NULL;
-        handle = ext4_journal_current_handle();
-        BUG_ON(!handle);
-        ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
-                                   bh_result, create, 0, EXT4_DELALLOC_RSVED);
-        if (ret <= 0)
-                return ret;
-        bh_result->b_size = (ret << inode->i_blkbits);
-        if (ext4_should_order_data(inode)) {
-                int retval;
-                retval = ext4_jbd2_file_inode(handle, inode);
-                if (retval)
-                        /*
-                         * Failed to add inode for ordered mode. Don't
-                         * update file size
-                         */
-                        return retval;
-        }
-        /*
-         * Update on-disk size along with block allocation we don't
-         * use 'extend_disksize' as size may change within already
-         * allocated block -bzzz
-         */
-        disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
-        if (disksize > i_size_read(inode))
-                disksize = i_size_read(inode);
-        if (disksize > EXT4_I(inode)->i_disksize) {
-                ext4_update_i_disksize(inode, disksize);
-                ret = ext4_mark_inode_dirty(handle, inode);
-                return ret;
-        }
-        return 0;
-}
 /*
 * mpage_da_map_blocks - go through given space
 *
@@ -2045,29 +2124,57 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
 */
 static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 {
-        int err = 0;
+        int err, blks, get_blocks_flags;
        struct buffer_head new;
-        sector_t next;
+        sector_t next = mpd->b_blocknr;
+        unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
+        loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
+        handle_t *handle = NULL;
        /*
         * We consider only non-mapped and non-allocated blocks
         */
        if ((mpd->b_state  & (1 << BH_Mapped)) &&
-            !(mpd->b_state & (1 << BH_Delay)))
+                !(mpd->b_state & (1 << BH_Delay)) &&
+                !(mpd->b_state & (1 << BH_Unwritten)))
                return 0;
-        new.b_state = mpd->b_state;
-        new.b_blocknr = 0;
-        new.b_size = mpd->b_size;
-        next = mpd->b_blocknr;
        /*
-         * If we didn't accumulate anything
+         * If we didn't accumulate anything to write simply return
-         * to write simply return
         */
-        if (!new.b_size)
+        if (!mpd->b_size)
                return 0;
-        err = ext4_da_get_block_write(mpd->inode, next, &new, 1);
+        handle = ext4_journal_current_handle();
-        if (err) {
+        BUG_ON(!handle);
+        /*
+         * Call ext4_get_blocks() to allocate any delayed allocation
+         * blocks, or to convert an uninitialized extent to be
+         * initialized (in the case where we have written into
+         * one or more preallocated blocks).
+         *
+         * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to
+         * indicate that we are on the delayed allocation path.  This
+         * affects functions in many different parts of the allocation
+         * call path.  This flag exists primarily because we don't
+         * want to change *many* call functions, so ext4_get_blocks()
+         * will set the magic i_delalloc_reserved_flag once the
+         * inode's allocation semaphore is taken.
+         *
+         * If the blocks in question were delalloc blocks, set
+         * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
+         * variables are updated after the blocks have been allocated.
+         */
+        new.b_state = 0;
+        get_blocks_flags = (EXT4_GET_BLOCKS_CREATE |
+                            EXT4_GET_BLOCKS_DELALLOC_RESERVE);
+        if (mpd->b_state & (1 << BH_Delay))
+                get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE;
+        blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
+                               &new, get_blocks_flags);
+        if (blks < 0) {
+                err = blks;
                /*
                 * If get block returns with error we simply
                 * return. Later writepage will redirty the page and
@@ -2100,12 +2207,14 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
                if (err == -ENOSPC) {
                        ext4_print_free_blocks(mpd->inode);
                }
-                /* invlaidate all the pages */
+                /* invalidate all the pages */
                ext4_da_block_invalidatepages(mpd, next,
                                mpd->b_size >> mpd->inode->i_blkbits);
                return err;
        }
-        BUG_ON(new.b_size == 0);
+        BUG_ON(blks == 0);
+        new.b_size = (blks << mpd->inode->i_blkbits);
        if (buffer_new(&new))
                __unmap_underlying_blocks(mpd->inode, &new);
@@ -2118,6 +2227,23 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
            (mpd->b_state & (1 << BH_Unwritten)))
                mpage_put_bnr_to_bhs(mpd, next, &new);
+        if (ext4_should_order_data(mpd->inode)) {
+                err = ext4_jbd2_file_inode(handle, mpd->inode);
+                if (err)
+                        return err;
+        }
+        /*
+         * Update on-disk size along with block allocation.
+         */
+        disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits;
+        if (disksize > i_size_read(mpd->inode))
+                disksize = i_size_read(mpd->inode);
+        if (disksize > EXT4_I(mpd->inode)->i_disksize) {
+                ext4_update_i_disksize(mpd->inode, disksize);
+                return ext4_mark_inode_dirty(handle, mpd->inode);
+        }
        return 0;
 }
@@ -2192,6 +2318,17 @@ flush_it:
        return;
 }
+static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
+{
+        /*
+         * unmapped buffer is possible for holes.
+         * delay buffer is possible with delayed allocation.
+         * We also need to consider unwritten buffer as unmapped.
+         */
+        return (!buffer_mapped(bh) || buffer_delay(bh) ||
+                                buffer_unwritten(bh)) && buffer_dirty(bh);
+}
 /*
 * __mpage_da_writepage - finds extent of pages and blocks
 *
@@ -2276,8 +2413,7 @@ static int __mpage_da_writepage(struct page *page,
                         * Otherwise we won't make progress
                         * with the page in ext4_da_writepage
                         */
-                        if (buffer_dirty(bh) &&
+                        if (ext4_bh_unmapped_or_delay(NULL, bh)) {
-                            (!buffer_mapped(bh) || buffer_delay(bh))) {
                                mpage_add_bh_to_extent(mpd, logical,
                                                       bh->b_size,
                                                       bh->b_state);
@@ -2303,8 +2439,16 @@ static int __mpage_da_writepage(struct page *page,
 }
 /*
- * this is a special callback for ->write_begin() only
+ * This is a special get_blocks_t callback which is used by
- * it's intention is to return mapped block or reserve space
+ * ext4_da_write_begin().  It will either return mapped block or
+ * reserve space for a single block.
+ *
+ * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set.
+ * We also have b_blocknr = -1 and b_bdev initialized properly
+ *
+ * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set.
+ * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
+ * initialized properly.
 */
 static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
                                  struct buffer_head *bh_result, int create)
@@ -2323,7 +2467,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
         * preallocated blocks are unmapped but should treated
         * the same as allocated blocks.
         */
-        ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1,  bh_result, 0, 0, 0);
+        ret = ext4_get_blocks(NULL, inode, iblock, 1,  bh_result, 0);
        if ((ret == 0) && !buffer_delay(bh_result)) {
                /* the block isn't (pre)allocated yet, let's reserve space */
                /*
@@ -2340,40 +2484,53 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
                set_buffer_delay(bh_result);
        } else if (ret > 0) {
                bh_result->b_size = (ret << inode->i_blkbits);
-                /*
+                if (buffer_unwritten(bh_result)) {
-                 * With sub-block writes into unwritten extents
+                        /* A delayed write to unwritten bh should
-                 * we also need to mark the buffer as new so that
+                         * be marked new and mapped.  Mapped ensures
-                 * the unwritten parts of the buffer gets correctly zeroed.
+                         * that we don't do get_block multiple times
-                 */
+                         * when we write to the same offset and new
-                if (buffer_unwritten(bh_result))
+                         * ensures that we do proper zero out for
+                         * partial write.
+                         */
                        set_buffer_new(bh_result);
+                        set_buffer_mapped(bh_result);
+                }
                ret = 0;
        }
        return ret;
 }
-static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
+/*
-{
+ * This function is used as a standard get_block_t callback function
-        /*
+ * when there is no desire to allocate any blocks.  It is used as a
-         * unmapped buffer is possible for holes.
+ * callback function for block_prepare_write(), nobh_writepage(), and
-         * delay buffer is possible with delayed allocation
+ * block_write_full_page().  These functions should only try to map a
-         */
+ * single block at a time.
-        return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh));
+ *
-}
+ * Since this function doesn't do block allocations even if the caller
+ * requests it by passing in create=1, it is critically important that
-static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
+ * any caller checks to make sure that any buffer heads returned
+ * by this function are either all already mapped or marked for
+ * delayed allocation before calling nobh_writepage() or
+ * block_write_full_page().  Otherwise, b_blocknr could be left
+ * uninitialized, and the page write functions will be taken by
+ * surprise.
+ */
+static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
                                   struct buffer_head *bh_result, int create)
 {
        int ret = 0;
        unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
+        BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
        /*
         * we don't want to do block allocation in writepage
         * so call get_block_wrap with create = 0
         */
-        ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks,
+        ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
-                                   bh_result, 0, 0, 0);
+        BUG_ON(create && ret == 0);
        if (ret > 0) {
                bh_result->b_size = (ret << inode->i_blkbits);
                ret = 0;
@@ -2382,10 +2539,11 @@ static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
 }
 /*
- * get called vi ext4_da_writepages after taking page lock (have journal handle)
+ * This function can get called via...
- * get called via journal_submit_inode_data_buffers (no journal handle)
+ *   - ext4_da_writepages after taking page lock (have journal handle)
- * get called via shrink_page_list via pdflush (no journal handle)
+ *   - journal_submit_inode_data_buffers (no journal handle)
- * or grab_page_cache when doing write_begin (have journal handle)
+ *   - shrink_page_list via pdflush (no journal handle)
+ *   - grab_page_cache when doing write_begin (have journal handle)
 */
 static int ext4_da_writepage(struct page *page,
                                struct writeback_control *wbc)
@@ -2436,7 +2594,7 @@ static int ext4_da_writepage(struct page *page,
                 * do block allocation here.
                 */
                ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
-                                                ext4_normal_get_block_write);
+                                          noalloc_get_block_write);
                if (!ret) {
                        page_bufs = page_buffers(page);
                        /* check whether all are mapped and non delay */
@@ -2461,11 +2619,10 @@ static int ext4_da_writepage(struct page *page,
        }
        if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
-                ret = nobh_writepage(page, ext4_normal_get_block_write, wbc);
+                ret = nobh_writepage(page, noalloc_get_block_write, wbc);
        else
-                ret = block_write_full_page(page,
+                ret = block_write_full_page(page, noalloc_get_block_write,
-                                                ext4_normal_get_block_write,
+                                            wbc);
-                                                wbc);
        return ret;
 }
@@ -2777,7 +2934,7 @@ retry:
        *pagep = page;
        ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-                                                        ext4_da_get_block_prep);
+                                ext4_da_get_block_prep);
        if (ret < 0) {
                unlock_page(page);
                ext4_journal_stop(handle);
@@ -2815,7 +2972,7 @@ static int ext4_da_should_update_i_disksize(struct page *page,
        for (i = 0; i < idx; i++)
                bh = bh->b_this_page;
-        if (!buffer_mapped(bh) || (buffer_delay(bh)))
+        if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh))
                return 0;
        return 1;
 }
@@ -3085,12 +3242,10 @@ static int __ext4_normal_writepage(struct page *page,
        struct inode *inode = page->mapping->host;
        if (test_opt(inode->i_sb, NOBH))
-                return nobh_writepage(page,
+                return nobh_writepage(page, noalloc_get_block_write, wbc);
-                                        ext4_normal_get_block_write, wbc);
        else
-                return block_write_full_page(page,
+                return block_write_full_page(page, noalloc_get_block_write,
-                                                ext4_normal_get_block_write,
+                                             wbc);
-                                                wbc);
 }
 static int ext4_normal_writepage(struct page *page,
@@ -3142,7 +3297,7 @@ static int __ext4_journalled_writepage(struct page *page,
        int err;
        ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
-                                        ext4_normal_get_block_write);
+                                  noalloc_get_block_write);
        if (ret != 0)
                goto out_unlock;
@@ -3227,9 +3382,8 @@ static int ext4_journalled_writepage(struct page *page,
                 * really know unless we go poke around in the buffer_heads.
                 * But block_write_full_page will do the right thing.
                 */
-                return block_write_full_page(page,
+                return block_write_full_page(page, noalloc_get_block_write,
-                                                ext4_normal_get_block_write,
+                                             wbc);
-                                                wbc);
        }
 no_write:
        redirty_page_for_writepage(wbc, page);
@@ -3973,7 +4127,8 @@ void ext4_truncate(struct inode *inode)
        if (!ext4_can_truncate(inode))
                return;
-        if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
+        if (ei->i_disksize && inode->i_size == 0 &&
+            !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
                ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
        if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
@@ -4715,25 +4870,6 @@ int ext4_write_inode(struct inode *inode, int wait)
        return ext4_force_commit(inode->i_sb);
 }
-int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh)
-{
-        int err = 0;
-        mark_buffer_dirty(bh);
-        if (inode && inode_needs_sync(inode)) {
-                sync_dirty_buffer(bh);
-                if (buffer_req(bh) && !buffer_uptodate(bh)) {
-                        ext4_error(inode->i_sb, __func__,
-                                   "IO error syncing inode, "
-                                   "inode=%lu, block=%llu",
-                                   inode->i_ino,
-                                   (unsigned long long)bh->b_blocknr);
-                        err = -EIO;
-                }
-        }
-        return err;
-}
 /*
 * ext4_setattr()
 *
@@ -4930,7 +5066,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 */
 int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
-        int groups, gdpblocks;
+        ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
+        int gdpblocks;
        int idxblocks;
        int ret = 0;
@@ -4957,8 +5094,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
                groups += nrblocks;
        gdpblocks = groups;
-        if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
+        if (groups > ngroups)
-                groups = EXT4_SB(inode->i_sb)->s_groups_count;
+                groups = ngroups;
        if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
                gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
@@ -4998,7 +5135,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
 * Calculate the journal credits for a chunk of data modification.
 *
 * This is called from DIO, fallocate or whoever calling
- * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks.
+ * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks.
 *
 * journal buffers for data blocks are not included here, as DIO
 * and fallocate do no need to journal data buffers.
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f871677a7984..ed8482e22c0e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -372,24 +372,12 @@ static inline void mb_set_bit(int bit, void *addr)
        ext4_set_bit(bit, addr);
 }
-static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
-{
-        addr = mb_correct_addr_and_bit(&bit, addr);
-        ext4_set_bit_atomic(lock, bit, addr);
-}
 static inline void mb_clear_bit(int bit, void *addr)
 {
        addr = mb_correct_addr_and_bit(&bit, addr);
        ext4_clear_bit(bit, addr);
 }
-static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
-{
-        addr = mb_correct_addr_and_bit(&bit, addr);
-        ext4_clear_bit_atomic(lock, bit, addr);
-}
 static inline int mb_find_next_zero_bit(void *addr, int max, int start)
 {
        int fix = 0, ret, tmpmax;
@@ -448,7 +436,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
        if (unlikely(e4b->bd_info->bb_bitmap == NULL))
                return;
-        BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
+        assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
        for (i = 0; i < count; i++) {
                if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
                        ext4_fsblk_t blocknr;
@@ -472,7 +460,7 @@ static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
        if (unlikely(e4b->bd_info->bb_bitmap == NULL))
                return;
-        BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+        assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
        for (i = 0; i < count; i++) {
                BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
                mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
@@ -739,6 +727,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
 static int ext4_mb_init_cache(struct page *page, char *incore)
 {
+        ext4_group_t ngroups;
        int blocksize;
        int blocks_per_page;
        int groups_per_page;
@@ -757,6 +746,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
        inode = page->mapping->host;
        sb = inode->i_sb;
+        ngroups = ext4_get_groups_count(sb);
        blocksize = 1 << inode->i_blkbits;
        blocks_per_page = PAGE_CACHE_SIZE / blocksize;
@@ -780,7 +770,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
        for (i = 0; i < groups_per_page; i++) {
                struct ext4_group_desc *desc;
-                if (first_group + i >= EXT4_SB(sb)->s_groups_count)
+                if (first_group + i >= ngroups)
                        break;
                err = -EIO;
@@ -801,17 +791,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                        unlock_buffer(bh[i]);
                        continue;
                }
-                spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+                ext4_lock_group(sb, first_group + i);
                if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
                        ext4_init_block_bitmap(sb, bh[i],
                                                first_group + i, desc);
                        set_bitmap_uptodate(bh[i]);
                        set_buffer_uptodate(bh[i]);
-                        spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+                        ext4_unlock_group(sb, first_group + i);
                        unlock_buffer(bh[i]);
                        continue;
                }
-                spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+                ext4_unlock_group(sb, first_group + i);
                if (buffer_uptodate(bh[i])) {
                        /*
                         * if not uninit if bh is uptodate,
@@ -852,7 +842,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                struct ext4_group_info *grinfo;
                group = (first_block + i) >> 1;
-                if (group >= EXT4_SB(sb)->s_groups_count)
+                if (group >= ngroups)
                        break;
                /*
@@ -1078,7 +1068,7 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
        return 0;
 }
-static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
+static void mb_clear_bits(void *bm, int cur, int len)
 {
        __u32 *addr;
@@ -1091,15 +1081,12 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
                        cur += 32;
                        continue;
                }
-                if (lock)
+                mb_clear_bit(cur, bm);
-                        mb_clear_bit_atomic(lock, cur, bm);
-                else
-                        mb_clear_bit(cur, bm);
                cur++;
        }
 }
-static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
+static void mb_set_bits(void *bm, int cur, int len)
 {
        __u32 *addr;
@@ -1112,10 +1099,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
                        cur += 32;
                        continue;
                }
-                if (lock)
+                mb_set_bit(cur, bm);
-                        mb_set_bit_atomic(lock, cur, bm);
-                else
-                        mb_set_bit(cur, bm);
                cur++;
        }
 }
@@ -1131,7 +1115,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
        struct super_block *sb = e4b->bd_sb;
        BUG_ON(first + count > (sb->s_blocksize << 3));
-        BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
+        assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
        mb_check_buddy(e4b);
        mb_free_blocks_double(inode, e4b, first, count);
@@ -1212,7 +1196,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
        int ord;
        void *buddy;
-        BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+        assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
        BUG_ON(ex == NULL);
        buddy = mb_find_buddy(e4b, order, &max);
@@ -1276,7 +1260,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
        BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
        BUG_ON(e4b->bd_group != ex->fe_group);
-        BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+        assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
        mb_check_buddy(e4b);
        mb_mark_used_double(e4b, start, len);
@@ -1330,8 +1314,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
                e4b->bd_info->bb_counters[ord]++;
        }
-        mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group),
+        mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
-                        EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
        mb_check_buddy(e4b);
        return ret;
@@ -1726,7 +1709,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
        unsigned free, fragments;
        unsigned i, bits;
        int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
-        struct ext4_group_desc *desc;
        struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
        BUG_ON(cr < 0 || cr >= 4);
@@ -1742,10 +1724,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
        switch (cr) {
        case 0:
                BUG_ON(ac->ac_2order == 0);
-                /* If this group is uninitialized, skip it initially */
-                desc = ext4_get_group_desc(ac->ac_sb, group, NULL);
-                if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
-                        return 0;
                /* Avoid using the first bg of a flexgroup for data files */
                if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
@@ -1788,6 +1766,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
        int block, pnum;
        int blocks_per_page;
        int groups_per_page;
+        ext4_group_t ngroups = ext4_get_groups_count(sb);
        ext4_group_t first_group;
        struct ext4_group_info *grp;
@@ -1807,7 +1786,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
        /* read all groups the page covers into the cache */
        for (i = 0; i < groups_per_page; i++) {
-                if ((first_group + i) >= EXT4_SB(sb)->s_groups_count)
+                if ((first_group + i) >= ngroups)
                        break;
                grp = ext4_get_group_info(sb, first_group + i);
                /* take all groups write allocation
@@ -1945,8 +1924,7 @@ err:
 static noinline_for_stack int
 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
-        ext4_group_t group;
+        ext4_group_t ngroups, group, i;
-        ext4_group_t i;
        int cr;
        int err = 0;
        int bsbits;
@@ -1957,6 +1935,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
        sb = ac->ac_sb;
        sbi = EXT4_SB(sb);
+        ngroups = ext4_get_groups_count(sb);
        BUG_ON(ac->ac_status == AC_STATUS_FOUND);
        /* first, try the goal */
@@ -2017,11 +1996,11 @@ repeat:
                 */
                group = ac->ac_g_ex.fe_group;
-                for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) {
+                for (i = 0; i < ngroups; group++, i++) {
                        struct ext4_group_info *grp;
                        struct ext4_group_desc *desc;
-                        if (group == EXT4_SB(sb)->s_groups_count)
+                        if (group == ngroups)
                                group = 0;
                        /* quick check to skip empty groups */
@@ -2064,9 +2043,7 @@ repeat:
                        ac->ac_groups_scanned++;
                        desc = ext4_get_group_desc(sb, group, NULL);
-                        if (cr == 0 || (desc->bg_flags &
+                        if (cr == 0)
-                                        cpu_to_le16(EXT4_BG_BLOCK_UNINIT) &&
-                                        ac->ac_2order != 0))
                                ext4_mb_simple_scan_group(ac, &e4b);
                        else if (cr == 1 &&
                                        ac->ac_g_ex.fe_len == sbi->s_stripe)
@@ -2315,12 +2292,10 @@ static struct file_operations ext4_mb_seq_history_fops = {
 static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
 {
        struct super_block *sb = seq->private;
-        struct ext4_sb_info *sbi = EXT4_SB(sb);
        ext4_group_t group;
-        if (*pos < 0 || *pos >= sbi->s_groups_count)
+        if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
                return NULL;
        group = *pos + 1;
        return (void *) ((unsigned long) group);
 }
@@ -2328,11 +2303,10 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
 static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
 {
        struct super_block *sb = seq->private;
-        struct ext4_sb_info *sbi = EXT4_SB(sb);
        ext4_group_t group;
        ++*pos;
-        if (*pos < 0 || *pos >= sbi->s_groups_count)
+        if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
                return NULL;
        group = *pos + 1;
        return (void *) ((unsigned long) group);
@@ -2420,7 +2394,8 @@ static void ext4_mb_history_release(struct super_block *sb)
        if (sbi->s_proc != NULL) {
                remove_proc_entry("mb_groups", sbi->s_proc);
-                remove_proc_entry("mb_history", sbi->s_proc);
+                if (sbi->s_mb_history_max)
+                        remove_proc_entry("mb_history", sbi->s_proc);
        }
        kfree(sbi->s_mb_history);
 }
@@ -2431,17 +2406,17 @@ static void ext4_mb_history_init(struct super_block *sb)
        int i;
        if (sbi->s_proc != NULL) {
-                proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
+                if (sbi->s_mb_history_max)
-                                 &ext4_mb_seq_history_fops, sb);
+                        proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
+                                         &ext4_mb_seq_history_fops, sb);
                proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
                                 &ext4_mb_seq_groups_fops, sb);
        }
-        sbi->s_mb_history_max = 1000;
        sbi->s_mb_history_cur = 0;
        spin_lock_init(&sbi->s_mb_history_lock);
        i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
-        sbi->s_mb_history = kzalloc(i, GFP_KERNEL);
+        sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL;
        /* if we can't allocate history, then we simple won't use it */
 }
@@ -2451,7 +2426,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
        struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
        struct ext4_mb_history h;
-        if (unlikely(sbi->s_mb_history == NULL))
+        if (sbi->s_mb_history == NULL)
                return;
        if (!(ac->ac_op & sbi->s_mb_history_filter))
@@ -2587,6 +2562,7 @@ void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
 static int ext4_mb_init_backend(struct super_block *sb)
 {
+        ext4_group_t ngroups = ext4_get_groups_count(sb);
        ext4_group_t i;
        int metalen;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2598,7 +2574,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
        struct ext4_group_desc *desc;
        /* This is the number of blocks used by GDT */
-        num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) -
+        num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
                                1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
        /*
@@ -2644,7 +2620,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
        for (i = 0; i < num_meta_group_infos; i++) {
                if ((i + 1) == num_meta_group_infos)
                        metalen = sizeof(*meta_group_info) *
-                                (sbi->s_groups_count -
+                                (ngroups -
                                        (i << EXT4_DESC_PER_BLOCK_BITS(sb)));
                meta_group_info = kmalloc(metalen, GFP_KERNEL);
                if (meta_group_info == NULL) {
@@ -2655,7 +2631,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
                sbi->s_group_info[i] = meta_group_info;
        }
-        for (i = 0; i < sbi->s_groups_count; i++) {
+        for (i = 0; i < ngroups; i++) {
                desc = ext4_get_group_desc(sb, i, NULL);
                if (desc == NULL) {
                        printk(KERN_ERR
@@ -2761,7 +2737,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
        return 0;
 }
-/* need to called with ext4 group lock (ext4_lock_group) */
+/* needs to be called with the ext4 group lock held */
 static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
 {
        struct ext4_prealloc_space *pa;
@@ -2781,13 +2757,14 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
 int ext4_mb_release(struct super_block *sb)
 {
+        ext4_group_t ngroups = ext4_get_groups_count(sb);
        ext4_group_t i;
        int num_meta_group_infos;
        struct ext4_group_info *grinfo;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        if (sbi->s_group_info) {
-                for (i = 0; i < sbi->s_groups_count; i++) {
+                for (i = 0; i < ngroups; i++) {
                        grinfo = ext4_get_group_info(sb, i);
 #ifdef DOUBLE_CHECK
                        kfree(grinfo->bb_bitmap);
@@ -2797,7 +2774,7 @@ int ext4_mb_release(struct super_block *sb)
                        ext4_unlock_group(sb, i);
                        kfree(grinfo);
                }
-                num_meta_group_infos = (sbi->s_groups_count +
+                num_meta_group_infos = (ngroups +
                                EXT4_DESC_PER_BLOCK(sb) - 1) >>
                        EXT4_DESC_PER_BLOCK_BITS(sb);
                for (i = 0; i < num_meta_group_infos; i++)
@@ -2984,27 +2961,25 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                + le32_to_cpu(es->s_first_data_block);
        len = ac->ac_b_ex.fe_len;
-        if (in_range(ext4_block_bitmap(sb, gdp), block, len) ||
+        if (!ext4_data_block_valid(sbi, block, len)) {
-            in_range(ext4_inode_bitmap(sb, gdp), block, len) ||
-            in_range(block, ext4_inode_table(sb, gdp),
-                     EXT4_SB(sb)->s_itb_per_group) ||
-            in_range(block + len - 1, ext4_inode_table(sb, gdp),
-                     EXT4_SB(sb)->s_itb_per_group)) {
                ext4_error(sb, __func__,
-                           "Allocating block %llu in system zone of %d group\n",
+                           "Allocating blocks %llu-%llu which overlap "
-                           block, ac->ac_b_ex.fe_group);
+                           "fs metadata\n", block, block+len);
                /* File system mounted not to panic on error
                 * Fix the bitmap and repeat the block allocation
                 * We leak some of the blocks here.
                 */
-                mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
+                ext4_lock_group(sb, ac->ac_b_ex.fe_group);
-                                bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+                mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
-                                ac->ac_b_ex.fe_len);
+                            ac->ac_b_ex.fe_len);
+                ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
                err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
                if (!err)
                        err = -EAGAIN;
                goto out_err;
        }
+        ext4_lock_group(sb, ac->ac_b_ex.fe_group);
 #ifdef AGGRESSIVE_CHECK
        {
                int i;
@@ -3014,9 +2989,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                }
        }
 #endif
-        spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
+        mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len);
-        mb_set_bits(NULL, bitmap_bh->b_data,
-                                ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
        if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
                gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
                ext4_free_blks_set(sb, gdp,
@@ -3026,7 +2999,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
        ext4_free_blks_set(sb, gdp, len);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
-        spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
+        ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
        percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
        /*
         * Now reduce the dirty block count also. Should not go negative
@@ -3459,7 +3433,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 * the function goes through all block freed in the group
 * but not yet committed and marks them used in in-core bitmap.
 * buddy must be generated from this bitmap
- * Need to be called with ext4 group lock (ext4_lock_group)
+ * Need to be called with the ext4 group lock held
 */
 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
                                                ext4_group_t group)
@@ -3473,9 +3447,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
        while (n) {
                entry = rb_entry(n, struct ext4_free_data, node);
-                mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
+                mb_set_bits(bitmap, entry->start_blk, entry->count);
-                                bitmap, entry->start_blk,
-                                entry->count);
                n = rb_next(n);
        }
        return;
@@ -3484,7 +3456,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 /*
 * the function goes through all preallocation in this group and marks them
 * used in in-core bitmap. buddy must be generated from this bitmap
- * Need to be called with ext4 group lock (ext4_lock_group)
+ * Need to be called with ext4 group lock held
 */
 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
                                        ext4_group_t group)
@@ -3516,8 +3488,7 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
                if (unlikely(len == 0))
                        continue;
                BUG_ON(groupnr != group);
-                mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
+                mb_set_bits(bitmap, start, len);
-                                                bitmap, start, len);
                preallocated += len;
                count++;
        }
@@ -4121,7 +4092,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode,
 static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 {
        struct super_block *sb = ac->ac_sb;
-        ext4_group_t i;
+        ext4_group_t ngroups, i;
        printk(KERN_ERR "EXT4-fs: Can't allocate:"
                        " Allocation context details:\n");
@@ -4145,7 +4116,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
        printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
                ac->ac_found);
        printk(KERN_ERR "EXT4-fs: groups: \n");
-        for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+        ngroups = ext4_get_groups_count(sb);
+        for (i = 0; i < ngroups; i++) {
                struct ext4_group_info *grp = ext4_get_group_info(sb, i);
                struct ext4_prealloc_space *pa;
                ext4_grpblk_t start;
@@ -4469,13 +4441,13 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
 static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
 {
-        ext4_group_t i;
+        ext4_group_t i, ngroups = ext4_get_groups_count(sb);
        int ret;
        int freed = 0;
        trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d",
                   sb->s_id, needed);
-        for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) {
+        for (i = 0; i < ngroups && needed > 0; i++) {
                ret = ext4_mb_discard_group_preallocations(sb, i, needed);
                freed += ret;
                needed -= ret;
@@ -4859,29 +4831,25 @@ do_more:
                new_entry->group  = block_group;
                new_entry->count = count;
                new_entry->t_tid = handle->h_transaction->t_tid;
                ext4_lock_group(sb, block_group);
-                mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
+                mb_clear_bits(bitmap_bh->b_data, bit, count);
-                                bit, count);
                ext4_mb_free_metadata(handle, &e4b, new_entry);
-                ext4_unlock_group(sb, block_group);
        } else {
-                ext4_lock_group(sb, block_group);
                /* need to update group_info->bb_free and bitmap
                 * with group lock held. generate_buddy look at
                 * them with group lock_held
                 */
-                mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
+                ext4_lock_group(sb, block_group);
-                                bit, count);
+                mb_clear_bits(bitmap_bh->b_data, bit, count);
                mb_free_blocks(inode, &e4b, bit, count);
                ext4_mb_return_to_preallocation(inode, &e4b, block, count);
-                ext4_unlock_group(sb, block_group);
        }
-        spin_lock(sb_bgl_lock(sbi, block_group));
        ret = ext4_free_blks_count(sb, gdp) + count;
        ext4_free_blks_set(sb, gdp, ret);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
-        spin_unlock(sb_bgl_lock(sbi, block_group));
+        ext4_unlock_group(sb, block_group);
        percpu_counter_add(&sbi->s_freeblocks_counter, count);
        if (sbi->s_log_groups_per_flex) {
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index dd9e6cd5f6cf..75e34f69215b 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -23,7 +23,6 @@
 #include <linux/mutex.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
-#include "group.h"
 /*
 * with AGGRESSIVE_CHECK allocator runs consistency checks over
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 22098e1cd085..07eb6649e4fa 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -37,7 +37,6 @@
 #include "ext4.h"
 #include "ext4_jbd2.h"
-#include "namei.h"
 #include "xattr.h"
 #include "acl.h"
@@ -750,7 +749,7 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
                        ext4fs_dirhash(de->name, de->name_len, &h);
                        map_tail--;
                        map_tail->hash = h.hash;
-                        map_tail->offs = (u16) ((char *) de - base);
+                        map_tail->offs = ((char *) de - base)>>2;
                        map_tail->size = le16_to_cpu(de->rec_len);
                        count++;
                        cond_resched();
@@ -1148,7 +1147,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
        unsigned rec_len = 0;
        while (count--) {
-                struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs);
+                struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) 
+                                                (from + (map->offs<<2));
                rec_len = EXT4_DIR_REC_LEN(de->name_len);
                memcpy (to, de, rec_len);
                ((struct ext4_dir_entry_2 *) to)->rec_len =
@@ -1997,7 +1997,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
        if (!ext4_handle_valid(handle))
                return 0;
-        lock_super(sb);
+        mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
        if (!list_empty(&EXT4_I(inode)->i_orphan))
                goto out_unlock;
@@ -2006,9 +2006,13 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
        /* @@@ FIXME: Observation from aviro:
         * I think I can trigger J_ASSERT in ext4_orphan_add().  We block
-         * here (on lock_super()), so race with ext4_link() which might bump
+         * here (on s_orphan_lock), so race with ext4_link() which might bump
         * ->i_nlink. For, say it, character device. Not a regular file,
         * not a directory, not a symlink and ->i_nlink > 0.
+         *
+         * tytso, 4/25/2009: I'm not sure how that could happen;
+         * shouldn't the fs core protect us from these sort of
+         * unlink()/link() races?
         */
        J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
                  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -2045,7 +2049,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
        jbd_debug(4, "orphan inode %lu will point to %d\n",
                        inode->i_ino, NEXT_ORPHAN(inode));
 out_unlock:
-        unlock_super(sb);
+        mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
        ext4_std_error(inode->i_sb, err);
        return err;
 }
@@ -2066,11 +2070,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
        if (!ext4_handle_valid(handle))
                return 0;
-        lock_super(inode->i_sb);
+        mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
-        if (list_empty(&ei->i_orphan)) {
+        if (list_empty(&ei->i_orphan))
-                unlock_super(inode->i_sb);
+                goto out;
-                return 0;
-        }
        ino_next = NEXT_ORPHAN(inode);
        prev = ei->i_orphan.prev;
@@ -2120,7 +2122,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
 out_err:
        ext4_std_error(inode->i_sb, err);
 out:
-        unlock_super(inode->i_sb);
+        mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
        return err;
 out_brelse:
@@ -2533,6 +2535,7 @@ const struct inode_operations ext4_dir_inode_operations = {
        .removexattr    = generic_removexattr,
 #endif
        .permission     = ext4_permission,
+        .fiemap         = ext4_fiemap,
 };
 const struct inode_operations ext4_special_inode_operations = {
diff --git a/fs/ext4/namei.h b/fs/ext4/namei.h
deleted file mode 100644
index 5e4dfff36a00..000000000000
--- a/fs/ext4/namei.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/*  linux/fs/ext4/namei.h
- *
- * Copyright (C) 2005 Simtec Electronics
- *      Ben Dooks <ben@simtec.co.uk>
- *
-*/
-extern struct dentry *ext4_get_parent(struct dentry *child);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 546c7dd869e1..27eb289eea37 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 #include "ext4_jbd2.h"
-#include "group.h"
 #define outside(b, first, last) ((b) < (first) || (b) >= (last))
 #define inside(b, first, last)  ((b) >= (first) && (b) < (last))
@@ -193,7 +192,7 @@ static int setup_new_group_blocks(struct super_block *sb,
        if (IS_ERR(handle))
                return PTR_ERR(handle);
-        lock_super(sb);
+        mutex_lock(&sbi->s_resize_lock);
        if (input->group != sbi->s_groups_count) {
                err = -EBUSY;
                goto exit_journal;
@@ -302,7 +301,7 @@ exit_bh:
        brelse(bh);
 exit_journal:
-        unlock_super(sb);
+        mutex_unlock(&sbi->s_resize_lock);
        if ((err2 = ext4_journal_stop(handle)) && !err)
                err = err2;
@@ -643,11 +642,12 @@ exit_free:
 * important part is that the new block and inode counts are in the backup
 * superblocks, and the location of the new group metadata in the GDT backups.
 *
- * We do not need lock_super() for this, because these blocks are not
+ * We do not need to take the s_resize_lock for this, because these
- * otherwise touched by the filesystem code when it is mounted.  We don't
+ * blocks are not otherwise touched by the filesystem code when it is
- * need to worry about last changing from sbi->s_groups_count, because the
+ * mounted.  We don't need to worry about last changing from
- * worst that can happen is that we do not copy the full number of backups
+ * sbi->s_groups_count, because the worst that can happen is that we
- * at this time.  The resize which changed s_groups_count will backup again.
+ * do not copy the full number of backups at this time.  The resize
+ * which changed s_groups_count will backup again.
 */
 static void update_backups(struct super_block *sb,
                           int blk_off, char *data, int size)
@@ -809,7 +809,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
                goto exit_put;
        }
-        lock_super(sb);
+        mutex_lock(&sbi->s_resize_lock);
        if (input->group != sbi->s_groups_count) {
                ext4_warning(sb, __func__,
                             "multiple resizers run on filesystem!");
@@ -840,7 +840,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        /*
         * OK, now we've set up the new group.  Time to make it active.
         *
-         * Current kernels don't lock all allocations via lock_super(),
+         * We do not lock all allocations via s_resize_lock
         * so we have to be safe wrt. concurrent accesses the group
         * data.  So we need to be careful to set all of the relevant
         * group descriptor data etc. *before* we enable the group.
@@ -900,12 +900,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
         *
         * The precise rules we use are:
         *
-         * * Writers of s_groups_count *must* hold lock_super
+         * * Writers of s_groups_count *must* hold s_resize_lock
         * AND
         * * Writers must perform a smp_wmb() after updating all dependent
         *   data and before modifying the groups count
         *
-         * * Readers must hold lock_super() over the access
+         * * Readers must hold s_resize_lock over the access
         * OR
         * * Readers must perform an smp_rmb() after reading the groups count
         *   and before reading any dependent data.
@@ -948,7 +948,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        sb->s_dirt = 1;
 exit_journal:
-        unlock_super(sb);
+        mutex_unlock(&sbi->s_resize_lock);
        if ((err2 = ext4_journal_stop(handle)) && !err)
                err = err2;
        if (!err) {
@@ -986,7 +986,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
        /* We don't need to worry about locking wrt other resizers just
         * yet: we're going to revalidate es->s_blocks_count after
-         * taking lock_super() below. */
+         * taking the s_resize_lock below. */
        o_blocks_count = ext4_blocks_count(es);
        o_groups_count = EXT4_SB(sb)->s_groups_count;
@@ -1056,11 +1056,11 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
                goto exit_put;
        }
-        lock_super(sb);
+        mutex_lock(&EXT4_SB(sb)->s_resize_lock);
        if (o_blocks_count != ext4_blocks_count(es)) {
                ext4_warning(sb, __func__,
                             "multiple resizers run on filesystem!");
-                unlock_super(sb);
+                mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
                ext4_journal_stop(handle);
                err = -EBUSY;
                goto exit_put;
@@ -1070,14 +1070,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
                                                 EXT4_SB(sb)->s_sbh))) {
                ext4_warning(sb, __func__,
                             "error %d on journal write access", err);
-                unlock_super(sb);
+                mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
                ext4_journal_stop(handle);
                goto exit_put;
        }
        ext4_blocks_count_set(es, o_blocks_count + add);
        ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
        sb->s_dirt = 1;
-        unlock_super(sb);
+        mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
        ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
                   o_blocks_count + add);
        /* We add the blocks to the bitmap and set the group need init bit */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2958f4e6f222..012c4251397e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -20,6 +20,7 @@
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/time.h>
+#include <linux/vmalloc.h>
 #include <linux/jbd2.h>
 #include <linux/slab.h>
 #include <linux/init.h>
@@ -45,16 +46,20 @@
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
-#include "namei.h"
-#include "group.h"
+static int default_mb_history_length = 1000;
+module_param_named(default_mb_history_length, default_mb_history_length,
+                   int, 0644);
+MODULE_PARM_DESC(default_mb_history_length,
+                 "Default number of entries saved for mb_history");
 struct proc_dir_entry *ext4_proc_root;
 static struct kset *ext4_kset;
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
                             unsigned long journal_devnum);
-static int ext4_commit_super(struct super_block *sb,
+static int ext4_commit_super(struct super_block *sb, int sync);
-                              struct ext4_super_block *es, int sync);
 static void ext4_mark_recovery_complete(struct super_block *sb,
                                        struct ext4_super_block *es);
 static void ext4_clear_journal_err(struct super_block *sb,
@@ -74,7 +79,7 @@ ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 {
        return le32_to_cpu(bg->bg_block_bitmap_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-                (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
+                 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 }
 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
@@ -82,7 +87,7 @@ ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 {
        return le32_to_cpu(bg->bg_inode_bitmap_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-                (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
+                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 }
 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
@@ -90,7 +95,7 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 {
        return le32_to_cpu(bg->bg_inode_table_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-                (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
+                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 }
 __u32 ext4_free_blks_count(struct super_block *sb,
@@ -98,7 +103,7 @@ __u32 ext4_free_blks_count(struct super_block *sb,
 {
        return le16_to_cpu(bg->bg_free_blocks_count_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-                (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
+                 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 }
 __u32 ext4_free_inodes_count(struct super_block *sb,
@@ -106,7 +111,7 @@ __u32 ext4_free_inodes_count(struct super_block *sb,
 {
        return le16_to_cpu(bg->bg_free_inodes_count_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-                (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
+                 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 }
 __u32 ext4_used_dirs_count(struct super_block *sb,
@@ -114,7 +119,7 @@ __u32 ext4_used_dirs_count(struct super_block *sb,
 {
        return le16_to_cpu(bg->bg_used_dirs_count_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-                (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
+                 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 }
 __u32 ext4_itable_unused_count(struct super_block *sb,
@@ -122,7 +127,7 @@ __u32 ext4_itable_unused_count(struct super_block *sb,
 {
        return le16_to_cpu(bg->bg_itable_unused_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-                (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
+                 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 }
 void ext4_block_bitmap_set(struct super_block *sb,
@@ -202,8 +207,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
        journal = EXT4_SB(sb)->s_journal;
        if (journal) {
                if (is_journal_aborted(journal)) {
-                        ext4_abort(sb, __func__,
+                        ext4_abort(sb, __func__, "Detected aborted journal");
-                                   "Detected aborted journal");
                        return ERR_PTR(-EROFS);
                }
                return jbd2_journal_start(journal, nblocks);
@@ -302,10 +306,10 @@ static void ext4_handle_error(struct super_block *sb)
                        jbd2_journal_abort(journal, -EIO);
        }
        if (test_opt(sb, ERRORS_RO)) {
-                printk(KERN_CRIT "Remounting filesystem read-only\n");
+                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
                sb->s_flags |= MS_RDONLY;
        }
-        ext4_commit_super(sb, es, 1);
+        ext4_commit_super(sb, 1);
        if (test_opt(sb, ERRORS_PANIC))
                panic("EXT4-fs (device %s): panic forced after error\n",
                        sb->s_id);
@@ -395,8 +399,6 @@ void ext4_abort(struct super_block *sb, const char *function,
 {
        va_list args;
-        printk(KERN_CRIT "ext4_abort called.\n");
        va_start(args, fmt);
        printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
        vprintk(fmt, args);
@@ -409,7 +411,7 @@ void ext4_abort(struct super_block *sb, const char *function,
        if (sb->s_flags & MS_RDONLY)
                return;
-        printk(KERN_CRIT "Remounting filesystem read-only\n");
+        ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
        EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
        sb->s_flags |= MS_RDONLY;
        EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
@@ -417,6 +419,18 @@ void ext4_abort(struct super_block *sb, const char *function,
                jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 }
+void ext4_msg(struct super_block *sb, const char *prefix,
+              const char *fmt, ...)
+{
+        va_list ap;
+        va_start(ap, fmt);      /* variadic arguments consumed by fmt */
+        printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
+        vprintk(fmt, ap);
+        printk("\n");           /* newline is appended here, after fmt */
+        va_end(ap);
+}
 void ext4_warning(struct super_block *sb, const char *function,
                  const char *fmt, ...)
 {
@@ -431,7 +445,7 @@ void ext4_warning(struct super_block *sb, const char *function,
 }
 void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
-                                const char *function, const char *fmt, ...)
+                           const char *function, const char *fmt, ...)
 __releases(bitlock)
 __acquires(bitlock)
 {
@@ -447,7 +461,7 @@ __acquires(bitlock)
        if (test_opt(sb, ERRORS_CONT)) {
                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-                ext4_commit_super(sb, es, 0);
+                ext4_commit_super(sb, 0);
                return;
        }
        ext4_unlock_group(sb, grp);
@@ -467,7 +481,6 @@ __acquires(bitlock)
        return;
 }
 void ext4_update_dynamic_rev(struct super_block *sb)
 {
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@ -496,7 +509,7 @@ void ext4_update_dynamic_rev(struct super_block *sb)
 /*
 * Open the external journal device
 */
-static struct block_device *ext4_blkdev_get(dev_t dev)
+static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 {
        struct block_device *bdev;
        char b[BDEVNAME_SIZE];
@@ -507,7 +520,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev)
        return bdev;
 fail:
-        printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n",
+        ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
                        __bdevname(dev, b), PTR_ERR(bdev));
        return NULL;
 }
@@ -543,8 +556,8 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 {
        struct list_head *l;
-        printk(KERN_ERR "sb orphan head is %d\n",
+        ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
-               le32_to_cpu(sbi->s_es->s_last_orphan));
+                 le32_to_cpu(sbi->s_es->s_last_orphan));
        printk(KERN_ERR "sb_info orphan list:\n");
        list_for_each(l, &sbi->s_orphan) {
@@ -563,6 +576,12 @@ static void ext4_put_super(struct super_block *sb)
        struct ext4_super_block *es = sbi->s_es;
        int i, err;
+        lock_super(sb);
+        lock_kernel();
+        if (sb->s_dirt)
+                ext4_commit_super(sb, 1);
+        ext4_release_system_zone(sb);
        ext4_mb_release(sb);
        ext4_ext_release(sb);
        ext4_xattr_put_super(sb);
@@ -576,7 +595,7 @@ static void ext4_put_super(struct super_block *sb)
        if (!(sb->s_flags & MS_RDONLY)) {
                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
                es->s_state = cpu_to_le16(sbi->s_mount_state);
-                ext4_commit_super(sb, es, 1);
+                ext4_commit_super(sb, 1);
        }
        if (sbi->s_proc) {
                remove_proc_entry(sb->s_id, ext4_proc_root);
@@ -586,7 +605,10 @@ static void ext4_put_super(struct super_block *sb)
        for (i = 0; i < sbi->s_gdb_count; i++)
                brelse(sbi->s_group_desc[i]);
        kfree(sbi->s_group_desc);
-        kfree(sbi->s_flex_groups);
+        if (is_vmalloc_addr(sbi->s_flex_groups))
+                vfree(sbi->s_flex_groups);
+        else
+                kfree(sbi->s_flex_groups);
        percpu_counter_destroy(&sbi->s_freeblocks_counter);
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -625,11 +647,8 @@ static void ext4_put_super(struct super_block *sb)
        unlock_super(sb);
        kobject_put(&sbi->s_kobj);
        wait_for_completion(&sbi->s_kobj_unregister);
-        lock_super(sb);
-        lock_kernel();
        kfree(sbi->s_blockgroup_lock);
        kfree(sbi);
-        return;
 }
 static struct kmem_cache *ext4_inode_cachep;
@@ -644,6 +663,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
        ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
        if (!ei)
                return NULL;
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
        ei->i_acl = EXT4_ACL_NOT_CACHED;
        ei->i_default_acl = EXT4_ACL_NOT_CACHED;
@@ -664,14 +684,16 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
        ei->i_allocated_meta_blocks = 0;
        ei->i_delalloc_reserved_flag = 0;
        spin_lock_init(&(ei->i_block_reservation_lock));
        return &ei->vfs_inode;
 }
 static void ext4_destroy_inode(struct inode *inode)
 {
        if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
-                printk("EXT4 Inode %p: orphan list check failed!\n",
+                ext4_msg(inode->i_sb, KERN_ERR,
-                        EXT4_I(inode));
+                         "Inode %lu (%p): orphan list check failed!",
+                         inode->i_ino, EXT4_I(inode));
                print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
                                EXT4_I(inode), sizeof(struct ext4_inode_info),
                                true);
@@ -870,12 +892,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
                seq_puts(seq, ",noauto_da_alloc");
        ext4_show_quota_options(seq, sb);
        return 0;
 }
 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
-                u64 ino, u32 generation)
+                                        u64 ino, u32 generation)
 {
        struct inode *inode;
@@ -904,14 +926,14 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
 }
 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
-                int fh_len, int fh_type)
+                                        int fh_len, int fh_type)
 {
        return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
                                    ext4_nfs_get_inode);
 }
 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
-                int fh_len, int fh_type)
+                                        int fh_len, int fh_type)
 {
        return generic_fh_to_parent(sb, fid, fh_len, fh_type,
                                    ext4_nfs_get_inode);
@@ -923,7 +945,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
 * which would prevent try_to_free_buffers() from freeing them, we must use
 * jbd2 layer's try_to_free_buffers() function to release them.
 */
-static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait)
+static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
+                                 gfp_t wait)
 {
        journal_t *journal = EXT4_SB(sb)->s_journal;
@@ -992,7 +1015,6 @@ static const struct super_operations ext4_sops = {
        .dirty_inode    = ext4_dirty_inode,
        .delete_inode   = ext4_delete_inode,
        .put_super      = ext4_put_super,
-        .write_super    = ext4_write_super,
        .sync_fs        = ext4_sync_fs,
        .freeze_fs      = ext4_freeze,
        .unfreeze_fs    = ext4_unfreeze,
@@ -1007,6 +1029,25 @@ static const struct super_operations ext4_sops = {
        .bdev_try_to_free_page = bdev_try_to_free_page,
 };
+static const struct super_operations ext4_nojournal_sops = {
+        .alloc_inode    = ext4_alloc_inode,
+        .destroy_inode  = ext4_destroy_inode,
+        .write_inode    = ext4_write_inode,
+        .dirty_inode    = ext4_dirty_inode,
+        .delete_inode   = ext4_delete_inode,
+        .write_super    = ext4_write_super,
+        .put_super      = ext4_put_super,
+        .statfs         = ext4_statfs,
+        .remount_fs     = ext4_remount,
+        .clear_inode    = ext4_clear_inode,
+        .show_options   = ext4_show_options,
+#ifdef CONFIG_QUOTA
+        .quota_read     = ext4_quota_read,
+        .quota_write    = ext4_quota_write,
+#endif
+        .bdev_try_to_free_page = bdev_try_to_free_page,
+};
 static const struct export_operations ext4_export_ops = {
        .fh_to_dentry = ext4_fh_to_dentry,
        .fh_to_parent = ext4_fh_to_parent,
@@ -1023,12 +1064,13 @@ enum {
        Opt_journal_update, Opt_journal_dev,
        Opt_journal_checksum, Opt_journal_async_commit,
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
-        Opt_data_err_abort, Opt_data_err_ignore,
+        Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
        Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
        Opt_usrquota, Opt_grpquota, Opt_i_version,
        Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+        Opt_block_validity, Opt_noblock_validity,
        Opt_inode_readahead_blks, Opt_journal_ioprio
 };
@@ -1069,6 +1111,7 @@ static const match_table_t tokens = {
        {Opt_data_writeback, "data=writeback"},
        {Opt_data_err_abort, "data_err=abort"},
        {Opt_data_err_ignore, "data_err=ignore"},
+        {Opt_mb_history_length, "mb_history_length=%u"},
        {Opt_offusrjquota, "usrjquota="},
        {Opt_usrjquota, "usrjquota=%s"},
        {Opt_offgrpjquota, "grpjquota="},
@@ -1087,6 +1130,8 @@ static const match_table_t tokens = {
        {Opt_resize, "resize"},
        {Opt_delalloc, "delalloc"},
        {Opt_nodelalloc, "nodelalloc"},
+        {Opt_block_validity, "block_validity"},
+        {Opt_noblock_validity, "noblock_validity"},
        {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
        {Opt_journal_ioprio, "journal_ioprio=%u"},
        {Opt_auto_da_alloc, "auto_da_alloc=%u"},
@@ -1102,8 +1147,9 @@ static ext4_fsblk_t get_sb_block(void **data)
        if (!options || strncmp(options, "sb=", 3) != 0)
                return 1;       /* Default location */
        options += 3;
-        /*todo: use simple_strtoll with >32bit ext4 */
+        /* TODO: use simple_strtoll with >32bit ext4 */
        sb_block = simple_strtoul(options, &options, 0);
        if (*options && *options != ',') {
                printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
@@ -1113,6 +1159,7 @@ static ext4_fsblk_t get_sb_block(void **data)
        if (*options == ',')
                options++;
        *data = (void *) options;
        return sb_block;
 }
@@ -1206,8 +1253,7 @@ static int parse_options(char *options, struct super_block *sb,
 #else
                case Opt_user_xattr:
                case Opt_nouser_xattr:
-                        printk(KERN_ERR "EXT4 (no)user_xattr options "
+                        ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported");
-                               "not supported\n");
                        break;
 #endif
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
@@ -1220,8 +1266,7 @@ static int parse_options(char *options, struct super_block *sb,
 #else
                case Opt_acl:
                case Opt_noacl:
-                        printk(KERN_ERR "EXT4 (no)acl options "
+                        ext4_msg(sb, KERN_ERR, "(no)acl options not supported");
-                               "not supported\n");
                        break;
 #endif
                case Opt_journal_update:
@@ -1231,16 +1276,16 @@ static int parse_options(char *options, struct super_block *sb,
                           user to specify an existing inode to be the
                           journal file. */
                        if (is_remount) {
-                                printk(KERN_ERR "EXT4-fs: cannot specify "
+                                ext4_msg(sb, KERN_ERR,
-                                       "journal on remount\n");
+                                         "Cannot specify journal on remount");
                                return 0;
                        }
                        set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
                        break;
                case Opt_journal_dev:
                        if (is_remount) {
-                                printk(KERN_ERR "EXT4-fs: cannot specify "
+                                ext4_msg(sb, KERN_ERR,
-                                       "journal on remount\n");
+                                        "Cannot specify journal on remount");
                                return 0;
                        }
                        if (match_int(&args[0], &option))
@@ -1294,9 +1339,8 @@ static int parse_options(char *options, struct super_block *sb,
                        if (is_remount) {
                                if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
                                                != data_opt) {
-                                        printk(KERN_ERR
+                                        ext4_msg(sb, KERN_ERR,
-                                                "EXT4-fs: cannot change data "
+                                                "Cannot change data mode on remount");
-                                                "mode on remount\n");
                                        return 0;
                                }
                        } else {
@@ -1310,6 +1354,13 @@ static int parse_options(char *options, struct super_block *sb,
                case Opt_data_err_ignore:
                        clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
                        break;
+                case Opt_mb_history_length:
+                        if (match_int(&args[0], &option))
+                                return 0;
+                        if (option < 0)
+                                return 0;
+                        sbi->s_mb_history_max = option;
+                        break;
 #ifdef CONFIG_QUOTA
                case Opt_usrjquota:
                        qtype = USRQUOTA;
@@ -1319,31 +1370,31 @@ static int parse_options(char *options, struct super_block *sb,
 set_qf_name:
                        if (sb_any_quota_loaded(sb) &&
                            !sbi->s_qf_names[qtype]) {
-                                printk(KERN_ERR
+                                ext4_msg(sb, KERN_ERR,
-                                       "EXT4-fs: Cannot change journaled "
+                                       "Cannot change journaled "
-                                       "quota options when quota turned on.\n");
+                                       "quota options when quota turned on");
                                return 0;
                        }
                        qname = match_strdup(&args[0]);
                        if (!qname) {
-                                printk(KERN_ERR
+                                ext4_msg(sb, KERN_ERR,
-                                        "EXT4-fs: not enough memory for "
+                                        "Not enough memory for "
-                                        "storing quotafile name.\n");
+                                        "storing quotafile name");
                                return 0;
                        }
                        if (sbi->s_qf_names[qtype] &&
                            strcmp(sbi->s_qf_names[qtype], qname)) {
-                                printk(KERN_ERR
+                                ext4_msg(sb, KERN_ERR,
-                                        "EXT4-fs: %s quota file already "
+                                        "%s quota file already "
-                                        "specified.\n", QTYPE2NAME(qtype));
+                                        "specified", QTYPE2NAME(qtype));
                                kfree(qname);
                                return 0;
                        }
                        sbi->s_qf_names[qtype] = qname;
                        if (strchr(sbi->s_qf_names[qtype], '/')) {
-                                printk(KERN_ERR
+                                ext4_msg(sb, KERN_ERR,
-                                        "EXT4-fs: quotafile must be on "
+                                        "quotafile must be on "
-                                        "filesystem root.\n");
+                                        "filesystem root");
                                kfree(sbi->s_qf_names[qtype]);
                                sbi->s_qf_names[qtype] = NULL;
                                return 0;
@@ -1358,9 +1409,9 @@ set_qf_name:
 clear_qf_name:
                        if (sb_any_quota_loaded(sb) &&
                            sbi->s_qf_names[qtype]) {
-                                printk(KERN_ERR "EXT4-fs: Cannot change "
+                                ext4_msg(sb, KERN_ERR, "Cannot change "
                                        "journaled quota options when "
-                                        "quota turned on.\n");
+                                        "quota turned on");
                                return 0;
                        }
                        /*
@@ -1377,9 +1428,9 @@ clear_qf_name:
 set_qf_format:
                        if (sb_any_quota_loaded(sb) &&
                            sbi->s_jquota_fmt != qfmt) {
-                                printk(KERN_ERR "EXT4-fs: Cannot change "
+                                ext4_msg(sb, KERN_ERR, "Cannot change "
                                        "journaled quota options when "
-                                        "quota turned on.\n");
+                                        "quota turned on");
                                return 0;
                        }
                        sbi->s_jquota_fmt = qfmt;
@@ -1395,8 +1446,8 @@ set_qf_format:
                        break;
                case Opt_noquota:
                        if (sb_any_quota_loaded(sb)) {
-                                printk(KERN_ERR "EXT4-fs: Cannot change quota "
+                                ext4_msg(sb, KERN_ERR, "Cannot change quota "
-                                        "options when quota turned on.\n");
+                                        "options when quota turned on");
                                return 0;
                        }
                        clear_opt(sbi->s_mount_opt, QUOTA);
@@ -1407,8 +1458,8 @@ set_qf_format:
                case Opt_quota:
                case Opt_usrquota:
                case Opt_grpquota:
-                        printk(KERN_ERR
+                        ext4_msg(sb, KERN_ERR,
-                                "EXT4-fs: quota options not supported.\n");
+                                "quota options not supported");
                        break;
                case Opt_usrjquota:
                case Opt_grpjquota:
@@ -1416,9 +1467,8 @@ set_qf_format:
                case Opt_offgrpjquota:
                case Opt_jqfmt_vfsold:
                case Opt_jqfmt_vfsv0:
-                        printk(KERN_ERR
+                        ext4_msg(sb, KERN_ERR,
-                                "EXT4-fs: journaled quota options not "
+                                "journaled quota options not supported");
-                                "supported.\n");
                        break;
                case Opt_noquota:
                        break;
@@ -1443,8 +1493,9 @@ set_qf_format:
                        break;
                case Opt_resize:
                        if (!is_remount) {
-                                printk("EXT4-fs: resize option only available "
+                                ext4_msg(sb, KERN_ERR,
-                                        "for remount\n");
+                                        "resize option only available "
+                                        "for remount");
                                return 0;
                        }
                        if (match_int(&args[0], &option) != 0)
@@ -1474,14 +1525,21 @@ set_qf_format:
                case Opt_delalloc:
                        set_opt(sbi->s_mount_opt, DELALLOC);
                        break;
+                case Opt_block_validity:
+                        set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+                        break;
+                case Opt_noblock_validity:
+                        clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
+                        break;
                case Opt_inode_readahead_blks:
                        if (match_int(&args[0], &option))
                                return 0;
                        if (option < 0 || option > (1 << 30))
                                return 0;
-                        if (option & (option - 1)) {
-                                printk(KERN_ERR "EXT4-fs: inode_readahead_blks"
-                                       " must be a power of 2\n");
+                        /* Still accept 0, as the old bit test did */
+                        if (option && !is_power_of_2(option)) {
+                                ext4_msg(sb, KERN_ERR,
+                                         "inode_readahead_blks must be a power of 2");
                                return 0;
                        }
                        sbi->s_inode_readahead_blks = option;
@@ -1508,9 +1566,9 @@ set_qf_format:
                                set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
                        break;
                default:
-                        printk(KERN_ERR
+                        ext4_msg(sb, KERN_ERR,
-                               "EXT4-fs: Unrecognized mount option \"%s\" "
+                               "Unrecognized mount option \"%s\" "
-                               "or missing value\n", p);
+                               "or missing value", p);
                        return 0;
                }
        }
@@ -1528,21 +1586,21 @@ set_qf_format:
                                (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
                    (sbi->s_qf_names[GRPQUOTA] &&
                                (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
-                        printk(KERN_ERR "EXT4-fs: old and new quota "
+                        ext4_msg(sb, KERN_ERR, "old and new quota "
-                                        "format mixing.\n");
+                                        "format mixing");
                        return 0;
                }
                if (!sbi->s_jquota_fmt) {
-                        printk(KERN_ERR "EXT4-fs: journaled quota format "
+                        ext4_msg(sb, KERN_ERR, "journaled quota format "
-                                        "not specified.\n");
+                                        "not specified");
                        return 0;
                }
        } else {
                if (sbi->s_jquota_fmt) {
-                        printk(KERN_ERR "EXT4-fs: journaled quota format "
+                        ext4_msg(sb, KERN_ERR, "journaled quota format "
                                        "specified with no journaling "
-                                        "enabled.\n");
+                                        "enabled");
                        return 0;
                }
        }
@@ -1557,32 +1615,32 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
        int res = 0;
        if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
-                printk(KERN_ERR "EXT4-fs warning: revision level too high, "
+                ext4_msg(sb, KERN_ERR, "revision level too high, "
-                       "forcing read-only mode\n");
+                         "forcing read-only mode");
                res = MS_RDONLY;
        }
        if (read_only)
                return res;
        if (!(sbi->s_mount_state & EXT4_VALID_FS))
-                printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
+                ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
-                       "running e2fsck is recommended\n");
+                         "running e2fsck is recommended");
        else if ((sbi->s_mount_state & EXT4_ERROR_FS))
-                printk(KERN_WARNING
+                ext4_msg(sb, KERN_WARNING,
-                       "EXT4-fs warning: mounting fs with errors, "
+                         "warning: mounting fs with errors, "
-                       "running e2fsck is recommended\n");
+                         "running e2fsck is recommended");
        else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
                 le16_to_cpu(es->s_mnt_count) >=
                 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
-                printk(KERN_WARNING
+                ext4_msg(sb, KERN_WARNING,
-                       "EXT4-fs warning: maximal mount count reached, "
+                         "warning: maximal mount count reached, "
-                       "running e2fsck is recommended\n");
+                         "running e2fsck is recommended");
        else if (le32_to_cpu(es->s_checkinterval) &&
                (le32_to_cpu(es->s_lastcheck) +
                        le32_to_cpu(es->s_checkinterval) <= get_seconds()))
-                printk(KERN_WARNING
+                ext4_msg(sb, KERN_WARNING,
-                       "EXT4-fs warning: checktime reached, "
+                         "warning: checktime reached, "
-                       "running e2fsck is recommended\n");
+                         "running e2fsck is recommended");
-        if (!sbi->s_journal) 
+        if (!sbi->s_journal)
                es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
        if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
                es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
@@ -1592,7 +1650,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
        if (sbi->s_journal)
                EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-        ext4_commit_super(sb, es, 1);
+        ext4_commit_super(sb, 1);
        if (test_opt(sb, DEBUG))
                printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
                                "bpg=%lu, ipg=%lu, mo=%04lx]\n",
@@ -1603,11 +1661,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
                        sbi->s_mount_opt);
        if (EXT4_SB(sb)->s_journal) {
-                printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
+                ext4_msg(sb, KERN_INFO, "%s journal on %s",
-                       sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
+                       EXT4_SB(sb)->s_journal->j_inode ? "internal" :
                       "external", EXT4_SB(sb)->s_journal->j_devname);
        } else {
-                printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id);
+                ext4_msg(sb, KERN_INFO, "no journal");
        }
        return res;
 }
@@ -1616,10 +1674,10 @@ static int ext4_fill_flex_info(struct super_block *sb)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_group_desc *gdp = NULL;
-        struct buffer_head *bh;
        ext4_group_t flex_group_count;
        ext4_group_t flex_group;
        int groups_per_flex = 0;
+        size_t size;
        int i;
        if (!sbi->s_es->s_log_groups_per_flex) {
@@ -1634,16 +1692,21 @@ static int ext4_fill_flex_info(struct super_block *sb)
        flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
                        ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
                              EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
-        sbi->s_flex_groups = kzalloc(flex_group_count *
+        size = flex_group_count * sizeof(struct flex_groups);
-                                     sizeof(struct flex_groups), GFP_KERNEL);
+        sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
+        if (sbi->s_flex_groups == NULL) {
+                sbi->s_flex_groups = vmalloc(size);
+                if (sbi->s_flex_groups)
+                        memset(sbi->s_flex_groups, 0, size);
+        }
        if (sbi->s_flex_groups == NULL) {
-                printk(KERN_ERR "EXT4-fs: not enough memory for "
+                ext4_msg(sb, KERN_ERR, "not enough memory for "
-                                "%u flex groups\n", flex_group_count);
+                                "%u flex groups", flex_group_count);
                goto failed;
        }
        for (i = 0; i < sbi->s_groups_count; i++) {
-                gdp = ext4_get_group_desc(sb, i, &bh);
+                gdp = ext4_get_group_desc(sb, i, NULL);
                flex_group = ext4_flex_group(sbi, i);
                atomic_set(&sbi->s_flex_groups[flex_group].free_inodes,
@@ -1724,44 +1787,44 @@ static int ext4_check_descriptors(struct super_block *sb)
                block_bitmap = ext4_block_bitmap(sb, gdp);
                if (block_bitmap < first_block || block_bitmap > last_block) {
-                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
                               "Block bitmap for group %u not in group "
-                               "(block %llu)!\n", i, block_bitmap);
+                               "(block %llu)!", i, block_bitmap);
                        return 0;
                }
                inode_bitmap = ext4_inode_bitmap(sb, gdp);
                if (inode_bitmap < first_block || inode_bitmap > last_block) {
-                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
                               "Inode bitmap for group %u not in group "
-                               "(block %llu)!\n", i, inode_bitmap);
+                               "(block %llu)!", i, inode_bitmap);
                        return 0;
                }
                inode_table = ext4_inode_table(sb, gdp);
                if (inode_table < first_block ||
                    inode_table + sbi->s_itb_per_group - 1 > last_block) {
-                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
                               "Inode table for group %u not in group "
-                               "(block %llu)!\n", i, inode_table);
+                               "(block %llu)!", i, inode_table);
                        return 0;
                }
-                spin_lock(sb_bgl_lock(sbi, i));
+                ext4_lock_group(sb, i);
                if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
-                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
-                               "Checksum for group %u failed (%u!=%u)\n",
+                                 "Checksum for group %u failed (%u!=%u)",
-                               i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
+                                 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
-                               gdp)), le16_to_cpu(gdp->bg_checksum));
+                                     gdp)), le16_to_cpu(gdp->bg_checksum));
                        if (!(sb->s_flags & MS_RDONLY)) {
-                                spin_unlock(sb_bgl_lock(sbi, i));
+                                ext4_unlock_group(sb, i);
                                return 0;
                        }
                }
-                spin_unlock(sb_bgl_lock(sbi, i));
+                ext4_unlock_group(sb, i);
                if (!flexbg_flag)
                        first_block += EXT4_BLOCKS_PER_GROUP(sb);
        }
        ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
-        sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
+        sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
        return 1;
 }
@@ -1796,8 +1859,8 @@ static void ext4_orphan_cleanup(struct super_block *sb,
        }
        if (bdev_read_only(sb->s_bdev)) {
-                printk(KERN_ERR "EXT4-fs: write access "
+                ext4_msg(sb, KERN_ERR, "write access "
-                        "unavailable, skipping orphan cleanup.\n");
+                        "unavailable, skipping orphan cleanup");
                return;
        }
@@ -1811,8 +1874,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
        }
        if (s_flags & MS_RDONLY) {
-                printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n",
+                ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
-                       sb->s_id);
                sb->s_flags &= ~MS_RDONLY;
        }
 #ifdef CONFIG_QUOTA
@@ -1823,9 +1885,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
                if (EXT4_SB(sb)->s_qf_names[i]) {
                        int ret = ext4_quota_on_mount(sb, i);
                        if (ret < 0)
-                                printk(KERN_ERR
+                                ext4_msg(sb, KERN_ERR,
-                                        "EXT4-fs: Cannot turn on journaled "
+                                        "Cannot turn on journaled "
-                                        "quota: error %d\n", ret);
+                                        "quota: error %d", ret);
                }
        }
 #endif
@@ -1842,16 +1904,16 @@ static void ext4_orphan_cleanup(struct super_block *sb,
                list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
                vfs_dq_init(inode);
                if (inode->i_nlink) {
-                        printk(KERN_DEBUG
+                        ext4_msg(sb, KERN_DEBUG,
-                                "%s: truncating inode %lu to %lld bytes\n",
+                                "%s: truncating inode %lu to %lld bytes",
                                __func__, inode->i_ino, inode->i_size);
                        jbd_debug(2, "truncating inode %lu to %lld bytes\n",
                                  inode->i_ino, inode->i_size);
                        ext4_truncate(inode);
                        nr_truncates++;
                } else {
-                        printk(KERN_DEBUG
+                        ext4_msg(sb, KERN_DEBUG,
-                                "%s: deleting unreferenced inode %lu\n",
+                                "%s: deleting unreferenced inode %lu",
                                __func__, inode->i_ino);
                        jbd_debug(2, "deleting unreferenced inode %lu\n",
                                  inode->i_ino);
@@ -1863,11 +1925,11 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 #define PLURAL(x) (x), ((x) == 1) ? "" : "s"
        if (nr_orphans)
-                printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
+                ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
-                       sb->s_id, PLURAL(nr_orphans));
+                       PLURAL(nr_orphans));
        if (nr_truncates)
-                printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n",
+                ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
-                       sb->s_id, PLURAL(nr_truncates));
+                       PLURAL(nr_truncates));
 #ifdef CONFIG_QUOTA
        /* Turn quotas off */
        for (i = 0; i < MAXQUOTAS; i++) {
@@ -1877,6 +1939,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 #endif
        sb->s_flags = s_flags; /* Restore MS_RDONLY status */
 }
 /*
 * Maximal extent format file size.
 * Resulting logical blkno at s_maxbytes must fit in our on-disk
@@ -1927,19 +1990,19 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
        loff_t res = EXT4_NDIR_BLOCKS;
        int meta_blocks;
        loff_t upper_limit;
-        /* This is calculated to be the largest file size for a
+        /* This is calculated to be the largest file size for a dense, block
-         * dense, bitmapped file such that the total number of
+         * mapped file such that the file's total number of 512-byte sectors,
-         * sectors in the file, including data and all indirect blocks,
+         * including data and all indirect blocks, does not exceed (2^48 - 1).
-         * does not exceed 2^48 -1
+         *
-         * __u32 i_blocks_lo and _u16 i_blocks_high representing the
+         * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
-         * total number of  512 bytes blocks of the file
+         * number of 512-byte sectors of the file.
         */
        if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
                /*
-                 * !has_huge_files or CONFIG_LBD is not enabled
+                 * !has_huge_files or CONFIG_LBD not enabled implies that
-                 * implies the inode i_block represent total blocks in
+                 * the inode i_block field represents total file blocks in
-                 * 512 bytes 32 == size of vfs inode i_blocks * 8
+                 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
                 */
                upper_limit = (1LL << 32) - 1;
@@ -1981,7 +2044,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
 }
 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
-                                ext4_fsblk_t logical_sb_block, int nr)
+                                   ext4_fsblk_t logical_sb_block, int nr)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        ext4_group_t bg, first_meta_bg;
@@ -1995,6 +2058,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
        bg = sbi->s_desc_per_block * nr;
        if (ext4_bg_has_super(sb, bg))
                has_super = 1;
        return (has_super + ext4_group_first_block_no(sb, bg));
 }
@@ -2091,8 +2155,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
        if (parse_strtoul(buf, 0x40000000, &t))
                return -EINVAL;
-        /* inode_readahead_blks must be a power of 2 */
+        if (!is_power_of_2(t))
-        if (t & (t-1))
                return -EINVAL;
        sbi->s_inode_readahead_blks = t;
@@ -2100,7 +2163,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
 }
 static ssize_t sbi_ui_show(struct ext4_attr *a,
-                                struct ext4_sb_info *sbi, char *buf)
+                           struct ext4_sb_info *sbi, char *buf)
 {
        unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
@@ -2205,7 +2268,6 @@ static struct kobj_type ext4_ktype = {
 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                                __releases(kernel_lock)
                                __acquires(kernel_lock)
 {
        struct buffer_head *bh;
        struct ext4_super_block *es = NULL;
@@ -2256,7 +2318,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
        if (!blocksize) {
-                printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
+                ext4_msg(sb, KERN_ERR, "unable to set blocksize");
                goto out_fail;
        }
@@ -2272,7 +2334,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        }
        if (!(bh = sb_bread(sb, logical_sb_block))) {
-                printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
+                ext4_msg(sb, KERN_ERR, "unable to read superblock");
                goto out_fail;
        }
        /*
@@ -2321,6 +2383,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
        sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
        sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
+        sbi->s_mb_history_max = default_mb_history_length;
        set_opt(sbi->s_mount_opt, BARRIER);
@@ -2330,7 +2393,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         */
        set_opt(sbi->s_mount_opt, DELALLOC);
        if (!parse_options((char *) data, sb, &journal_devnum,
                           &journal_ioprio, NULL, 0))
                goto failed_mount;
@@ -2342,9 +2404,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
            (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
             EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
             EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
-                printk(KERN_WARNING
+                ext4_msg(sb, KERN_WARNING,
-                       "EXT4-fs warning: feature flags set on rev 0 fs, "
+                       "feature flags set on rev 0 fs, "
-                       "running e2fsck is recommended\n");
+                       "running e2fsck is recommended");
        /*
         * Check feature flags regardless of the revision level, since we
@@ -2353,16 +2415,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         */
        features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
        if (features) {
-                printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of "
+                ext4_msg(sb, KERN_ERR,
-                       "unsupported optional features (%x).\n", sb->s_id,
+                        "Couldn't mount because of "
+                        "unsupported optional features (%x)",
                        (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
                        ~EXT4_FEATURE_INCOMPAT_SUPP));
                goto failed_mount;
        }
        features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
        if (!(sb->s_flags & MS_RDONLY) && features) {
-                printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of "
+                ext4_msg(sb, KERN_ERR,
-                       "unsupported optional features (%x).\n", sb->s_id,
+                        "Couldn't mount RDWR because of "
+                        "unsupported optional features (%x)",
                        (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
                        ~EXT4_FEATURE_RO_COMPAT_SUPP));
                goto failed_mount;
@@ -2376,9 +2440,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                 */
                if (sizeof(root->i_blocks) < sizeof(u64) &&
                                !(sb->s_flags & MS_RDONLY)) {
-                        printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
+                        ext4_msg(sb, KERN_ERR, "Filesystem with huge "
                                        "files cannot be mounted read-write "
-                                        "without CONFIG_LBD.\n", sb->s_id);
+                                        "without CONFIG_LBD");
                        goto failed_mount;
                }
        }
@@ -2386,17 +2450,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        if (blocksize < EXT4_MIN_BLOCK_SIZE ||
            blocksize > EXT4_MAX_BLOCK_SIZE) {
-                printk(KERN_ERR
+                ext4_msg(sb, KERN_ERR,
-                       "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n",
+                       "Unsupported filesystem blocksize %d", blocksize);
-                       blocksize, sb->s_id);
                goto failed_mount;
        }
        if (sb->s_blocksize != blocksize) {
                /* Validate the filesystem blocksize */
                if (!sb_set_blocksize(sb, blocksize)) {
-                        printk(KERN_ERR "EXT4-fs: bad block size %d.\n",
+                        ext4_msg(sb, KERN_ERR, "bad block size %d",
                                        blocksize);
                        goto failed_mount;
                }
@@ -2406,15 +2468,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                offset = do_div(logical_sb_block, blocksize);
                bh = sb_bread(sb, logical_sb_block);
                if (!bh) {
-                        printk(KERN_ERR
+                        ext4_msg(sb, KERN_ERR,
-                               "EXT4-fs: Can't read superblock on 2nd try.\n");
+                               "Can't read superblock on 2nd try");
                        goto failed_mount;
                }
                es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
                sbi->s_es = es;
                if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
-                        printk(KERN_ERR
+                        ext4_msg(sb, KERN_ERR,
-                               "EXT4-fs: Magic mismatch, very weird !\n");
+                               "Magic mismatch, very weird!");
                        goto failed_mount;
                }
        }
@@ -2432,30 +2494,33 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
                    (!is_power_of_2(sbi->s_inode_size)) ||
                    (sbi->s_inode_size > blocksize)) {
-                        printk(KERN_ERR
+                        ext4_msg(sb, KERN_ERR,
-                               "EXT4-fs: unsupported inode size: %d\n",
+                               "unsupported inode size: %d",
                               sbi->s_inode_size);
                        goto failed_mount;
                }
                if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
                        sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
        }
        sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
                if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
                    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
                    !is_power_of_2(sbi->s_desc_size)) {
-                        printk(KERN_ERR
+                        ext4_msg(sb, KERN_ERR,
-                               "EXT4-fs: unsupported descriptor size %lu\n",
+                               "unsupported descriptor size %lu",
                               sbi->s_desc_size);
                        goto failed_mount;
                }
        } else
                sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
        sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
        sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
        if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
                goto cantfind_ext4;
        sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
        if (sbi->s_inodes_per_block == 0)
                goto cantfind_ext4;
@@ -2466,6 +2531,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        sbi->s_mount_state = le16_to_cpu(es->s_state);
        sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
        sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
        for (i = 0; i < 4; i++)
                sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
        sbi->s_def_hash_version = es->s_def_hash_version;
@@ -2483,25 +2549,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        }
        if (sbi->s_blocks_per_group > blocksize * 8) {
-                printk(KERN_ERR
+                ext4_msg(sb, KERN_ERR,
-                       "EXT4-fs: #blocks per group too big: %lu\n",
+                       "#blocks per group too big: %lu",
                       sbi->s_blocks_per_group);
                goto failed_mount;
        }
        if (sbi->s_inodes_per_group > blocksize * 8) {
-                printk(KERN_ERR
+                ext4_msg(sb, KERN_ERR,
-                       "EXT4-fs: #inodes per group too big: %lu\n",
+                       "#inodes per group too big: %lu",
                       sbi->s_inodes_per_group);
                goto failed_mount;
        }
        if (ext4_blocks_count(es) >
                    (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
-                printk(KERN_ERR "EXT4-fs: filesystem on %s:"
+                ext4_msg(sb, KERN_ERR, "filesystem"
-                        " too large to mount safely\n", sb->s_id);
+                        " too large to mount safely");
                if (sizeof(sector_t) < 8)
-                        printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not "
+                        ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled");
-                                        "enabled\n");
                goto failed_mount;
        }
@@ -2511,21 +2576,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        /* check blocks count against device size */
        blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
        if (blocks_count && ext4_blocks_count(es) > blocks_count) {
-                printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu "
+                ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
-                       "exceeds size of device (%llu blocks)\n",
+                       "exceeds size of device (%llu blocks)",
                       ext4_blocks_count(es), blocks_count);
                goto failed_mount;
        }
-        /*
+        /*
-         * It makes no sense for the first data block to be beyond the end
+         * It makes no sense for the first data block to be beyond the end
-         * of the filesystem.
+         * of the filesystem.
-         */
+         */
-        if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
+        if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
-                printk(KERN_WARNING "EXT4-fs: bad geometry: first data"
+                ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
-                       "block %u is beyond end of filesystem (%llu)\n",
+                         "block %u is beyond end of filesystem (%llu)",
-                       le32_to_cpu(es->s_first_data_block),
+                         le32_to_cpu(es->s_first_data_block),
-                       ext4_blocks_count(es));
+                         ext4_blocks_count(es));
                 goto failed_mount;
         }
        blocks_count = (ext4_blocks_count(es) -
@@ -2533,9 +2598,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        EXT4_BLOCKS_PER_GROUP(sb) - 1);
        do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
        if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
-                printk(KERN_WARNING "EXT4-fs: groups count too large: %u "
+                ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
                       "(block count %llu, first data block %u, "
-                       "blocks per group %lu)\n", sbi->s_groups_count,
+                       "blocks per group %lu)", sbi->s_groups_count,
                       ext4_blocks_count(es),
                       le32_to_cpu(es->s_first_data_block),
                       EXT4_BLOCKS_PER_GROUP(sb));
@@ -2547,7 +2612,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
                                    GFP_KERNEL);
        if (sbi->s_group_desc == NULL) {
-                printk(KERN_ERR "EXT4-fs: not enough memory\n");
+                ext4_msg(sb, KERN_ERR, "not enough memory");
                goto failed_mount;
        }
@@ -2562,21 +2627,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                block = descriptor_loc(sb, logical_sb_block, i);
                sbi->s_group_desc[i] = sb_bread(sb, block);
                if (!sbi->s_group_desc[i]) {
-                        printk(KERN_ERR "EXT4-fs: "
+                        ext4_msg(sb, KERN_ERR,
-                               "can't read group descriptor %d\n", i);
+                               "can't read group descriptor %d", i);
                        db_count = i;
                        goto failed_mount2;
                }
        }
        if (!ext4_check_descriptors(sb)) {
-                printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
+                ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
                goto failed_mount2;
        }
        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
                if (!ext4_fill_flex_info(sb)) {
-                        printk(KERN_ERR
+                        ext4_msg(sb, KERN_ERR,
-                               "EXT4-fs: unable to initialize "
+                               "unable to initialize "
-                               "flex_bg meta info!\n");
+                               "flex_bg meta info!");
                        goto failed_mount2;
                }
@@ -2598,7 +2663,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
        }
        if (err) {
-                printk(KERN_ERR "EXT4-fs: insufficient memory\n");
+                ext4_msg(sb, KERN_ERR, "insufficient memory");
                goto failed_mount3;
        }
@@ -2607,7 +2672,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        /*
         * set up enough so that it can read an inode
         */
-        sb->s_op = &ext4_sops;
+        if (!test_opt(sb, NOLOAD) &&
+            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
+                sb->s_op = &ext4_sops;
+        else
+                sb->s_op = &ext4_nojournal_sops;
        sb->s_export_op = &ext4_export_ops;
        sb->s_xattr = ext4_xattr_handlers;
 #ifdef CONFIG_QUOTA
@@ -2615,6 +2684,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        sb->dq_op = &ext4_quota_operations;
 #endif
        INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
+        mutex_init(&sbi->s_orphan_lock);
+        mutex_init(&sbi->s_resize_lock);
        sb->s_root = NULL;
@@ -2632,13 +2703,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        goto failed_mount3;
                if (!(sb->s_flags & MS_RDONLY) &&
                    EXT4_SB(sb)->s_journal->j_failed_commit) {
-                        printk(KERN_CRIT "EXT4-fs error (device %s): "
+                        ext4_msg(sb, KERN_CRIT, "error: "
                               "ext4_fill_super: Journal transaction "
-                               "%u is corrupt\n", sb->s_id,
+                               "%u is corrupt",
                               EXT4_SB(sb)->s_journal->j_failed_commit);
                        if (test_opt(sb, ERRORS_RO)) {
-                                printk(KERN_CRIT
+                                ext4_msg(sb, KERN_CRIT,
-                                       "Mounting filesystem read-only\n");
+                                       "Mounting filesystem read-only");
                                sb->s_flags |= MS_RDONLY;
                                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
                                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -2646,14 +2717,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        if (test_opt(sb, ERRORS_PANIC)) {
                                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
                                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-                                ext4_commit_super(sb, es, 1);
+                                ext4_commit_super(sb, 1);
                                goto failed_mount4;
                        }
                }
        } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
              EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
-                printk(KERN_ERR "EXT4-fs: required journal recovery "
+                ext4_msg(sb, KERN_ERR, "required journal recovery "
-                       "suppressed and not mounted read-only\n");
+                       "suppressed and not mounted read-only");
                goto failed_mount4;
        } else {
                clear_opt(sbi->s_mount_opt, DATA_FLAGS);
@@ -2666,7 +2737,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        if (ext4_blocks_count(es) > 0xffffffffULL &&
            !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
                                       JBD2_FEATURE_INCOMPAT_64BIT)) {
-                printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n");
+                ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
                goto failed_mount4;
        }
@@ -2704,8 +2775,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        case EXT4_MOUNT_WRITEBACK_DATA:
                if (!jbd2_journal_check_available_features
                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
-                        printk(KERN_ERR "EXT4-fs: Journal does not support "
+                        ext4_msg(sb, KERN_ERR, "Journal does not support "
-                               "requested data journaling mode\n");
+                               "requested data journaling mode");
                        goto failed_mount4;
                }
        default:
@@ -2717,8 +2788,8 @@ no_journal:
        if (test_opt(sb, NOBH)) {
                if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
-                        printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - "
+                        ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
-                                "its supported only with writeback mode\n");
+                                "its supported only with writeback mode");
                        clear_opt(sbi->s_mount_opt, NOBH);
                }
        }
@@ -2729,18 +2800,18 @@ no_journal:
        root = ext4_iget(sb, EXT4_ROOT_INO);
        if (IS_ERR(root)) {
-                printk(KERN_ERR "EXT4-fs: get root inode failed\n");
+                ext4_msg(sb, KERN_ERR, "get root inode failed");
                ret = PTR_ERR(root);
                goto failed_mount4;
        }
        if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
                iput(root);
-                printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n");
+                ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
                goto failed_mount4;
        }
        sb->s_root = d_alloc_root(root);
        if (!sb->s_root) {
-                printk(KERN_ERR "EXT4-fs: get root dentry failed\n");
+                ext4_msg(sb, KERN_ERR, "get root dentry failed");
                iput(root);
                ret = -ENOMEM;
                goto failed_mount4;
@@ -2769,22 +2840,29 @@ no_journal:
                                                        sbi->s_inode_size) {
                sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
                                                       EXT4_GOOD_OLD_INODE_SIZE;
-                printk(KERN_INFO "EXT4-fs: required extra inode space not"
+                ext4_msg(sb, KERN_INFO, "required extra inode space not "
-                        "available.\n");
+                         "available");
        }
        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
-                printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
+                ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
-                                "requested data journaling mode\n");
+                         "requested data journaling mode");
                clear_opt(sbi->s_mount_opt, DELALLOC);
        } else if (test_opt(sb, DELALLOC))
-                printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
+                ext4_msg(sb, KERN_INFO, "delayed allocation enabled");
+        err = ext4_setup_system_zone(sb);
+        if (err) {
+                ext4_msg(sb, KERN_ERR, "failed to initialize system "
+                         "zone (%d)", err);
+                goto failed_mount4;
+        }
        ext4_ext_init(sb);
        err = ext4_mb_init(sb, needs_recovery);
        if (err) {
-                printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
+                ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)",
-                       err);
+                         err);
                goto failed_mount4;
        }
@@ -2798,19 +2876,11 @@ no_journal:
                goto failed_mount4;
        };
-        /*
-         * akpm: core read_super() calls in here with the superblock locked.
-         * That deadlocks, because orphan cleanup needs to lock the superblock
-         * in numerous places.  Here we just pop the lock - it's relatively
-         * harmless, because we are now ready to accept write_super() requests,
-         * and aviro says that's the only reason for hanging onto the
-         * superblock lock.
-         */
        EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
        ext4_orphan_cleanup(sb, es);
        EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
        if (needs_recovery) {
-                printk(KERN_INFO "EXT4-fs: recovery complete.\n");
+                ext4_msg(sb, KERN_INFO, "recovery complete");
                ext4_mark_recovery_complete(sb, es);
        }
        if (EXT4_SB(sb)->s_journal) {
@@ -2823,25 +2893,30 @@ no_journal:
        } else
                descr = "out journal";
-        printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n",
+        ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr);
-               sb->s_id, descr);
        lock_kernel();
        return 0;
 cantfind_ext4:
        if (!silent)
-                printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n",
+                ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
-                       sb->s_id);
        goto failed_mount;
 failed_mount4:
-        printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id);
+        ext4_msg(sb, KERN_ERR, "mount failed");
+        ext4_release_system_zone(sb);
        if (sbi->s_journal) {
                jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
        }
 failed_mount3:
+        if (sbi->s_flex_groups) {
+                if (is_vmalloc_addr(sbi->s_flex_groups))
+                        vfree(sbi->s_flex_groups);
+                else
+                        kfree(sbi->s_flex_groups);
+        }
        percpu_counter_destroy(&sbi->s_freeblocks_counter);
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -2862,6 +2937,7 @@ failed_mount:
        brelse(bh);
 out_fail:
        sb->s_fs_info = NULL;
+        kfree(sbi->s_blockgroup_lock);
        kfree(sbi);
        lock_kernel();
        return ret;
@@ -2906,27 +2982,27 @@ static journal_t *ext4_get_journal(struct super_block *sb,
        journal_inode = ext4_iget(sb, journal_inum);
        if (IS_ERR(journal_inode)) {
-                printk(KERN_ERR "EXT4-fs: no journal found.\n");
+                ext4_msg(sb, KERN_ERR, "no journal found");
                return NULL;
        }
        if (!journal_inode->i_nlink) {
                make_bad_inode(journal_inode);
                iput(journal_inode);
-                printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n");
+                ext4_msg(sb, KERN_ERR, "journal inode is deleted");
                return NULL;
        }
        jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
                  journal_inode, journal_inode->i_size);
        if (!S_ISREG(journal_inode->i_mode)) {
-                printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
+                ext4_msg(sb, KERN_ERR, "invalid journal inode");
                iput(journal_inode);
                return NULL;
        }
        journal = jbd2_journal_init_inode(journal_inode);
        if (!journal) {
-                printk(KERN_ERR "EXT4-fs: Could not load journal inode\n");
+                ext4_msg(sb, KERN_ERR, "Could not load journal inode");
                iput(journal_inode);
                return NULL;
        }
@@ -2950,22 +3026,22 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
-        bdev = ext4_blkdev_get(j_dev);
+        bdev = ext4_blkdev_get(j_dev, sb);
        if (bdev == NULL)
                return NULL;
        if (bd_claim(bdev, sb)) {
-                printk(KERN_ERR
+                ext4_msg(sb, KERN_ERR,
-                        "EXT4-fs: failed to claim external journal device.\n");
+                        "failed to claim external journal device");
                blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
                return NULL;
        }
        blocksize = sb->s_blocksize;
-        hblock = bdev_hardsect_size(bdev);
+        hblock = bdev_logical_block_size(bdev);
        if (blocksize < hblock) {
-                printk(KERN_ERR
+                ext4_msg(sb, KERN_ERR,
-                        "EXT4-fs: blocksize too small for journal device.\n");
+                        "blocksize too small for journal device");
                goto out_bdev;
        }
@@ -2973,8 +3049,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
        offset = EXT4_MIN_BLOCK_SIZE % blocksize;
        set_blocksize(bdev, blocksize);
        if (!(bh = __bread(bdev, sb_block, blocksize))) {
-                printk(KERN_ERR "EXT4-fs: couldn't read superblock of "
+                ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
-                       "external journal\n");
+                       "external journal");
                goto out_bdev;
        }
@@ -2982,14 +3058,14 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
        if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
            !(le32_to_cpu(es->s_feature_incompat) &
              EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
-                printk(KERN_ERR "EXT4-fs: external journal has "
+                ext4_msg(sb, KERN_ERR, "external journal has "
-                                        "bad superblock\n");
+                                        "bad superblock");
                brelse(bh);
                goto out_bdev;
        }
        if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
-                printk(KERN_ERR "EXT4-fs: journal UUID does not match\n");
+                ext4_msg(sb, KERN_ERR, "journal UUID does not match");
                brelse(bh);
                goto out_bdev;
        }
@@ -3001,25 +3077,26 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
        journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
                                        start, len, blocksize);
        if (!journal) {
-                printk(KERN_ERR "EXT4-fs: failed to create device journal\n");
+                ext4_msg(sb, KERN_ERR, "failed to create device journal");
                goto out_bdev;
        }
        journal->j_private = sb;
        ll_rw_block(READ, 1, &journal->j_sb_buffer);
        wait_on_buffer(journal->j_sb_buffer);
        if (!buffer_uptodate(journal->j_sb_buffer)) {
-                printk(KERN_ERR "EXT4-fs: I/O error on journal device\n");
+                ext4_msg(sb, KERN_ERR, "I/O error on journal device");
                goto out_journal;
        }
        if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
-                printk(KERN_ERR "EXT4-fs: External journal has more than one "
+                ext4_msg(sb, KERN_ERR, "External journal has more than one "
-                                        "user (unsupported) - %d\n",
+                                        "user (unsupported) - %d",
                        be32_to_cpu(journal->j_superblock->s_nr_users));
                goto out_journal;
        }
        EXT4_SB(sb)->journal_bdev = bdev;
        ext4_init_journal_params(sb, journal);
        return journal;
 out_journal:
        jbd2_journal_destroy(journal);
 out_bdev:
@@ -3041,8 +3118,8 @@ static int ext4_load_journal(struct super_block *sb,
        if (journal_devnum &&
            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
-                printk(KERN_INFO "EXT4-fs: external journal device major/minor "
+                ext4_msg(sb, KERN_INFO, "external journal device major/minor "
-                        "numbers have changed\n");
+                        "numbers have changed");
                journal_dev = new_decode_dev(journal_devnum);
        } else
                journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
@@ -3054,24 +3131,23 @@ static int ext4_load_journal(struct super_block *sb,
         * crash?  For recovery, we need to check in advance whether we
         * can get read-write access to the device.
         */
        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
                if (sb->s_flags & MS_RDONLY) {
-                        printk(KERN_INFO "EXT4-fs: INFO: recovery "
+                        ext4_msg(sb, KERN_INFO, "INFO: recovery "
-                                        "required on readonly filesystem.\n");
+                                        "required on readonly filesystem");
                        if (really_read_only) {
-                                printk(KERN_ERR "EXT4-fs: write access "
+                                ext4_msg(sb, KERN_ERR, "write access "
-                                        "unavailable, cannot proceed.\n");
+                                        "unavailable, cannot proceed");
                                return -EROFS;
                        }
-                        printk(KERN_INFO "EXT4-fs: write access will "
+                        ext4_msg(sb, KERN_INFO, "write access will "
-                               "be enabled during recovery.\n");
+                               "be enabled during recovery");
                }
        }
        if (journal_inum && journal_dev) {
-                printk(KERN_ERR "EXT4-fs: filesystem has both journal "
+                ext4_msg(sb, KERN_ERR, "filesystem has both journal "
-                       "and inode journals!\n");
+                       "and inode journals!");
                return -EINVAL;
        }
@@ -3084,14 +3160,14 @@ static int ext4_load_journal(struct super_block *sb,
        }
        if (journal->j_flags & JBD2_BARRIER)
-                printk(KERN_INFO "EXT4-fs: barriers enabled\n");
+                ext4_msg(sb, KERN_INFO, "barriers enabled");
        else
-                printk(KERN_INFO "EXT4-fs: barriers disabled\n");
+                ext4_msg(sb, KERN_INFO, "barriers disabled");
        if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
                err = jbd2_journal_update_format(journal);
                if (err)  {
-                        printk(KERN_ERR "EXT4-fs: error updating journal.\n");
+                        ext4_msg(sb, KERN_ERR, "error updating journal");
                        jbd2_journal_destroy(journal);
                        return err;
                }
@@ -3103,7 +3179,7 @@ static int ext4_load_journal(struct super_block *sb,
                err = jbd2_journal_load(journal);
        if (err) {
-                printk(KERN_ERR "EXT4-fs: error loading journal.\n");
+                ext4_msg(sb, KERN_ERR, "error loading journal");
                jbd2_journal_destroy(journal);
                return err;
        }
@@ -3114,18 +3190,17 @@ static int ext4_load_journal(struct super_block *sb,
        if (journal_devnum &&
            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
                es->s_journal_dev = cpu_to_le32(journal_devnum);
-                sb->s_dirt = 1;
                /* Make sure we flush the recovery flag to disk. */
-                ext4_commit_super(sb, es, 1);
+                ext4_commit_super(sb, 1);
        }
        return 0;
 }
-static int ext4_commit_super(struct super_block *sb,
+static int ext4_commit_super(struct super_block *sb, int sync)
-                              struct ext4_super_block *es, int sync)
 {
+        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
        struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
        int error = 0;
@@ -3140,8 +3215,8 @@ static int ext4_commit_super(struct super_block *sb,
                 * be remapped.  Nothing we can do but to retry the
                 * write and hope for the best.
                 */
-                printk(KERN_ERR "EXT4-fs: previous I/O error to "
+                ext4_msg(sb, KERN_ERR, "previous I/O error to "
-                       "superblock detected for %s.\n", sb->s_id);
+                       "superblock detected");
                clear_buffer_write_io_error(sbh);
                set_buffer_uptodate(sbh);
        }
@@ -3154,7 +3229,7 @@ static int ext4_commit_super(struct super_block *sb,
                                        &EXT4_SB(sb)->s_freeblocks_counter));
        es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
                                        &EXT4_SB(sb)->s_freeinodes_counter));
+        sb->s_dirt = 0;
        BUFFER_TRACE(sbh, "marking dirty");
        mark_buffer_dirty(sbh);
        if (sync) {
@@ -3164,8 +3239,8 @@ static int ext4_commit_super(struct super_block *sb,
                error = buffer_write_io_error(sbh);
                if (error) {
-                        printk(KERN_ERR "EXT4-fs: I/O error while writing "
+                        ext4_msg(sb, KERN_ERR, "I/O error while writing "
-                               "superblock for %s.\n", sb->s_id);
+                               "superblock");
                        clear_buffer_write_io_error(sbh);
                        set_buffer_uptodate(sbh);
                }
@@ -3173,7 +3248,6 @@ static int ext4_commit_super(struct super_block *sb,
        return error;
 }
 /*
 * Have we just finished recovery?  If so, and if we are mounting (or
 * remounting) the filesystem readonly, then we will end up with a
@@ -3192,14 +3266,11 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
        if (jbd2_journal_flush(journal) < 0)
                goto out;
-        lock_super(sb);
        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
            sb->s_flags & MS_RDONLY) {
                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-                sb->s_dirt = 0;
+                ext4_commit_super(sb, 1);
-                ext4_commit_super(sb, es, 1);
        }
-        unlock_super(sb);
 out:
        jbd2_journal_unlock_updates(journal);
@@ -3238,7 +3309,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-                ext4_commit_super(sb, es, 1);
+                ext4_commit_super(sb, 1);
                jbd2_journal_clear_err(journal);
        }
@@ -3257,29 +3328,17 @@ int ext4_force_commit(struct super_block *sb)
                return 0;
        journal = EXT4_SB(sb)->s_journal;
-        if (journal) {
+        if (journal)
-                sb->s_dirt = 0;
                ret = ext4_journal_force_commit(journal);
-        }
        return ret;
 }
-/*
- * Ext4 always journals updates to the superblock itself, so we don't
- * have to propagate any other updates to the superblock on disk at this
- * point.  (We can probably nuke this function altogether, and remove
- * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...)
- */
 static void ext4_write_super(struct super_block *sb)
 {
-        if (EXT4_SB(sb)->s_journal) {
+        lock_super(sb);
-                if (mutex_trylock(&sb->s_lock) != 0)
+        ext4_commit_super(sb, 1);
-                        BUG();
+        unlock_super(sb);
-                sb->s_dirt = 0;
-        } else {
-                ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
-        }
 }
 static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -3288,16 +3347,9 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
        tid_t target;
        trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
-        sb->s_dirt = 0;
+        if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
-        if (EXT4_SB(sb)->s_journal) {
+                if (wait)
-                if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal,
+                        jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
-                                              &target)) {
-                        if (wait)
-                                jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
-                                                     target);
-                }
-        } else {
-                ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
        }
        return ret;
 }
@@ -3310,34 +3362,32 @@ static int ext4_freeze(struct super_block *sb)
 {
        int error = 0;
        journal_t *journal;
-        sb->s_dirt = 0;
-        if (!(sb->s_flags & MS_RDONLY)) {
+        if (sb->s_flags & MS_RDONLY)
-                journal = EXT4_SB(sb)->s_journal;
+                return 0;
-                if (journal) {
+        journal = EXT4_SB(sb)->s_journal;
-                        /* Now we set up the journal barrier. */
-                        jbd2_journal_lock_updates(journal);
-                        /*
+        /* Now we set up the journal barrier. */
-                         * We don't want to clear needs_recovery flag when we
+        jbd2_journal_lock_updates(journal);
-                         * failed to flush the journal.
-                         */
-                        error = jbd2_journal_flush(journal);
-                        if (error < 0)
-                                goto out;
-                }
-                /* Journal blocked and flushed, clear needs_recovery flag. */
+        /*
-                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+         * Don't clear the needs_recovery flag if we failed to flush
-                error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
+         * the journal.
-                if (error)
+         */
-                        goto out;
+        error = jbd2_journal_flush(journal);
+        if (error < 0) {
+        out:
+                jbd2_journal_unlock_updates(journal);
+                return error;
        }
+        /* Journal blocked and flushed, clear needs_recovery flag. */
+        EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+        error = ext4_commit_super(sb, 1);
+        if (error)
+                goto out;
        return 0;
-out:
-        jbd2_journal_unlock_updates(journal);
-        return error;
 }
 /*
@@ -3346,14 +3396,15 @@ out:
 */
 static int ext4_unfreeze(struct super_block *sb)
 {
-        if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) {
+        if (sb->s_flags & MS_RDONLY)
-                lock_super(sb);
+                return 0;
-                /* Reser the needs_recovery flag before the fs is unlocked. */
-                EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+        lock_super(sb);
-                ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
+        /* Reset the needs_recovery flag before the fs is unlocked. */
-                unlock_super(sb);
+        EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-                jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+        ext4_commit_super(sb, 1);
-        }
+        unlock_super(sb);
+        jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
        return 0;
 }
@@ -3371,7 +3422,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
        int i;
 #endif
+        lock_kernel();
        /* Store the original options */
+        lock_super(sb);
        old_sb_flags = sb->s_flags;
        old_opts.s_mount_opt = sbi->s_mount_opt;
        old_opts.s_resuid = sbi->s_resuid;
@@ -3432,22 +3486,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
                            (sbi->s_mount_state & EXT4_VALID_FS))
                                es->s_state = cpu_to_le16(sbi->s_mount_state);
-                        /*
+                        if (sbi->s_journal)
-                         * We have to unlock super so that we can wait for
-                         * transactions.
-                         */
-                        if (sbi->s_journal) {
-                                unlock_super(sb);
                                ext4_mark_recovery_complete(sb, es);
-                                lock_super(sb);
-                        }
                } else {
                        int ret;
                        if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                        ~EXT4_FEATURE_RO_COMPAT_SUPP))) {
-                                printk(KERN_WARNING "EXT4-fs: %s: couldn't "
+                                ext4_msg(sb, KERN_WARNING, "couldn't "
                                       "remount RDWR because of unsupported "
-                                       "optional features (%x).\n", sb->s_id,
+                                       "optional features (%x)",
                                (le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
                                        ~EXT4_FEATURE_RO_COMPAT_SUPP));
                                err = -EROFS;
@@ -3456,17 +3503,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
                        /*
                         * Make sure the group descriptor checksums
-                         * are sane.  If they aren't, refuse to
+                         * are sane.  If they aren't, refuse to remount r/w.
-                         * remount r/w.
                         */
                        for (g = 0; g < sbi->s_groups_count; g++) {
                                struct ext4_group_desc *gdp =
                                        ext4_get_group_desc(sb, g, NULL);
                                if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
-                                        printk(KERN_ERR
+                                        ext4_msg(sb, KERN_ERR,
-               "EXT4-fs: ext4_remount: "
+               "ext4_remount: Checksum for group %u failed (%u!=%u)",
-                "Checksum for group %u failed (%u!=%u)\n",
                g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
                                               le16_to_cpu(gdp->bg_checksum));
                                        err = -EINVAL;
@@ -3480,11 +3525,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
                         * require a full umount/remount for now.
                         */
                        if (es->s_last_orphan) {
-                                printk(KERN_WARNING "EXT4-fs: %s: couldn't "
+                                ext4_msg(sb, KERN_WARNING, "Couldn't "
                                       "remount RDWR because of unprocessed "
                                       "orphan inode list.  Please "
-                                       "umount/remount instead.\n",
+                                       "umount/remount instead");
-                                       sb->s_id);
                                err = -EINVAL;
                                goto restore_opts;
                        }
@@ -3504,8 +3548,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
                                sb->s_flags &= ~MS_RDONLY;
                }
        }
+        ext4_setup_system_zone(sb);
        if (sbi->s_journal == NULL)
-                ext4_commit_super(sb, es, 1);
+                ext4_commit_super(sb, 1);
 #ifdef CONFIG_QUOTA
        /* Release old quota file names */
@@ -3514,7 +3559,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
                    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
                        kfree(old_opts.s_qf_names[i]);
 #endif
+        unlock_super(sb);
+        unlock_kernel();
        return 0;
 restore_opts:
        sb->s_flags = old_sb_flags;
        sbi->s_mount_opt = old_opts.s_mount_opt;
@@ -3532,6 +3580,8 @@ restore_opts:
                sbi->s_qf_names[i] = old_opts.s_qf_names[i];
        }
 #endif
+        unlock_super(sb);
+        unlock_kernel();
        return err;
 }
@@ -3545,9 +3595,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
        if (test_opt(sb, MINIX_DF)) {
                sbi->s_overhead_last = 0;
        } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
-                ext4_group_t ngroups = sbi->s_groups_count, i;
+                ext4_group_t i, ngroups = ext4_get_groups_count(sb);
                ext4_fsblk_t overhead = 0;
-                smp_rmb();
                /*
                 * Compute the overhead (FS structures).  This is constant
@@ -3599,11 +3648,12 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
               le64_to_cpup((void *)es->s_uuid + sizeof(u64));
        buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
        buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
        return 0;
 }
-/* Helper function for writing quotas on sync - we need to start transaction before quota file
+/* Helper function for writing quotas on sync - we need to start transaction
- * is locked for write. Otherwise the are possible deadlocks:
+ * before quota file is locked for write. Otherwise deadlocks are possible:
 * Process 1                         Process 2
 * ext4_create()                     quota_sync()
 *   jbd2_journal_start()                  write_dquot()
@@ -3627,7 +3677,7 @@ static int ext4_write_dquot(struct dquot *dquot)
        inode = dquot_to_inode(dquot);
        handle = ext4_journal_start(inode,
-                                        EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
+                                    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
        ret = dquot_commit(dquot);
@@ -3643,7 +3693,7 @@ static int ext4_acquire_dquot(struct dquot *dquot)
        handle_t *handle;
        handle = ext4_journal_start(dquot_to_inode(dquot),
-                                        EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
+                                    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
        ret = dquot_acquire(dquot);
@@ -3659,7 +3709,7 @@ static int ext4_release_dquot(struct dquot *dquot)
        handle_t *handle;
        handle = ext4_journal_start(dquot_to_inode(dquot),
-                                        EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+                                    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
        if (IS_ERR(handle)) {
                /* Release dquot anyway to avoid endless cycle in dqput() */
                dquot_release(dquot);
@@ -3707,7 +3757,7 @@ static int ext4_write_info(struct super_block *sb, int type)
 static int ext4_quota_on_mount(struct super_block *sb, int type)
 {
        return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
-                        EXT4_SB(sb)->s_jquota_fmt, type);
+                                  EXT4_SB(sb)->s_jquota_fmt, type);
 }
 /*
@@ -3738,9 +3788,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
        if (EXT4_SB(sb)->s_qf_names[type]) {
                /* Quotafile not in fs root? */
                if (path.dentry->d_parent != sb->s_root)
-                        printk(KERN_WARNING
+                        ext4_msg(sb, KERN_WARNING,
-                                "EXT4-fs: Quota file not on filesystem root. "
+                                "Quota file not on filesystem root. "
-                                "Journaled quota will not work.\n");
+                                "Journaled quota will not work");
        }
        /*
@@ -3823,8 +3873,8 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
        handle_t *handle = journal_current_handle();
        if (EXT4_SB(sb)->s_journal && !handle) {
-                printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
+                ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
-                        " cancelled because transaction is not started.\n",
+                        " cancelled because transaction is not started",
                        (unsigned long long)off, (unsigned long long)len);
                return -EIO;
        }
@@ -3878,10 +3928,10 @@ out:
 #endif
-static int ext4_get_sb(struct file_system_type *fs_type,
+static int ext4_get_sb(struct file_system_type *fs_type, int flags,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+                       const char *dev_name, void *data, struct vfsmount *mnt)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
+        return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
 }
 static struct file_system_type ext4_fs_type = {
@@ -3893,14 +3943,14 @@ static struct file_system_type ext4_fs_type = {
 };
 #ifdef CONFIG_EXT4DEV_COMPAT
-static int ext4dev_get_sb(struct file_system_type *fs_type,
+static int ext4dev_get_sb(struct file_system_type *fs_type, int flags,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+                          const char *dev_name, void *data,struct vfsmount *mnt)
 {
-        printk(KERN_WARNING "EXT4-fs: Update your userspace programs "
+        printk(KERN_WARNING "EXT4-fs (%s): Update your userspace programs "
-               "to mount using ext4\n");
+               "to mount using ext4\n", dev_name);
-        printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility "
+        printk(KERN_WARNING "EXT4-fs (%s): ext4dev backwards compatibility "
-               "will go away by 2.6.31\n");
+               "will go away by 2.6.31\n", dev_name);
-        return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
+        return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
 }
 static struct file_system_type ext4dev_fs_type = {
@@ -3917,13 +3967,16 @@ static int __init init_ext4_fs(void)
 {
        int err;
+        err = init_ext4_system_zone();
+        if (err)
+                return err;
        ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
        if (!ext4_kset)
-                return -ENOMEM;
+                goto out4;
        ext4_proc_root = proc_mkdir("fs/ext4", NULL);
        err = init_ext4_mballoc();
        if (err)
-                return err;
+                goto out3;
        err = init_ext4_xattr();
        if (err)
@@ -3948,6 +4001,11 @@ out1:
        exit_ext4_xattr();
 out2:
        exit_ext4_mballoc();
+out3:
+        remove_proc_entry("fs/ext4", NULL);
+        kset_unregister(ext4_kset);
+out4:
+        exit_ext4_system_zone();
        return err;
 }
@@ -3962,6 +4020,7 @@ static void __exit exit_ext4_fs(void)
        exit_ext4_mballoc();
        remove_proc_entry("fs/ext4", NULL);
        kset_unregister(ext4_kset);
+        exit_ext4_system_zone();
 }
 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");