42 files changed, 864 insertions, 508 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index d8062745716..e31f3691b15 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -140,6 +140,7 @@ config EXT4DEV_FS
        tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)"
        depends on EXPERIMENTAL
        select JBD2
+        select CRC16
        help
          Ext4dev is a predecessor filesystem of the next generation
          extended fs ext4, based on ext3 filesystem code. It will be
diff --git a/fs/aio.c b/fs/aio.c
index d02f43b50a3..f12db415c0f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -710,18 +710,9 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
        /*
         * Now we are all set to call the retry method in async
-         * context. By setting this thread's io_wait context
+         * context.
-         * to point to the wait queue entry inside the currently
-         * running iocb for the duration of the retry, we ensure
-         * that async notification wakeups are queued by the
-         * operation instead of blocking waits, and when notified,
-         * cause the iocb to be kicked for continuation (through
-         * the aio_wake_function callback).
         */
-        BUG_ON(current->io_wait != NULL);
-        current->io_wait = &iocb->ki_wait;
        ret = retry(iocb);
-        current->io_wait = NULL;
        if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
                BUG_ON(!list_empty(&iocb->ki_wait.task_list));
@@ -1508,10 +1499,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
 *      Simply triggers a retry of the operation via kick_iocb.
 *
 *      This callback is specified in the wait queue entry in
- *      a kiocb (current->io_wait points to this wait queue
+ *      a kiocb.
- *      entry when an aio operation executes; it is used
- *      instead of a synchronous wait when an i/o blocking
- *      condition is encountered during aio).
 *
 * Note:
 * This routine is executed with the wait queue lock held.
diff --git a/fs/attr.c b/fs/attr.c
index ae58bd3f875..966b73e25f8 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -103,12 +103,11 @@ EXPORT_SYMBOL(inode_setattr);
 int notify_change(struct dentry * dentry, struct iattr * attr)
 {
        struct inode *inode = dentry->d_inode;
-        mode_t mode;
+        mode_t mode = inode->i_mode;
        int error;
        struct timespec now;
        unsigned int ia_valid = attr->ia_valid;
-        mode = inode->i_mode;
        now = current_fs_time(inode->i_sb);
        attr->ia_ctime = now;
@@ -125,18 +124,25 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
                if (error)
                        return error;
        }
+        /*
+         * We now pass ATTR_KILL_S*ID to the lower level setattr function so
+         * that the function has the ability to reinterpret a mode change
+         * that's due to these bits. This adds an implicit restriction that
+         * no function will ever call notify_change with both ATTR_MODE and
+         * ATTR_KILL_S*ID set.
+         */
+        if ((ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) &&
+            (ia_valid & ATTR_MODE))
+                BUG();
        if (ia_valid & ATTR_KILL_SUID) {
-                attr->ia_valid &= ~ATTR_KILL_SUID;
                if (mode & S_ISUID) {
-                        if (!(ia_valid & ATTR_MODE)) {
+                        ia_valid = attr->ia_valid |= ATTR_MODE;
-                                ia_valid = attr->ia_valid |= ATTR_MODE;
+                        attr->ia_mode = (inode->i_mode & ~S_ISUID);
-                                attr->ia_mode = inode->i_mode;
-                        }
-                        attr->ia_mode &= ~S_ISUID;
                }
        }
        if (ia_valid & ATTR_KILL_SGID) {
-                attr->ia_valid &= ~ ATTR_KILL_SGID;
                if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
                        if (!(ia_valid & ATTR_MODE)) {
                                ia_valid = attr->ia_valid |= ATTR_MODE;
@@ -145,7 +151,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
                        attr->ia_mode &= ~S_ISGID;
                }
        }
-        if (!attr->ia_valid)
+        if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID)))
                return 0;
        if (ia_valid & ATTR_SIZE)
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index 19a9cafb5dd..be46805972f 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -182,7 +182,7 @@ int autofs_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_toke
 {
        struct autofs_wait_queue *wq, **wql;
-        for ( wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next ) {
+        for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
                if ( wq->wait_queue_token == wait_queue_token )
                        break;
        }
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 0d041a9cb34..1fe28e4754c 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -376,7 +376,7 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
        struct autofs_wait_queue *wq, **wql;
        mutex_lock(&sbi->wq_mutex);
-        for (wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next) {
+        for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
                if (wq->wait_queue_token == wait_queue_token)
                        break;
        }
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index dd4167762a8..279f3c5e0ce 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1538,6 +1538,11 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
        }
        time_buf.Attributes = 0;
+        /* skip mode change if it's just for clearing setuid/setgid */
+        if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
+                attrs->ia_valid &= ~ATTR_MODE;
        if (attrs->ia_valid & ATTR_MODE) {
                cFYI(1, ("Mode changed to 0x%x", attrs->ia_mode));
                mode = attrs->ia_mode;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6dacd39bf04..a4284ccac1f 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -3001,7 +3001,7 @@ static int __init init_sys32_ioctl(void)
        int i;
        for (i = 0; i < ARRAY_SIZE(ioctl_start); i++) {
-                if (ioctl_start[i].next != 0) {
+                if (ioctl_start[i].next) {
                        printk("ioctl translation %d bad\n",i);
                        return -1;
                }
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 5c817bd0838..350680fd7da 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -148,7 +148,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
 {
        struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
        struct page *pages[BLKS_PER_BUF];
-        unsigned i, blocknr, buffer, unread;
+        unsigned i, blocknr, buffer;
        unsigned long devsize;
        char *data;
@@ -175,7 +175,6 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
        devsize = mapping->host->i_size >> PAGE_CACHE_SHIFT;
        /* Ok, read in BLKS_PER_BUF pages completely first. */
-        unread = 0;
        for (i = 0; i < BLKS_PER_BUF; i++) {
                struct page *page = NULL;
@@ -362,7 +361,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
        if (offset & 3)
                return -EINVAL;
-        buf = kmalloc(256, GFP_KERNEL);
+        buf = kmalloc(CRAMFS_MAXPATHLEN, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;
@@ -376,7 +375,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
                int namelen, error;
                mutex_lock(&read_mutex);
-                de = cramfs_read(sb, OFFSET(inode) + offset, sizeof(*de)+256);
+                de = cramfs_read(sb, OFFSET(inode) + offset, sizeof(*de)+CRAMFS_MAXPATHLEN);
                name = (char *)(de+1);
                /*
@@ -426,7 +425,7 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s
                char *name;
                int namelen, retval;
-                de = cramfs_read(dir->i_sb, OFFSET(dir) + offset, sizeof(*de)+256);
+                de = cramfs_read(dir->i_sb, OFFSET(dir) + offset, sizeof(*de)+CRAMFS_MAXPATHLEN);
                name = (char *)(de+1);
                /* Try to take advantage of sorted directories */
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 5701f816faf..0b1ab016fa2 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -914,6 +914,14 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
                if (rc < 0)
                        goto out;
        }
+        /*
+         * mode change is for clearing setuid/setgid bits. Allow lower fs
+         * to interpret this in its own way.
+         */
+        if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
+                ia->ia_valid &= ~ATTR_MODE;
        rc = notify_change(lower_dentry, ia);
 out:
        fsstack_copy_attr_all(inode, lower_inode, NULL);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 77b9953624f..de618929195 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -463,7 +463,7 @@ static void ep_free(struct eventpoll *ep)
         * holding "epmutex" we can be sure that no file cleanup code will hit
         * us during this operation. So we can avoid the lock on "ep->lock".
         */
-        while ((rbp = rb_first(&ep->rbr)) != 0) {
+        while ((rbp = rb_first(&ep->rbr)) != NULL) {
                epi = rb_entry(rbp, struct epitem, rbn);
                ep_remove(ep, epi);
        }
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index dd1fd3c0fc0..a588e23841d 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -47,7 +47,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
        struct inode *inode = dentry->d_inode;
        int ret = 0;
-        J_ASSERT(ext3_journal_current_handle() == 0);
+        J_ASSERT(ext3_journal_current_handle() == NULL);
        /*
         * data=writeback:
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2f2b6864db1..3dec003b773 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1028,7 +1028,7 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
                }
                if (buffer_new(&dummy)) {
                        J_ASSERT(create != 0);
-                        J_ASSERT(handle != 0);
+                        J_ASSERT(handle != NULL);
                        /*
                         * Now that we do not always journal data, we should
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 771f7ada15d..44de1453c30 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -245,10 +245,10 @@ static int setup_new_group_blocks(struct super_block *sb,
                        brelse(gdb);
                        goto exit_bh;
                }
-                lock_buffer(bh);
+                lock_buffer(gdb);
-                memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size);
+                memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
                set_buffer_uptodate(gdb);
-                unlock_buffer(bh);
+                unlock_buffer(gdb);
                ext3_journal_dirty_metadata(handle, gdb);
                ext3_set_bit(bit, bh->b_data);
                brelse(gdb);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 141573de7a9..81868c0bc40 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1620,7 +1620,11 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
                }
                brelse (bh);
-                sb_set_blocksize(sb, blocksize);
+                if (!sb_set_blocksize(sb, blocksize)) {
+                        printk(KERN_ERR "EXT3-fs: bad blocksize %d.\n",
+                                blocksize);
+                        goto out_fail;
+                }
                logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
                offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
                bh = sb_bread(sb, logic_sb_block);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index b74bf436844..e906b65448e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -20,6 +20,7 @@
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
+#include "group.h"
 /*
 * balloc.c contains the blocks allocation and deallocation routines
 */
@@ -42,6 +43,94 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 }
+/*
+ * Initializes an uninitialized block bitmap if given, and returns the
+ * number of blocks free in the group.
+ *
+ * @sb:          superblock of the filesystem
+ * @bh:          buffer to fill with the bitmap, or NULL to only compute the
+ *               free block count; when non-NULL it must already be locked
+ * @block_group: group whose bitmap is being built
+ * @gdp:         group descriptor for @block_group
+ *
+ * Returns 0 when @bh is given and the descriptor checksum does not verify;
+ * in that case the descriptor's free counts are zeroed and every bit in the
+ * bitmap is set.
+ */
+unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
+                                int block_group, struct ext4_group_desc *gdp)
+{
+        unsigned long start;
+        int bit, bit_max;
+        unsigned free_blocks, group_blocks;
+        struct ext4_sb_info *sbi = EXT4_SB(sb);
+        if (bh) {
+                J_ASSERT_BH(bh, buffer_locked(bh));
+                /* If the checksum is bad, mark all blocks used to prevent
+                 * allocation, essentially implementing a per-group read-only
+                 * flag. */
+                if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+                        ext4_error(sb, __FUNCTION__,
+                                   "Checksum bad for group %u\n", block_group);
+                        gdp->bg_free_blocks_count = 0;
+                        gdp->bg_free_inodes_count = 0;
+                        gdp->bg_itable_unused = 0;
+                        memset(bh->b_data, 0xff, sb->s_blocksize);
+                        return 0;
+                }
+                /* Start from an all-free bitmap; used bits are set below. */
+                memset(bh->b_data, 0, sb->s_blocksize);
+        }
+        /* Check for superblock and gdt backups in this group; bit_max ends up
+         * as the number of leading blocks in the group that they occupy. */
+        bit_max = ext4_bg_has_super(sb, block_group);
+        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
+            block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
+                          sbi->s_desc_per_block) {
+                if (bit_max) {
+                        bit_max += ext4_bg_num_gdb(sb, block_group);
+                        bit_max +=
+                                le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
+                }
+        } else { /* For META_BG_BLOCK_GROUPS */
+                int group_rel = (block_group -
+                                 le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
+                                EXT4_DESC_PER_BLOCK(sb);
+                /* Only the first, second and last group of each meta group
+                 * account for one extra block here. */
+                if (group_rel == 0 || group_rel == 1 ||
+                    (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1))
+                        bit_max += 1;
+        }
+        if (block_group == sbi->s_groups_count - 1) {
+                /*
+                 * mke2fs always initializes the first and last groups, but
+                 * some other tool may have set EXT4_BG_BLOCK_UNINIT on the
+                 * last group, so make sure we calculate the right number of
+                 * free blocks for a last group that may be shorter than the
+                 * others.
+                 */
+                group_blocks = ext4_blocks_count(sbi->s_es) -
+                        le32_to_cpu(sbi->s_es->s_first_data_block) -
+                        (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1));
+        } else {
+                group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
+        }
+        free_blocks = group_blocks - bit_max;
+        if (bh) {
+                for (bit = 0; bit < bit_max; bit++)
+                        ext4_set_bit(bit, bh->b_data);
+                /* First block number of this group; bitmap bits are relative
+                 * to it. */
+                start = block_group * EXT4_BLOCKS_PER_GROUP(sb) +
+                        le32_to_cpu(sbi->s_es->s_first_data_block);
+                /* Set bits for block and inode bitmaps, and inode table */
+                ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
+                ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data);
+                /* bit_max is reused here as the end of the inode table. */
+                for (bit = (ext4_inode_table(sb, gdp) - start),
+                     bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
+                        ext4_set_bit(bit, bh->b_data);
+                /*
+                 * Also, if the number of blocks within the group is
+                 * less than blocksize * 8 (which is the size of the
+                 * bitmap), set the rest of the block bitmap to 1.
+                 */
+                mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
+        }
+        /* Also subtract the inode table blocks and the two bitmap blocks. */
+        return free_blocks - sbi->s_itb_per_group - 2;
+}
 /*
 * The free blocks are managed by bitmaps.  A file system contains several
 * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
@@ -119,7 +208,7 @@ block_in_use(ext4_fsblk_t block, struct super_block *sb, unsigned char *map)
 *
 * Return buffer_head on success or NULL in case of failure.
 */
-static struct buffer_head *
+struct buffer_head *
 read_block_bitmap(struct super_block *sb, unsigned int block_group)
 {
        int i;
@@ -127,11 +216,24 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
        struct buffer_head * bh = NULL;
        ext4_fsblk_t bitmap_blk;
-        desc = ext4_get_group_desc (sb, block_group, NULL);
+        desc = ext4_get_group_desc(sb, block_group, NULL);
        if (!desc)
                return NULL;
        bitmap_blk = ext4_block_bitmap(sb, desc);
-        bh = sb_bread(sb, bitmap_blk);
+        if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+                bh = sb_getblk(sb, bitmap_blk);
+                if (!buffer_uptodate(bh)) {
+                        lock_buffer(bh);
+                        if (!buffer_uptodate(bh)) {
+                                ext4_init_block_bitmap(sb, bh, block_group,
+                                                       desc);
+                                set_buffer_uptodate(bh);
+                        }
+                        unlock_buffer(bh);
+                }
+        } else {
+                bh = sb_bread(sb, bitmap_blk);
+        }
        if (!bh)
                ext4_error (sb, __FUNCTION__,
                            "Cannot read block bitmap - "
@@ -627,6 +729,7 @@ do_more:
        desc->bg_free_blocks_count =
                cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
                        group_freed);
+        desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
        spin_unlock(sb_bgl_lock(sbi, block_group));
        percpu_counter_add(&sbi->s_freeblocks_counter, count);
@@ -1685,8 +1788,11 @@ allocated:
                        ret_block, goal_hits, goal_attempts);
        spin_lock(sb_bgl_lock(sbi, group_no));
+        if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
+                gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
        gdp->bg_free_blocks_count =
                        cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
+        gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
        spin_unlock(sb_bgl_lock(sbi, group_no));
        percpu_counter_sub(&sbi->s_freeblocks_counter, num);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 0fb1e62b20d..f612bef9831 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -47,9 +47,7 @@ const struct file_operations ext4_dir_operations = {
        .compat_ioctl   = ext4_compat_ioctl,
 #endif
        .fsync          = ext4_sync_file,       /* BKL held */
-#ifdef CONFIG_EXT4_INDEX
        .release        = ext4_release_dir,
-#endif
 };
@@ -107,7 +105,6 @@ static int ext4_readdir(struct file * filp,
        sb = inode->i_sb;
-#ifdef CONFIG_EXT4_INDEX
        if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
                                    EXT4_FEATURE_COMPAT_DIR_INDEX) &&
            ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) ||
@@ -123,7 +120,6 @@ static int ext4_readdir(struct file * filp,
                 */
                EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL;
        }
-#endif
        stored = 0;
        offset = filp->f_pos & (sb->s_blocksize - 1);
@@ -232,7 +228,6 @@ out:
        return ret;
 }
-#ifdef CONFIG_EXT4_INDEX
 /*
 * These functions convert from the major/minor hash to an f_pos
 * value.
@@ -518,5 +513,3 @@ static int ext4_release_dir (struct inode * inode, struct file * filp)
        return 0;
 }
-#endif
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 78beb096f57..85287742f2a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -33,7 +33,7 @@
 #include <linux/fs.h>
 #include <linux/time.h>
 #include <linux/ext4_jbd2.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
 #include <linux/highuid.h>
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
@@ -52,7 +52,7 @@ static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
 {
        ext4_fsblk_t block;
-        block = le32_to_cpu(ex->ee_start);
+        block = le32_to_cpu(ex->ee_start_lo);
        block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
        return block;
 }
@@ -65,7 +65,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
 {
        ext4_fsblk_t block;
-        block = le32_to_cpu(ix->ei_leaf);
+        block = le32_to_cpu(ix->ei_leaf_lo);
        block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
        return block;
 }
@@ -77,7 +77,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
 */
 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
 {
-        ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff));
+        ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
        ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
 }
@@ -88,7 +88,7 @@ static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
 */
 static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
 {
-        ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff));
+        ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
        ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
 }
@@ -1409,8 +1409,7 @@ has_space:
        eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1);
        nearex = path[depth].p_ext;
        nearex->ee_block = newext->ee_block;
-        nearex->ee_start = newext->ee_start;
+        ext4_ext_store_pblock(nearex, ext_pblock(newext));
-        nearex->ee_start_hi = newext->ee_start_hi;
        nearex->ee_len = newext->ee_len;
 merge:
@@ -2177,7 +2176,6 @@ int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode,
        }
        /* ex2: iblock to iblock + maxblocks-1 : initialised */
        ex2->ee_block = cpu_to_le32(iblock);
-        ex2->ee_start = cpu_to_le32(newblock);
        ext4_ext_store_pblock(ex2, newblock);
        ex2->ee_len = cpu_to_le16(allocated);
        if (ex2 != ex)
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 2a167d7131f..8d50879d1c2 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -47,7 +47,7 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
        struct inode *inode = dentry->d_inode;
        int ret = 0;
-        J_ASSERT(ext4_journal_current_handle() == 0);
+        J_ASSERT(ext4_journal_current_handle() == NULL);
        /*
         * data=writeback:
diff --git a/fs/ext4/group.h b/fs/ext4/group.h
new file mode 100644
index 00000000000..1577910bb58
--- /dev/null
+++ b/fs/ext4/group.h
@@ -0,0 +1,27 @@
+/*
+ *  linux/fs/ext4/group.h
+ *
+ * Copyright (C) 2007 Cluster File Systems, Inc
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+#ifndef _LINUX_EXT4_GROUP_H
+#define _LINUX_EXT4_GROUP_H
+/*
+ * Group descriptor checksum helpers.  Callers store the value returned by
+ * ext4_group_desc_csum() in the descriptor's bg_checksum field, and treat a
+ * zero return from ext4_group_desc_csum_verify() as a bad checksum.
+ */
+extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
+                                   struct ext4_group_desc *gdp);
+extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
+                                       struct ext4_group_desc *gdp);
+/* Returns the buffer_head of the group's block bitmap, or NULL on failure. */
+struct buffer_head *read_block_bitmap(struct super_block *sb,
+                                      unsigned int block_group);
+/*
+ * Builds the block bitmap for a group into bh when bh is non-NULL, and
+ * returns the number of free blocks in the group.
+ */
+extern unsigned ext4_init_block_bitmap(struct super_block *sb,
+                                       struct buffer_head *bh, int group,
+                                       struct ext4_group_desc *desc);
+/* Same computation with no buffer: only the free block count is produced. */
+#define ext4_free_blocks_after_init(sb, group, desc)                    \
+                ext4_init_block_bitmap(sb, NULL, group, desc)
+/* Builds the inode bitmap for a group into the (locked) buffer bh. */
+extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
+                                       struct buffer_head *bh, int group,
+                                       struct ext4_group_desc *desc);
+/* Marks the tail of a bitmap, from start_bit towards end_bit, as in use. */
+extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
+#endif /* _LINUX_EXT4_GROUP_H */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index d0c7793d939..c61f37fd3f0 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -28,6 +28,7 @@
 #include "xattr.h"
 #include "acl.h"
+#include "group.h"
 /*
 * ialloc.c contains the inodes allocation and deallocation routines
@@ -43,6 +44,52 @@
 * the free blocks count in the block.
 */
+/*
+ * Mark the tail of a bitmap, starting at start_bit, as in use.
+ *
+ * The atomic setbit is only needed inside the first, partially covered byte
+ * (so that endianness is handled correctly); calling it hundreds or thousands
+ * of times for the remainder would be wasteful.  Since there are no other
+ * users of the bitmap at this point, the whole bytes that follow are filled
+ * with memset instead.
+ *
+ * Nothing is done when start_bit is not below end_bit.
+ */
+void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
+{
+        /* First byte-aligned bit index at or after start_bit. */
+        unsigned long byte_boundary = (start_bit + 7) & ~7UL;
+        int bit = start_bit;
+        if (start_bit >= end_bit)
+                return;
+        ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
+        /* Bit by bit up to the next byte boundary... */
+        while (bit < byte_boundary) {
+                ext4_set_bit(bit, bitmap);
+                bit++;
+        }
+        /* ...then whole bytes in one go. */
+        if (bit < end_bit)
+                memset(bitmap + (bit >> 3), 0xff, (end_bit - bit) >> 3);
+}
+/*
+ * Initializes an uninitialized inode bitmap.
+ *
+ * @sb:          superblock of the filesystem
+ * @bh:          locked buffer that receives the bitmap contents
+ * @block_group: group number, used for checksum verification and reporting
+ * @gdp:         group descriptor for @block_group
+ *
+ * Returns EXT4_INODES_PER_GROUP(sb), or 0 when the descriptor checksum does
+ * not verify; in that case the descriptor's free counts are zeroed and every
+ * bit in the bitmap is set.
+ */
+unsigned ext4_init_inode_bitmap(struct super_block *sb,
+                                struct buffer_head *bh, int block_group,
+                                struct ext4_group_desc *gdp)
+{
+        struct ext4_sb_info *sbi = EXT4_SB(sb);
+        J_ASSERT_BH(bh, buffer_locked(bh));
+        /* If checksum is bad mark all blocks and inodes used to prevent
+         * allocation, essentially implementing a per-group read-only flag. */
+        if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+                ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
+                           block_group);
+                gdp->bg_free_blocks_count = 0;
+                gdp->bg_free_inodes_count = 0;
+                gdp->bg_itable_unused = 0;
+                memset(bh->b_data, 0xff, sb->s_blocksize);
+                return 0;
+        }
+        /* Clear the bytes that cover this group's inodes. */
+        memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
+        /*
+         * The bitmap occupies a whole block, so pad with ones from the last
+         * inode to the end of the block (blocksize * 8 bits), the same bound
+         * ext4_init_block_bitmap() uses.  EXT4_BLOCKS_PER_GROUP(sb) is a
+         * block count and can be smaller than that, which would leave stale
+         * tail bits in the buffer untouched.
+         */
+        mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
+                        bh->b_data);
+        return EXT4_INODES_PER_GROUP(sb);
+}
 /*
 * Read the inode allocation bitmap for a given block_group, reading
@@ -59,8 +106,20 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group)
        desc = ext4_get_group_desc(sb, block_group, NULL);
        if (!desc)
                goto error_out;
+        if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
-        bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
+                bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc));
+                if (!buffer_uptodate(bh)) {
+                        lock_buffer(bh);
+                        if (!buffer_uptodate(bh)) {
+                                ext4_init_inode_bitmap(sb, bh, block_group,
+                                                       desc);
+                                set_buffer_uptodate(bh);
+                        }
+                        unlock_buffer(bh);
+                }
+        } else {
+                bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
+        }
        if (!bh)
                ext4_error(sb, "read_inode_bitmap",
                            "Cannot read inode bitmap - "
@@ -169,6 +228,8 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
                        if (is_directory)
                                gdp->bg_used_dirs_count = cpu_to_le16(
                                  le16_to_cpu(gdp->bg_used_dirs_count) - 1);
+                        gdp->bg_checksum = ext4_group_desc_csum(sbi,
+                                                        block_group, gdp);
                        spin_unlock(sb_bgl_lock(sbi, block_group));
                        percpu_counter_inc(&sbi->s_freeinodes_counter);
                        if (is_directory)
@@ -435,7 +496,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
        struct ext4_sb_info *sbi;
        int err = 0;
        struct inode *ret;
-        int i;
+        int i, free = 0;
        /* Cannot create files in a deleted directory */
        if (!dir || !dir->i_nlink)
@@ -517,11 +578,13 @@ repeat_in_this_group:
        goto out;
 got:
-        ino += group * EXT4_INODES_PER_GROUP(sb) + 1;
+        ino++;
-        if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
+        if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
-                ext4_error (sb, "ext4_new_inode",
+            ino > EXT4_INODES_PER_GROUP(sb)) {
-                            "reserved inode or inode > inodes count - "
+                ext4_error(sb, __FUNCTION__,
-                            "block_group = %d, inode=%lu", group, ino);
+                           "reserved inode or inode > inodes count - "
+                           "block_group = %d, inode=%lu", group,
+                           ino + group * EXT4_INODES_PER_GROUP(sb));
                err = -EIO;
                goto fail;
        }
@@ -529,13 +592,78 @@ got:
        BUFFER_TRACE(bh2, "get_write_access");
        err = ext4_journal_get_write_access(handle, bh2);
        if (err) goto fail;
+        /* We may have to initialize the block bitmap if it isn't already */
+        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
+            gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+                struct buffer_head *block_bh = read_block_bitmap(sb, group);
+                BUFFER_TRACE(block_bh, "get block bitmap access");
+                err = ext4_journal_get_write_access(handle, block_bh);
+                if (err) {
+                        brelse(block_bh);
+                        goto fail;
+                }
+                free = 0;
+                spin_lock(sb_bgl_lock(sbi, group));
+                /* recheck and clear flag under lock if we still need to */
+                if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+                        gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
+                        free = ext4_free_blocks_after_init(sb, group, gdp);
+                        gdp->bg_free_blocks_count = cpu_to_le16(free);
+                }
+                spin_unlock(sb_bgl_lock(sbi, group));
+                /* Don't need to dirty bitmap block if we didn't change it */
+                if (free) {
+                        BUFFER_TRACE(block_bh, "dirty block bitmap");
+                        err = ext4_journal_dirty_metadata(handle, block_bh);
+                }
+                brelse(block_bh);
+                if (err)
+                        goto fail;
+        }
        spin_lock(sb_bgl_lock(sbi, group));
+        /* If we didn't allocate from within the initialized part of the inode
+         * table then we need to initialize up to this inode. */
+        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+                if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+                        gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+                        /* When marking the block group with
+                         * ~EXT4_BG_INODE_UNINIT we don't want to depend
+                         * on the value of bg_itable_unused even though
+                         * mke2fs could have initialized the same for us.
+                         * Instead we calculated the value below
+                         */
+                        free = 0;
+                } else {
+                        free = EXT4_INODES_PER_GROUP(sb) -
+                                le16_to_cpu(gdp->bg_itable_unused);
+                }
+                /*
+                 * Check the relative inode number against the last used
+                 * relative inode number in this group. if it is greater
+                 * we need to  update the bg_itable_unused count
+                 *
+                 */
+                if (ino > free)
+                        gdp->bg_itable_unused =
+                                cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
+        }
        gdp->bg_free_inodes_count =
                cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
        if (S_ISDIR(mode)) {
                gdp->bg_used_dirs_count =
                        cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
        }
+        gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
        spin_unlock(sb_bgl_lock(sbi, group));
        BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
        err = ext4_journal_dirty_metadata(handle, bh2);
@@ -557,7 +685,7 @@ got:
                inode->i_gid = current->fsgid;
        inode->i_mode = mode;
-        inode->i_ino = ino;
+        inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
        /* This is the optimal IO size (for stat), not the fs block size */
        inode->i_blocks = 0;
        inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
@@ -573,11 +701,6 @@ got:
        /* dirsync only applies to directories */
        if (!S_ISDIR(mode))
                ei->i_flags &= ~EXT4_DIRSYNC_FL;
-#ifdef EXT4_FRAGMENTS
-        ei->i_faddr = 0;
-        ei->i_frag_no = 0;
-        ei->i_frag_size = 0;
-#endif
        ei->i_file_acl = 0;
        ei->i_dir_acl = 0;
        ei->i_dtime = 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0df2b1e06d0..5489703d957 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1027,7 +1027,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
                }
                if (buffer_new(&dummy)) {
                        J_ASSERT(create != 0);
-                        J_ASSERT(handle != 0);
+                        J_ASSERT(handle != NULL);
                        /*
                         * Now that we do not always journal data, we should
@@ -2711,11 +2711,6 @@ void ext4_read_inode(struct inode * inode)
        }
        inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
        ei->i_flags = le32_to_cpu(raw_inode->i_flags);
-#ifdef EXT4_FRAGMENTS
-        ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
-        ei->i_frag_no = raw_inode->i_frag;
-        ei->i_frag_size = raw_inode->i_fsize;
-#endif
        ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
        if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
            cpu_to_le32(EXT4_OS_HURD))
@@ -2860,11 +2855,6 @@ static int ext4_do_update_inode(handle_t *handle,
        raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
        raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
        raw_inode->i_flags = cpu_to_le32(ei->i_flags);
-#ifdef EXT4_FRAGMENTS
-        raw_inode->i_faddr = cpu_to_le32(ei->i_faddr);
-        raw_inode->i_frag = ei->i_frag_no;
-        raw_inode->i_fsize = ei->i_frag_size;
-#endif
        if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
            cpu_to_le32(EXT4_OS_HURD))
                raw_inode->i_file_acl_high =
@@ -3243,12 +3233,14 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
                                                      iloc, handle);
                        if (ret) {
                                EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
-                                if (mnt_count != sbi->s_es->s_mnt_count) {
+                                if (mnt_count !=
+                                        le16_to_cpu(sbi->s_es->s_mnt_count)) {
                                        ext4_warning(inode->i_sb, __FUNCTION__,
                                        "Unable to expand inode %lu. Delete"
                                        " some EAs or run e2fsck.",
                                        inode->i_ino);
-                                        mnt_count = sbi->s_es->s_mnt_count;
+                                        mnt_count =
+                                          le16_to_cpu(sbi->s_es->s_mnt_count);
                                }
                        }
                }
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5fdb862e71c..94ee6f315dc 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -144,7 +144,6 @@ struct dx_map_entry
        u16 size;
 };
-#ifdef CONFIG_EXT4_INDEX
 static inline unsigned dx_get_block (struct dx_entry *entry);
 static void dx_set_block (struct dx_entry *entry, unsigned value);
 static inline unsigned dx_get_hash (struct dx_entry *entry);
@@ -766,8 +765,6 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
        dx_set_block(new, block);
        dx_set_count(entries, count + 1);
 }
-#endif
 static void ext4_update_dx_flag(struct inode *inode)
 {
@@ -869,7 +866,6 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
        name = dentry->d_name.name;
        if (namelen > EXT4_NAME_LEN)
                return NULL;
-#ifdef CONFIG_EXT4_INDEX
        if (is_dx(dir)) {
                bh = ext4_dx_find_entry(dentry, res_dir, &err);
                /*
@@ -881,7 +877,6 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
                        return bh;
                dxtrace(printk("ext4_find_entry: dx failed, falling back\n"));
        }
-#endif
        nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
        start = EXT4_I(dir)->i_dir_start_lookup;
        if (start >= nblocks)
@@ -957,7 +952,6 @@ cleanup_and_exit:
        return ret;
 }
-#ifdef CONFIG_EXT4_INDEX
 static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
                       struct ext4_dir_entry_2 **res_dir, int *err)
 {
@@ -1025,7 +1019,6 @@ errout:
        dx_release (frames);
        return NULL;
 }
-#endif
 static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
 {
@@ -1121,7 +1114,6 @@ static inline void ext4_set_de_type(struct super_block *sb,
                de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
 }
-#ifdef CONFIG_EXT4_INDEX
 /*
 * Move count entries from end of map between two memory locations.
 * Returns pointer to last entry moved.
@@ -1266,8 +1258,6 @@ errout:
        *error = err;
        return NULL;
 }
-#endif
 /*
 * Add a new entry into a directory (leaf) block.  If de is non-NULL,
@@ -1364,7 +1354,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
        return 0;
 }
-#ifdef CONFIG_EXT4_INDEX
 /*
 * This converts a one block unindexed directory to a 3 block indexed
 * directory, and adds the dentry to the indexed directory.
@@ -1443,7 +1432,6 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        return add_dirent_to_buf(handle, dentry, inode, de, bh);
 }
-#endif
 /*
 *      ext4_add_entry()
@@ -1464,9 +1452,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
        struct ext4_dir_entry_2 *de;
        struct super_block * sb;
        int     retval;
-#ifdef CONFIG_EXT4_INDEX
        int     dx_fallback=0;
-#endif
        unsigned blocksize;
        u32 block, blocks;
@@ -1474,7 +1460,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
        blocksize = sb->s_blocksize;
        if (!dentry->d_name.len)
                return -EINVAL;
-#ifdef CONFIG_EXT4_INDEX
        if (is_dx(dir)) {
                retval = ext4_dx_add_entry(handle, dentry, inode);
                if (!retval || (retval != ERR_BAD_DX_DIR))
@@ -1483,7 +1468,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
                dx_fallback++;
                ext4_mark_inode_dirty(handle, dir);
        }
-#endif
        blocks = dir->i_size >> sb->s_blocksize_bits;
        for (block = 0, offset = 0; block < blocks; block++) {
                bh = ext4_bread(handle, dir, block, 0, &retval);
@@ -1493,11 +1477,9 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
                if (retval != -ENOSPC)
                        return retval;
-#ifdef CONFIG_EXT4_INDEX
                if (blocks == 1 && !dx_fallback &&
                    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
                        return make_indexed_dir(handle, dentry, inode, bh);
-#endif
                brelse(bh);
        }
        bh = ext4_append(handle, dir, &block, &retval);
@@ -1509,7 +1491,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
        return add_dirent_to_buf(handle, dentry, inode, de, bh);
 }
-#ifdef CONFIG_EXT4_INDEX
 /*
 * Returns 0 for success, or a negative error value
 */
@@ -1644,7 +1625,6 @@ cleanup:
        dx_release(frames);
        return err;
 }
-#endif
 /*
 * ext4_delete_entry deletes a directory entry by merging it with the
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 472fc0d3e1c..bd8a52bb399 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -16,6 +16,7 @@
 #include <linux/errno.h>
 #include <linux/slab.h>
+#include "group.h"
 #define outside(b, first, last) ((b) < (first) || (b) >= (last))
 #define inside(b, first, last)  ((b) >= (first) && (b) < (last))
@@ -140,22 +141,29 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
 }
 /*
- * To avoid calling the atomic setbit hundreds or thousands of times, we only
+ * If we have fewer than thresh credits, extend by EXT4_MAX_TRANS_DATA.
- * need to use it within a single byte (to ensure we get endianness right).
+ * If that fails, restart the transaction & regain write access for the
- * We can use memset for the rest of the bitmap as there are no other users.
+ * buffer head which is used for block_bitmap modifications.
+ *
+ * A negative result from ext4_journal_extend() is returned to the caller
+ * unchanged; only a positive result takes the restart path.  After a
+ * restart, write access to bh is requested again through
+ * ext4_journal_get_write_access().
+ *
+ * Returns 0 when the handle already had at least thresh credits or was
+ * successfully extended/restarted, otherwise the non-zero value reported
+ * by the journal call that failed.
  */
-static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
+static int extend_or_restart_transaction(handle_t *handle, int thresh,
+                                         struct buffer_head *bh)
 {
-        int i;
+        int err;
+        if (handle->h_buffer_credits >= thresh)
+                return 0;
-        if (start_bit >= end_bit)
+        err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
-                return;
+        if (err < 0)
+                return err;
+        if (err) {
+                if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
+                        return err;
+                if ((err = ext4_journal_get_write_access(handle, bh)))
+                        return err;
+        }
-        ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
+        return 0;
-        for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
-                ext4_set_bit(i, bitmap);
-        if (i < end_bit)
-                memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
 }
 /*
@@ -180,8 +188,9 @@ static int setup_new_group_blocks(struct super_block *sb,
        int i;
        int err = 0, err2;
-        handle = ext4_journal_start_sb(sb, reserved_gdb + gdblocks +
+        /* This transaction may be extended/restarted along the way */
-                                       2 + sbi->s_itb_per_group);
+        handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
        if (IS_ERR(handle))
                return PTR_ERR(handle);
@@ -208,6 +217,9 @@ static int setup_new_group_blocks(struct super_block *sb,
                ext4_debug("update backup group %#04lx (+%d)\n", block, bit);
+                if ((err = extend_or_restart_transaction(handle, 1, bh)))
+                        goto exit_bh;
                gdb = sb_getblk(sb, block);
                if (!gdb) {
                        err = -EIO;
@@ -217,10 +229,10 @@ static int setup_new_group_blocks(struct super_block *sb,
                        brelse(gdb);
                        goto exit_bh;
                }
-                lock_buffer(bh);
+                lock_buffer(gdb);
-                memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size);
+                memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
                set_buffer_uptodate(gdb);
-                unlock_buffer(bh);
+                unlock_buffer(gdb);
                ext4_journal_dirty_metadata(handle, gdb);
                ext4_set_bit(bit, bh->b_data);
                brelse(gdb);
@@ -233,6 +245,9 @@ static int setup_new_group_blocks(struct super_block *sb,
                ext4_debug("clear reserved block %#04lx (+%d)\n", block, bit);
+                if ((err = extend_or_restart_transaction(handle, 1, bh)))
+                        goto exit_bh;
                if (IS_ERR(gdb = bclean(handle, sb, block))) {
                        err = PTR_ERR(bh);
                        goto exit_bh;
@@ -254,6 +269,10 @@ static int setup_new_group_blocks(struct super_block *sb,
                struct buffer_head *it;
                ext4_debug("clear inode block %#04lx (+%d)\n", block, bit);
+                if ((err = extend_or_restart_transaction(handle, 1, bh)))
+                        goto exit_bh;
                if (IS_ERR(it = bclean(handle, sb, block))) {
                        err = PTR_ERR(it);
                        goto exit_bh;
@@ -262,6 +281,10 @@ static int setup_new_group_blocks(struct super_block *sb,
                brelse(it);
                ext4_set_bit(bit, bh->b_data);
        }
+        if ((err = extend_or_restart_transaction(handle, 2, bh)))
+                goto exit_bh;
        mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb),
                        bh->b_data);
        ext4_journal_dirty_metadata(handle, bh);
@@ -289,7 +312,6 @@ exit_journal:
        return err;
 }
 /*
 * Iterate through the groups which hold BACKUP superblock/GDT copies in an
 * ext4 filesystem.  The counters should be initialized to 1, 5, and 7 before
@@ -842,6 +864,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
        gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
        gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
+        gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
        /*
         * Make the new blocks and inodes valid next.  We do this before
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4c8d31c6145..b11e9e2bcd0 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -37,12 +37,14 @@
 #include <linux/quotaops.h>
 #include <linux/seq_file.h>
 #include <linux/log2.h>
+#include <linux/crc16.h>
 #include <asm/uaccess.h>
 #include "xattr.h"
 #include "acl.h"
 #include "namei.h"
+#include "group.h"
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
                             unsigned long journal_devnum);
@@ -68,31 +70,31 @@ static void ext4_write_super_lockfs(struct super_block *sb);
 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
                               struct ext4_group_desc *bg)
 {
-        return le32_to_cpu(bg->bg_block_bitmap) |
+        return le32_to_cpu(bg->bg_block_bitmap_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-                 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
+                (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 }
 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
                               struct ext4_group_desc *bg)
 {
-        return le32_to_cpu(bg->bg_inode_bitmap) |
+        return le32_to_cpu(bg->bg_inode_bitmap_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
+                (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 }
 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
                              struct ext4_group_desc *bg)
 {
-        return le32_to_cpu(bg->bg_inode_table) |
+        return le32_to_cpu(bg->bg_inode_table_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
+                (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 }
 void ext4_block_bitmap_set(struct super_block *sb,
                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 {
-        bg->bg_block_bitmap = cpu_to_le32((u32)blk);
+        bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
                bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 }
@@ -100,7 +102,7 @@ void ext4_block_bitmap_set(struct super_block *sb,
 void ext4_inode_bitmap_set(struct super_block *sb,
                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 {
-        bg->bg_inode_bitmap  = cpu_to_le32((u32)blk);
+        bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
                bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 }
@@ -108,7 +110,7 @@ void ext4_inode_bitmap_set(struct super_block *sb,
 void ext4_inode_table_set(struct super_block *sb,
                          struct ext4_group_desc *bg, ext4_fsblk_t blk)
 {
-        bg->bg_inode_table = cpu_to_le32((u32)blk);
+        bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
                bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 }
@@ -1037,7 +1039,7 @@ static int parse_options (char *options, struct super_block *sb,
                        if (option < 0)
                                return 0;
                        if (option == 0)
-                                option = JBD_DEFAULT_MAX_COMMIT_AGE;
+                                option = JBD2_DEFAULT_MAX_COMMIT_AGE;
                        sbi->s_commit_interval = HZ * option;
                        break;
                case Opt_data_journal:
@@ -1308,6 +1310,43 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
        return res;
 }
+/*
+ * Compute the checksum that belongs in a group descriptor's bg_checksum.
+ *
+ * When the superblock does not advertise EXT4_FEATURE_RO_COMPAT_GDT_CSUM
+ * no CRC is computed and the result is 0.  Otherwise a CRC16 seeded with
+ * ~0 is run over, in this order: the filesystem UUID, the group number as
+ * a little-endian 32-bit value, and the descriptor bytes that precede
+ * bg_checksum.  If EXT4_FEATURE_INCOMPAT_64BIT is set and s_desc_size
+ * reaches past the checksum field, the descriptor bytes following
+ * bg_checksum (up to s_desc_size) are folded in as well.
+ *
+ * The bg_checksum field itself is never part of the input.  The result is
+ * returned in on-disk (little-endian) byte order.
+ */
+__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
+                            struct ext4_group_desc *gdp)
+{
+        __u16 crc = 0;
+        if (sbi->s_es->s_feature_ro_compat &
+            cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+                int offset = offsetof(struct ext4_group_desc, bg_checksum);
+                __le32 le_group = cpu_to_le32(block_group);
+                crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
+                crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
+                crc = crc16(crc, (__u8 *)gdp, offset);
+                offset += sizeof(gdp->bg_checksum); /* skip checksum */
+                /* 64-bit descriptors: also cover what follows bg_checksum */
+                if ((sbi->s_es->s_feature_incompat &
+                     cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
+                    offset < le16_to_cpu(sbi->s_es->s_desc_size))
+                        crc = crc16(crc, (__u8 *)gdp + offset,
+                                    le16_to_cpu(sbi->s_es->s_desc_size) -
+                                        offset);
+        }
+        return cpu_to_le16(crc);
+}
+/*
+ * Check a group descriptor against its stored checksum.
+ *
+ * Returns 0 only when EXT4_FEATURE_RO_COMPAT_GDT_CSUM is set and
+ * gdp->bg_checksum differs from the value ext4_group_desc_csum() computes
+ * for the same group; returns 1 in every other case, including whenever
+ * the feature is not enabled.
+ */
+int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
+                                struct ext4_group_desc *gdp)
+{
+        if ((sbi->s_es->s_feature_ro_compat &
+             cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
+            (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
+                return 0;
+        return 1;
+}
 /* Called at mount-time, super-block is locked */
 static int ext4_check_descriptors (struct super_block * sb)
 {
@@ -1319,13 +1358,17 @@ static int ext4_check_descriptors (struct super_block * sb)
        ext4_fsblk_t inode_table;
        struct ext4_group_desc * gdp = NULL;
        int desc_block = 0;
+        int flexbg_flag = 0;
        int i;
+        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+                flexbg_flag = 1;
        ext4_debug ("Checking group descriptors");
        for (i = 0; i < sbi->s_groups_count; i++)
        {
-                if (i == sbi->s_groups_count - 1)
+                if (i == sbi->s_groups_count - 1 || flexbg_flag)
                        last_block = ext4_blocks_count(sbi->s_es) - 1;
                else
                        last_block = first_block +
@@ -1362,7 +1405,16 @@ static int ext4_check_descriptors (struct super_block * sb)
                                    i, inode_table);
                        return 0;
                }
-                first_block += EXT4_BLOCKS_PER_GROUP(sb);
+                if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
+                        ext4_error(sb, __FUNCTION__,
+                                   "Checksum for group %d failed (%u!=%u)\n", i,
+                                   le16_to_cpu(ext4_group_desc_csum(sbi, i,
+                                                                    gdp)),
+                                   le16_to_cpu(gdp->bg_checksum));
+                        return 0;
+                }
+                if (!flexbg_flag)
+                        first_block += EXT4_BLOCKS_PER_GROUP(sb);
                gdp = (struct ext4_group_desc *)
                        ((__u8 *)gdp + EXT4_DESC_SIZE(sb));
        }
@@ -1726,14 +1778,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
                if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
                        sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
        }
-        sbi->s_frag_size = EXT4_MIN_FRAG_SIZE <<
-                                   le32_to_cpu(es->s_log_frag_size);
-        if (blocksize != sbi->s_frag_size) {
-                printk(KERN_ERR
-                       "EXT4-fs: fragsize %lu != blocksize %u (unsupported)\n",
-                       sbi->s_frag_size, blocksize);
-                goto failed_mount;
-        }
        sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
                if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
@@ -1747,7 +1791,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
        } else
                sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
        sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
-        sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
        sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
        if (EXT4_INODE_SIZE(sb) == 0)
                goto cantfind_ext4;
@@ -1771,12 +1814,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
                        sbi->s_blocks_per_group);
                goto failed_mount;
        }
-        if (sbi->s_frags_per_group > blocksize * 8) {
-                printk (KERN_ERR
-                        "EXT4-fs: #fragments per group too big: %lu\n",
-                        sbi->s_frags_per_group);
-                goto failed_mount;
-        }
        if (sbi->s_inodes_per_group > blocksize * 8) {
                printk (KERN_ERR
                        "EXT4-fs: #inodes per group too big: %lu\n",
@@ -2630,7 +2667,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
        if (test_opt(sb, MINIX_DF)) {
                sbi->s_overhead_last = 0;
-        } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
+        } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
                unsigned long ngroups = sbi->s_groups_count, i;
                ext4_fsblk_t overhead = 0;
                smp_rmb();
@@ -2665,14 +2702,14 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
                overhead += ngroups * (2 + sbi->s_itb_per_group);
                sbi->s_overhead_last = overhead;
                smp_wmb();
-                sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
+                sbi->s_blocks_last = ext4_blocks_count(es);
        }
        buf->f_type = EXT4_SUPER_MAGIC;
        buf->f_bsize = sb->s_blocksize;
        buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
        buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
-        es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
+        ext4_free_blocks_count_set(es, buf->f_bfree);
        buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
        if (buf->f_bfree < ext4_r_blocks_count(es))
                buf->f_bavail = 0;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index b10d68fffb5..86387302c2a 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -750,12 +750,11 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
                }
        } else {
                /* Allocate a buffer where we construct the new block. */
-                s->base = kmalloc(sb->s_blocksize, GFP_KERNEL);
+                s->base = kzalloc(sb->s_blocksize, GFP_KERNEL);
                /* assert(header == s->base) */
                error = -ENOMEM;
                if (s->base == NULL)
                        goto cleanup;
-                memset(s->base, 0, sb->s_blocksize);
                header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
                header(s->base)->h_blocks = cpu_to_le32(1);
                header(s->base)->h_refcount = cpu_to_le32(1);
@@ -1121,7 +1120,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
        int total_ino, total_blk;
        void *base, *start, *end;
        int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
-        int s_min_extra_isize = EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize;
+        int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
        down_write(&EXT4_I(inode)->xattr_sem);
 retry:
@@ -1293,7 +1292,7 @@ retry:
                i.name = b_entry_name;
                i.value = buffer;
-                i.value_len = cpu_to_le32(size);
+                i.value_len = size;
                error = ext4_xattr_block_find(inode, &i, bs);
                if (error)
                        goto cleanup;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index d1acab93133..3763757f9fe 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -63,13 +63,21 @@ static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
 * Set dentry and possibly attribute timeouts from the lookup/mk*
 * replies
 */
-static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o)
+static void fuse_change_entry_timeout(struct dentry *entry,
+                                      struct fuse_entry_out *o)
 {
        fuse_dentry_settime(entry,
                time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
-        if (entry->d_inode)
+}
-                get_fuse_inode(entry->d_inode)->i_time =
-                        time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
+/*
+ * Convert the attr_valid/attr_valid_nsec pair of a fuse_attr_out reply
+ * with time_to_jiffies().
+ */
+static u64 attr_timeout(struct fuse_attr_out *o)
+{
+        return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
+}
+/*
+ * Same conversion as attr_timeout(), but reading the attr_valid fields
+ * carried in a fuse_entry_out reply.
+ */
+static u64 entry_attr_timeout(struct fuse_entry_out *o)
+{
+        return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
 }
 /*
@@ -108,13 +116,19 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
                             struct dentry *entry,
                             struct fuse_entry_out *outarg)
 {
+        struct fuse_conn *fc = get_fuse_conn(dir);
+        memset(outarg, 0, sizeof(struct fuse_entry_out));
        req->in.h.opcode = FUSE_LOOKUP;
        req->in.h.nodeid = get_node_id(dir);
        req->in.numargs = 1;
        req->in.args[0].size = entry->d_name.len + 1;
        req->in.args[0].value = entry->d_name.name;
        req->out.numargs = 1;
-        req->out.args[0].size = sizeof(struct fuse_entry_out);
+        if (fc->minor < 9)
+                req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+        else
+                req->out.args[0].size = sizeof(struct fuse_entry_out);
        req->out.args[0].value = outarg;
 }
@@ -140,6 +154,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
                struct fuse_req *req;
                struct fuse_req *forget_req;
                struct dentry *parent;
+                u64 attr_version;
                /* For negative dentries, always do a fresh lookup */
                if (!inode)
@@ -156,6 +171,10 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
                        return 0;
                }
+                spin_lock(&fc->lock);
+                attr_version = fc->attr_version;
+                spin_unlock(&fc->lock);
                parent = dget_parent(entry);
                fuse_lookup_init(req, parent->d_inode, entry, &outarg);
                request_send(fc, req);
@@ -180,8 +199,10 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
                if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
                        return 0;
-                fuse_change_attributes(inode, &outarg.attr);
+                fuse_change_attributes(inode, &outarg.attr,
-                fuse_change_timeout(entry, &outarg);
+                                       entry_attr_timeout(&outarg),
+                                       attr_version);
+                fuse_change_entry_timeout(entry, &outarg);
        }
        return 1;
 }
@@ -228,6 +249,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
        struct fuse_conn *fc = get_fuse_conn(dir);
        struct fuse_req *req;
        struct fuse_req *forget_req;
+        u64 attr_version;
        if (entry->d_name.len > FUSE_NAME_MAX)
                return ERR_PTR(-ENAMETOOLONG);
@@ -242,6 +264,10 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
                return ERR_PTR(PTR_ERR(forget_req));
        }
+        spin_lock(&fc->lock);
+        attr_version = fc->attr_version;
+        spin_unlock(&fc->lock);
        fuse_lookup_init(req, dir, entry, &outarg);
        request_send(fc, req);
        err = req->out.h.error;
@@ -253,7 +279,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
                err = -EIO;
        if (!err && outarg.nodeid) {
                inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
-                                  &outarg.attr);
+                                  &outarg.attr, entry_attr_timeout(&outarg),
+                                  attr_version);
                if (!inode) {
                        fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
                        return ERR_PTR(-ENOMEM);
@@ -276,7 +303,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
        entry->d_op = &fuse_dentry_operations;
        if (!err)
-                fuse_change_timeout(entry, &outarg);
+                fuse_change_entry_timeout(entry, &outarg);
        else
                fuse_invalidate_entry_cache(entry);
        return NULL;
@@ -335,6 +362,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
        flags &= ~O_NOCTTY;
        memset(&inarg, 0, sizeof(inarg));
+        memset(&outentry, 0, sizeof(outentry));
        inarg.flags = flags;
        inarg.mode = mode;
        req->in.h.opcode = FUSE_CREATE;
@@ -345,7 +373,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
        req->in.args[1].size = entry->d_name.len + 1;
        req->in.args[1].value = entry->d_name.name;
        req->out.numargs = 2;
-        req->out.args[0].size = sizeof(outentry);
+        if (fc->minor < 9)
+                req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+        else
+                req->out.args[0].size = sizeof(outentry);
        req->out.args[0].value = &outentry;
        req->out.args[1].size = sizeof(outopen);
        req->out.args[1].value = &outopen;
@@ -363,7 +394,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
        fuse_put_request(fc, req);
        inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
-                          &outentry.attr);
+                          &outentry.attr, entry_attr_timeout(&outentry), 0);
        if (!inode) {
                flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
                ff->fh = outopen.fh;
@@ -373,7 +404,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
        }
        fuse_put_request(fc, forget_req);
        d_instantiate(entry, inode);
-        fuse_change_timeout(entry, &outentry);
+        fuse_change_entry_timeout(entry, &outentry);
        file = lookup_instantiate_filp(nd, entry, generic_file_open);
        if (IS_ERR(file)) {
                ff->fh = outopen.fh;
@@ -410,9 +441,13 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
                return PTR_ERR(forget_req);
        }
+        memset(&outarg, 0, sizeof(outarg));
        req->in.h.nodeid = get_node_id(dir);
        req->out.numargs = 1;
-        req->out.args[0].size = sizeof(outarg);
+        if (fc->minor < 9)
+                req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+        else
+                req->out.args[0].size = sizeof(outarg);
        req->out.args[0].value = &outarg;
        request_send(fc, req);
        err = req->out.h.error;
@@ -428,7 +463,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
                goto out_put_forget_req;
        inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
-                          &outarg.attr);
+                          &outarg.attr, entry_attr_timeout(&outarg), 0);
        if (!inode) {
                fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
                return -ENOMEM;
@@ -451,7 +486,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
        } else
                d_instantiate(entry, inode);
-        fuse_change_timeout(entry, &outarg);
+        fuse_change_entry_timeout(entry, &outarg);
        fuse_invalidate_attr(dir);
        return 0;
@@ -663,52 +698,84 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
        return err;
 }
-static int fuse_do_getattr(struct inode *inode)
+/*
+ * Fill @stat from a fuse_attr reply.
+ *
+ * The file-type bits of the mode and the rdev are taken from the in-core
+ * @inode; the permission bits, inode number, link count, ownership,
+ * timestamps, size and block count are taken from @attr.  The device is
+ * the superblock's s_dev and the block size is 1 << inode->i_blkbits.
+ */
+static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
+                          struct kstat *stat)
+{
+        stat->dev = inode->i_sb->s_dev;
+        stat->ino = attr->ino;
+        stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
+        stat->nlink = attr->nlink;
+        stat->uid = attr->uid;
+        stat->gid = attr->gid;
+        stat->rdev = inode->i_rdev;
+        stat->atime.tv_sec = attr->atime;
+        stat->atime.tv_nsec = attr->atimensec;
+        stat->mtime.tv_sec = attr->mtime;
+        stat->mtime.tv_nsec = attr->mtimensec;
+        stat->ctime.tv_sec = attr->ctime;
+        stat->ctime.tv_nsec = attr->ctimensec;
+        stat->size = attr->size;
+        stat->blocks = attr->blocks;
+        stat->blksize = (1 << inode->i_blkbits);
+}
+/*
+ * Send a FUSE_GETATTR request for @inode and apply the reply.
+ *
+ * @stat: if non-NULL, filled from the returned attributes
+ * @file: if non-NULL and @inode is a regular file, its file handle is
+ *        passed in the request with FUSE_GETATTR_FH set
+ *
+ * Returns 0 on success, the PTR_ERR() of a failed request allocation,
+ * the error from the reply header, or -EIO (after make_bad_inode()) when
+ * the file type in the reply differs from the inode's.
+ */
+static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
+                           struct file *file)
 {
        int err;
-        struct fuse_attr_out arg;
+        struct fuse_getattr_in inarg;
+        struct fuse_attr_out outarg;
        struct fuse_conn *fc = get_fuse_conn(inode);
-        struct fuse_req *req = fuse_get_req(fc);
+        struct fuse_req *req;
+        u64 attr_version;
+        req = fuse_get_req(fc);
        if (IS_ERR(req))
                return PTR_ERR(req);
+        /*
+         * Sample the connection's attribute version before the request
+         * is sent; it is handed to fuse_change_attributes() together
+         * with the reply.
+         */
+        spin_lock(&fc->lock);
+        attr_version = fc->attr_version;
+        spin_unlock(&fc->lock);
+        memset(&inarg, 0, sizeof(inarg));
+        memset(&outarg, 0, sizeof(outarg));
+        /* Directories have separate file-handle space */
+        if (file && S_ISREG(inode->i_mode)) {
+                struct fuse_file *ff = file->private_data;
+                inarg.getattr_flags |= FUSE_GETATTR_FH;
+                inarg.fh = ff->fh;
+        }
        req->in.h.opcode = FUSE_GETATTR;
        req->in.h.nodeid = get_node_id(inode);
+        req->in.numargs = 1;
+        req->in.args[0].size = sizeof(inarg);
+        req->in.args[0].value = &inarg;
        req->out.numargs = 1;
-        req->out.args[0].size = sizeof(arg);
+        /* Connections with minor < 9 use the FUSE_COMPAT size for the reply */
+        if (fc->minor < 9)
-        req->out.args[0].value = &arg;
+                req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+        else
+                req->out.args[0].size = sizeof(outarg);
+        req->out.args[0].value = &outarg;
        request_send(fc, req);
        err = req->out.h.error;
        fuse_put_request(fc, req);
        if (!err) {
-                if ((inode->i_mode ^ arg.attr.mode) & S_IFMT) {
+                if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
                        make_bad_inode(inode);
                        err = -EIO;
                } else {
-                        struct fuse_inode *fi = get_fuse_inode(inode);
+                        fuse_change_attributes(inode, &outarg.attr,
-                        fuse_change_attributes(inode, &arg.attr);
+                                               attr_timeout(&outarg),
-                        fi->i_time = time_to_jiffies(arg.attr_valid,
+                                               attr_version);
-                                                     arg.attr_valid_nsec);
+                        if (stat)
+                                fuse_fillattr(inode, &outarg.attr, stat);
                }
        }
        return err;
 }
 /*
- * Check if attributes are still valid, and if not send a GETATTR
- * request to refresh them.
- */
-static int fuse_refresh_attributes(struct inode *inode)
-{
-        struct fuse_inode *fi = get_fuse_inode(inode);
-        if (fi->i_time < get_jiffies_64())
-                return fuse_do_getattr(inode);
-        else
-                return 0;
-}
-/*
 * Calling into a user-controlled filesystem gives the filesystem
 * daemon ptrace-like capabilities over the requester process.  This
 * means, that the filesystem daemon is able to record the exact
@@ -721,7 +788,7 @@ static int fuse_refresh_attributes(struct inode *inode)
 * for which the owner of the mount has ptrace privilege.  This
 * excludes processes started by other users, suid or sgid processes.
 */
-static int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
+int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
 {
        if (fc->flags & FUSE_ALLOW_OTHER)
                return 1;
@@ -795,11 +862,14 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
         */
        if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
            ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
-                err = fuse_refresh_attributes(inode);
+                struct fuse_inode *fi = get_fuse_inode(inode);
-                if (err)
+                if (fi->i_time < get_jiffies_64()) {
-                        return err;
+                        err = fuse_do_getattr(inode, NULL, NULL);
+                        if (err)
+                                return err;
-                refreshed = true;
+                        refreshed = true;
+                }
        }
        if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
@@ -809,7 +879,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
                   attributes.  This is also needed, because the root
                   node will at first have no permissions */
                if (err == -EACCES && !refreshed) {
-                        err = fuse_do_getattr(inode);
+                        err = fuse_do_getattr(inode, NULL, NULL);
                        if (!err)
                                err = generic_permission(inode, mask, NULL);
                }
@@ -825,7 +895,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
                        if (refreshed)
                                return -EACCES;
-                        err = fuse_do_getattr(inode);
+                        err = fuse_do_getattr(inode, NULL, NULL);
                        if (!err && !(inode->i_mode & S_IXUGO))
                                return -EACCES;
                }
@@ -962,6 +1032,20 @@ static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
        return file ? fuse_fsync_common(file, de, datasync, 1) : 0;
 }
+/*
+ * Decide from the ia_valid flags whether the mtime should be updated.
+ *
+ * Returns false only for a size change that comes with ATTR_OPEN or
+ * ATTR_FILE set and without ATTR_MTIME_SET; returns true in every other
+ * case.
+ */
+static bool update_mtime(unsigned ivalid)
+{
+        /* Always update if mtime is explicitly set  */
+        if (ivalid & ATTR_MTIME_SET)
+                return true;
+        /* If it's an open(O_TRUNC) or an ftruncate(), don't update */
+        if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
+                return false;
+        /* In all other cases update */
+        return true;
+}
 static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
 {
        unsigned ivalid = iattr->ia_valid;
@@ -974,16 +1058,19 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
                arg->valid |= FATTR_GID,    arg->gid = iattr->ia_gid;
        if (ivalid & ATTR_SIZE)
                arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
-        /* You can only _set_ these together (they may change by themselves) */
+        if (ivalid & ATTR_ATIME) {
-        if ((ivalid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) {
+                arg->valid |= FATTR_ATIME;
-                arg->valid |= FATTR_ATIME | FATTR_MTIME;
                arg->atime = iattr->ia_atime.tv_sec;
-                arg->mtime = iattr->ia_mtime.tv_sec;
+                arg->atimensec = iattr->ia_atime.tv_nsec;
+                if (!(ivalid & ATTR_ATIME_SET))
+                        arg->valid |= FATTR_ATIME_NOW;
        }
-        if (ivalid & ATTR_FILE) {
+        if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) {
-                struct fuse_file *ff = iattr->ia_file->private_data;
+                arg->valid |= FATTR_MTIME;
-                arg->valid |= FATTR_FH;
+                arg->mtime = iattr->ia_mtime.tv_sec;
-                arg->fh = ff->fh;
+                arg->mtimensec = iattr->ia_mtime.tv_nsec;
+                if (!(ivalid & ATTR_MTIME_SET))
+                        arg->valid |= FATTR_MTIME_NOW;
        }
 }
@@ -995,22 +1082,28 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
 * vmtruncate() doesn't allow for this case, so do the rlimit checking
 * and the actual truncation by hand.
 */
-static int fuse_setattr(struct dentry *entry, struct iattr *attr)
+static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
+                           struct file *file)
 {
        struct inode *inode = entry->d_inode;
        struct fuse_conn *fc = get_fuse_conn(inode);
-        struct fuse_inode *fi = get_fuse_inode(inode);
        struct fuse_req *req;
        struct fuse_setattr_in inarg;
        struct fuse_attr_out outarg;
        int err;
+        if (!fuse_allow_task(fc, current))
+                return -EACCES;
        if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
                err = inode_change_ok(inode, attr);
                if (err)
                        return err;
        }
+        if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc)
+                return 0;
        if (attr->ia_valid & ATTR_SIZE) {
                unsigned long limit;
                if (IS_SWAPFILE(inode))
@@ -1027,14 +1120,28 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
                return PTR_ERR(req);
        memset(&inarg, 0, sizeof(inarg));
+        memset(&outarg, 0, sizeof(outarg));
        iattr_to_fattr(attr, &inarg);
+        if (file) {
+                struct fuse_file *ff = file->private_data;
+                inarg.valid |= FATTR_FH;
+                inarg.fh = ff->fh;
+        }
+        if (attr->ia_valid & ATTR_SIZE) {
+                /* For mandatory locking in truncate */
+                inarg.valid |= FATTR_LOCKOWNER;
+                inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
+        }
        req->in.h.opcode = FUSE_SETATTR;
        req->in.h.nodeid = get_node_id(inode);
        req->in.numargs = 1;
        req->in.args[0].size = sizeof(inarg);
        req->in.args[0].value = &inarg;
        req->out.numargs = 1;
-        req->out.args[0].size = sizeof(outarg);
+        if (fc->minor < 9)
+                req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+        else
+                req->out.args[0].size = sizeof(outarg);
        req->out.args[0].value = &outarg;
        request_send(fc, req);
        err = req->out.h.error;
@@ -1050,11 +1157,18 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
                return -EIO;
        }
-        fuse_change_attributes(inode, &outarg.attr);
+        fuse_change_attributes(inode, &outarg.attr, attr_timeout(&outarg), 0);
-        fi->i_time = time_to_jiffies(outarg.attr_valid, outarg.attr_valid_nsec);
        return 0;
 }
+/*
+ * Setattr entry point taking only the dentry and the iattr.  Forwards to
+ * fuse_do_setattr(), passing attr->ia_file as the file when ATTR_FILE is
+ * set in ia_valid and NULL otherwise.
+ */
+static int fuse_setattr(struct dentry *entry, struct iattr *attr)
+{
+        if (attr->ia_valid & ATTR_FILE)
+                return fuse_do_setattr(entry, attr, attr->ia_file);
+        else
+                return fuse_do_setattr(entry, attr, NULL);
+}
 static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
                        struct kstat *stat)
 {
@@ -1066,8 +1180,10 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
        if (!fuse_allow_task(fc, current))
                return -EACCES;
-        err = fuse_refresh_attributes(inode);
+        if (fi->i_time < get_jiffies_64())
-        if (!err) {
+                err = fuse_do_getattr(inode, stat, NULL);
+        else {
+                err = 0;
                generic_fillattr(inode, stat);
                stat->mode = fi->orig_i_mode;
        }
@@ -1172,6 +1288,9 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
        struct fuse_getxattr_out outarg;
        ssize_t ret;
+        if (!fuse_allow_task(fc, current))
+                return -EACCES;
        if (fc->no_listxattr)
                return -EOPNOTSUPP;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c4b98c03a46..0fcdba9d47c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -28,7 +28,9 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
                return PTR_ERR(req);
        memset(&inarg, 0, sizeof(inarg));
-        inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+        inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
+        if (!fc->atomic_o_trunc)
+                inarg.flags &= ~O_TRUNC;
        req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
        req->in.h.nodeid = get_node_id(inode);
        req->in.numargs = 1;
@@ -54,6 +56,9 @@ struct fuse_file *fuse_file_alloc(void)
                        kfree(ff);
                        ff = NULL;
-                }
-                atomic_set(&ff->count, 0);
+                } else {
+                        /* Only initialise members when ff was not just freed and set to NULL */
+                        INIT_LIST_HEAD(&ff->write_entry);
+                        atomic_set(&ff->count, 0);
+                }
        }
        return ff;
@@ -148,12 +151,18 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir)
 {
        struct fuse_file *ff = file->private_data;
        if (ff) {
+                struct fuse_conn *fc = get_fuse_conn(inode);
                fuse_release_fill(ff, get_node_id(inode), file->f_flags,
                                  isdir ? FUSE_RELEASEDIR : FUSE_RELEASE);
                /* Hold vfsmount and dentry until release is finished */
                ff->reserved_req->vfsmount = mntget(file->f_path.mnt);
                ff->reserved_req->dentry = dget(file->f_path.dentry);
+                spin_lock(&fc->lock);
+                list_del(&ff->write_entry);
+                spin_unlock(&fc->lock);
                /*
                 * Normally this will send the RELEASE request,
                 * however if some asynchronous READ or WRITE requests
@@ -180,7 +189,7 @@ static int fuse_release(struct inode *inode, struct file *file)
 * Scramble the ID space with XTEA, so that the value of the files_struct
 * pointer is not exposed to userspace.
 */
-static u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
+u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
 {
        u32 *k = fc->scramble_key;
        u64 v = (unsigned long) id;
@@ -299,11 +308,19 @@ void fuse_read_fill(struct fuse_req *req, struct fuse_file *ff,
 }
+/*
+ * Fill @req as a FUSE_READ through fuse_read_fill() and send it.
+ *
+ * When @owner is non-NULL, FUSE_READ_LOCKOWNER is set in read_flags and
+ * the id computed by fuse_lock_owner_id() for @owner is stored in
+ * lock_owner.  Returns the size of the first output argument once the
+ * request has been sent.
+ */
 static size_t fuse_send_read(struct fuse_req *req, struct file *file,
-                             struct inode *inode, loff_t pos, size_t count)
+                             struct inode *inode, loff_t pos, size_t count,
+                             fl_owner_t owner)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_file *ff = file->private_data;
        fuse_read_fill(req, ff, inode, pos, count, FUSE_READ);
+        if (owner != NULL) {
+                struct fuse_read_in *inarg = &req->misc.read_in;
+                inarg->read_flags |= FUSE_READ_LOCKOWNER;
+                inarg->lock_owner = fuse_lock_owner_id(fc, owner);
+        }
        request_send(fc, req);
        return req->out.args[0].size;
 }
@@ -327,7 +344,8 @@ static int fuse_readpage(struct file *file, struct page *page)
        req->out.page_zeroing = 1;
        req->num_pages = 1;
        req->pages[0] = page;
-        fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE);
+        fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE,
+                       NULL);
        err = req->out.h.error;
        fuse_put_request(fc, req);
        if (!err)
@@ -434,30 +452,47 @@ out:
        return err;
 }
-static size_t fuse_send_write(struct fuse_req *req, struct file *file,
+/*
+ * Set up @req as a FUSE_WRITE of @count bytes at @pos for file handle
+ * ff->fh.  The in/out argument structures used are the ones embedded in
+ * req->misc.write.  A non-zero @writepage sets FUSE_WRITE_CACHE in
+ * write_flags.  The request is only filled in here, not sent.
+ */
+static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
-                              struct inode *inode, loff_t pos, size_t count)
+                            struct inode *inode, loff_t pos, size_t count,
+                            int writepage)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
-        struct fuse_file *ff = file->private_data;
+        struct fuse_write_in *inarg = &req->misc.write.in;
-        struct fuse_write_in inarg;
+        struct fuse_write_out *outarg = &req->misc.write.out;
-        struct fuse_write_out outarg;
-        memset(&inarg, 0, sizeof(struct fuse_write_in));
+        memset(inarg, 0, sizeof(struct fuse_write_in));
-        inarg.fh = ff->fh;
+        inarg->fh = ff->fh;
-        inarg.offset = pos;
+        inarg->offset = pos;
-        inarg.size = count;
+        inarg->size = count;
+        inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0;
        req->in.h.opcode = FUSE_WRITE;
        req->in.h.nodeid = get_node_id(inode);
        req->in.argpages = 1;
        req->in.numargs = 2;
-        req->in.args[0].size = sizeof(struct fuse_write_in);
+        /* Connections with minor < 9 use the FUSE_COMPAT size for the write header */
+        if (fc->minor < 9)
-        req->in.args[0].value = &inarg;
+                req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
+        else
+                req->in.args[0].size = sizeof(struct fuse_write_in);
+        req->in.args[0].value = inarg;
        req->in.args[1].size = count;
        req->out.numargs = 1;
        req->out.args[0].size = sizeof(struct fuse_write_out);
-        req->out.args[0].value = &outarg;
+        req->out.args[0].value = outarg;
+}
+/*
+ * Fill @req as a FUSE_WRITE through fuse_write_fill() (with writepage
+ * set to 0) and send it.
+ *
+ * When @owner is non-NULL, FUSE_WRITE_LOCKOWNER is set in write_flags
+ * and the id computed by fuse_lock_owner_id() for @owner is stored in
+ * lock_owner.  Returns the size field of the write reply held in
+ * req->misc.write.out.
+ */
+static size_t fuse_send_write(struct fuse_req *req, struct file *file,
+                              struct inode *inode, loff_t pos, size_t count,
+                              fl_owner_t owner)
+{
+        struct fuse_conn *fc = get_fuse_conn(inode);
+        fuse_write_fill(req, file->private_data, inode, pos, count, 0);
+        if (owner != NULL) {
+                struct fuse_write_in *inarg = &req->misc.write.in;
+                inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
+                inarg->lock_owner = fuse_lock_owner_id(fc, owner);
+        }
        request_send(fc, req);
-        return outarg.size;
+        return req->misc.write.out.size;
 }
 static int fuse_write_begin(struct file *file, struct address_space *mapping,
@@ -478,6 +513,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
        int err;
        size_t nres;
        struct fuse_conn *fc = get_fuse_conn(inode);
+        struct fuse_inode *fi = get_fuse_inode(inode);
        unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
        struct fuse_req *req;
@@ -491,7 +527,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
        req->num_pages = 1;
        req->pages[0] = page;
        req->page_offset = offset;
-        nres = fuse_send_write(req, file, inode, pos, count);
+        nres = fuse_send_write(req, file, inode, pos, count, NULL);
        err = req->out.h.error;
        fuse_put_request(fc, req);
        if (!err && !nres)
@@ -499,6 +535,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
        if (!err) {
                pos += nres;
                spin_lock(&fc->lock);
+                fi->attr_version = ++fc->attr_version;
                if (pos > inode->i_size)
                        i_size_write(inode, pos);
                spin_unlock(&fc->lock);
@@ -591,9 +628,11 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
                nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
                nbytes = min(count, nbytes);
                if (write)
-                        nres = fuse_send_write(req, file, inode, pos, nbytes);
+                        nres = fuse_send_write(req, file, inode, pos, nbytes,
+                                               current->files);
                else
-                        nres = fuse_send_read(req, file, inode, pos, nbytes);
+                        nres = fuse_send_read(req, file, inode, pos, nbytes,
+                                              current->files);
                fuse_release_user_pages(req, !write);
                if (req->out.h.error) {
                        if (!res)
@@ -695,7 +734,8 @@ static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
 }
 static void fuse_lk_fill(struct fuse_req *req, struct file *file,
-                         const struct file_lock *fl, int opcode, pid_t pid)
+                         const struct file_lock *fl, int opcode, pid_t pid,
+                         int flock)
 {
        struct inode *inode = file->f_path.dentry->d_inode;
        struct fuse_conn *fc = get_fuse_conn(inode);
@@ -708,6 +748,8 @@ static void fuse_lk_fill(struct fuse_req *req, struct file *file,
        arg->lk.end = fl->fl_end;
        arg->lk.type = fl->fl_type;
        arg->lk.pid = pid;
+        if (flock)
+                arg->lk_flags |= FUSE_LK_FLOCK;
        req->in.h.opcode = opcode;
        req->in.h.nodeid = get_node_id(inode);
        req->in.numargs = 1;
@@ -727,7 +769,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
        if (IS_ERR(req))
                return PTR_ERR(req);
-        fuse_lk_fill(req, file, fl, FUSE_GETLK, 0);
+        fuse_lk_fill(req, file, fl, FUSE_GETLK, 0, 0);
        req->out.numargs = 1;
        req->out.args[0].size = sizeof(outarg);
        req->out.args[0].value = &outarg;
@@ -740,7 +782,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
        return err;
 }
-static int fuse_setlk(struct file *file, struct file_lock *fl)
+static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
 {
        struct inode *inode = file->f_path.dentry->d_inode;
        struct fuse_conn *fc = get_fuse_conn(inode);
@@ -757,7 +799,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl)
        if (IS_ERR(req))
                return PTR_ERR(req);
-        fuse_lk_fill(req, file, fl, opcode, pid);
+        fuse_lk_fill(req, file, fl, opcode, pid, flock);
        request_send(fc, req);
        err = req->out.h.error;
        /* locking is restartable */
@@ -783,8 +825,25 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
                if (fc->no_lock)
                        err = posix_lock_file_wait(file, fl);
                else
-                        err = fuse_setlk(file, fl);
+                        err = fuse_setlk(file, fl, 0);
+        }
+        return err;
+}
+/*
+ * Handler installed as .flock in the fuse file operations.
+ *
+ * If the connection has no_lock set, the lock is handled with
+ * flock_lock_file_wait().  Otherwise fl_owner is set to the struct file
+ * and the lock is sent through fuse_setlk() with its flock argument set
+ * to 1.  @cmd is not used.  Returns the result of whichever call was made.
+ */
+static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl)
+{
+        struct inode *inode = file->f_path.dentry->d_inode;
+        struct fuse_conn *fc = get_fuse_conn(inode);
+        int err;
+        if (fc->no_lock) {
+                err = flock_lock_file_wait(file, fl);
+        } else {
+                /* emulate flock with POSIX locks */
+                fl->fl_owner = (fl_owner_t) file;
+                err = fuse_setlk(file, fl, 1);
        }
        return err;
 }
@@ -836,6 +895,7 @@ static const struct file_operations fuse_file_operations = {
        .release        = fuse_release,
        .fsync          = fuse_fsync,
        .lock           = fuse_file_lock,
+        .flock          = fuse_file_flock,
        .splice_read    = generic_file_splice_read,
 };
@@ -848,6 +908,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
        .release        = fuse_release,
        .fsync          = fuse_fsync,
        .lock           = fuse_file_lock,
+        .flock          = fuse_file_flock,
        /* no mmap and splice_read */
 };
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1764506fdd1..6c5461de1a5 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -67,6 +67,12 @@ struct fuse_inode {
        /** The sticky bit in inode->i_mode may have been removed, so
            preserve the original mode */
        mode_t orig_i_mode;
+        /** Version of last attribute change */
+        u64 attr_version;
+        /** Files usable in writepage.  Protected by fc->lock */
+        struct list_head write_files;
 };
 /** FUSE specific file data */
@@ -79,6 +85,9 @@ struct fuse_file {
        /** Refcount */
        atomic_t count;
+        /** Entry on inode's write_files list */
+        struct list_head write_entry;
 };
 /** One input argument of a request */
@@ -210,6 +219,10 @@ struct fuse_req {
                struct fuse_init_in init_in;
                struct fuse_init_out init_out;
                struct fuse_read_in read_in;
+                struct {
+                        struct fuse_write_in in;
+                        struct fuse_write_out out;
+                } write;
                struct fuse_lk_in lk_in;
        } misc;
@@ -317,6 +330,9 @@ struct fuse_conn {
        /** Do readpages asynchronously?  Only set in INIT */
        unsigned async_read : 1;
+        /** Do not send separate SETATTR request before open(O_TRUNC)  */
+        unsigned atomic_o_trunc : 1;
        /*
         * The following bitfields are only for optimization purposes
         * and hence races in setting them will not cause malfunction
@@ -387,6 +403,9 @@ struct fuse_conn {
        /** Reserved request for the DESTROY message */
        struct fuse_req *destroy_req;
+        /** Version counter for attribute changes */
+        u64 attr_version;
 };
 static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -416,7 +435,8 @@ extern const struct file_operations fuse_dev_operations;
 * Get a filled in inode
 */
 struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
-                        int generation, struct fuse_attr *attr);
+                        int generation, struct fuse_attr *attr,
+                        u64 attr_valid, u64 attr_version);
 /**
 * Send FORGET command
@@ -477,7 +497,8 @@ void fuse_init_symlink(struct inode *inode);
 /**
 * Change attributes of an inode
 */
-void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr);
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
+                            u64 attr_valid, u64 attr_version);
 /**
 * Initialize the client device
@@ -565,3 +586,10 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc);
 * Is file type valid?
 */
 int fuse_valid_type(int m);
+/**
+ * Is task allowed to perform filesystem operation?
+ */
+int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task);
+u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fd0735715c1..9a68d697084 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -56,6 +56,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
        fi->i_time = 0;
        fi->nodeid = 0;
        fi->nlookup = 0;
+        INIT_LIST_HEAD(&fi->write_files);
        fi->forget_req = fuse_request_alloc();
        if (!fi->forget_req) {
                kmem_cache_free(fuse_inode_cachep, inode);
@@ -68,6 +69,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 static void fuse_destroy_inode(struct inode *inode)
 {
        struct fuse_inode *fi = get_fuse_inode(inode);
+        BUG_ON(!list_empty(&fi->write_files));
        if (fi->forget_req)
                fuse_request_free(fi->forget_req);
        kmem_cache_free(fuse_inode_cachep, inode);
@@ -117,12 +119,22 @@ static void fuse_truncate(struct address_space *mapping, loff_t offset)
        unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
 }
-void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
+                            u64 attr_valid, u64 attr_version)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);
        loff_t oldsize;
+        spin_lock(&fc->lock);
+        if (attr_version != 0 && fi->attr_version > attr_version) {
+                spin_unlock(&fc->lock);
+                return;
+        }
+        fi->attr_version = ++fc->attr_version;
+        fi->i_time = attr_valid;
        inode->i_ino     = attr->ino;
        inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
        inode->i_nlink   = attr->nlink;
@@ -136,6 +148,11 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
        inode->i_ctime.tv_sec   = attr->ctime;
        inode->i_ctime.tv_nsec  = attr->ctimensec;
+        if (attr->blksize != 0)
+                inode->i_blkbits = ilog2(attr->blksize);
+        else
+                inode->i_blkbits = inode->i_sb->s_blocksize_bits;
        /*
         * Don't set the sticky bit in i_mode, unless we want the VFS
         * to check permissions.  This prevents failures due to the
@@ -145,7 +162,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
        if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
                inode->i_mode &= ~S_ISVTX;
-        spin_lock(&fc->lock);
        oldsize = inode->i_size;
        i_size_write(inode, attr->size);
        spin_unlock(&fc->lock);
@@ -194,7 +210,8 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp)
 }
 struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
-                        int generation, struct fuse_attr *attr)
+                        int generation, struct fuse_attr *attr,
+                        u64 attr_valid, u64 attr_version)
 {
        struct inode *inode;
        struct fuse_inode *fi;
@@ -222,7 +239,8 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
        spin_lock(&fc->lock);
        fi->nlookup ++;
        spin_unlock(&fc->lock);
-        fuse_change_attributes(inode, attr);
+        fuse_change_attributes(inode, attr, attr_valid, attr_version);
        return inode;
 }
@@ -287,6 +305,11 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
        struct fuse_statfs_out outarg;
        int err;
+        if (!fuse_allow_task(fc, current)) {
+                buf->f_type = FUSE_SUPER_MAGIC;
+                return 0;
+        }
        req = fuse_get_req(fc);
        if (IS_ERR(req))
                return PTR_ERR(req);
@@ -452,6 +475,7 @@ static struct fuse_conn *new_conn(void)
                }
                fc->reqctr = 0;
                fc->blocked = 1;
+                fc->attr_version = 1;
                get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
        }
 out:
@@ -483,7 +507,7 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
        attr.mode = mode;
        attr.ino = FUSE_ROOT_ID;
        attr.nlink = 1;
-        return fuse_iget(sb, 1, 0, &attr);
+        return fuse_iget(sb, 1, 0, &attr, 0, 0);
 }
 static const struct super_operations fuse_super_operations = {
@@ -514,6 +538,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                                fc->async_read = 1;
                        if (!(arg->flags & FUSE_POSIX_LOCKS))
                                fc->no_lock = 1;
+                        if (arg->flags & FUSE_ATOMIC_O_TRUNC)
+                                fc->atomic_o_trunc = 1;
                } else {
                        ra_pages = fc->max_read / PAGE_CACHE_SIZE;
                        fc->no_lock = 1;
@@ -536,7 +562,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
        arg->major = FUSE_KERNEL_VERSION;
        arg->minor = FUSE_KERNEL_MINOR_VERSION;
        arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
-        arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS;
+        arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_FILE_OPS |
+                FUSE_ATOMIC_O_TRUNC;
        req->in.h.opcode = FUSE_INIT;
        req->in.numargs = 1;
        req->in.args[0].size = sizeof(*arg);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index a003d50edcd..a263d82761d 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -375,7 +375,7 @@ void journal_commit_transaction(journal_t *journal)
                        struct buffer_head *bh = jh2bh(jh);
                        jbd_lock_bh_state(bh);
-                        jbd_slab_free(jh->b_committed_data, bh->b_size);
+                        jbd_free(jh->b_committed_data, bh->b_size);
                        jh->b_committed_data = NULL;
                        jbd_unlock_bh_state(bh);
                }
@@ -792,14 +792,14 @@ restart_loop:
                 * Otherwise, we can just throw away the frozen data now.
                 */
                if (jh->b_committed_data) {
-                        jbd_slab_free(jh->b_committed_data, bh->b_size);
+                        jbd_free(jh->b_committed_data, bh->b_size);
                        jh->b_committed_data = NULL;
                        if (jh->b_frozen_data) {
                                jh->b_committed_data = jh->b_frozen_data;
                                jh->b_frozen_data = NULL;
                        }
                } else if (jh->b_frozen_data) {
-                        jbd_slab_free(jh->b_frozen_data, bh->b_size);
+                        jbd_free(jh->b_frozen_data, bh->b_size);
                        jh->b_frozen_data = NULL;
                }
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index a6be78c05dc..5d9fec0b7eb 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -83,7 +83,6 @@ EXPORT_SYMBOL(journal_force_commit);
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
-static int journal_create_jbd_slab(size_t slab_size);
 /*
 * Helper function used to manage commit timeouts
@@ -218,7 +217,7 @@ static int journal_start_thread(journal_t *journal)
        if (IS_ERR(t))
                return PTR_ERR(t);
-        wait_event(journal->j_wait_done_commit, journal->j_task != 0);
+        wait_event(journal->j_wait_done_commit, journal->j_task != NULL);
        return 0;
 }
@@ -230,7 +229,8 @@ static void journal_kill_thread(journal_t *journal)
        while (journal->j_task) {
                wake_up(&journal->j_wait_commit);
                spin_unlock(&journal->j_state_lock);
-                wait_event(journal->j_wait_done_commit, journal->j_task == 0);
+                wait_event(journal->j_wait_done_commit,
+                                journal->j_task == NULL);
                spin_lock(&journal->j_state_lock);
        }
        spin_unlock(&journal->j_state_lock);
@@ -334,10 +334,10 @@ repeat:
                char *tmp;
                jbd_unlock_bh_state(bh_in);
-                tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS);
+                tmp = jbd_alloc(bh_in->b_size, GFP_NOFS);
                jbd_lock_bh_state(bh_in);
                if (jh_in->b_frozen_data) {
-                        jbd_slab_free(tmp, bh_in->b_size);
+                        jbd_free(tmp, bh_in->b_size);
                        goto repeat;
                }
@@ -654,7 +654,7 @@ static journal_t * journal_init_common (void)
        journal_t *journal;
        int err;
-        journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL);
+        journal = kmalloc(sizeof(*journal), GFP_KERNEL);
        if (!journal)
                goto fail;
        memset(journal, 0, sizeof(*journal));
@@ -1095,13 +1095,6 @@ int journal_load(journal_t *journal)
                }
        }
-        /*
-         * Create a slab for this blocksize
-         */
-        err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
-        if (err)
-                return err;
        /* Let the recovery code check whether it needs to recover any
         * data from the journal. */
        if (journal_recover(journal))
@@ -1615,86 +1608,6 @@ int journal_blocks_per_page(struct inode *inode)
 }
 /*
- * Simple support for retrying memory allocations.  Introduced to help to
- * debug different VM deadlock avoidance strategies.
- */
-void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
-{
-        return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
-}
-/*
- * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
- * and allocate frozen and commit buffers from these slabs.
- *
- * Reason for doing this is to avoid, SLAB_DEBUG - since it could
- * cause bh to cross page boundary.
- */
-#define JBD_MAX_SLABS 5
-#define JBD_SLAB_INDEX(size)  (size >> 11)
-static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
-static const char *jbd_slab_names[JBD_MAX_SLABS] = {
-        "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
-};
-static void journal_destroy_jbd_slabs(void)
-{
-        int i;
-        for (i = 0; i < JBD_MAX_SLABS; i++) {
-                if (jbd_slab[i])
-                        kmem_cache_destroy(jbd_slab[i]);
-                jbd_slab[i] = NULL;
-        }
-}
-static int journal_create_jbd_slab(size_t slab_size)
-{
-        int i = JBD_SLAB_INDEX(slab_size);
-        BUG_ON(i >= JBD_MAX_SLABS);
-        /*
-         * Check if we already have a slab created for this size
-         */
-        if (jbd_slab[i])
-                return 0;
-        /*
-         * Create a slab and force alignment to be same as slabsize -
-         * this will make sure that allocations won't cross the page
-         * boundary.
-         */
-        jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
-                                slab_size, slab_size, 0, NULL);
-        if (!jbd_slab[i]) {
-                printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
-                return -ENOMEM;
-        }
-        return 0;
-}
-void * jbd_slab_alloc(size_t size, gfp_t flags)
-{
-        int idx;
-        idx = JBD_SLAB_INDEX(size);
-        BUG_ON(jbd_slab[idx] == NULL);
-        return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
-}
-void jbd_slab_free(void *ptr,  size_t size)
-{
-        int idx;
-        idx = JBD_SLAB_INDEX(size);
-        BUG_ON(jbd_slab[idx] == NULL);
-        kmem_cache_free(jbd_slab[idx], ptr);
-}
-/*
 * Journal_head storage management
 */
 static struct kmem_cache *journal_head_cache;
@@ -1739,14 +1652,14 @@ static struct journal_head *journal_alloc_journal_head(void)
        atomic_inc(&nr_journal_heads);
 #endif
        ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
-        if (ret == 0) {
+        if (ret == NULL) {
                jbd_debug(1, "out of memory for journal_head\n");
                if (time_after(jiffies, last_warning + 5*HZ)) {
                        printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
                               __FUNCTION__);
                        last_warning = jiffies;
                }
-                while (ret == 0) {
+                while (ret == NULL) {
                        yield();
                        ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
                }
@@ -1881,13 +1794,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
                                printk(KERN_WARNING "%s: freeing "
                                                "b_frozen_data\n",
                                                __FUNCTION__);
-                                jbd_slab_free(jh->b_frozen_data, bh->b_size);
+                                jbd_free(jh->b_frozen_data, bh->b_size);
                        }
                        if (jh->b_committed_data) {
                                printk(KERN_WARNING "%s: freeing "
                                                "b_committed_data\n",
                                                __FUNCTION__);
-                                jbd_slab_free(jh->b_committed_data, bh->b_size);
+                                jbd_free(jh->b_committed_data, bh->b_size);
                        }
                        bh->b_private = NULL;
                        jh->b_bh = NULL;        /* debug, really */
@@ -2042,7 +1955,6 @@ static void journal_destroy_caches(void)
        journal_destroy_revoke_caches();
        journal_destroy_journal_head_cache();
        journal_destroy_handle_cache();
-        journal_destroy_jbd_slabs();
 }
 static int __init journal_init(void)
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 8df5bac0b7a..9841b1e5af0 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -96,8 +96,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
 alloc_transaction:
        if (!journal->j_running_transaction) {
-                new_transaction = jbd_kmalloc(sizeof(*new_transaction),
+                new_transaction = kmalloc(sizeof(*new_transaction),
-                                                GFP_NOFS);
+                                                GFP_NOFS|__GFP_NOFAIL);
                if (!new_transaction) {
                        ret = -ENOMEM;
                        goto out;
@@ -675,7 +675,7 @@ repeat:
                                JBUFFER_TRACE(jh, "allocate memory for buffer");
                                jbd_unlock_bh_state(bh);
                                frozen_buffer =
-                                        jbd_slab_alloc(jh2bh(jh)->b_size,
+                                        jbd_alloc(jh2bh(jh)->b_size,
                                                         GFP_NOFS);
                                if (!frozen_buffer) {
                                        printk(KERN_EMERG
@@ -735,7 +735,7 @@ done:
 out:
        if (unlikely(frozen_buffer))    /* It's usually NULL */
-                jbd_slab_free(frozen_buffer, bh->b_size);
+                jbd_free(frozen_buffer, bh->b_size);
        JBUFFER_TRACE(jh, "exit");
        return error;
@@ -888,7 +888,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 repeat:
        if (!jh->b_committed_data) {
-                committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
+                committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS);
                if (!committed_data) {
                        printk(KERN_EMERG "%s: No memory for committed data\n",
                                __FUNCTION__);
@@ -915,7 +915,7 @@ repeat:
 out:
        journal_put_journal_head(jh);
        if (unlikely(committed_data))
-                jbd_slab_free(committed_data, bh->b_size);
+                jbd_free(committed_data, bh->b_size);
        return err;
 }
@@ -1172,7 +1172,7 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
        }
        /* That test should have eliminated the following case: */
-        J_ASSERT_JH(jh, jh->b_frozen_data == 0);
+        J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
        JBUFFER_TRACE(jh, "file as BJ_Metadata");
        spin_lock(&journal->j_list_lock);
@@ -1522,7 +1522,7 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh)
        J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
        if (jh->b_jlist != BJ_None)
-                J_ASSERT_JH(jh, transaction != 0);
+                J_ASSERT_JH(jh, transaction != NULL);
        switch (jh->b_jlist) {
        case BJ_None:
@@ -1591,11 +1591,11 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
        if (buffer_locked(bh) || buffer_dirty(bh))
                goto out;
-        if (jh->b_next_transaction != 0)
+        if (jh->b_next_transaction != NULL)
                goto out;
        spin_lock(&journal->j_list_lock);
-        if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) {
+        if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) {
                if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) {
                        /* A written-back ordered data buffer */
                        JBUFFER_TRACE(jh, "release data");
@@ -1603,7 +1603,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
                        journal_remove_journal_head(bh);
                        __brelse(bh);
                }
-        } else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) {
+        } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
                /* written-back checkpointed metadata buffer */
                if (jh->b_jlist == BJ_None) {
                        JBUFFER_TRACE(jh, "remove from checkpoint list");
@@ -1963,7 +1963,7 @@ void __journal_file_buffer(struct journal_head *jh,
        J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
        J_ASSERT_JH(jh, jh->b_transaction == transaction ||
-                                jh->b_transaction == 0);
+                                jh->b_transaction == NULL);
        if (jh->b_transaction && jh->b_jlist == jlist)
                return;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index c0f59d1b13d..6986f334c64 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -278,7 +278,7 @@ static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
                                   unsigned long long block)
 {
        tag->t_blocknr = cpu_to_be32(block & (u32)~0);
-        if (tag_bytes > JBD_TAG_SIZE32)
+        if (tag_bytes > JBD2_TAG_SIZE32)
                tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
 }
@@ -384,7 +384,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                        struct buffer_head *bh = jh2bh(jh);
                        jbd_lock_bh_state(bh);
-                        jbd2_slab_free(jh->b_committed_data, bh->b_size);
+                        jbd2_free(jh->b_committed_data, bh->b_size);
                        jh->b_committed_data = NULL;
                        jbd_unlock_bh_state(bh);
                }
@@ -475,7 +475,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        spin_unlock(&journal->j_list_lock);
        if (err)
-                __jbd2_journal_abort_hard(journal);
+                jbd2_journal_abort(journal, err);
        jbd2_journal_write_revoke_records(journal, commit_transaction);
@@ -533,7 +533,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                        descriptor = jbd2_journal_get_descriptor_buffer(journal);
                        if (!descriptor) {
-                                __jbd2_journal_abort_hard(journal);
+                                jbd2_journal_abort(journal, -EIO);
                                continue;
                        }
@@ -566,7 +566,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                   and repeat this loop: we'll fall into the
                   refile-on-abort condition above. */
                if (err) {
-                        __jbd2_journal_abort_hard(journal);
+                        jbd2_journal_abort(journal, err);
                        continue;
                }
@@ -757,7 +757,7 @@ wait_for_iobuf:
                err = -EIO;
        if (err)
-                __jbd2_journal_abort_hard(journal);
+                jbd2_journal_abort(journal, err);
        /* End of a transaction!  Finally, we can do checkpoint
           processing: any buffers committed as a result of this
@@ -801,14 +801,14 @@ restart_loop:
                 * Otherwise, we can just throw away the frozen data now.
                 */
                if (jh->b_committed_data) {
-                        jbd2_slab_free(jh->b_committed_data, bh->b_size);
+                        jbd2_free(jh->b_committed_data, bh->b_size);
                        jh->b_committed_data = NULL;
                        if (jh->b_frozen_data) {
                                jh->b_committed_data = jh->b_frozen_data;
                                jh->b_frozen_data = NULL;
                        }
                } else if (jh->b_frozen_data) {
-                        jbd2_slab_free(jh->b_frozen_data, bh->b_size);
+                        jbd2_free(jh->b_frozen_data, bh->b_size);
                        jh->b_frozen_data = NULL;
                }
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f37324aee81..6ddc5531587 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -84,7 +84,6 @@ EXPORT_SYMBOL(jbd2_journal_force_commit);
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
-static int jbd2_journal_create_jbd_slab(size_t slab_size);
 /*
 * Helper function used to manage commit timeouts
@@ -335,10 +334,10 @@ repeat:
                char *tmp;
                jbd_unlock_bh_state(bh_in);
-                tmp = jbd2_slab_alloc(bh_in->b_size, GFP_NOFS);
+                tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
                jbd_lock_bh_state(bh_in);
                if (jh_in->b_frozen_data) {
-                        jbd2_slab_free(tmp, bh_in->b_size);
+                        jbd2_free(tmp, bh_in->b_size);
                        goto repeat;
                }
@@ -655,10 +654,9 @@ static journal_t * journal_init_common (void)
        journal_t *journal;
        int err;
-        journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL);
+        journal = kzalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL);
        if (!journal)
                goto fail;
-        memset(journal, 0, sizeof(*journal));
        init_waitqueue_head(&journal->j_wait_transaction_locked);
        init_waitqueue_head(&journal->j_wait_logspace);
@@ -672,7 +670,7 @@ static journal_t * journal_init_common (void)
        spin_lock_init(&journal->j_list_lock);
        spin_lock_init(&journal->j_state_lock);
-        journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE);
+        journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
        /* The journal is marked for error until we succeed with recovery! */
        journal->j_flags = JBD2_ABORT;
@@ -1096,13 +1094,6 @@ int jbd2_journal_load(journal_t *journal)
                }
        }
-        /*
-         * Create a slab for this blocksize
-         */
-        err = jbd2_journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
-        if (err)
-                return err;
        /* Let the recovery code check whether it needs to recover any
         * data from the journal. */
        if (jbd2_journal_recover(journal))
@@ -1621,89 +1612,9 @@ int jbd2_journal_blocks_per_page(struct inode *inode)
 size_t journal_tag_bytes(journal_t *journal)
 {
        if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
-                return JBD_TAG_SIZE64;
+                return JBD2_TAG_SIZE64;
        else
-                return JBD_TAG_SIZE32;
+                return JBD2_TAG_SIZE32;
-}
-/*
- * Simple support for retrying memory allocations.  Introduced to help to
- * debug different VM deadlock avoidance strategies.
- */
-void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
-{
-        return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
-}
-/*
- * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
- * and allocate frozen and commit buffers from these slabs.
- *
- * Reason for doing this is to avoid, SLAB_DEBUG - since it could
- * cause bh to cross page boundary.
- */
-#define JBD_MAX_SLABS 5
-#define JBD_SLAB_INDEX(size)  (size >> 11)
-static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
-static const char *jbd_slab_names[JBD_MAX_SLABS] = {
-        "jbd2_1k", "jbd2_2k", "jbd2_4k", NULL, "jbd2_8k"
-};
-static void jbd2_journal_destroy_jbd_slabs(void)
-{
-        int i;
-        for (i = 0; i < JBD_MAX_SLABS; i++) {
-                if (jbd_slab[i])
-                        kmem_cache_destroy(jbd_slab[i]);
-                jbd_slab[i] = NULL;
-        }
-}
-static int jbd2_journal_create_jbd_slab(size_t slab_size)
-{
-        int i = JBD_SLAB_INDEX(slab_size);
-        BUG_ON(i >= JBD_MAX_SLABS);
-        /*
-         * Check if we already have a slab created for this size
-         */
-        if (jbd_slab[i])
-                return 0;
-        /*
-         * Create a slab and force alignment to be same as slabsize -
-         * this will make sure that allocations won't cross the page
-         * boundary.
-         */
-        jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
-                                slab_size, slab_size, 0, NULL);
-        if (!jbd_slab[i]) {
-                printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
-                return -ENOMEM;
-        }
-        return 0;
-}
-void * jbd2_slab_alloc(size_t size, gfp_t flags)
-{
-        int idx;
-        idx = JBD_SLAB_INDEX(size);
-        BUG_ON(jbd_slab[idx] == NULL);
-        return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
-}
-void jbd2_slab_free(void *ptr,  size_t size)
-{
-        int idx;
-        idx = JBD_SLAB_INDEX(size);
-        BUG_ON(jbd_slab[idx] == NULL);
-        kmem_cache_free(jbd_slab[idx], ptr);
 }
 /*
@@ -1770,7 +1681,7 @@ static void journal_free_journal_head(struct journal_head *jh)
 {
 #ifdef CONFIG_JBD2_DEBUG
        atomic_dec(&nr_journal_heads);
-        memset(jh, JBD_POISON_FREE, sizeof(*jh));
+        memset(jh, JBD2_POISON_FREE, sizeof(*jh));
 #endif
        kmem_cache_free(jbd2_journal_head_cache, jh);
 }
@@ -1893,13 +1804,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
                                printk(KERN_WARNING "%s: freeing "
                                                "b_frozen_data\n",
                                                __FUNCTION__);
-                                jbd2_slab_free(jh->b_frozen_data, bh->b_size);
+                                jbd2_free(jh->b_frozen_data, bh->b_size);
                        }
                        if (jh->b_committed_data) {
                                printk(KERN_WARNING "%s: freeing "
                                                "b_committed_data\n",
                                                __FUNCTION__);
-                                jbd2_slab_free(jh->b_committed_data, bh->b_size);
+                                jbd2_free(jh->b_committed_data, bh->b_size);
                        }
                        bh->b_private = NULL;
                        jh->b_bh = NULL;        /* debug, really */
@@ -1953,16 +1864,14 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
 /*
 * debugfs tunables
 */
-#if defined(CONFIG_JBD2_DEBUG)
+#ifdef CONFIG_JBD2_DEBUG
-u8 jbd2_journal_enable_debug;
+u8 jbd2_journal_enable_debug __read_mostly;
 EXPORT_SYMBOL(jbd2_journal_enable_debug);
-#endif
-#if defined(CONFIG_JBD2_DEBUG) && defined(CONFIG_DEBUG_FS)
 #define JBD2_DEBUG_NAME "jbd2-debug"
-struct dentry *jbd2_debugfs_dir, *jbd2_debug;
+static struct dentry *jbd2_debugfs_dir;
+static struct dentry *jbd2_debug;
 static void __init jbd2_create_debugfs_entry(void)
 {
@@ -1975,24 +1884,18 @@ static void __init jbd2_create_debugfs_entry(void)
 static void __exit jbd2_remove_debugfs_entry(void)
 {
-        if (jbd2_debug)
+        debugfs_remove(jbd2_debug);
-                debugfs_remove(jbd2_debug);
+        debugfs_remove(jbd2_debugfs_dir);
-        if (jbd2_debugfs_dir)
-                debugfs_remove(jbd2_debugfs_dir);
 }
 #else
 static void __init jbd2_create_debugfs_entry(void)
 {
-        do {
-        } while (0);
 }
 static void __exit jbd2_remove_debugfs_entry(void)
 {
-        do {
-        } while (0);
 }
 #endif
@@ -2040,7 +1943,6 @@ static void jbd2_journal_destroy_caches(void)
        jbd2_journal_destroy_revoke_caches();
        jbd2_journal_destroy_jbd2_journal_head_cache();
        jbd2_journal_destroy_handle_cache();
-        jbd2_journal_destroy_jbd_slabs();
 }
 static int __init journal_init(void)
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index b50be8a044e..d0ce627539e 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -311,7 +311,7 @@ int jbd2_journal_skip_recovery(journal_t *journal)
 static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag)
 {
        unsigned long long block = be32_to_cpu(tag->t_blocknr);
-        if (tag_bytes > JBD_TAG_SIZE32)
+        if (tag_bytes > JBD2_TAG_SIZE32)
                block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
        return block;
 }
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 01d88975e0c..3595fd432d5 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -352,7 +352,7 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
                if (bh)
                        BUFFER_TRACE(bh, "found on hash");
        }
-#ifdef JBD_EXPENSIVE_CHECKING
+#ifdef JBD2_EXPENSIVE_CHECKING
        else {
                struct buffer_head *bh2;
@@ -453,7 +453,7 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
                }
        }
-#ifdef JBD_EXPENSIVE_CHECKING
+#ifdef JBD2_EXPENSIVE_CHECKING
        /* There better not be one left behind by now! */
        record = find_revoke_record(journal, bh->b_blocknr);
        J_ASSERT_JH(jh, record == NULL);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 7946ff43fc4..b1fcf2b3dca 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -96,13 +96,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
 alloc_transaction:
        if (!journal->j_running_transaction) {
-                new_transaction = jbd_kmalloc(sizeof(*new_transaction),
+                new_transaction = kzalloc(sizeof(*new_transaction),
-                                                GFP_NOFS);
+                                                GFP_NOFS|__GFP_NOFAIL);
                if (!new_transaction) {
                        ret = -ENOMEM;
                        goto out;
                }
-                memset(new_transaction, 0, sizeof(*new_transaction));
        }
        jbd_debug(3, "New handle %p going live.\n", handle);
@@ -236,7 +235,7 @@ out:
 /* Allocate a new handle.  This should probably be in a slab... */
 static handle_t *new_handle(int nblocks)
 {
-        handle_t *handle = jbd_alloc_handle(GFP_NOFS);
+        handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
        if (!handle)
                return NULL;
        memset(handle, 0, sizeof(*handle));
@@ -282,7 +281,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
        err = start_this_handle(journal, handle);
        if (err < 0) {
-                jbd_free_handle(handle);
+                jbd2_free_handle(handle);
                current->journal_info = NULL;
                handle = ERR_PTR(err);
        }
@@ -668,7 +667,7 @@ repeat:
                                JBUFFER_TRACE(jh, "allocate memory for buffer");
                                jbd_unlock_bh_state(bh);
                                frozen_buffer =
-                                        jbd2_slab_alloc(jh2bh(jh)->b_size,
+                                        jbd2_alloc(jh2bh(jh)->b_size,
                                                         GFP_NOFS);
                                if (!frozen_buffer) {
                                        printk(KERN_EMERG
@@ -728,7 +727,7 @@ done:
 out:
        if (unlikely(frozen_buffer))    /* It's usually NULL */
-                jbd2_slab_free(frozen_buffer, bh->b_size);
+                jbd2_free(frozen_buffer, bh->b_size);
        JBUFFER_TRACE(jh, "exit");
        return error;
@@ -881,7 +880,7 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 repeat:
        if (!jh->b_committed_data) {
-                committed_data = jbd2_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
+                committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
                if (!committed_data) {
                        printk(KERN_EMERG "%s: No memory for committed data\n",
                                __FUNCTION__);
@@ -908,7 +907,7 @@ repeat:
 out:
        jbd2_journal_put_journal_head(jh);
        if (unlikely(committed_data))
-                jbd2_slab_free(committed_data, bh->b_size);
+                jbd2_free(committed_data, bh->b_size);
        return err;
 }
@@ -1411,7 +1410,7 @@ int jbd2_journal_stop(handle_t *handle)
                spin_unlock(&journal->j_state_lock);
        }
-        jbd_free_handle(handle);
+        jbd2_free_handle(handle);
        return err;
 }
diff --git a/fs/namei.c b/fs/namei.c
index 464eeccb675..1e5c7166916 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1659,8 +1659,10 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
                error = locks_verify_locked(inode);
                if (!error) {
                        DQUOT_INIT(inode);
-                        
-                        error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL);
+                        error = do_truncate(dentry, 0,
+                                            ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
+                                            NULL);
                }
                put_write_access(inode);
                if (error)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6c22453d77a..6d2f2a3eccf 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -357,6 +357,10 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
        nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
+        /* skip mode change if it's just for clearing setuid/setgid */
+        if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
+                attr->ia_valid &= ~ATTR_MODE;
        if (attr->ia_valid & ATTR_SIZE) {
                if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
                        attr->ia_valid &= ~ATTR_SIZE;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 819545d2167..46934c97f8f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -364,14 +364,23 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
        if (iap->ia_valid & ATTR_MODE) {
                iap->ia_mode &= S_IALLUGO;
                imode = iap->ia_mode |= (imode & ~S_IALLUGO);
+                /* if changing uid/gid revoke setuid/setgid in mode */
+                if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) {
+                        iap->ia_valid |= ATTR_KILL_PRIV;
+                        iap->ia_mode &= ~S_ISUID;
+                }
+                if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
+                        iap->ia_mode &= ~S_ISGID;
+        } else {
+                /*
+                 * Revoke setuid/setgid bit on chown/chgrp
+                 */
+                if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid)
+                        iap->ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV;
+                if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
+                        iap->ia_valid |= ATTR_KILL_SGID;
        }
-        /* Revoke setuid/setgid bit on chown/chgrp */
-        if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid)
-                iap->ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV;
-        if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
-                iap->ia_valid |= ATTR_KILL_SGID;
        /* Change the attributes. */
        iap->ia_valid |= ATTR_CTIME;
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index e7905816c4c..64965e1c21c 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -111,7 +111,7 @@ utf8_wctomb(__u8 *s, wchar_t wc, int maxlen)
        int c, nc;
        const struct utf8_table *t;
  
-        if (s == 0)
+        if (!s)
                return 0;
  
        l = wc;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9ea12004fa5..0804289d355 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3061,7 +3061,11 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = dentry->d_inode;
        int error;
-        unsigned int ia_valid = attr->ia_valid;
+        unsigned int ia_valid;
+        /* must be turned off for recursive notify_change calls */
+        ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
        reiserfs_write_lock(inode->i_sb);
        if (attr->ia_valid & ATTR_SIZE) {
                /* version 2 items will be caught by the s_maxbytes check