Diffstat (limited to 'fs')
-rw-r--r--  fs/anon_inodes.c                      |   15
-rw-r--r--  fs/binfmt_elf.c                       |    8
-rw-r--r--  fs/binfmt_elf_fdpic.c                 |    8
-rw-r--r--  fs/eventpoll.c                        |   21
-rw-r--r--  fs/ext2/dir.c                         |    5
-rw-r--r--  fs/ext2/ext2.h                        |    2
-rw-r--r--  fs/ext2/namei.c                       |    5
-rw-r--r--  fs/ext3/inode.c                       |   22
-rw-r--r--  fs/ext3/resize.c                      |    2
-rw-r--r--  fs/ext3/super.c                       |    2
-rw-r--r--  fs/ext4/Makefile                      |    2
-rw-r--r--  fs/ext4/ext4.h                        |   39
-rw-r--r--  fs/ext4/ext4_extents.h                |    4
-rw-r--r--  fs/ext4/extents.c                     |    4
-rw-r--r--  fs/ext4/file.c                        |   36
-rw-r--r--  fs/ext4/fsync.c                       |    8
-rw-r--r--  fs/ext4/ialloc.c                      |   48
-rw-r--r--  fs/ext4/inode.c                       |  281
-rw-r--r--  fs/ext4/ioctl.c                       |   36
-rw-r--r--  fs/ext4/mballoc.c                     |   85
-rw-r--r--  fs/ext4/mballoc.h                     |    1
-rw-r--r--  fs/ext4/migrate.c                     |    8
-rw-r--r--  fs/ext4/move_extent.c                 | 1320
-rw-r--r--  fs/ext4/namei.c                       |   10
-rw-r--r--  fs/ext4/resize.c                      |    2
-rw-r--r--  fs/ext4/super.c                       |   30
-rw-r--r--  fs/fat/inode.c                        |    2
-rw-r--r--  fs/gfs2/Kconfig                       |    2
-rw-r--r--  fs/isofs/dir.c                        |    5
-rw-r--r--  fs/isofs/inode.c                      |  118
-rw-r--r--  fs/isofs/isofs.h                      |   27
-rw-r--r--  fs/isofs/namei.c                      |    4
-rw-r--r--  fs/jbd/transaction.c                  |   48
-rw-r--r--  fs/jbd2/checkpoint.c                  |    5
-rw-r--r--  fs/jbd2/commit.c                      |   13
-rw-r--r--  fs/jbd2/journal.c                     |   69
-rw-r--r--  fs/jbd2/transaction.c                 |   49
-rw-r--r--  fs/notify/inotify/inotify.h           |    3
-rw-r--r--  fs/notify/inotify/inotify_fsnotify.c  |    2
-rw-r--r--  fs/notify/inotify/inotify_user.c      |   32
-rw-r--r--  fs/ocfs2/super.c                      |    2
-rw-r--r--  fs/proc/Makefile                      |    1
-rw-r--r--  fs/proc/proc_devtree.c                |   10
-rw-r--r--  fs/proc/softirqs.c                    |   44
-rw-r--r--  fs/proc/stat.c                        |   15
-rw-r--r--  fs/proc/vmcore.c                      |    7
-rw-r--r--  fs/reiserfs/do_balan.c                |    5
-rw-r--r--  fs/reiserfs/lbalance.c                |   10
-rw-r--r--  fs/seq_file.c                         |   20
-rw-r--r--  fs/ufs/inode.c                        |   10
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h          |    2
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c          |    2
52 files changed, 1977 insertions, 534 deletions
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 1dd96d4406c0..47d4a01c5393 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -52,6 +52,19 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
 	.d_delete = anon_inodefs_delete_dentry,
 };
 
+/*
+ * nop .set_page_dirty method so that people can use .page_mkwrite on
+ * anon inodes.
+ */
+static int anon_set_page_dirty(struct page *page)
+{
+	return 0;
+};
+
+static const struct address_space_operations anon_aops = {
+	.set_page_dirty = anon_set_page_dirty,
+};
+
 /**
  * anon_inode_getfd - creates a new file instance by hooking it up to an
  *                    anonymous inode, and a dentry that describe the "class"
@@ -151,6 +164,8 @@ static struct inode *anon_inode_mkinode(void)
 
 	inode->i_fop = &anon_inode_fops;
 
+	inode->i_mapping->a_ops = &anon_aops;
+
 	/*
 	 * Mark the inode dirty from the very beginning,
 	 * that way it will never be moved to the dirty
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 40381df34869..9fa212b014a5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1340,8 +1340,10 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
 	prstatus->pr_sigpend = p->pending.signal.sig[0];
 	prstatus->pr_sighold = p->blocked.sig[0];
+	rcu_read_lock();
+	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
+	rcu_read_unlock();
 	prstatus->pr_pid = task_pid_vnr(p);
-	prstatus->pr_ppid = task_pid_vnr(p->real_parent);
 	prstatus->pr_pgrp = task_pgrp_vnr(p);
 	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
@@ -1382,8 +1384,10 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 		psinfo->pr_psargs[i] = ' ';
 	psinfo->pr_psargs[len] = 0;
 
+	rcu_read_lock();
+	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
+	rcu_read_unlock();
 	psinfo->pr_pid = task_pid_vnr(p);
-	psinfo->pr_ppid = task_pid_vnr(p->real_parent);
 	psinfo->pr_pgrp = task_pgrp_vnr(p);
 	psinfo->pr_sid = task_session_vnr(p);
 
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index fdb66faa24f1..20fbeced472b 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1387,8 +1387,10 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
 	prstatus->pr_sigpend = p->pending.signal.sig[0];
 	prstatus->pr_sighold = p->blocked.sig[0];
+	rcu_read_lock();
+	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
+	rcu_read_unlock();
 	prstatus->pr_pid = task_pid_vnr(p);
-	prstatus->pr_ppid = task_pid_vnr(p->real_parent);
 	prstatus->pr_pgrp = task_pgrp_vnr(p);
 	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
@@ -1432,8 +1434,10 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 		psinfo->pr_psargs[i] = ' ';
 	psinfo->pr_psargs[len] = 0;
 
+	rcu_read_lock();
+	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
+	rcu_read_unlock();
 	psinfo->pr_pid = task_pid_vnr(p);
-	psinfo->pr_ppid = task_pid_vnr(p->real_parent);
 	psinfo->pr_pgrp = task_pgrp_vnr(p);
 	psinfo->pr_sid = task_session_vnr(p);
 
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 5458e80fc558..085c5c063420 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -98,7 +98,7 @@ struct epoll_filefd {
 struct nested_call_node {
 	struct list_head llink;
 	void *cookie;
-	int cpu;
+	void *ctx;
 };
 
 /*
@@ -317,17 +317,17 @@ static void ep_nested_calls_init(struct nested_calls *ncalls)
  * @nproc: Nested call core function pointer.
  * @priv: Opaque data to be passed to the @nproc callback.
  * @cookie: Cookie to be used to identify this nested call.
+ * @ctx: This instance context.
  *
  * Returns: Returns the code returned by the @nproc callback, or -1 if
  *          the maximum recursion limit has been exceeded.
  */
 static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
 			  int (*nproc)(void *, void *, int), void *priv,
-			  void *cookie)
+			  void *cookie, void *ctx)
 {
 	int error, call_nests = 0;
 	unsigned long flags;
-	int this_cpu = get_cpu();
 	struct list_head *lsthead = &ncalls->tasks_call_list;
 	struct nested_call_node *tncur;
 	struct nested_call_node tnode;
@@ -340,7 +340,7 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
 	 * very much limited.
 	 */
 	list_for_each_entry(tncur, lsthead, llink) {
-		if (tncur->cpu == this_cpu &&
+		if (tncur->ctx == ctx &&
 		    (tncur->cookie == cookie || ++call_nests > max_nests)) {
 			/*
 			 * Ops ... loop detected or maximum nest level reached.
@@ -352,7 +352,7 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
 	}
 
 	/* Add the current task and cookie to the list */
-	tnode.cpu = this_cpu;
+	tnode.ctx = ctx;
 	tnode.cookie = cookie;
 	list_add(&tnode.llink, lsthead);
 
@@ -364,10 +364,9 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
 	/* Remove the current task from the list */
 	spin_lock_irqsave(&ncalls->lock, flags);
 	list_del(&tnode.llink);
- out_unlock:
+out_unlock:
 	spin_unlock_irqrestore(&ncalls->lock, flags);
 
-	put_cpu();
 	return error;
 }
 
@@ -408,8 +407,12 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
  */
 static void ep_poll_safewake(wait_queue_head_t *wq)
 {
+	int this_cpu = get_cpu();
+
 	ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
-		       ep_poll_wakeup_proc, NULL, wq);
+		       ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
+
+	put_cpu();
 }
 
 /*
@@ -663,7 +666,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
 	 * could re-enter here.
 	 */
 	pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
-				   ep_poll_readyevents_proc, ep, ep);
+				   ep_poll_readyevents_proc, ep, ep, current);
 
 	return pollflags != -1 ? pollflags : 0;
 }
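
A note on the eventpoll hunks above: keying the nesting check on an opaque ctx pointer (a CPU id cast to a pointer in ep_poll_safewake(), current in ep_eventpoll_poll()) lets the same helper detect loops and excessive depth per caller rather than per raw CPU number. The stand-alone C sketch below only illustrates that pattern; every name in it is illustrative rather than kernel API, it is single-threaded, and it omits the spinlock the kernel takes around the list.

#include <stddef.h>

#define MAX_NESTS 4

struct call_node {
	struct call_node *next;
	void *cookie;		/* object currently being visited */
	void *ctx;		/* caller identity: task, CPU id, ... */
};

static struct call_node *call_list;

static int call_nested(int (*proc)(void *priv), void *priv,
		       void *cookie, void *ctx)
{
	struct call_node node, *cur;
	int nests = 0, ret;

	/*
	 * Refuse the call if this context has already visited this cookie
	 * (a loop) or exceeded the nesting limit; frames pushed by other
	 * contexts are ignored, which is the point of keying on ctx.
	 */
	for (cur = call_list; cur; cur = cur->next)
		if (cur->ctx == ctx &&
		    (cur->cookie == cookie || ++nests > MAX_NESTS))
			return -1;

	node.cookie = cookie;
	node.ctx = ctx;
	node.next = call_list;
	call_list = &node;	/* push our frame */

	ret = proc(priv);

	call_list = node.next;	/* pop our frame */
	return ret;
}

A caller would invoke call_nested(visit, obj, obj, caller_id) and treat -1 as "loop detected or nesting limit hit", mirroring how ep_call_nested() is used above.
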
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 003500498c22..6cde970b0a1a 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -450,7 +450,7 @@ ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child)
 
 /* Releases the page */
 void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
-		   struct page *page, struct inode *inode)
+		   struct page *page, struct inode *inode, int update_times)
 {
 	loff_t pos = page_offset(page) +
 			(char *) de - (char *) page_address(page);
@@ -465,7 +465,8 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
 	ext2_set_de_type(de, inode);
 	err = ext2_commit_chunk(page, pos, len);
 	ext2_put_page(page);
-	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+	if (update_times)
+		dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
 	EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
 	mark_inode_dirty(dir);
 }
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index f2e5811936d0..d988a718aedb 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -111,7 +111,7 @@ extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *,
 extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
 extern int ext2_empty_dir (struct inode *);
 extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
-extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *);
+extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
 
 /* ialloc.c */
 extern struct inode * ext2_new_inode (struct inode *, int);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 90ea17998a73..6524ecaebb7a 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -320,7 +320,7 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 		if (!new_de)
 			goto out_dir;
 		inode_inc_link_count(old_inode);
-		ext2_set_link(new_dir, new_de, new_page, old_inode);
+		ext2_set_link(new_dir, new_de, new_page, old_inode, 1);
 		new_inode->i_ctime = CURRENT_TIME_SEC;
 		if (dir_de)
 			drop_nlink(new_inode);
@@ -352,7 +352,8 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 	inode_dec_link_count(old_inode);
 
 	if (dir_de) {
-		ext2_set_link(old_inode, dir_de, dir_page, new_dir);
+		if (old_dir != new_dir)
+			ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0);
 		inode_dec_link_count(old_dir);
 	}
 	return 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index b0248c6d5d4c..05dea8132fc0 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -820,7 +820,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
 		while (count < maxblocks && count <= blocks_to_boundary) {
 			ext3_fsblk_t blk;
 
-			if (!verify_chain(chain, partial)) {
+			if (!verify_chain(chain, chain + depth - 1)) {
 				/*
 				 * Indirect block might be removed by
 				 * truncate while we were reading it.
@@ -2374,7 +2374,7 @@ void ext3_truncate(struct inode *inode)
 	struct page *page;
 
 	if (!ext3_can_truncate(inode))
-		return;
+		goto out_notrans;
 
 	if (inode->i_size == 0 && ext3_should_writeback_data(inode))
 		ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE;
@@ -2390,7 +2390,7 @@ void ext3_truncate(struct inode *inode)
 		page = grab_cache_page(mapping,
 				inode->i_size >> PAGE_CACHE_SHIFT);
 		if (!page)
-			return;
+			goto out_notrans;
 	}
 
 	handle = start_transaction(inode);
@@ -2401,7 +2401,7 @@ void ext3_truncate(struct inode *inode)
 			unlock_page(page);
 			page_cache_release(page);
 		}
-		return;		/* AKPM: return what? */
+		goto out_notrans;
 	}
 
 	last_block = (inode->i_size + blocksize-1)
@@ -2525,6 +2525,14 @@ out_stop:
 		ext3_orphan_del(handle, inode);
 
 	ext3_journal_stop(handle);
+	return;
+out_notrans:
+	/*
+	 * Delete the inode from orphan list so that it doesn't stay there
+	 * forever and trigger assertion on umount.
+	 */
+	if (inode->i_nlink)
+		ext3_orphan_del(NULL, inode);
 }
 
 static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
@@ -3122,12 +3130,6 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 
 	rc = inode_setattr(inode, attr);
 
-	/* If inode_setattr's call to ext3_truncate failed to get a
-	 * transaction handle at all, we need to clean up the in-core
-	 * orphan list manually. */
-	if (inode->i_nlink)
-		ext3_orphan_del(NULL, inode);
-
 	if (!rc && (ia_valid & ATTR_MODE))
 		rc = ext3_acl_chmod(inode);
 
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 8a0b26340b54..8359e7b3dc89 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -990,7 +990,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 			sb->s_id, n_blocks_count);
 		if (sizeof(sector_t) < 8)
 			ext3_warning(sb, __func__,
-				     "CONFIG_LBD not enabled\n");
+				     "CONFIG_LBDAF not enabled\n");
 		return -EINVAL;
 	}
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 26aa64dee6aa..601e881e6105 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1812,7 +1812,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		printk(KERN_ERR "EXT3-fs: filesystem on %s:"
 			" too large to mount safely\n", sb->s_id);
 		if (sizeof(sector_t) < 8)
-			printk(KERN_WARNING "EXT3-fs: CONFIG_LBD not "
+			printk(KERN_WARNING "EXT3-fs: CONFIG_LBDAF not "
 				"enabled\n");
 		goto failed_mount;
 	}
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 8a34710ecf40..8867b2a1e5fe 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
 
 ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
 		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-		ext4_jbd2.o migrate.o mballoc.o block_validity.o
+		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
 
 ext4-$(CONFIG_EXT4_FS_XATTR)		+= xattr.o xattr_user.o xattr_trusted.o
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cc7d5edc38c9..17b9998680e3 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -352,6 +352,7 @@ struct ext4_new_group_data {
  /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
  /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
 #define EXT4_IOC_ALLOC_DA_BLKS		_IO('f', 12)
+#define EXT4_IOC_MOVE_EXT		_IOWR('f', 15, struct move_extent)
 
 /*
  * ioctl commands in 32 bit emulation
@@ -447,6 +448,15 @@ struct ext4_inode {
 	__le32  i_version_hi;	/* high 32 bits for 64-bit version */
 };
 
+struct move_extent {
+	__u32 reserved;		/* should be zero */
+	__u32 donor_fd;		/* donor file descriptor */
+	__u64 orig_start;	/* logical start offset in block for orig */
+	__u64 donor_start;	/* logical start offset in block for donor */
+	__u64 len;		/* block length to be moved */
+	__u64 moved_len;	/* moved block length */
+};
+#define MAX_DEFRAG_SIZE		((1UL<<31) - 1)
 
 #define EXT4_EPOCH_BITS 2
 #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
@@ -674,7 +684,6 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_ERRORS_PANIC		0x00040	/* Panic on errors */
 #define EXT4_MOUNT_MINIX_DF		0x00080	/* Mimics the Minix statfs */
 #define EXT4_MOUNT_NOLOAD		0x00100	/* Don't use existing journal*/
-#define EXT4_MOUNT_ABORT		0x00200	/* Fatal error detected */
 #define EXT4_MOUNT_DATA_FLAGS		0x00C00	/* Mode for data writes: */
 #define EXT4_MOUNT_JOURNAL_DATA		0x00400	/* Write data to journal */
 #define EXT4_MOUNT_ORDERED_DATA		0x00800	/* Flush data before commit */
@@ -696,17 +705,10 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
 #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
 
-/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
-#ifndef _LINUX_EXT2_FS_H
 #define clear_opt(o, opt)		o &= ~EXT4_MOUNT_##opt
 #define set_opt(o, opt)			o |= EXT4_MOUNT_##opt
 #define test_opt(sb, opt)		(EXT4_SB(sb)->s_mount_opt & \
 					 EXT4_MOUNT_##opt)
-#else
-#define EXT2_MOUNT_NOLOAD		EXT4_MOUNT_NOLOAD
-#define EXT2_MOUNT_ABORT		EXT4_MOUNT_ABORT
-#define EXT2_MOUNT_DATA_FLAGS		EXT4_MOUNT_DATA_FLAGS
-#endif
 
 #define ext4_set_bit			ext2_set_bit
 #define ext4_set_bit_atomic		ext2_set_bit_atomic
@@ -824,6 +826,13 @@ struct ext4_super_block {
 };
 
 #ifdef __KERNEL__
+
+/*
+ * run-time mount flags
+ */
+#define EXT4_MF_MNTDIR_SAMPLED	0x0001
+#define EXT4_MF_FS_ABORTED	0x0002	/* Fatal error detected */
+
 /*
  * fourth extended-fs super-block data in memory
  */
@@ -842,7 +851,8 @@ struct ext4_sb_info {
 	struct buffer_head * s_sbh;	/* Buffer containing the super block */
 	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
 	struct buffer_head **s_group_desc;
-	unsigned long s_mount_opt;
+	unsigned int s_mount_opt;
+	unsigned int s_mount_flags;
 	ext4_fsblk_t s_sb_block;
 	uid_t s_resuid;
 	gid_t s_resgid;
@@ -853,6 +863,7 @@ struct ext4_sb_info {
 	int s_inode_size;
 	int s_first_ino;
 	unsigned int s_inode_readahead_blks;
+	unsigned int s_inode_goal;
 	spinlock_t s_next_gen_lock;
 	u32 s_next_generation;
 	u32 s_hash_seed[4];
@@ -1305,7 +1316,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct
 			  dx_hash_info *hinfo);
 
 /* ialloc.c */
-extern struct inode * ext4_new_inode(handle_t *, struct inode *, int);
+extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
+				    const struct qstr *qstr, __u32 goal);
 extern void ext4_free_inode(handle_t *, struct inode *);
 extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -1329,7 +1341,7 @@ extern void ext4_discard_preallocations(struct inode *);
 extern int __init init_ext4_mballoc(void);
 extern void exit_ext4_mballoc(void);
 extern void ext4_mb_free_blocks(handle_t *, struct inode *,
-		unsigned long, unsigned long, int, unsigned long *);
+		ext4_fsblk_t, unsigned long, int, unsigned long *);
 extern int ext4_mb_add_groupinfo(struct super_block *sb,
 		ext4_group_t i, struct ext4_group_desc *desc);
 extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
@@ -1647,6 +1659,11 @@ extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
 			   struct buffer_head *bh, int flags);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			__u64 start, __u64 len);
+/* move_extent.c */
+extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
+			     __u64 start_orig, __u64 start_donor,
+			     __u64 len, __u64 *moved_len);
+
 
 /*
  * Add new method to test wether block and inode bitmaps are properly
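
For context on the move_extent structure and the EXT4_IOC_MOVE_EXT ioctl introduced above: an online defragmenter issues this ioctl on the original file, handing it a donor file whose (ideally contiguous) blocks get swapped into place. The sketch below is a minimal, hypothetical user-space driver built only from the definitions in this header change; the open modes, error handling, and command-line handling are illustrative and are not taken from e4defrag.

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>

/* Mirrors the struct and ioctl number added to fs/ext4/ext4.h above. */
struct move_extent {
	__u32 reserved;		/* should be zero */
	__u32 donor_fd;		/* donor file descriptor */
	__u64 orig_start;	/* logical start offset in block for orig */
	__u64 donor_start;	/* logical start offset in block for donor */
	__u64 len;		/* block length to be moved */
	__u64 moved_len;	/* moved block length */
};

#define EXT4_IOC_MOVE_EXT	_IOWR('f', 15, struct move_extent)

int main(int argc, char **argv)
{
	struct move_extent me = { 0 };
	int orig_fd, donor_fd;

	if (argc != 4) {
		fprintf(stderr, "usage: %s <orig> <donor> <blocks>\n", argv[0]);
		return 1;
	}
	orig_fd = open(argv[1], O_RDWR);	/* open modes are illustrative */
	donor_fd = open(argv[2], O_RDWR);
	if (orig_fd < 0 || donor_fd < 0) {
		perror("open");
		return 1;
	}
	me.donor_fd = donor_fd;		/* donor supplies the replacement blocks */
	me.orig_start = 0;
	me.donor_start = 0;
	me.len = strtoull(argv[3], NULL, 0);

	if (ioctl(orig_fd, EXT4_IOC_MOVE_EXT, &me) < 0) {
		perror("EXT4_IOC_MOVE_EXT");
		return 1;
	}
	printf("moved %llu blocks\n", (unsigned long long)me.moved_len);
	close(orig_fd);
	close(donor_fd);
	return 0;
}

The kernel reports how much was actually exchanged back through moved_len, which may be short of the requested len; both files must live on the same ext4 filesystem and be extent-mapped for ext4_move_extents() to accept the request.
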
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index f0c3ec85bd48..20a84105a10b 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -221,12 +221,16 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
 }
 
 extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
+extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
 extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
 extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
 extern int ext4_extent_tree_init(handle_t *, struct inode *);
 extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
 						   int num,
 						   struct ext4_ext_path *path);
+extern int ext4_can_extents_be_merged(struct inode *inode,
+				      struct ext4_extent *ex1,
+				      struct ext4_extent *ex2);
 extern int ext4_ext_try_to_merge(struct inode *inode,
 				 struct ext4_ext_path *path,
 				 struct ext4_extent *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2593f748c3a4..50322a09bd01 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -49,7 +49,7 @@
  * ext_pblock:
  * combine low and high parts of physical block number into ext4_fsblk_t
  */
-static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
+ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
 {
 	ext4_fsblk_t block;
 
@@ -1417,7 +1417,7 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
 	return err;
 }
 
-static int
+int
 ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 			   struct ext4_extent *ex2)
 {
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 588af8c77246..3f1873fef1c6 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -21,6 +21,8 @@
 #include <linux/time.h>
 #include <linux/fs.h>
 #include <linux/jbd2.h>
+#include <linux/mount.h>
+#include <linux/path.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -145,6 +147,38 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
+static int ext4_file_open(struct inode * inode, struct file * filp)
+{
+	struct super_block *sb = inode->i_sb;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct vfsmount *mnt = filp->f_path.mnt;
+	struct path path;
+	char buf[64], *cp;
+
+	if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
+		     !(sb->s_flags & MS_RDONLY))) {
+		sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
+		/*
+		 * Sample where the filesystem has been mounted and
+		 * store it in the superblock for sysadmin convenience
+		 * when trying to sort through large numbers of block
+		 * devices or filesystem images.
+		 */
+		memset(buf, 0, sizeof(buf));
+		path.mnt = mnt->mnt_parent;
+		path.dentry = mnt->mnt_mountpoint;
+		path_get(&path);
+		cp = d_path(&path, buf, sizeof(buf));
+		path_put(&path);
+		if (!IS_ERR(cp)) {
+			memcpy(sbi->s_es->s_last_mounted, cp,
+			       sizeof(sbi->s_es->s_last_mounted));
+			sb->s_dirt = 1;
+		}
+	}
+	return generic_file_open(inode, filp);
+}
+
 const struct file_operations ext4_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
@@ -156,7 +190,7 @@ const struct file_operations ext4_file_operations = {
 	.compat_ioctl	= ext4_compat_ioctl,
 #endif
 	.mmap		= ext4_file_mmap,
-	.open		= generic_file_open,
+	.open		= ext4_file_open,
 	.release	= ext4_release_file,
 	.fsync		= ext4_sync_file,
 	.splice_read	= generic_file_splice_read,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 5afe4370840b..83cf6415f599 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -28,10 +28,12 @@
 #include <linux/writeback.h>
 #include <linux/jbd2.h>
 #include <linux/blkdev.h>
-#include <linux/marker.h>
+
 #include "ext4.h"
 #include "ext4_jbd2.h"
 
+#include <trace/events/ext4.h>
+
 /*
  * akpm: A new design for ext4_sync_file().
  *
@@ -52,9 +54,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
 
 	J_ASSERT(ext4_journal_current_handle() == NULL);
 
-	trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld",
-		   inode->i_sb->s_id, datasync, inode->i_ino,
-		   dentry->d_parent->d_inode->i_ino);
+	trace_ext4_sync_file(file, dentry, datasync);
 
 	/*
 	 * data=writeback:
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3743bd849bce..2f645732e3b7 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -23,11 +23,14 @@
 #include <linux/bitops.h>
 #include <linux/blkdev.h>
 #include <asm/byteorder.h>
+
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
 
+#include <trace/events/ext4.h>
+
 /*
  * ialloc.c contains the inodes allocation and deallocation routines
  */
@@ -208,11 +211,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 
 	ino = inode->i_ino;
 	ext4_debug("freeing inode %lu\n", ino);
-	trace_mark(ext4_free_inode,
-		   "dev %s ino %lu mode %d uid %lu gid %lu bocks %llu",
-		   sb->s_id, inode->i_ino, inode->i_mode,
-		   (unsigned long) inode->i_uid, (unsigned long) inode->i_gid,
-		   (unsigned long long) inode->i_blocks);
+	trace_ext4_free_inode(inode);
 
 	/*
 	 * Note: we must free any quota before locking the superblock,
@@ -471,7 +470,8 @@ void get_orlov_stats(struct super_block *sb, ext4_group_t g,
  */
 
 static int find_group_orlov(struct super_block *sb, struct inode *parent,
-			    ext4_group_t *group, int mode)
+			    ext4_group_t *group, int mode,
+			    const struct qstr *qstr)
 {
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -486,6 +486,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 	struct ext4_group_desc *desc;
 	struct orlov_stats stats;
 	int flex_size = ext4_flex_bg_size(sbi);
+	struct dx_hash_info hinfo;
 
 	ngroups = real_ngroups;
 	if (flex_size > 1) {
@@ -507,7 +508,13 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 		int best_ndir = inodes_per_group;
 		int ret = -1;
 
-		get_random_bytes(&grp, sizeof(grp));
+		if (qstr) {
+			hinfo.hash_version = DX_HASH_HALF_MD4;
+			hinfo.seed = sbi->s_hash_seed;
+			ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
+			grp = hinfo.hash;
+		} else
+			get_random_bytes(&grp, sizeof(grp));
 		parent_group = (unsigned)grp % ngroups;
 		for (i = 0; i < ngroups; i++) {
 			g = (parent_group + i) % ngroups;
@@ -650,7 +657,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 		*group = parent_group + flex_size;
 		if (*group > ngroups)
 			*group = 0;
-		return find_group_orlov(sb, parent, group, mode);
+		return find_group_orlov(sb, parent, group, mode, 0);
 	}
 
 	/*
@@ -791,7 +798,8 @@ err_ret:
  * For other inodes, search forward from the parent directory's block
  * group to find a free inode.
  */
-struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
+struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
+			     const struct qstr *qstr, __u32 goal)
 {
 	struct super_block *sb;
 	struct buffer_head *inode_bitmap_bh = NULL;
@@ -815,14 +823,23 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 
 	sb = dir->i_sb;
 	ngroups = ext4_get_groups_count(sb);
-	trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
-		   dir->i_ino, mode);
+	trace_ext4_request_inode(dir, mode);
 	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	ei = EXT4_I(inode);
 	sbi = EXT4_SB(sb);
 
+	if (!goal)
+		goal = sbi->s_inode_goal;
+
+	if (goal && goal < le32_to_cpu(sbi->s_es->s_inodes_count)) {
+		group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
+		ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
+		ret2 = 0;
+		goto got_group;
+	}
+
 	if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
 		ret2 = find_group_flex(sb, dir, &group);
 		if (ret2 == -1) {
@@ -841,7 +858,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 		if (test_opt(sb, OLDALLOC))
 			ret2 = find_group_dir(sb, dir, &group);
 		else
-			ret2 = find_group_orlov(sb, dir, &group, mode);
+			ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
 	} else
 		ret2 = find_group_other(sb, dir, &group, mode);
 
@@ -851,7 +868,7 @@ got_group:
 	if (ret2 == -1)
 		goto out;
 
-	for (i = 0; i < ngroups; i++) {
+	for (i = 0; i < ngroups; i++, ino = 0) {
 		err = -EIO;
 
 		gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@@ -863,8 +880,6 @@ got_group:
 		if (!inode_bitmap_bh)
 			goto fail;
 
-		ino = 0;
-
 repeat_in_this_group:
 		ino = ext4_find_next_zero_bit((unsigned long *)
 					      inode_bitmap_bh->b_data,
@@ -1047,8 +1062,7 @@ got:
 	}
 
 	ext4_debug("allocating inode %lu\n", inode->i_ino);
-	trace_mark(ext4_allocate_inode, "dev %s ino %lu dir %lu mode %d",
-		   sb->s_id, inode->i_ino, dir->i_ino, mode);
+	trace_ext4_allocate_inode(inode, dir, mode);
 	goto really_out;
 fail:
 	ext4_std_error(sb, err);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 875db944b22f..7c17ae275af4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,11 +37,14 @@
 #include <linux/namei.h>
 #include <linux/uio.h>
 #include <linux/bio.h>
+
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
 #include "ext4_extents.h"
 
+#include <trace/events/ext4.h>
+
 #define MPAGE_DA_EXTENT_TAIL 0x01
 
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
@@ -78,7 +81,7 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
  * If the handle isn't valid we're not journaling so there's nothing to do.
  */
 int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
 		struct buffer_head *bh, ext4_fsblk_t blocknr)
 {
 	int err;
 
@@ -90,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
 	BUFFER_TRACE(bh, "enter");
 
 	jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
-		  "data mode %lx\n",
+		  "data mode %x\n",
 		  bh, is_metadata, inode->i_mode,
 		  test_opt(inode->i_sb, DATA_FLAGS));
 
@@ -329,8 +332,8 @@ static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
  */
 
 static int ext4_block_to_path(struct inode *inode,
 			      ext4_lblk_t i_block,
 			      ext4_lblk_t offsets[4], int *boundary)
 {
 	int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
 	int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
@@ -362,9 +365,9 @@ static int ext4_block_to_path(struct inode *inode,
 		final = ptrs;
 	} else {
 		ext4_warning(inode->i_sb, "ext4_block_to_path",
 			     "block %lu > max in inode %lu",
 			     i_block + direct_blocks +
 			     indirect_blocks + double_blocks, inode->i_ino);
 	}
 	if (boundary)
 		*boundary = final - 1 - (i_block & (ptrs - 1));
@@ -379,25 +382,25 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
 
 	while (bref < p+max) {
 		blk = le32_to_cpu(*bref++);
 		if (blk &&
 		    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
 						    blk, 1))) {
 			ext4_error(inode->i_sb, function,
 				   "invalid block reference %u "
 				   "in inode #%lu", blk, inode->i_ino);
 			return -EIO;
 		}
 	}
 	return 0;
 }
 
 
 #define ext4_check_indirect_blockref(inode, bh) \
 	__ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \
 			      EXT4_ADDR_PER_BLOCK((inode)->i_sb))
 
 #define ext4_check_inode_blockref(inode) \
 	__ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \
 			      EXT4_NDIR_BLOCKS)
 
 /**
@@ -447,7 +450,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
 		bh = sb_getblk(sb, le32_to_cpu(p->key));
 		if (unlikely(!bh))
 			goto failure;
 
 		if (!bh_uptodate_or_lock(bh)) {
 			if (bh_submit_read(bh) < 0) {
 				put_bh(bh);
@@ -459,7 +462,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
 				goto failure;
 			}
 		}
 
 		add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
 		/* Reader: end */
 		if (!p->key)
@@ -552,7 +555,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
  * returns it.
  */
 static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
 				   Indirect *partial)
 {
 	/*
 	 * XXX need to get goal block from mballoc's data structures
@@ -574,7 +577,7 @@ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
  * direct and indirect blocks.
  */
 static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
 				 int blocks_to_boundary)
 {
 	unsigned int count = 0;
 
@@ -610,9 +613,9 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
  * direct blocks
  */
 static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
 			     ext4_lblk_t iblock, ext4_fsblk_t goal,
 			     int indirect_blks, int blks,
 			     ext4_fsblk_t new_blocks[4], int *err)
 {
 	struct ext4_allocation_request ar;
 	int target, i;
@@ -683,10 +686,10 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
 	}
 	if (!*err) {
 		if (target == blks) {
 			/*
 			 * save the new block number
 			 * for the first direct block
 			 */
 			new_blocks[index] = current_block;
 		}
 		blk_allocated += ar.len;
@@ -728,9 +731,9 @@ failed_out:
  * as described above and return 0.
 */
 static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 			     ext4_lblk_t iblock, int indirect_blks,
 			     int *blks, ext4_fsblk_t goal,
 			     ext4_lblk_t *offsets, Indirect *branch)
 {
 	int blocksize = inode->i_sb->s_blocksize;
 	int i, n = 0;
@@ -777,7 +780,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 			 * the chain to point to the new allocated
 			 * data blocks numbers
 			 */
-			for (i=1; i < num; i++)
+			for (i = 1; i < num; i++)
 				*(branch[n].p + i) = cpu_to_le32(++current_block);
 		}
 		BUFFER_TRACE(bh, "marking uptodate");
@@ -820,7 +823,8 @@ failed:
  * chain to new block and return 0.
  */
 static int ext4_splice_branch(handle_t *handle, struct inode *inode,
-			      ext4_lblk_t block, Indirect *where, int num, int blks)
+			      ext4_lblk_t block, Indirect *where, int num,
+			      int blks)
 {
 	int i;
 	int err = 0;
@@ -852,10 +856,6 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 	}
 
 	/* We are done with atomic stuff, now do the rest of housekeeping */
-
-	inode->i_ctime = ext4_current_time(inode);
-	ext4_mark_inode_dirty(handle, inode);
-
 	/* had we spliced it onto indirect block? */
 	if (where->bh) {
 		/*
@@ -874,8 +874,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 	} else {
 		/*
 		 * OK, we spliced it into the inode itself on a direct block.
-		 * Inode was dirtied above.
 		 */
+		ext4_mark_inode_dirty(handle, inode);
 		jbd_debug(5, "splicing direct\n");
 	}
 	return err;
@@ -921,9 +921,9 @@ err_out:
  * blocks.
 */
 static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 			       ext4_lblk_t iblock, unsigned int maxblocks,
 			       struct buffer_head *bh_result,
 			       int flags)
 {
 	int err = -EIO;
 	ext4_lblk_t offsets[4];
@@ -939,7 +939,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 	J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
 	J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
 	depth = ext4_block_to_path(inode, iblock, offsets,
 				   &blocks_to_boundary);
 
 	if (depth == 0)
 		goto out;
@@ -987,8 +987,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 	 * Block out ext4_truncate while we alter the tree
 	 */
 	err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
 				&count, goal,
 				offsets + (partial - chain), partial);
 
 	/*
 	 * The ext4_splice_branch call will free and forget any buffers
@@ -999,8 +999,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
 	 */
 	if (!err)
 		err = ext4_splice_branch(handle, inode, iblock,
 					 partial, indirect_blks, count);
 	else
 		goto cleanup;
 
 	set_buffer_new(bh_result);
@@ -1172,7 +1172,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 	up_read((&EXT4_I(inode)->i_data_sem));
 
 	if (retval > 0 && buffer_mapped(bh)) {
 		int ret = check_block_validity(inode, block,
 					       bh->b_blocknr, retval);
 		if (ret != 0)
 			return ret;
@@ -1254,7 +1254,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 
 	up_write((&EXT4_I(inode)->i_data_sem));
 	if (retval > 0 && buffer_mapped(bh)) {
 		int ret = check_block_validity(inode, block,
 					       bh->b_blocknr, retval);
 		if (ret != 0)
 			return ret;
@@ -1405,8 +1405,7 @@ static int walk_page_buffers(handle_t *handle,
 
 	for (bh = head, block_start = 0;
 	     ret == 0 && (bh != head || !block_start);
-	     block_start = block_end, bh = next)
-	{
+	     block_start = block_end, bh = next) {
 		next = bh->b_this_page;
 		block_end = block_start + blocksize;
 		if (block_end <= from || block_start >= to) {
@@ -1447,7 +1446,7 @@ static int walk_page_buffers(handle_t *handle,
  * write.
  */
 static int do_journal_get_write_access(handle_t *handle,
 				       struct buffer_head *bh)
 {
 	if (!buffer_mapped(bh) || buffer_freed(bh))
 		return 0;
@@ -1455,27 +1454,24 @@ static int do_journal_get_write_access(handle_t *handle,
 }
 
 static int ext4_write_begin(struct file *file, struct address_space *mapping,
 			    loff_t pos, unsigned len, unsigned flags,
 			    struct page **pagep, void **fsdata)
 {
 	struct inode *inode = mapping->host;
 	int ret, needed_blocks;
 	handle_t *handle;
 	int retries = 0;
 	struct page *page;
 	pgoff_t index;
 	unsigned from, to;
 
-	trace_mark(ext4_write_begin,
-		   "dev %s ino %lu pos %llu len %u flags %u",
-		   inode->i_sb->s_id, inode->i_ino,
-		   (unsigned long long) pos, len, flags);
+	trace_ext4_write_begin(inode, pos, len, flags);
 	/*
 	 * Reserve one block more for addition to orphan list in case
 	 * we allocate blocks but write fails for some reason
 	 */
 	needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
 	index = pos >> PAGE_CACHE_SHIFT;
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
 
@@ -1523,7 +1519,7 @@ retry:
 	ext4_journal_stop(handle);
 	if (pos + len > inode->i_size) {
 		vmtruncate(inode, inode->i_size);
 		/*
 		 * If vmtruncate failed early the inode might
 		 * still be on the orphan list; we need to
 		 * make sure the inode is removed from the
@@ -1550,9 +1546,9 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
 }
 
 static int ext4_generic_write_end(struct file *file,
 				  struct address_space *mapping,
 				  loff_t pos, unsigned len, unsigned copied,
 				  struct page *page, void *fsdata)
 {
 	int i_size_changed = 0;
 	struct inode *inode = mapping->host;
@@ -1603,18 +1599,15 @@ static int ext4_generic_write_end(struct file *file,
  * buffers are managed internally.
  */
 static int ext4_ordered_write_end(struct file *file,
 				  struct address_space *mapping,
 				  loff_t pos, unsigned len, unsigned copied,
 				  struct page *page, void *fsdata)
 {
 	handle_t *handle = ext4_journal_current_handle();
 	struct inode *inode = mapping->host;
 	int ret = 0, ret2;
 
-	trace_mark(ext4_ordered_write_end,
-		   "dev %s ino %lu pos %llu len %u copied %u",
-		   inode->i_sb->s_id, inode->i_ino,
-		   (unsigned long long) pos, len, copied);
+	trace_ext4_ordered_write_end(inode, pos, len, copied);
 	ret = ext4_jbd2_file_inode(handle, inode);
 
 	if (ret == 0) {
@@ -1636,7 +1629,7 @@ static int ext4_ordered_write_end(struct file *file,
1636 1629
1637 if (pos + len > inode->i_size) { 1630 if (pos + len > inode->i_size) {
1638 vmtruncate(inode, inode->i_size); 1631 vmtruncate(inode, inode->i_size);
1639 /* 1632 /*
1640 * If vmtruncate failed early the inode might still be 1633 * If vmtruncate failed early the inode might still be
1641 * on the orphan list; we need to make sure the inode 1634 * on the orphan list; we need to make sure the inode
1642 * is removed from the orphan list in that case. 1635 * is removed from the orphan list in that case.
@@ -1650,18 +1643,15 @@ static int ext4_ordered_write_end(struct file *file,
1650} 1643}
1651 1644
1652static int ext4_writeback_write_end(struct file *file, 1645static int ext4_writeback_write_end(struct file *file,
1653 struct address_space *mapping, 1646 struct address_space *mapping,
1654 loff_t pos, unsigned len, unsigned copied, 1647 loff_t pos, unsigned len, unsigned copied,
1655 struct page *page, void *fsdata) 1648 struct page *page, void *fsdata)
1656{ 1649{
1657 handle_t *handle = ext4_journal_current_handle(); 1650 handle_t *handle = ext4_journal_current_handle();
1658 struct inode *inode = mapping->host; 1651 struct inode *inode = mapping->host;
1659 int ret = 0, ret2; 1652 int ret = 0, ret2;
1660 1653
1661 trace_mark(ext4_writeback_write_end, 1654 trace_ext4_writeback_write_end(inode, pos, len, copied);
1662 "dev %s ino %lu pos %llu len %u copied %u",
1663 inode->i_sb->s_id, inode->i_ino,
1664 (unsigned long long) pos, len, copied);
1665 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, 1655 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1666 page, fsdata); 1656 page, fsdata);
1667 copied = ret2; 1657 copied = ret2;
@@ -1681,7 +1671,7 @@ static int ext4_writeback_write_end(struct file *file,
1681 1671
1682 if (pos + len > inode->i_size) { 1672 if (pos + len > inode->i_size) {
1683 vmtruncate(inode, inode->i_size); 1673 vmtruncate(inode, inode->i_size);
1684 /* 1674 /*
1685 * If vmtruncate failed early the inode might still be 1675 * If vmtruncate failed early the inode might still be
1686 * on the orphan list; we need to make sure the inode 1676 * on the orphan list; we need to make sure the inode
1687 * is removed from the orphan list in that case. 1677 * is removed from the orphan list in that case.
@@ -1694,9 +1684,9 @@ static int ext4_writeback_write_end(struct file *file,
1694} 1684}
1695 1685
1696static int ext4_journalled_write_end(struct file *file, 1686static int ext4_journalled_write_end(struct file *file,
1697 struct address_space *mapping, 1687 struct address_space *mapping,
1698 loff_t pos, unsigned len, unsigned copied, 1688 loff_t pos, unsigned len, unsigned copied,
1699 struct page *page, void *fsdata) 1689 struct page *page, void *fsdata)
1700{ 1690{
1701 handle_t *handle = ext4_journal_current_handle(); 1691 handle_t *handle = ext4_journal_current_handle();
1702 struct inode *inode = mapping->host; 1692 struct inode *inode = mapping->host;
@@ -1705,10 +1695,7 @@ static int ext4_journalled_write_end(struct file *file,
1705 unsigned from, to; 1695 unsigned from, to;
1706 loff_t new_i_size; 1696 loff_t new_i_size;
1707 1697
1708 trace_mark(ext4_journalled_write_end, 1698 trace_ext4_journalled_write_end(inode, pos, len, copied);
1709 "dev %s ino %lu pos %llu len %u copied %u",
1710 inode->i_sb->s_id, inode->i_ino,
1711 (unsigned long long) pos, len, copied);
1712 from = pos & (PAGE_CACHE_SIZE - 1); 1699 from = pos & (PAGE_CACHE_SIZE - 1);
1713 to = from + len; 1700 to = from + len;
1714 1701
@@ -1747,7 +1734,7 @@ static int ext4_journalled_write_end(struct file *file,
1747 ret = ret2; 1734 ret = ret2;
1748 if (pos + len > inode->i_size) { 1735 if (pos + len > inode->i_size) {
1749 vmtruncate(inode, inode->i_size); 1736 vmtruncate(inode, inode->i_size);
1750 /* 1737 /*
1751 * If vmtruncate failed early the inode might still be 1738 * If vmtruncate failed early the inode might still be
1752 * on the orphan list; we need to make sure the inode 1739 * on the orphan list; we need to make sure the inode
1753 * is removed from the orphan list in that case. 1740 * is removed from the orphan list in that case.
@@ -1854,7 +1841,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1854} 1841}
1855 1842
1856static void ext4_da_page_release_reservation(struct page *page, 1843static void ext4_da_page_release_reservation(struct page *page,
1857 unsigned long offset) 1844 unsigned long offset)
1858{ 1845{
1859 int to_release = 0; 1846 int to_release = 0;
1860 struct buffer_head *head, *bh; 1847 struct buffer_head *head, *bh;
@@ -2554,9 +2541,7 @@ static int ext4_da_writepage(struct page *page,
2554 struct buffer_head *page_bufs; 2541 struct buffer_head *page_bufs;
2555 struct inode *inode = page->mapping->host; 2542 struct inode *inode = page->mapping->host;
2556 2543
2557 trace_mark(ext4_da_writepage, 2544 trace_ext4_da_writepage(inode, page);
2558 "dev %s ino %lu page_index %lu",
2559 inode->i_sb->s_id, inode->i_ino, page->index);
2560 size = i_size_read(inode); 2545 size = i_size_read(inode);
2561 if (page->index == size >> PAGE_CACHE_SHIFT) 2546 if (page->index == size >> PAGE_CACHE_SHIFT)
2562 len = size & ~PAGE_CACHE_MASK; 2547 len = size & ~PAGE_CACHE_MASK;
@@ -2667,19 +2652,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2667 int needed_blocks, ret = 0, nr_to_writebump = 0; 2652 int needed_blocks, ret = 0, nr_to_writebump = 0;
2668 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2653 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2669 2654
2670 trace_mark(ext4_da_writepages, 2655 trace_ext4_da_writepages(inode, wbc);
2671 "dev %s ino %lu nr_t_write %ld "
2672 "pages_skipped %ld range_start %llu "
2673 "range_end %llu nonblocking %d "
2674 "for_kupdate %d for_reclaim %d "
2675 "for_writepages %d range_cyclic %d",
2676 inode->i_sb->s_id, inode->i_ino,
2677 wbc->nr_to_write, wbc->pages_skipped,
2678 (unsigned long long) wbc->range_start,
2679 (unsigned long long) wbc->range_end,
2680 wbc->nonblocking, wbc->for_kupdate,
2681 wbc->for_reclaim, wbc->for_writepages,
2682 wbc->range_cyclic);
2683 2656
2684 /* 2657 /*
2685 * No pages to write? This is mainly a kludge to avoid starting 2658 * No pages to write? This is mainly a kludge to avoid starting
@@ -2693,13 +2666,13 @@ static int ext4_da_writepages(struct address_space *mapping,
2693 * If the filesystem has aborted, it is read-only, so return 2666 * If the filesystem has aborted, it is read-only, so return
2694 * right away instead of dumping stack traces later on that 2667 * right away instead of dumping stack traces later on that
2695 * will obscure the real source of the problem. We test 2668 * will obscure the real source of the problem. We test
2696 * EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because 2669 * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because
2697 * the latter could be true if the filesystem is mounted 2670 * the latter could be true if the filesystem is mounted
2698 * read-only, and in that case, ext4_da_writepages should 2671 * read-only, and in that case, ext4_da_writepages should
2699 * *never* be called, so if that ever happens, we would want 2672 * *never* be called, so if that ever happens, we would want
2700 * the stack trace. 2673 * the stack trace.
2701 */ 2674 */
2702 if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT)) 2675 if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
2703 return -EROFS; 2676 return -EROFS;
2704 2677
2705 /* 2678 /*
@@ -2845,14 +2818,7 @@ out_writepages:
2845 if (!no_nrwrite_index_update) 2818 if (!no_nrwrite_index_update)
2846 wbc->no_nrwrite_index_update = 0; 2819 wbc->no_nrwrite_index_update = 0;
2847 wbc->nr_to_write -= nr_to_writebump; 2820 wbc->nr_to_write -= nr_to_writebump;
2848 trace_mark(ext4_da_writepage_result, 2821 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
2849 "dev %s ino %lu ret %d pages_written %d "
2850 "pages_skipped %ld congestion %d "
2851 "more_io %d no_nrwrite_index_update %d",
2852 inode->i_sb->s_id, inode->i_ino, ret,
2853 pages_written, wbc->pages_skipped,
2854 wbc->encountered_congestion, wbc->more_io,
2855 wbc->no_nrwrite_index_update);
2856 return ret; 2822 return ret;
2857} 2823}
2858 2824
@@ -2884,8 +2850,8 @@ static int ext4_nonda_switch(struct super_block *sb)
2884} 2850}
2885 2851
2886static int ext4_da_write_begin(struct file *file, struct address_space *mapping, 2852static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2887 loff_t pos, unsigned len, unsigned flags, 2853 loff_t pos, unsigned len, unsigned flags,
2888 struct page **pagep, void **fsdata) 2854 struct page **pagep, void **fsdata)
2889{ 2855{
2890 int ret, retries = 0; 2856 int ret, retries = 0;
2891 struct page *page; 2857 struct page *page;
@@ -2904,11 +2870,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2904 len, flags, pagep, fsdata); 2870 len, flags, pagep, fsdata);
2905 } 2871 }
2906 *fsdata = (void *)0; 2872 *fsdata = (void *)0;
2907 2873 trace_ext4_da_write_begin(inode, pos, len, flags);
2908 trace_mark(ext4_da_write_begin,
2909 "dev %s ino %lu pos %llu len %u flags %u",
2910 inode->i_sb->s_id, inode->i_ino,
2911 (unsigned long long) pos, len, flags);
2912retry: 2874retry:
2913 /* 2875 /*
2914 * With delayed allocation, we don't log the i_disksize update 2876 * With delayed allocation, we don't log the i_disksize update
@@ -2959,7 +2921,7 @@ out:
2959 * when write to the end of file but not require block allocation 2921 * when write to the end of file but not require block allocation
2960 */ 2922 */
2961static int ext4_da_should_update_i_disksize(struct page *page, 2923static int ext4_da_should_update_i_disksize(struct page *page,
2962 unsigned long offset) 2924 unsigned long offset)
2963{ 2925{
2964 struct buffer_head *bh; 2926 struct buffer_head *bh;
2965 struct inode *inode = page->mapping->host; 2927 struct inode *inode = page->mapping->host;
@@ -2978,9 +2940,9 @@ static int ext4_da_should_update_i_disksize(struct page *page,
2978} 2940}
2979 2941
2980static int ext4_da_write_end(struct file *file, 2942static int ext4_da_write_end(struct file *file,
2981 struct address_space *mapping, 2943 struct address_space *mapping,
2982 loff_t pos, unsigned len, unsigned copied, 2944 loff_t pos, unsigned len, unsigned copied,
2983 struct page *page, void *fsdata) 2945 struct page *page, void *fsdata)
2984{ 2946{
2985 struct inode *inode = mapping->host; 2947 struct inode *inode = mapping->host;
2986 int ret = 0, ret2; 2948 int ret = 0, ret2;
@@ -3001,10 +2963,7 @@ static int ext4_da_write_end(struct file *file,
3001 } 2963 }
3002 } 2964 }
3003 2965
3004 trace_mark(ext4_da_write_end, 2966 trace_ext4_da_write_end(inode, pos, len, copied);
3005 "dev %s ino %lu pos %llu len %u copied %u",
3006 inode->i_sb->s_id, inode->i_ino,
3007 (unsigned long long) pos, len, copied);
3008 start = pos & (PAGE_CACHE_SIZE - 1); 2967 start = pos & (PAGE_CACHE_SIZE - 1);
3009 end = start + copied - 1; 2968 end = start + copied - 1;
3010 2969
@@ -3081,7 +3040,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
3081 * not strictly speaking necessary (and for users of 3040 * not strictly speaking necessary (and for users of
3082 * laptop_mode, not even desirable). However, to do otherwise 3041 * laptop_mode, not even desirable). However, to do otherwise
3083 * would require replicating code paths in: 3042 * would require replicating code paths in:
3084 * 3043 *
3085 * ext4_da_writepages() -> 3044 * ext4_da_writepages() ->
3086 * write_cache_pages() ---> (via passed in callback function) 3045 * write_cache_pages() ---> (via passed in callback function)
3087 * __mpage_da_writepage() --> 3046 * __mpage_da_writepage() -->
@@ -3101,7 +3060,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
3101 * write out the pages, but rather only collect contiguous 3060 * write out the pages, but rather only collect contiguous
3102 * logical block extents, call the multi-block allocator, and 3061 * logical block extents, call the multi-block allocator, and
3103 * then update the buffer heads with the block allocations. 3062 * then update the buffer heads with the block allocations.
3104 * 3063 *
3105 * For now, though, we'll cheat by calling filemap_flush(), 3064 * For now, though, we'll cheat by calling filemap_flush(),
3106 * which will map the blocks, and start the I/O, but not 3065 * which will map the blocks, and start the I/O, but not
3107 * actually wait for the I/O to complete. 3066 * actually wait for the I/O to complete.
@@ -3237,7 +3196,7 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
3237 * 3196 *
3238 */ 3197 */
3239static int __ext4_normal_writepage(struct page *page, 3198static int __ext4_normal_writepage(struct page *page,
3240 struct writeback_control *wbc) 3199 struct writeback_control *wbc)
3241{ 3200{
3242 struct inode *inode = page->mapping->host; 3201 struct inode *inode = page->mapping->host;
3243 3202
@@ -3249,15 +3208,13 @@ static int __ext4_normal_writepage(struct page *page,
3249} 3208}
3250 3209
3251static int ext4_normal_writepage(struct page *page, 3210static int ext4_normal_writepage(struct page *page,
3252 struct writeback_control *wbc) 3211 struct writeback_control *wbc)
3253{ 3212{
3254 struct inode *inode = page->mapping->host; 3213 struct inode *inode = page->mapping->host;
3255 loff_t size = i_size_read(inode); 3214 loff_t size = i_size_read(inode);
3256 loff_t len; 3215 loff_t len;
3257 3216
3258 trace_mark(ext4_normal_writepage, 3217 trace_ext4_normal_writepage(inode, page);
3259 "dev %s ino %lu page_index %lu",
3260 inode->i_sb->s_id, inode->i_ino, page->index);
3261 J_ASSERT(PageLocked(page)); 3218 J_ASSERT(PageLocked(page));
3262 if (page->index == size >> PAGE_CACHE_SHIFT) 3219 if (page->index == size >> PAGE_CACHE_SHIFT)
3263 len = size & ~PAGE_CACHE_MASK; 3220 len = size & ~PAGE_CACHE_MASK;
@@ -3287,7 +3244,7 @@ static int ext4_normal_writepage(struct page *page,
3287} 3244}
3288 3245
3289static int __ext4_journalled_writepage(struct page *page, 3246static int __ext4_journalled_writepage(struct page *page,
3290 struct writeback_control *wbc) 3247 struct writeback_control *wbc)
3291{ 3248{
3292 struct address_space *mapping = page->mapping; 3249 struct address_space *mapping = page->mapping;
3293 struct inode *inode = mapping->host; 3250 struct inode *inode = mapping->host;
@@ -3337,15 +3294,13 @@ out:
3337} 3294}
3338 3295
3339static int ext4_journalled_writepage(struct page *page, 3296static int ext4_journalled_writepage(struct page *page,
3340 struct writeback_control *wbc) 3297 struct writeback_control *wbc)
3341{ 3298{
3342 struct inode *inode = page->mapping->host; 3299 struct inode *inode = page->mapping->host;
3343 loff_t size = i_size_read(inode); 3300 loff_t size = i_size_read(inode);
3344 loff_t len; 3301 loff_t len;
3345 3302
3346 trace_mark(ext4_journalled_writepage, 3303 trace_ext4_journalled_writepage(inode, page);
3347 "dev %s ino %lu page_index %lu",
3348 inode->i_sb->s_id, inode->i_ino, page->index);
3349 J_ASSERT(PageLocked(page)); 3304 J_ASSERT(PageLocked(page));
3350 if (page->index == size >> PAGE_CACHE_SHIFT) 3305 if (page->index == size >> PAGE_CACHE_SHIFT)
3351 len = size & ~PAGE_CACHE_MASK; 3306 len = size & ~PAGE_CACHE_MASK;
@@ -3442,8 +3397,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
3442 * VFS code falls back into buffered path in that case so we are safe. 3397 * VFS code falls back into buffered path in that case so we are safe.
3443 */ 3398 */
3444static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, 3399static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
3445 const struct iovec *iov, loff_t offset, 3400 const struct iovec *iov, loff_t offset,
3446 unsigned long nr_segs) 3401 unsigned long nr_segs)
3447{ 3402{
3448 struct file *file = iocb->ki_filp; 3403 struct file *file = iocb->ki_filp;
3449 struct inode *inode = file->f_mapping->host; 3404 struct inode *inode = file->f_mapping->host;
@@ -3763,7 +3718,8 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
3763 * (no partially truncated stuff there). */ 3718 * (no partially truncated stuff there). */
3764 3719
3765static Indirect *ext4_find_shared(struct inode *inode, int depth, 3720static Indirect *ext4_find_shared(struct inode *inode, int depth,
3766 ext4_lblk_t offsets[4], Indirect chain[4], __le32 *top) 3721 ext4_lblk_t offsets[4], Indirect chain[4],
3722 __le32 *top)
3767{ 3723{
3768 Indirect *partial, *p; 3724 Indirect *partial, *p;
3769 int k, err; 3725 int k, err;
@@ -3819,8 +3775,10 @@ no_top:
3819 * than `count' because there can be holes in there. 3775 * than `count' because there can be holes in there.
3820 */ 3776 */
3821static void ext4_clear_blocks(handle_t *handle, struct inode *inode, 3777static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
3822 struct buffer_head *bh, ext4_fsblk_t block_to_free, 3778 struct buffer_head *bh,
3823 unsigned long count, __le32 *first, __le32 *last) 3779 ext4_fsblk_t block_to_free,
3780 unsigned long count, __le32 *first,
3781 __le32 *last)
3824{ 3782{
3825 __le32 *p; 3783 __le32 *p;
3826 if (try_to_extend_transaction(handle, inode)) { 3784 if (try_to_extend_transaction(handle, inode)) {
@@ -3837,10 +3795,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
3837 } 3795 }
3838 3796
3839 /* 3797 /*
3840 * Any buffers which are on the journal will be in memory. We find 3798 * Any buffers which are on the journal will be in memory. We
3841 * them on the hash table so jbd2_journal_revoke() will run jbd2_journal_forget() 3799 * find them on the hash table so jbd2_journal_revoke() will
3842 * on them. We've already detached each block from the file, so 3800 * run jbd2_journal_forget() on them. We've already detached
3843 * bforget() in jbd2_journal_forget() should be safe. 3801 * each block from the file, so bforget() in
3802 * jbd2_journal_forget() should be safe.
3844 * 3803 *
3845 * AKPM: turn on bforget in jbd2_journal_forget()!!! 3804 * AKPM: turn on bforget in jbd2_journal_forget()!!!
3846 */ 3805 */
@@ -4212,7 +4171,7 @@ void ext4_truncate(struct inode *inode)
4212 (__le32*)partial->bh->b_data+addr_per_block, 4171 (__le32*)partial->bh->b_data+addr_per_block,
4213 (chain+n-1) - partial); 4172 (chain+n-1) - partial);
4214 BUFFER_TRACE(partial->bh, "call brelse"); 4173 BUFFER_TRACE(partial->bh, "call brelse");
4215 brelse (partial->bh); 4174 brelse(partial->bh);
4216 partial--; 4175 partial--;
4217 } 4176 }
4218do_indirects: 4177do_indirects:
@@ -4453,8 +4412,9 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei)
4453 if (flags & S_DIRSYNC) 4412 if (flags & S_DIRSYNC)
4454 ei->i_flags |= EXT4_DIRSYNC_FL; 4413 ei->i_flags |= EXT4_DIRSYNC_FL;
4455} 4414}
4415
4456static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, 4416static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
4457 struct ext4_inode_info *ei) 4417 struct ext4_inode_info *ei)
4458{ 4418{
4459 blkcnt_t i_blocks ; 4419 blkcnt_t i_blocks ;
4460 struct inode *inode = &(ei->vfs_inode); 4420 struct inode *inode = &(ei->vfs_inode);
@@ -4569,7 +4529,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4569 EXT4_GOOD_OLD_INODE_SIZE + 4529 EXT4_GOOD_OLD_INODE_SIZE +
4570 ei->i_extra_isize; 4530 ei->i_extra_isize;
4571 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) 4531 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
4572 ei->i_state |= EXT4_STATE_XATTR; 4532 ei->i_state |= EXT4_STATE_XATTR;
4573 } 4533 }
4574 } else 4534 } else
4575 ei->i_extra_isize = 0; 4535 ei->i_extra_isize = 0;
@@ -4588,7 +4548,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4588 4548
4589 ret = 0; 4549 ret = 0;
4590 if (ei->i_file_acl && 4550 if (ei->i_file_acl &&
4591 ((ei->i_file_acl < 4551 ((ei->i_file_acl <
4592 (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + 4552 (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
4593 EXT4_SB(sb)->s_gdb_count)) || 4553 EXT4_SB(sb)->s_gdb_count)) ||
4594 (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) { 4554 (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
@@ -4603,15 +4563,15 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4603 !ext4_inode_is_fast_symlink(inode))) 4563 !ext4_inode_is_fast_symlink(inode)))
4604 /* Validate extent which is part of inode */ 4564 /* Validate extent which is part of inode */
4605 ret = ext4_ext_check_inode(inode); 4565 ret = ext4_ext_check_inode(inode);
4606 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 4566 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
4607 (S_ISLNK(inode->i_mode) && 4567 (S_ISLNK(inode->i_mode) &&
4608 !ext4_inode_is_fast_symlink(inode))) { 4568 !ext4_inode_is_fast_symlink(inode))) {
4609 /* Validate block references which are part of inode */ 4569 /* Validate block references which are part of inode */
4610 ret = ext4_check_inode_blockref(inode); 4570 ret = ext4_check_inode_blockref(inode);
4611 } 4571 }
4612 if (ret) { 4572 if (ret) {
4613 brelse(bh); 4573 brelse(bh);
4614 goto bad_inode; 4574 goto bad_inode;
4615 } 4575 }
4616 4576
4617 if (S_ISREG(inode->i_mode)) { 4577 if (S_ISREG(inode->i_mode)) {
@@ -4642,7 +4602,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4642 } else { 4602 } else {
4643 brelse(bh); 4603 brelse(bh);
4644 ret = -EIO; 4604 ret = -EIO;
4645 ext4_error(inode->i_sb, __func__, 4605 ext4_error(inode->i_sb, __func__,
4646 "bogus i_mode (%o) for inode=%lu", 4606 "bogus i_mode (%o) for inode=%lu",
4647 inode->i_mode, inode->i_ino); 4607 inode->i_mode, inode->i_ino);
4648 goto bad_inode; 4608 goto bad_inode;
@@ -4795,8 +4755,9 @@ static int ext4_do_update_inode(handle_t *handle,
4795 cpu_to_le32(new_encode_dev(inode->i_rdev)); 4755 cpu_to_le32(new_encode_dev(inode->i_rdev));
4796 raw_inode->i_block[2] = 0; 4756 raw_inode->i_block[2] = 0;
4797 } 4757 }
4798 } else for (block = 0; block < EXT4_N_BLOCKS; block++) 4758 } else
4799 raw_inode->i_block[block] = ei->i_data[block]; 4759 for (block = 0; block < EXT4_N_BLOCKS; block++)
4760 raw_inode->i_block[block] = ei->i_data[block];
4800 4761
4801 raw_inode->i_disk_version = cpu_to_le32(inode->i_version); 4762 raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
4802 if (ei->i_extra_isize) { 4763 if (ei->i_extra_isize) {
@@ -5150,7 +5111,7 @@ int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
 5150 * Given this, we know that the caller already has write access to iloc->bh. 5111
5151 */ 5112 */
5152int ext4_mark_iloc_dirty(handle_t *handle, 5113int ext4_mark_iloc_dirty(handle_t *handle,
5153 struct inode *inode, struct ext4_iloc *iloc) 5114 struct inode *inode, struct ext4_iloc *iloc)
5154{ 5115{
5155 int err = 0; 5116 int err = 0;
5156 5117
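
The inode.c hunks above are part of a wholesale conversion of ext4's ad-hoc trace_mark() calls into typed tracepoints (trace_ext4_write_begin(), trace_ext4_da_writepages(), and so on), whose definitions live in a shared trace header (the mballoc hunk further down pulls in <trace/events/ext4.h>). As a rough, hedged illustration of what such a definition looks like -- this is not the actual ext4 trace header, and the usual TRACE_SYSTEM/define_trace.h boilerplate around it is omitted -- a TRACE_EVENT for ext4_write_begin could be sketched as:

/* Illustrative sketch only; field choices here are assumptions. */
TRACE_EVENT(ext4_write_begin,
	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
		 unsigned int flags),
	TP_ARGS(inode, pos, len, flags),

	TP_STRUCT__entry(
		__field(dev_t,		dev)
		__field(ino_t,		ino)
		__field(loff_t,		pos)
		__field(unsigned int,	len)
		__field(unsigned int,	flags)
	),

	TP_fast_assign(
		__entry->dev	= inode->i_sb->s_dev;
		__entry->ino	= inode->i_ino;
		__entry->pos	= pos;
		__entry->len	= len;
		__entry->flags	= flags;
	),

	TP_printk("dev %d,%d ino %lu pos %lld len %u flags %u",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino,
		  (long long) __entry->pos, __entry->len, __entry->flags)
);

The payoff over trace_mark() is that each event has a fixed, typed layout instead of a printf-style format string scattered through the filesystem code, and it can be enabled individually under the tracing events directory.
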
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 91e75f7a9e73..bb415408fdb6 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -14,6 +14,7 @@
14#include <linux/compat.h> 14#include <linux/compat.h>
15#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
16#include <linux/mount.h> 16#include <linux/mount.h>
17#include <linux/file.h>
17#include <asm/uaccess.h> 18#include <asm/uaccess.h>
18#include "ext4_jbd2.h" 19#include "ext4_jbd2.h"
19#include "ext4.h" 20#include "ext4.h"
@@ -213,6 +214,41 @@ setversion_out:
213 214
214 return err; 215 return err;
215 } 216 }
217
218 case EXT4_IOC_MOVE_EXT: {
219 struct move_extent me;
220 struct file *donor_filp;
221 int err;
222
223 if (copy_from_user(&me,
224 (struct move_extent __user *)arg, sizeof(me)))
225 return -EFAULT;
226
227 donor_filp = fget(me.donor_fd);
228 if (!donor_filp)
229 return -EBADF;
230
231 if (!capable(CAP_DAC_OVERRIDE)) {
232 if ((current->real_cred->fsuid != inode->i_uid) ||
233 !(inode->i_mode & S_IRUSR) ||
234 !(donor_filp->f_dentry->d_inode->i_mode &
235 S_IRUSR)) {
236 fput(donor_filp);
237 return -EACCES;
238 }
239 }
240
241 err = ext4_move_extents(filp, donor_filp, me.orig_start,
242 me.donor_start, me.len, &me.moved_len);
243 fput(donor_filp);
244
245 if (!err)
246 if (copy_to_user((struct move_extent *)arg,
247 &me, sizeof(me)))
248 return -EFAULT;
249 return err;
250 }
251
216 case EXT4_IOC_GROUP_ADD: { 252 case EXT4_IOC_GROUP_ADD: {
217 struct ext4_new_group_data input; 253 struct ext4_new_group_data input;
218 struct super_block *sb = inode->i_sb; 254 struct super_block *sb = inode->i_sb;
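
The new EXT4_IOC_MOVE_EXT case above is the ioctl side of the online-defragmentation work: userspace opens a donor file, fills in a struct move_extent, and the kernel exchanges the named block ranges via ext4_move_extents(). A hypothetical userspace sketch follows; EXT4_IOC_MOVE_EXT and struct move_extent really come from the kernel's ext4 headers, so the local declarations below (field names taken from the handler above, layout and ioctl number 'f'/15 assumed) are illustrative, not authoritative.

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

/* Assumed layout; take the real declaration from fs/ext4/ext4.h. */
struct move_extent {
	uint32_t reserved;	/* should be zero (assumed) */
	uint32_t donor_fd;	/* fd of the donor file */
	uint64_t orig_start;	/* first logical block of the original file */
	uint64_t donor_start;	/* first logical block of the donor file */
	uint64_t len;		/* number of blocks to exchange */
	uint64_t moved_len;	/* filled in by the kernel */
};
#define EXT4_IOC_MOVE_EXT	_IOWR('f', 15, struct move_extent)

int main(int argc, char **argv)
{
	struct move_extent me = { 0 };
	int orig_fd, donor_fd;

	if (argc != 3)
		return 1;
	orig_fd = open(argv[1], O_RDWR);
	donor_fd = open(argv[2], O_RDWR);
	if (orig_fd < 0 || donor_fd < 0)
		return 1;

	me.donor_fd = donor_fd;
	me.orig_start = 0;	/* swap the first 1024 blocks */
	me.donor_start = 0;
	me.len = 1024;

	if (ioctl(orig_fd, EXT4_IOC_MOVE_EXT, &me) < 0) {
		perror("EXT4_IOC_MOVE_EXT");
		return 1;
	}
	printf("moved %llu blocks\n", (unsigned long long) me.moved_len);
	return 0;
}

Note the permission check in the handler: without CAP_DAC_OVERRIDE the caller must own the original file and both files must be owner-readable, otherwise the ioctl fails with -EACCES.
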
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index ed8482e22c0e..519a0a686d94 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -22,6 +22,8 @@
22 */ 22 */
23 23
24#include "mballoc.h" 24#include "mballoc.h"
25#include <trace/events/ext4.h>
26
25/* 27/*
26 * MUSTDO: 28 * MUSTDO:
27 * - test ext4_ext_search_left() and ext4_ext_search_right() 29 * - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -340,8 +342,6 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
340 ext4_group_t group); 342 ext4_group_t group);
341static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); 343static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
342 344
343
344
345static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 345static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
346{ 346{
347#if BITS_PER_LONG == 64 347#if BITS_PER_LONG == 64
@@ -2859,9 +2859,8 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2859 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) 2859 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
2860 + entry->start_blk 2860 + entry->start_blk
2861 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 2861 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
2862 trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", 2862 trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
2863 sb->s_id, (unsigned long long) discard_block, 2863 entry->count);
2864 entry->count);
2865 sb_issue_discard(sb, discard_block, entry->count); 2864 sb_issue_discard(sb, discard_block, entry->count);
2866 2865
2867 kmem_cache_free(ext4_free_ext_cachep, entry); 2866 kmem_cache_free(ext4_free_ext_cachep, entry);
@@ -3629,10 +3628,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3629 3628
3630 mb_debug("new inode pa %p: %llu/%u for %u\n", pa, 3629 mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
3631 pa->pa_pstart, pa->pa_len, pa->pa_lstart); 3630 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3632 trace_mark(ext4_mb_new_inode_pa, 3631 trace_ext4_mb_new_inode_pa(ac, pa);
3633 "dev %s ino %lu pstart %llu len %u lstart %u",
3634 sb->s_id, ac->ac_inode->i_ino,
3635 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3636 3632
3637 ext4_mb_use_inode_pa(ac, pa); 3633 ext4_mb_use_inode_pa(ac, pa);
3638 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); 3634 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
@@ -3691,9 +3687,8 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3691 pa->pa_type = MB_GROUP_PA; 3687 pa->pa_type = MB_GROUP_PA;
3692 3688
3693 mb_debug("new group pa %p: %llu/%u for %u\n", pa, 3689 mb_debug("new group pa %p: %llu/%u for %u\n", pa,
3694 pa->pa_pstart, pa->pa_len, pa->pa_lstart); 3690 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3695 trace_mark(ext4_mb_new_group_pa, "dev %s pstart %llu len %u lstart %u", 3691 trace_ext4_mb_new_group_pa(ac, pa);
3696 sb->s_id, pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3697 3692
3698 ext4_mb_use_group_pa(ac, pa); 3693 ext4_mb_use_group_pa(ac, pa);
3699 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); 3694 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
@@ -3783,10 +3778,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3783 ext4_mb_store_history(ac); 3778 ext4_mb_store_history(ac);
3784 } 3779 }
3785 3780
3786 trace_mark(ext4_mb_release_inode_pa, 3781 trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit,
3787 "dev %s ino %lu block %llu count %u", 3782 next - bit);
3788 sb->s_id, pa->pa_inode->i_ino, grp_blk_start + bit,
3789 next - bit);
3790 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); 3783 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3791 bit = next + 1; 3784 bit = next + 1;
3792 } 3785 }
@@ -3820,8 +3813,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3820 if (ac) 3813 if (ac)
3821 ac->ac_op = EXT4_MB_HISTORY_DISCARD; 3814 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3822 3815
3823 trace_mark(ext4_mb_release_group_pa, "dev %s pstart %llu len %d", 3816 trace_ext4_mb_release_group_pa(ac, pa);
3824 sb->s_id, pa->pa_pstart, pa->pa_len);
3825 BUG_ON(pa->pa_deleted == 0); 3817 BUG_ON(pa->pa_deleted == 0);
3826 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3818 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3827 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3819 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
@@ -3889,6 +3881,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3889 3881
3890 INIT_LIST_HEAD(&list); 3882 INIT_LIST_HEAD(&list);
3891 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 3883 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3884 if (ac)
3885 ac->ac_sb = sb;
3892repeat: 3886repeat:
3893 ext4_lock_group(sb, group); 3887 ext4_lock_group(sb, group);
3894 list_for_each_entry_safe(pa, tmp, 3888 list_for_each_entry_safe(pa, tmp,
@@ -3987,12 +3981,15 @@ void ext4_discard_preallocations(struct inode *inode)
3987 } 3981 }
3988 3982
3989 mb_debug("discard preallocation for inode %lu\n", inode->i_ino); 3983 mb_debug("discard preallocation for inode %lu\n", inode->i_ino);
3990 trace_mark(ext4_discard_preallocations, "dev %s ino %lu", sb->s_id, 3984 trace_ext4_discard_preallocations(inode);
3991 inode->i_ino);
3992 3985
3993 INIT_LIST_HEAD(&list); 3986 INIT_LIST_HEAD(&list);
3994 3987
3995 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 3988 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3989 if (ac) {
3990 ac->ac_sb = sb;
3991 ac->ac_inode = inode;
3992 }
3996repeat: 3993repeat:
3997 /* first, collect all pa's in the inode */ 3994 /* first, collect all pa's in the inode */
3998 spin_lock(&ei->i_prealloc_lock); 3995 spin_lock(&ei->i_prealloc_lock);
@@ -4276,6 +4273,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4276 4273
4277 INIT_LIST_HEAD(&discard_list); 4274 INIT_LIST_HEAD(&discard_list);
4278 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4275 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4276 if (ac)
4277 ac->ac_sb = sb;
4279 4278
4280 spin_lock(&lg->lg_prealloc_lock); 4279 spin_lock(&lg->lg_prealloc_lock);
4281 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], 4280 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
@@ -4445,8 +4444,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4445 int ret; 4444 int ret;
4446 int freed = 0; 4445 int freed = 0;
4447 4446
4448 trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", 4447 trace_ext4_mb_discard_preallocations(sb, needed);
4449 sb->s_id, needed);
4450 for (i = 0; i < ngroups && needed > 0; i++) { 4448 for (i = 0; i < ngroups && needed > 0; i++) {
4451 ret = ext4_mb_discard_group_preallocations(sb, i, needed); 4449 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4452 freed += ret; 4450 freed += ret;
@@ -4475,17 +4473,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4475 sb = ar->inode->i_sb; 4473 sb = ar->inode->i_sb;
4476 sbi = EXT4_SB(sb); 4474 sbi = EXT4_SB(sb);
4477 4475
4478 trace_mark(ext4_request_blocks, "dev %s flags %u len %u ino %lu " 4476 trace_ext4_request_blocks(ar);
4479 "lblk %llu goal %llu lleft %llu lright %llu "
4480 "pleft %llu pright %llu ",
4481 sb->s_id, ar->flags, ar->len,
4482 ar->inode ? ar->inode->i_ino : 0,
4483 (unsigned long long) ar->logical,
4484 (unsigned long long) ar->goal,
4485 (unsigned long long) ar->lleft,
4486 (unsigned long long) ar->lright,
4487 (unsigned long long) ar->pleft,
4488 (unsigned long long) ar->pright);
4489 4477
4490 /* 4478 /*
4491 * For delayed allocation, we could skip the ENOSPC and 4479 * For delayed allocation, we could skip the ENOSPC and
@@ -4521,7 +4509,10 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4521 } 4509 }
4522 4510
4523 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4511 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4524 if (!ac) { 4512 if (ac) {
4513 ac->ac_sb = sb;
4514 ac->ac_inode = ar->inode;
4515 } else {
4525 ar->len = 0; 4516 ar->len = 0;
4526 *errp = -ENOMEM; 4517 *errp = -ENOMEM;
4527 goto out1; 4518 goto out1;
@@ -4594,18 +4585,7 @@ out3:
4594 reserv_blks); 4585 reserv_blks);
4595 } 4586 }
4596 4587
4597 trace_mark(ext4_allocate_blocks, 4588 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
4598 "dev %s block %llu flags %u len %u ino %lu "
4599 "logical %llu goal %llu lleft %llu lright %llu "
4600 "pleft %llu pright %llu ",
4601 sb->s_id, (unsigned long long) block,
4602 ar->flags, ar->len, ar->inode ? ar->inode->i_ino : 0,
4603 (unsigned long long) ar->logical,
4604 (unsigned long long) ar->goal,
4605 (unsigned long long) ar->lleft,
4606 (unsigned long long) ar->lright,
4607 (unsigned long long) ar->pleft,
4608 (unsigned long long) ar->pright);
4609 4589
4610 return block; 4590 return block;
4611} 4591}
@@ -4709,7 +4689,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4709 * Main entry point into mballoc to free blocks 4689 * Main entry point into mballoc to free blocks
4710 */ 4690 */
4711void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, 4691void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4712 unsigned long block, unsigned long count, 4692 ext4_fsblk_t block, unsigned long count,
4713 int metadata, unsigned long *freed) 4693 int metadata, unsigned long *freed)
4714{ 4694{
4715 struct buffer_head *bitmap_bh = NULL; 4695 struct buffer_head *bitmap_bh = NULL;
@@ -4735,15 +4715,12 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4735 block + count > ext4_blocks_count(es)) { 4715 block + count > ext4_blocks_count(es)) {
4736 ext4_error(sb, __func__, 4716 ext4_error(sb, __func__,
4737 "Freeing blocks not in datazone - " 4717 "Freeing blocks not in datazone - "
4738 "block = %lu, count = %lu", block, count); 4718 "block = %llu, count = %lu", block, count);
4739 goto error_return; 4719 goto error_return;
4740 } 4720 }
4741 4721
4742 ext4_debug("freeing block %lu\n", block); 4722 ext4_debug("freeing block %llu\n", block);
4743 trace_mark(ext4_free_blocks, 4723 trace_ext4_free_blocks(inode, block, count, metadata);
4744 "dev %s block %llu count %lu metadata %d ino %lu",
4745 sb->s_id, (unsigned long long) block, count, metadata,
4746 inode ? inode->i_ino : 0);
4747 4724
4748 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4725 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4749 if (ac) { 4726 if (ac) {
@@ -4784,7 +4761,7 @@ do_more:
4784 4761
4785 ext4_error(sb, __func__, 4762 ext4_error(sb, __func__,
4786 "Freeing blocks in system zone - " 4763 "Freeing blocks in system zone - "
4787 "Block = %lu, count = %lu", block, count); 4764 "Block = %llu, count = %lu", block, count);
4788 /* err = 0. ext4_std_error should be a no op */ 4765 /* err = 0. ext4_std_error should be a no op */
4789 goto error_return; 4766 goto error_return;
4790 } 4767 }
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 75e34f69215b..c96bb19f58f9 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -19,7 +19,6 @@
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/version.h> 20#include <linux/version.h>
21#include <linux/blkdev.h> 21#include <linux/blkdev.h>
22#include <linux/marker.h>
23#include <linux/mutex.h> 22#include <linux/mutex.h>
24#include "ext4_jbd2.h" 23#include "ext4_jbd2.h"
25#include "ext4.h" 24#include "ext4.h"
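
Besides dropping the old <linux/marker.h> include and switching to <trace/events/ext4.h>, the mballoc hunks give each freshly allocated ext4_allocation_context a minimal amount of state right after kmem_cache_alloc(). The likely reason -- an interpretation, not spelled out in the diff -- is that the converted trace_ext4_mb_* calls pull the superblock (and, on the per-inode path, the inode) out of that context, so those fields must be valid even on the discard paths that previously left the context uninitialized. The recurring pattern, mirroring the hunks above:

	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);	/* may return NULL */
	if (ac) {
		ac->ac_sb = sb;		/* used by the tracepoints */
		ac->ac_inode = inode;	/* only where an inode is at hand */
	}
	/* the discard code below continues to tolerate ac == NULL */
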
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index fe64d9f79852..313a50b39741 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -458,6 +458,7 @@ int ext4_ext_migrate(struct inode *inode)
458 struct inode *tmp_inode = NULL; 458 struct inode *tmp_inode = NULL;
459 struct list_blocks_struct lb; 459 struct list_blocks_struct lb;
460 unsigned long max_entries; 460 unsigned long max_entries;
461 __u32 goal;
461 462
462 /* 463 /*
463 * If the filesystem does not support extents, or the inode 464 * If the filesystem does not support extents, or the inode
@@ -483,9 +484,10 @@ int ext4_ext_migrate(struct inode *inode)
483 retval = PTR_ERR(handle); 484 retval = PTR_ERR(handle);
484 return retval; 485 return retval;
485 } 486 }
486 tmp_inode = ext4_new_inode(handle, 487 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
487 inode->i_sb->s_root->d_inode, 488 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
488 S_IFREG); 489 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
490 S_IFREG, 0, goal);
489 if (IS_ERR(tmp_inode)) { 491 if (IS_ERR(tmp_inode)) {
490 retval = -ENOMEM; 492 retval = -ENOMEM;
491 ext4_journal_stop(handle); 493 ext4_journal_stop(handle);
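
The new goal calculation in ext4_ext_migrate() asks ext4_new_inode() to place the temporary inode in the same block group as the inode being migrated: it rounds i_ino down to the first inode number of that group. A quick worked example with made-up numbers:

	/* Hypothetical values, only to illustrate the arithmetic above. */
	unsigned long inodes_per_group = 8192;
	unsigned long ino = 20000;	/* group 2 holds inodes 16385..24576 */
	__u32 goal = (((ino - 1) / inodes_per_group) * inodes_per_group) + 1;
	/* goal == 16385, the first inode number of group 2 */
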
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
new file mode 100644
index 000000000000..bbf2dd9404dc
--- /dev/null
+++ b/fs/ext4/move_extent.c
@@ -0,0 +1,1320 @@
1/*
2 * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
3 * Written by Takashi Sato <t-sato@yk.jp.nec.com>
4 * Akira Fujita <a-fujita@rs.jp.nec.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of version 2.1 of the GNU Lesser General Public License
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */
15
16#include <linux/fs.h>
17#include <linux/quotaops.h>
18#include "ext4_jbd2.h"
19#include "ext4_extents.h"
20#include "ext4.h"
21
22#define get_ext_path(path, inode, block, ret) \
23 do { \
24 path = ext4_ext_find_extent(inode, block, path); \
25 if (IS_ERR(path)) { \
26 ret = PTR_ERR(path); \
27 path = NULL; \
28 } \
29 } while (0)
30
31/**
32 * copy_extent_status - Copy the extent's initialization status
33 *
 34 * @src: an extent whose initialization status is read
 35 * @dest: an extent whose status is set from @src
36 */
37static void
38copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
39{
40 if (ext4_ext_is_uninitialized(src))
41 ext4_ext_mark_uninitialized(dest);
42 else
43 dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
44}
45
46/**
47 * mext_next_extent - Search for the next extent and set it to "extent"
48 *
49 * @inode: inode which is searched
50 * @path: this will obtain data for the next extent
51 * @extent: pointer to the next extent we have just gotten
52 *
 53 * Search for the next extent in the array of ext4_ext_path structures (@path)
 54 * and store it in the ext4_extent structure (@extent). In addition, the member
 55 * of @path (->p_ext) also points to the next extent. Return 0 on success, 1 if
56 * ext4_ext_path structure refers to the last extent, or a negative error
57 * value on failure.
58 */
59static int
60mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
61 struct ext4_extent **extent)
62{
63 int ppos, leaf_ppos = path->p_depth;
64
65 ppos = leaf_ppos;
66 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
67 /* leaf block */
68 *extent = ++path[ppos].p_ext;
69 return 0;
70 }
71
72 while (--ppos >= 0) {
73 if (EXT_LAST_INDEX(path[ppos].p_hdr) >
74 path[ppos].p_idx) {
75 int cur_ppos = ppos;
76
77 /* index block */
78 path[ppos].p_idx++;
79 path[ppos].p_block = idx_pblock(path[ppos].p_idx);
80 if (path[ppos+1].p_bh)
81 brelse(path[ppos+1].p_bh);
82 path[ppos+1].p_bh =
83 sb_bread(inode->i_sb, path[ppos].p_block);
84 if (!path[ppos+1].p_bh)
85 return -EIO;
86 path[ppos+1].p_hdr =
87 ext_block_hdr(path[ppos+1].p_bh);
88
89 /* Halfway index block */
90 while (++cur_ppos < leaf_ppos) {
91 path[cur_ppos].p_idx =
92 EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
93 path[cur_ppos].p_block =
94 idx_pblock(path[cur_ppos].p_idx);
95 if (path[cur_ppos+1].p_bh)
96 brelse(path[cur_ppos+1].p_bh);
97 path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
98 path[cur_ppos].p_block);
99 if (!path[cur_ppos+1].p_bh)
100 return -EIO;
101 path[cur_ppos+1].p_hdr =
102 ext_block_hdr(path[cur_ppos+1].p_bh);
103 }
104
105 /* leaf block */
106 path[leaf_ppos].p_ext = *extent =
107 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
108 return 0;
109 }
110 }
111 /* We found the last extent */
112 return 1;
113}
114
115/**
116 * mext_double_down_read - Acquire two inodes' read semaphore
117 *
118 * @orig_inode: original inode structure
119 * @donor_inode: donor inode structure
120 * Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
121 */
122static void
123mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
124{
125 struct inode *first = orig_inode, *second = donor_inode;
126
127 BUG_ON(orig_inode == NULL || donor_inode == NULL);
128
129 /*
130 * Use the inode number to provide the stable locking order instead
131 * of its address, because the C language doesn't guarantee you can
132 * compare pointers that don't come from the same array.
133 */
134 if (donor_inode->i_ino < orig_inode->i_ino) {
135 first = donor_inode;
136 second = orig_inode;
137 }
138
139 down_read(&EXT4_I(first)->i_data_sem);
140 down_read(&EXT4_I(second)->i_data_sem);
141}
142
143/**
144 * mext_double_down_write - Acquire two inodes' write semaphore
145 *
146 * @orig_inode: original inode structure
147 * @donor_inode: donor inode structure
148 * Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
149 */
150static void
151mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
152{
153 struct inode *first = orig_inode, *second = donor_inode;
154
155 BUG_ON(orig_inode == NULL || donor_inode == NULL);
156
157 /*
158 * Use the inode number to provide the stable locking order instead
159 * of its address, because the C language doesn't guarantee you can
160 * compare pointers that don't come from the same array.
161 */
162 if (donor_inode->i_ino < orig_inode->i_ino) {
163 first = donor_inode;
164 second = orig_inode;
165 }
166
167 down_write(&EXT4_I(first)->i_data_sem);
168 down_write(&EXT4_I(second)->i_data_sem);
169}
170
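
Ordering the two i_data_sem acquisitions by inode number (rather than, say, always locking the original inode first) is what keeps concurrent moves deadlock-free:

	/*
	 * Task A: move 10 <-> 42		Task B: move 42 <-> 10
	 *   lock i_data_sem of inode 10	  lock i_data_sem of inode 10  <- blocks
	 *   lock i_data_sem of inode 42	  lock i_data_sem of inode 42
	 * Both sides order by i_ino, so neither can hold inode 42's lock
	 * while waiting on inode 10's -- the classic ABBA deadlock cannot arise.
	 */
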
171/**
172 * mext_double_up_read - Release two inodes' read semaphore
173 *
 174 * @orig_inode: original inode structure whose lock is released first
 175 * @donor_inode: donor inode structure whose lock is released second
176 * Release read semaphore of two inodes (orig and donor).
177 */
178static void
179mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
180{
181 BUG_ON(orig_inode == NULL || donor_inode == NULL);
182
183 up_read(&EXT4_I(orig_inode)->i_data_sem);
184 up_read(&EXT4_I(donor_inode)->i_data_sem);
185}
186
187/**
188 * mext_double_up_write - Release two inodes' write semaphore
189 *
 190 * @orig_inode: original inode structure whose lock is released first
 191 * @donor_inode: donor inode structure whose lock is released second
192 * Release write semaphore of two inodes (orig and donor).
193 */
194static void
195mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
196{
197 BUG_ON(orig_inode == NULL || donor_inode == NULL);
198
199 up_write(&EXT4_I(orig_inode)->i_data_sem);
200 up_write(&EXT4_I(donor_inode)->i_data_sem);
201}
202
203/**
204 * mext_insert_across_blocks - Insert extents across leaf block
205 *
206 * @handle: journal handle
207 * @orig_inode: original inode
208 * @o_start: first original extent to be changed
209 * @o_end: last original extent to be changed
210 * @start_ext: first new extent to be inserted
211 * @new_ext: middle of new extent to be inserted
212 * @end_ext: last new extent to be inserted
213 *
214 * Allocate a new leaf block and insert extents into it. Return 0 on success,
215 * or a negative error value on failure.
216 */
217static int
218mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
219 struct ext4_extent *o_start, struct ext4_extent *o_end,
220 struct ext4_extent *start_ext, struct ext4_extent *new_ext,
221 struct ext4_extent *end_ext)
222{
223 struct ext4_ext_path *orig_path = NULL;
224 ext4_lblk_t eblock = 0;
225 int new_flag = 0;
226 int end_flag = 0;
227 int err = 0;
228
229 if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) {
230 if (o_start == o_end) {
231
232 /* start_ext new_ext end_ext
233 * donor |---------|-----------|--------|
234 * orig |------------------------------|
235 */
236 end_flag = 1;
237 } else {
238
239 /* start_ext new_ext end_ext
240 * donor |---------|----------|---------|
241 * orig |---------------|--------------|
242 */
243 o_end->ee_block = end_ext->ee_block;
244 o_end->ee_len = end_ext->ee_len;
245 ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
246 }
247
248 o_start->ee_len = start_ext->ee_len;
249 new_flag = 1;
250
251 } else if (start_ext->ee_len && new_ext->ee_len &&
252 !end_ext->ee_len && o_start == o_end) {
253
254 /* start_ext new_ext
255 * donor |--------------|---------------|
256 * orig |------------------------------|
257 */
258 o_start->ee_len = start_ext->ee_len;
259 new_flag = 1;
260
261 } else if (!start_ext->ee_len && new_ext->ee_len &&
262 end_ext->ee_len && o_start == o_end) {
263
264 /* new_ext end_ext
265 * donor |--------------|---------------|
266 * orig |------------------------------|
267 */
268 o_end->ee_block = end_ext->ee_block;
269 o_end->ee_len = end_ext->ee_len;
270 ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
271
272 /*
 273 * Leave eblock at zero if new_ext starts at
 274 * block 0 (the first block of the file).
275 */
276 if (new_ext->ee_block)
277 eblock = le32_to_cpu(new_ext->ee_block);
278
279 new_flag = 1;
280 } else {
281 ext4_debug("ext4 move extent: Unexpected insert case\n");
282 return -EIO;
283 }
284
285 if (new_flag) {
286 get_ext_path(orig_path, orig_inode, eblock, err);
287 if (orig_path == NULL)
288 goto out;
289
290 if (ext4_ext_insert_extent(handle, orig_inode,
291 orig_path, new_ext))
292 goto out;
293 }
294
295 if (end_flag) {
296 get_ext_path(orig_path, orig_inode,
297 le32_to_cpu(end_ext->ee_block) - 1, err);
298 if (orig_path == NULL)
299 goto out;
300
301 if (ext4_ext_insert_extent(handle, orig_inode,
302 orig_path, end_ext))
303 goto out;
304 }
305out:
306 if (orig_path) {
307 ext4_ext_drop_refs(orig_path);
308 kfree(orig_path);
309 }
310
311 return err;
312
313}
314
315/**
316 * mext_insert_inside_block - Insert new extent to the extent block
317 *
318 * @o_start: first original extent to be moved
319 * @o_end: last original extent to be moved
320 * @start_ext: first new extent to be inserted
321 * @new_ext: middle of new extent to be inserted
322 * @end_ext: last new extent to be inserted
323 * @eh: extent header of target leaf block
324 * @range_to_move: used to decide how to insert extent
325 *
326 * Insert extents into the leaf block. The extent (@o_start) is overwritten
327 * by inserted extents.
328 */
329static void
330mext_insert_inside_block(struct ext4_extent *o_start,
331 struct ext4_extent *o_end,
332 struct ext4_extent *start_ext,
333 struct ext4_extent *new_ext,
334 struct ext4_extent *end_ext,
335 struct ext4_extent_header *eh,
336 int range_to_move)
337{
338 int i = 0;
339 unsigned long len;
340
341 /* Move the existing extents */
342 if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
343 len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
344 (unsigned long)(o_end + 1);
345 memmove(o_end + 1 + range_to_move, o_end + 1, len);
346 }
347
348 /* Insert start entry */
349 if (start_ext->ee_len)
350 o_start[i++].ee_len = start_ext->ee_len;
351
352 /* Insert new entry */
353 if (new_ext->ee_len) {
354 o_start[i] = *new_ext;
355 ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext));
356 }
357
358 /* Insert end entry */
359 if (end_ext->ee_len)
360 o_start[i] = *end_ext;
361
362 /* Increment the total entries counter on the extent block */
363 le16_add_cpu(&eh->eh_entries, range_to_move);
364}
365
366/**
367 * mext_insert_extents - Insert new extent
368 *
369 * @handle: journal handle
370 * @orig_inode: original inode
371 * @orig_path: path indicates first extent to be changed
372 * @o_start: first original extent to be changed
373 * @o_end: last original extent to be changed
374 * @start_ext: first new extent to be inserted
375 * @new_ext: middle of new extent to be inserted
376 * @end_ext: last new extent to be inserted
377 *
378 * Call the function to insert extents. If we cannot add more extents into
379 * the leaf block, we call mext_insert_across_blocks() to create a
380 * new leaf block. Otherwise call mext_insert_inside_block(). Return 0
381 * on success, or a negative error value on failure.
382 */
383static int
384mext_insert_extents(handle_t *handle, struct inode *orig_inode,
385 struct ext4_ext_path *orig_path,
386 struct ext4_extent *o_start,
387 struct ext4_extent *o_end,
388 struct ext4_extent *start_ext,
389 struct ext4_extent *new_ext,
390 struct ext4_extent *end_ext)
391{
392 struct ext4_extent_header *eh;
393 unsigned long need_slots, slots_range;
394 int range_to_move, depth, ret;
395
396 /*
 397 * The extents that need to be inserted are:
398 * start_extent + new_extent + end_extent.
399 */
400 need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) +
401 (new_ext->ee_len ? 1 : 0);
402
403 /* The number of slots between start and end */
404 slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
405 / sizeof(struct ext4_extent);
406
407 /* Range to move the end of extent */
408 range_to_move = need_slots - slots_range;
409 depth = orig_path->p_depth;
410 orig_path += depth;
411 eh = orig_path->p_hdr;
412
413 if (depth) {
414 /* Register to journal */
415 ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
416 if (ret)
417 return ret;
418 }
419
420 /* Expansion */
421 if (range_to_move > 0 &&
422 (range_to_move > le16_to_cpu(eh->eh_max)
423 - le16_to_cpu(eh->eh_entries))) {
424
425 ret = mext_insert_across_blocks(handle, orig_inode, o_start,
426 o_end, start_ext, new_ext, end_ext);
427 if (ret < 0)
428 return ret;
429 } else
430 mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
431 end_ext, eh, range_to_move);
432
433 if (depth) {
434 ret = ext4_handle_dirty_metadata(handle, orig_inode,
435 orig_path->p_bh);
436 if (ret)
437 return ret;
438 } else {
439 ret = ext4_mark_inode_dirty(handle, orig_inode);
440 if (ret < 0)
441 return ret;
442 }
443
444 return 0;
445}
446
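
The slot arithmetic at the top of mext_insert_extents() may be easier to follow with concrete numbers: need_slots counts how many of the start/new/end extents are non-empty, slots_range counts the extent slots currently occupied from o_start to o_end inclusive, and range_to_move is the extra room required in the leaf. A worked example with made-up values:

	/*
	 * o_start == o_end (one occupied slot), and all three of start_ext,
	 * new_ext and end_ext are non-empty:
	 *   need_slots    = 3
	 *   slots_range   = 1
	 *   range_to_move = 2
	 * so everything after o_end shifts right by two slots and eh_entries
	 * grows by two -- unless fewer than two free slots remain in the leaf
	 * (eh_max - eh_entries), in which case mext_insert_across_blocks()
	 * allocates a new leaf block instead.
	 */
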
447/**
448 * mext_leaf_block - Move one leaf extent block into the inode.
449 *
450 * @handle: journal handle
451 * @orig_inode: original inode
452 * @orig_path: path indicates first extent to be changed
453 * @dext: donor extent
454 * @from: start offset on the target file
455 *
456 * In order to insert extents into the leaf block, we must divide the extent
 457 * in the leaf block into three extents. One covers the range where the new
 458 * extents are inserted, and the other two lie on either side of it.
459 *
460 * Therefore, this function creates structures to save extents of the leaf
461 * block, and inserts extents by calling mext_insert_extents() with
462 * created extents. Return 0 on success, or a negative error value on failure.
463 */
464static int
465mext_leaf_block(handle_t *handle, struct inode *orig_inode,
466 struct ext4_ext_path *orig_path, struct ext4_extent *dext,
467 ext4_lblk_t *from)
468{
469 struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
470 struct ext4_extent new_ext, start_ext, end_ext;
471 ext4_lblk_t new_ext_end;
472 ext4_fsblk_t new_phys_end;
473 int oext_alen, new_ext_alen, end_ext_alen;
474 int depth = ext_depth(orig_inode);
475 int ret;
476
477 o_start = o_end = oext = orig_path[depth].p_ext;
478 oext_alen = ext4_ext_get_actual_len(oext);
479 start_ext.ee_len = end_ext.ee_len = 0;
480
481 new_ext.ee_block = cpu_to_le32(*from);
482 ext4_ext_store_pblock(&new_ext, ext_pblock(dext));
483 new_ext.ee_len = dext->ee_len;
484 new_ext_alen = ext4_ext_get_actual_len(&new_ext);
485 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
486 new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1;
487
488 /*
489 * Case: original extent is first
490 * oext |--------|
491 * new_ext |--|
492 * start_ext |--|
493 */
494 if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) &&
495 le32_to_cpu(new_ext.ee_block) <
496 le32_to_cpu(oext->ee_block) + oext_alen) {
497 start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
498 le32_to_cpu(oext->ee_block));
499 copy_extent_status(oext, &start_ext);
500 } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
501 prev_ext = oext - 1;
502 /*
503 * We can merge new_ext into previous extent,
504 * if these are contiguous and same extent type.
505 */
506 if (ext4_can_extents_be_merged(orig_inode, prev_ext,
507 &new_ext)) {
508 o_start = prev_ext;
509 start_ext.ee_len = cpu_to_le16(
510 ext4_ext_get_actual_len(prev_ext) +
511 new_ext_alen);
512 copy_extent_status(prev_ext, &start_ext);
513 new_ext.ee_len = 0;
514 }
515 }
516
517 /*
518 * Case: new_ext_end must be less than oext
519 * oext |-----------|
520 * new_ext |-------|
521 */
522 BUG_ON(le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end);
523
524 /*
525 * Case: new_ext is smaller than original extent
526 * oext |---------------|
527 * new_ext |-----------|
528 * end_ext |---|
529 */
530 if (le32_to_cpu(oext->ee_block) <= new_ext_end &&
531 new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) {
532 end_ext.ee_len =
533 cpu_to_le16(le32_to_cpu(oext->ee_block) +
534 oext_alen - 1 - new_ext_end);
535 copy_extent_status(oext, &end_ext);
536 end_ext_alen = ext4_ext_get_actual_len(&end_ext);
537 ext4_ext_store_pblock(&end_ext,
538 (ext_pblock(o_end) + oext_alen - end_ext_alen));
539 end_ext.ee_block =
540 cpu_to_le32(le32_to_cpu(o_end->ee_block) +
541 oext_alen - end_ext_alen);
542 }
543
544 ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
545 o_end, &start_ext, &new_ext, &end_ext);
546 return ret;
547}
548
549/**
550 * mext_calc_swap_extents - Calculate extents for extent swapping.
551 *
552 * @tmp_dext: the extent that will belong to the original inode
553 * @tmp_oext: the extent that will belong to the donor inode
554 * @orig_off: block offset of original inode
555 * @donor_off: block offset of donor inode
 556 * @max_count: the maximum length of extents
557 */
558static void
559mext_calc_swap_extents(struct ext4_extent *tmp_dext,
560 struct ext4_extent *tmp_oext,
561 ext4_lblk_t orig_off, ext4_lblk_t donor_off,
562 ext4_lblk_t max_count)
563{
564 ext4_lblk_t diff, orig_diff;
565 struct ext4_extent dext_old, oext_old;
566
567 dext_old = *tmp_dext;
568 oext_old = *tmp_oext;
569
570 /* When tmp_dext is too large, pick up the target range. */
571 diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
572
573 ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff);
574 tmp_dext->ee_block =
575 cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
576 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
577
578 if (max_count < ext4_ext_get_actual_len(tmp_dext))
579 tmp_dext->ee_len = cpu_to_le16(max_count);
580
581 orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
582 ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff);
583
584 /* Adjust extent length if donor extent is larger than orig */
585 if (ext4_ext_get_actual_len(tmp_dext) >
586 ext4_ext_get_actual_len(tmp_oext) - orig_diff)
587 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) -
588 orig_diff);
589
590 tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext));
591
592 copy_extent_status(&oext_old, tmp_dext);
593 copy_extent_status(&dext_old, tmp_oext);
594}
595
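
A worked example of the trimming done by mext_calc_swap_extents(), with made-up numbers:

	/*
	 * Donor extent: ee_block = 100, len = 100; donor_off = 120
	 *   -> diff = 20, so tmp_dext is shifted to block 120 with len 80
	 *      (its physical start moves forward by 20 blocks as well).
	 * max_count = 50 trims tmp_dext further, to len 50.
	 * Orig extent: ee_block = 300, len = 40; orig_off = 310
	 *   -> orig_diff = 10, only 30 blocks remain in the original extent,
	 *      so tmp_dext is trimmed again to len 30 and tmp_oext->ee_len is
	 *      set to the same 30 blocks; finally the two extents exchange
	 *      their initialization status.
	 */
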
596/**
597 * mext_replace_branches - Replace original extents with new extents
598 *
599 * @handle: journal handle
600 * @orig_inode: original inode
601 * @donor_inode: donor inode
602 * @from: block offset of orig_inode
603 * @count: block count to be replaced
604 *
605 * Replace original inode extents and donor inode extents page by page.
606 * We implement this replacement in the following three steps:
607 * 1. Save the block information of original and donor inodes into
608 * dummy extents.
609 * 2. Change the block information of original inode to point at the
610 * donor inode blocks.
611 * 3. Change the block information of donor inode to point at the saved
612 * original inode blocks in the dummy extents.
613 *
614 * Return 0 on success, or a negative error value on failure.
615 */
616static int
617mext_replace_branches(handle_t *handle, struct inode *orig_inode,
618 struct inode *donor_inode, ext4_lblk_t from,
619 ext4_lblk_t count)
620{
621 struct ext4_ext_path *orig_path = NULL;
622 struct ext4_ext_path *donor_path = NULL;
623 struct ext4_extent *oext, *dext;
624 struct ext4_extent tmp_dext, tmp_oext;
625 ext4_lblk_t orig_off = from, donor_off = from;
626 int err = 0;
627 int depth;
628 int replaced_count = 0;
629 int dext_alen;
630
631 mext_double_down_write(orig_inode, donor_inode);
632
633 /* Get the original extent for the block "orig_off" */
634 get_ext_path(orig_path, orig_inode, orig_off, err);
635 if (orig_path == NULL)
636 goto out;
637
638 /* Get the donor extent for the head */
639 get_ext_path(donor_path, donor_inode, donor_off, err);
640 if (donor_path == NULL)
641 goto out;
642 depth = ext_depth(orig_inode);
643 oext = orig_path[depth].p_ext;
644 tmp_oext = *oext;
645
646 depth = ext_depth(donor_inode);
647 dext = donor_path[depth].p_ext;
648 tmp_dext = *dext;
649
650 mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
651 donor_off, count);
652
653 /* Loop for the donor extents */
654 while (1) {
655 /* The extent for donor must be found. */
656 BUG_ON(!dext || donor_off != le32_to_cpu(tmp_dext.ee_block));
657
658 /* Set donor extent to orig extent */
659 err = mext_leaf_block(handle, orig_inode,
660 orig_path, &tmp_dext, &orig_off);
661 if (err < 0)
662 goto out;
663
664 /* Set orig extent to donor extent */
665 err = mext_leaf_block(handle, donor_inode,
666 donor_path, &tmp_oext, &donor_off);
667 if (err < 0)
668 goto out;
669
670 dext_alen = ext4_ext_get_actual_len(&tmp_dext);
671 replaced_count += dext_alen;
672 donor_off += dext_alen;
673 orig_off += dext_alen;
674
675 /* Already moved the expected blocks */
676 if (replaced_count >= count)
677 break;
678
679 if (orig_path)
680 ext4_ext_drop_refs(orig_path);
681 get_ext_path(orig_path, orig_inode, orig_off, err);
682 if (orig_path == NULL)
683 goto out;
684 depth = ext_depth(orig_inode);
685 oext = orig_path[depth].p_ext;
686 if (le32_to_cpu(oext->ee_block) +
687 ext4_ext_get_actual_len(oext) <= orig_off) {
688 err = 0;
689 goto out;
690 }
691 tmp_oext = *oext;
692
693 if (donor_path)
694 ext4_ext_drop_refs(donor_path);
695 get_ext_path(donor_path, donor_inode,
696 donor_off, err);
697 if (donor_path == NULL)
698 goto out;
699 depth = ext_depth(donor_inode);
700 dext = donor_path[depth].p_ext;
701 if (le32_to_cpu(dext->ee_block) +
702 ext4_ext_get_actual_len(dext) <= donor_off) {
703 err = 0;
704 goto out;
705 }
706 tmp_dext = *dext;
707
708 mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
709 donor_off,
710 count - replaced_count);
711 }
712
713out:
714 if (orig_path) {
715 ext4_ext_drop_refs(orig_path);
716 kfree(orig_path);
717 }
718 if (donor_path) {
719 ext4_ext_drop_refs(donor_path);
720 kfree(donor_path);
721 }
722
723 mext_double_up_write(orig_inode, donor_inode);
724 return err;
725}
726
727/**
728 * move_extent_per_page - Move extent data per page
729 *
730 * @o_filp: file structure of original file
731 * @donor_inode: donor inode
732 * @orig_page_offset: page index on original file
733 * @data_offset_in_page: block index where data swapping starts
734 * @block_len_in_page: the number of blocks to be swapped
735 * @uninit: orig extent is uninitialized or not
736 *
737 * Save the data in original inode blocks and replace original inode extents
738 * with donor inode extents by calling mext_replace_branches().
739 * Finally, write the saved data out to the original inode's new blocks. Return 0
740 * on success, or a negative error value on failure.
741 */
742static int
743move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
744 pgoff_t orig_page_offset, int data_offset_in_page,
745 int block_len_in_page, int uninit)
746{
747 struct inode *orig_inode = o_filp->f_dentry->d_inode;
748 struct address_space *mapping = orig_inode->i_mapping;
749 struct buffer_head *bh;
750 struct page *page = NULL;
751 const struct address_space_operations *a_ops = mapping->a_ops;
752 handle_t *handle;
753 ext4_lblk_t orig_blk_offset;
754 long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
755 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
756 unsigned int w_flags = 0;
757 unsigned int tmp_data_len, data_len;
758 void *fsdata;
759 int ret, i, jblocks;
760 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
761
762 /*
763 * We need twice the usual number of journal credits because the
764 * original and donor inodes may each modify different metadata blocks.
765 */
766 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
767 handle = ext4_journal_start(orig_inode, jblocks);
768 if (IS_ERR(handle)) {
769 ret = PTR_ERR(handle);
770 return ret;
771 }
772
773 if (segment_eq(get_fs(), KERNEL_DS))
774 w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
775
776 orig_blk_offset = orig_page_offset * blocks_per_page +
777 data_offset_in_page;
778
779 /*
780 * If the orig extent is uninitialized,
781 * there is no need to force the page into memory
782 * and then force it to be written out again.
783 * Just swap the data blocks between orig and donor.
784 */
785 if (uninit) {
786 ret = mext_replace_branches(handle, orig_inode,
787 donor_inode, orig_blk_offset,
788 block_len_in_page);
789
790 /* Clear the inode cache not to refer to the old data */
791 ext4_ext_invalidate_cache(orig_inode);
792 ext4_ext_invalidate_cache(donor_inode);
793 goto out2;
794 }
795
796 offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
797
798 /* Calculate data_len */
799 if ((orig_blk_offset + block_len_in_page - 1) ==
800 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
801 /* Replace the last block */
802 tmp_data_len = orig_inode->i_size & (blocksize - 1);
803 /*
804 * If tmp_data_len is zero, the file size is a multiple of the
805 * blocksize, so the last block is full; use the whole blocksize.
806 */
807 if (tmp_data_len == 0)
808 tmp_data_len = blocksize;
809
810 data_len = tmp_data_len +
811 ((block_len_in_page - 1) << orig_inode->i_blkbits);
812 } else {
813 data_len = block_len_in_page << orig_inode->i_blkbits;
814 }
815
816 ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags,
817 &page, &fsdata);
818 if (unlikely(ret < 0))
819 goto out;
820
821 if (!PageUptodate(page)) {
822 mapping->a_ops->readpage(o_filp, page);
823 lock_page(page);
824 }
825
826 /*
827 * try_to_release_page() doesn't call releasepage in writeback mode.
828 * We also have to preserve the order of writes to the same file
829 * when multiple move extent processes run concurrently, so call
830 * wait_on_page_writeback() to wait until any writeback of the
831 * page has finished.
832 */
833 if (PageWriteback(page))
834 wait_on_page_writeback(page);
835
836 /* Release old bh and drop refs */
837 try_to_release_page(page, 0);
838
839 ret = mext_replace_branches(handle, orig_inode, donor_inode,
840 orig_blk_offset, block_len_in_page);
841 if (ret < 0)
842 goto out;
843
844 /* Clear the inode cache not to refer to the old data */
845 ext4_ext_invalidate_cache(orig_inode);
846 ext4_ext_invalidate_cache(donor_inode);
847
848 if (!page_has_buffers(page))
849 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
850
851 bh = page_buffers(page);
852 for (i = 0; i < data_offset_in_page; i++)
853 bh = bh->b_this_page;
854
855 for (i = 0; i < block_len_in_page; i++) {
856 ret = ext4_get_block(orig_inode,
857 (sector_t)(orig_blk_offset + i), bh, 0);
858 if (ret < 0)
859 goto out;
860
861 if (bh->b_this_page != NULL)
862 bh = bh->b_this_page;
863 }
864
865 ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len,
866 page, fsdata);
867 page = NULL;
868
869out:
870 if (unlikely(page)) {
871 if (PageLocked(page))
872 unlock_page(page);
873 page_cache_release(page);
874 }
875out2:
876 ext4_journal_stop(handle);
877
878 return ret < 0 ? ret : 0;
879}
880
881/**
882 * mext_check_arguments - Check whether move extent can be done
883 *
884 * @orig_inode: original inode
885 * @donor_inode: donor inode
886 * @orig_start: logical start offset in block for orig
887 * @donor_start: logical start offset in block for donor
888 * @len: the number of blocks to be moved
889 * @moved_len: moved block length
890 *
891 * Check the arguments of ext4_move_extents() to determine whether the
892 * files can be exchanged with each other.
893 * Return 0 on success, or a negative error value on failure.
894 */
895static int
896mext_check_arguments(struct inode *orig_inode,
897 struct inode *donor_inode, __u64 orig_start,
898 __u64 donor_start, __u64 *len, __u64 moved_len)
899{
900 /* Regular file check */
901 if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
902 ext4_debug("ext4 move extent: The argument files should be "
903 "regular file [ino:orig %lu, donor %lu]\n",
904 orig_inode->i_ino, donor_inode->i_ino);
905 return -EINVAL;
906 }
907
908 /* Ext4 move extent does not support swapfile */
909 if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
910 ext4_debug("ext4 move extent: The argument files should "
911 "not be swapfile [ino:orig %lu, donor %lu]\n",
912 orig_inode->i_ino, donor_inode->i_ino);
913 return -EINVAL;
914 }
915
916 /* Files should be in the same ext4 FS */
917 if (orig_inode->i_sb != donor_inode->i_sb) {
918 ext4_debug("ext4 move extent: The argument files "
919 "should be in same FS [ino:orig %lu, donor %lu]\n",
920 orig_inode->i_ino, donor_inode->i_ino);
921 return -EINVAL;
922 }
923
924 /* orig and donor should be different files */
925 if (orig_inode->i_ino == donor_inode->i_ino) {
926 ext4_debug("ext4 move extent: The argument files should not "
927 "be same file [ino:orig %lu, donor %lu]\n",
928 orig_inode->i_ino, donor_inode->i_ino);
929 return -EINVAL;
930 }
931
932 /* Ext4 move extent supports only extent-based files */
933 if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) {
934 ext4_debug("ext4 move extent: orig file is not extents "
935 "based file [ino:orig %lu]\n", orig_inode->i_ino);
936 return -EOPNOTSUPP;
937 } else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) {
938 ext4_debug("ext4 move extent: donor file is not extents "
939 "based file [ino:donor %lu]\n", donor_inode->i_ino);
940 return -EOPNOTSUPP;
941 }
942
943 if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
944 ext4_debug("ext4 move extent: File size is 0 byte\n");
945 return -EINVAL;
946 }
947
948 /* Start offsets should be the same */
949 if (orig_start != donor_start) {
950 ext4_debug("ext4 move extent: orig and donor's start "
951 "offset are not same [ino:orig %lu, donor %lu]\n",
952 orig_inode->i_ino, donor_inode->i_ino);
953 return -EINVAL;
954 }
955
956 if (moved_len) {
957 ext4_debug("ext4 move extent: moved_len should be 0 "
958 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
959 donor_inode->i_ino);
960 return -EINVAL;
961 }
962
963 if ((orig_start > MAX_DEFRAG_SIZE) ||
964 (donor_start > MAX_DEFRAG_SIZE) ||
965 (*len > MAX_DEFRAG_SIZE) ||
966 (orig_start + *len > MAX_DEFRAG_SIZE)) {
967 ext4_debug("ext4 move extent: Can't handle over [%lu] blocks "
968 "[ino:orig %lu, donor %lu]\n", MAX_DEFRAG_SIZE,
969 orig_inode->i_ino, donor_inode->i_ino);
970 return -EINVAL;
971 }
972
973 if (orig_inode->i_size > donor_inode->i_size) {
974 if (orig_start >= donor_inode->i_size) {
975 ext4_debug("ext4 move extent: orig start offset "
976 "[%llu] should be less than donor file size "
977 "[%lld] [ino:orig %lu, donor_inode %lu]\n",
978 orig_start, donor_inode->i_size,
979 orig_inode->i_ino, donor_inode->i_ino);
980 return -EINVAL;
981 }
982
983 if (orig_start + *len > donor_inode->i_size) {
984 ext4_debug("ext4 move extent: End offset [%llu] should "
985 "be less than donor file size [%lld]."
986 "So adjust length from %llu to %lld "
987 "[ino:orig %lu, donor %lu]\n",
988 orig_start + *len, donor_inode->i_size,
989 *len, donor_inode->i_size - orig_start,
990 orig_inode->i_ino, donor_inode->i_ino);
991 *len = donor_inode->i_size - orig_start;
992 }
993 } else {
994 if (orig_start >= orig_inode->i_size) {
995 ext4_debug("ext4 move extent: start offset [%llu] "
996 "should be less than original file size "
997 "[%lld] [inode:orig %lu, donor %lu]\n",
998 orig_start, orig_inode->i_size,
999 orig_inode->i_ino, donor_inode->i_ino);
1000 return -EINVAL;
1001 }
1002
1003 if (orig_start + *len > orig_inode->i_size) {
1004 ext4_debug("ext4 move extent: Adjust length "
1005 "from %llu to %lld. Because it should be "
1006 "less than original file size "
1007 "[ino:orig %lu, donor %lu]\n",
1008 *len, orig_inode->i_size - orig_start,
1009 orig_inode->i_ino, donor_inode->i_ino);
1010 *len = orig_inode->i_size - orig_start;
1011 }
1012 }
1013
1014 if (!*len) {
1015 ext4_debug("ext4 move extent: len should not be 0 "
1016 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
1017 donor_inode->i_ino);
1018 return -EINVAL;
1019 }
1020
1021 return 0;
1022}
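
The checks above translate directly into conditions a caller can verify before issuing the ioctl. A hypothetical user-space preflight (illustrative only, not part of this patch) covering the regular-file, same-filesystem, distinct-inode and non-empty checks might look like the sketch below; the extents-flag, swapfile and offset checks are left to the kernel.

#include <stdbool.h>
#include <sys/stat.h>

/* Rough user-space mirror of mext_check_arguments(): both descriptors must
 * refer to regular, non-empty files on the same filesystem, and must not be
 * the same file. */
static bool move_extent_preflight(int orig_fd, int donor_fd)
{
	struct stat o, d;

	if (fstat(orig_fd, &o) < 0 || fstat(donor_fd, &d) < 0)
		return false;

	return S_ISREG(o.st_mode) && S_ISREG(d.st_mode) &&
	       o.st_dev == d.st_dev && o.st_ino != d.st_ino &&
	       o.st_size > 0 && d.st_size > 0;
}
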
1023
1024/**
1025 * mext_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
1026 *
1027 * @inode1: the inode structure
1028 * @inode2: the inode structure
1029 *
1030 * Lock two inodes' i_mutex by i_ino order. This function is moved from
1031 * fs/inode.c.
1032 */
1033static void
1034mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
1035{
1036 if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
1037 if (inode1)
1038 mutex_lock(&inode1->i_mutex);
1039 else if (inode2)
1040 mutex_lock(&inode2->i_mutex);
1041 return;
1042 }
1043
1044 if (inode1->i_ino < inode2->i_ino) {
1045 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
1046 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
1047 } else {
1048 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
1049 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
1050 }
1051}
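
Ordering the two i_mutex acquisitions by i_ino is what rules out an ABBA deadlock here: as long as every path takes the pair in the same global order, two tasks locking the same two inodes can never each hold one mutex while waiting for the other. A minimal user-space sketch of the same idiom, using pthread mutexes ordered by an id (purely illustrative, not part of this patch):

#include <pthread.h>

struct obj {
	unsigned long id;		/* stands in for inode->i_ino */
	pthread_mutex_t lock;		/* stands in for inode->i_mutex */
};

/* Lock the lower id first so every caller agrees on the acquisition order. */
static void double_lock(struct obj *a, struct obj *b)
{
	if (a == b) {
		pthread_mutex_lock(&a->lock);
		return;
	}
	if (a->id > b->id) {
		struct obj *tmp = a;

		a = b;
		b = tmp;
	}
	pthread_mutex_lock(&a->lock);
	pthread_mutex_lock(&b->lock);
}

static void double_unlock(struct obj *a, struct obj *b)
{
	pthread_mutex_unlock(&a->lock);
	if (b != a)
		pthread_mutex_unlock(&b->lock);
}
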
1052
1053/**
1054 * mext_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
1055 *
1056 * @inode1: the inode that is released first
1057 * @inode2: the inode that is released second
1058 *
1059 * This function is moved from fs/inode.c.
1060 */
1061
1062static void
1063mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
1064{
1065 if (inode1)
1066 mutex_unlock(&inode1->i_mutex);
1067
1068 if (inode2 && inode2 != inode1)
1069 mutex_unlock(&inode2->i_mutex);
1070}
1071
1072/**
1073 * ext4_move_extents - Exchange the specified range of a file
1074 *
1075 * @o_filp: file structure of the original file
1076 * @d_filp: file structure of the donor file
1077 * @orig_start: start offset in block for orig
1078 * @donor_start: start offset in block for donor
1079 * @len: the number of blocks to be moved
1080 * @moved_len: moved block length
1081 *
1082 * This function returns 0 and sets the moved block count in moved_len
1083 * on success, otherwise it returns a negative error value.
1084 *
1085 * Note: ext4_move_extents() proceeds in the following order.
1086 * 1:ext4_move_extents() calculates the last block number of the extents
1087 *   to be moved from the start block number (orig_start) and the number
1088 *   of blocks to be moved (len) given as arguments.
1089 *   If {orig, donor}_start points into a hole, the extent start offset
1090 *   referenced by ext_cur (the current extent), holecheck_path and
1091 *   orig_path is set to the first extent behind the hole.
1092 * 2:Repeat step 3 to step 5 until holecheck_path points to the last extent
1093 *   or ext_cur exceeds block_end, the last logical block number.
1094 * 3:To get the length of the contiguous area, call mext_next_extent()
1095 *   repeatedly on ext_cur (whose initial value comes from holecheck_path)
1096 *   until a non-contiguous extent is found, the start logical block number
1097 *   exceeds block_end, or the extent is the last one.
1098 * 4:Exchange the original inode data with the donor inode data
1099 *   from orig_page_offset to seq_end_page.
1100 *   The start indexes of the data are specified as arguments.
1101 *   That of the original inode is orig_page_offset,
1102 *   and that of the donor inode is also orig_page_offset
1103 *   (to easily handle the blocksize != pagesize case, the offset for the
1104 *   donor inode is kept in block units).
1105 * 5:Update holecheck_path and orig_path to point at the next extent,
1106 *   then return to step 2.
1107 * 6:Release holecheck_path and orig_path, and set len in moved_len,
1108 *   which is the number of moved blocks.
1109 *   moved_len lets the caller calculate the file offset at which to
1110 *   start the next move extent ioctl.
1111 * 7:Return 0 on success, or a negative error value on failure.
1112 */
1113int
1114ext4_move_extents(struct file *o_filp, struct file *d_filp,
1115 __u64 orig_start, __u64 donor_start, __u64 len,
1116 __u64 *moved_len)
1117{
1118 struct inode *orig_inode = o_filp->f_dentry->d_inode;
1119 struct inode *donor_inode = d_filp->f_dentry->d_inode;
1120 struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
1121 struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
1122 ext4_lblk_t block_start = orig_start;
1123 ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
1124 ext4_lblk_t rest_blocks;
1125 pgoff_t orig_page_offset = 0, seq_end_page;
1126 int ret, depth, last_extent = 0;
1127 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
1128 int data_offset_in_page;
1129 int block_len_in_page;
1130 int uninit;
1131
1132 /* protect orig and donor against a truncate */
1133 mext_inode_double_lock(orig_inode, donor_inode);
1134
1135 mext_double_down_read(orig_inode, donor_inode);
1136 /* Check whether move_extent can be done on the given files */
1137 ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
1138 donor_start, &len, *moved_len);
1139 mext_double_up_read(orig_inode, donor_inode);
1140 if (ret)
1141 goto out2;
1142
1143 file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
1144 block_end = block_start + len - 1;
1145 if (file_end < block_end)
1146 len -= block_end - file_end;
1147
1148 get_ext_path(orig_path, orig_inode, block_start, ret);
1149 if (orig_path == NULL)
1150 goto out2;
1151
1152 /* Get path structure to check the hole */
1153 get_ext_path(holecheck_path, orig_inode, block_start, ret);
1154 if (holecheck_path == NULL)
1155 goto out;
1156
1157 depth = ext_depth(orig_inode);
1158 ext_cur = holecheck_path[depth].p_ext;
1159 if (ext_cur == NULL) {
1160 ret = -EINVAL;
1161 goto out;
1162 }
1163
1164 /*
1165 * Get the proper extent whose ee_block is beyond block_start
1166 * if block_start was within a hole.
1167 */
1168 if (le32_to_cpu(ext_cur->ee_block) +
1169 ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
1170 last_extent = mext_next_extent(orig_inode,
1171 holecheck_path, &ext_cur);
1172 if (last_extent < 0) {
1173 ret = last_extent;
1174 goto out;
1175 }
1176 last_extent = mext_next_extent(orig_inode, orig_path,
1177 &ext_dummy);
1178 if (last_extent < 0) {
1179 ret = last_extent;
1180 goto out;
1181 }
1182 }
1183 seq_start = block_start;
1184
1185 /* No blocks within the specified range. */
1186 if (le32_to_cpu(ext_cur->ee_block) > block_end) {
1187 ext4_debug("ext4 move extent: The specified range of the file "
1188 "may be a hole\n");
1189 ret = -EINVAL;
1190 goto out;
1191 }
1192
1193 /* Adjust start blocks */
1194 add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
1195 ext4_ext_get_actual_len(ext_cur), block_end + 1) -
1196 max(le32_to_cpu(ext_cur->ee_block), block_start);
1197
1198 while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
1199 seq_blocks += add_blocks;
1200
1201 /* Adjust tail blocks */
1202 if (seq_start + seq_blocks - 1 > block_end)
1203 seq_blocks = block_end - seq_start + 1;
1204
1205 ext_prev = ext_cur;
1206 last_extent = mext_next_extent(orig_inode, holecheck_path,
1207 &ext_cur);
1208 if (last_extent < 0) {
1209 ret = last_extent;
1210 break;
1211 }
1212 add_blocks = ext4_ext_get_actual_len(ext_cur);
1213
1214 /*
1215 * Extend the contiguous block count (seq_blocks)
1216 * if the extents are contiguous.
1217 */
1218 if (ext4_can_extents_be_merged(orig_inode,
1219 ext_prev, ext_cur) &&
1220 block_end >= le32_to_cpu(ext_cur->ee_block) &&
1221 !last_extent)
1222 continue;
1223
1224 /* Is the original extent uninitialized? */
1225 uninit = ext4_ext_is_uninitialized(ext_prev);
1226
1227 data_offset_in_page = seq_start % blocks_per_page;
1228
1229 /*
1230 * Calculate data blocks count that should be swapped
1231 * at the first page.
1232 */
1233 if (data_offset_in_page + seq_blocks > blocks_per_page) {
1234 /* Swapped blocks are across pages */
1235 block_len_in_page =
1236 blocks_per_page - data_offset_in_page;
1237 } else {
1238 /* Swapped blocks are in a page */
1239 block_len_in_page = seq_blocks;
1240 }
1241
1242 orig_page_offset = seq_start >>
1243 (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
1244 seq_end_page = (seq_start + seq_blocks - 1) >>
1245 (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
1246 seq_start = le32_to_cpu(ext_cur->ee_block);
1247 rest_blocks = seq_blocks;
1248
1249 /* Discard preallocations of two inodes */
1250 down_write(&EXT4_I(orig_inode)->i_data_sem);
1251 ext4_discard_preallocations(orig_inode);
1252 up_write(&EXT4_I(orig_inode)->i_data_sem);
1253
1254 down_write(&EXT4_I(donor_inode)->i_data_sem);
1255 ext4_discard_preallocations(donor_inode);
1256 up_write(&EXT4_I(donor_inode)->i_data_sem);
1257
1258 while (orig_page_offset <= seq_end_page) {
1259
1260 /* Swap original branches with new branches */
1261 ret = move_extent_per_page(o_filp, donor_inode,
1262 orig_page_offset,
1263 data_offset_in_page,
1264 block_len_in_page, uninit);
1265 if (ret < 0)
1266 goto out;
1267 orig_page_offset++;
1268 /* Count how many blocks we have exchanged */
1269 *moved_len += block_len_in_page;
1270 BUG_ON(*moved_len > len);
1271
1272 data_offset_in_page = 0;
1273 rest_blocks -= block_len_in_page;
1274 if (rest_blocks > blocks_per_page)
1275 block_len_in_page = blocks_per_page;
1276 else
1277 block_len_in_page = rest_blocks;
1278 }
1279
1280 /* Decrease buffer counter */
1281 if (holecheck_path)
1282 ext4_ext_drop_refs(holecheck_path);
1283 get_ext_path(holecheck_path, orig_inode,
1284 seq_start, ret);
1285 if (holecheck_path == NULL)
1286 break;
1287 depth = holecheck_path->p_depth;
1288
1289 /* Decrease buffer counter */
1290 if (orig_path)
1291 ext4_ext_drop_refs(orig_path);
1292 get_ext_path(orig_path, orig_inode, seq_start, ret);
1293 if (orig_path == NULL)
1294 break;
1295
1296 ext_cur = holecheck_path[depth].p_ext;
1297 add_blocks = ext4_ext_get_actual_len(ext_cur);
1298 seq_blocks = 0;
1299
1300 }
1301out:
1302 if (orig_path) {
1303 ext4_ext_drop_refs(orig_path);
1304 kfree(orig_path);
1305 }
1306 if (holecheck_path) {
1307 ext4_ext_drop_refs(holecheck_path);
1308 kfree(holecheck_path);
1309 }
1310out2:
1311 mext_inode_double_unlock(orig_inode, donor_inode);
1312
1313 if (ret)
1314 return ret;
1315
1316 /* On success, all of the specified blocks must have been exchanged */
1317 BUG_ON(*moved_len != len);
1318
1319 return 0;
1320}
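
For context, a user-space caller of this path would look roughly like the hypothetical sketch below (not part of this patch). The EXT4_IOC_MOVE_EXT number and the struct move_extent layout are assumptions copied to user space for illustration and should be verified against fs/ext4/ext4.h in this tree; the donor file must already have blocks allocated across the requested range (e.g. via fallocate).

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Assumed to match struct move_extent in fs/ext4/ext4.h; verify before use. */
struct move_extent {
	uint32_t reserved;	/* should be zero */
	uint32_t donor_fd;	/* donor file descriptor */
	uint64_t orig_start;	/* logical start offset in blocks, orig file */
	uint64_t donor_start;	/* logical start offset in blocks, donor file */
	uint64_t len;		/* number of blocks to be moved */
	uint64_t moved_len;	/* number of blocks actually moved */
};
#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)	/* assumed value */

int main(int argc, char **argv)
{
	if (argc < 3) {
		fprintf(stderr, "usage: %s <orig> <donor>\n", argv[0]);
		return 1;
	}

	int orig = open(argv[1], O_RDWR);	/* fragmented file */
	int donor = open(argv[2], O_RDWR);	/* contiguous, preallocated donor */
	if (orig < 0 || donor < 0) {
		perror("open");
		return 1;
	}

	struct move_extent me = {
		.donor_fd = donor,
		.orig_start = 0,
		.donor_start = 0,
		.len = 256,			/* blocks to exchange */
	};

	if (ioctl(orig, EXT4_IOC_MOVE_EXT, &me) < 0) {
		perror("EXT4_IOC_MOVE_EXT");
		return 1;
	}
	printf("moved %llu blocks\n", (unsigned long long)me.moved_len);

	close(orig);
	close(donor);
	return 0;
}
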
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 07eb6649e4fa..de04013d16ff 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1782,7 +1782,7 @@ retry:
1782 if (IS_DIRSYNC(dir)) 1782 if (IS_DIRSYNC(dir))
1783 ext4_handle_sync(handle); 1783 ext4_handle_sync(handle);
1784 1784
1785 inode = ext4_new_inode (handle, dir, mode); 1785 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
1786 err = PTR_ERR(inode); 1786 err = PTR_ERR(inode);
1787 if (!IS_ERR(inode)) { 1787 if (!IS_ERR(inode)) {
1788 inode->i_op = &ext4_file_inode_operations; 1788 inode->i_op = &ext4_file_inode_operations;
@@ -1816,7 +1816,7 @@ retry:
1816 if (IS_DIRSYNC(dir)) 1816 if (IS_DIRSYNC(dir))
1817 ext4_handle_sync(handle); 1817 ext4_handle_sync(handle);
1818 1818
1819 inode = ext4_new_inode(handle, dir, mode); 1819 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
1820 err = PTR_ERR(inode); 1820 err = PTR_ERR(inode);
1821 if (!IS_ERR(inode)) { 1821 if (!IS_ERR(inode)) {
1822 init_special_inode(inode, inode->i_mode, rdev); 1822 init_special_inode(inode, inode->i_mode, rdev);
@@ -1853,7 +1853,8 @@ retry:
1853 if (IS_DIRSYNC(dir)) 1853 if (IS_DIRSYNC(dir))
1854 ext4_handle_sync(handle); 1854 ext4_handle_sync(handle);
1855 1855
1856 inode = ext4_new_inode(handle, dir, S_IFDIR | mode); 1856 inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
1857 &dentry->d_name, 0);
1857 err = PTR_ERR(inode); 1858 err = PTR_ERR(inode);
1858 if (IS_ERR(inode)) 1859 if (IS_ERR(inode))
1859 goto out_stop; 1860 goto out_stop;
@@ -2264,7 +2265,8 @@ retry:
2264 if (IS_DIRSYNC(dir)) 2265 if (IS_DIRSYNC(dir))
2265 ext4_handle_sync(handle); 2266 ext4_handle_sync(handle);
2266 2267
2267 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); 2268 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
2269 &dentry->d_name, 0);
2268 err = PTR_ERR(inode); 2270 err = PTR_ERR(inode);
2269 if (IS_ERR(inode)) 2271 if (IS_ERR(inode))
2270 goto out_stop; 2272 goto out_stop;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 27eb289eea37..68b0351fc647 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1002,7 +1002,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1002 " too large to resize to %llu blocks safely\n", 1002 " too large to resize to %llu blocks safely\n",
1003 sb->s_id, n_blocks_count); 1003 sb->s_id, n_blocks_count);
1004 if (sizeof(sector_t) < 8) 1004 if (sizeof(sector_t) < 8)
1005 ext4_warning(sb, __func__, "CONFIG_LBD not enabled"); 1005 ext4_warning(sb, __func__, "CONFIG_LBDAF not enabled");
1006 return -EINVAL; 1006 return -EINVAL;
1007 } 1007 }
1008 1008
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 012c4251397e..8bb9e2d3e4b8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -37,7 +37,6 @@
37#include <linux/seq_file.h> 37#include <linux/seq_file.h>
38#include <linux/proc_fs.h> 38#include <linux/proc_fs.h>
39#include <linux/ctype.h> 39#include <linux/ctype.h>
40#include <linux/marker.h>
41#include <linux/log2.h> 40#include <linux/log2.h>
42#include <linux/crc16.h> 41#include <linux/crc16.h>
43#include <asm/uaccess.h> 42#include <asm/uaccess.h>
@@ -47,6 +46,9 @@
47#include "xattr.h" 46#include "xattr.h"
48#include "acl.h" 47#include "acl.h"
49 48
49#define CREATE_TRACE_POINTS
50#include <trace/events/ext4.h>
51
50static int default_mb_history_length = 1000; 52static int default_mb_history_length = 1000;
51 53
52module_param_named(default_mb_history_length, default_mb_history_length, 54module_param_named(default_mb_history_length, default_mb_history_length,
@@ -301,7 +303,7 @@ static void ext4_handle_error(struct super_block *sb)
301 if (!test_opt(sb, ERRORS_CONT)) { 303 if (!test_opt(sb, ERRORS_CONT)) {
302 journal_t *journal = EXT4_SB(sb)->s_journal; 304 journal_t *journal = EXT4_SB(sb)->s_journal;
303 305
304 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 306 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
305 if (journal) 307 if (journal)
306 jbd2_journal_abort(journal, -EIO); 308 jbd2_journal_abort(journal, -EIO);
307 } 309 }
@@ -414,7 +416,7 @@ void ext4_abort(struct super_block *sb, const char *function,
414 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 416 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
415 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 417 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
416 sb->s_flags |= MS_RDONLY; 418 sb->s_flags |= MS_RDONLY;
417 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 419 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
418 if (EXT4_SB(sb)->s_journal) 420 if (EXT4_SB(sb)->s_journal)
419 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 421 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
420} 422}
@@ -1474,7 +1476,7 @@ set_qf_format:
1474 break; 1476 break;
1475#endif 1477#endif
1476 case Opt_abort: 1478 case Opt_abort:
1477 set_opt(sbi->s_mount_opt, ABORT); 1479 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1478 break; 1480 break;
1479 case Opt_nobarrier: 1481 case Opt_nobarrier:
1480 clear_opt(sbi->s_mount_opt, BARRIER); 1482 clear_opt(sbi->s_mount_opt, BARRIER);
@@ -1653,7 +1655,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1653 ext4_commit_super(sb, 1); 1655 ext4_commit_super(sb, 1);
1654 if (test_opt(sb, DEBUG)) 1656 if (test_opt(sb, DEBUG))
1655 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1657 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1656 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1658 "bpg=%lu, ipg=%lu, mo=%04x]\n",
1657 sb->s_blocksize, 1659 sb->s_blocksize,
1658 sbi->s_groups_count, 1660 sbi->s_groups_count,
1659 EXT4_BLOCKS_PER_GROUP(sb), 1661 EXT4_BLOCKS_PER_GROUP(sb),
@@ -1957,7 +1959,7 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
1957 /* small i_blocks in vfs inode? */ 1959 /* small i_blocks in vfs inode? */
1958 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1960 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1959 /* 1961 /*
1960 * CONFIG_LBD is not enabled implies the inode 1962 * CONFIG_LBDAF is not enabled implies the inode
1961 * i_block represent total blocks in 512 bytes 1963 * i_block represent total blocks in 512 bytes
1962 * 32 == size of vfs inode i_blocks * 8 1964 * 32 == size of vfs inode i_blocks * 8
1963 */ 1965 */
@@ -2000,7 +2002,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
2000 2002
2001 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 2003 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2002 /* 2004 /*
2003 * !has_huge_files or CONFIG_LBD not enabled implies that 2005 * !has_huge_files or CONFIG_LBDAF not enabled implies that
2004 * the inode i_block field represents total file blocks in 2006 * the inode i_block field represents total file blocks in
2005 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 2007 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
2006 */ 2008 */
@@ -2204,6 +2206,7 @@ EXT4_RO_ATTR(session_write_kbytes);
2204EXT4_RO_ATTR(lifetime_write_kbytes); 2206EXT4_RO_ATTR(lifetime_write_kbytes);
2205EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, 2207EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2206 inode_readahead_blks_store, s_inode_readahead_blks); 2208 inode_readahead_blks_store, s_inode_readahead_blks);
2209EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
2207EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); 2210EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
2208EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); 2211EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
2209EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); 2212EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
@@ -2216,6 +2219,7 @@ static struct attribute *ext4_attrs[] = {
2216 ATTR_LIST(session_write_kbytes), 2219 ATTR_LIST(session_write_kbytes),
2217 ATTR_LIST(lifetime_write_kbytes), 2220 ATTR_LIST(lifetime_write_kbytes),
2218 ATTR_LIST(inode_readahead_blks), 2221 ATTR_LIST(inode_readahead_blks),
2222 ATTR_LIST(inode_goal),
2219 ATTR_LIST(mb_stats), 2223 ATTR_LIST(mb_stats),
2220 ATTR_LIST(mb_max_to_scan), 2224 ATTR_LIST(mb_max_to_scan),
2221 ATTR_LIST(mb_min_to_scan), 2225 ATTR_LIST(mb_min_to_scan),
@@ -2436,13 +2440,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2436 if (has_huge_files) { 2440 if (has_huge_files) {
2437 /* 2441 /*
2438 * Large file size enabled file system can only be 2442 * Large file size enabled file system can only be
2439 * mount if kernel is build with CONFIG_LBD 2443 * mount if kernel is build with CONFIG_LBDAF
2440 */ 2444 */
2441 if (sizeof(root->i_blocks) < sizeof(u64) && 2445 if (sizeof(root->i_blocks) < sizeof(u64) &&
2442 !(sb->s_flags & MS_RDONLY)) { 2446 !(sb->s_flags & MS_RDONLY)) {
2443 ext4_msg(sb, KERN_ERR, "Filesystem with huge " 2447 ext4_msg(sb, KERN_ERR, "Filesystem with huge "
2444 "files cannot be mounted read-write " 2448 "files cannot be mounted read-write "
2445 "without CONFIG_LBD"); 2449 "without CONFIG_LBDAF");
2446 goto failed_mount; 2450 goto failed_mount;
2447 } 2451 }
2448 } 2452 }
@@ -2566,7 +2570,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2566 ext4_msg(sb, KERN_ERR, "filesystem" 2570 ext4_msg(sb, KERN_ERR, "filesystem"
2567 " too large to mount safely"); 2571 " too large to mount safely");
2568 if (sizeof(sector_t) < 8) 2572 if (sizeof(sector_t) < 8)
2569 ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled"); 2573 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
2570 goto failed_mount; 2574 goto failed_mount;
2571 } 2575 }
2572 2576
@@ -3346,7 +3350,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
3346 int ret = 0; 3350 int ret = 0;
3347 tid_t target; 3351 tid_t target;
3348 3352
3349 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 3353 trace_ext4_sync_fs(sb, wait);
3350 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { 3354 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
3351 if (wait) 3355 if (wait)
3352 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); 3356 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
@@ -3450,7 +3454,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3450 goto restore_opts; 3454 goto restore_opts;
3451 } 3455 }
3452 3456
3453 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) 3457 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
3454 ext4_abort(sb, __func__, "Abort forced by user"); 3458 ext4_abort(sb, __func__, "Abort forced by user");
3455 3459
3456 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3460 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -3465,7 +3469,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
3465 3469
3466 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 3470 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
3467 n_blocks_count > ext4_blocks_count(es)) { 3471 n_blocks_count > ext4_blocks_count(es)) {
3468 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { 3472 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
3469 err = -EROFS; 3473 err = -EROFS;
3470 goto restore_opts; 3474 goto restore_opts;
3471 } 3475 }
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 304b411cb8bc..8970d8c49bb0 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -966,7 +966,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
966 966
967 opts->fs_uid = current_uid(); 967 opts->fs_uid = current_uid();
968 opts->fs_gid = current_gid(); 968 opts->fs_gid = current_gid();
969 opts->fs_fmask = current_umask(); 969 opts->fs_fmask = opts->fs_dmask = current_umask();
970 opts->allow_utime = -1; 970 opts->allow_utime = -1;
971 opts->codepage = fat_default_codepage; 971 opts->codepage = fat_default_codepage;
972 opts->iocharset = fat_default_iocharset; 972 opts->iocharset = fat_default_iocharset;
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index cad957cdb1e5..5971359d2090 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -1,6 +1,6 @@
1config GFS2_FS 1config GFS2_FS
2 tristate "GFS2 file system support" 2 tristate "GFS2 file system support"
3 depends on EXPERIMENTAL && (64BIT || LBD) 3 depends on EXPERIMENTAL && (64BIT || LBDAF)
4 select DLM if GFS2_FS_LOCKING_DLM 4 select DLM if GFS2_FS_LOCKING_DLM
5 select CONFIGFS_FS if GFS2_FS_LOCKING_DLM 5 select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
6 select SYSFS if GFS2_FS_LOCKING_DLM 6 select SYSFS if GFS2_FS_LOCKING_DLM
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 2f0dc5a14633..8ba5441063be 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -195,9 +195,8 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
195 * Do not report hidden files if so instructed, or associated 195 * Do not report hidden files if so instructed, or associated
196 * files unless instructed to do so 196 * files unless instructed to do so
197 */ 197 */
198 if ((sbi->s_hide == 'y' && 198 if ((sbi->s_hide && (de->flags[-sbi->s_high_sierra] & 1)) ||
199 (de->flags[-sbi->s_high_sierra] & 1)) || 199 (!sbi->s_showassoc &&
200 (sbi->s_showassoc =='n' &&
201 (de->flags[-sbi->s_high_sierra] & 4))) { 200 (de->flags[-sbi->s_high_sierra] & 4))) {
202 filp->f_pos += de_len; 201 filp->f_pos += de_len;
203 continue; 202 continue;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 068b34b5a107..58a7963e168a 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -141,13 +141,17 @@ static const struct dentry_operations isofs_dentry_ops[] = {
141}; 141};
142 142
143struct iso9660_options{ 143struct iso9660_options{
144 char map; 144 unsigned int rock:1;
145 char rock; 145 unsigned int cruft:1;
146 unsigned int hide:1;
147 unsigned int showassoc:1;
148 unsigned int nocompress:1;
149 unsigned int overriderockperm:1;
150 unsigned int uid_set:1;
151 unsigned int gid_set:1;
152 unsigned int utf8:1;
153 unsigned char map;
146 char joliet; 154 char joliet;
147 char cruft;
148 char hide;
149 char showassoc;
150 char nocompress;
151 unsigned char check; 155 unsigned char check;
152 unsigned int blocksize; 156 unsigned int blocksize;
153 mode_t fmode; 157 mode_t fmode;
@@ -155,7 +159,6 @@ struct iso9660_options{
155 gid_t gid; 159 gid_t gid;
156 uid_t uid; 160 uid_t uid;
157 char *iocharset; 161 char *iocharset;
158 unsigned char utf8;
159 /* LVE */ 162 /* LVE */
160 s32 session; 163 s32 session;
161 s32 sbsector; 164 s32 sbsector;
@@ -312,7 +315,7 @@ enum {
312 Opt_block, Opt_check_r, Opt_check_s, Opt_cruft, Opt_gid, Opt_ignore, 315 Opt_block, Opt_check_r, Opt_check_s, Opt_cruft, Opt_gid, Opt_ignore,
313 Opt_iocharset, Opt_map_a, Opt_map_n, Opt_map_o, Opt_mode, Opt_nojoliet, 316 Opt_iocharset, Opt_map_a, Opt_map_n, Opt_map_o, Opt_mode, Opt_nojoliet,
314 Opt_norock, Opt_sb, Opt_session, Opt_uid, Opt_unhide, Opt_utf8, Opt_err, 317 Opt_norock, Opt_sb, Opt_session, Opt_uid, Opt_unhide, Opt_utf8, Opt_err,
315 Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, 318 Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, Opt_overriderockperm,
316}; 319};
317 320
318static const match_table_t tokens = { 321static const match_table_t tokens = {
@@ -340,6 +343,7 @@ static const match_table_t tokens = {
340 {Opt_gid, "gid=%u"}, 343 {Opt_gid, "gid=%u"},
341 {Opt_mode, "mode=%u"}, 344 {Opt_mode, "mode=%u"},
342 {Opt_dmode, "dmode=%u"}, 345 {Opt_dmode, "dmode=%u"},
346 {Opt_overriderockperm, "overriderockperm"},
343 {Opt_block, "block=%u"}, 347 {Opt_block, "block=%u"},
344 {Opt_ignore, "conv=binary"}, 348 {Opt_ignore, "conv=binary"},
345 {Opt_ignore, "conv=b"}, 349 {Opt_ignore, "conv=b"},
@@ -359,24 +363,22 @@ static int parse_options(char *options, struct iso9660_options *popt)
359 int option; 363 int option;
360 364
361 popt->map = 'n'; 365 popt->map = 'n';
362 popt->rock = 'y'; 366 popt->rock = 1;
363 popt->joliet = 'y'; 367 popt->joliet = 1;
364 popt->cruft = 'n'; 368 popt->cruft = 0;
365 popt->hide = 'n'; 369 popt->hide = 0;
366 popt->showassoc = 'n'; 370 popt->showassoc = 0;
367 popt->check = 'u'; /* unset */ 371 popt->check = 'u'; /* unset */
368 popt->nocompress = 0; 372 popt->nocompress = 0;
369 popt->blocksize = 1024; 373 popt->blocksize = 1024;
370 popt->fmode = popt->dmode = S_IRUGO | S_IXUGO; /* 374 popt->fmode = popt->dmode = ISOFS_INVALID_MODE;
371 * r-x for all. The disc could 375 popt->uid_set = 0;
372 * be shared with DOS machines so 376 popt->gid_set = 0;
373 * virtually anything could be
374 * a valid executable.
375 */
376 popt->gid = 0; 377 popt->gid = 0;
377 popt->uid = 0; 378 popt->uid = 0;
378 popt->iocharset = NULL; 379 popt->iocharset = NULL;
379 popt->utf8 = 0; 380 popt->utf8 = 0;
381 popt->overriderockperm = 0;
380 popt->session=-1; 382 popt->session=-1;
381 popt->sbsector=-1; 383 popt->sbsector=-1;
382 if (!options) 384 if (!options)
@@ -393,20 +395,20 @@ static int parse_options(char *options, struct iso9660_options *popt)
393 token = match_token(p, tokens, args); 395 token = match_token(p, tokens, args);
394 switch (token) { 396 switch (token) {
395 case Opt_norock: 397 case Opt_norock:
396 popt->rock = 'n'; 398 popt->rock = 0;
397 break; 399 break;
398 case Opt_nojoliet: 400 case Opt_nojoliet:
399 popt->joliet = 'n'; 401 popt->joliet = 0;
400 break; 402 break;
401 case Opt_hide: 403 case Opt_hide:
402 popt->hide = 'y'; 404 popt->hide = 1;
403 break; 405 break;
404 case Opt_unhide: 406 case Opt_unhide:
405 case Opt_showassoc: 407 case Opt_showassoc:
406 popt->showassoc = 'y'; 408 popt->showassoc = 1;
407 break; 409 break;
408 case Opt_cruft: 410 case Opt_cruft:
409 popt->cruft = 'y'; 411 popt->cruft = 1;
410 break; 412 break;
411 case Opt_utf8: 413 case Opt_utf8:
412 popt->utf8 = 1; 414 popt->utf8 = 1;
@@ -450,11 +452,13 @@ static int parse_options(char *options, struct iso9660_options *popt)
450 if (match_int(&args[0], &option)) 452 if (match_int(&args[0], &option))
451 return 0; 453 return 0;
452 popt->uid = option; 454 popt->uid = option;
455 popt->uid_set = 1;
453 break; 456 break;
454 case Opt_gid: 457 case Opt_gid:
455 if (match_int(&args[0], &option)) 458 if (match_int(&args[0], &option))
456 return 0; 459 return 0;
457 popt->gid = option; 460 popt->gid = option;
461 popt->gid_set = 1;
458 break; 462 break;
459 case Opt_mode: 463 case Opt_mode:
460 if (match_int(&args[0], &option)) 464 if (match_int(&args[0], &option))
@@ -466,6 +470,9 @@ static int parse_options(char *options, struct iso9660_options *popt)
466 return 0; 470 return 0;
467 popt->dmode = option; 471 popt->dmode = option;
468 break; 472 break;
473 case Opt_overriderockperm:
474 popt->overriderockperm = 1;
475 break;
469 case Opt_block: 476 case Opt_block:
470 if (match_int(&args[0], &option)) 477 if (match_int(&args[0], &option))
471 return 0; 478 return 0;
@@ -650,7 +657,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
650 goto out_freebh; 657 goto out_freebh;
651 658
652 sbi->s_high_sierra = 1; 659 sbi->s_high_sierra = 1;
653 opt.rock = 'n'; 660 opt.rock = 0;
654 h_pri = (struct hs_primary_descriptor *)vdp; 661 h_pri = (struct hs_primary_descriptor *)vdp;
655 goto root_found; 662 goto root_found;
656 } 663 }
@@ -673,7 +680,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
673 680
674root_found: 681root_found:
675 682
676 if (joliet_level && (pri == NULL || opt.rock == 'n')) { 683 if (joliet_level && (pri == NULL || !opt.rock)) {
677 /* This is the case of Joliet with the norock mount flag. 684 /* This is the case of Joliet with the norock mount flag.
678 * A disc with both Joliet and Rock Ridge is handled later 685 * A disc with both Joliet and Rock Ridge is handled later
679 */ 686 */
@@ -802,22 +809,31 @@ root_found:
802 s->s_op = &isofs_sops; 809 s->s_op = &isofs_sops;
803 s->s_export_op = &isofs_export_ops; 810 s->s_export_op = &isofs_export_ops;
804 sbi->s_mapping = opt.map; 811 sbi->s_mapping = opt.map;
805 sbi->s_rock = (opt.rock == 'y' ? 2 : 0); 812 sbi->s_rock = (opt.rock ? 2 : 0);
806 sbi->s_rock_offset = -1; /* initial offset, will guess until SP is found*/ 813 sbi->s_rock_offset = -1; /* initial offset, will guess until SP is found*/
807 sbi->s_cruft = opt.cruft; 814 sbi->s_cruft = opt.cruft;
808 sbi->s_hide = opt.hide; 815 sbi->s_hide = opt.hide;
809 sbi->s_showassoc = opt.showassoc; 816 sbi->s_showassoc = opt.showassoc;
810 sbi->s_uid = opt.uid; 817 sbi->s_uid = opt.uid;
811 sbi->s_gid = opt.gid; 818 sbi->s_gid = opt.gid;
819 sbi->s_uid_set = opt.uid_set;
820 sbi->s_gid_set = opt.gid_set;
812 sbi->s_utf8 = opt.utf8; 821 sbi->s_utf8 = opt.utf8;
813 sbi->s_nocompress = opt.nocompress; 822 sbi->s_nocompress = opt.nocompress;
823 sbi->s_overriderockperm = opt.overriderockperm;
814 /* 824 /*
815 * It would be incredibly stupid to allow people to mark every file 825 * It would be incredibly stupid to allow people to mark every file
816 * on the disk as suid, so we merely allow them to set the default 826 * on the disk as suid, so we merely allow them to set the default
817 * permissions. 827 * permissions.
818 */ 828 */
819 sbi->s_fmode = opt.fmode & 0777; 829 if (opt.fmode != ISOFS_INVALID_MODE)
820 sbi->s_dmode = opt.dmode & 0777; 830 sbi->s_fmode = opt.fmode & 0777;
831 else
832 sbi->s_fmode = ISOFS_INVALID_MODE;
833 if (opt.dmode != ISOFS_INVALID_MODE)
834 sbi->s_dmode = opt.dmode & 0777;
835 else
836 sbi->s_dmode = ISOFS_INVALID_MODE;
821 837
822 /* 838 /*
823 * Read the root inode, which _may_ result in changing 839 * Read the root inode, which _may_ result in changing
@@ -1095,18 +1111,6 @@ static const struct address_space_operations isofs_aops = {
1095 .bmap = _isofs_bmap 1111 .bmap = _isofs_bmap
1096}; 1112};
1097 1113
1098static inline void test_and_set_uid(uid_t *p, uid_t value)
1099{
1100 if (value)
1101 *p = value;
1102}
1103
1104static inline void test_and_set_gid(gid_t *p, gid_t value)
1105{
1106 if (value)
1107 *p = value;
1108}
1109
1110static int isofs_read_level3_size(struct inode *inode) 1114static int isofs_read_level3_size(struct inode *inode)
1111{ 1115{
1112 unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); 1116 unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
@@ -1261,7 +1265,10 @@ static int isofs_read_inode(struct inode *inode)
1261 ei->i_file_format = isofs_file_normal; 1265 ei->i_file_format = isofs_file_normal;
1262 1266
1263 if (de->flags[-high_sierra] & 2) { 1267 if (de->flags[-high_sierra] & 2) {
1264 inode->i_mode = sbi->s_dmode | S_IFDIR; 1268 if (sbi->s_dmode != ISOFS_INVALID_MODE)
1269 inode->i_mode = S_IFDIR | sbi->s_dmode;
1270 else
1271 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
1265 inode->i_nlink = 1; /* 1272 inode->i_nlink = 1; /*
1266 * Set to 1. We know there are 2, but 1273 * Set to 1. We know there are 2, but
1267 * the find utility tries to optimize 1274 * the find utility tries to optimize
@@ -1270,8 +1277,16 @@ static int isofs_read_inode(struct inode *inode)
1270 * do it the hard way. 1277 * do it the hard way.
1271 */ 1278 */
1272 } else { 1279 } else {
1273 /* Everybody gets to read the file. */ 1280 if (sbi->s_fmode != ISOFS_INVALID_MODE) {
1274 inode->i_mode = sbi->s_fmode | S_IFREG; 1281 inode->i_mode = S_IFREG | sbi->s_fmode;
1282 } else {
1283 /*
1284 * Set default permissions: r-x for all. The disc
1285 * could be shared with DOS machines so virtually
1286 * anything could be a valid executable.
1287 */
1288 inode->i_mode = S_IFREG | S_IRUGO | S_IXUGO;
1289 }
1275 inode->i_nlink = 1; 1290 inode->i_nlink = 1;
1276 } 1291 }
1277 inode->i_uid = sbi->s_uid; 1292 inode->i_uid = sbi->s_uid;
@@ -1300,7 +1315,7 @@ static int isofs_read_inode(struct inode *inode)
1300 * this CDROM was mounted with the cruft option. 1315 * this CDROM was mounted with the cruft option.
1301 */ 1316 */
1302 1317
1303 if (sbi->s_cruft == 'y') 1318 if (sbi->s_cruft)
1304 inode->i_size &= 0x00ffffff; 1319 inode->i_size &= 0x00ffffff;
1305 1320
1306 if (de->interleave[0]) { 1321 if (de->interleave[0]) {
@@ -1346,9 +1361,18 @@ static int isofs_read_inode(struct inode *inode)
1346 if (!high_sierra) { 1361 if (!high_sierra) {
1347 parse_rock_ridge_inode(de, inode); 1362 parse_rock_ridge_inode(de, inode);
1348 /* if we want uid/gid set, override the rock ridge setting */ 1363 /* if we want uid/gid set, override the rock ridge setting */
1349 test_and_set_uid(&inode->i_uid, sbi->s_uid); 1364 if (sbi->s_uid_set)
1350 test_and_set_gid(&inode->i_gid, sbi->s_gid); 1365 inode->i_uid = sbi->s_uid;
1366 if (sbi->s_gid_set)
1367 inode->i_gid = sbi->s_gid;
1351 } 1368 }
1369 /* Now set final access rights if overriding rock ridge setting */
1370 if (S_ISDIR(inode->i_mode) && sbi->s_overriderockperm &&
1371 sbi->s_dmode != ISOFS_INVALID_MODE)
1372 inode->i_mode = S_IFDIR | sbi->s_dmode;
1373 if (S_ISREG(inode->i_mode) && sbi->s_overriderockperm &&
1374 sbi->s_fmode != ISOFS_INVALID_MODE)
1375 inode->i_mode = S_IFREG | sbi->s_fmode;
1352 1376
1353 /* Install the inode operations vector */ 1377 /* Install the inode operations vector */
1354 if (S_ISREG(inode->i_mode)) { 1378 if (S_ISREG(inode->i_mode)) {
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index ccbf72faf27a..7d33de84f52a 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -35,21 +35,20 @@ struct isofs_sb_info {
35 unsigned long s_log_zone_size; 35 unsigned long s_log_zone_size;
36 unsigned long s_max_size; 36 unsigned long s_max_size;
37 37
38 unsigned char s_high_sierra; /* A simple flag */
39 unsigned char s_mapping;
40 int s_rock_offset; /* offset of SUSP fields within SU area */ 38 int s_rock_offset; /* offset of SUSP fields within SU area */
41 unsigned char s_rock;
42 unsigned char s_joliet_level; 39 unsigned char s_joliet_level;
43 unsigned char s_utf8; 40 unsigned char s_mapping;
44 unsigned char s_cruft; /* Broken disks with high 41 unsigned int s_high_sierra:1;
45 byte of length containing 42 unsigned int s_rock:2;
46 junk */ 43 unsigned int s_utf8:1;
47 unsigned char s_unhide; 44 unsigned int s_cruft:1; /* Broken disks with high byte of length
48 unsigned char s_nosuid; 45 * containing junk */
49 unsigned char s_nodev; 46 unsigned int s_nocompress:1;
50 unsigned char s_nocompress; 47 unsigned int s_hide:1;
51 unsigned char s_hide; 48 unsigned int s_showassoc:1;
52 unsigned char s_showassoc; 49 unsigned int s_overriderockperm:1;
50 unsigned int s_uid_set:1;
51 unsigned int s_gid_set:1;
53 52
54 mode_t s_fmode; 53 mode_t s_fmode;
55 mode_t s_dmode; 54 mode_t s_dmode;
@@ -58,6 +57,8 @@ struct isofs_sb_info {
58 struct nls_table *s_nls_iocharset; /* Native language support table */ 57 struct nls_table *s_nls_iocharset; /* Native language support table */
59}; 58};
60 59
60#define ISOFS_INVALID_MODE ((mode_t) -1)
61
61static inline struct isofs_sb_info *ISOFS_SB(struct super_block *sb) 62static inline struct isofs_sb_info *ISOFS_SB(struct super_block *sb)
62{ 63{
63 return sb->s_fs_info; 64 return sb->s_fs_info;
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 8299889a835e..eaa831311c9c 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -142,9 +142,9 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
142 */ 142 */
143 match = 0; 143 match = 0;
144 if (dlen > 0 && 144 if (dlen > 0 &&
145 (sbi->s_hide =='n' || 145 (!sbi->s_hide ||
146 (!(de->flags[-sbi->s_high_sierra] & 1))) && 146 (!(de->flags[-sbi->s_high_sierra] & 1))) &&
147 (sbi->s_showassoc =='y' || 147 (sbi->s_showassoc ||
148 (!(de->flags[-sbi->s_high_sierra] & 4)))) { 148 (!(de->flags[-sbi->s_high_sierra] & 4)))) {
149 match = (isofs_cmp(dentry, dpnt, dlen) == 0); 149 match = (isofs_cmp(dentry, dpnt, dlen) == 0);
150 } 150 }
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index ed886e6db399..73242ba7c7b1 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1686,35 +1686,6 @@ out:
1686 return; 1686 return;
1687} 1687}
1688 1688
1689/*
1690 * journal_try_to_free_buffers() could race with journal_commit_transaction()
1691 * The latter might still hold the a count on buffers when inspecting
1692 * them on t_syncdata_list or t_locked_list.
1693 *
1694 * journal_try_to_free_buffers() will call this function to
1695 * wait for the current transaction to finish syncing data buffers, before
1696 * tryinf to free that buffer.
1697 *
1698 * Called with journal->j_state_lock held.
1699 */
1700static void journal_wait_for_transaction_sync_data(journal_t *journal)
1701{
1702 transaction_t *transaction = NULL;
1703 tid_t tid;
1704
1705 spin_lock(&journal->j_state_lock);
1706 transaction = journal->j_committing_transaction;
1707
1708 if (!transaction) {
1709 spin_unlock(&journal->j_state_lock);
1710 return;
1711 }
1712
1713 tid = transaction->t_tid;
1714 spin_unlock(&journal->j_state_lock);
1715 log_wait_commit(journal, tid);
1716}
1717
1718/** 1689/**
1719 * int journal_try_to_free_buffers() - try to free page buffers. 1690 * int journal_try_to_free_buffers() - try to free page buffers.
1720 * @journal: journal for operation 1691 * @journal: journal for operation
@@ -1786,25 +1757,6 @@ int journal_try_to_free_buffers(journal_t *journal,
1786 1757
1787 ret = try_to_free_buffers(page); 1758 ret = try_to_free_buffers(page);
1788 1759
1789 /*
1790 * There are a number of places where journal_try_to_free_buffers()
1791 * could race with journal_commit_transaction(), the later still
1792 * holds the reference to the buffers to free while processing them.
1793 * try_to_free_buffers() failed to free those buffers. Some of the
1794 * caller of releasepage() request page buffers to be dropped, otherwise
1795 * treat the fail-to-free as errors (such as generic_file_direct_IO())
1796 *
1797 * So, if the caller of try_to_release_page() wants the synchronous
1798 * behaviour(i.e make sure buffers are dropped upon return),
1799 * let's wait for the current transaction to finish flush of
1800 * dirty data buffers, then try to free those buffers again,
1801 * with the journal locked.
1802 */
1803 if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
1804 journal_wait_for_transaction_sync_data(journal);
1805 ret = try_to_free_buffers(page);
1806 }
1807
1808busy: 1760busy:
1809 return ret; 1761 return ret;
1810} 1762}
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 17159cacbd9e..5d70b3e6d49b 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -20,9 +20,9 @@
20#include <linux/time.h> 20#include <linux/time.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/jbd2.h> 22#include <linux/jbd2.h>
23#include <linux/marker.h>
24#include <linux/errno.h> 23#include <linux/errno.h>
25#include <linux/slab.h> 24#include <linux/slab.h>
25#include <trace/events/jbd2.h>
26 26
27/* 27/*
28 * Unlink a buffer from a transaction checkpoint list. 28 * Unlink a buffer from a transaction checkpoint list.
@@ -358,8 +358,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
358 * journal straight away. 358 * journal straight away.
359 */ 359 */
360 result = jbd2_cleanup_journal_tail(journal); 360 result = jbd2_cleanup_journal_tail(journal);
361 trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", 361 trace_jbd2_checkpoint(journal, result);
362 journal->j_devname, result);
363 jbd_debug(1, "cleanup_journal_tail returned %d\n", result); 362 jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
364 if (result <= 0) 363 if (result <= 0)
365 return result; 364 return result;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 0b7d3b8226fd..7b4088b2364d 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -16,7 +16,6 @@
16#include <linux/time.h> 16#include <linux/time.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/jbd2.h> 18#include <linux/jbd2.h>
19#include <linux/marker.h>
20#include <linux/errno.h> 19#include <linux/errno.h>
21#include <linux/slab.h> 20#include <linux/slab.h>
22#include <linux/mm.h> 21#include <linux/mm.h>
@@ -26,6 +25,7 @@
26#include <linux/writeback.h> 25#include <linux/writeback.h>
27#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
28#include <linux/bio.h> 27#include <linux/bio.h>
28#include <trace/events/jbd2.h>
29 29
30/* 30/*
31 * Default IO end handler for temporary BJ_IO buffer_heads. 31 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -253,6 +253,7 @@ static int journal_submit_data_buffers(journal_t *journal,
253 * block allocation with delalloc. We need to write 253 * block allocation with delalloc. We need to write
254 * only allocated blocks here. 254 * only allocated blocks here.
255 */ 255 */
256 trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
256 err = journal_submit_inode_data_buffers(mapping); 257 err = journal_submit_inode_data_buffers(mapping);
257 if (!ret) 258 if (!ret)
258 ret = err; 259 ret = err;
@@ -394,8 +395,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
394 commit_transaction = journal->j_running_transaction; 395 commit_transaction = journal->j_running_transaction;
395 J_ASSERT(commit_transaction->t_state == T_RUNNING); 396 J_ASSERT(commit_transaction->t_state == T_RUNNING);
396 397
397 trace_mark(jbd2_start_commit, "dev %s transaction %d", 398 trace_jbd2_start_commit(journal, commit_transaction);
398 journal->j_devname, commit_transaction->t_tid);
399 jbd_debug(1, "JBD: starting commit of transaction %d\n", 399 jbd_debug(1, "JBD: starting commit of transaction %d\n",
400 commit_transaction->t_tid); 400 commit_transaction->t_tid);
401 401
@@ -409,6 +409,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
409 */ 409 */
410 if (commit_transaction->t_synchronous_commit) 410 if (commit_transaction->t_synchronous_commit)
411 write_op = WRITE_SYNC_PLUG; 411 write_op = WRITE_SYNC_PLUG;
412 trace_jbd2_commit_locking(journal, commit_transaction);
412 stats.u.run.rs_wait = commit_transaction->t_max_wait; 413 stats.u.run.rs_wait = commit_transaction->t_max_wait;
413 stats.u.run.rs_locked = jiffies; 414 stats.u.run.rs_locked = jiffies;
414 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, 415 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
@@ -484,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
484 */ 485 */
485 jbd2_journal_switch_revoke_table(journal); 486 jbd2_journal_switch_revoke_table(journal);
486 487
488 trace_jbd2_commit_flushing(journal, commit_transaction);
487 stats.u.run.rs_flushing = jiffies; 489 stats.u.run.rs_flushing = jiffies;
488 stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked, 490 stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked,
489 stats.u.run.rs_flushing); 491 stats.u.run.rs_flushing);
@@ -520,6 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
520 commit_transaction->t_state = T_COMMIT; 522 commit_transaction->t_state = T_COMMIT;
521 spin_unlock(&journal->j_state_lock); 523 spin_unlock(&journal->j_state_lock);
522 524
525 trace_jbd2_commit_logging(journal, commit_transaction);
523 stats.u.run.rs_logging = jiffies; 526 stats.u.run.rs_logging = jiffies;
524 stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, 527 stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing,
525 stats.u.run.rs_logging); 528 stats.u.run.rs_logging);
@@ -1054,9 +1057,7 @@ restart_loop:
1054 if (journal->j_commit_callback) 1057 if (journal->j_commit_callback)
1055 journal->j_commit_callback(journal, commit_transaction); 1058 journal->j_commit_callback(journal, commit_transaction);
1056 1059
1057 trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", 1060 trace_jbd2_end_commit(journal, commit_transaction);
1058 journal->j_devname, commit_transaction->t_tid,
1059 journal->j_tail_sequence);
1060 jbd_debug(1, "JBD: commit %d complete, head %d\n", 1061 jbd_debug(1, "JBD: commit %d complete, head %d\n",
1061 journal->j_commit_sequence, journal->j_tail_sequence); 1062 journal->j_commit_sequence, journal->j_tail_sequence);
1062 if (to_free) 1063 if (to_free)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 62be7d294ec2..18bfd5dab642 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -38,6 +38,10 @@
38#include <linux/debugfs.h> 38#include <linux/debugfs.h>
39#include <linux/seq_file.h> 39#include <linux/seq_file.h>
40#include <linux/math64.h> 40#include <linux/math64.h>
41#include <linux/hash.h>
42
43#define CREATE_TRACE_POINTS
44#include <trace/events/jbd2.h>
41 45
42#include <asm/uaccess.h> 46#include <asm/uaccess.h>
43#include <asm/page.h> 47#include <asm/page.h>
@@ -2377,6 +2381,71 @@ static void __exit journal_exit(void)
2377 jbd2_journal_destroy_caches(); 2381 jbd2_journal_destroy_caches();
2378} 2382}
2379 2383
2384/*
2385 * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
2386 * tracing infrastructure to map a dev_t to a device name.
2387 *
2388 * The caller should use rcu_read_lock() in order to make sure the
 2389 * device name stays valid until it's done with it. We use
2390 * rcu_read_lock() as well to make sure we're safe in case the caller
2391 * gets sloppy, and because rcu_read_lock() is cheap and can be safely
2392 * nested.
2393 */
2394struct devname_cache {
2395 struct rcu_head rcu;
2396 dev_t device;
2397 char devname[BDEVNAME_SIZE];
2398};
2399#define CACHE_SIZE_BITS 6
2400static struct devname_cache *devcache[1 << CACHE_SIZE_BITS];
2401static DEFINE_SPINLOCK(devname_cache_lock);
2402
2403static void free_devcache(struct rcu_head *rcu)
2404{
2405 kfree(rcu);
2406}
2407
2408const char *jbd2_dev_to_name(dev_t device)
2409{
2410 int i = hash_32(device, CACHE_SIZE_BITS);
2411 char *ret;
2412 struct block_device *bd;
2413
2414 rcu_read_lock();
2415 if (devcache[i] && devcache[i]->device == device) {
2416 ret = devcache[i]->devname;
2417 rcu_read_unlock();
2418 return ret;
2419 }
2420 rcu_read_unlock();
2421
2422 spin_lock(&devname_cache_lock);
2423 if (devcache[i]) {
2424 if (devcache[i]->device == device) {
2425 ret = devcache[i]->devname;
2426 spin_unlock(&devname_cache_lock);
2427 return ret;
2428 }
2429 call_rcu(&devcache[i]->rcu, free_devcache);
2430 }
2431 devcache[i] = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
2432 if (!devcache[i]) {
2433 spin_unlock(&devname_cache_lock);
2434 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
2435 }
2436 devcache[i]->device = device;
2437 bd = bdget(device);
2438 if (bd) {
2439 bdevname(bd, devcache[i]->devname);
2440 bdput(bd);
2441 } else
2442 __bdevname(device, devcache[i]->devname);
2443 ret = devcache[i]->devname;
2444 spin_unlock(&devname_cache_lock);
2445 return ret;
2446}
2447EXPORT_SYMBOL(jbd2_dev_to_name);
2448
2380MODULE_LICENSE("GPL"); 2449MODULE_LICENSE("GPL");
2381module_init(journal_init); 2450module_init(journal_init);
2382module_exit(journal_exit); 2451module_exit(journal_exit);
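
The comment above spells out the RCU contract for jbd2_dev_to_name(): the returned string only stays valid while the caller holds rcu_read_lock(). A minimal sketch of a conforming caller follows; it is not part of this patch, and the helper function and printk message are illustrative only.

#include <linux/jbd2.h>
#include <linux/rcupdate.h>
#include <linux/kernel.h>

/* Hypothetical caller: print the name of the device backing a journal,
 * keeping the RCU read lock held for as long as the string returned by
 * jbd2_dev_to_name() is in use. */
static void example_print_journal_dev(journal_t *journal)
{
	rcu_read_lock();
	printk(KERN_DEBUG "journal lives on %s\n",
	       jbd2_dev_to_name(journal->j_fs_dev->bd_dev));
	rcu_read_unlock();
}
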
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 996ffda06bf3..494501edba6b 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1547,36 +1547,6 @@ out:
1547 return; 1547 return;
1548} 1548}
1549 1549
1550/*
1551 * jbd2_journal_try_to_free_buffers() could race with
1552 * jbd2_journal_commit_transaction(). The later might still hold the
1553 * reference count to the buffers when inspecting them on
1554 * t_syncdata_list or t_locked_list.
1555 *
1556 * jbd2_journal_try_to_free_buffers() will call this function to
1557 * wait for the current transaction to finish syncing data buffers, before
1558 * try to free that buffer.
1559 *
1560 * Called with journal->j_state_lock hold.
1561 */
1562static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal)
1563{
1564 transaction_t *transaction;
1565 tid_t tid;
1566
1567 spin_lock(&journal->j_state_lock);
1568 transaction = journal->j_committing_transaction;
1569
1570 if (!transaction) {
1571 spin_unlock(&journal->j_state_lock);
1572 return;
1573 }
1574
1575 tid = transaction->t_tid;
1576 spin_unlock(&journal->j_state_lock);
1577 jbd2_log_wait_commit(journal, tid);
1578}
1579
1580/** 1550/**
1581 * int jbd2_journal_try_to_free_buffers() - try to free page buffers. 1551 * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
1582 * @journal: journal for operation 1552 * @journal: journal for operation
@@ -1649,25 +1619,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
1649 1619
1650 ret = try_to_free_buffers(page); 1620 ret = try_to_free_buffers(page);
1651 1621
1652 /*
1653 * There are a number of places where jbd2_journal_try_to_free_buffers()
1654 * could race with jbd2_journal_commit_transaction(), the later still
1655 * holds the reference to the buffers to free while processing them.
1656 * try_to_free_buffers() failed to free those buffers. Some of the
1657 * caller of releasepage() request page buffers to be dropped, otherwise
1658 * treat the fail-to-free as errors (such as generic_file_direct_IO())
1659 *
1660 * So, if the caller of try_to_release_page() wants the synchronous
1661 * behaviour(i.e make sure buffers are dropped upon return),
1662 * let's wait for the current transaction to finish flush of
1663 * dirty data buffers, then try to free those buffers again,
1664 * with the journal locked.
1665 */
1666 if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
1667 jbd2_journal_wait_for_transaction_sync_data(journal);
1668 ret = try_to_free_buffers(page);
1669 }
1670
1671busy: 1622busy:
1672 return ret; 1623 return ret;
1673} 1624}
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index ea2605a58b8a..f234f3a4c8ca 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -15,7 +15,8 @@ struct inotify_inode_mark_entry {
15 int wd; 15 int wd;
16}; 16};
17 17
18extern void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group); 18extern void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
19 struct fsnotify_group *group);
19extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv); 20extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
20 21
21extern const struct fsnotify_ops inotify_fsnotify_ops; 22extern const struct fsnotify_ops inotify_fsnotify_ops;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 7ef75b83247e..47cd258fd24d 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -81,7 +81,7 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
81 81
82static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) 82static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group)
83{ 83{
84 inotify_destroy_mark_entry(entry, group); 84 inotify_ignored_and_remove_idr(entry, group);
85} 85}
86 86
87static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask) 87static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 982a412ac5bc..ff231ad23895 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -363,39 +363,17 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
363} 363}
364 364
365/* 365/*
366 * When, for whatever reason, inotify is done with a mark (or what used to be a 366 * Send IN_IGNORED for this wd, remove this wd from the idr, and drop the
367 * watch) we need to remove that watch from the idr and we need to send IN_IGNORED 367 * internal reference help on the mark because it is in the idr.
368 * for the given wd.
369 *
370 * There is a bit of recursion here. The loop looks like:
371 * inotify_destroy_mark_entry -> fsnotify_destroy_mark_by_entry ->
372 * inotify_freeing_mark -> inotify_destory_mark_entry -> restart
373 * But the loop is broken in 2 places. fsnotify_destroy_mark_by_entry sets
374 * entry->group = NULL before the call to inotify_freeing_mark, so the if (egroup)
375 * test below will not call back to fsnotify again. But even if that test wasn't
376 * there this would still be safe since fsnotify_destroy_mark_by_entry() is
377 * safe from recursion.
378 */ 368 */
379void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) 369void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
370 struct fsnotify_group *group)
380{ 371{
381 struct inotify_inode_mark_entry *ientry; 372 struct inotify_inode_mark_entry *ientry;
382 struct inotify_event_private_data *event_priv; 373 struct inotify_event_private_data *event_priv;
383 struct fsnotify_event_private_data *fsn_event_priv; 374 struct fsnotify_event_private_data *fsn_event_priv;
384 struct fsnotify_group *egroup;
385 struct idr *idr; 375 struct idr *idr;
386 376
387 spin_lock(&entry->lock);
388 egroup = entry->group;
389
390 /* if egroup we aren't really done and something might still send events
391 * for this inode, on the callback we'll send the IN_IGNORED */
392 if (egroup) {
393 spin_unlock(&entry->lock);
394 fsnotify_destroy_mark_by_entry(entry);
395 return;
396 }
397 spin_unlock(&entry->lock);
398
399 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 377 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
400 378
401 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); 379 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
@@ -699,7 +677,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
699 fsnotify_get_mark(entry); 677 fsnotify_get_mark(entry);
700 spin_unlock(&group->inotify_data.idr_lock); 678 spin_unlock(&group->inotify_data.idr_lock);
701 679
702 inotify_destroy_mark_entry(entry, group); 680 fsnotify_destroy_mark_by_entry(entry);
703 fsnotify_put_mark(entry); 681 fsnotify_put_mark(entry);
704 682
705out: 683out:
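
For reference, here is a hedged sketch (not part of the patch) of the teardown order inotify_rm_watch() now uses, built only from calls visible in the hunks above. IN_IGNORED is generated indirectly: the generic destroy path invokes the group's ->freeing_mark callback, which for inotify is inotify_ignored_and_remove_idr().

#include <linux/fsnotify_backend.h>

/* Illustrative only: how a watch is torn down after this change. */
static void example_remove_watch(struct fsnotify_mark_entry *entry)
{
	fsnotify_get_mark(entry);               /* pin the mark across the destroy */
	fsnotify_destroy_mark_by_entry(entry);  /* ->freeing_mark sends IN_IGNORED
	                                         * and drops the idr reference */
	fsnotify_put_mark(entry);               /* drop our pin */
}
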
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index d33767f17ba3..0d3ed7407a04 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -552,7 +552,7 @@ static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
552 */ 552 */
553 553
554#if BITS_PER_LONG == 32 554#if BITS_PER_LONG == 32
555# if defined(CONFIG_LBD) 555# if defined(CONFIG_LBDAF)
556 BUILD_BUG_ON(sizeof(sector_t) != 8); 556 BUILD_BUG_ON(sizeof(sector_t) != 8);
557 /* 557 /*
558 * We might be limited by page cache size. 558 * We might be limited by page cache size.
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 63d965193b22..11a7b5c68153 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -18,6 +18,7 @@ proc-y += meminfo.o
18proc-y += stat.o 18proc-y += stat.o
19proc-y += uptime.o 19proc-y += uptime.o
20proc-y += version.o 20proc-y += version.o
21proc-y += softirqs.o
21proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o 22proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
22proc-$(CONFIG_NET) += proc_net.o 23proc-$(CONFIG_NET) += proc_net.o
23proc-$(CONFIG_PROC_KCORE) += kcore.o 24proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index fc6c3025befd..7ba79a54948c 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -195,20 +195,20 @@ void proc_device_tree_add_node(struct device_node *np,
195 p = fixup_name(np, de, p); 195 p = fixup_name(np, de, p);
196 196
197 ent = proc_mkdir(p, de); 197 ent = proc_mkdir(p, de);
198 if (ent == 0) 198 if (ent == NULL)
199 break; 199 break;
200 proc_device_tree_add_node(child, ent); 200 proc_device_tree_add_node(child, ent);
201 } 201 }
202 of_node_put(child); 202 of_node_put(child);
203 203
204 for (pp = np->properties; pp != 0; pp = pp->next) { 204 for (pp = np->properties; pp != NULL; pp = pp->next) {
205 p = pp->name; 205 p = pp->name;
206 206
207 if (duplicate_name(de, p)) 207 if (duplicate_name(de, p))
208 p = fixup_name(np, de, p); 208 p = fixup_name(np, de, p);
209 209
210 ent = __proc_device_tree_add_prop(de, pp, p); 210 ent = __proc_device_tree_add_prop(de, pp, p);
211 if (ent == 0) 211 if (ent == NULL)
212 break; 212 break;
213 } 213 }
214} 214}
@@ -221,10 +221,10 @@ void __init proc_device_tree_init(void)
221 struct device_node *root; 221 struct device_node *root;
222 222
223 proc_device_tree = proc_mkdir("device-tree", NULL); 223 proc_device_tree = proc_mkdir("device-tree", NULL);
224 if (proc_device_tree == 0) 224 if (proc_device_tree == NULL)
225 return; 225 return;
226 root = of_find_node_by_path("/"); 226 root = of_find_node_by_path("/");
227 if (root == 0) { 227 if (root == NULL) {
228 printk(KERN_ERR "/proc/device-tree: can't find root\n"); 228 printk(KERN_ERR "/proc/device-tree: can't find root\n");
229 return; 229 return;
230 } 230 }
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
new file mode 100644
index 000000000000..1807c2419f17
--- /dev/null
+++ b/fs/proc/softirqs.c
@@ -0,0 +1,44 @@
1#include <linux/init.h>
2#include <linux/kernel_stat.h>
3#include <linux/proc_fs.h>
4#include <linux/seq_file.h>
5
6/*
7 * /proc/softirqs ... display the number of softirqs
8 */
9static int show_softirqs(struct seq_file *p, void *v)
10{
11 int i, j;
12
13 seq_printf(p, " ");
14 for_each_possible_cpu(i)
15 seq_printf(p, "CPU%-8d", i);
16 seq_printf(p, "\n");
17
18 for (i = 0; i < NR_SOFTIRQS; i++) {
19 seq_printf(p, "%8s:", softirq_to_name[i]);
20 for_each_possible_cpu(j)
21 seq_printf(p, " %10u", kstat_softirqs_cpu(i, j));
22 seq_printf(p, "\n");
23 }
24 return 0;
25}
26
27static int softirqs_open(struct inode *inode, struct file *file)
28{
29 return single_open(file, show_softirqs, NULL);
30}
31
32static const struct file_operations proc_softirqs_operations = {
33 .open = softirqs_open,
34 .read = seq_read,
35 .llseek = seq_lseek,
36 .release = single_release,
37};
38
39static int __init proc_softirqs_init(void)
40{
41 proc_create("softirqs", 0, NULL, &proc_softirqs_operations);
42 return 0;
43}
44module_init(proc_softirqs_init);
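
The layout of the new file follows directly from the seq_printf() format strings above: a header row of CPU columns, then one row per softirq with a right-aligned name and one counter per possible CPU. A small userspace sketch (not part of the patch, illustration only) that simply dumps it:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/softirqs", "r");
	char line[512];

	if (!f) {
		perror("/proc/softirqs");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
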
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 81e4eb60972e..7cc726c6d70a 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -29,6 +29,8 @@ static int show_stat(struct seq_file *p, void *v)
29 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; 29 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
30 cputime64_t guest; 30 cputime64_t guest;
31 u64 sum = 0; 31 u64 sum = 0;
32 u64 sum_softirq = 0;
33 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
32 struct timespec boottime; 34 struct timespec boottime;
33 unsigned int per_irq_sum; 35 unsigned int per_irq_sum;
34 36
@@ -53,6 +55,13 @@ static int show_stat(struct seq_file *p, void *v)
53 sum += kstat_irqs_cpu(j, i); 55 sum += kstat_irqs_cpu(j, i);
54 } 56 }
55 sum += arch_irq_stat_cpu(i); 57 sum += arch_irq_stat_cpu(i);
58
59 for (j = 0; j < NR_SOFTIRQS; j++) {
60 unsigned int softirq_stat = kstat_softirqs_cpu(j, i);
61
62 per_softirq_sums[j] += softirq_stat;
63 sum_softirq += softirq_stat;
64 }
56 } 65 }
57 sum += arch_irq_stat(); 66 sum += arch_irq_stat();
58 67
@@ -115,6 +124,12 @@ static int show_stat(struct seq_file *p, void *v)
115 nr_running(), 124 nr_running(),
116 nr_iowait()); 125 nr_iowait());
117 126
127 seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq);
128
129 for (i = 0; i < NR_SOFTIRQS; i++)
130 seq_printf(p, " %u", per_softirq_sums[i]);
131 seq_printf(p, "\n");
132
118 return 0; 133 return 0;
119} 134}
120 135
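
The new aggregate line in /proc/stat starts with the literal token "softirq", followed by the grand total and then one count per softirq type in NR_SOFTIRQS order. A hedged userspace sketch (not from the patch) that pulls out just the total:

#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/stat", "r");
	char line[1024];
	unsigned long long total;

	if (!f) {
		perror("/proc/stat");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		if (strncmp(line, "softirq ", 8) == 0 &&
		    sscanf(line + 8, "%llu", &total) == 1)
			printf("total softirq events: %llu\n", total);
	}
	fclose(f);
	return 0;
}
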
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 5edcc3f92ba7..0872afa58d39 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -166,12 +166,7 @@ static const struct file_operations proc_vmcore_operations = {
166 166
167static struct vmcore* __init get_new_element(void) 167static struct vmcore* __init get_new_element(void)
168{ 168{
169 struct vmcore *p; 169 return kzalloc(sizeof(struct vmcore), GFP_KERNEL);
170
171 p = kmalloc(sizeof(*p), GFP_KERNEL);
172 if (p)
173 memset(p, 0, sizeof(*p));
174 return p;
175} 170}
176 171
177static u64 __init get_vmcore_size_elf64(char *elfptr) 172static u64 __init get_vmcore_size_elf64(char *elfptr)
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 4beb964a2a3e..128d3f7c8aa5 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -1270,9 +1270,8 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
1270 1270
1271 RFALSE(ih, "PAP-12210: ih must be 0"); 1271 RFALSE(ih, "PAP-12210: ih must be 0");
1272 1272
1273 if (is_direntry_le_ih 1273 aux_ih = B_N_PITEM_HEAD(tbS0, item_pos);
1274 (aux_ih = 1274 if (is_direntry_le_ih(aux_ih)) {
1275 B_N_PITEM_HEAD(tbS0, item_pos))) {
1276 /* we append to directory item */ 1275 /* we append to directory item */
1277 1276
1278 int entry_count; 1277 int entry_count;
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 381750a155f6..03d85cbf90bf 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -390,7 +390,8 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
390 390
391 if (last_first == FIRST_TO_LAST) { 391 if (last_first == FIRST_TO_LAST) {
392 /* if ( if item in position item_num in buffer SOURCE is directory item ) */ 392 /* if ( if item in position item_num in buffer SOURCE is directory item ) */
393 if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num))) 393 ih = B_N_PITEM_HEAD(src, item_num);
394 if (is_direntry_le_ih(ih))
394 leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, 395 leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST,
395 item_num, 0, cpy_bytes); 396 item_num, 0, cpy_bytes);
396 else { 397 else {
@@ -418,7 +419,8 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
418 } 419 }
419 } else { 420 } else {
420 /* if ( if item in position item_num in buffer SOURCE is directory item ) */ 421 /* if ( if item in position item_num in buffer SOURCE is directory item ) */
421 if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num))) 422 ih = B_N_PITEM_HEAD(src, item_num);
423 if (is_direntry_le_ih(ih))
422 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, 424 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST,
423 item_num, 425 item_num,
424 I_ENTRY_COUNT(ih) - cpy_bytes, 426 I_ENTRY_COUNT(ih) - cpy_bytes,
@@ -774,8 +776,8 @@ void leaf_delete_items(struct buffer_info *cur_bi, int last_first,
774 leaf_delete_items_entirely(cur_bi, first + 1, 776 leaf_delete_items_entirely(cur_bi, first + 1,
775 del_num - 1); 777 del_num - 1);
776 778
777 if (is_direntry_le_ih 779 ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1);
778 (ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1))) 780 if (is_direntry_le_ih(ih))
779 /* the last item is directory */ 781 /* the last item is directory */
780 /* len = numbers of directory entries in this item */ 782 /* len = numbers of directory entries in this item */
781 len = ih_entry_count(ih); 783 len = ih_entry_count(ih);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 7f40f30c55c5..6c959275f2d0 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -640,6 +640,26 @@ int seq_puts(struct seq_file *m, const char *s)
640} 640}
641EXPORT_SYMBOL(seq_puts); 641EXPORT_SYMBOL(seq_puts);
642 642
643/**
644 * seq_write - write arbitrary data to buffer
645 * @seq: seq_file identifying the buffer to which data should be written
646 * @data: data address
647 * @len: number of bytes
648 *
649 * Return 0 on success, non-zero otherwise.
650 */
651int seq_write(struct seq_file *seq, const void *data, size_t len)
652{
653 if (seq->count + len < seq->size) {
654 memcpy(seq->buf + seq->count, data, len);
655 seq->count += len;
656 return 0;
657 }
658 seq->count = seq->size;
659 return -1;
660}
661EXPORT_SYMBOL(seq_write);
662
643struct list_head *seq_list_start(struct list_head *head, loff_t pos) 663struct list_head *seq_list_start(struct list_head *head, loff_t pos)
644{ 664{
645 struct list_head *lh; 665 struct list_head *lh;
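
Unlike seq_printf() and seq_puts(), the new seq_write() helper copies raw bytes with no format parsing and no NUL terminator. A hedged in-kernel sketch (not part of the patch; the ->show() method and its data are hypothetical) of how it might be used:

#include <linux/seq_file.h>

/* Hypothetical ->show() method, for illustration only. */
static int example_show(struct seq_file *m, void *v)
{
	static const unsigned char raw[4] = { 0xde, 0xad, 0xbe, 0xef };

	/* seq_write() copies the bytes verbatim.  On overflow it marks the
	 * buffer full and returns non-zero; the seq_file core then retries
	 * ->show() with a larger buffer, so the return value can be ignored
	 * here. */
	seq_write(m, raw, sizeof(raw));
	return 0;
}
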
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 3d2512c21f05..7cf33379fd46 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -56,9 +56,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
56 56
57 57
58 UFSD("ptrs=uspi->s_apb = %d,double_blocks=%ld \n",ptrs,double_blocks); 58 UFSD("ptrs=uspi->s_apb = %d,double_blocks=%ld \n",ptrs,double_blocks);
59 if (i_block < 0) { 59 if (i_block < direct_blocks) {
60 ufs_warning(inode->i_sb, "ufs_block_to_path", "block < 0");
61 } else if (i_block < direct_blocks) {
62 offsets[n++] = i_block; 60 offsets[n++] = i_block;
63 } else if ((i_block -= direct_blocks) < indirect_blocks) { 61 } else if ((i_block -= direct_blocks) < indirect_blocks) {
64 offsets[n++] = UFS_IND_BLOCK; 62 offsets[n++] = UFS_IND_BLOCK;
@@ -440,8 +438,6 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
440 lock_kernel(); 438 lock_kernel();
441 439
442 UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment); 440 UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment);
443 if (fragment < 0)
444 goto abort_negative;
445 if (fragment > 441 if (fragment >
446 ((UFS_NDADDR + uspi->s_apb + uspi->s_2apb + uspi->s_3apb) 442 ((UFS_NDADDR + uspi->s_apb + uspi->s_2apb + uspi->s_3apb)
447 << uspi->s_fpbshift)) 443 << uspi->s_fpbshift))
@@ -504,10 +500,6 @@ abort:
504 unlock_kernel(); 500 unlock_kernel();
505 return err; 501 return err;
506 502
507abort_negative:
508 ufs_warning(sb, "ufs_get_block", "block < 0");
509 goto abort;
510
511abort_too_big: 503abort_too_big:
512 ufs_warning(sb, "ufs_get_block", "block > big"); 504 ufs_warning(sb, "ufs_get_block", "block > big");
513 goto abort; 505 goto abort;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index f65a53f8752f..6127e24062d0 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -24,7 +24,7 @@
24 * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. 24 * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
25 * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set. 25 * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
26 */ 26 */
27#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) 27#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
28# define XFS_BIG_BLKNOS 1 28# define XFS_BIG_BLKNOS 1
29# define XFS_BIG_INUMS 1 29# define XFS_BIG_INUMS 1
30#else 30#else
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 2e09efbca8db..a220d36f789b 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -616,7 +616,7 @@ xfs_max_file_offset(
616 */ 616 */
617 617
618#if BITS_PER_LONG == 32 618#if BITS_PER_LONG == 32
619# if defined(CONFIG_LBD) 619# if defined(CONFIG_LBDAF)
620 ASSERT(sizeof(sector_t) == 8); 620 ASSERT(sizeof(sector_t) == 8);
621 pagefactor = PAGE_CACHE_SIZE; 621 pagefactor = PAGE_CACHE_SIZE;
622 bitshift = BITS_PER_LONG; 622 bitshift = BITS_PER_LONG;