44 files changed, 870 insertions, 558 deletions
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 56372ecf1690..dfc0197905ca 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -914,7 +914,9 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
        /* Stash our initial stack pointer into the mm structure */
        current->mm->start_stack = (unsigned long )sp;
-        
+#ifdef FLAT_PLAT_INIT
+        FLAT_PLAT_INIT(regs);
+#endif
        DBG_FLT("start_thread(regs=0x%x, entry=0x%x, start_stack=0x%x)\n",
                (int)regs, (int)start_addr, (int)current->mm->start_stack);
        
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 756205314c24..8d7e88e02e0f 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -120,8 +120,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
        if (bprm->misc_bang)
                goto _ret;
-        bprm->misc_bang = 1;
        /* to keep locking time low, we copy the interpreter string */
        read_lock(&entries_lock);
        fmt = check_file(bprm);
@@ -199,6 +197,8 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
        if (retval < 0)
                goto _error;
+        bprm->misc_bang = 1;
        retval = search_binary_handler (bprm, regs);
        if (retval < 0)
                goto _error;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index e8da4ee761b5..25ecbd5b0404 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -175,6 +175,8 @@ out_no_root:
        if (inode)
                iput(inode);
+        cifs_umount(sb, cifs_sb);
 out_mount_failed:
        if (cifs_sb) {
 #ifdef CONFIG_CIFS_DFS_UPCALL
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 28a22092d450..848286861c31 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -649,6 +649,7 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino)
                inode->i_fop = &simple_dir_operations;
                inode->i_uid = cifs_sb->mnt_uid;
                inode->i_gid = cifs_sb->mnt_gid;
+        } else if (rc) {
                _FreeXid(xid);
                iget_failed(inode);
                return ERR_PTR(rc);
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 0c3b618c15b3..f40423eb1a14 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -43,58 +43,13 @@ static DEFINE_MUTEX(read_mutex);
 static int cramfs_iget5_test(struct inode *inode, void *opaque)
 {
        struct cramfs_inode *cramfs_inode = opaque;
+        return inode->i_ino == CRAMINO(cramfs_inode) && inode->i_ino != 1;
-        if (inode->i_ino != CRAMINO(cramfs_inode))
-                return 0; /* does not match */
-        if (inode->i_ino != 1)
-                return 1;
-        /* all empty directories, char, block, pipe, and sock, share inode #1 */
-        if ((inode->i_mode != cramfs_inode->mode) ||
-            (inode->i_gid != cramfs_inode->gid) ||
-            (inode->i_uid != cramfs_inode->uid))
-                return 0; /* does not match */
-        if ((S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) &&
-            (inode->i_rdev != old_decode_dev(cramfs_inode->size)))
-                return 0; /* does not match */
-        return 1; /* matches */
 }
 static int cramfs_iget5_set(struct inode *inode, void *opaque)
 {
-        static struct timespec zerotime;
        struct cramfs_inode *cramfs_inode = opaque;
-        inode->i_mode = cramfs_inode->mode;
-        inode->i_uid = cramfs_inode->uid;
-        inode->i_size = cramfs_inode->size;
-        inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
-        inode->i_gid = cramfs_inode->gid;
-        /* Struct copy intentional */
-        inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
        inode->i_ino = CRAMINO(cramfs_inode);
-        /* inode->i_nlink is left 1 - arguably wrong for directories,
-           but it's the best we can do without reading the directory
-           contents.  1 yields the right result in GNU find, even
-           without -noleaf option. */
-        if (S_ISREG(inode->i_mode)) {
-                inode->i_fop = &generic_ro_fops;
-                inode->i_data.a_ops = &cramfs_aops;
-        } else if (S_ISDIR(inode->i_mode)) {
-                inode->i_op = &cramfs_dir_inode_operations;
-                inode->i_fop = &cramfs_directory_operations;
-        } else if (S_ISLNK(inode->i_mode)) {
-                inode->i_op = &page_symlink_inode_operations;
-                inode->i_data.a_ops = &cramfs_aops;
-        } else {
-                inode->i_size = 0;
-                inode->i_blocks = 0;
-                init_special_inode(inode, inode->i_mode,
-                        old_decode_dev(cramfs_inode->size));
-        }
        return 0;
 }
@@ -104,12 +59,48 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
        struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode),
                                            cramfs_iget5_test, cramfs_iget5_set,
                                            cramfs_inode);
+        static struct timespec zerotime;
        if (inode && (inode->i_state & I_NEW)) {
+                inode->i_mode = cramfs_inode->mode;
+                inode->i_uid = cramfs_inode->uid;
+                inode->i_size = cramfs_inode->size;
+                inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
+                inode->i_gid = cramfs_inode->gid;
+                /* Struct copy intentional */
+                inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
+                /* inode->i_nlink is left 1 - arguably wrong for directories,
+                   but it's the best we can do without reading the directory
+                   contents.  1 yields the right result in GNU find, even
+                   without -noleaf option. */
+                if (S_ISREG(inode->i_mode)) {
+                        inode->i_fop = &generic_ro_fops;
+                        inode->i_data.a_ops = &cramfs_aops;
+                } else if (S_ISDIR(inode->i_mode)) {
+                        inode->i_op = &cramfs_dir_inode_operations;
+                        inode->i_fop = &cramfs_directory_operations;
+                } else if (S_ISLNK(inode->i_mode)) {
+                        inode->i_op = &page_symlink_inode_operations;
+                        inode->i_data.a_ops = &cramfs_aops;
+                } else {
+                        inode->i_size = 0;
+                        inode->i_blocks = 0;
+                        init_special_inode(inode, inode->i_mode,
+                                old_decode_dev(cramfs_inode->size));
+                }
                unlock_new_inode(inode);
        }
        return inode;
 }
+static void cramfs_drop_inode(struct inode *inode)
+{
+        if (inode->i_ino == 1)
+                generic_delete_inode(inode);
+        else
+                generic_drop_inode(inode);
+}
 /*
 * We have our own block cache: don't fill up the buffer cache
 * with the rom-image, because the way the filesystem is set
@@ -534,6 +525,7 @@ static const struct super_operations cramfs_ops = {
        .put_super      = cramfs_put_super,
        .remount_fs     = cramfs_remount,
        .statfs         = cramfs_statfs,
+        .drop_inode     = cramfs_drop_inode,
 };
 static int cramfs_get_sb(struct file_system_type *fs_type,
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1ae5004e93fc..e9fa960ba6da 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1626,6 +1626,9 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
                free_blocks =
                        percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);
 #endif
+        if (free_blocks <= root_blocks)
+                /* we don't have free space */
+                return 0;
        if (free_blocks - root_blocks < nblocks)
                return free_blocks - root_blocks;
        return nblocks;
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index d3d23d73c08b..ec8e33b45219 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -411,7 +411,7 @@ static int call_filldir(struct file * filp, void * dirent,
                                get_dtype(sb, fname->file_type));
                if (error) {
                        filp->f_pos = curr_pos;
-                        info->extra_fname = fname->next;
+                        info->extra_fname = fname;
                        return error;
                }
                fname = fname->next;
@@ -450,11 +450,21 @@ static int ext4_dx_readdir(struct file * filp,
         * If there are any leftover names on the hash collision
         * chain, return them first.
         */
-        if (info->extra_fname &&
+        if (info->extra_fname) {
-            call_filldir(filp, dirent, filldir, info->extra_fname))
+                if (call_filldir(filp, dirent, filldir, info->extra_fname))
-                goto finished;
+                        goto finished;
-        if (!info->curr_node)
+                info->extra_fname = NULL;
+                info->curr_node = rb_next(info->curr_node);
+                if (!info->curr_node) {
+                        if (info->next_hash == ~0) {
+                                filp->f_pos = EXT4_HTREE_EOF;
+                                goto finished;
+                        }
+                        info->curr_hash = info->next_hash;
+                        info->curr_minor_hash = 0;
+                }
+        } else if (!info->curr_node)
                info->curr_node = rb_first(&info->root);
        while (1) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6c7924d9e358..295003241d3d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1072,6 +1072,8 @@ extern void ext4_set_inode_flags(struct inode *);
 extern void ext4_get_inode_flags(struct ext4_inode_info *);
 extern void ext4_set_aops(struct inode *inode);
 extern int ext4_writepage_trans_blocks(struct inode *);
+extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
+extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_block_truncate_page(handle_t *handle,
                struct address_space *mapping, loff_t from);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
@@ -1227,6 +1229,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
 /* extents.c */
 extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
 extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
+extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
+                                       int chunk);
 extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                        ext4_lblk_t iblock,
                        unsigned long max_blocks, struct buffer_head *bh_result,
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 6c166c0a54b7..d33dc56d6986 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -216,7 +216,9 @@ extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
 extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
 extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
 extern int ext4_extent_tree_init(handle_t *, struct inode *);
-extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
+extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
+                                                   int num,
+                                                   struct ext4_ext_path *path);
 extern int ext4_ext_try_to_merge(struct inode *inode,
                                 struct ext4_ext_path *path,
                                 struct ext4_extent *);
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index eb8bc3afe6e9..b455c685a98b 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -51,6 +51,14 @@
                                         EXT4_XATTR_TRANS_BLOCKS - 2 + \
                                         2*EXT4_QUOTA_TRANS_BLOCKS(sb))
+/*
+ * Define the number of metadata blocks we need to account to modify data.
+ *
+ * This includes the super block, inode block, quota blocks and xattr blocks
+ */
+#define EXT4_META_TRANS_BLOCKS(sb)      (EXT4_XATTR_TRANS_BLOCKS + \
+                                        2*EXT4_QUOTA_TRANS_BLOCKS(sb))
 /* Delete operations potentially hit one directory's namespace plus an
 * entire inode, plus arbitrary amounts of bitmap/indirection data.  Be
 * generous.  We can grow the delete transaction later if necessary. */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 612c3d2c3824..b24d3c53f20c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1747,54 +1747,61 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 }
 /*
- * ext4_ext_calc_credits_for_insert:
+ * ext4_ext_calc_credits_for_single_extent:
- * This routine returns max. credits that the extent tree can consume.
+ * This routine returns the max. credits needed to insert an extent
- * It should be OK for low-performance paths like ->writepage()
+ * to the extent tree.
- * To allow many writing processes to fit into a single transaction,
+ * When passing the actual path, the caller should calculate credits
- * the caller should calculate credits under i_data_sem and
+ * under i_data_sem.
- * pass the actual path.
 */
-int ext4_ext_calc_credits_for_insert(struct inode *inode,
+int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
                                                struct ext4_ext_path *path)
 {
-        int depth, needed;
        if (path) {
+                int depth = ext_depth(inode);
+                int ret = 0;
                /* probably there is space in leaf? */
-                depth = ext_depth(inode);
                if (le16_to_cpu(path[depth].p_hdr->eh_entries)
-                                < le16_to_cpu(path[depth].p_hdr->eh_max))
+                                < le16_to_cpu(path[depth].p_hdr->eh_max)) {
-                        return 1;
-        }
-        /*
+                        /*
-         * given 32-bit logical block (4294967296 blocks), max. tree
+                         *  There is some space in the leaf tree, no
-         * can be 4 levels in depth -- 4 * 340^4 == 53453440000.
+                         *  need to account for leaf block credit
-         * Let's also add one more level for imbalance.
+                         *
-         */
+                         *  bitmaps and block group descriptor blocks
-        depth = 5;
+                         *  and other metadata blocks still need to be
+                         *  accounted.
-        /* allocation of new data block(s) */
+                         */
-        needed = 2;
+                        /* 1 bitmap, 1 block group descriptor */
+                        ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
+                }
+        }
-        /*
+        return ext4_chunk_trans_blocks(inode, nrblocks);
-         * tree can be full, so it would need to grow in depth:
+}
-         * we need one credit to modify old root, credits for
-         * new root will be added in split accounting
-         */
-        needed += 1;
-        /*
+/*
-         * Index split can happen, we would need:
+ * How many index/leaf blocks need to change/allocate to modify nrblocks?
-         *    allocate intermediate indexes (bitmap + group)
+ *
-         *  + change two blocks at each level, but root (already included)
+ * If nrblocks fit in a single extent (chunk flag is 1), then
-         */
+ * in the worst case, each tree level index/leaf needs to be changed
-        needed += (depth * 2) + (depth * 2);
+ * if the tree splits due to inserting a new extent, then the old tree
+ * index/leaf needs to be updated too
+ *
+ * If the nrblocks are discontiguous, they could cause
+ * the whole tree split more than once, but this is really rare.
+ */
+int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+{
+        int index;
+        int depth = ext_depth(inode);
-        /* any allocation modifies superblock */
+        if (chunk)
-        needed += 1;
+                index = depth * 2;
+        else
+                index = depth * 3;
-        return needed;
+        return index;
 }
 static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
@@ -1921,9 +1928,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                        correct_index = 1;
                        credits += (ext_depth(inode)) + 1;
                }
-#ifdef CONFIG_QUOTA
                credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
-#endif
                err = ext4_ext_journal_restart(handle, credits);
                if (err)
@@ -2805,7 +2810,7 @@ void ext4_ext_truncate(struct inode *inode)
        /*
         * probably first extent we're gonna free will be last in block
         */
-        err = ext4_writepage_trans_blocks(inode) + 3;
+        err = ext4_writepage_trans_blocks(inode);
        handle = ext4_journal_start(inode, err);
        if (IS_ERR(handle))
                return;
@@ -2819,7 +2824,7 @@ void ext4_ext_truncate(struct inode *inode)
        down_write(&EXT4_I(inode)->i_data_sem);
        ext4_ext_invalidate_cache(inode);
-        ext4_mb_discard_inode_preallocations(inode);
+        ext4_discard_reservation(inode);
        /*
         * TODO: optimization is possible here.
@@ -2858,27 +2863,6 @@ out_stop:
        ext4_journal_stop(handle);
 }
-/*
- * ext4_ext_writepage_trans_blocks:
- * calculate max number of blocks we could modify
- * in order to allocate new block for an inode
- */
-int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
-{
-        int needed;
-        needed = ext4_ext_calc_credits_for_insert(inode, NULL);
-        /* caller wants to allocate num blocks, but note it includes sb */
-        needed = needed * num - (num - 1);
-#ifdef CONFIG_QUOTA
-        needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
-#endif
-        return needed;
-}
 static void ext4_falloc_update_inode(struct inode *inode,
                                int mode, loff_t new_size, int update_ctime)
 {
@@ -2939,10 +2923,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
        max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
                                                        - block;
        /*
-         * credits to insert 1 extent into extent tree + buffers to be able to
+         * credits to insert 1 extent into extent tree
-         * modify 1 super block, 1 block bitmap and 1 group descriptor.
         */
-        credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
+        credits = ext4_chunk_trans_blocks(inode, max_blocks);
        mutex_lock(&inode->i_mutex);
 retry:
        while (ret >= 0 && ret < max_blocks) {
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 655e760212b8..f344834bbf58 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -351,7 +351,7 @@ find_close_to_parent:
                        goto found_flexbg;
                }
-                if (best_flex < 0 ||
+                if (flex_group[best_flex].free_inodes == 0 ||
                    (flex_group[i].free_blocks >
                     flex_group[best_flex].free_blocks &&
                     flex_group[i].free_inodes))
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 59fbbe899acc..7e91913e325b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -41,6 +41,8 @@
 #include "acl.h"
 #include "ext4_extents.h"
+#define MPAGE_DA_EXTENT_TAIL 0x01
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
                                              loff_t new_size)
 {
@@ -1005,6 +1007,9 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
 */
 static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
 {
+        if (!blocks)
+                return 0;
        if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
                return ext4_ext_calc_metadata_amount(inode, blocks);
@@ -1041,18 +1046,6 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 }
-/* Maximum number of blocks we map for direct IO at once. */
-#define DIO_MAX_BLOCKS 4096
-/*
- * Number of credits we need for writing DIO_MAX_BLOCKS:
- * We need sb + group descriptor + bitmap + inode -> 4
- * For B blocks with A block pointers per block we need:
- * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect).
- * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25.
- */
-#define DIO_CREDITS 25
 /*
 * The ext4_get_blocks_wrap() function try to look up the requested blocks,
 * and returns if the blocks are already mapped.
@@ -1164,19 +1157,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
        return retval;
 }
+/* Maximum number of blocks we map for direct IO at once. */
+#define DIO_MAX_BLOCKS 4096
 static int ext4_get_block(struct inode *inode, sector_t iblock,
                        struct buffer_head *bh_result, int create)
 {
        handle_t *handle = ext4_journal_current_handle();
        int ret = 0, started = 0;
        unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
+        int dio_credits;
        if (create && !handle) {
                /* Direct IO write... */
                if (max_blocks > DIO_MAX_BLOCKS)
                        max_blocks = DIO_MAX_BLOCKS;
-                handle = ext4_journal_start(inode, DIO_CREDITS +
+                dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
-                              2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
+                handle = ext4_journal_start(inode, dio_credits);
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
                        goto out;
@@ -1559,7 +1556,25 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        int total, mdb, mdb_free, release;
+        if (!to_free)
+                return;         /* Nothing to release, exit */
        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+        if (!EXT4_I(inode)->i_reserved_data_blocks) {
+                /*
+                 * If there are no reserved blocks but we try to free some,
+                 * then the counter is messed up somewhere.
+                 * But since this function is called from invalidate
+                 * page, it's harmless to return without any action
+                 */
+                printk(KERN_INFO "ext4 delalloc try to release %d reserved "
+                            "blocks for inode %lu, but there is no reserved "
+                            "data blocks\n", to_free, inode->i_ino);
+                spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+                return;
+        }
        /* recalculate the number of metablocks still need to be reserved */
        total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
        mdb = ext4_calc_metadata_amount(inode, total);
@@ -1613,11 +1628,13 @@ struct mpage_da_data {
        unsigned long first_page, next_page;    /* extent of pages */
        get_block_t *get_block;
        struct writeback_control *wbc;
+        int io_done;
+        long pages_written;
 };
 /*
 * mpage_da_submit_io - walks through extent of pages and try to write
- * them with __mpage_writepage()
+ * them with the writepage() callback
 *
 * @mpd->inode: inode
 * @mpd->first_page: first page of the extent
@@ -1632,18 +1649,11 @@ struct mpage_da_data {
 static int mpage_da_submit_io(struct mpage_da_data *mpd)
 {
        struct address_space *mapping = mpd->inode->i_mapping;
-        struct mpage_data mpd_pp = {
-                .bio = NULL,
-                .last_block_in_bio = 0,
-                .get_block = mpd->get_block,
-                .use_writepage = 1,
-        };
        int ret = 0, err, nr_pages, i;
        unsigned long index, end;
        struct pagevec pvec;
        BUG_ON(mpd->next_page <= mpd->first_page);
        pagevec_init(&pvec, 0);
        index = mpd->first_page;
        end = mpd->next_page - 1;
@@ -1661,8 +1671,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
                                break;
                        index++;
-                        err = __mpage_writepage(page, mpd->wbc, &mpd_pp);
+                        err = mapping->a_ops->writepage(page, mpd->wbc);
+                        if (!err)
+                                mpd->pages_written++;
                        /*
                         * In error case, we have to continue because
                         * remaining pages are still locked
@@ -1673,9 +1684,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
                }
                pagevec_release(&pvec);
        }
-        if (mpd_pp.bio)
-                mpage_bio_submit(WRITE, mpd_pp.bio);
        return ret;
 }
@@ -1698,7 +1706,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
        int blocks = exbh->b_size >> inode->i_blkbits;
        sector_t pblock = exbh->b_blocknr, cur_logical;
        struct buffer_head *head, *bh;
-        unsigned long index, end;
+        pgoff_t index, end;
        struct pagevec pvec;
        int nr_pages, i;
@@ -1741,6 +1749,13 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
                                if (buffer_delay(bh)) {
                                        bh->b_blocknr = pblock;
                                        clear_buffer_delay(bh);
+                                        bh->b_bdev = inode->i_sb->s_bdev;
+                                } else if (buffer_unwritten(bh)) {
+                                        bh->b_blocknr = pblock;
+                                        clear_buffer_unwritten(bh);
+                                        set_buffer_mapped(bh);
+                                        set_buffer_new(bh);
+                                        bh->b_bdev = inode->i_sb->s_bdev;
                                } else if (buffer_mapped(bh))
                                        BUG_ON(bh->b_blocknr != pblock);
@@ -1776,13 +1791,11 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
 *
 * The function skips space we know is already mapped to disk blocks.
 *
- * The function ignores errors ->get_block() returns, thus real
- * error handling is postponed to __mpage_writepage()
 */
 static void mpage_da_map_blocks(struct mpage_da_data *mpd)
 {
+        int err = 0;
        struct buffer_head *lbh = &mpd->lbh;
-        int err = 0, remain = lbh->b_size;
        sector_t next = lbh->b_blocknr;
        struct buffer_head new;
@@ -1792,38 +1805,36 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
        if (buffer_mapped(lbh) && !buffer_delay(lbh))
                return;
-        while (remain) {
+        new.b_state = lbh->b_state;
-                new.b_state = lbh->b_state;
+        new.b_blocknr = 0;
-                new.b_blocknr = 0;
+        new.b_size = lbh->b_size;
-                new.b_size = remain;
-                err = mpd->get_block(mpd->inode, next, &new, 1);
-                if (err) {
-                        /*
-                         * Rather than implement own error handling
-                         * here, we just leave remaining blocks
-                         * unallocated and try again with ->writepage()
-                         */
-                        break;
-                }
-                BUG_ON(new.b_size == 0);
-                if (buffer_new(&new))
+        /*
-                        __unmap_underlying_blocks(mpd->inode, &new);
+         * If we didn't accumulate anything
+         * to write simply return
+         */
+        if (!new.b_size)
+                return;
+        err = mpd->get_block(mpd->inode, next, &new, 1);
+        if (err)
+                return;
+        BUG_ON(new.b_size == 0);
-                /*
+        if (buffer_new(&new))
-                 * If blocks are delayed marked, we need to
+                __unmap_underlying_blocks(mpd->inode, &new);
-                 * put actual blocknr and drop delayed bit
-                 */
-                if (buffer_delay(lbh))
-                        mpage_put_bnr_to_bhs(mpd, next, &new);
-                /* go for the remaining blocks */
+        /*
-                next += new.b_size >> mpd->inode->i_blkbits;
+         * If blocks are delayed marked, we need to
-                remain -= new.b_size;
+         * put actual blocknr and drop delayed bit
-        }
+         */
+        if (buffer_delay(lbh) || buffer_unwritten(lbh))
+                mpage_put_bnr_to_bhs(mpd, next, &new);
+        return;
 }
-#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay))
+#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
+                (1 << BH_Delay) | (1 << BH_Unwritten))
 /*
 * mpage_add_bh_to_extent - try to add one more block to extent of blocks
@@ -1837,41 +1848,61 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
 static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
                                   sector_t logical, struct buffer_head *bh)
 {
-        struct buffer_head *lbh = &mpd->lbh;
        sector_t next;
+        size_t b_size = bh->b_size;
+        struct buffer_head *lbh = &mpd->lbh;
+        int nrblocks = lbh->b_size >> mpd->inode->i_blkbits;
-        next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits);
+        /* check if the reserved journal credits might overflow */
+        if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
+                if (nrblocks >= EXT4_MAX_TRANS_DATA) {
+                        /*
+                         * With non-extent format we are limited by the journal
+                         * credit available.  Total credit needed to insert
+                         * nrblocks contiguous blocks is dependent on the
+                         * nrblocks.  So limit nrblocks.
+                         */
+                        goto flush_it;
+                } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) >
+                                EXT4_MAX_TRANS_DATA) {
+                        /*
+                         * Adding the new buffer_head would make it cross the
+                         * allowed limit for which we have journal credit
+                         * reserved. So limit the new bh->b_size
+                         */
+                        b_size = (EXT4_MAX_TRANS_DATA - nrblocks) <<
+                                                mpd->inode->i_blkbits;
+                        /* we will do mpage_da_submit_io in the next loop */
+                }
+        }
        /*
         * First block in the extent
         */
        if (lbh->b_size == 0) {
                lbh->b_blocknr = logical;
-                lbh->b_size = bh->b_size;
+                lbh->b_size = b_size;
                lbh->b_state = bh->b_state & BH_FLAGS;
                return;
        }
+        next = lbh->b_blocknr + nrblocks;
        /*
         * Can we merge the block to our big extent?
         */
        if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
-                lbh->b_size += bh->b_size;
+                lbh->b_size += b_size;
                return;
        }
+flush_it:
        /*
         * We couldn't merge the block to our extent, so we
         * need to flush current  extent and start new one
         */
        mpage_da_map_blocks(mpd);
+        mpage_da_submit_io(mpd);
-        /*
+        mpd->io_done = 1;
-         * Now start a new extent
+        return;
-         */
-        lbh->b_size = bh->b_size;
-        lbh->b_state = bh->b_state & BH_FLAGS;
-        lbh->b_blocknr = logical;
 }
 /*
@@ -1891,17 +1922,35 @@ static int __mpage_da_writepage(struct page *page,
        struct buffer_head *bh, *head, fake;
        sector_t logical;
+        if (mpd->io_done) {
+                /*
+                 * Rest of the page in the page_vec
+                 * redirty them and skip them. We will
+                 * try to write them again after
+                 * starting a new transaction
+                 */
+                redirty_page_for_writepage(wbc, page);
+                unlock_page(page);
+                return MPAGE_DA_EXTENT_TAIL;
+        }
        /*
         * Can we merge this page to current extent?
         */
        if (mpd->next_page != page->index) {
                /*
                 * Nope, we can't. So, we map non-allocated blocks
-                 * and start IO on them using __mpage_writepage()
+                 * and start IO on them using writepage()
                 */
                if (mpd->next_page != mpd->first_page) {
                        mpage_da_map_blocks(mpd);
                        mpage_da_submit_io(mpd);
+                        /*
+                         * skip rest of the page in the page_vec
+                         */
+                        mpd->io_done = 1;
+                        redirty_page_for_writepage(wbc, page);
+                        unlock_page(page);
+                        return MPAGE_DA_EXTENT_TAIL;
                }
                /*
@@ -1932,6 +1981,8 @@ static int __mpage_da_writepage(struct page *page,
                set_buffer_dirty(bh);
                set_buffer_uptodate(bh);
                mpage_add_bh_to_extent(mpd, logical, bh);
+                if (mpd->io_done)
+                        return MPAGE_DA_EXTENT_TAIL;
        } else {
                /*
                 * Page with regular buffer heads, just add all dirty ones
@@ -1940,8 +1991,12 @@ static int __mpage_da_writepage(struct page *page,
                bh = head;
                do {
                        BUG_ON(buffer_locked(bh));
-                        if (buffer_dirty(bh))
+                        if (buffer_dirty(bh) &&
+                                (!buffer_mapped(bh) || buffer_delay(bh))) {
                                mpage_add_bh_to_extent(mpd, logical, bh);
+                                if (mpd->io_done)
+                                        return MPAGE_DA_EXTENT_TAIL;
+                        }
                        logical++;
                } while ((bh = bh->b_this_page) != head);
        }
@@ -1960,22 +2015,13 @@ static int __mpage_da_writepage(struct page *page,
 *
 * This is a library function, which implements the writepages()
 * address_space_operation.
- *
- * In order to avoid duplication of logic that deals with partial pages,
- * multiple bio per page, etc, we find non-allocated blocks, allocate
- * them with minimal calls to ->get_block() and re-use __mpage_writepage()
- *
- * It's important that we call __mpage_writepage() only once for each
- * involved page, otherwise we'd have to implement more complicated logic
- * to deal with pages w/o PG_lock or w/ PG_writeback and so on.
- *
- * See comments to mpage_writepages()
 */
 static int mpage_da_writepages(struct address_space *mapping,
                               struct writeback_control *wbc,
                               get_block_t get_block)
 {
        struct mpage_da_data mpd;
+        long to_write;
        int ret;
        if (!get_block)
@@ -1989,17 +2035,22 @@ static int mpage_da_writepages(struct address_space *mapping,
        mpd.first_page = 0;
        mpd.next_page = 0;
        mpd.get_block = get_block;
+        mpd.io_done = 0;
+        mpd.pages_written = 0;
+        to_write = wbc->nr_to_write;
        ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
        /*
         * Handle last extent of pages
         */
-        if (mpd.next_page != mpd.first_page) {
+        if (!mpd.io_done && mpd.next_page != mpd.first_page) {
                mpage_da_map_blocks(&mpd);
                mpage_da_submit_io(&mpd);
        }
+        wbc->nr_to_write = to_write - mpd.pages_written;
        return ret;
 }
@@ -2204,63 +2255,95 @@ static int ext4_da_writepage(struct page *page,
 }
 /*
- * For now just follow the DIO way to estimate the max credits
+ * This is called via ext4_da_writepages() to
- * needed to write out EXT4_MAX_WRITEBACK_PAGES.
+ * calculate the total number of credits to reserve to fit
- * todo: need to calculate the max credits need for
+ * a single extent allocation into a single transaction,
- * extent based files, currently the DIO credits is based on
+ * ext4_da_writepages() will loop calling this before
- * indirect-blocks mapping way.
+ * the block allocation.
- *
- * Probably should have a generic way to calculate credits
- * for DIO, writepages, and truncate
 */
-#define EXT4_MAX_WRITEBACK_PAGES      DIO_MAX_BLOCKS
-#define EXT4_MAX_WRITEBACK_CREDITS    DIO_CREDITS
+static int ext4_da_writepages_trans_blocks(struct inode *inode)
+{
+        /* Start from the inode's count of reserved (delalloc) data blocks */
+        int max_blocks = EXT4_I(inode)->i_reserved_data_blocks;
+        /*
+         * With non-extent format the journal credit needed to
+         * insert nrblocks contiguous blocks depends on the
+         * number of contiguous blocks. So we limit the
+         * number of contiguous blocks to a sane value.
+         *
+         * EXT4_EXTENTS_FL is an ext4 inode flag, so it has to be
+         * tested on EXT4_I(inode)->i_flags (as ext4_index_trans_blocks()
+         * does), not on the VFS inode->i_flags word.
+         */
+        if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
+            (max_blocks > EXT4_MAX_TRANS_DATA))
+                max_blocks = EXT4_MAX_TRANS_DATA;
+        /* Credits for mapping max_blocks as a single contiguous chunk */
+        return ext4_chunk_trans_blocks(inode, max_blocks);
+}
 static int ext4_da_writepages(struct address_space *mapping,
-                                struct writeback_control *wbc)
+                              struct writeback_control *wbc)
 {
-        struct inode *inode = mapping->host;
        handle_t *handle = NULL;
-        int needed_blocks;
-        int ret = 0;
-        long to_write;
        loff_t range_start = 0;
+        struct inode *inode = mapping->host;
+        int needed_blocks, ret = 0, nr_to_writebump = 0;
+        long to_write, pages_skipped = 0;
+        struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
        /*
         * No pages to write? This is mainly a kludge to avoid starting
         * a transaction for special inodes like journal inode on last iput()
         * because that could violate lock ordering on umount
         */
-        if (!mapping->nrpages)
+        if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
                return 0;
        /*
-         * Estimate the worse case needed credits to write out
+         * Make sure nr_to_write is >= sbi->s_mb_stream_request
-         * EXT4_MAX_BUF_BLOCKS pages
+         * This make sure small files blocks are allocated in
+         * single attempt. This ensure that small files
+         * get less fragmented.
         */
-        needed_blocks = EXT4_MAX_WRITEBACK_CREDITS;
+        if (wbc->nr_to_write < sbi->s_mb_stream_request) {
+                nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
+                wbc->nr_to_write = sbi->s_mb_stream_request;
+        }
-        to_write = wbc->nr_to_write;
+        if (!wbc->range_cyclic)
-        if (!wbc->range_cyclic) {
                /*
                 * If range_cyclic is not set force range_cont
                 * and save the old writeback_index
                 */
                wbc->range_cont = 1;
-                range_start =  wbc->range_start;
-        }
-        while (!ret && to_write) {
+        range_start =  wbc->range_start;
+        pages_skipped = wbc->pages_skipped;
+restart_loop:
+        to_write = wbc->nr_to_write;
+        while (!ret && to_write > 0) {
+                /*
+                 * we  insert one extent at a time. So we need
+                 * credit needed for single extent allocation.
+                 * journalled mode is currently not supported
+                 * by delalloc
+                 */
+                BUG_ON(ext4_should_journal_data(inode));
+                needed_blocks = ext4_da_writepages_trans_blocks(inode);
                /* start a new transaction*/
                handle = ext4_journal_start(inode, needed_blocks);
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
+                        printk(KERN_EMERG "%s: jbd2_start: "
+                               "%ld pages, ino %lu; err %d\n", __func__,
+                                wbc->nr_to_write, inode->i_ino, ret);
+                        dump_stack();
                        goto out_writepages;
                }
                if (ext4_should_order_data(inode)) {
                        /*
                         * With ordered mode we need to add
-                         * the inode to the journal handle
+                         * the inode to the journal handle
                         * when we do block allocation.
                         */
                        ret = ext4_jbd2_file_inode(handle, inode);
@@ -2268,20 +2351,20 @@ static int ext4_da_writepages(struct address_space *mapping,
                                ext4_journal_stop(handle);
                                goto out_writepages;
                        }
                }
-                /*
-                 * set the max dirty pages could be write at a time
-                 * to fit into the reserved transaction credits
-                 */
-                if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES)
-                        wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES;
                to_write -= wbc->nr_to_write;
                ret = mpage_da_writepages(mapping, wbc,
-                                                ext4_da_get_block_write);
+                                          ext4_da_get_block_write);
                ext4_journal_stop(handle);
-                if (wbc->nr_to_write) {
+                if (ret == MPAGE_DA_EXTENT_TAIL) {
+                        /*
+                         * got one extent now try with
+                         * rest of the pages
+                         */
+                        to_write += wbc->nr_to_write;
+                        ret = 0;
+                } else if (wbc->nr_to_write) {
                        /*
                         * There is no more writeout needed
                         * or we requested for a noblocking writeout
@@ -2293,10 +2376,18 @@ static int ext4_da_writepages(struct address_space *mapping,
                wbc->nr_to_write = to_write;
        }
-out_writepages:
+        if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
-        wbc->nr_to_write = to_write;
+                /* We skipped pages in this loop */
-        if (range_start)
                wbc->range_start = range_start;
+                wbc->nr_to_write = to_write +
+                                wbc->pages_skipped - pages_skipped;
+                wbc->pages_skipped = pages_skipped;
+                goto restart_loop;
+        }
+out_writepages:
+        wbc->nr_to_write = to_write - nr_to_writebump;
+        wbc->range_start = range_start;
        return ret;
 }
@@ -3486,6 +3577,9 @@ void ext4_truncate(struct inode *inode)
         * modify the block allocation tree.
         */
        down_write(&ei->i_data_sem);
+        ext4_discard_reservation(inode);
        /*
         * The orphan list entry will now protect us from any crash which
         * occurs before the truncate completes, so it is now safe to propagate
@@ -3555,8 +3649,6 @@ do_indirects:
                ;
        }
-        ext4_discard_reservation(inode);
        up_write(&ei->i_data_sem);
        inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
@@ -4324,57 +4416,129 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
        return 0;
 }
+static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
+                                      int chunk)
+{
+        int per_block;
+        /*
+         * Discontiguous case first: in the worst case every data block
+         * touches its own indirect block and every indirect block touches
+         * a double indirect block, plus one triple indirect block.
+         */
+        if (!chunk)
+                return nrblocks * 2 + 1;
+        /*
+         * Contiguous case: N data blocks need at most
+         * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks,
+         * 2 dindirect blocks and
+         * 1 tindirect block.
+         */
+        per_block = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb);
+        return per_block + 3;
+}
+/*
+ * Number of index (indirect or extent-tree) blocks that may be touched
+ * when mapping nrblocks data blocks. @chunk is non-zero when the blocks
+ * are physically contiguous; it is forwarded to the format-specific
+ * helper so the contiguous estimate is actually used instead of always
+ * falling back to the discontiguous worst case.
+ */
+static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+{
+        /* Indirect-mapped (non-extent) inode */
+        if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+                return ext4_indirect_trans_blocks(inode, nrblocks, chunk);
+        /* Extent-mapped inode */
+        return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
+}
 /*
- * How many blocks doth make a writepage()?
+ * Account for index blocks, block groups bitmaps and block group
- *
+ * descriptor blocks if modify datablocks and index blocks
- * With N blocks per page, it may be:
+ * worse case, the indexs blocks spread over different block groups
- * N data blocks
- * 2 indirect block
- * 2 dindirect
- * 1 tindirect
- * N+5 bitmap blocks (from the above)
- * N+5 group descriptor summary blocks
- * 1 inode block
- * 1 superblock.
- * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files
 *
- * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS
+ * If datablocks are discontiguous, they are possible to spread over
+ * different block groups too. If they are contiguous, with flexbg,
+ * they could still cross a block group boundary.
 *
- * With ordered or writeback data it's the same, less the N data blocks.
+ * Also account for superblock, inode, quota and xattr blocks
+ */
+int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+{
+        int groups, gdpblocks;
+        int idxblocks;
+        int ret = 0;
+        /*
+         * How many index blocks need to be touched to modify nrblocks?
+         * The "chunk" flag indicates whether the nrblocks are
+         * physically contiguous on disk.
+         *
+         * Direct IO and fallocate call get_block to allocate
+         * one single extent at a time, so they can set the "chunk" flag.
+         */
+        idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk);
+        ret = idxblocks;
+        /*
+         * Now let's see how many group bitmaps and group descriptors need
+         * to be accounted for: one group per index block, plus one group
+         * for a contiguous chunk or one group per data block otherwise.
+         */
+        groups = idxblocks;
+        if (chunk)
+                groups += 1;
+        else
+                groups += nrblocks;
+        gdpblocks = groups;
+        /* Cannot touch more bitmaps than there are block groups */
+        if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
+                groups = EXT4_SB(inode->i_sb)->s_groups_count;
+        /*
+         * Cannot touch more descriptor blocks than exist. Test gdpblocks
+         * itself rather than the already-clamped groups value, otherwise
+         * the estimate stays unclamped when s_groups_count equals
+         * s_gdb_count.
+         */
+        if (gdpblocks > EXT4_SB(inode->i_sb)->s_gdb_count)
+                gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
+        /* bitmaps and block group descriptor blocks */
+        ret += groups + gdpblocks;
+        /* Blocks for super block, inode, quota and xattr blocks */
+        ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);
+        return ret;
+}
+/*
+ * Calculate the total number of credits to reserve to fit
+ * the modification of a single page into a single transaction,
+ * which may include multiple chunks of block allocations.
 *
- * If the inode's direct blocks can hold an integral number of pages then a
+ * This could be called via ext4_write_begin()
- * page cannot straddle two indirect blocks, and we can only touch one indirect
- * and dindirect block, and the "5" above becomes "3".
 *
- * This still overestimates under most circumstances.  If we were to pass the
+ * We need to consider the worse case, when
- * start and end offsets in here as well we could do block_to_path() on each
+ * one new block per extent.
- * block and work out the exact number of indirects which are touched.  Pah.
 */
 int ext4_writepage_trans_blocks(struct inode *inode)
 {
        int bpp = ext4_journal_blocks_per_page(inode);
-        int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3;
        int ret;
-        if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+        ret = ext4_meta_trans_blocks(inode, bpp, 0);
-                return ext4_ext_writepage_trans_blocks(inode, bpp);
+        /* Account for data blocks for journalled mode */
        if (ext4_should_journal_data(inode))
-                ret = 3 * (bpp + indirects) + 2;
+                ret += bpp;
-        else
-                ret = 2 * (bpp + indirects) + 2;
-#ifdef CONFIG_QUOTA
-        /* We know that structure was already allocated during DQUOT_INIT so
-         * we will be updating only the data blocks + inodes */
-        ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
-#endif
        return ret;
 }
 /*
+ * Calculate the journal credits for a chunk of data modification.
+ *
+ * This is called from DIO, fallocate or whoever calling
+ * ext4_get_blocks_wrap() to map/allocate a chunk of contiguous disk blocks.
+ *
+ * journal buffers for data blocks are not included here, as DIO
+ * and fallocate do not need to journal data buffers.
+ */
+int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
+{
+        /* The blocks form one contiguous chunk, so set the chunk flag */
+        const int contiguous = 1;
+        return ext4_meta_trans_blocks(inode, nrblocks, contiguous);
+}
+/*
 * The caller must have previously called ext4_reserve_inode_write().
 * Give this, we know that the caller already has write access to iloc->bh.
 */
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 865e9ddb44d4..e0e3a5eb1ddb 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3282,6 +3282,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
 }
 /*
+ * Return the prealloc space that have minimal distance
+ * from the goal block. @cpa is the prealloc
+ * space that is having currently known minimal distance
+ * from the goal block.
+ */
+static struct ext4_prealloc_space *
+ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
+                        struct ext4_prealloc_space *pa,
+                        struct ext4_prealloc_space *cpa)
+{
+        ext4_fsblk_t cur_distance, new_distance;
+        /* No candidate yet: take a reference on pa and make it the best */
+        if (cpa == NULL) {
+                atomic_inc(&pa->pa_count);
+                return pa;
+        }
+        /*
+         * Compute |goal_block - pa_pstart| by ordering the operands first.
+         * Subtracting block numbers directly and passing the result to
+         * abs() is wrong when the goal lies below the start (the
+         * subtraction wraps) and may truncate wide block numbers.
+         * NOTE(review): assumes ext4_fsblk_t is an unsigned type - confirm.
+         */
+        cur_distance = goal_block > cpa->pa_pstart ?
+                        goal_block - cpa->pa_pstart :
+                        cpa->pa_pstart - goal_block;
+        new_distance = goal_block > pa->pa_pstart ?
+                        goal_block - pa->pa_pstart :
+                        pa->pa_pstart - goal_block;
+        /* Current candidate is strictly closer: keep it, refs unchanged */
+        if (cur_distance < new_distance)
+                return cpa;
+        /* drop the previous reference */
+        atomic_dec(&cpa->pa_count);
+        /* and pin the new, closer (or equally close) candidate instead */
+        atomic_inc(&pa->pa_count);
+        return pa;
+}
+/*
 * search goal blocks in preallocated space
 */
 static noinline_for_stack int
@@ -3290,7 +3319,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
        int order, i;
        struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
        struct ext4_locality_group *lg;
-        struct ext4_prealloc_space *pa;
+        struct ext4_prealloc_space *pa, *cpa = NULL;
+        ext4_fsblk_t goal_block;
        /* only data can be preallocated */
        if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3333,6 +3363,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
                /* The max size of hash table is PREALLOC_TB_SIZE */
                order = PREALLOC_TB_SIZE - 1;
+        goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) +
+                     ac->ac_g_ex.fe_start +
+                     le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
+        /*
+         * search for the prealloc space that is having
+         * minimal distance from the goal block.
+         */
        for (i = order; i < PREALLOC_TB_SIZE; i++) {
                rcu_read_lock();
                list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
@@ -3340,17 +3377,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
                        spin_lock(&pa->pa_lock);
                        if (pa->pa_deleted == 0 &&
                                        pa->pa_free >= ac->ac_o_ex.fe_len) {
-                                atomic_inc(&pa->pa_count);
-                                ext4_mb_use_group_pa(ac, pa);
+                                cpa = ext4_mb_check_group_pa(goal_block,
-                                spin_unlock(&pa->pa_lock);
+                                                                pa, cpa);
-                                ac->ac_criteria = 20;
-                                rcu_read_unlock();
-                                return 1;
                        }
                        spin_unlock(&pa->pa_lock);
                }
                rcu_read_unlock();
        }
+        if (cpa) {
+                ext4_mb_use_group_pa(ac, cpa);
+                ac->ac_criteria = 20;
+                return 1;
+        }
        return 0;
 }
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b9e077ba07e9..46fc0b5b12ba 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -53,7 +53,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
         * credit. But below we try to not accumalate too much
         * of them by restarting the journal.
         */
-        needed = ext4_ext_calc_credits_for_insert(inode, path);
+        needed = ext4_ext_calc_credits_for_single_extent(inode,
+                    lb->last_block - lb->first_block + 1, path);
        /*
         * Make sure the credit we accumalated is not really high
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 0a9265164265..b3d35604ea18 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -773,7 +773,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        if (reserved_gdb || gdb_off == 0) {
                if (!EXT4_HAS_COMPAT_FEATURE(sb,
-                                             EXT4_FEATURE_COMPAT_RESIZE_INODE)){
+                                             EXT4_FEATURE_COMPAT_RESIZE_INODE)
+                    || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
                        ext4_warning(sb, __func__,
                                     "No reserved GDT blocks, can't resize");
                        return -EPERM;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d5d77958b861..566344b926b7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -568,6 +568,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 #endif
        ei->i_block_alloc_info = NULL;
        ei->vfs_inode.i_version = 1;
+        ei->vfs_inode.i_data.writeback_index = 0;
        memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
        INIT_LIST_HEAD(&ei->i_prealloc_list);
        spin_lock_init(&ei->i_prealloc_lock);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 6d266d793e2c..80ff3381fa21 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -562,26 +562,23 @@ static int fat_write_inode(struct inode *inode, int wait)
        struct buffer_head *bh;
        struct msdos_dir_entry *raw_entry;
        loff_t i_pos;
-        int err = 0;
+        int err;
 retry:
        i_pos = MSDOS_I(inode)->i_pos;
        if (inode->i_ino == MSDOS_ROOT_INO || !i_pos)
                return 0;
-        lock_super(sb);
        bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
        if (!bh) {
                printk(KERN_ERR "FAT: unable to read inode block "
                       "for updating (i_pos %lld)\n", i_pos);
-                err = -EIO;
+                return -EIO;
-                goto out;
        }
        spin_lock(&sbi->inode_hash_lock);
        if (i_pos != MSDOS_I(inode)->i_pos) {
                spin_unlock(&sbi->inode_hash_lock);
                brelse(bh);
-                unlock_super(sb);
                goto retry;
        }
@@ -607,11 +604,10 @@ retry:
        }
        spin_unlock(&sbi->inode_hash_lock);
        mark_buffer_dirty(bh);
+        err = 0;
        if (wait)
                err = sync_dirty_buffer(bh);
        brelse(bh);
-out:
-        unlock_super(sb);
        return err;
 }
diff --git a/fs/inode.c b/fs/inode.c
index b6726f644530..0487ddba1397 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -166,6 +166,7 @@ static struct inode *alloc_inode(struct super_block *sb)
                mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
                mapping->assoc_mapping = NULL;
                mapping->backing_dev_info = &default_backing_dev_info;
+                mapping->writeback_index = 0;
                /*
                 * If the block_device provides a backing_dev_info for client
diff --git a/fs/ioprio.c b/fs/ioprio.c
index c4a1c3c65aac..da3cc460d4df 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -115,11 +115,11 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
                                pgrp = task_pgrp(current);
                        else
                                pgrp = find_vpid(who);
-                        do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
+                        do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
                                ret = set_task_ioprio(p, ioprio);
                                if (ret)
                                        break;
-                        } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
+                        } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
                        break;
                case IOPRIO_WHO_USER:
                        if (!who)
@@ -204,7 +204,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
                                pgrp = task_pgrp(current);
                        else
                                pgrp = find_vpid(who);
-                        do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
+                        do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
                                tmpio = get_task_ioprio(p);
                                if (tmpio < 0)
                                        continue;
@@ -212,7 +212,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
                                        ret = tmpio;
                                else
                                        ret = ioprio_best(ret, tmpio);
-                        } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
+                        } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
                        break;
                case IOPRIO_WHO_USER:
                        if (!who)
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 31559f45fdde..4c41db91eaa4 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -12,7 +12,6 @@
 #ifndef _JFFS2_FS_I
 #define _JFFS2_FS_I
-#include <linux/version.h>
 #include <linux/rbtree.h>
 #include <linux/posix_acl.h>
 #include <linux/mutex.h>
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c
index 697663b01bae..e1c0ec0ae989 100644
--- a/fs/omfs/bitmap.c
+++ b/fs/omfs/bitmap.c
@@ -92,7 +92,7 @@ int omfs_allocate_block(struct super_block *sb, u64 block)
        struct buffer_head *bh;
        struct omfs_sb_info *sbi = OMFS_SB(sb);
        int bits_per_entry = 8 * sb->s_blocksize;
-        int map, bit;
+        unsigned int map, bit;
        int ret = 0;
        u64 tmp;
@@ -176,7 +176,8 @@ int omfs_clear_range(struct super_block *sb, u64 block, int count)
        struct omfs_sb_info *sbi = OMFS_SB(sb);
        int bits_per_entry = 8 * sb->s_blocksize;
        u64 tmp;
-        int map, bit, ret;
+        unsigned int map, bit;
+        int ret;
        tmp = block;
        bit = do_div(tmp, bits_per_entry);
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 7e2499053e4d..834b2331f6b3 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -26,6 +26,13 @@ static int omfs_sync_file(struct file *file, struct dentry *dentry,
        return err ? -EIO : 0;
 }
+/*
+ * Number of extent entries that fit in a block whose extent table starts
+ * at byte @offset: the entries that fit after the omfs_extent header,
+ * plus one.
+ */
+static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset)
+{
+        /* Bytes left in the block after the offset and the table header */
+        size_t room = sbi->s_sys_blocksize - offset -
+                sizeof(struct omfs_extent);
+        return room / sizeof(struct omfs_extent_entry) + 1;
+}
 void omfs_make_empty_table(struct buffer_head *bh, int offset)
 {
        struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset];
@@ -45,6 +52,7 @@ int omfs_shrink_inode(struct inode *inode)
        struct buffer_head *bh;
        u64 next, last;
        u32 extent_count;
+        u32 max_extents;
        int ret;
        /* traverse extent table, freeing each entry that is greater
@@ -62,15 +70,18 @@ int omfs_shrink_inode(struct inode *inode)
                goto out;
        oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
+        max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START);
        for (;;) {
-                if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next)) {
+                if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next))
-                        brelse(bh);
+                        goto out_brelse;
-                        goto out;
-                }
                extent_count = be32_to_cpu(oe->e_extent_count);
+                if (extent_count > max_extents)
+                        goto out_brelse;
                last = next;
                next = be64_to_cpu(oe->e_next);
                entry = &oe->e_entry;
@@ -98,10 +109,14 @@ int omfs_shrink_inode(struct inode *inode)
                if (!bh)
                        goto out;
                oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
+                max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT);
        }
        ret = 0;
 out:
        return ret;
+out_brelse:
+        brelse(bh);
+        return ret;
 }
 static void omfs_truncate(struct inode *inode)
@@ -154,9 +169,7 @@ static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe,
                        goto out;
                }
        }
-        max_count = (sbi->s_sys_blocksize - OMFS_EXTENT_START -
+        max_count = omfs_max_extents(sbi, OMFS_EXTENT_START);
-                sizeof(struct omfs_extent)) /
-                sizeof(struct omfs_extent_entry) + 1;
        /* TODO: add a continuation block here */
        if (be32_to_cpu(oe->e_extent_count) > max_count-1)
@@ -225,6 +238,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
        sector_t next, offset;
        int ret;
        u64 new_block;
+        u32 max_extents;
        int extent_count;
        struct omfs_extent *oe;
        struct omfs_extent_entry *entry;
@@ -238,6 +252,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
                goto out;
        oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
+        max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START);
        next = inode->i_ino;
        for (;;) {
@@ -249,6 +264,9 @@ static int omfs_get_block(struct inode *inode, sector_t block,
                next = be64_to_cpu(oe->e_next);
                entry = &oe->e_entry;
+                if (extent_count > max_extents)
+                        goto out_brelse;
                offset = find_block(inode, entry, block, extent_count, &remain);
                if (offset > 0) {
                        ret = 0;
@@ -266,6 +284,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
                if (!bh)
                        goto out;
                oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
+                max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT);
        }
        if (create) {
                ret = omfs_grow_extent(inode, oe, &new_block);
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index a95fe5984f4b..d29047b1b9b0 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -232,8 +232,7 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino)
                inode->i_mode = S_IFDIR | (S_IRWXUGO & ~sbi->s_dmask);
                inode->i_op = &omfs_dir_inops;
                inode->i_fop = &omfs_dir_operations;
-                inode->i_size = be32_to_cpu(oi->i_head.h_body_size) +
+                inode->i_size = sbi->s_sys_blocksize;
-                        sizeof(struct omfs_header);
                inc_nlink(inode);
                break;
        case OMFS_FILE:
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 79ecd281d2cb..3f87d2632947 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -52,14 +52,14 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
        }
        seq_printf(m,
-                   "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
+                   "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
                   vma->vm_start,
                   vma->vm_end,
                   flags & VM_READ ? 'r' : '-',
                   flags & VM_WRITE ? 'w' : '-',
                   flags & VM_EXEC ? 'x' : '-',
                   flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
-                   vma->vm_pgoff << PAGE_SHIFT,
+                   ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
                   MAJOR(dev), MINOR(dev), ino, &len);
        if (file) {
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7546a918f790..73d1891ee625 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -219,14 +219,14 @@ static int show_map(struct seq_file *m, void *v)
                ino = inode->i_ino;
        }
-        seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
+        seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
                        vma->vm_start,
                        vma->vm_end,
                        flags & VM_READ ? 'r' : '-',
                        flags & VM_WRITE ? 'w' : '-',
                        flags & VM_EXEC ? 'x' : '-',
                        flags & VM_MAYSHARE ? 's' : 'p',
-                        vma->vm_pgoff << PAGE_SHIFT,
+                        ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
                        MAJOR(dev), MINOR(dev), ino, &len);
        /*
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index d81fb9ed2b8e..154098157473 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -263,8 +263,8 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
        idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
-        /* And make sure we have twice the index size of space reserved */
+        /* And make sure we have thrice the index size of space reserved */
-        idx_size <<= 1;
+        idx_size = idx_size + (idx_size << 1);
        /*
         * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes'
@@ -388,11 +388,11 @@ static int can_use_rp(struct ubifs_info *c)
 * This function makes sure UBIFS has enough free eraseblocks for index growth
 * and data.
 *
- * When budgeting index space, UBIFS reserves twice as more LEBs as the index
+ * When budgeting index space, UBIFS reserves thrice as many LEBs as the index
 * would take if it was consolidated and written to the flash. This guarantees
 * that the "in-the-gaps" commit method always succeeds and UBIFS will always
 * be able to commit dirty index. So this function basically adds amount of
- * budgeted index space to the size of the current index, multiplies this by 2,
+ * budgeted index space to the size of the current index, multiplies this by 3,
 * and makes sure this does not exceed the amount of free eraseblocks.
 *
 * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
@@ -543,8 +543,16 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
        int err, idx_growth, data_growth, dd_growth;
        struct retries_info ri;
+        ubifs_assert(req->new_page <= 1);
+        ubifs_assert(req->dirtied_page <= 1);
+        ubifs_assert(req->new_dent <= 1);
+        ubifs_assert(req->mod_dent <= 1);
+        ubifs_assert(req->new_ino <= 1);
+        ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA);
        ubifs_assert(req->dirtied_ino <= 4);
        ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
+        ubifs_assert(!(req->new_ino_d & 7));
+        ubifs_assert(!(req->dirtied_ino_d & 7));
        data_growth = calc_data_growth(c, req);
        dd_growth = calc_dd_growth(c, req);
@@ -618,8 +626,16 @@ again:
 */
 void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
 {
+        ubifs_assert(req->new_page <= 1);
+        ubifs_assert(req->dirtied_page <= 1);
+        ubifs_assert(req->new_dent <= 1);
+        ubifs_assert(req->mod_dent <= 1);
+        ubifs_assert(req->new_ino <= 1);
+        ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA);
        ubifs_assert(req->dirtied_ino <= 4);
        ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
+        ubifs_assert(!(req->new_ino_d & 7));
+        ubifs_assert(!(req->dirtied_ino_d & 7));
        if (!req->recalculate) {
                ubifs_assert(req->idx_growth >= 0);
                ubifs_assert(req->data_growth >= 0);
@@ -647,7 +663,11 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
        ubifs_assert(c->budg_idx_growth >= 0);
        ubifs_assert(c->budg_data_growth >= 0);
+        ubifs_assert(c->budg_dd_growth >= 0);
        ubifs_assert(c->min_idx_lebs < c->main_lebs);
+        ubifs_assert(!(c->budg_idx_growth & 7));
+        ubifs_assert(!(c->budg_data_growth & 7));
+        ubifs_assert(!(c->budg_dd_growth & 7));
        spin_unlock(&c->space_lock);
 }
@@ -686,9 +706,10 @@ void ubifs_convert_page_budget(struct ubifs_info *c)
 void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
                                      struct ubifs_inode *ui)
 {
-        struct ubifs_budget_req req = {.dd_growth = c->inode_budget,
+        struct ubifs_budget_req req;
-                                       .dirtied_ino_d = ui->data_len};
+        memset(&req, 0, sizeof(struct ubifs_budget_req));
+        req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8);
        ubifs_release_budget(c, &req);
 }
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 3b516316c9b3..0a6aa2cc78f0 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -74,6 +74,7 @@ static int do_commit(struct ubifs_info *c)
                        goto out_up;
        }
+        c->cmt_no += 1;
        err = ubifs_gc_start_commit(c);
        if (err)
                goto out_up;
@@ -115,7 +116,7 @@ static int do_commit(struct ubifs_info *c)
                goto out;
        mutex_lock(&c->mst_mutex);
-        c->mst_node->cmt_no      = cpu_to_le64(++c->cmt_no);
+        c->mst_node->cmt_no      = cpu_to_le64(c->cmt_no);
        c->mst_node->log_lnum    = cpu_to_le32(new_ltail_lnum);
        c->mst_node->root_lnum   = cpu_to_le32(zroot.lnum);
        c->mst_node->root_offs   = cpu_to_le32(zroot.offs);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 4e3aaeba4eca..b9cb77473758 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -568,8 +568,8 @@ void dbg_dump_budget_req(const struct ubifs_budget_req *req)
 void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
 {
        spin_lock(&dbg_lock);
-        printk(KERN_DEBUG "Lprops statistics: empty_lebs %d, idx_lebs  %d\n",
+        printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, "
-               lst->empty_lebs, lst->idx_lebs);
+               "idx_lebs  %d\n", current->pid, lst->empty_lebs, lst->idx_lebs);
        printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, "
               "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free,
               lst->total_dirty);
@@ -587,8 +587,8 @@ void dbg_dump_budg(struct ubifs_info *c)
        struct ubifs_gced_idx_leb *idx_gc;
        spin_lock(&dbg_lock);
-        printk(KERN_DEBUG "Budgeting info: budg_data_growth %lld, "
+        printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, "
-               "budg_dd_growth %lld, budg_idx_growth %lld\n",
+               "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid,
               c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth);
        printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, "
               "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth,
@@ -634,7 +634,7 @@ void dbg_dump_lprops(struct ubifs_info *c)
        struct ubifs_lprops lp;
        struct ubifs_lp_stats lst;
-        printk(KERN_DEBUG "Dumping LEB properties\n");
+        printk(KERN_DEBUG "(pid %d) Dumping LEB properties\n", current->pid);
        ubifs_get_lp_stats(c, &lst);
        dbg_dump_lstats(&lst);
@@ -655,7 +655,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
        if (dbg_failure_mode)
                return;
-        printk(KERN_DEBUG "Dumping LEB %d\n", lnum);
+        printk(KERN_DEBUG "(pid %d) Dumping LEB %d\n", current->pid, lnum);
        sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
        if (IS_ERR(sleb)) {
@@ -720,8 +720,8 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
 {
        int i;
-        printk(KERN_DEBUG "Dumping heap cat %d (%d elements)\n",
+        printk(KERN_DEBUG "(pid %d) Dumping heap cat %d (%d elements)\n",
-               cat, heap->cnt);
+               current->pid, cat, heap->cnt);
        for (i = 0; i < heap->cnt; i++) {
                struct ubifs_lprops *lprops = heap->arr[i];
@@ -736,7 +736,7 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
 {
        int i;
-        printk(KERN_DEBUG "Dumping pnode:\n");
+        printk(KERN_DEBUG "(pid %d) Dumping pnode:\n", current->pid);
        printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n",
               (size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
        printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n",
@@ -755,7 +755,7 @@ void dbg_dump_tnc(struct ubifs_info *c)
        int level;
        printk(KERN_DEBUG "\n");
-        printk(KERN_DEBUG "Dumping the TNC tree\n");
+        printk(KERN_DEBUG "(pid %d) Dumping the TNC tree\n", current->pid);
        znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
        level = znode->level;
        printk(KERN_DEBUG "== Level %d ==\n", level);
@@ -2208,16 +2208,17 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
 int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
                  int offset, int len, int dtype)
 {
-        int err;
+        int err, failing;
        if (in_failure_mode(desc))
                return -EIO;
-        if (do_fail(desc, lnum, 1))
+        failing = do_fail(desc, lnum, 1);
+        if (failing)
                cut_data(buf, len);
        err = ubi_leb_write(desc, lnum, buf, offset, len, dtype);
        if (err)
                return err;
-        if (in_failure_mode(desc))
+        if (failing)
                return -EIO;
        return 0;
 }
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 3c4f1e93c9e0..50315fc57185 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -27,7 +27,7 @@
 #define UBIFS_DBG(op) op
-#define ubifs_assert(expr)  do {                                               \
+#define ubifs_assert(expr) do {                                                \
        if (unlikely(!(expr))) {                                               \
                printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
                       __func__, __LINE__, current->pid);                      \
@@ -73,50 +73,50 @@ const char *dbg_key_str1(const struct ubifs_info *c,
                         const union ubifs_key *key);
 /*
- * DBGKEY macros require dbg_lock to be held, which it is in the dbg message
+ * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message
 * macros.
 */
 #define DBGKEY(key) dbg_key_str0(c, (key))
 #define DBGKEY1(key) dbg_key_str1(c, (key))
 /* General messages */
-#define dbg_gen(fmt, ...)        dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__)
+#define dbg_gen(fmt, ...)   dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__)
 /* Additional journal messages */
-#define dbg_jnl(fmt, ...)        dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__)
+#define dbg_jnl(fmt, ...)   dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__)
 /* Additional TNC messages */
-#define dbg_tnc(fmt, ...)        dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__)
+#define dbg_tnc(fmt, ...)   dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__)
 /* Additional lprops messages */
-#define dbg_lp(fmt, ...)         dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__)
+#define dbg_lp(fmt, ...)    dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__)
 /* Additional LEB find messages */
-#define dbg_find(fmt, ...)       dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__)
+#define dbg_find(fmt, ...)  dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__)
 /* Additional mount messages */
-#define dbg_mnt(fmt, ...)        dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__)
+#define dbg_mnt(fmt, ...)   dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__)
 /* Additional I/O messages */
-#define dbg_io(fmt, ...)         dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__)
+#define dbg_io(fmt, ...)    dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__)
 /* Additional commit messages */
-#define dbg_cmt(fmt, ...)        dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__)
+#define dbg_cmt(fmt, ...)   dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__)
 /* Additional budgeting messages */
-#define dbg_budg(fmt, ...)       dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__)
+#define dbg_budg(fmt, ...)  dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__)
 /* Additional log messages */
-#define dbg_log(fmt, ...)        dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__)
+#define dbg_log(fmt, ...)   dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__)
 /* Additional gc messages */
-#define dbg_gc(fmt, ...)         dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__)
+#define dbg_gc(fmt, ...)    dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__)
 /* Additional scan messages */
-#define dbg_scan(fmt, ...)       dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__)
+#define dbg_scan(fmt, ...)  dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__)
 /* Additional recovery messages */
-#define dbg_rcvry(fmt, ...)      dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
+#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
 /*
 * Debugging message type flags (must match msg_type_names in debug.c).
@@ -239,34 +239,23 @@ typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
                                 struct ubifs_zbranch *zbr, void *priv);
 typedef int (*dbg_znode_callback)(struct ubifs_info *c,
                                  struct ubifs_znode *znode, void *priv);
 int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
                   dbg_znode_callback znode_cb, void *priv);
 /* Checking functions */
 int dbg_check_lprops(struct ubifs_info *c);
 int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot);
 int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot);
 int dbg_check_cats(struct ubifs_info *c);
 int dbg_check_ltab(struct ubifs_info *c);
 int dbg_check_synced_i_size(struct inode *inode);
 int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir);
 int dbg_check_tnc(struct ubifs_info *c, int extra);
 int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
 int dbg_check_filesystem(struct ubifs_info *c);
 void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
                    int add_pos);
 int dbg_check_lprops(struct ubifs_info *c);
 int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
                        int row, int col);
@@ -329,71 +318,77 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
 #else /* !CONFIG_UBIFS_FS_DEBUG */
 #define UBIFS_DBG(op)
-#define ubifs_assert(expr)                         ({})
-#define ubifs_assert_cmt_locked(c)
+/* Use "if (0)" to make compiler check arguments even if debugging is off */
+#define ubifs_assert(expr)  do {                                               \
+        if (0 && (expr))                                                       \
+                printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
+                       __func__, __LINE__, current->pid);                      \
+} while (0)
+#define dbg_err(fmt, ...)   do {                                               \
+        if (0)                                                                 \
+                ubifs_err(fmt, ##__VA_ARGS__);                                 \
+} while (0)
+#define dbg_msg(fmt, ...) do {                                                 \
+        if (0)                                                                 \
+                printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n",         \
+                       current->pid, __func__, ##__VA_ARGS__);                 \
+} while (0)
 #define dbg_dump_stack()
-#define dbg_err(fmt, ...)                          ({})
+#define ubifs_assert_cmt_locked(c)
-#define dbg_msg(fmt, ...)                          ({})
-#define dbg_key(c, key, fmt, ...)                  ({})
-#define dbg_gen(fmt, ...)                          ({})
-#define dbg_jnl(fmt, ...)                          ({})
-#define dbg_tnc(fmt, ...)                          ({})
-#define dbg_lp(fmt, ...)                           ({})
-#define dbg_find(fmt, ...)                         ({})
-#define dbg_mnt(fmt, ...)                          ({})
-#define dbg_io(fmt, ...)                           ({})
-#define dbg_cmt(fmt, ...)                          ({})
-#define dbg_budg(fmt, ...)                         ({})
-#define dbg_log(fmt, ...)                          ({})
-#define dbg_gc(fmt, ...)                           ({})
-#define dbg_scan(fmt, ...)                         ({})
-#define dbg_rcvry(fmt, ...)                        ({})
-#define dbg_ntype(type)                            ""
-#define dbg_cstate(cmt_state)                      ""
-#define dbg_get_key_dump(c, key)                   ({})
-#define dbg_dump_inode(c, inode)                   ({})
-#define dbg_dump_node(c, node)                     ({})
-#define dbg_dump_budget_req(req)                   ({})
-#define dbg_dump_lstats(lst)                       ({})
-#define dbg_dump_budg(c)                           ({})
-#define dbg_dump_lprop(c, lp)                      ({})
-#define dbg_dump_lprops(c)                         ({})
-#define dbg_dump_leb(c, lnum)                      ({})
-#define dbg_dump_znode(c, znode)                   ({})
-#define dbg_dump_heap(c, heap, cat)                ({})
-#define dbg_dump_pnode(c, pnode, parent, iip)      ({})
-#define dbg_dump_tnc(c)                            ({})
-#define dbg_dump_index(c)                          ({})
-#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
+#define dbg_gen(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_jnl(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_tnc(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_lp(fmt, ...)    dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_find(fmt, ...)  dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_mnt(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_io(fmt, ...)    dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_cmt(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_budg(fmt, ...)  dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_log(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_gc(fmt, ...)    dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_scan(fmt, ...)  dbg_msg(fmt, ##__VA_ARGS__)
+#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
+#define DBGKEY(key)  ((char *)(key))
+#define DBGKEY1(key) ((char *)(key))
+#define dbg_ntype(type)                       ""
+#define dbg_cstate(cmt_state)                 ""
+#define dbg_get_key_dump(c, key)              ({})
+#define dbg_dump_inode(c, inode)              ({})
+#define dbg_dump_node(c, node)                ({})
+#define dbg_dump_budget_req(req)              ({})
+#define dbg_dump_lstats(lst)                  ({})
+#define dbg_dump_budg(c)                      ({})
+#define dbg_dump_lprop(c, lp)                 ({})
+#define dbg_dump_lprops(c)                    ({})
+#define dbg_dump_leb(c, lnum)                 ({})
+#define dbg_dump_znode(c, znode)              ({})
+#define dbg_dump_heap(c, heap, cat)           ({})
+#define dbg_dump_pnode(c, pnode, parent, iip) ({})
+#define dbg_dump_tnc(c)                       ({})
+#define dbg_dump_index(c)                     ({})
+#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
 #define dbg_old_index_check_init(c, zroot)         0
 #define dbg_check_old_index(c, zroot)              0
 #define dbg_check_cats(c)                          0
 #define dbg_check_ltab(c)                          0
 #define dbg_check_synced_i_size(inode)             0
 #define dbg_check_dir_size(c, dir)                 0
 #define dbg_check_tnc(c, x)                        0
 #define dbg_check_idx_size(c, idx_size)            0
 #define dbg_check_filesystem(c)                    0
 #define dbg_check_heap(c, heap, cat, add_pos)      ({})
 #define dbg_check_lprops(c)                        0
 #define dbg_check_lpt_nodes(c, cnode, row, col)    0
 #define dbg_force_in_the_gaps_enabled              0
 #define dbg_force_in_the_gaps()                    0
 #define dbg_failure_mode                           0
 #define dbg_failure_mode_registration(c)           ({})
 #define dbg_failure_mode_deregistration(c)         ({})
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index e90374be7d3b..5c96f1fb7016 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -165,7 +165,6 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
        }
        inode->i_ino = ++c->highest_inum;
-        inode->i_generation = ++c->vfs_gen;
        /*
         * The creation sequence number remains with this inode for its
         * lifetime. All nodes for this inode have a greater sequence number,
@@ -220,15 +219,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
        err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name);
        if (err) {
-                /*
+                if (err == -ENOENT) {
-                 * Do not hash the direntry if parent 'i_nlink' is zero, because
-                 * this has side-effects - '->delete_inode()' call will not be
-                 * called for the parent orphan inode, because 'd_count' of its
-                 * direntry will stay 1 (it'll be negative direntry I guess)
-                 * and prevent 'iput_final()' until the dentry is destroyed due
-                 * to unmount or memory pressure.
-                 */
-                if (err == -ENOENT && dir->i_nlink != 0) {
                        dbg_gen("not found");
                        goto done;
                }
@@ -525,7 +516,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
        struct ubifs_inode *dir_ui = ubifs_inode(dir);
        int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
        struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2,
-                                        .dirtied_ino_d = ui->data_len };
+                                .dirtied_ino_d = ALIGN(ui->data_len, 8) };
        /*
         * Budget request settings: new direntry, changing the target inode,
@@ -727,8 +718,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        struct ubifs_inode *dir_ui = ubifs_inode(dir);
        struct ubifs_info *c = dir->i_sb->s_fs_info;
        int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
-        struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
+        struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 };
-                                        .dirtied_ino_d = 1 };
        /*
         * Budget request settings: new inode, new direntry and changing parent
@@ -789,7 +779,8 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
        int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
        int err, devlen = 0;
        struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
-                                        .new_ino_d = devlen, .dirtied_ino = 1 };
+                                        .new_ino_d = ALIGN(devlen, 8),
+                                        .dirtied_ino = 1 };
        /*
         * Budget request settings: new inode, new direntry and changing parent
@@ -863,7 +854,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
        int err, len = strlen(symname);
        int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
        struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
-                                        .new_ino_d = len, .dirtied_ino = 1 };
+                                        .new_ino_d = ALIGN(len, 8),
+                                        .dirtied_ino = 1 };
        /*
         * Budget request settings: new inode, new direntry and changing parent
@@ -1012,7 +1004,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1,
                                        .dirtied_ino = 3 };
        struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
-                                .dirtied_ino_d = old_inode_ui->data_len };
+                        .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
        struct timespec time;
        /*
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 8565e586e533..4071d1cae29f 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -890,7 +890,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
        loff_t new_size = attr->ia_size;
        struct ubifs_inode *ui = ubifs_inode(inode);
        struct ubifs_budget_req req = { .dirtied_ino = 1,
-                                        .dirtied_ino_d = ui->data_len };
+                                .dirtied_ino_d = ALIGN(ui->data_len, 8) };
        err = ubifs_budget_space(c, &req);
        if (err)
@@ -941,7 +941,8 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
        struct inode *inode = dentry->d_inode;
        struct ubifs_info *c = inode->i_sb->s_fs_info;
-        dbg_gen("ino %lu, ia_valid %#x", inode->i_ino, attr->ia_valid);
+        dbg_gen("ino %lu, mode %#x, ia_valid %#x",
+                inode->i_ino, inode->i_mode, attr->ia_valid);
        err = inode_change_ok(inode, attr);
        if (err)
                return err;
@@ -1051,7 +1052,7 @@ static int update_mctime(struct ubifs_info *c, struct inode *inode)
        if (mctime_update_needed(inode, &now)) {
                int err, release;
                struct ubifs_budget_req req = { .dirtied_ino = 1,
-                                                .dirtied_ino_d = ui->data_len };
+                                .dirtied_ino_d = ALIGN(ui->data_len, 8) };
                err = ubifs_budget_space(c, &req);
                if (err)
@@ -1270,6 +1271,7 @@ struct file_operations ubifs_file_operations = {
        .fsync          = ubifs_fsync,
        .unlocked_ioctl = ubifs_ioctl,
        .splice_read    = generic_file_splice_read,
+        .splice_write   = generic_file_splice_write,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = ubifs_compat_ioctl,
 #endif
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 10394c548367..adee7b5ddeab 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -290,9 +290,14 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
                idx_lp = idx_heap->arr[0];
                sum = idx_lp->free + idx_lp->dirty;
                /*
-                 * Since we reserve twice as more space for the index than it
+                 * Since we reserve thrice as much space for the index as it
                 * actually takes, it does not make sense to pick indexing LEBs
-                 * with less than half LEB of dirty space.
+                 * with less than, say, half LEB of dirty space. Maybe half is
+                 * not the optimal boundary - this should be tested and
+                 * checked. This boundary should determine how much we use
+                 * in-the-gaps to consolidate the index compared to how much
+                 * we use garbage collector to consolidate it. The "half"
+                 * criterion just feels fine.
                 */
                if (sum < min_space || sum < c->half_leb_size)
                        idx_lp = NULL;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 3374f91b6709..054363f2b207 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -54,6 +54,20 @@
 #include "ubifs.h"
 /**
+ * ubifs_ro_mode - switch UBIFS to read-only mode.
+ * @c: UBIFS file-system description object
+ * @err: error code which is the reason for switching to R/O mode
+ */
+void ubifs_ro_mode(struct ubifs_info *c, int err)
+{
+        if (!c->ro_media) {
+                c->ro_media = 1;
+                ubifs_warn("switched to read-only mode, error %d", err);
+                dbg_dump_stack();
+        }
+}
+/**
 * ubifs_check_node - check node.
 * @c: UBIFS file-system description object
 * @buf: node to check
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 283155abe5f5..22993f867d19 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -447,13 +447,11 @@ static int get_dent_type(int mode)
 * @ino: buffer in which to pack inode node
 * @inode: inode to pack
 * @last: indicates the last node of the group
- * @last_reference: non-zero if this is a deletion inode
 */
 static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
-                       const struct inode *inode, int last,
+                       const struct inode *inode, int last)
-                       int last_reference)
 {
-        int data_len = 0;
+        int data_len = 0, last_reference = !inode->i_nlink;
        struct ubifs_inode *ui = ubifs_inode(inode);
        ino->ch.node_type = UBIFS_INO_NODE;
@@ -596,9 +594,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
        ubifs_prep_grp_node(c, dent, dlen, 0);
        ino = (void *)dent + aligned_dlen;
-        pack_inode(c, ino, inode, 0, last_reference);
+        pack_inode(c, ino, inode, 0);
        ino = (void *)ino + aligned_ilen;
-        pack_inode(c, ino, dir, 1, 0);
+        pack_inode(c, ino, dir, 1);
        if (last_reference) {
                err = ubifs_add_orphan(c, inode->i_ino);
@@ -606,6 +604,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
                        release_head(c, BASEHD);
                        goto out_finish;
                }
+                ui->del_cmtno = c->cmt_no;
        }
        err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync);
@@ -750,30 +749,25 @@ out_free:
 * ubifs_jnl_write_inode - flush inode to the journal.
 * @c: UBIFS file-system description object
 * @inode: inode to flush
- * @deletion: inode has been deleted
 *
 * This function writes inode @inode to the journal. If the inode is
 * synchronous, it also synchronizes the write-buffer. Returns zero in case of
 * success and a negative error code in case of failure.
 */
-int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
+int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
-                          int deletion)
 {
-        int err, len, lnum, offs, sync = 0;
+        int err, lnum, offs;
        struct ubifs_ino_node *ino;
        struct ubifs_inode *ui = ubifs_inode(inode);
+        int sync = 0, len = UBIFS_INO_NODE_SZ, last_reference = !inode->i_nlink;
-        dbg_jnl("ino %lu%s", inode->i_ino,
+        dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink);
-                deletion ? " (last reference)" : "");
-        if (deletion)
-                ubifs_assert(inode->i_nlink == 0);
-        len = UBIFS_INO_NODE_SZ;
        /*
         * If the inode is being deleted, do not write the attached data. No
         * need to synchronize the write-buffer either.
         */
-        if (!deletion) {
+        if (!last_reference) {
                len += ui->data_len;
                sync = IS_SYNC(inode);
        }
@@ -786,7 +780,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
        if (err)
                goto out_free;
-        pack_inode(c, ino, inode, 1, deletion);
+        pack_inode(c, ino, inode, 1);
        err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
        if (err)
                goto out_release;
@@ -795,7 +789,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
                                          inode->i_ino);
        release_head(c, BASEHD);
-        if (deletion) {
+        if (last_reference) {
                err = ubifs_tnc_remove_ino(c, inode->i_ino);
                if (err)
                        goto out_ro;
@@ -828,6 +822,65 @@ out_free:
 }
 /**
+ * ubifs_jnl_delete_inode - delete an inode.
+ * @c: UBIFS file-system description object
+ * @inode: inode to delete
+ *
+ * This function deletes inode @inode which includes removing it from orphans,
+ * deleting it from TNC and, in some cases, writing a deletion inode to the
+ * journal.
+ *
+ * When regular file inodes are unlinked or a directory inode is removed, the
+ * 'ubifs_jnl_update()' function writes a corresponding deletion inode and
+ * direntry to the media, and adds the inode to orphans. After this, when the
+ * last reference to this inode has been dropped, this function is called. In
+ * general, it has to write one more deletion inode to the media, because if
+ * a commit happened between 'ubifs_jnl_update()' and
+ * 'ubifs_jnl_delete_inode()', the deletion inode is not in the journal
+ * anymore, and in fact it might not be on the flash anymore, because it might
+ * have been garbage-collected already. And for optimization reasons UBIFS does
+ * not read the orphan area if it has been unmounted cleanly, so it would have
+ * no indication in the journal that there is a deleted inode which has to be
+ * removed from TNC.
+ *
+ * However, if there was no commit between 'ubifs_jnl_update()' and
+ * 'ubifs_jnl_delete_inode()', then there is no need to write the deletion
+ * inode to the media for the second time. And this is quite a typical case.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode)
+{
+        int err;
+        struct ubifs_inode *ui = ubifs_inode(inode);
+        ubifs_assert(inode->i_nlink == 0);
+        if (ui->del_cmtno != c->cmt_no)
+                /* A commit happened for sure */
+                return ubifs_jnl_write_inode(c, inode);
+        down_read(&c->commit_sem);
+        /*
+         * Check commit number again, because the first test has been done
+         * without @c->commit_sem, so a commit might have happened.
+         */
+        if (ui->del_cmtno != c->cmt_no) {
+                up_read(&c->commit_sem);
+                return ubifs_jnl_write_inode(c, inode);
+        }
+        err = ubifs_tnc_remove_ino(c, inode->i_ino);
+        if (err)
+                ubifs_ro_mode(c, err);
+        else
+                ubifs_delete_orphan(c, inode->i_ino);
+        up_read(&c->commit_sem);
+        return err;
+}
+/**
 * ubifs_jnl_rename - rename a directory entry.
 * @c: UBIFS file-system description object
 * @old_dir: parent inode of directory entry to rename
@@ -917,16 +970,16 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
        p = (void *)dent2 + aligned_dlen2;
        if (new_inode) {
-                pack_inode(c, p, new_inode, 0, last_reference);
+                pack_inode(c, p, new_inode, 0);
                p += ALIGN(ilen, 8);
        }
        if (!move)
-                pack_inode(c, p, old_dir, 1, 0);
+                pack_inode(c, p, old_dir, 1);
        else {
-                pack_inode(c, p, old_dir, 0, 0);
+                pack_inode(c, p, old_dir, 0);
                p += ALIGN(plen, 8);
-                pack_inode(c, p, new_dir, 1, 0);
+                pack_inode(c, p, new_dir, 1);
        }
        if (last_reference) {
@@ -935,6 +988,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
                        release_head(c, BASEHD);
                        goto out_finish;
                }
+                new_ui->del_cmtno = c->cmt_no;
        }
        err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync);
@@ -1131,7 +1185,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
        if (err)
                goto out_free;
-        pack_inode(c, ino, inode, 0, 0);
+        pack_inode(c, ino, inode, 0);
        ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1);
        if (dlen)
                ubifs_prep_grp_node(c, dn, dlen, 1);
@@ -1251,9 +1305,9 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
        ubifs_prep_grp_node(c, xent, xlen, 0);
        ino = (void *)xent + aligned_xlen;
-        pack_inode(c, ino, inode, 0, 1);
+        pack_inode(c, ino, inode, 0);
        ino = (void *)ino + UBIFS_INO_NODE_SZ;
-        pack_inode(c, ino, host, 1, 0);
+        pack_inode(c, ino, host, 1);
        err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync);
        if (!sync && !err)
@@ -1320,7 +1374,7 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
                           const struct inode *host)
 {
        int err, len1, len2, aligned_len, aligned_len1, lnum, offs;
-        struct ubifs_inode *host_ui = ubifs_inode(inode);
+        struct ubifs_inode *host_ui = ubifs_inode(host);
        struct ubifs_ino_node *ino;
        union ubifs_key key;
        int sync = IS_DIRSYNC(host);
@@ -1344,8 +1398,8 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
        if (err)
                goto out_free;
-        pack_inode(c, ino, host, 0, 0);
+        pack_inode(c, ino, host, 0);
-        pack_inode(c, (void *)ino + aligned_len1, inode, 1, 0);
+        pack_inode(c, (void *)ino + aligned_len1, inode, 1);
        err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0);
        if (!sync && !err) {
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 36857b9ed59e..3e0aa7367556 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -317,6 +317,8 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
        return 0;
 out_unlock:
+        if (err != -EAGAIN)
+                ubifs_ro_mode(c, err);
        mutex_unlock(&c->log_mutex);
        kfree(ref);
        kfree(bud);
@@ -410,7 +412,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
                return -ENOMEM;
        cs->ch.node_type = UBIFS_CS_NODE;
-        cs->cmt_no = cpu_to_le64(c->cmt_no + 1);
+        cs->cmt_no = cpu_to_le64(c->cmt_no);
        ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);
        /*
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 4beccfc256d2..87dabf9fe742 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -80,20 +80,6 @@ static inline struct ubifs_inode *ubifs_inode(const struct inode *inode)
 }
 /**
- * ubifs_ro_mode - switch UBIFS to read read-only mode.
- * @c: UBIFS file-system description object
- * @err: error code which is the reason of switching to R/O mode
- */
-static inline void ubifs_ro_mode(struct ubifs_info *c, int err)
-{
-        if (!c->ro_media) {
-                c->ro_media = 1;
-                ubifs_warn("switched to read-only mode, error %d", err);
-                dbg_dump_stack();
-        }
-}
-/**
 * ubifs_compr_present - check if compressor was compiled in.
 * @compr_type: compressor type to check
 *
@@ -322,7 +308,7 @@ static inline long long ubifs_reported_space(const struct ubifs_info *c,
 {
        int divisor, factor;
-        divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz << 1);
+        divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3);
        factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ;
        do_div(free, divisor);
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 3afeb9242c6a..02d3462f4d3e 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -310,10 +310,10 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
        c->cmt_orphans -= cnt;
        spin_unlock(&c->orphan_lock);
        if (c->cmt_orphans)
-                orph->cmt_no = cpu_to_le64(c->cmt_no + 1);
+                orph->cmt_no = cpu_to_le64(c->cmt_no);
        else
                /* Mark the last node of the commit */
-                orph->cmt_no = cpu_to_le64((c->cmt_no + 1) | (1ULL << 63));
+                orph->cmt_no = cpu_to_le64((c->cmt_no) | (1ULL << 63));
        ubifs_assert(c->ohead_offs + len <= c->leb_size);
        ubifs_assert(c->ohead_lnum >= c->orph_first);
        ubifs_assert(c->ohead_lnum <= c->orph_last);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index ca1e2d4e03cc..f71e6b8822c4 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -30,7 +30,6 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
-#include <linux/random.h>
 #include <linux/kthread.h>
 #include <linux/parser.h>
 #include <linux/seq_file.h>
@@ -149,7 +148,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
        if (err)
                goto out_invalid;
-        /* Disable readahead */
+        /* Disable read-ahead */
        inode->i_mapping->backing_dev_info = &c->bdi;
        switch (inode->i_mode & S_IFMT) {
@@ -278,7 +277,7 @@ static void ubifs_destroy_inode(struct inode *inode)
 */
 static int ubifs_write_inode(struct inode *inode, int wait)
 {
-        int err;
+        int err = 0;
        struct ubifs_info *c = inode->i_sb->s_fs_info;
        struct ubifs_inode *ui = ubifs_inode(inode);
@@ -299,10 +298,18 @@ static int ubifs_write_inode(struct inode *inode, int wait)
                return 0;
        }
-        dbg_gen("inode %lu", inode->i_ino);
+        /*
-        err = ubifs_jnl_write_inode(c, inode, 0);
+         * As an optimization, do not write orphan inodes to the media just
-        if (err)
+         * because this is not needed.
-                ubifs_err("can't write inode %lu, error %d", inode->i_ino, err);
+         */
+        dbg_gen("inode %lu, mode %#x, nlink %u",
+                inode->i_ino, (int)inode->i_mode, inode->i_nlink);
+        if (inode->i_nlink) {
+                err = ubifs_jnl_write_inode(c, inode);
+                if (err)
+                        ubifs_err("can't write inode %lu, error %d",
+                                  inode->i_ino, err);
+        }
        ui->dirty = 0;
        mutex_unlock(&ui->ui_mutex);
@@ -314,8 +321,9 @@ static void ubifs_delete_inode(struct inode *inode)
 {
        int err;
        struct ubifs_info *c = inode->i_sb->s_fs_info;
+        struct ubifs_inode *ui = ubifs_inode(inode);
-        if (ubifs_inode(inode)->xattr)
+        if (ui->xattr)
                /*
                 * Extended attribute inode deletions are fully handled in
                 * 'ubifs_removexattr()'. These inodes are special and have
@@ -323,7 +331,7 @@ static void ubifs_delete_inode(struct inode *inode)
                 */
                goto out;
-        dbg_gen("inode %lu", inode->i_ino);
+        dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
        ubifs_assert(!atomic_read(&inode->i_count));
        ubifs_assert(inode->i_nlink == 0);
@@ -331,15 +339,19 @@ static void ubifs_delete_inode(struct inode *inode)
        if (is_bad_inode(inode))
                goto out;
-        ubifs_inode(inode)->ui_size = inode->i_size = 0;
+        ui->ui_size = inode->i_size = 0;
-        err = ubifs_jnl_write_inode(c, inode, 1);
+        err = ubifs_jnl_delete_inode(c, inode);
        if (err)
                /*
                 * Worst case we have a lost orphan inode wasting space, so a
-                 * simple error message is ok here.
+                 * simple error message is OK here.
                 */
-                ubifs_err("can't write inode %lu, error %d", inode->i_ino, err);
+                ubifs_err("can't delete inode %lu, error %d",
+                          inode->i_ino, err);
 out:
+        if (ui->dirty)
+                ubifs_release_dirty_inode_budget(c, ui);
        clear_inode(inode);
 }
@@ -1122,8 +1134,8 @@ static int mount_ubifs(struct ubifs_info *c)
        if (err)
                goto out_infos;
-        ubifs_msg("mounted UBI device %d, volume %d", c->vi.ubi_num,
+        ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",
-                  c->vi.vol_id);
+                  c->vi.ubi_num, c->vi.vol_id, c->vi.name);
        if (mounted_read_only)
                ubifs_msg("mounted read-only");
        x = (long long)c->main_lebs * c->leb_size;
@@ -1469,6 +1481,7 @@ static void ubifs_put_super(struct super_block *sb)
         */
        ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
        ubifs_assert(c->budg_idx_growth == 0);
+        ubifs_assert(c->budg_dd_growth == 0);
        ubifs_assert(c->budg_data_growth == 0);
        /*
@@ -1657,7 +1670,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
        INIT_LIST_HEAD(&c->orph_new);
        c->highest_inum = UBIFS_FIRST_INO;
-        get_random_bytes(&c->vfs_gen, sizeof(int));
        c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
        ubi_get_volume_info(ubi, &c->vi);
@@ -1671,10 +1683,10 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
        }
        /*
-         * UBIFS provids 'backing_dev_info' in order to disable readahead. For
+         * UBIFS provides 'backing_dev_info' in order to disable read-ahead. For
         * UBIFS, I/O is not deferred, it is done immediately in readpage,
         * which means the user would have to wait not just for their own I/O
-         * but the readahead I/O as well i.e. completely pointless.
+         * but the read-ahead I/O as well i.e. completely pointless.
         *
         * Read-ahead will be disabled because @c->bdi.ra_pages is 0.
         */
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 8117e65ba2e9..8ac76b1c2d55 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -372,26 +372,25 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
                written = layout_leb_in_gaps(c, p);
                if (written < 0) {
                        err = written;
-                        if (err == -ENOSPC) {
+                        if (err != -ENOSPC) {
-                                if (!dbg_force_in_the_gaps_enabled) {
+                                kfree(c->gap_lebs);
-                                        /*
+                                c->gap_lebs = NULL;
-                                         * Do not print scary warnings if the
+                                return err;
-                                         * debugging option which forces
-                                         * in-the-gaps is enabled.
-                                         */
-                                        ubifs_err("out of space");
-                                        spin_lock(&c->space_lock);
-                                        dbg_dump_budg(c);
-                                        spin_unlock(&c->space_lock);
-                                        dbg_dump_lprops(c);
-                                }
-                                /* Try to commit anyway */
-                                err = 0;
-                                break;
                        }
-                        kfree(c->gap_lebs);
+                        if (!dbg_force_in_the_gaps_enabled) {
-                        c->gap_lebs = NULL;
+                                /*
-                        return err;
+                                 * Do not print scary warnings if the debugging
+                                 * option which forces in-the-gaps is enabled.
+                                 */
+                                ubifs_err("out of space");
+                                spin_lock(&c->space_lock);
+                                dbg_dump_budg(c);
+                                spin_unlock(&c->space_lock);
+                                dbg_dump_lprops(c);
+                        }
+                        /* Try to commit anyway */
+                        err = 0;
+                        break;
                }
                p++;
                cnt -= written;
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 0cc7da9bed47..bd2121f3426e 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -228,10 +228,10 @@ enum {
 /* Minimum number of orphan area logical eraseblocks */
 #define UBIFS_MIN_ORPH_LEBS 1
 /*
- * Minimum number of main area logical eraseblocks (buds, 2 for the index, 1
+ * Minimum number of main area logical eraseblocks (buds, 3 for the index, 1
 * for GC, 1 for deletions, and at least 1 for committed data).
 */
-#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 5)
+#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 6)
 /* Minimum number of logical eraseblocks */
 #define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index e4f89f271827..d7f706f7a302 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -20,8 +20,6 @@
 *          Adrian Hunter
 */
-/* Implementation version 0.7 */
 #ifndef __UBIFS_H__
 #define __UBIFS_H__
@@ -322,6 +320,8 @@ struct ubifs_gced_idx_leb {
 * struct ubifs_inode - UBIFS in-memory inode description.
 * @vfs_inode: VFS inode description object
 * @creat_sqnum: sequence number at time of creation
+ * @del_cmtno: commit number corresponding to the time the inode was deleted,
+ *             protected by @c->commit_sem
 * @xattr_size: summarized size of all extended attributes in bytes
 * @xattr_cnt: count of extended attributes this inode has
 * @xattr_names: sum of lengths of all extended attribute names belonging to
@@ -373,6 +373,7 @@ struct ubifs_gced_idx_leb {
 struct ubifs_inode {
        struct inode vfs_inode;
        unsigned long long creat_sqnum;
+        unsigned long long del_cmtno;
        unsigned int xattr_size;
        unsigned int xattr_cnt;
        unsigned int xattr_names;
@@ -779,7 +780,7 @@ struct ubifs_compressor {
 /**
 * struct ubifs_budget_req - budget requirements of an operation.
 *
- * @fast: non-zero if the budgeting should try to aquire budget quickly and
+ * @fast: non-zero if the budgeting should try to acquire budget quickly and
 *        should not try to call write-back
 * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields
 *               have to be re-calculated
@@ -805,21 +806,31 @@ struct ubifs_compressor {
 * An inode may contain 4KiB of data at max., thus the widths of @new_ino_d
 * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made
 * dirty by the re-name operation.
+ *
+ * Note, UBIFS aligns node lengths to an 8-byte boundary, so the requester has
+ * to make sure the amounts of inode data which contribute to the @new_ino_d
+ * and @dirtied_ino_d fields are aligned.
 */
 struct ubifs_budget_req {
        unsigned int fast:1;
        unsigned int recalculate:1;
+#ifndef UBIFS_DEBUG
        unsigned int new_page:1;
        unsigned int dirtied_page:1;
        unsigned int new_dent:1;
        unsigned int mod_dent:1;
        unsigned int new_ino:1;
        unsigned int new_ino_d:13;
-#ifndef UBIFS_DEBUG
        unsigned int dirtied_ino:4;
        unsigned int dirtied_ino_d:15;
 #else
        /* Not bit-fields to check for overflows */
+        unsigned int new_page;
+        unsigned int dirtied_page;
+        unsigned int new_dent;
+        unsigned int mod_dent;
+        unsigned int new_ino;
+        unsigned int new_ino_d;
        unsigned int dirtied_ino;
        unsigned int dirtied_ino_d;
 #endif
@@ -860,13 +871,13 @@ struct ubifs_mount_opts {
 * struct ubifs_info - UBIFS file-system description data structure
 * (per-superblock).
 * @vfs_sb: VFS @struct super_block object
- * @bdi: backing device info object to make VFS happy and disable readahead
+ * @bdi: backing device info object to make VFS happy and disable read-ahead
 *
 * @highest_inum: highest used inode number
- * @vfs_gen: VFS inode generation counter
 * @max_sqnum: current global sequence number
- * @cmt_no: commit number (last successfully completed commit)
+ * @cmt_no: commit number of the last successfully completed commit, protected
- * @cnt_lock: protects @highest_inum, @vfs_gen, and @max_sqnum counters
+ *          by @commit_sem
+ * @cnt_lock: protects @highest_inum and @max_sqnum counters
 * @fmt_version: UBIFS on-flash format version
 * @uuid: UUID from super block
 *
@@ -1103,7 +1114,6 @@ struct ubifs_info {
        struct backing_dev_info bdi;
        ino_t highest_inum;
-        unsigned int vfs_gen;
        unsigned long long max_sqnum;
        unsigned long long cmt_no;
        spinlock_t cnt_lock;
@@ -1346,6 +1356,7 @@ extern struct backing_dev_info ubifs_backing_dev_info;
 extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
 /* io.c */
+void ubifs_ro_mode(struct ubifs_info *c, int err);
 int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len);
 int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
                           int dtype);
@@ -1399,8 +1410,8 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
                     int deletion, int xent);
 int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
                         const union ubifs_key *key, const void *buf, int len);
-int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
+int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode);
-                          int last_reference);
+int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode);
 int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
                     const struct dentry *old_dentry,
                     const struct inode *new_dir,
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 1388a078e1a9..649bec78b645 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -61,7 +61,7 @@
 /*
 * Limit the number of extended attributes per inode so that the total size
- * (xattr_size) is guaranteeded to fit in an 'unsigned int'.
+ * (@xattr_size) is guaranteed to fit in an 'unsigned int'.
 */
 #define MAX_XATTRS_PER_INODE 65535
@@ -103,14 +103,14 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
        struct inode *inode;
        struct ubifs_inode *ui, *host_ui = ubifs_inode(host);
        struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
-                                        .new_ino_d = size, .dirtied_ino = 1,
+                                .new_ino_d = ALIGN(size, 8), .dirtied_ino = 1,
-                                        .dirtied_ino_d = host_ui->data_len};
+                                .dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
        if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE)
                return -ENOSPC;
        /*
         * Linux limits the maximum size of the extended attribute names list
-         * to %XATTR_LIST_MAX. This means we should not allow creating more*
+         * to %XATTR_LIST_MAX. This means we should not allow creating more
         * extended attributes if the name list becomes larger. This limitation
         * is artificial for UBIFS, though.
         */
@@ -128,7 +128,6 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
                goto out_budg;
        }
-        mutex_lock(&host_ui->ui_mutex);
        /* Re-define all operations to be "nothing" */
        inode->i_mapping->a_ops = &none_address_operations;
        inode->i_op = &none_inode_operations;
@@ -141,23 +140,19 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
        ui->data = kmalloc(size, GFP_NOFS);
        if (!ui->data) {
                err = -ENOMEM;
-                goto out_unlock;
+                goto out_free;
        }
        memcpy(ui->data, value, size);
+        inode->i_size = ui->ui_size = size;
+        ui->data_len = size;
+        mutex_lock(&host_ui->ui_mutex);
        host->i_ctime = ubifs_current_time(host);
        host_ui->xattr_cnt += 1;
        host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
        host_ui->xattr_size += CALC_XATTR_BYTES(size);
        host_ui->xattr_names += nm->len;
-        /*
-         * We do not use i_size_write() because nobody can race with us as we
-         * are holding host @host->i_mutex - every xattr operation for this
-         * inode is serialized by it.
-         */
-        inode->i_size = ui->ui_size = size;
-        ui->data_len = size;
        err = ubifs_jnl_update(c, host, nm, inode, 0, 1);
        if (err)
                goto out_cancel;
@@ -172,8 +167,8 @@ out_cancel:
        host_ui->xattr_cnt -= 1;
        host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
        host_ui->xattr_size -= CALC_XATTR_BYTES(size);
-out_unlock:
        mutex_unlock(&host_ui->ui_mutex);
+out_free:
        make_bad_inode(inode);
        iput(inode);
 out_budg:
@@ -200,29 +195,28 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
        struct ubifs_inode *host_ui = ubifs_inode(host);
        struct ubifs_inode *ui = ubifs_inode(inode);
        struct ubifs_budget_req req = { .dirtied_ino = 2,
-                                .dirtied_ino_d = size + host_ui->data_len };
+                .dirtied_ino_d = ALIGN(size, 8) + ALIGN(host_ui->data_len, 8) };
        ubifs_assert(ui->data_len == inode->i_size);
        err = ubifs_budget_space(c, &req);
        if (err)
                return err;
-        mutex_lock(&host_ui->ui_mutex);
-        host->i_ctime = ubifs_current_time(host);
-        host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
-        host_ui->xattr_size += CALC_XATTR_BYTES(size);
        kfree(ui->data);
        ui->data = kmalloc(size, GFP_NOFS);
        if (!ui->data) {
                err = -ENOMEM;
-                goto out_unlock;
+                goto out_free;
        }
        memcpy(ui->data, value, size);
        inode->i_size = ui->ui_size = size;
        ui->data_len = size;
+        mutex_lock(&host_ui->ui_mutex);
+        host->i_ctime = ubifs_current_time(host);
+        host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
+        host_ui->xattr_size += CALC_XATTR_BYTES(size);
        /*
         * It is important to write the host inode after the xattr inode
         * because if the host inode gets synchronized (via 'fsync()'), then
@@ -240,9 +234,9 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
 out_cancel:
        host_ui->xattr_size -= CALC_XATTR_BYTES(size);
        host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len);
-        make_bad_inode(inode);
-out_unlock:
        mutex_unlock(&host_ui->ui_mutex);
+        make_bad_inode(inode);
+out_free:
        ubifs_release_budget(c, &req);
        return err;
 }
@@ -312,6 +306,7 @@ int ubifs_setxattr(struct dentry *dentry, const char *name,
        dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name,
                host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
+        ubifs_assert(mutex_is_locked(&host->i_mutex));
        if (size > UBIFS_MAX_INO_DATA)
                return -ERANGE;
@@ -384,7 +379,6 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
        if (!xent)
                return -ENOMEM;
-        mutex_lock(&host->i_mutex);
        xent_key_init(c, &key, host->i_ino, &nm);
        err = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
        if (err) {
@@ -419,7 +413,6 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
 out_iput:
        iput(inode);
 out_unlock:
-        mutex_unlock(&host->i_mutex);
        kfree(xent);
        return err;
 }
@@ -449,8 +442,6 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
                return -ERANGE;
        lowest_xent_key(c, &key, host->i_ino);
-        mutex_lock(&host->i_mutex);
        while (1) {
                int type;
@@ -479,7 +470,6 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
                pxent = xent;
                key_read(c, &xent->key, &key);
        }
-        mutex_unlock(&host->i_mutex);
        kfree(pxent);
        if (err != -ENOENT) {
@@ -497,8 +487,8 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host,
        int err;
        struct ubifs_inode *host_ui = ubifs_inode(host);
        struct ubifs_inode *ui = ubifs_inode(inode);
-        struct ubifs_budget_req req = { .dirtied_ino = 1, .mod_dent = 1,
+        struct ubifs_budget_req req = { .dirtied_ino = 2, .mod_dent = 1,
-                                        .dirtied_ino_d = host_ui->data_len };
+                                .dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
        ubifs_assert(ui->data_len == inode->i_size);
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index cdc2d3464a1a..2813cdd72375 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -18,7 +18,6 @@
 #ifndef __XFS_DMAPI_H__
 #define __XFS_DMAPI_H__
-#include <linux/version.h>
 /*      Values used to define the on-disk version of dm_attrname_t. All
 *      on-disk attribute names start with the 8-byte string "SGI_DMI_".
 *