aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/main.c4
-rw-r--r--fs/aio.c13
-rw-r--r--fs/btrfs/extent_io.c10
-rw-r--r--fs/btrfs/ordered-data.c11
-rw-r--r--fs/btrfs/volumes.c8
-rw-r--r--fs/buffer.c11
-rw-r--r--fs/cifs/connect.c10
-rw-r--r--fs/cifs/file.c9
-rw-r--r--fs/cifs/inode.c6
-rw-r--r--fs/cifs/misc.c2
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/debugfs/file.c2
-rw-r--r--fs/debugfs/inode.c39
-rw-r--r--fs/direct-io.c23
-rw-r--r--fs/exec.c6
-rw-r--r--fs/ext4/balloc.c1
-rw-r--r--fs/ext4/dir.c25
-rw-r--r--fs/ext4/ext4.h14
-rw-r--r--fs/ext4/extents.c14
-rw-r--r--fs/ext4/file.c4
-rw-r--r--fs/ext4/indirect.c281
-rw-r--r--fs/ext4/inline.c18
-rw-r--r--fs/ext4/inode.c130
-rw-r--r--fs/ext4/mballoc.c41
-rw-r--r--fs/ext4/migrate.c7
-rw-r--r--fs/ext4/move_extent.c3
-rw-r--r--fs/ext4/super.c88
-rw-r--r--fs/f2fs/acl.c6
-rw-r--r--fs/f2fs/checkpoint.c178
-rw-r--r--fs/f2fs/data.c59
-rw-r--r--fs/f2fs/debug.c19
-rw-r--r--fs/f2fs/dir.c87
-rw-r--r--fs/f2fs/f2fs.h50
-rw-r--r--fs/f2fs/file.c45
-rw-r--r--fs/f2fs/gc.c7
-rw-r--r--fs/f2fs/hash.c4
-rw-r--r--fs/f2fs/inline.c1
-rw-r--r--fs/f2fs/inode.c12
-rw-r--r--fs/f2fs/namei.c246
-rw-r--r--fs/f2fs/node.c273
-rw-r--r--fs/f2fs/node.h7
-rw-r--r--fs/f2fs/recovery.c22
-rw-r--r--fs/f2fs/segment.c38
-rw-r--r--fs/f2fs/segment.h8
-rw-r--r--fs/f2fs/super.c21
-rw-r--r--fs/fs-writeback.c3
-rw-r--r--fs/fscache/cookie.c7
-rw-r--r--fs/fscache/internal.h2
-rw-r--r--fs/fscache/main.c18
-rw-r--r--fs/fscache/page.c4
-rw-r--r--fs/fuse/dev.c51
-rw-r--r--fs/fuse/dir.c41
-rw-r--r--fs/fuse/file.c8
-rw-r--r--fs/fuse/inode.c27
-rw-r--r--fs/gfs2/file.c4
-rw-r--r--fs/gfs2/glock.c39
-rw-r--r--fs/gfs2/glops.c4
-rw-r--r--fs/gfs2/lock_dlm.c12
-rw-r--r--fs/gfs2/ops_fstype.c11
-rw-r--r--fs/gfs2/recovery.c8
-rw-r--r--fs/gfs2/rgrp.c4
-rw-r--r--fs/gfs2/super.c8
-rw-r--r--fs/inode.c7
-rw-r--r--fs/jbd2/transaction.c10
-rw-r--r--fs/kernfs/file.c2
-rw-r--r--fs/lockd/mon.c4
-rw-r--r--fs/locks.c26
-rw-r--r--fs/namei.c3
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c4
-rw-r--r--fs/nfs/idmap.c10
-rw-r--r--fs/nfs/inode.c6
-rw-r--r--fs/nfs/internal.h3
-rw-r--r--fs/nfs/nfs3acl.c43
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs4state.c4
-rw-r--r--fs/nfs/pagelist.c34
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs/write.c339
-rw-r--r--fs/nfsd/nfs4xdr.c4
-rw-r--r--fs/open.c5
-rw-r--r--fs/proc/array.c18
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/timerfd.c77
-rw-r--r--fs/xattr.c2
-rw-r--r--fs/xfs/xfs_bmap.c7
-rw-r--r--fs/xfs/xfs_bmap.h4
-rw-r--r--fs/xfs/xfs_bmap_util.c53
-rw-r--r--fs/xfs/xfs_bmap_util.h4
-rw-r--r--fs/xfs/xfs_btree.c82
-rw-r--r--fs/xfs/xfs_iomap.c3
-rw-r--r--fs/xfs/xfs_sb.c25
94 files changed, 1883 insertions, 1028 deletions
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 42dd2e499ed8..35de0c04729f 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -55,13 +55,13 @@ static int __init afs_get_client_UUID(void)
55 afs_uuid.time_low = uuidtime; 55 afs_uuid.time_low = uuidtime;
56 afs_uuid.time_mid = uuidtime >> 32; 56 afs_uuid.time_mid = uuidtime >> 32;
57 afs_uuid.time_hi_and_version = (uuidtime >> 48) & AFS_UUID_TIMEHI_MASK; 57 afs_uuid.time_hi_and_version = (uuidtime >> 48) & AFS_UUID_TIMEHI_MASK;
58 afs_uuid.time_hi_and_version = AFS_UUID_VERSION_TIME; 58 afs_uuid.time_hi_and_version |= AFS_UUID_VERSION_TIME;
59 59
60 get_random_bytes(&clockseq, 2); 60 get_random_bytes(&clockseq, 2);
61 afs_uuid.clock_seq_low = clockseq; 61 afs_uuid.clock_seq_low = clockseq;
62 afs_uuid.clock_seq_hi_and_reserved = 62 afs_uuid.clock_seq_hi_and_reserved =
63 (clockseq >> 8) & AFS_UUID_CLOCKHI_MASK; 63 (clockseq >> 8) & AFS_UUID_CLOCKHI_MASK;
64 afs_uuid.clock_seq_hi_and_reserved = AFS_UUID_VARIANT_STD; 64 afs_uuid.clock_seq_hi_and_reserved |= AFS_UUID_VARIANT_STD;
65 65
66 _debug("AFS UUID: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", 66 _debug("AFS UUID: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
67 afs_uuid.time_low, 67 afs_uuid.time_low,
diff --git a/fs/aio.c b/fs/aio.c
index 955947ef3e02..bd7ec2cc2674 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -506,6 +506,8 @@ static void free_ioctx(struct work_struct *work)
506 506
507 aio_free_ring(ctx); 507 aio_free_ring(ctx);
508 free_percpu(ctx->cpu); 508 free_percpu(ctx->cpu);
509 percpu_ref_exit(&ctx->reqs);
510 percpu_ref_exit(&ctx->users);
509 kmem_cache_free(kioctx_cachep, ctx); 511 kmem_cache_free(kioctx_cachep, ctx);
510} 512}
511 513
@@ -715,8 +717,8 @@ err_ctx:
715err: 717err:
716 mutex_unlock(&ctx->ring_lock); 718 mutex_unlock(&ctx->ring_lock);
717 free_percpu(ctx->cpu); 719 free_percpu(ctx->cpu);
718 free_percpu(ctx->reqs.pcpu_count); 720 percpu_ref_exit(&ctx->reqs);
719 free_percpu(ctx->users.pcpu_count); 721 percpu_ref_exit(&ctx->users);
720 kmem_cache_free(kioctx_cachep, ctx); 722 kmem_cache_free(kioctx_cachep, ctx);
721 pr_debug("error allocating ioctx %d\n", err); 723 pr_debug("error allocating ioctx %d\n", err);
722 return ERR_PTR(err); 724 return ERR_PTR(err);
@@ -830,16 +832,20 @@ void exit_aio(struct mm_struct *mm)
830static void put_reqs_available(struct kioctx *ctx, unsigned nr) 832static void put_reqs_available(struct kioctx *ctx, unsigned nr)
831{ 833{
832 struct kioctx_cpu *kcpu; 834 struct kioctx_cpu *kcpu;
835 unsigned long flags;
833 836
834 preempt_disable(); 837 preempt_disable();
835 kcpu = this_cpu_ptr(ctx->cpu); 838 kcpu = this_cpu_ptr(ctx->cpu);
836 839
840 local_irq_save(flags);
837 kcpu->reqs_available += nr; 841 kcpu->reqs_available += nr;
842
838 while (kcpu->reqs_available >= ctx->req_batch * 2) { 843 while (kcpu->reqs_available >= ctx->req_batch * 2) {
839 kcpu->reqs_available -= ctx->req_batch; 844 kcpu->reqs_available -= ctx->req_batch;
840 atomic_add(ctx->req_batch, &ctx->reqs_available); 845 atomic_add(ctx->req_batch, &ctx->reqs_available);
841 } 846 }
842 847
848 local_irq_restore(flags);
843 preempt_enable(); 849 preempt_enable();
844} 850}
845 851
@@ -847,10 +853,12 @@ static bool get_reqs_available(struct kioctx *ctx)
847{ 853{
848 struct kioctx_cpu *kcpu; 854 struct kioctx_cpu *kcpu;
849 bool ret = false; 855 bool ret = false;
856 unsigned long flags;
850 857
851 preempt_disable(); 858 preempt_disable();
852 kcpu = this_cpu_ptr(ctx->cpu); 859 kcpu = this_cpu_ptr(ctx->cpu);
853 860
861 local_irq_save(flags);
854 if (!kcpu->reqs_available) { 862 if (!kcpu->reqs_available) {
855 int old, avail = atomic_read(&ctx->reqs_available); 863 int old, avail = atomic_read(&ctx->reqs_available);
856 864
@@ -869,6 +877,7 @@ static bool get_reqs_available(struct kioctx *ctx)
869 ret = true; 877 ret = true;
870 kcpu->reqs_available--; 878 kcpu->reqs_available--;
871out: 879out:
880 local_irq_restore(flags);
872 preempt_enable(); 881 preempt_enable();
873 return ret; 882 return ret;
874} 883}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a389820d158b..3e11aab9f391 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3437,16 +3437,10 @@ done_unlocked:
3437 return 0; 3437 return 0;
3438} 3438}
3439 3439
3440static int eb_wait(void *word)
3441{
3442 io_schedule();
3443 return 0;
3444}
3445
3446void wait_on_extent_buffer_writeback(struct extent_buffer *eb) 3440void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3447{ 3441{
3448 wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait, 3442 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
3449 TASK_UNINTERRUPTIBLE); 3443 TASK_UNINTERRUPTIBLE);
3450} 3444}
3451 3445
3452static noinline_for_stack int 3446static noinline_for_stack int
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e12441c7cf1d..7187b14faa6c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -484,8 +484,19 @@ void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
484 log_list); 484 log_list);
485 list_del_init(&ordered->log_list); 485 list_del_init(&ordered->log_list);
486 spin_unlock_irq(&log->log_extents_lock[index]); 486 spin_unlock_irq(&log->log_extents_lock[index]);
487
488 if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
489 !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
490 struct inode *inode = ordered->inode;
491 u64 start = ordered->file_offset;
492 u64 end = ordered->file_offset + ordered->len - 1;
493
494 WARN_ON(!inode);
495 filemap_fdatawrite_range(inode->i_mapping, start, end);
496 }
487 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, 497 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
488 &ordered->flags)); 498 &ordered->flags));
499
489 btrfs_put_ordered_extent(ordered); 500 btrfs_put_ordered_extent(ordered);
490 spin_lock_irq(&log->log_extents_lock[index]); 501 spin_lock_irq(&log->log_extents_lock[index]);
491 } 502 }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6104676857f5..6cb82f62cb7c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1680,11 +1680,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1680 if (device->bdev == root->fs_info->fs_devices->latest_bdev) 1680 if (device->bdev == root->fs_info->fs_devices->latest_bdev)
1681 root->fs_info->fs_devices->latest_bdev = next_device->bdev; 1681 root->fs_info->fs_devices->latest_bdev = next_device->bdev;
1682 1682
1683 if (device->bdev) 1683 if (device->bdev) {
1684 device->fs_devices->open_devices--; 1684 device->fs_devices->open_devices--;
1685 1685 /* remove sysfs entry */
1686 /* remove sysfs entry */ 1686 btrfs_kobj_rm_device(root->fs_info, device);
1687 btrfs_kobj_rm_device(root->fs_info, device); 1687 }
1688 1688
1689 call_rcu(&device->rcu, free_device); 1689 call_rcu(&device->rcu, free_device);
1690 1690
diff --git a/fs/buffer.c b/fs/buffer.c
index eba6e4f621ce..8f05111bbb8b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -61,16 +61,9 @@ inline void touch_buffer(struct buffer_head *bh)
61} 61}
62EXPORT_SYMBOL(touch_buffer); 62EXPORT_SYMBOL(touch_buffer);
63 63
64static int sleep_on_buffer(void *word)
65{
66 io_schedule();
67 return 0;
68}
69
70void __lock_buffer(struct buffer_head *bh) 64void __lock_buffer(struct buffer_head *bh)
71{ 65{
72 wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer, 66 wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
73 TASK_UNINTERRUPTIBLE);
74} 67}
75EXPORT_SYMBOL(__lock_buffer); 68EXPORT_SYMBOL(__lock_buffer);
76 69
@@ -123,7 +116,7 @@ EXPORT_SYMBOL(buffer_check_dirty_writeback);
123 */ 116 */
124void __wait_on_buffer(struct buffer_head * bh) 117void __wait_on_buffer(struct buffer_head * bh)
125{ 118{
126 wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE); 119 wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
127} 120}
128EXPORT_SYMBOL(__wait_on_buffer); 121EXPORT_SYMBOL(__wait_on_buffer);
129 122
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 20d75b8ddb26..b98366f21f9e 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3934,13 +3934,6 @@ cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
3934 return tlink_tcon(cifs_sb_master_tlink(cifs_sb)); 3934 return tlink_tcon(cifs_sb_master_tlink(cifs_sb));
3935} 3935}
3936 3936
3937static int
3938cifs_sb_tcon_pending_wait(void *unused)
3939{
3940 schedule();
3941 return signal_pending(current) ? -ERESTARTSYS : 0;
3942}
3943
3944/* find and return a tlink with given uid */ 3937/* find and return a tlink with given uid */
3945static struct tcon_link * 3938static struct tcon_link *
3946tlink_rb_search(struct rb_root *root, kuid_t uid) 3939tlink_rb_search(struct rb_root *root, kuid_t uid)
@@ -4039,11 +4032,10 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
4039 } else { 4032 } else {
4040wait_for_construction: 4033wait_for_construction:
4041 ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING, 4034 ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING,
4042 cifs_sb_tcon_pending_wait,
4043 TASK_INTERRUPTIBLE); 4035 TASK_INTERRUPTIBLE);
4044 if (ret) { 4036 if (ret) {
4045 cifs_put_tlink(tlink); 4037 cifs_put_tlink(tlink);
4046 return ERR_PTR(ret); 4038 return ERR_PTR(-ERESTARTSYS);
4047 } 4039 }
4048 4040
4049 /* if it's good, return it */ 4041 /* if it's good, return it */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e90a1e9aa627..b88b1ade4d3d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3618,13 +3618,6 @@ static int cifs_launder_page(struct page *page)
3618 return rc; 3618 return rc;
3619} 3619}
3620 3620
3621static int
3622cifs_pending_writers_wait(void *unused)
3623{
3624 schedule();
3625 return 0;
3626}
3627
3628void cifs_oplock_break(struct work_struct *work) 3621void cifs_oplock_break(struct work_struct *work)
3629{ 3622{
3630 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, 3623 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
@@ -3636,7 +3629,7 @@ void cifs_oplock_break(struct work_struct *work)
3636 int rc = 0; 3629 int rc = 0;
3637 3630
3638 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, 3631 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3639 cifs_pending_writers_wait, TASK_UNINTERRUPTIBLE); 3632 TASK_UNINTERRUPTIBLE);
3640 3633
3641 server->ops->downgrade_oplock(server, cinode, 3634 server->ops->downgrade_oplock(server, cinode,
3642 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags)); 3635 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a174605f6afa..41de3935caa0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1780,7 +1780,7 @@ cifs_invalidate_mapping(struct inode *inode)
1780 * @word: long word containing the bit lock 1780 * @word: long word containing the bit lock
1781 */ 1781 */
1782static int 1782static int
1783cifs_wait_bit_killable(void *word) 1783cifs_wait_bit_killable(struct wait_bit_key *key)
1784{ 1784{
1785 if (fatal_signal_pending(current)) 1785 if (fatal_signal_pending(current))
1786 return -ERESTARTSYS; 1786 return -ERESTARTSYS;
@@ -1794,8 +1794,8 @@ cifs_revalidate_mapping(struct inode *inode)
1794 int rc; 1794 int rc;
1795 unsigned long *flags = &CIFS_I(inode)->flags; 1795 unsigned long *flags = &CIFS_I(inode)->flags;
1796 1796
1797 rc = wait_on_bit_lock(flags, CIFS_INO_LOCK, cifs_wait_bit_killable, 1797 rc = wait_on_bit_lock_action(flags, CIFS_INO_LOCK, cifs_wait_bit_killable,
1798 TASK_KILLABLE); 1798 TASK_KILLABLE);
1799 if (rc) 1799 if (rc)
1800 return rc; 1800 return rc;
1801 1801
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 3b0c62e622da..6bf55d0ed494 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -582,7 +582,7 @@ int cifs_get_writer(struct cifsInodeInfo *cinode)
582 582
583start: 583start:
584 rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK, 584 rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK,
585 cifs_oplock_break_wait, TASK_KILLABLE); 585 TASK_KILLABLE);
586 if (rc) 586 if (rc)
587 return rc; 587 return rc;
588 588
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index e82289047272..afec6450450f 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -59,7 +59,7 @@
59#include <linux/gfp.h> 59#include <linux/gfp.h>
60 60
61#include <net/bluetooth/bluetooth.h> 61#include <net/bluetooth/bluetooth.h>
62#include <net/bluetooth/hci.h> 62#include <net/bluetooth/hci_sock.h>
63#include <net/bluetooth/rfcomm.h> 63#include <net/bluetooth/rfcomm.h>
64 64
65#include <linux/capi.h> 65#include <linux/capi.h>
diff --git a/fs/coredump.c b/fs/coredump.c
index 0b2528fb640e..a93f7e6ea4cf 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -306,7 +306,7 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
306 if (unlikely(nr < 0)) 306 if (unlikely(nr < 0))
307 return nr; 307 return nr;
308 308
309 tsk->flags = PF_DUMPCORE; 309 tsk->flags |= PF_DUMPCORE;
310 if (atomic_read(&mm->mm_users) == nr + 1) 310 if (atomic_read(&mm->mm_users) == nr + 1)
311 goto done; 311 goto done;
312 /* 312 /*
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 63146295153b..76c08c2beb2f 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -451,7 +451,7 @@ static ssize_t read_file_bool(struct file *file, char __user *user_buf,
451{ 451{
452 char buf[3]; 452 char buf[3];
453 u32 *val = file->private_data; 453 u32 *val = file->private_data;
454 454
455 if (*val) 455 if (*val)
456 buf[0] = 'Y'; 456 buf[0] = 'Y';
457 else 457 else
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 8c41b52da358..1e3b99d3db0d 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -66,7 +66,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev
66 break; 66 break;
67 } 67 }
68 } 68 }
69 return inode; 69 return inode;
70} 70}
71 71
72/* SMP-safe */ 72/* SMP-safe */
@@ -317,7 +317,7 @@ static struct dentry *__create_file(const char *name, umode_t mode,
317 goto exit; 317 goto exit;
318 318
319 /* If the parent is not specified, we create it in the root. 319 /* If the parent is not specified, we create it in the root.
320 * We need the root dentry to do this, which is in the super 320 * We need the root dentry to do this, which is in the super
321 * block. A pointer to that is in the struct vfsmount that we 321 * block. A pointer to that is in the struct vfsmount that we
322 * have around. 322 * have around.
323 */ 323 */
@@ -330,7 +330,7 @@ static struct dentry *__create_file(const char *name, umode_t mode,
330 switch (mode & S_IFMT) { 330 switch (mode & S_IFMT) {
331 case S_IFDIR: 331 case S_IFDIR:
332 error = debugfs_mkdir(parent->d_inode, dentry, mode); 332 error = debugfs_mkdir(parent->d_inode, dentry, mode);
333 333
334 break; 334 break;
335 case S_IFLNK: 335 case S_IFLNK:
336 error = debugfs_link(parent->d_inode, dentry, mode, 336 error = debugfs_link(parent->d_inode, dentry, mode,
@@ -534,7 +534,7 @@ EXPORT_SYMBOL_GPL(debugfs_remove);
534 */ 534 */
535void debugfs_remove_recursive(struct dentry *dentry) 535void debugfs_remove_recursive(struct dentry *dentry)
536{ 536{
537 struct dentry *child, *next, *parent; 537 struct dentry *child, *parent;
538 538
539 if (IS_ERR_OR_NULL(dentry)) 539 if (IS_ERR_OR_NULL(dentry))
540 return; 540 return;
@@ -546,30 +546,49 @@ void debugfs_remove_recursive(struct dentry *dentry)
546 parent = dentry; 546 parent = dentry;
547 down: 547 down:
548 mutex_lock(&parent->d_inode->i_mutex); 548 mutex_lock(&parent->d_inode->i_mutex);
549 list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) { 549 loop:
550 /*
551 * The parent->d_subdirs is protected by the d_lock. Outside that
552 * lock, the child can be unlinked and set to be freed which can
553 * use the d_u.d_child as the rcu head and corrupt this list.
554 */
555 spin_lock(&parent->d_lock);
556 list_for_each_entry(child, &parent->d_subdirs, d_u.d_child) {
550 if (!debugfs_positive(child)) 557 if (!debugfs_positive(child))
551 continue; 558 continue;
552 559
553 /* perhaps simple_empty(child) makes more sense */ 560 /* perhaps simple_empty(child) makes more sense */
554 if (!list_empty(&child->d_subdirs)) { 561 if (!list_empty(&child->d_subdirs)) {
562 spin_unlock(&parent->d_lock);
555 mutex_unlock(&parent->d_inode->i_mutex); 563 mutex_unlock(&parent->d_inode->i_mutex);
556 parent = child; 564 parent = child;
557 goto down; 565 goto down;
558 } 566 }
559 up: 567
568 spin_unlock(&parent->d_lock);
569
560 if (!__debugfs_remove(child, parent)) 570 if (!__debugfs_remove(child, parent))
561 simple_release_fs(&debugfs_mount, &debugfs_mount_count); 571 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
572
573 /*
574 * The parent->d_lock protects agaist child from unlinking
575 * from d_subdirs. When releasing the parent->d_lock we can
576 * no longer trust that the next pointer is valid.
577 * Restart the loop. We'll skip this one with the
578 * debugfs_positive() check.
579 */
580 goto loop;
562 } 581 }
582 spin_unlock(&parent->d_lock);
563 583
564 mutex_unlock(&parent->d_inode->i_mutex); 584 mutex_unlock(&parent->d_inode->i_mutex);
565 child = parent; 585 child = parent;
566 parent = parent->d_parent; 586 parent = parent->d_parent;
567 mutex_lock(&parent->d_inode->i_mutex); 587 mutex_lock(&parent->d_inode->i_mutex);
568 588
569 if (child != dentry) { 589 if (child != dentry)
570 next = list_next_entry(child, d_u.d_child); 590 /* go up */
571 goto up; 591 goto loop;
572 }
573 592
574 if (!__debugfs_remove(child, parent)) 593 if (!__debugfs_remove(child, parent))
575 simple_release_fs(&debugfs_mount, &debugfs_mount_count); 594 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 98040ba388ac..17e39b047de5 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -71,7 +71,6 @@ struct dio_submit {
71 been performed at the start of a 71 been performed at the start of a
72 write */ 72 write */
73 int pages_in_io; /* approximate total IO pages */ 73 int pages_in_io; /* approximate total IO pages */
74 size_t size; /* total request size (doesn't change)*/
75 sector_t block_in_file; /* Current offset into the underlying 74 sector_t block_in_file; /* Current offset into the underlying
76 file in dio_block units. */ 75 file in dio_block units. */
77 unsigned blocks_available; /* At block_in_file. changes */ 76 unsigned blocks_available; /* At block_in_file. changes */
@@ -198,9 +197,8 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
198 * L1 cache. 197 * L1 cache.
199 */ 198 */
200static inline struct page *dio_get_page(struct dio *dio, 199static inline struct page *dio_get_page(struct dio *dio,
201 struct dio_submit *sdio, size_t *from, size_t *to) 200 struct dio_submit *sdio)
202{ 201{
203 int n;
204 if (dio_pages_present(sdio) == 0) { 202 if (dio_pages_present(sdio) == 0) {
205 int ret; 203 int ret;
206 204
@@ -209,10 +207,7 @@ static inline struct page *dio_get_page(struct dio *dio,
209 return ERR_PTR(ret); 207 return ERR_PTR(ret);
210 BUG_ON(dio_pages_present(sdio) == 0); 208 BUG_ON(dio_pages_present(sdio) == 0);
211 } 209 }
212 n = sdio->head++; 210 return dio->pages[sdio->head];
213 *from = n ? 0 : sdio->from;
214 *to = (n == sdio->tail - 1) ? sdio->to : PAGE_SIZE;
215 return dio->pages[n];
216} 211}
217 212
218/** 213/**
@@ -911,11 +906,15 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
911 while (sdio->block_in_file < sdio->final_block_in_request) { 906 while (sdio->block_in_file < sdio->final_block_in_request) {
912 struct page *page; 907 struct page *page;
913 size_t from, to; 908 size_t from, to;
914 page = dio_get_page(dio, sdio, &from, &to); 909
910 page = dio_get_page(dio, sdio);
915 if (IS_ERR(page)) { 911 if (IS_ERR(page)) {
916 ret = PTR_ERR(page); 912 ret = PTR_ERR(page);
917 goto out; 913 goto out;
918 } 914 }
915 from = sdio->head ? 0 : sdio->from;
916 to = (sdio->head == sdio->tail - 1) ? sdio->to : PAGE_SIZE;
917 sdio->head++;
919 918
920 while (from < to) { 919 while (from < to) {
921 unsigned this_chunk_bytes; /* # of bytes mapped */ 920 unsigned this_chunk_bytes; /* # of bytes mapped */
@@ -1104,7 +1103,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1104 unsigned blkbits = i_blkbits; 1103 unsigned blkbits = i_blkbits;
1105 unsigned blocksize_mask = (1 << blkbits) - 1; 1104 unsigned blocksize_mask = (1 << blkbits) - 1;
1106 ssize_t retval = -EINVAL; 1105 ssize_t retval = -EINVAL;
1107 loff_t end = offset + iov_iter_count(iter); 1106 size_t count = iov_iter_count(iter);
1107 loff_t end = offset + count;
1108 struct dio *dio; 1108 struct dio *dio;
1109 struct dio_submit sdio = { 0, }; 1109 struct dio_submit sdio = { 0, };
1110 struct buffer_head map_bh = { 0, }; 1110 struct buffer_head map_bh = { 0, };
@@ -1287,10 +1287,9 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1287 */ 1287 */
1288 BUG_ON(retval == -EIOCBQUEUED); 1288 BUG_ON(retval == -EIOCBQUEUED);
1289 if (dio->is_async && retval == 0 && dio->result && 1289 if (dio->is_async && retval == 0 && dio->result &&
1290 ((rw == READ) || (dio->result == sdio.size))) 1290 (rw == READ || dio->result == count))
1291 retval = -EIOCBQUEUED; 1291 retval = -EIOCBQUEUED;
1292 1292 else
1293 if (retval != -EIOCBQUEUED)
1294 dio_await_completion(dio); 1293 dio_await_completion(dio);
1295 1294
1296 if (drop_refcount(dio) == 0) { 1295 if (drop_refcount(dio) == 0) {
diff --git a/fs/exec.c b/fs/exec.c
index a3d33fe592d6..ab1f1200ce5d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1216,7 +1216,7 @@ EXPORT_SYMBOL(install_exec_creds);
1216/* 1216/*
1217 * determine how safe it is to execute the proposed program 1217 * determine how safe it is to execute the proposed program
1218 * - the caller must hold ->cred_guard_mutex to protect against 1218 * - the caller must hold ->cred_guard_mutex to protect against
1219 * PTRACE_ATTACH 1219 * PTRACE_ATTACH or seccomp thread-sync
1220 */ 1220 */
1221static void check_unsafe_exec(struct linux_binprm *bprm) 1221static void check_unsafe_exec(struct linux_binprm *bprm)
1222{ 1222{
@@ -1234,7 +1234,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
1234 * This isn't strictly necessary, but it makes it harder for LSMs to 1234 * This isn't strictly necessary, but it makes it harder for LSMs to
1235 * mess up. 1235 * mess up.
1236 */ 1236 */
1237 if (current->no_new_privs) 1237 if (task_no_new_privs(current))
1238 bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS; 1238 bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
1239 1239
1240 t = p; 1240 t = p;
@@ -1272,7 +1272,7 @@ int prepare_binprm(struct linux_binprm *bprm)
1272 bprm->cred->egid = current_egid(); 1272 bprm->cred->egid = current_egid();
1273 1273
1274 if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && 1274 if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
1275 !current->no_new_privs && 1275 !task_no_new_privs(current) &&
1276 kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) && 1276 kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
1277 kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) { 1277 kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
1278 /* Set-uid? */ 1278 /* Set-uid? */
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index fca382037ddd..581ef40fbe90 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -639,7 +639,6 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
639 if (!(*errp) && 639 if (!(*errp) &&
640 ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) { 640 ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
641 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 641 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
642 EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
643 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 642 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
644 dquot_alloc_block_nofail(inode, 643 dquot_alloc_block_nofail(inode,
645 EXT4_C2B(EXT4_SB(inode->i_sb), ar.len)); 644 EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ef1bed66c14f..0bb3f9ea0832 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -571,6 +571,31 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)
571 return 0; 571 return 0;
572} 572}
573 573
574int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf,
575 int buf_size)
576{
577 struct ext4_dir_entry_2 *de;
578 int nlen, rlen;
579 unsigned int offset = 0;
580 char *top;
581
582 de = (struct ext4_dir_entry_2 *)buf;
583 top = buf + buf_size;
584 while ((char *) de < top) {
585 if (ext4_check_dir_entry(dir, NULL, de, bh,
586 buf, buf_size, offset))
587 return -EIO;
588 nlen = EXT4_DIR_REC_LEN(de->name_len);
589 rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
590 de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
591 offset += rlen;
592 }
593 if ((char *) de > top)
594 return -EIO;
595
596 return 0;
597}
598
574const struct file_operations ext4_dir_operations = { 599const struct file_operations ext4_dir_operations = {
575 .llseek = ext4_dir_llseek, 600 .llseek = ext4_dir_llseek,
576 .read = generic_read_dir, 601 .read = generic_read_dir,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 7cc5a0e23688..5b19760b1de5 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -591,7 +591,6 @@ enum {
591#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 591#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
592#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 592#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
593#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 593#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
594#define EXT4_FREE_BLOCKS_RESERVE 0x0040
595 594
596/* 595/*
597 * ioctl commands 596 * ioctl commands
@@ -2029,6 +2028,8 @@ static inline unsigned char get_dtype(struct super_block *sb, int filetype)
2029 2028
2030 return ext4_filetype_table[filetype]; 2029 return ext4_filetype_table[filetype];
2031} 2030}
2031extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh,
2032 void *buf, int buf_size);
2032 2033
2033/* fsync.c */ 2034/* fsync.c */
2034extern int ext4_sync_file(struct file *, loff_t, loff_t, int); 2035extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
@@ -2144,8 +2145,8 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
2144extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); 2145extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
2145extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); 2146extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
2146extern void ext4_ind_truncate(handle_t *, struct inode *inode); 2147extern void ext4_ind_truncate(handle_t *, struct inode *inode);
2147extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, 2148extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
2148 ext4_lblk_t first, ext4_lblk_t stop); 2149 ext4_lblk_t start, ext4_lblk_t end);
2149 2150
2150/* ioctl.c */ 2151/* ioctl.c */
2151extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 2152extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
@@ -2560,7 +2561,6 @@ extern const struct file_operations ext4_file_operations;
2560extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); 2561extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
2561 2562
2562/* inline.c */ 2563/* inline.c */
2563extern int ext4_has_inline_data(struct inode *inode);
2564extern int ext4_get_max_inline_size(struct inode *inode); 2564extern int ext4_get_max_inline_size(struct inode *inode);
2565extern int ext4_find_inline_data_nolock(struct inode *inode); 2565extern int ext4_find_inline_data_nolock(struct inode *inode);
2566extern int ext4_init_inline_data(handle_t *handle, struct inode *inode, 2566extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
@@ -2626,6 +2626,12 @@ extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
2626 2626
2627extern int ext4_convert_inline_data(struct inode *inode); 2627extern int ext4_convert_inline_data(struct inode *inode);
2628 2628
2629static inline int ext4_has_inline_data(struct inode *inode)
2630{
2631 return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
2632 EXT4_I(inode)->i_inline_off;
2633}
2634
2629/* namei.c */ 2635/* namei.c */
2630extern const struct inode_operations ext4_dir_inode_operations; 2636extern const struct inode_operations ext4_dir_inode_operations;
2631extern const struct inode_operations ext4_special_inode_operations; 2637extern const struct inode_operations ext4_special_inode_operations;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4da228a0e6d0..76c2df382b7d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -161,6 +161,8 @@ int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
161 struct inode *inode, struct ext4_ext_path *path) 161 struct inode *inode, struct ext4_ext_path *path)
162{ 162{
163 int err; 163 int err;
164
165 WARN_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
164 if (path->p_bh) { 166 if (path->p_bh) {
165 ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh)); 167 ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
166 /* path points to block */ 168 /* path points to block */
@@ -1808,8 +1810,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
1808 1810
1809 brelse(path[1].p_bh); 1811 brelse(path[1].p_bh);
1810 ext4_free_blocks(handle, inode, NULL, blk, 1, 1812 ext4_free_blocks(handle, inode, NULL, blk, 1,
1811 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET | 1813 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
1812 EXT4_FREE_BLOCKS_RESERVE);
1813} 1814}
1814 1815
1815/* 1816/*
@@ -3253,7 +3254,7 @@ out:
3253 3254
3254fix_extent_len: 3255fix_extent_len:
3255 ex->ee_len = orig_ex.ee_len; 3256 ex->ee_len = orig_ex.ee_len;
3256 ext4_ext_dirty(handle, inode, path + depth); 3257 ext4_ext_dirty(handle, inode, path + path->p_depth);
3257 return err; 3258 return err;
3258} 3259}
3259 3260
@@ -5403,16 +5404,13 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5403 int ret; 5404 int ret;
5404 5405
5405 /* Collapse range works only on fs block size aligned offsets. */ 5406 /* Collapse range works only on fs block size aligned offsets. */
5406 if (offset & (EXT4_BLOCK_SIZE(sb) - 1) || 5407 if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
5407 len & (EXT4_BLOCK_SIZE(sb) - 1)) 5408 len & (EXT4_CLUSTER_SIZE(sb) - 1))
5408 return -EINVAL; 5409 return -EINVAL;
5409 5410
5410 if (!S_ISREG(inode->i_mode)) 5411 if (!S_ISREG(inode->i_mode))
5411 return -EINVAL; 5412 return -EINVAL;
5412 5413
5413 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1)
5414 return -EOPNOTSUPP;
5415
5416 trace_ext4_collapse_range(inode, offset, len); 5414 trace_ext4_collapse_range(inode, offset, len);
5417 5415
5418 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); 5416 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8695f70af1ef..aca7b24a4432 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -200,10 +200,6 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
200 200
201static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) 201static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
202{ 202{
203 struct address_space *mapping = file->f_mapping;
204
205 if (!mapping->a_ops->readpage)
206 return -ENOEXEC;
207 file_accessed(file); 203 file_accessed(file);
208 vma->vm_ops = &ext4_file_vm_ops; 204 vma->vm_ops = &ext4_file_vm_ops;
209 return 0; 205 return 0;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index fd69da194826..e75f840000a0 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1295,97 +1295,220 @@ do_indirects:
1295 } 1295 }
1296} 1296}
1297 1297
1298static int free_hole_blocks(handle_t *handle, struct inode *inode, 1298/**
1299 struct buffer_head *parent_bh, __le32 *i_data, 1299 * ext4_ind_remove_space - remove space from the range
1300 int level, ext4_lblk_t first, 1300 * @handle: JBD handle for this transaction
1301 ext4_lblk_t count, int max) 1301 * @inode: inode we are dealing with
1302 * @start: First block to remove
1303 * @end: One block after the last block to remove (exclusive)
1304 *
1305 * Free the blocks in the defined range (end is exclusive endpoint of
1306 * range). This is used by ext4_punch_hole().
1307 */
1308int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
1309 ext4_lblk_t start, ext4_lblk_t end)
1302{ 1310{
1303 struct buffer_head *bh = NULL; 1311 struct ext4_inode_info *ei = EXT4_I(inode);
1312 __le32 *i_data = ei->i_data;
1304 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1313 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1305 int ret = 0; 1314 ext4_lblk_t offsets[4], offsets2[4];
1306 int i, inc; 1315 Indirect chain[4], chain2[4];
1307 ext4_lblk_t offset; 1316 Indirect *partial, *partial2;
1308 __le32 blk; 1317 ext4_lblk_t max_block;
1309 1318 __le32 nr = 0, nr2 = 0;
1310 inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level); 1319 int n = 0, n2 = 0;
1311 for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) { 1320 unsigned blocksize = inode->i_sb->s_blocksize;
1312 if (offset >= count + first)
1313 break;
1314 if (*i_data == 0 || (offset + inc) <= first)
1315 continue;
1316 blk = *i_data;
1317 if (level > 0) {
1318 ext4_lblk_t first2;
1319 ext4_lblk_t count2;
1320 1321
1321 bh = sb_bread(inode->i_sb, le32_to_cpu(blk)); 1322 max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
1322 if (!bh) { 1323 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
1323 EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk), 1324 if (end >= max_block)
1324 "Read failure"); 1325 end = max_block;
1325 return -EIO; 1326 if ((start >= end) || (start > max_block))
1326 } 1327 return 0;
1327 if (first > offset) { 1328
1328 first2 = first - offset; 1329 n = ext4_block_to_path(inode, start, offsets, NULL);
1329 count2 = count; 1330 n2 = ext4_block_to_path(inode, end, offsets2, NULL);
1331
1332 BUG_ON(n > n2);
1333
1334 if ((n == 1) && (n == n2)) {
1335 /* We're punching only within direct block range */
1336 ext4_free_data(handle, inode, NULL, i_data + offsets[0],
1337 i_data + offsets2[0]);
1338 return 0;
1339 } else if (n2 > n) {
1340 /*
1341 * Start and end are on a different levels so we're going to
1342 * free partial block at start, and partial block at end of
1343 * the range. If there are some levels in between then
1344 * do_indirects label will take care of that.
1345 */
1346
1347 if (n == 1) {
1348 /*
1349 * Start is at the direct block level, free
1350 * everything to the end of the level.
1351 */
1352 ext4_free_data(handle, inode, NULL, i_data + offsets[0],
1353 i_data + EXT4_NDIR_BLOCKS);
1354 goto end_range;
1355 }
1356
1357
1358 partial = ext4_find_shared(inode, n, offsets, chain, &nr);
1359 if (nr) {
1360 if (partial == chain) {
1361 /* Shared branch grows from the inode */
1362 ext4_free_branches(handle, inode, NULL,
1363 &nr, &nr+1, (chain+n-1) - partial);
1364 *partial->p = 0;
1330 } else { 1365 } else {
1331 first2 = 0; 1366 /* Shared branch grows from an indirect block */
1332 count2 = count - (offset - first); 1367 BUFFER_TRACE(partial->bh, "get_write_access");
1368 ext4_free_branches(handle, inode, partial->bh,
1369 partial->p,
1370 partial->p+1, (chain+n-1) - partial);
1333 } 1371 }
1334 ret = free_hole_blocks(handle, inode, bh, 1372 }
1335 (__le32 *)bh->b_data, level - 1, 1373
1336 first2, count2, 1374 /*
1337 inode->i_sb->s_blocksize >> 2); 1375 * Clear the ends of indirect blocks on the shared branch
1338 if (ret) { 1376 * at the start of the range
1339 brelse(bh); 1377 */
1340 goto err; 1378 while (partial > chain) {
1379 ext4_free_branches(handle, inode, partial->bh,
1380 partial->p + 1,
1381 (__le32 *)partial->bh->b_data+addr_per_block,
1382 (chain+n-1) - partial);
1383 BUFFER_TRACE(partial->bh, "call brelse");
1384 brelse(partial->bh);
1385 partial--;
1386 }
1387
1388end_range:
1389 partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
1390 if (nr2) {
1391 if (partial2 == chain2) {
1392 /*
1393 * Remember, end is exclusive so here we're at
1394 * the start of the next level we're not going
1395 * to free. Everything was covered by the start
1396 * of the range.
1397 */
1398 return 0;
1399 } else {
1400 /* Shared branch grows from an indirect block */
1401 partial2--;
1341 } 1402 }
1403 } else {
1404 /*
1405 * ext4_find_shared returns Indirect structure which
1406 * points to the last element which should not be
1407 * removed by truncate. But this is end of the range
1408 * in punch_hole so we need to point to the next element
1409 */
1410 partial2->p++;
1342 } 1411 }
1343 if (level == 0 || 1412
1344 (bh && all_zeroes((__le32 *)bh->b_data, 1413 /*
1345 (__le32 *)bh->b_data + addr_per_block))) { 1414 * Clear the ends of indirect blocks on the shared branch
1346 ext4_free_data(handle, inode, parent_bh, 1415 * at the end of the range
1347 i_data, i_data + 1); 1416 */
1417 while (partial2 > chain2) {
1418 ext4_free_branches(handle, inode, partial2->bh,
1419 (__le32 *)partial2->bh->b_data,
1420 partial2->p,
1421 (chain2+n2-1) - partial2);
1422 BUFFER_TRACE(partial2->bh, "call brelse");
1423 brelse(partial2->bh);
1424 partial2--;
1348 } 1425 }
1349 brelse(bh); 1426 goto do_indirects;
1350 bh = NULL;
1351 } 1427 }
1352 1428
1353err: 1429 /* Punch happened within the same level (n == n2) */
1354 return ret; 1430 partial = ext4_find_shared(inode, n, offsets, chain, &nr);
1355} 1431 partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
1356 1432 /*
1357int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, 1433 * ext4_find_shared returns Indirect structure which
1358 ext4_lblk_t first, ext4_lblk_t stop) 1434 * points to the last element which should not be
1359{ 1435 * removed by truncate. But this is end of the range
1360 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1436 * in punch_hole so we need to point to the next element
1361 int level, ret = 0; 1437 */
1362 int num = EXT4_NDIR_BLOCKS; 1438 partial2->p++;
1363 ext4_lblk_t count, max = EXT4_NDIR_BLOCKS; 1439 while ((partial > chain) || (partial2 > chain2)) {
1364 __le32 *i_data = EXT4_I(inode)->i_data; 1440 /* We're at the same block, so we're almost finished */
1365 1441 if ((partial->bh && partial2->bh) &&
1366 count = stop - first; 1442 (partial->bh->b_blocknr == partial2->bh->b_blocknr)) {
1367 for (level = 0; level < 4; level++, max *= addr_per_block) { 1443 if ((partial > chain) && (partial2 > chain2)) {
1368 if (first < max) { 1444 ext4_free_branches(handle, inode, partial->bh,
1369 ret = free_hole_blocks(handle, inode, NULL, i_data, 1445 partial->p + 1,
1370 level, first, count, num); 1446 partial2->p,
1371 if (ret) 1447 (chain+n-1) - partial);
1372 goto err; 1448 BUFFER_TRACE(partial->bh, "call brelse");
1373 if (count > max - first) 1449 brelse(partial->bh);
1374 count -= max - first; 1450 BUFFER_TRACE(partial2->bh, "call brelse");
1375 else 1451 brelse(partial2->bh);
1376 break; 1452 }
1377 first = 0; 1453 return 0;
1378 } else {
1379 first -= max;
1380 } 1454 }
1381 i_data += num; 1455 /*
1382 if (level == 0) { 1456 * Clear the ends of indirect blocks on the shared branch
1383 num = 1; 1457 * at the start of the range
1384 max = 1; 1458 */
1459 if (partial > chain) {
1460 ext4_free_branches(handle, inode, partial->bh,
1461 partial->p + 1,
1462 (__le32 *)partial->bh->b_data+addr_per_block,
1463 (chain+n-1) - partial);
1464 BUFFER_TRACE(partial->bh, "call brelse");
1465 brelse(partial->bh);
1466 partial--;
1467 }
1468 /*
1469 * Clear the ends of indirect blocks on the shared branch
1470 * at the end of the range
1471 */
1472 if (partial2 > chain2) {
1473 ext4_free_branches(handle, inode, partial2->bh,
1474 (__le32 *)partial2->bh->b_data,
1475 partial2->p,
1476 (chain2+n-1) - partial2);
1477 BUFFER_TRACE(partial2->bh, "call brelse");
1478 brelse(partial2->bh);
1479 partial2--;
1385 } 1480 }
1386 } 1481 }
1387 1482
1388err: 1483do_indirects:
1389 return ret; 1484 /* Kill the remaining (whole) subtrees */
1485 switch (offsets[0]) {
1486 default:
1487 if (++n >= n2)
1488 return 0;
1489 nr = i_data[EXT4_IND_BLOCK];
1490 if (nr) {
1491 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
1492 i_data[EXT4_IND_BLOCK] = 0;
1493 }
1494 case EXT4_IND_BLOCK:
1495 if (++n >= n2)
1496 return 0;
1497 nr = i_data[EXT4_DIND_BLOCK];
1498 if (nr) {
1499 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
1500 i_data[EXT4_DIND_BLOCK] = 0;
1501 }
1502 case EXT4_DIND_BLOCK:
1503 if (++n >= n2)
1504 return 0;
1505 nr = i_data[EXT4_TIND_BLOCK];
1506 if (nr) {
1507 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
1508 i_data[EXT4_TIND_BLOCK] = 0;
1509 }
1510 case EXT4_TIND_BLOCK:
1511 ;
1512 }
1513 return 0;
1390} 1514}
1391
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 645205d8ada6..bea662bd0ca6 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -120,12 +120,6 @@ int ext4_get_max_inline_size(struct inode *inode)
120 return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE; 120 return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE;
121} 121}
122 122
123int ext4_has_inline_data(struct inode *inode)
124{
125 return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
126 EXT4_I(inode)->i_inline_off;
127}
128
129/* 123/*
130 * this function does not take xattr_sem, which is OK because it is 124 * this function does not take xattr_sem, which is OK because it is
131 * currently only used in a code path coming form ext4_iget, before 125 * currently only used in a code path coming form ext4_iget, before
@@ -1178,6 +1172,18 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
1178 if (error < 0) 1172 if (error < 0)
1179 goto out; 1173 goto out;
1180 1174
1175 /*
1176 * Make sure the inline directory entries pass checks before we try to
1177 * convert them, so that we avoid touching stuff that needs fsck.
1178 */
1179 if (S_ISDIR(inode->i_mode)) {
1180 error = ext4_check_all_de(inode, iloc->bh,
1181 buf + EXT4_INLINE_DOTDOT_SIZE,
1182 inline_size - EXT4_INLINE_DOTDOT_SIZE);
1183 if (error)
1184 goto out;
1185 }
1186
1181 error = ext4_destroy_inline_data_nolock(handle, inode); 1187 error = ext4_destroy_inline_data_nolock(handle, inode);
1182 if (error) 1188 if (error)
1183 goto out; 1189 goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8a064734e6eb..367a60c07cf0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -325,18 +325,6 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
325#endif 325#endif
326 326
327/* 327/*
328 * Calculate the number of metadata blocks need to reserve
329 * to allocate a block located at @lblock
330 */
331static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
332{
333 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
334 return ext4_ext_calc_metadata_amount(inode, lblock);
335
336 return ext4_ind_calc_metadata_amount(inode, lblock);
337}
338
339/*
340 * Called with i_data_sem down, which is important since we can call 328 * Called with i_data_sem down, which is important since we can call
341 * ext4_discard_preallocations() from here. 329 * ext4_discard_preallocations() from here.
342 */ 330 */
@@ -357,35 +345,10 @@ void ext4_da_update_reserve_space(struct inode *inode,
357 used = ei->i_reserved_data_blocks; 345 used = ei->i_reserved_data_blocks;
358 } 346 }
359 347
360 if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
361 ext4_warning(inode->i_sb, "ino %lu, allocated %d "
362 "with only %d reserved metadata blocks "
363 "(releasing %d blocks with reserved %d data blocks)",
364 inode->i_ino, ei->i_allocated_meta_blocks,
365 ei->i_reserved_meta_blocks, used,
366 ei->i_reserved_data_blocks);
367 WARN_ON(1);
368 ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
369 }
370
371 /* Update per-inode reservations */ 348 /* Update per-inode reservations */
372 ei->i_reserved_data_blocks -= used; 349 ei->i_reserved_data_blocks -= used;
373 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; 350 percpu_counter_sub(&sbi->s_dirtyclusters_counter, used);
374 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
375 used + ei->i_allocated_meta_blocks);
376 ei->i_allocated_meta_blocks = 0;
377 351
378 if (ei->i_reserved_data_blocks == 0) {
379 /*
380 * We can release all of the reserved metadata blocks
381 * only when we have written all of the delayed
382 * allocation blocks.
383 */
384 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
385 ei->i_reserved_meta_blocks);
386 ei->i_reserved_meta_blocks = 0;
387 ei->i_da_metadata_calc_len = 0;
388 }
389 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 352 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
390 353
391 /* Update quota subsystem for data blocks */ 354 /* Update quota subsystem for data blocks */
@@ -1222,49 +1185,6 @@ static int ext4_journalled_write_end(struct file *file,
1222} 1185}
1223 1186
1224/* 1187/*
1225 * Reserve a metadata for a single block located at lblock
1226 */
1227static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
1228{
1229 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1230 struct ext4_inode_info *ei = EXT4_I(inode);
1231 unsigned int md_needed;
1232 ext4_lblk_t save_last_lblock;
1233 int save_len;
1234
1235 /*
1236 * recalculate the amount of metadata blocks to reserve
1237 * in order to allocate nrblocks
1238 * worse case is one extent per block
1239 */
1240 spin_lock(&ei->i_block_reservation_lock);
1241 /*
1242 * ext4_calc_metadata_amount() has side effects, which we have
1243 * to be prepared undo if we fail to claim space.
1244 */
1245 save_len = ei->i_da_metadata_calc_len;
1246 save_last_lblock = ei->i_da_metadata_calc_last_lblock;
1247 md_needed = EXT4_NUM_B2C(sbi,
1248 ext4_calc_metadata_amount(inode, lblock));
1249 trace_ext4_da_reserve_space(inode, md_needed);
1250
1251 /*
1252 * We do still charge estimated metadata to the sb though;
1253 * we cannot afford to run out of free blocks.
1254 */
1255 if (ext4_claim_free_clusters(sbi, md_needed, 0)) {
1256 ei->i_da_metadata_calc_len = save_len;
1257 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1258 spin_unlock(&ei->i_block_reservation_lock);
1259 return -ENOSPC;
1260 }
1261 ei->i_reserved_meta_blocks += md_needed;
1262 spin_unlock(&ei->i_block_reservation_lock);
1263
1264 return 0; /* success */
1265}
1266
1267/*
1268 * Reserve a single cluster located at lblock 1188 * Reserve a single cluster located at lblock
1269 */ 1189 */
1270static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) 1190static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
@@ -1273,8 +1193,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1273 struct ext4_inode_info *ei = EXT4_I(inode); 1193 struct ext4_inode_info *ei = EXT4_I(inode);
1274 unsigned int md_needed; 1194 unsigned int md_needed;
1275 int ret; 1195 int ret;
1276 ext4_lblk_t save_last_lblock;
1277 int save_len;
1278 1196
1279 /* 1197 /*
1280 * We will charge metadata quota at writeout time; this saves 1198 * We will charge metadata quota at writeout time; this saves
@@ -1295,25 +1213,15 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1295 * ext4_calc_metadata_amount() has side effects, which we have 1213 * ext4_calc_metadata_amount() has side effects, which we have
1296 * to be prepared undo if we fail to claim space. 1214 * to be prepared undo if we fail to claim space.
1297 */ 1215 */
1298 save_len = ei->i_da_metadata_calc_len; 1216 md_needed = 0;
1299 save_last_lblock = ei->i_da_metadata_calc_last_lblock; 1217 trace_ext4_da_reserve_space(inode, 0);
1300 md_needed = EXT4_NUM_B2C(sbi,
1301 ext4_calc_metadata_amount(inode, lblock));
1302 trace_ext4_da_reserve_space(inode, md_needed);
1303 1218
1304 /* 1219 if (ext4_claim_free_clusters(sbi, 1, 0)) {
1305 * We do still charge estimated metadata to the sb though;
1306 * we cannot afford to run out of free blocks.
1307 */
1308 if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
1309 ei->i_da_metadata_calc_len = save_len;
1310 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1311 spin_unlock(&ei->i_block_reservation_lock); 1220 spin_unlock(&ei->i_block_reservation_lock);
1312 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); 1221 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
1313 return -ENOSPC; 1222 return -ENOSPC;
1314 } 1223 }
1315 ei->i_reserved_data_blocks++; 1224 ei->i_reserved_data_blocks++;
1316 ei->i_reserved_meta_blocks += md_needed;
1317 spin_unlock(&ei->i_block_reservation_lock); 1225 spin_unlock(&ei->i_block_reservation_lock);
1318 1226
1319 return 0; /* success */ 1227 return 0; /* success */
@@ -1346,20 +1254,6 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1346 } 1254 }
1347 ei->i_reserved_data_blocks -= to_free; 1255 ei->i_reserved_data_blocks -= to_free;
1348 1256
1349 if (ei->i_reserved_data_blocks == 0) {
1350 /*
1351 * We can release all of the reserved metadata blocks
1352 * only when we have written all of the delayed
1353 * allocation blocks.
1354 * Note that in case of bigalloc, i_reserved_meta_blocks,
1355 * i_reserved_data_blocks, etc. refer to number of clusters.
1356 */
1357 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
1358 ei->i_reserved_meta_blocks);
1359 ei->i_reserved_meta_blocks = 0;
1360 ei->i_da_metadata_calc_len = 0;
1361 }
1362
1363 /* update fs dirty data blocks counter */ 1257 /* update fs dirty data blocks counter */
1364 percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free); 1258 percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
1365 1259
@@ -1500,10 +1394,6 @@ static void ext4_print_free_blocks(struct inode *inode)
1500 ext4_msg(sb, KERN_CRIT, "Block reservation details"); 1394 ext4_msg(sb, KERN_CRIT, "Block reservation details");
1501 ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u", 1395 ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u",
1502 ei->i_reserved_data_blocks); 1396 ei->i_reserved_data_blocks);
1503 ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u",
1504 ei->i_reserved_meta_blocks);
1505 ext4_msg(sb, KERN_CRIT, "i_allocated_meta_blocks=%u",
1506 ei->i_allocated_meta_blocks);
1507 return; 1397 return;
1508} 1398}
1509 1399
@@ -1620,13 +1510,6 @@ add_delayed:
1620 retval = ret; 1510 retval = ret;
1621 goto out_unlock; 1511 goto out_unlock;
1622 } 1512 }
1623 } else {
1624 ret = ext4_da_reserve_metadata(inode, iblock);
1625 if (ret) {
1626 /* not enough space to reserve */
1627 retval = ret;
1628 goto out_unlock;
1629 }
1630 } 1513 }
1631 1514
1632 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 1515 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -2843,8 +2726,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
2843{ 2726{
2844 trace_ext4_alloc_da_blocks(inode); 2727 trace_ext4_alloc_da_blocks(inode);
2845 2728
2846 if (!EXT4_I(inode)->i_reserved_data_blocks && 2729 if (!EXT4_I(inode)->i_reserved_data_blocks)
2847 !EXT4_I(inode)->i_reserved_meta_blocks)
2848 return 0; 2730 return 0;
2849 2731
2850 /* 2732 /*
@@ -3624,7 +3506,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3624 ret = ext4_ext_remove_space(inode, first_block, 3506 ret = ext4_ext_remove_space(inode, first_block,
3625 stop_block - 1); 3507 stop_block - 1);
3626 else 3508 else
3627 ret = ext4_free_hole_blocks(handle, inode, first_block, 3509 ret = ext4_ind_remove_space(handle, inode, first_block,
3628 stop_block); 3510 stop_block);
3629 3511
3630 up_write(&EXT4_I(inode)->i_data_sem); 3512 up_write(&EXT4_I(inode)->i_data_sem);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 2dcb936be90e..956027711faf 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3075,8 +3075,9 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3075 (23 - bsbits)) << 23; 3075 (23 - bsbits)) << 23;
3076 size = 8 * 1024 * 1024; 3076 size = 8 * 1024 * 1024;
3077 } else { 3077 } else {
3078 start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits; 3078 start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
3079 size = ac->ac_o_ex.fe_len << bsbits; 3079 size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
3080 ac->ac_o_ex.fe_len) << bsbits;
3080 } 3081 }
3081 size = size >> bsbits; 3082 size = size >> bsbits;
3082 start = start_off >> bsbits; 3083 start = start_off >> bsbits;
@@ -3216,8 +3217,27 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3216static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) 3217static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3217{ 3218{
3218 struct ext4_prealloc_space *pa = ac->ac_pa; 3219 struct ext4_prealloc_space *pa = ac->ac_pa;
3220 struct ext4_buddy e4b;
3221 int err;
3219 3222
3220 if (pa && pa->pa_type == MB_INODE_PA) 3223 if (pa == NULL) {
3224 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
3225 if (err) {
3226 /*
3227 * This should never happen since we pin the
3228 * pages in the ext4_allocation_context so
3229 * ext4_mb_load_buddy() should never fail.
3230 */
3231 WARN(1, "mb_load_buddy failed (%d)", err);
3232 return;
3233 }
3234 ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3235 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
3236 ac->ac_f_ex.fe_len);
3237 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3238 return;
3239 }
3240 if (pa->pa_type == MB_INODE_PA)
3221 pa->pa_free += ac->ac_b_ex.fe_len; 3241 pa->pa_free += ac->ac_b_ex.fe_len;
3222} 3242}
3223 3243
@@ -4627,7 +4647,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4627 struct buffer_head *gd_bh; 4647 struct buffer_head *gd_bh;
4628 ext4_group_t block_group; 4648 ext4_group_t block_group;
4629 struct ext4_sb_info *sbi; 4649 struct ext4_sb_info *sbi;
4630 struct ext4_inode_info *ei = EXT4_I(inode);
4631 struct ext4_buddy e4b; 4650 struct ext4_buddy e4b;
4632 unsigned int count_clusters; 4651 unsigned int count_clusters;
4633 int err = 0; 4652 int err = 0;
@@ -4838,19 +4857,7 @@ do_more:
4838 &sbi->s_flex_groups[flex_group].free_clusters); 4857 &sbi->s_flex_groups[flex_group].free_clusters);
4839 } 4858 }
4840 4859
4841 if (flags & EXT4_FREE_BLOCKS_RESERVE && ei->i_reserved_data_blocks) { 4860 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4842 percpu_counter_add(&sbi->s_dirtyclusters_counter,
4843 count_clusters);
4844 spin_lock(&ei->i_block_reservation_lock);
4845 if (flags & EXT4_FREE_BLOCKS_METADATA)
4846 ei->i_reserved_meta_blocks += count_clusters;
4847 else
4848 ei->i_reserved_data_blocks += count_clusters;
4849 spin_unlock(&ei->i_block_reservation_lock);
4850 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4851 dquot_reclaim_block(inode,
4852 EXT4_C2B(sbi, count_clusters));
4853 } else if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4854 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); 4861 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4855 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); 4862 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
4856 4863
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index ec092437d3e0..d3567f27bae7 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -39,6 +39,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
39 newext.ee_block = cpu_to_le32(lb->first_block); 39 newext.ee_block = cpu_to_le32(lb->first_block);
40 newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); 40 newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
41 ext4_ext_store_pblock(&newext, lb->first_pblock); 41 ext4_ext_store_pblock(&newext, lb->first_pblock);
42 /* Locking only for convinience since we are operating on temp inode */
43 down_write(&EXT4_I(inode)->i_data_sem);
42 path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0); 44 path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0);
43 45
44 if (IS_ERR(path)) { 46 if (IS_ERR(path)) {
@@ -61,7 +63,9 @@ static int finish_range(handle_t *handle, struct inode *inode,
61 */ 63 */
62 if (needed && ext4_handle_has_enough_credits(handle, 64 if (needed && ext4_handle_has_enough_credits(handle,
63 EXT4_RESERVE_TRANS_BLOCKS)) { 65 EXT4_RESERVE_TRANS_BLOCKS)) {
66 up_write((&EXT4_I(inode)->i_data_sem));
64 retval = ext4_journal_restart(handle, needed); 67 retval = ext4_journal_restart(handle, needed);
68 down_write((&EXT4_I(inode)->i_data_sem));
65 if (retval) 69 if (retval)
66 goto err_out; 70 goto err_out;
67 } else if (needed) { 71 } else if (needed) {
@@ -70,13 +74,16 @@ static int finish_range(handle_t *handle, struct inode *inode,
70 /* 74 /*
71 * IF not able to extend the journal restart the journal 75 * IF not able to extend the journal restart the journal
72 */ 76 */
77 up_write((&EXT4_I(inode)->i_data_sem));
73 retval = ext4_journal_restart(handle, needed); 78 retval = ext4_journal_restart(handle, needed);
79 down_write((&EXT4_I(inode)->i_data_sem));
74 if (retval) 80 if (retval)
75 goto err_out; 81 goto err_out;
76 } 82 }
77 } 83 }
78 retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0); 84 retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
79err_out: 85err_out:
86 up_write((&EXT4_I(inode)->i_data_sem));
80 if (path) { 87 if (path) {
81 ext4_ext_drop_refs(path); 88 ext4_ext_drop_refs(path);
82 kfree(path); 89 kfree(path);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 2484c7ec6a72..671a74b14fd7 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -1013,10 +1013,11 @@ data_copy:
1013 *err = -EBUSY; 1013 *err = -EBUSY;
1014 goto unlock_pages; 1014 goto unlock_pages;
1015 } 1015 }
1016 1016 ext4_double_down_write_data_sem(orig_inode, donor_inode);
1017 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, 1017 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
1018 orig_blk_offset, 1018 orig_blk_offset,
1019 block_len_in_page, err); 1019 block_len_in_page, err);
1020 ext4_double_up_write_data_sem(orig_inode, donor_inode);
1020 if (*err) { 1021 if (*err) {
1021 if (replaced_count) { 1022 if (replaced_count) {
1022 block_len_in_page = replaced_count; 1023 block_len_in_page = replaced_count;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6df7bc611dbd..32b43ad154b9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2142,10 +2142,6 @@ static int ext4_check_descriptors(struct super_block *sb,
2142 } 2142 }
2143 if (NULL != first_not_zeroed) 2143 if (NULL != first_not_zeroed)
2144 *first_not_zeroed = grp; 2144 *first_not_zeroed = grp;
2145
2146 ext4_free_blocks_count_set(sbi->s_es,
2147 EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
2148 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
2149 return 1; 2145 return 1;
2150} 2146}
2151 2147
@@ -3883,13 +3879,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3883 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 3879 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
3884 goto failed_mount2; 3880 goto failed_mount2;
3885 } 3881 }
3886 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
3887 if (!ext4_fill_flex_info(sb)) {
3888 ext4_msg(sb, KERN_ERR,
3889 "unable to initialize "
3890 "flex_bg meta info!");
3891 goto failed_mount2;
3892 }
3893 3882
3894 sbi->s_gdb_count = db_count; 3883 sbi->s_gdb_count = db_count;
3895 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3884 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
@@ -3902,23 +3891,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3902 /* Register extent status tree shrinker */ 3891 /* Register extent status tree shrinker */
3903 ext4_es_register_shrinker(sbi); 3892 ext4_es_register_shrinker(sbi);
3904 3893
3905 err = percpu_counter_init(&sbi->s_freeclusters_counter, 3894 if ((err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0)) != 0) {
3906 ext4_count_free_clusters(sb));
3907 if (!err) {
3908 err = percpu_counter_init(&sbi->s_freeinodes_counter,
3909 ext4_count_free_inodes(sb));
3910 }
3911 if (!err) {
3912 err = percpu_counter_init(&sbi->s_dirs_counter,
3913 ext4_count_dirs(sb));
3914 }
3915 if (!err) {
3916 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
3917 }
3918 if (!err) {
3919 err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0);
3920 }
3921 if (err) {
3922 ext4_msg(sb, KERN_ERR, "insufficient memory"); 3895 ext4_msg(sb, KERN_ERR, "insufficient memory");
3923 goto failed_mount3; 3896 goto failed_mount3;
3924 } 3897 }
@@ -4022,18 +3995,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
4022 3995
4023 sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; 3996 sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
4024 3997
4025 /*
4026 * The journal may have updated the bg summary counts, so we
4027 * need to update the global counters.
4028 */
4029 percpu_counter_set(&sbi->s_freeclusters_counter,
4030 ext4_count_free_clusters(sb));
4031 percpu_counter_set(&sbi->s_freeinodes_counter,
4032 ext4_count_free_inodes(sb));
4033 percpu_counter_set(&sbi->s_dirs_counter,
4034 ext4_count_dirs(sb));
4035 percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
4036
4037no_journal: 3998no_journal:
4038 if (ext4_mballoc_ready) { 3999 if (ext4_mballoc_ready) {
4039 sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id); 4000 sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id);
@@ -4141,6 +4102,33 @@ no_journal:
4141 goto failed_mount5; 4102 goto failed_mount5;
4142 } 4103 }
4143 4104
4105 block = ext4_count_free_clusters(sb);
4106 ext4_free_blocks_count_set(sbi->s_es,
4107 EXT4_C2B(sbi, block));
4108 err = percpu_counter_init(&sbi->s_freeclusters_counter, block);
4109 if (!err) {
4110 unsigned long freei = ext4_count_free_inodes(sb);
4111 sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
4112 err = percpu_counter_init(&sbi->s_freeinodes_counter, freei);
4113 }
4114 if (!err)
4115 err = percpu_counter_init(&sbi->s_dirs_counter,
4116 ext4_count_dirs(sb));
4117 if (!err)
4118 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
4119 if (err) {
4120 ext4_msg(sb, KERN_ERR, "insufficient memory");
4121 goto failed_mount6;
4122 }
4123
4124 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
4125 if (!ext4_fill_flex_info(sb)) {
4126 ext4_msg(sb, KERN_ERR,
4127 "unable to initialize "
4128 "flex_bg meta info!");
4129 goto failed_mount6;
4130 }
4131
4144 err = ext4_register_li_request(sb, first_not_zeroed); 4132 err = ext4_register_li_request(sb, first_not_zeroed);
4145 if (err) 4133 if (err)
4146 goto failed_mount6; 4134 goto failed_mount6;
@@ -4215,6 +4203,12 @@ failed_mount7:
4215 ext4_unregister_li_request(sb); 4203 ext4_unregister_li_request(sb);
4216failed_mount6: 4204failed_mount6:
4217 ext4_mb_release(sb); 4205 ext4_mb_release(sb);
4206 if (sbi->s_flex_groups)
4207 ext4_kvfree(sbi->s_flex_groups);
4208 percpu_counter_destroy(&sbi->s_freeclusters_counter);
4209 percpu_counter_destroy(&sbi->s_freeinodes_counter);
4210 percpu_counter_destroy(&sbi->s_dirs_counter);
4211 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4218failed_mount5: 4212failed_mount5:
4219 ext4_ext_release(sb); 4213 ext4_ext_release(sb);
4220 ext4_release_system_zone(sb); 4214 ext4_release_system_zone(sb);
@@ -4233,12 +4227,6 @@ failed_mount_wq:
4233failed_mount3: 4227failed_mount3:
4234 ext4_es_unregister_shrinker(sbi); 4228 ext4_es_unregister_shrinker(sbi);
4235 del_timer_sync(&sbi->s_err_report); 4229 del_timer_sync(&sbi->s_err_report);
4236 if (sbi->s_flex_groups)
4237 ext4_kvfree(sbi->s_flex_groups);
4238 percpu_counter_destroy(&sbi->s_freeclusters_counter);
4239 percpu_counter_destroy(&sbi->s_freeinodes_counter);
4240 percpu_counter_destroy(&sbi->s_dirs_counter);
4241 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4242 percpu_counter_destroy(&sbi->s_extent_cache_cnt); 4230 percpu_counter_destroy(&sbi->s_extent_cache_cnt);
4243 if (sbi->s_mmp_tsk) 4231 if (sbi->s_mmp_tsk)
4244 kthread_stop(sbi->s_mmp_tsk); 4232 kthread_stop(sbi->s_mmp_tsk);
@@ -4556,11 +4544,13 @@ static int ext4_commit_super(struct super_block *sb, int sync)
4556 else 4544 else
4557 es->s_kbytes_written = 4545 es->s_kbytes_written =
4558 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); 4546 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
4559 ext4_free_blocks_count_set(es, 4547 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
4548 ext4_free_blocks_count_set(es,
4560 EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( 4549 EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
4561 &EXT4_SB(sb)->s_freeclusters_counter))); 4550 &EXT4_SB(sb)->s_freeclusters_counter)));
4562 es->s_free_inodes_count = 4551 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
4563 cpu_to_le32(percpu_counter_sum_positive( 4552 es->s_free_inodes_count =
4553 cpu_to_le32(percpu_counter_sum_positive(
4564 &EXT4_SB(sb)->s_freeinodes_counter)); 4554 &EXT4_SB(sb)->s_freeinodes_counter));
4565 BUFFER_TRACE(sbh, "marking dirty"); 4555 BUFFER_TRACE(sbh, "marking dirty");
4566 ext4_superblock_csum_set(sb); 4556 ext4_superblock_csum_set(sb);
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index dbe2141d10ad..83b9b5a8d112 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -203,12 +203,6 @@ static int __f2fs_set_acl(struct inode *inode, int type,
203 size_t size = 0; 203 size_t size = 0;
204 int error; 204 int error;
205 205
206 if (acl) {
207 error = posix_acl_valid(acl);
208 if (error < 0)
209 return error;
210 }
211
212 switch (type) { 206 switch (type) {
213 case ACL_TYPE_ACCESS: 207 case ACL_TYPE_ACCESS:
214 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; 208 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0b4710c1d370..6aeed5bada52 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -22,7 +22,7 @@
22#include "segment.h" 22#include "segment.h"
23#include <trace/events/f2fs.h> 23#include <trace/events/f2fs.h>
24 24
25static struct kmem_cache *orphan_entry_slab; 25static struct kmem_cache *ino_entry_slab;
26static struct kmem_cache *inode_entry_slab; 26static struct kmem_cache *inode_entry_slab;
27 27
28/* 28/*
@@ -282,72 +282,120 @@ const struct address_space_operations f2fs_meta_aops = {
282 .set_page_dirty = f2fs_set_meta_page_dirty, 282 .set_page_dirty = f2fs_set_meta_page_dirty,
283}; 283};
284 284
285static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
286{
287 struct ino_entry *e;
288retry:
289 spin_lock(&sbi->ino_lock[type]);
290
291 e = radix_tree_lookup(&sbi->ino_root[type], ino);
292 if (!e) {
293 e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
294 if (!e) {
295 spin_unlock(&sbi->ino_lock[type]);
296 goto retry;
297 }
298 if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
299 spin_unlock(&sbi->ino_lock[type]);
300 kmem_cache_free(ino_entry_slab, e);
301 goto retry;
302 }
303 memset(e, 0, sizeof(struct ino_entry));
304 e->ino = ino;
305
306 list_add_tail(&e->list, &sbi->ino_list[type]);
307 }
308 spin_unlock(&sbi->ino_lock[type]);
309}
310
311static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
312{
313 struct ino_entry *e;
314
315 spin_lock(&sbi->ino_lock[type]);
316 e = radix_tree_lookup(&sbi->ino_root[type], ino);
317 if (e) {
318 list_del(&e->list);
319 radix_tree_delete(&sbi->ino_root[type], ino);
320 if (type == ORPHAN_INO)
321 sbi->n_orphans--;
322 spin_unlock(&sbi->ino_lock[type]);
323 kmem_cache_free(ino_entry_slab, e);
324 return;
325 }
326 spin_unlock(&sbi->ino_lock[type]);
327}
328
329void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
330{
331 /* add new dirty ino entry into list */
332 __add_ino_entry(sbi, ino, type);
333}
334
335void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
336{
337 /* remove dirty ino entry from list */
338 __remove_ino_entry(sbi, ino, type);
339}
340
341/* mode should be APPEND_INO or UPDATE_INO */
342bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
343{
344 struct ino_entry *e;
345 spin_lock(&sbi->ino_lock[mode]);
346 e = radix_tree_lookup(&sbi->ino_root[mode], ino);
347 spin_unlock(&sbi->ino_lock[mode]);
348 return e ? true : false;
349}
350
351static void release_dirty_inode(struct f2fs_sb_info *sbi)
352{
353 struct ino_entry *e, *tmp;
354 int i;
355
356 for (i = APPEND_INO; i <= UPDATE_INO; i++) {
357 spin_lock(&sbi->ino_lock[i]);
358 list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) {
359 list_del(&e->list);
360 radix_tree_delete(&sbi->ino_root[i], e->ino);
361 kmem_cache_free(ino_entry_slab, e);
362 }
363 spin_unlock(&sbi->ino_lock[i]);
364 }
365}
366
285int acquire_orphan_inode(struct f2fs_sb_info *sbi) 367int acquire_orphan_inode(struct f2fs_sb_info *sbi)
286{ 368{
287 int err = 0; 369 int err = 0;
288 370
289 spin_lock(&sbi->orphan_inode_lock); 371 spin_lock(&sbi->ino_lock[ORPHAN_INO]);
290 if (unlikely(sbi->n_orphans >= sbi->max_orphans)) 372 if (unlikely(sbi->n_orphans >= sbi->max_orphans))
291 err = -ENOSPC; 373 err = -ENOSPC;
292 else 374 else
293 sbi->n_orphans++; 375 sbi->n_orphans++;
294 spin_unlock(&sbi->orphan_inode_lock); 376 spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
295 377
296 return err; 378 return err;
297} 379}
298 380
299void release_orphan_inode(struct f2fs_sb_info *sbi) 381void release_orphan_inode(struct f2fs_sb_info *sbi)
300{ 382{
301 spin_lock(&sbi->orphan_inode_lock); 383 spin_lock(&sbi->ino_lock[ORPHAN_INO]);
302 f2fs_bug_on(sbi->n_orphans == 0); 384 f2fs_bug_on(sbi->n_orphans == 0);
303 sbi->n_orphans--; 385 sbi->n_orphans--;
304 spin_unlock(&sbi->orphan_inode_lock); 386 spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
305} 387}
306 388
307void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 389void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
308{ 390{
309 struct list_head *head; 391 /* add new orphan ino entry into list */
310 struct orphan_inode_entry *new, *orphan; 392 __add_ino_entry(sbi, ino, ORPHAN_INO);
311
312 new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
313 new->ino = ino;
314
315 spin_lock(&sbi->orphan_inode_lock);
316 head = &sbi->orphan_inode_list;
317 list_for_each_entry(orphan, head, list) {
318 if (orphan->ino == ino) {
319 spin_unlock(&sbi->orphan_inode_lock);
320 kmem_cache_free(orphan_entry_slab, new);
321 return;
322 }
323
324 if (orphan->ino > ino)
325 break;
326 }
327
328 /* add new orphan entry into list which is sorted by inode number */
329 list_add_tail(&new->list, &orphan->list);
330 spin_unlock(&sbi->orphan_inode_lock);
331} 393}
332 394
333void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 395void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
334{ 396{
335 struct list_head *head; 397 /* remove orphan entry from orphan list */
336 struct orphan_inode_entry *orphan; 398 __remove_ino_entry(sbi, ino, ORPHAN_INO);
337
338 spin_lock(&sbi->orphan_inode_lock);
339 head = &sbi->orphan_inode_list;
340 list_for_each_entry(orphan, head, list) {
341 if (orphan->ino == ino) {
342 list_del(&orphan->list);
343 f2fs_bug_on(sbi->n_orphans == 0);
344 sbi->n_orphans--;
345 spin_unlock(&sbi->orphan_inode_lock);
346 kmem_cache_free(orphan_entry_slab, orphan);
347 return;
348 }
349 }
350 spin_unlock(&sbi->orphan_inode_lock);
351} 399}
352 400
353static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 401static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -401,14 +449,14 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
401 unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans + 449 unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
402 (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); 450 (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
403 struct page *page = NULL; 451 struct page *page = NULL;
404 struct orphan_inode_entry *orphan = NULL; 452 struct ino_entry *orphan = NULL;
405 453
406 for (index = 0; index < orphan_blocks; index++) 454 for (index = 0; index < orphan_blocks; index++)
407 grab_meta_page(sbi, start_blk + index); 455 grab_meta_page(sbi, start_blk + index);
408 456
409 index = 1; 457 index = 1;
410 spin_lock(&sbi->orphan_inode_lock); 458 spin_lock(&sbi->ino_lock[ORPHAN_INO]);
411 head = &sbi->orphan_inode_list; 459 head = &sbi->ino_list[ORPHAN_INO];
412 460
413 /* loop for each orphan inode entry and write them in Journal block */ 461 /* loop for each orphan inode entry and write them in Journal block */
414 list_for_each_entry(orphan, head, list) { 462 list_for_each_entry(orphan, head, list) {
@@ -448,7 +496,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
448 f2fs_put_page(page, 1); 496 f2fs_put_page(page, 1);
449 } 497 }
450 498
451 spin_unlock(&sbi->orphan_inode_lock); 499 spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
452} 500}
453 501
454static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, 502static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -714,10 +762,10 @@ retry_flush_dents:
714 * until finishing nat/sit flush. 762 * until finishing nat/sit flush.
715 */ 763 */
716retry_flush_nodes: 764retry_flush_nodes:
717 mutex_lock(&sbi->node_write); 765 down_write(&sbi->node_write);
718 766
719 if (get_pages(sbi, F2FS_DIRTY_NODES)) { 767 if (get_pages(sbi, F2FS_DIRTY_NODES)) {
720 mutex_unlock(&sbi->node_write); 768 up_write(&sbi->node_write);
721 sync_node_pages(sbi, 0, &wbc); 769 sync_node_pages(sbi, 0, &wbc);
722 goto retry_flush_nodes; 770 goto retry_flush_nodes;
723 } 771 }
@@ -726,7 +774,7 @@ retry_flush_nodes:
726 774
727static void unblock_operations(struct f2fs_sb_info *sbi) 775static void unblock_operations(struct f2fs_sb_info *sbi)
728{ 776{
729 mutex_unlock(&sbi->node_write); 777 up_write(&sbi->node_write);
730 f2fs_unlock_all(sbi); 778 f2fs_unlock_all(sbi);
731} 779}
732 780
@@ -748,6 +796,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
748static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) 796static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
749{ 797{
750 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 798 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
799 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
751 nid_t last_nid = 0; 800 nid_t last_nid = 0;
752 block_t start_blk; 801 block_t start_blk;
753 struct page *cp_page; 802 struct page *cp_page;
@@ -761,7 +810,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
761 * This avoids to conduct wrong roll-forward operations and uses 810 * This avoids to conduct wrong roll-forward operations and uses
762 * metapages, so should be called prior to sync_meta_pages below. 811 * metapages, so should be called prior to sync_meta_pages below.
763 */ 812 */
764 discard_next_dnode(sbi); 813 discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
765 814
766 /* Flush all the NAT/SIT pages */ 815 /* Flush all the NAT/SIT pages */
767 while (get_pages(sbi, F2FS_DIRTY_META)) 816 while (get_pages(sbi, F2FS_DIRTY_META))
@@ -885,8 +934,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
885 /* Here, we only have one bio having CP pack */ 934 /* Here, we only have one bio having CP pack */
886 sync_meta_pages(sbi, META_FLUSH, LONG_MAX); 935 sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
887 936
888 if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { 937 if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
889 clear_prefree_segments(sbi); 938 clear_prefree_segments(sbi);
939 release_dirty_inode(sbi);
890 F2FS_RESET_SB_DIRT(sbi); 940 F2FS_RESET_SB_DIRT(sbi);
891 } 941 }
892} 942}
@@ -932,31 +982,37 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
932 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); 982 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
933} 983}
934 984
935void init_orphan_info(struct f2fs_sb_info *sbi) 985void init_ino_entry_info(struct f2fs_sb_info *sbi)
936{ 986{
937 spin_lock_init(&sbi->orphan_inode_lock); 987 int i;
938 INIT_LIST_HEAD(&sbi->orphan_inode_list); 988
939 sbi->n_orphans = 0; 989 for (i = 0; i < MAX_INO_ENTRY; i++) {
990 INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
991 spin_lock_init(&sbi->ino_lock[i]);
992 INIT_LIST_HEAD(&sbi->ino_list[i]);
993 }
994
940 /* 995 /*
941 * considering 512 blocks in a segment 8 blocks are needed for cp 996 * considering 512 blocks in a segment 8 blocks are needed for cp
942 * and log segment summaries. Remaining blocks are used to keep 997 * and log segment summaries. Remaining blocks are used to keep
943 * orphan entries with the limitation one reserved segment 998 * orphan entries with the limitation one reserved segment
944 * for cp pack we can have max 1020*504 orphan entries 999 * for cp pack we can have max 1020*504 orphan entries
945 */ 1000 */
1001 sbi->n_orphans = 0;
946 sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) 1002 sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
947 * F2FS_ORPHANS_PER_BLOCK; 1003 * F2FS_ORPHANS_PER_BLOCK;
948} 1004}
949 1005
950int __init create_checkpoint_caches(void) 1006int __init create_checkpoint_caches(void)
951{ 1007{
952 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", 1008 ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
953 sizeof(struct orphan_inode_entry)); 1009 sizeof(struct ino_entry));
954 if (!orphan_entry_slab) 1010 if (!ino_entry_slab)
955 return -ENOMEM; 1011 return -ENOMEM;
956 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", 1012 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
957 sizeof(struct dir_inode_entry)); 1013 sizeof(struct dir_inode_entry));
958 if (!inode_entry_slab) { 1014 if (!inode_entry_slab) {
959 kmem_cache_destroy(orphan_entry_slab); 1015 kmem_cache_destroy(ino_entry_slab);
960 return -ENOMEM; 1016 return -ENOMEM;
961 } 1017 }
962 return 0; 1018 return 0;
@@ -964,6 +1020,6 @@ int __init create_checkpoint_caches(void)
964 1020
965void destroy_checkpoint_caches(void) 1021void destroy_checkpoint_caches(void)
966{ 1022{
967 kmem_cache_destroy(orphan_entry_slab); 1023 kmem_cache_destroy(ino_entry_slab);
968 kmem_cache_destroy(inode_entry_slab); 1024 kmem_cache_destroy(inode_entry_slab);
969} 1025}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f8cf619edb5f..03313099c51c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -139,7 +139,10 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
139 /* change META to META_FLUSH in the checkpoint procedure */ 139 /* change META to META_FLUSH in the checkpoint procedure */
140 if (type >= META_FLUSH) { 140 if (type >= META_FLUSH) {
141 io->fio.type = META_FLUSH; 141 io->fio.type = META_FLUSH;
142 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; 142 if (test_opt(sbi, NOBARRIER))
143 io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
144 else
145 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
143 } 146 }
144 __submit_merged_bio(io); 147 __submit_merged_bio(io);
145 up_write(&io->io_rwsem); 148 up_write(&io->io_rwsem);
@@ -626,8 +629,10 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
626 if (check_extent_cache(inode, pgofs, bh_result)) 629 if (check_extent_cache(inode, pgofs, bh_result))
627 goto out; 630 goto out;
628 631
629 if (create) 632 if (create) {
633 f2fs_balance_fs(sbi);
630 f2fs_lock_op(sbi); 634 f2fs_lock_op(sbi);
635 }
631 636
632 /* When reading holes, we need its node page */ 637 /* When reading holes, we need its node page */
633 set_new_dnode(&dn, inode, NULL, NULL, 0); 638 set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -784,9 +789,11 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
784 !is_cold_data(page) && 789 !is_cold_data(page) &&
785 need_inplace_update(inode))) { 790 need_inplace_update(inode))) {
786 rewrite_data_page(page, old_blkaddr, fio); 791 rewrite_data_page(page, old_blkaddr, fio);
792 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
787 } else { 793 } else {
788 write_data_page(page, &dn, &new_blkaddr, fio); 794 write_data_page(page, &dn, &new_blkaddr, fio);
789 update_extent_cache(new_blkaddr, &dn); 795 update_extent_cache(new_blkaddr, &dn);
796 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
790 } 797 }
791out_writepage: 798out_writepage:
792 f2fs_put_dnode(&dn); 799 f2fs_put_dnode(&dn);
@@ -914,6 +921,16 @@ skip_write:
914 return 0; 921 return 0;
915} 922}
916 923
924static void f2fs_write_failed(struct address_space *mapping, loff_t to)
925{
926 struct inode *inode = mapping->host;
927
928 if (to > inode->i_size) {
929 truncate_pagecache(inode, inode->i_size);
930 truncate_blocks(inode, inode->i_size);
931 }
932}
933
917static int f2fs_write_begin(struct file *file, struct address_space *mapping, 934static int f2fs_write_begin(struct file *file, struct address_space *mapping,
918 loff_t pos, unsigned len, unsigned flags, 935 loff_t pos, unsigned len, unsigned flags,
919 struct page **pagep, void **fsdata) 936 struct page **pagep, void **fsdata)
@@ -931,11 +948,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
931repeat: 948repeat:
932 err = f2fs_convert_inline_data(inode, pos + len); 949 err = f2fs_convert_inline_data(inode, pos + len);
933 if (err) 950 if (err)
934 return err; 951 goto fail;
935 952
936 page = grab_cache_page_write_begin(mapping, index, flags); 953 page = grab_cache_page_write_begin(mapping, index, flags);
937 if (!page) 954 if (!page) {
938 return -ENOMEM; 955 err = -ENOMEM;
956 goto fail;
957 }
939 958
940 /* to avoid latency during memory pressure */ 959 /* to avoid latency during memory pressure */
941 unlock_page(page); 960 unlock_page(page);
@@ -949,10 +968,9 @@ repeat:
949 set_new_dnode(&dn, inode, NULL, NULL, 0); 968 set_new_dnode(&dn, inode, NULL, NULL, 0);
950 err = f2fs_reserve_block(&dn, index); 969 err = f2fs_reserve_block(&dn, index);
951 f2fs_unlock_op(sbi); 970 f2fs_unlock_op(sbi);
952
953 if (err) { 971 if (err) {
954 f2fs_put_page(page, 0); 972 f2fs_put_page(page, 0);
955 return err; 973 goto fail;
956 } 974 }
957inline_data: 975inline_data:
958 lock_page(page); 976 lock_page(page);
@@ -982,19 +1000,20 @@ inline_data:
982 err = f2fs_read_inline_data(inode, page); 1000 err = f2fs_read_inline_data(inode, page);
983 if (err) { 1001 if (err) {
984 page_cache_release(page); 1002 page_cache_release(page);
985 return err; 1003 goto fail;
986 } 1004 }
987 } else { 1005 } else {
988 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 1006 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
989 READ_SYNC); 1007 READ_SYNC);
990 if (err) 1008 if (err)
991 return err; 1009 goto fail;
992 } 1010 }
993 1011
994 lock_page(page); 1012 lock_page(page);
995 if (unlikely(!PageUptodate(page))) { 1013 if (unlikely(!PageUptodate(page))) {
996 f2fs_put_page(page, 1); 1014 f2fs_put_page(page, 1);
997 return -EIO; 1015 err = -EIO;
1016 goto fail;
998 } 1017 }
999 if (unlikely(page->mapping != mapping)) { 1018 if (unlikely(page->mapping != mapping)) {
1000 f2fs_put_page(page, 1); 1019 f2fs_put_page(page, 1);
@@ -1005,6 +1024,9 @@ out:
1005 SetPageUptodate(page); 1024 SetPageUptodate(page);
1006 clear_cold_data(page); 1025 clear_cold_data(page);
1007 return 0; 1026 return 0;
1027fail:
1028 f2fs_write_failed(mapping, pos + len);
1029 return err;
1008} 1030}
1009 1031
1010static int f2fs_write_end(struct file *file, 1032static int f2fs_write_end(struct file *file,
@@ -1016,7 +1038,6 @@ static int f2fs_write_end(struct file *file,
1016 1038
1017 trace_f2fs_write_end(inode, pos, len, copied); 1039 trace_f2fs_write_end(inode, pos, len, copied);
1018 1040
1019 SetPageUptodate(page);
1020 set_page_dirty(page); 1041 set_page_dirty(page);
1021 1042
1022 if (pos + copied > i_size_read(inode)) { 1043 if (pos + copied > i_size_read(inode)) {
@@ -1050,7 +1071,10 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1050 struct iov_iter *iter, loff_t offset) 1071 struct iov_iter *iter, loff_t offset)
1051{ 1072{
1052 struct file *file = iocb->ki_filp; 1073 struct file *file = iocb->ki_filp;
1053 struct inode *inode = file->f_mapping->host; 1074 struct address_space *mapping = file->f_mapping;
1075 struct inode *inode = mapping->host;
1076 size_t count = iov_iter_count(iter);
1077 int err;
1054 1078
1055 /* Let buffer I/O handle the inline data case. */ 1079 /* Let buffer I/O handle the inline data case. */
1056 if (f2fs_has_inline_data(inode)) 1080 if (f2fs_has_inline_data(inode))
@@ -1062,8 +1086,15 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1062 /* clear fsync mark to recover these blocks */ 1086 /* clear fsync mark to recover these blocks */
1063 fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); 1087 fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
1064 1088
1065 return blockdev_direct_IO(rw, iocb, inode, iter, offset, 1089 trace_f2fs_direct_IO_enter(inode, offset, count, rw);
1066 get_data_block); 1090
1091 err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
1092 if (err < 0 && (rw & WRITE))
1093 f2fs_write_failed(mapping, offset + count);
1094
1095 trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
1096
1097 return err;
1067} 1098}
1068 1099
1069static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, 1100static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index b52c12cf5873..a441ba33be11 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -167,7 +167,7 @@ get_cache:
167 si->cache_mem += npages << PAGE_CACHE_SHIFT; 167 si->cache_mem += npages << PAGE_CACHE_SHIFT;
168 npages = META_MAPPING(sbi)->nrpages; 168 npages = META_MAPPING(sbi)->nrpages;
169 si->cache_mem += npages << PAGE_CACHE_SHIFT; 169 si->cache_mem += npages << PAGE_CACHE_SHIFT;
170 si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); 170 si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry);
171 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); 171 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
172} 172}
173 173
@@ -345,21 +345,14 @@ void __init f2fs_create_root_stats(void)
345 345
346 f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); 346 f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
347 if (!f2fs_debugfs_root) 347 if (!f2fs_debugfs_root)
348 goto bail; 348 return;
349 349
350 file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, 350 file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
351 NULL, &stat_fops); 351 NULL, &stat_fops);
352 if (!file) 352 if (!file) {
353 goto free_debugfs_dir; 353 debugfs_remove(f2fs_debugfs_root);
354 354 f2fs_debugfs_root = NULL;
355 return; 355 }
356
357free_debugfs_dir:
358 debugfs_remove(f2fs_debugfs_root);
359
360bail:
361 f2fs_debugfs_root = NULL;
362 return;
363} 356}
364 357
365void f2fs_destroy_root_stats(void) 358void f2fs_destroy_root_stats(void)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index a4addd72ebbd..bcf893c3d903 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -77,8 +77,8 @@ static unsigned long dir_block_index(unsigned int level,
77 return bidx; 77 return bidx;
78} 78}
79 79
80static bool early_match_name(const char *name, size_t namelen, 80static bool early_match_name(size_t namelen, f2fs_hash_t namehash,
81 f2fs_hash_t namehash, struct f2fs_dir_entry *de) 81 struct f2fs_dir_entry *de)
82{ 82{
83 if (le16_to_cpu(de->name_len) != namelen) 83 if (le16_to_cpu(de->name_len) != namelen)
84 return false; 84 return false;
@@ -90,7 +90,7 @@ static bool early_match_name(const char *name, size_t namelen,
90} 90}
91 91
92static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, 92static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
93 const char *name, size_t namelen, int *max_slots, 93 struct qstr *name, int *max_slots,
94 f2fs_hash_t namehash, struct page **res_page) 94 f2fs_hash_t namehash, struct page **res_page)
95{ 95{
96 struct f2fs_dir_entry *de; 96 struct f2fs_dir_entry *de;
@@ -109,9 +109,10 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
109 continue; 109 continue;
110 } 110 }
111 de = &dentry_blk->dentry[bit_pos]; 111 de = &dentry_blk->dentry[bit_pos];
112 if (early_match_name(name, namelen, namehash, de)) { 112 if (early_match_name(name->len, namehash, de)) {
113 if (!memcmp(dentry_blk->filename[bit_pos], 113 if (!memcmp(dentry_blk->filename[bit_pos],
114 name, namelen)) { 114 name->name,
115 name->len)) {
115 *res_page = dentry_page; 116 *res_page = dentry_page;
116 goto found; 117 goto found;
117 } 118 }
@@ -120,6 +121,13 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
120 *max_slots = max_len; 121 *max_slots = max_len;
121 max_len = 0; 122 max_len = 0;
122 } 123 }
124
125 /*
126 * For the most part, it should be a bug when name_len is zero.
127 * We stop here for figuring out where the bugs are occurred.
128 */
129 f2fs_bug_on(!de->name_len);
130
123 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); 131 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
124 } 132 }
125 133
@@ -132,10 +140,10 @@ found:
132} 140}
133 141
134static struct f2fs_dir_entry *find_in_level(struct inode *dir, 142static struct f2fs_dir_entry *find_in_level(struct inode *dir,
135 unsigned int level, const char *name, size_t namelen, 143 unsigned int level, struct qstr *name,
136 f2fs_hash_t namehash, struct page **res_page) 144 f2fs_hash_t namehash, struct page **res_page)
137{ 145{
138 int s = GET_DENTRY_SLOTS(namelen); 146 int s = GET_DENTRY_SLOTS(name->len);
139 unsigned int nbucket, nblock; 147 unsigned int nbucket, nblock;
140 unsigned int bidx, end_block; 148 unsigned int bidx, end_block;
141 struct page *dentry_page; 149 struct page *dentry_page;
@@ -160,8 +168,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
160 continue; 168 continue;
161 } 169 }
162 170
163 de = find_in_block(dentry_page, name, namelen, 171 de = find_in_block(dentry_page, name, &max_slots,
164 &max_slots, namehash, res_page); 172 namehash, res_page);
165 if (de) 173 if (de)
166 break; 174 break;
167 175
@@ -187,8 +195,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
187struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, 195struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
188 struct qstr *child, struct page **res_page) 196 struct qstr *child, struct page **res_page)
189{ 197{
190 const char *name = child->name;
191 size_t namelen = child->len;
192 unsigned long npages = dir_blocks(dir); 198 unsigned long npages = dir_blocks(dir);
193 struct f2fs_dir_entry *de = NULL; 199 struct f2fs_dir_entry *de = NULL;
194 f2fs_hash_t name_hash; 200 f2fs_hash_t name_hash;
@@ -200,12 +206,11 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
200 206
201 *res_page = NULL; 207 *res_page = NULL;
202 208
203 name_hash = f2fs_dentry_hash(name, namelen); 209 name_hash = f2fs_dentry_hash(child);
204 max_depth = F2FS_I(dir)->i_current_depth; 210 max_depth = F2FS_I(dir)->i_current_depth;
205 211
206 for (level = 0; level < max_depth; level++) { 212 for (level = 0; level < max_depth; level++) {
207 de = find_in_level(dir, level, name, 213 de = find_in_level(dir, level, child, name_hash, res_page);
208 namelen, name_hash, res_page);
209 if (de) 214 if (de)
210 break; 215 break;
211 } 216 }
@@ -298,14 +303,13 @@ static int make_empty_dir(struct inode *inode,
298 struct page *dentry_page; 303 struct page *dentry_page;
299 struct f2fs_dentry_block *dentry_blk; 304 struct f2fs_dentry_block *dentry_blk;
300 struct f2fs_dir_entry *de; 305 struct f2fs_dir_entry *de;
301 void *kaddr;
302 306
303 dentry_page = get_new_data_page(inode, page, 0, true); 307 dentry_page = get_new_data_page(inode, page, 0, true);
304 if (IS_ERR(dentry_page)) 308 if (IS_ERR(dentry_page))
305 return PTR_ERR(dentry_page); 309 return PTR_ERR(dentry_page);
306 310
307 kaddr = kmap_atomic(dentry_page); 311
308 dentry_blk = (struct f2fs_dentry_block *)kaddr; 312 dentry_blk = kmap_atomic(dentry_page);
309 313
310 de = &dentry_blk->dentry[0]; 314 de = &dentry_blk->dentry[0];
311 de->name_len = cpu_to_le16(1); 315 de->name_len = cpu_to_le16(1);
@@ -323,7 +327,7 @@ static int make_empty_dir(struct inode *inode,
323 327
324 test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); 328 test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
325 test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); 329 test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
326 kunmap_atomic(kaddr); 330 kunmap_atomic(dentry_blk);
327 331
328 set_page_dirty(dentry_page); 332 set_page_dirty(dentry_page);
329 f2fs_put_page(dentry_page, 1); 333 f2fs_put_page(dentry_page, 1);
@@ -333,11 +337,12 @@ static int make_empty_dir(struct inode *inode,
333static struct page *init_inode_metadata(struct inode *inode, 337static struct page *init_inode_metadata(struct inode *inode,
334 struct inode *dir, const struct qstr *name) 338 struct inode *dir, const struct qstr *name)
335{ 339{
340 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
336 struct page *page; 341 struct page *page;
337 int err; 342 int err;
338 343
339 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { 344 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
340 page = new_inode_page(inode, name); 345 page = new_inode_page(inode);
341 if (IS_ERR(page)) 346 if (IS_ERR(page))
342 return page; 347 return page;
343 348
@@ -362,7 +367,8 @@ static struct page *init_inode_metadata(struct inode *inode,
362 set_cold_node(inode, page); 367 set_cold_node(inode, page);
363 } 368 }
364 369
365 init_dent_inode(name, page); 370 if (name)
371 init_dent_inode(name, page);
366 372
367 /* 373 /*
368 * This file should be checkpointed during fsync. 374 * This file should be checkpointed during fsync.
@@ -370,6 +376,12 @@ static struct page *init_inode_metadata(struct inode *inode,
370 */ 376 */
371 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { 377 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
372 file_lost_pino(inode); 378 file_lost_pino(inode);
379 /*
380 * If link the tmpfile to alias through linkat path,
381 * we should remove this inode from orphan list.
382 */
383 if (inode->i_nlink == 0)
384 remove_orphan_inode(sbi, inode->i_ino);
373 inc_nlink(inode); 385 inc_nlink(inode);
374 } 386 }
375 return page; 387 return page;
@@ -453,7 +465,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
453 int err = 0; 465 int err = 0;
454 int i; 466 int i;
455 467
456 dentry_hash = f2fs_dentry_hash(name->name, name->len); 468 dentry_hash = f2fs_dentry_hash(name);
457 level = 0; 469 level = 0;
458 current_depth = F2FS_I(dir)->i_current_depth; 470 current_depth = F2FS_I(dir)->i_current_depth;
459 if (F2FS_I(dir)->chash == dentry_hash) { 471 if (F2FS_I(dir)->chash == dentry_hash) {
@@ -529,6 +541,27 @@ fail:
529 return err; 541 return err;
530} 542}
531 543
544int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
545{
546 struct page *page;
547 int err = 0;
548
549 down_write(&F2FS_I(inode)->i_sem);
550 page = init_inode_metadata(inode, dir, NULL);
551 if (IS_ERR(page)) {
552 err = PTR_ERR(page);
553 goto fail;
554 }
555 /* we don't need to mark_inode_dirty now */
556 update_inode(inode, page);
557 f2fs_put_page(page, 1);
558
559 clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
560fail:
561 up_write(&F2FS_I(inode)->i_sem);
562 return err;
563}
564
532/* 565/*
533 * It only removes the dentry from the dentry page,corresponding name 566 * It only removes the dentry from the dentry page,corresponding name
534 * entry in name page does not need to be touched during deletion. 567 * entry in name page does not need to be touched during deletion.
@@ -541,14 +574,13 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
541 struct address_space *mapping = page->mapping; 574 struct address_space *mapping = page->mapping;
542 struct inode *dir = mapping->host; 575 struct inode *dir = mapping->host;
543 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); 576 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
544 void *kaddr = page_address(page);
545 int i; 577 int i;
546 578
547 lock_page(page); 579 lock_page(page);
548 f2fs_wait_on_page_writeback(page, DATA); 580 f2fs_wait_on_page_writeback(page, DATA);
549 581
550 dentry_blk = (struct f2fs_dentry_block *)kaddr; 582 dentry_blk = page_address(page);
551 bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; 583 bit_pos = dentry - dentry_blk->dentry;
552 for (i = 0; i < slots; i++) 584 for (i = 0; i < slots; i++)
553 test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); 585 test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
554 586
@@ -603,7 +635,6 @@ bool f2fs_empty_dir(struct inode *dir)
603 unsigned long nblock = dir_blocks(dir); 635 unsigned long nblock = dir_blocks(dir);
604 636
605 for (bidx = 0; bidx < nblock; bidx++) { 637 for (bidx = 0; bidx < nblock; bidx++) {
606 void *kaddr;
607 dentry_page = get_lock_data_page(dir, bidx); 638 dentry_page = get_lock_data_page(dir, bidx);
608 if (IS_ERR(dentry_page)) { 639 if (IS_ERR(dentry_page)) {
609 if (PTR_ERR(dentry_page) == -ENOENT) 640 if (PTR_ERR(dentry_page) == -ENOENT)
@@ -612,8 +643,8 @@ bool f2fs_empty_dir(struct inode *dir)
612 return false; 643 return false;
613 } 644 }
614 645
615 kaddr = kmap_atomic(dentry_page); 646
616 dentry_blk = (struct f2fs_dentry_block *)kaddr; 647 dentry_blk = kmap_atomic(dentry_page);
617 if (bidx == 0) 648 if (bidx == 0)
618 bit_pos = 2; 649 bit_pos = 2;
619 else 650 else
@@ -621,7 +652,7 @@ bool f2fs_empty_dir(struct inode *dir)
621 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, 652 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
622 NR_DENTRY_IN_BLOCK, 653 NR_DENTRY_IN_BLOCK,
623 bit_pos); 654 bit_pos);
624 kunmap_atomic(kaddr); 655 kunmap_atomic(dentry_blk);
625 656
626 f2fs_put_page(dentry_page, 1); 657 f2fs_put_page(dentry_page, 1);
627 658
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 58df97e174d0..4dab5338a97a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -41,6 +41,7 @@
41#define F2FS_MOUNT_INLINE_XATTR 0x00000080 41#define F2FS_MOUNT_INLINE_XATTR 0x00000080
42#define F2FS_MOUNT_INLINE_DATA 0x00000100 42#define F2FS_MOUNT_INLINE_DATA 0x00000100
43#define F2FS_MOUNT_FLUSH_MERGE 0x00000200 43#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
44#define F2FS_MOUNT_NOBARRIER 0x00000400
44 45
45#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 46#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
46#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) 47#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -99,8 +100,15 @@ enum {
99 META_SSA 100 META_SSA
100}; 101};
101 102
102/* for the list of orphan inodes */ 103/* for the list of ino */
103struct orphan_inode_entry { 104enum {
105 ORPHAN_INO, /* for orphan ino list */
106 APPEND_INO, /* for append ino list */
107 UPDATE_INO, /* for update ino list */
108 MAX_INO_ENTRY, /* max. list */
109};
110
111struct ino_entry {
104 struct list_head list; /* list head */ 112 struct list_head list; /* list head */
105 nid_t ino; /* inode number */ 113 nid_t ino; /* inode number */
106}; 114};
@@ -256,6 +264,8 @@ struct f2fs_nm_info {
256 unsigned int nat_cnt; /* the # of cached nat entries */ 264 unsigned int nat_cnt; /* the # of cached nat entries */
257 struct list_head nat_entries; /* cached nat entry list (clean) */ 265 struct list_head nat_entries; /* cached nat entry list (clean) */
258 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ 266 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
267 struct list_head nat_entry_set; /* nat entry set list */
268 unsigned int dirty_nat_cnt; /* total num of nat entries in set */
259 269
260 /* free node ids management */ 270 /* free node ids management */
261 struct radix_tree_root free_nid_root;/* root of the free_nid cache */ 271 struct radix_tree_root free_nid_root;/* root of the free_nid cache */
@@ -442,14 +452,17 @@ struct f2fs_sb_info {
442 struct inode *meta_inode; /* cache meta blocks */ 452 struct inode *meta_inode; /* cache meta blocks */
443 struct mutex cp_mutex; /* checkpoint procedure lock */ 453 struct mutex cp_mutex; /* checkpoint procedure lock */
444 struct rw_semaphore cp_rwsem; /* blocking FS operations */ 454 struct rw_semaphore cp_rwsem; /* blocking FS operations */
445 struct mutex node_write; /* locking node writes */ 455 struct rw_semaphore node_write; /* locking node writes */
446 struct mutex writepages; /* mutex for writepages() */ 456 struct mutex writepages; /* mutex for writepages() */
447 bool por_doing; /* recovery is doing or not */ 457 bool por_doing; /* recovery is doing or not */
448 wait_queue_head_t cp_wait; 458 wait_queue_head_t cp_wait;
449 459
450 /* for orphan inode management */ 460 /* for inode management */
451 struct list_head orphan_inode_list; /* orphan inode list */ 461 struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */
452 spinlock_t orphan_inode_lock; /* for orphan inode list */ 462 spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */
463 struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */
464
465 /* for orphan inode, use 0'th array */
453 unsigned int n_orphans; /* # of orphan inodes */ 466 unsigned int n_orphans; /* # of orphan inodes */
454 unsigned int max_orphans; /* max orphan inodes */ 467 unsigned int max_orphans; /* max orphan inodes */
455 468
@@ -768,7 +781,7 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
768 if (flag == NAT_BITMAP) 781 if (flag == NAT_BITMAP)
769 return &ckpt->sit_nat_version_bitmap; 782 return &ckpt->sit_nat_version_bitmap;
770 else 783 else
771 return ((unsigned char *)ckpt + F2FS_BLKSIZE); 784 return (unsigned char *)ckpt + F2FS_BLKSIZE;
772 } else { 785 } else {
773 offset = (flag == NAT_BITMAP) ? 786 offset = (flag == NAT_BITMAP) ?
774 le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; 787 le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
@@ -983,11 +996,15 @@ enum {
983 FI_NO_EXTENT, /* not to use the extent cache */ 996 FI_NO_EXTENT, /* not to use the extent cache */
984 FI_INLINE_XATTR, /* used for inline xattr */ 997 FI_INLINE_XATTR, /* used for inline xattr */
985 FI_INLINE_DATA, /* used for inline data*/ 998 FI_INLINE_DATA, /* used for inline data*/
999 FI_APPEND_WRITE, /* inode has appended data */
1000 FI_UPDATE_WRITE, /* inode has in-place-update data */
1001 FI_NEED_IPU, /* used fo ipu for fdatasync */
986}; 1002};
987 1003
988static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) 1004static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
989{ 1005{
990 set_bit(flag, &fi->flags); 1006 if (!test_bit(flag, &fi->flags))
1007 set_bit(flag, &fi->flags);
991} 1008}
992 1009
993static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) 1010static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
@@ -997,7 +1014,8 @@ static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
997 1014
998static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag) 1015static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag)
999{ 1016{
1000 clear_bit(flag, &fi->flags); 1017 if (test_bit(flag, &fi->flags))
1018 clear_bit(flag, &fi->flags);
1001} 1019}
1002 1020
1003static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) 1021static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode)
@@ -1136,6 +1154,7 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
1136int update_dent_inode(struct inode *, const struct qstr *); 1154int update_dent_inode(struct inode *, const struct qstr *);
1137int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); 1155int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
1138void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); 1156void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
1157int f2fs_do_tmpfile(struct inode *, struct inode *);
1139int f2fs_make_empty(struct inode *, struct inode *); 1158int f2fs_make_empty(struct inode *, struct inode *);
1140bool f2fs_empty_dir(struct inode *); 1159bool f2fs_empty_dir(struct inode *);
1141 1160
@@ -1155,7 +1174,7 @@ void f2fs_msg(struct super_block *, const char *, const char *, ...);
1155/* 1174/*
1156 * hash.c 1175 * hash.c
1157 */ 1176 */
1158f2fs_hash_t f2fs_dentry_hash(const char *, size_t); 1177f2fs_hash_t f2fs_dentry_hash(const struct qstr *);
1159 1178
1160/* 1179/*
1161 * node.c 1180 * node.c
@@ -1173,7 +1192,7 @@ int truncate_inode_blocks(struct inode *, pgoff_t);
1173int truncate_xattr_node(struct inode *, struct page *); 1192int truncate_xattr_node(struct inode *, struct page *);
1174int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); 1193int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
1175void remove_inode_page(struct inode *); 1194void remove_inode_page(struct inode *);
1176struct page *new_inode_page(struct inode *, const struct qstr *); 1195struct page *new_inode_page(struct inode *);
1177struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); 1196struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
1178void ra_node_page(struct f2fs_sb_info *, nid_t); 1197void ra_node_page(struct f2fs_sb_info *, nid_t);
1179struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); 1198struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
@@ -1185,6 +1204,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t);
1185void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 1204void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
1186void recover_node_page(struct f2fs_sb_info *, struct page *, 1205void recover_node_page(struct f2fs_sb_info *, struct page *,
1187 struct f2fs_summary *, struct node_info *, block_t); 1206 struct f2fs_summary *, struct node_info *, block_t);
1207void recover_inline_xattr(struct inode *, struct page *);
1188bool recover_xattr_data(struct inode *, struct page *, block_t); 1208bool recover_xattr_data(struct inode *, struct page *, block_t);
1189int recover_inode_page(struct f2fs_sb_info *, struct page *); 1209int recover_inode_page(struct f2fs_sb_info *, struct page *);
1190int restore_node_summary(struct f2fs_sb_info *, unsigned int, 1210int restore_node_summary(struct f2fs_sb_info *, unsigned int,
@@ -1206,7 +1226,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *);
1206void invalidate_blocks(struct f2fs_sb_info *, block_t); 1226void invalidate_blocks(struct f2fs_sb_info *, block_t);
1207void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); 1227void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
1208void clear_prefree_segments(struct f2fs_sb_info *); 1228void clear_prefree_segments(struct f2fs_sb_info *);
1209void discard_next_dnode(struct f2fs_sb_info *); 1229void discard_next_dnode(struct f2fs_sb_info *, block_t);
1210int npages_for_summary_flush(struct f2fs_sb_info *); 1230int npages_for_summary_flush(struct f2fs_sb_info *);
1211void allocate_new_segments(struct f2fs_sb_info *); 1231void allocate_new_segments(struct f2fs_sb_info *);
1212struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); 1232struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
@@ -1240,6 +1260,9 @@ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
1240struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 1260struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
1241int ra_meta_pages(struct f2fs_sb_info *, int, int, int); 1261int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
1242long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1262long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
1263void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
1264void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
1265bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
1243int acquire_orphan_inode(struct f2fs_sb_info *); 1266int acquire_orphan_inode(struct f2fs_sb_info *);
1244void release_orphan_inode(struct f2fs_sb_info *); 1267void release_orphan_inode(struct f2fs_sb_info *);
1245void add_orphan_inode(struct f2fs_sb_info *, nid_t); 1268void add_orphan_inode(struct f2fs_sb_info *, nid_t);
@@ -1251,7 +1274,7 @@ void add_dirty_dir_inode(struct inode *);
1251void remove_dirty_dir_inode(struct inode *); 1274void remove_dirty_dir_inode(struct inode *);
1252void sync_dirty_dir_inodes(struct f2fs_sb_info *); 1275void sync_dirty_dir_inodes(struct f2fs_sb_info *);
1253void write_checkpoint(struct f2fs_sb_info *, bool); 1276void write_checkpoint(struct f2fs_sb_info *, bool);
1254void init_orphan_info(struct f2fs_sb_info *); 1277void init_ino_entry_info(struct f2fs_sb_info *);
1255int __init create_checkpoint_caches(void); 1278int __init create_checkpoint_caches(void);
1256void destroy_checkpoint_caches(void); 1279void destroy_checkpoint_caches(void);
1257 1280
@@ -1295,7 +1318,6 @@ bool space_for_roll_forward(struct f2fs_sb_info *);
1295struct f2fs_stat_info { 1318struct f2fs_stat_info {
1296 struct list_head stat_list; 1319 struct list_head stat_list;
1297 struct f2fs_sb_info *sbi; 1320 struct f2fs_sb_info *sbi;
1298 struct mutex stat_lock;
1299 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; 1321 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
1300 int main_area_segs, main_area_sections, main_area_zones; 1322 int main_area_segs, main_area_sections, main_area_zones;
1301 int hit_ext, total_ext; 1323 int hit_ext, total_ext;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 7d8b96275092..208f1a9bd569 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -127,12 +127,30 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
127 return 0; 127 return 0;
128 128
129 trace_f2fs_sync_file_enter(inode); 129 trace_f2fs_sync_file_enter(inode);
130
131 /* if fdatasync is triggered, let's do in-place-update */
132 if (datasync)
133 set_inode_flag(fi, FI_NEED_IPU);
134
130 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 135 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
136 if (datasync)
137 clear_inode_flag(fi, FI_NEED_IPU);
131 if (ret) { 138 if (ret) {
132 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 139 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
133 return ret; 140 return ret;
134 } 141 }
135 142
143 /*
144 * if there is no written data, don't waste time to write recovery info.
145 */
146 if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
147 !exist_written_data(sbi, inode->i_ino, APPEND_INO)) {
148 if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
149 exist_written_data(sbi, inode->i_ino, UPDATE_INO))
150 goto flush_out;
151 goto out;
152 }
153
136 /* guarantee free sections for fsync */ 154 /* guarantee free sections for fsync */
137 f2fs_balance_fs(sbi); 155 f2fs_balance_fs(sbi);
138 156
@@ -188,6 +206,13 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
188 ret = wait_on_node_pages_writeback(sbi, inode->i_ino); 206 ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
189 if (ret) 207 if (ret)
190 goto out; 208 goto out;
209
210 /* once recovery info is written, don't need to tack this */
211 remove_dirty_inode(sbi, inode->i_ino, APPEND_INO);
212 clear_inode_flag(fi, FI_APPEND_WRITE);
213flush_out:
214 remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
215 clear_inode_flag(fi, FI_UPDATE_WRITE);
191 ret = f2fs_issue_flush(F2FS_SB(inode->i_sb)); 216 ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
192 } 217 }
193out: 218out:
@@ -206,8 +231,9 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,
206 231
207 /* find first dirty page index */ 232 /* find first dirty page index */
208 pagevec_init(&pvec, 0); 233 pagevec_init(&pvec, 0);
209 nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1); 234 nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs,
210 pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX; 235 PAGECACHE_TAG_DIRTY, 1);
236 pgofs = nr_pages ? pvec.pages[0]->index : LONG_MAX;
211 pagevec_release(&pvec); 237 pagevec_release(&pvec);
212 return pgofs; 238 return pgofs;
213} 239}
@@ -272,8 +298,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
272 } 298 }
273 } 299 }
274 300
275 end_offset = IS_INODE(dn.node_page) ? 301 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
276 ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
277 302
278 /* find data/hole in dnode block */ 303 /* find data/hole in dnode block */
279 for (; dn.ofs_in_node < end_offset; 304 for (; dn.ofs_in_node < end_offset;
@@ -380,13 +405,15 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
380 return; 405 return;
381 406
382 lock_page(page); 407 lock_page(page);
383 if (unlikely(page->mapping != inode->i_mapping)) { 408 if (unlikely(!PageUptodate(page) ||
384 f2fs_put_page(page, 1); 409 page->mapping != inode->i_mapping))
385 return; 410 goto out;
386 } 411
387 f2fs_wait_on_page_writeback(page, DATA); 412 f2fs_wait_on_page_writeback(page, DATA);
388 zero_user(page, offset, PAGE_CACHE_SIZE - offset); 413 zero_user(page, offset, PAGE_CACHE_SIZE - offset);
389 set_page_dirty(page); 414 set_page_dirty(page);
415
416out:
390 f2fs_put_page(page, 1); 417 f2fs_put_page(page, 1);
391} 418}
392 419
@@ -645,6 +672,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
645 loff_t off_start, off_end; 672 loff_t off_start, off_end;
646 int ret = 0; 673 int ret = 0;
647 674
675 f2fs_balance_fs(sbi);
676
648 ret = inode_newsize_ok(inode, (len + offset)); 677 ret = inode_newsize_ok(inode, (len + offset));
649 if (ret) 678 if (ret)
650 return ret; 679 return ret;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index b90dbe55403a..d7947d90ccc3 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -186,7 +186,6 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
186static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) 186static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
187{ 187{
188 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 188 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
189 unsigned int hint = 0;
190 unsigned int secno; 189 unsigned int secno;
191 190
192 /* 191 /*
@@ -194,11 +193,9 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
194 * selected by background GC before. 193 * selected by background GC before.
195 * Those segments guarantee they have small valid blocks. 194 * Those segments guarantee they have small valid blocks.
196 */ 195 */
197next: 196 for_each_set_bit(secno, dirty_i->victim_secmap, TOTAL_SECS(sbi)) {
198 secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++);
199 if (secno < TOTAL_SECS(sbi)) {
200 if (sec_usage_check(sbi, secno)) 197 if (sec_usage_check(sbi, secno))
201 goto next; 198 continue;
202 clear_bit(secno, dirty_i->victim_secmap); 199 clear_bit(secno, dirty_i->victim_secmap);
203 return secno * sbi->segs_per_sec; 200 return secno * sbi->segs_per_sec;
204 } 201 }
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 6eb8d269b53b..948d17bf7281 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -69,12 +69,14 @@ static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num)
69 *buf++ = pad; 69 *buf++ = pad;
70} 70}
71 71
72f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len) 72f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
73{ 73{
74 __u32 hash; 74 __u32 hash;
75 f2fs_hash_t f2fs_hash; 75 f2fs_hash_t f2fs_hash;
76 const char *p; 76 const char *p;
77 __u32 in[8], buf[4]; 77 __u32 in[8], buf[4];
78 const char *name = name_info->name;
79 size_t len = name_info->len;
78 80
79 if ((len <= 2) && (name[0] == '.') && 81 if ((len <= 2) && (name[0] == '.') &&
80 (name[1] == '.' || name[1] == '\0')) 82 (name[1] == '.' || name[1] == '\0'))
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 1bba5228c197..5beeccef9ae1 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -172,6 +172,7 @@ int f2fs_write_inline_data(struct inode *inode,
172 stat_inc_inline_inode(inode); 172 stat_inc_inline_inode(inode);
173 } 173 }
174 174
175 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
175 sync_inode_page(&dn); 176 sync_inode_page(&dn);
176 f2fs_put_dnode(&dn); 177 f2fs_put_dnode(&dn);
177 178
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 2cf6962f6cc8..2c39999f3868 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -267,13 +267,14 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
267void f2fs_evict_inode(struct inode *inode) 267void f2fs_evict_inode(struct inode *inode)
268{ 268{
269 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 269 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
270 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
270 271
271 trace_f2fs_evict_inode(inode); 272 trace_f2fs_evict_inode(inode);
272 truncate_inode_pages_final(&inode->i_data); 273 truncate_inode_pages_final(&inode->i_data);
273 274
274 if (inode->i_ino == F2FS_NODE_INO(sbi) || 275 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
275 inode->i_ino == F2FS_META_INO(sbi)) 276 inode->i_ino == F2FS_META_INO(sbi))
276 goto no_delete; 277 goto out_clear;
277 278
278 f2fs_bug_on(get_dirty_dents(inode)); 279 f2fs_bug_on(get_dirty_dents(inode));
279 remove_dirty_dir_inode(inode); 280 remove_dirty_dir_inode(inode);
@@ -295,6 +296,13 @@ void f2fs_evict_inode(struct inode *inode)
295 296
296 sb_end_intwrite(inode->i_sb); 297 sb_end_intwrite(inode->i_sb);
297no_delete: 298no_delete:
298 clear_inode(inode);
299 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); 299 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
300 if (xnid)
301 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
302 if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE))
303 add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
304 if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE))
305 add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
306out_clear:
307 clear_inode(inode);
300} 308}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index a6bdddc33ce2..27b03776ffd2 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -13,6 +13,7 @@
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/ctype.h> 15#include <linux/ctype.h>
16#include <linux/dcache.h>
16 17
17#include "f2fs.h" 18#include "f2fs.h"
18#include "node.h" 19#include "node.h"
@@ -22,14 +23,13 @@
22 23
23static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) 24static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
24{ 25{
25 struct super_block *sb = dir->i_sb; 26 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
26 struct f2fs_sb_info *sbi = F2FS_SB(sb);
27 nid_t ino; 27 nid_t ino;
28 struct inode *inode; 28 struct inode *inode;
29 bool nid_free = false; 29 bool nid_free = false;
30 int err; 30 int err;
31 31
32 inode = new_inode(sb); 32 inode = new_inode(dir->i_sb);
33 if (!inode) 33 if (!inode)
34 return ERR_PTR(-ENOMEM); 34 return ERR_PTR(-ENOMEM);
35 35
@@ -102,8 +102,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode,
102static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 102static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
103 bool excl) 103 bool excl)
104{ 104{
105 struct super_block *sb = dir->i_sb; 105 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
106 struct f2fs_sb_info *sbi = F2FS_SB(sb);
107 struct inode *inode; 106 struct inode *inode;
108 nid_t ino = 0; 107 nid_t ino = 0;
109 int err; 108 int err;
@@ -146,8 +145,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
146 struct dentry *dentry) 145 struct dentry *dentry)
147{ 146{
148 struct inode *inode = old_dentry->d_inode; 147 struct inode *inode = old_dentry->d_inode;
149 struct super_block *sb = dir->i_sb; 148 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
150 struct f2fs_sb_info *sbi = F2FS_SB(sb);
151 int err; 149 int err;
152 150
153 f2fs_balance_fs(sbi); 151 f2fs_balance_fs(sbi);
@@ -207,8 +205,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
207 205
208static int f2fs_unlink(struct inode *dir, struct dentry *dentry) 206static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
209{ 207{
210 struct super_block *sb = dir->i_sb; 208 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
211 struct f2fs_sb_info *sbi = F2FS_SB(sb);
212 struct inode *inode = dentry->d_inode; 209 struct inode *inode = dentry->d_inode;
213 struct f2fs_dir_entry *de; 210 struct f2fs_dir_entry *de;
214 struct page *page; 211 struct page *page;
@@ -242,8 +239,7 @@ fail:
242static int f2fs_symlink(struct inode *dir, struct dentry *dentry, 239static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
243 const char *symname) 240 const char *symname)
244{ 241{
245 struct super_block *sb = dir->i_sb; 242 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
246 struct f2fs_sb_info *sbi = F2FS_SB(sb);
247 struct inode *inode; 243 struct inode *inode;
248 size_t symlen = strlen(symname) + 1; 244 size_t symlen = strlen(symname) + 1;
249 int err; 245 int err;
@@ -330,8 +326,7 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
330static int f2fs_mknod(struct inode *dir, struct dentry *dentry, 326static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
331 umode_t mode, dev_t rdev) 327 umode_t mode, dev_t rdev)
332{ 328{
333 struct super_block *sb = dir->i_sb; 329 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
334 struct f2fs_sb_info *sbi = F2FS_SB(sb);
335 struct inode *inode; 330 struct inode *inode;
336 int err = 0; 331 int err = 0;
337 332
@@ -369,8 +364,7 @@ out:
369static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, 364static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
370 struct inode *new_dir, struct dentry *new_dentry) 365 struct inode *new_dir, struct dentry *new_dentry)
371{ 366{
372 struct super_block *sb = old_dir->i_sb; 367 struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb);
373 struct f2fs_sb_info *sbi = F2FS_SB(sb);
374 struct inode *old_inode = old_dentry->d_inode; 368 struct inode *old_inode = old_dentry->d_inode;
375 struct inode *new_inode = new_dentry->d_inode; 369 struct inode *new_inode = new_dentry->d_inode;
376 struct page *old_dir_page; 370 struct page *old_dir_page;
@@ -393,8 +387,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
393 goto out_old; 387 goto out_old;
394 } 388 }
395 389
396 f2fs_lock_op(sbi);
397
398 if (new_inode) { 390 if (new_inode) {
399 391
400 err = -ENOTEMPTY; 392 err = -ENOTEMPTY;
@@ -407,6 +399,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
407 if (!new_entry) 399 if (!new_entry)
408 goto out_dir; 400 goto out_dir;
409 401
402 f2fs_lock_op(sbi);
403
410 err = acquire_orphan_inode(sbi); 404 err = acquire_orphan_inode(sbi);
411 if (err) 405 if (err)
412 goto put_out_dir; 406 goto put_out_dir;
@@ -435,9 +429,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
435 update_inode_page(old_inode); 429 update_inode_page(old_inode);
436 update_inode_page(new_inode); 430 update_inode_page(new_inode);
437 } else { 431 } else {
432 f2fs_lock_op(sbi);
433
438 err = f2fs_add_link(new_dentry, old_inode); 434 err = f2fs_add_link(new_dentry, old_inode);
439 if (err) 435 if (err) {
436 f2fs_unlock_op(sbi);
440 goto out_dir; 437 goto out_dir;
438 }
441 439
442 if (old_dir_entry) { 440 if (old_dir_entry) {
443 inc_nlink(new_dir); 441 inc_nlink(new_dir);
@@ -472,6 +470,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
472 return 0; 470 return 0;
473 471
474put_out_dir: 472put_out_dir:
473 f2fs_unlock_op(sbi);
475 kunmap(new_page); 474 kunmap(new_page);
476 f2fs_put_page(new_page, 0); 475 f2fs_put_page(new_page, 0);
477out_dir: 476out_dir:
@@ -479,7 +478,151 @@ out_dir:
479 kunmap(old_dir_page); 478 kunmap(old_dir_page);
480 f2fs_put_page(old_dir_page, 0); 479 f2fs_put_page(old_dir_page, 0);
481 } 480 }
481out_old:
482 kunmap(old_page);
483 f2fs_put_page(old_page, 0);
484out:
485 return err;
486}
487
488static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
489 struct inode *new_dir, struct dentry *new_dentry)
490{
491 struct super_block *sb = old_dir->i_sb;
492 struct f2fs_sb_info *sbi = F2FS_SB(sb);
493 struct inode *old_inode = old_dentry->d_inode;
494 struct inode *new_inode = new_dentry->d_inode;
495 struct page *old_dir_page, *new_dir_page;
496 struct page *old_page, *new_page;
497 struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL;
498 struct f2fs_dir_entry *old_entry, *new_entry;
499 int old_nlink = 0, new_nlink = 0;
500 int err = -ENOENT;
501
502 f2fs_balance_fs(sbi);
503
504 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
505 if (!old_entry)
506 goto out;
507
508 new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page);
509 if (!new_entry)
510 goto out_old;
511
512 /* prepare for updating ".." directory entry info later */
513 if (old_dir != new_dir) {
514 if (S_ISDIR(old_inode->i_mode)) {
515 err = -EIO;
516 old_dir_entry = f2fs_parent_dir(old_inode,
517 &old_dir_page);
518 if (!old_dir_entry)
519 goto out_new;
520 }
521
522 if (S_ISDIR(new_inode->i_mode)) {
523 err = -EIO;
524 new_dir_entry = f2fs_parent_dir(new_inode,
525 &new_dir_page);
526 if (!new_dir_entry)
527 goto out_old_dir;
528 }
529 }
530
531 /*
532 * If cross rename between file and directory those are not
533 * in the same directory, we will inc nlink of file's parent
534 * later, so we should check upper boundary of its nlink.
535 */
536 if ((!old_dir_entry || !new_dir_entry) &&
537 old_dir_entry != new_dir_entry) {
538 old_nlink = old_dir_entry ? -1 : 1;
539 new_nlink = -old_nlink;
540 err = -EMLINK;
541 if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) ||
542 (new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX))
543 goto out_new_dir;
544 }
545
546 f2fs_lock_op(sbi);
547
548 err = update_dent_inode(old_inode, &new_dentry->d_name);
549 if (err)
550 goto out_unlock;
551
552 err = update_dent_inode(new_inode, &old_dentry->d_name);
553 if (err)
554 goto out_undo;
555
556 /* update ".." directory entry info of old dentry */
557 if (old_dir_entry)
558 f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir);
559
560 /* update ".." directory entry info of new dentry */
561 if (new_dir_entry)
562 f2fs_set_link(new_inode, new_dir_entry, new_dir_page, old_dir);
563
564 /* update directory entry info of old dir inode */
565 f2fs_set_link(old_dir, old_entry, old_page, new_inode);
566
567 down_write(&F2FS_I(old_inode)->i_sem);
568 file_lost_pino(old_inode);
569 up_write(&F2FS_I(old_inode)->i_sem);
570
571 update_inode_page(old_inode);
572
573 old_dir->i_ctime = CURRENT_TIME;
574 if (old_nlink) {
575 down_write(&F2FS_I(old_dir)->i_sem);
576 if (old_nlink < 0)
577 drop_nlink(old_dir);
578 else
579 inc_nlink(old_dir);
580 up_write(&F2FS_I(old_dir)->i_sem);
581 }
582 mark_inode_dirty(old_dir);
583 update_inode_page(old_dir);
584
585 /* update directory entry info of new dir inode */
586 f2fs_set_link(new_dir, new_entry, new_page, old_inode);
587
588 down_write(&F2FS_I(new_inode)->i_sem);
589 file_lost_pino(new_inode);
590 up_write(&F2FS_I(new_inode)->i_sem);
591
592 update_inode_page(new_inode);
593
594 new_dir->i_ctime = CURRENT_TIME;
595 if (new_nlink) {
596 down_write(&F2FS_I(new_dir)->i_sem);
597 if (new_nlink < 0)
598 drop_nlink(new_dir);
599 else
600 inc_nlink(new_dir);
601 up_write(&F2FS_I(new_dir)->i_sem);
602 }
603 mark_inode_dirty(new_dir);
604 update_inode_page(new_dir);
605
606 f2fs_unlock_op(sbi);
607 return 0;
608out_undo:
609 /* Still we may fail to recover name info of f2fs_inode here */
610 update_dent_inode(old_inode, &old_dentry->d_name);
611out_unlock:
482 f2fs_unlock_op(sbi); 612 f2fs_unlock_op(sbi);
613out_new_dir:
614 if (new_dir_entry) {
615 kunmap(new_dir_page);
616 f2fs_put_page(new_dir_page, 0);
617 }
618out_old_dir:
619 if (old_dir_entry) {
620 kunmap(old_dir_page);
621 f2fs_put_page(old_dir_page, 0);
622 }
623out_new:
624 kunmap(new_page);
625 f2fs_put_page(new_page, 0);
483out_old: 626out_old:
484 kunmap(old_page); 627 kunmap(old_page);
485 f2fs_put_page(old_page, 0); 628 f2fs_put_page(old_page, 0);
@@ -487,6 +630,71 @@ out:
487 return err; 630 return err;
488} 631}
489 632
633static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
634 struct inode *new_dir, struct dentry *new_dentry,
635 unsigned int flags)
636{
637 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
638 return -EINVAL;
639
640 if (flags & RENAME_EXCHANGE) {
641 return f2fs_cross_rename(old_dir, old_dentry,
642 new_dir, new_dentry);
643 }
644 /*
645 * VFS has already handled the new dentry existence case,
646 * here, we just deal with "RENAME_NOREPLACE" as regular rename.
647 */
648 return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry);
649}
650
651static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
652{
653 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
654 struct inode *inode;
655 int err;
656
657 inode = f2fs_new_inode(dir, mode);
658 if (IS_ERR(inode))
659 return PTR_ERR(inode);
660
661 inode->i_op = &f2fs_file_inode_operations;
662 inode->i_fop = &f2fs_file_operations;
663 inode->i_mapping->a_ops = &f2fs_dblock_aops;
664
665 f2fs_lock_op(sbi);
666 err = acquire_orphan_inode(sbi);
667 if (err)
668 goto out;
669
670 err = f2fs_do_tmpfile(inode, dir);
671 if (err)
672 goto release_out;
673
674 /*
675 * add this non-linked tmpfile to orphan list, in this way we could
676 * remove all unused data of tmpfile after abnormal power-off.
677 */
678 add_orphan_inode(sbi, inode->i_ino);
679 f2fs_unlock_op(sbi);
680
681 alloc_nid_done(sbi, inode->i_ino);
682 d_tmpfile(dentry, inode);
683 unlock_new_inode(inode);
684 return 0;
685
686release_out:
687 release_orphan_inode(sbi);
688out:
689 f2fs_unlock_op(sbi);
690 clear_nlink(inode);
691 unlock_new_inode(inode);
692 make_bad_inode(inode);
693 iput(inode);
694 alloc_nid_failed(sbi, inode->i_ino);
695 return err;
696}
697
490const struct inode_operations f2fs_dir_inode_operations = { 698const struct inode_operations f2fs_dir_inode_operations = {
491 .create = f2fs_create, 699 .create = f2fs_create,
492 .lookup = f2fs_lookup, 700 .lookup = f2fs_lookup,
@@ -497,6 +705,8 @@ const struct inode_operations f2fs_dir_inode_operations = {
497 .rmdir = f2fs_rmdir, 705 .rmdir = f2fs_rmdir,
498 .mknod = f2fs_mknod, 706 .mknod = f2fs_mknod,
499 .rename = f2fs_rename, 707 .rename = f2fs_rename,
708 .rename2 = f2fs_rename2,
709 .tmpfile = f2fs_tmpfile,
500 .getattr = f2fs_getattr, 710 .getattr = f2fs_getattr,
501 .setattr = f2fs_setattr, 711 .setattr = f2fs_setattr,
502 .get_acl = f2fs_get_acl, 712 .get_acl = f2fs_get_acl,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4b697ccc9b0c..d3d90d284631 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -25,6 +25,7 @@
25 25
26static struct kmem_cache *nat_entry_slab; 26static struct kmem_cache *nat_entry_slab;
27static struct kmem_cache *free_nid_slab; 27static struct kmem_cache *free_nid_slab;
28static struct kmem_cache *nat_entry_set_slab;
28 29
29bool available_free_memory(struct f2fs_sb_info *sbi, int type) 30bool available_free_memory(struct f2fs_sb_info *sbi, int type)
30{ 31{
@@ -90,12 +91,8 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
90 91
91 /* get current nat block page with lock */ 92 /* get current nat block page with lock */
92 src_page = get_meta_page(sbi, src_off); 93 src_page = get_meta_page(sbi, src_off);
93
94 /* Dirty src_page means that it is already the new target NAT page. */
95 if (PageDirty(src_page))
96 return src_page;
97
98 dst_page = grab_meta_page(sbi, dst_off); 94 dst_page = grab_meta_page(sbi, dst_off);
95 f2fs_bug_on(PageDirty(src_page));
99 96
100 src_addr = page_address(src_page); 97 src_addr = page_address(src_page);
101 dst_addr = page_address(dst_page); 98 dst_addr = page_address(dst_page);
@@ -845,7 +842,7 @@ void remove_inode_page(struct inode *inode)
845 truncate_node(&dn); 842 truncate_node(&dn);
846} 843}
847 844
848struct page *new_inode_page(struct inode *inode, const struct qstr *name) 845struct page *new_inode_page(struct inode *inode)
849{ 846{
850 struct dnode_of_data dn; 847 struct dnode_of_data dn;
851 848
@@ -1234,12 +1231,12 @@ static int f2fs_write_node_page(struct page *page,
1234 if (wbc->for_reclaim) 1231 if (wbc->for_reclaim)
1235 goto redirty_out; 1232 goto redirty_out;
1236 1233
1237 mutex_lock(&sbi->node_write); 1234 down_read(&sbi->node_write);
1238 set_page_writeback(page); 1235 set_page_writeback(page);
1239 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); 1236 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
1240 set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); 1237 set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
1241 dec_page_count(sbi, F2FS_DIRTY_NODES); 1238 dec_page_count(sbi, F2FS_DIRTY_NODES);
1242 mutex_unlock(&sbi->node_write); 1239 up_read(&sbi->node_write);
1243 unlock_page(page); 1240 unlock_page(page);
1244 return 0; 1241 return 0;
1245 1242
@@ -1552,7 +1549,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1552 clear_node_page_dirty(page); 1549 clear_node_page_dirty(page);
1553} 1550}
1554 1551
1555static void recover_inline_xattr(struct inode *inode, struct page *page) 1552void recover_inline_xattr(struct inode *inode, struct page *page)
1556{ 1553{
1557 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1554 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1558 void *src_addr, *dst_addr; 1555 void *src_addr, *dst_addr;
@@ -1591,8 +1588,6 @@ bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1591 nid_t new_xnid = nid_of_node(page); 1588 nid_t new_xnid = nid_of_node(page);
1592 struct node_info ni; 1589 struct node_info ni;
1593 1590
1594 recover_inline_xattr(inode, page);
1595
1596 if (!f2fs_has_xattr_block(ofs_of_node(page))) 1591 if (!f2fs_has_xattr_block(ofs_of_node(page)))
1597 return false; 1592 return false;
1598 1593
@@ -1744,7 +1739,90 @@ skip:
1744 return err; 1739 return err;
1745} 1740}
1746 1741
1747static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) 1742static struct nat_entry_set *grab_nat_entry_set(void)
1743{
1744 struct nat_entry_set *nes =
1745 f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
1746
1747 nes->entry_cnt = 0;
1748 INIT_LIST_HEAD(&nes->set_list);
1749 INIT_LIST_HEAD(&nes->entry_list);
1750 return nes;
1751}
1752
1753static void release_nat_entry_set(struct nat_entry_set *nes,
1754 struct f2fs_nm_info *nm_i)
1755{
1756 f2fs_bug_on(!list_empty(&nes->entry_list));
1757
1758 nm_i->dirty_nat_cnt -= nes->entry_cnt;
1759 list_del(&nes->set_list);
1760 kmem_cache_free(nat_entry_set_slab, nes);
1761}
1762
1763static void adjust_nat_entry_set(struct nat_entry_set *nes,
1764 struct list_head *head)
1765{
1766 struct nat_entry_set *next = nes;
1767
1768 if (list_is_last(&nes->set_list, head))
1769 return;
1770
1771 list_for_each_entry_continue(next, head, set_list)
1772 if (nes->entry_cnt <= next->entry_cnt)
1773 break;
1774
1775 list_move_tail(&nes->set_list, &next->set_list);
1776}
1777
1778static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
1779{
1780 struct nat_entry_set *nes;
1781 nid_t start_nid = START_NID(ne->ni.nid);
1782
1783 list_for_each_entry(nes, head, set_list) {
1784 if (nes->start_nid == start_nid) {
1785 list_move_tail(&ne->list, &nes->entry_list);
1786 nes->entry_cnt++;
1787 adjust_nat_entry_set(nes, head);
1788 return;
1789 }
1790 }
1791
1792 nes = grab_nat_entry_set();
1793
1794 nes->start_nid = start_nid;
1795 list_move_tail(&ne->list, &nes->entry_list);
1796 nes->entry_cnt++;
1797 list_add(&nes->set_list, head);
1798}
1799
1800static void merge_nats_in_set(struct f2fs_sb_info *sbi)
1801{
1802 struct f2fs_nm_info *nm_i = NM_I(sbi);
1803 struct list_head *dirty_list = &nm_i->dirty_nat_entries;
1804 struct list_head *set_list = &nm_i->nat_entry_set;
1805 struct nat_entry *ne, *tmp;
1806
1807 write_lock(&nm_i->nat_tree_lock);
1808 list_for_each_entry_safe(ne, tmp, dirty_list, list) {
1809 if (nat_get_blkaddr(ne) == NEW_ADDR)
1810 continue;
1811 add_nat_entry(ne, set_list);
1812 nm_i->dirty_nat_cnt++;
1813 }
1814 write_unlock(&nm_i->nat_tree_lock);
1815}
1816
1817static bool __has_cursum_space(struct f2fs_summary_block *sum, int size)
1818{
1819 if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES)
1820 return true;
1821 else
1822 return false;
1823}
1824
1825static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
1748{ 1826{
1749 struct f2fs_nm_info *nm_i = NM_I(sbi); 1827 struct f2fs_nm_info *nm_i = NM_I(sbi);
1750 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1828 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1752,12 +1830,6 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
1752 int i; 1830 int i;
1753 1831
1754 mutex_lock(&curseg->curseg_mutex); 1832 mutex_lock(&curseg->curseg_mutex);
1755
1756 if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
1757 mutex_unlock(&curseg->curseg_mutex);
1758 return false;
1759 }
1760
1761 for (i = 0; i < nats_in_cursum(sum); i++) { 1833 for (i = 0; i < nats_in_cursum(sum); i++) {
1762 struct nat_entry *ne; 1834 struct nat_entry *ne;
1763 struct f2fs_nat_entry raw_ne; 1835 struct f2fs_nat_entry raw_ne;
@@ -1767,23 +1839,21 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
1767retry: 1839retry:
1768 write_lock(&nm_i->nat_tree_lock); 1840 write_lock(&nm_i->nat_tree_lock);
1769 ne = __lookup_nat_cache(nm_i, nid); 1841 ne = __lookup_nat_cache(nm_i, nid);
1770 if (ne) { 1842 if (ne)
1771 __set_nat_cache_dirty(nm_i, ne); 1843 goto found;
1772 write_unlock(&nm_i->nat_tree_lock); 1844
1773 continue;
1774 }
1775 ne = grab_nat_entry(nm_i, nid); 1845 ne = grab_nat_entry(nm_i, nid);
1776 if (!ne) { 1846 if (!ne) {
1777 write_unlock(&nm_i->nat_tree_lock); 1847 write_unlock(&nm_i->nat_tree_lock);
1778 goto retry; 1848 goto retry;
1779 } 1849 }
1780 node_info_from_raw_nat(&ne->ni, &raw_ne); 1850 node_info_from_raw_nat(&ne->ni, &raw_ne);
1851found:
1781 __set_nat_cache_dirty(nm_i, ne); 1852 __set_nat_cache_dirty(nm_i, ne);
1782 write_unlock(&nm_i->nat_tree_lock); 1853 write_unlock(&nm_i->nat_tree_lock);
1783 } 1854 }
1784 update_nats_in_cursum(sum, -i); 1855 update_nats_in_cursum(sum, -i);
1785 mutex_unlock(&curseg->curseg_mutex); 1856 mutex_unlock(&curseg->curseg_mutex);
1786 return true;
1787} 1857}
1788 1858
1789/* 1859/*
@@ -1794,80 +1864,91 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1794 struct f2fs_nm_info *nm_i = NM_I(sbi); 1864 struct f2fs_nm_info *nm_i = NM_I(sbi);
1795 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1865 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1796 struct f2fs_summary_block *sum = curseg->sum_blk; 1866 struct f2fs_summary_block *sum = curseg->sum_blk;
1797 struct nat_entry *ne, *cur; 1867 struct nat_entry_set *nes, *tmp;
1798 struct page *page = NULL; 1868 struct list_head *head = &nm_i->nat_entry_set;
1799 struct f2fs_nat_block *nat_blk = NULL; 1869 bool to_journal = true;
1800 nid_t start_nid = 0, end_nid = 0;
1801 bool flushed;
1802 1870
1803 flushed = flush_nats_in_journal(sbi); 1871 /* merge nat entries of dirty list to nat entry set temporarily */
1804 1872 merge_nats_in_set(sbi);
1805 if (!flushed)
1806 mutex_lock(&curseg->curseg_mutex);
1807
1808 /* 1) flush dirty nat caches */
1809 list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
1810 nid_t nid;
1811 struct f2fs_nat_entry raw_ne;
1812 int offset = -1;
1813
1814 if (nat_get_blkaddr(ne) == NEW_ADDR)
1815 continue;
1816 1873
1817 nid = nat_get_nid(ne); 1874 /*
1875 * if there are no enough space in journal to store dirty nat
1876 * entries, remove all entries from journal and merge them
1877 * into nat entry set.
1878 */
1879 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) {
1880 remove_nats_in_journal(sbi);
1818 1881
1819 if (flushed) 1882 /*
1820 goto to_nat_page; 1883 * merge nat entries of dirty list to nat entry set temporarily
1884 */
1885 merge_nats_in_set(sbi);
1886 }
1821 1887
1822 /* if there is room for nat enries in curseg->sumpage */ 1888 if (!nm_i->dirty_nat_cnt)
1823 offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); 1889 return;
1824 if (offset >= 0) {
1825 raw_ne = nat_in_journal(sum, offset);
1826 goto flush_now;
1827 }
1828to_nat_page:
1829 if (!page || (start_nid > nid || nid > end_nid)) {
1830 if (page) {
1831 f2fs_put_page(page, 1);
1832 page = NULL;
1833 }
1834 start_nid = START_NID(nid);
1835 end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;
1836 1890
1837 /* 1891 /*
1838 * get nat block with dirty flag, increased reference 1892 * there are two steps to flush nat entries:
1839 * count, mapped and lock 1893 * #1, flush nat entries to journal in current hot data summary block.
1840 */ 1894 * #2, flush nat entries to nat page.
1895 */
1896 list_for_each_entry_safe(nes, tmp, head, set_list) {
1897 struct f2fs_nat_block *nat_blk;
1898 struct nat_entry *ne, *cur;
1899 struct page *page;
1900 nid_t start_nid = nes->start_nid;
1901
1902 if (to_journal && !__has_cursum_space(sum, nes->entry_cnt))
1903 to_journal = false;
1904
1905 if (to_journal) {
1906 mutex_lock(&curseg->curseg_mutex);
1907 } else {
1841 page = get_next_nat_page(sbi, start_nid); 1908 page = get_next_nat_page(sbi, start_nid);
1842 nat_blk = page_address(page); 1909 nat_blk = page_address(page);
1910 f2fs_bug_on(!nat_blk);
1843 } 1911 }
1844 1912
1845 f2fs_bug_on(!nat_blk); 1913 /* flush dirty nats in nat entry set */
1846 raw_ne = nat_blk->entries[nid - start_nid]; 1914 list_for_each_entry_safe(ne, cur, &nes->entry_list, list) {
1847flush_now: 1915 struct f2fs_nat_entry *raw_ne;
1848 raw_nat_from_node_info(&raw_ne, &ne->ni); 1916 nid_t nid = nat_get_nid(ne);
1849 1917 int offset;
1850 if (offset < 0) { 1918
1851 nat_blk->entries[nid - start_nid] = raw_ne; 1919 if (to_journal) {
1852 } else { 1920 offset = lookup_journal_in_cursum(sum,
1853 nat_in_journal(sum, offset) = raw_ne; 1921 NAT_JOURNAL, nid, 1);
1854 nid_in_journal(sum, offset) = cpu_to_le32(nid); 1922 f2fs_bug_on(offset < 0);
1855 } 1923 raw_ne = &nat_in_journal(sum, offset);
1924 nid_in_journal(sum, offset) = cpu_to_le32(nid);
1925 } else {
1926 raw_ne = &nat_blk->entries[nid - start_nid];
1927 }
1928 raw_nat_from_node_info(raw_ne, &ne->ni);
1856 1929
1857 if (nat_get_blkaddr(ne) == NULL_ADDR && 1930 if (nat_get_blkaddr(ne) == NULL_ADDR &&
1858 add_free_nid(sbi, nid, false) <= 0) { 1931 add_free_nid(sbi, nid, false) <= 0) {
1859 write_lock(&nm_i->nat_tree_lock); 1932 write_lock(&nm_i->nat_tree_lock);
1860 __del_from_nat_cache(nm_i, ne); 1933 __del_from_nat_cache(nm_i, ne);
1861 write_unlock(&nm_i->nat_tree_lock); 1934 write_unlock(&nm_i->nat_tree_lock);
1862 } else { 1935 } else {
1863 write_lock(&nm_i->nat_tree_lock); 1936 write_lock(&nm_i->nat_tree_lock);
1864 __clear_nat_cache_dirty(nm_i, ne); 1937 __clear_nat_cache_dirty(nm_i, ne);
1865 write_unlock(&nm_i->nat_tree_lock); 1938 write_unlock(&nm_i->nat_tree_lock);
1939 }
1866 } 1940 }
1941
1942 if (to_journal)
1943 mutex_unlock(&curseg->curseg_mutex);
1944 else
1945 f2fs_put_page(page, 1);
1946
1947 release_nat_entry_set(nes, nm_i);
1867 } 1948 }
1868 if (!flushed) 1949
1869 mutex_unlock(&curseg->curseg_mutex); 1950 f2fs_bug_on(!list_empty(head));
1870 f2fs_put_page(page, 1); 1951 f2fs_bug_on(nm_i->dirty_nat_cnt);
1871} 1952}
1872 1953
1873static int init_node_manager(struct f2fs_sb_info *sbi) 1954static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1896,6 +1977,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
1896 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); 1977 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1897 INIT_LIST_HEAD(&nm_i->nat_entries); 1978 INIT_LIST_HEAD(&nm_i->nat_entries);
1898 INIT_LIST_HEAD(&nm_i->dirty_nat_entries); 1979 INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
1980 INIT_LIST_HEAD(&nm_i->nat_entry_set);
1899 1981
1900 mutex_init(&nm_i->build_lock); 1982 mutex_init(&nm_i->build_lock);
1901 spin_lock_init(&nm_i->free_nid_list_lock); 1983 spin_lock_init(&nm_i->free_nid_list_lock);
@@ -1976,19 +2058,30 @@ int __init create_node_manager_caches(void)
1976 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 2058 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1977 sizeof(struct nat_entry)); 2059 sizeof(struct nat_entry));
1978 if (!nat_entry_slab) 2060 if (!nat_entry_slab)
1979 return -ENOMEM; 2061 goto fail;
1980 2062
1981 free_nid_slab = f2fs_kmem_cache_create("free_nid", 2063 free_nid_slab = f2fs_kmem_cache_create("free_nid",
1982 sizeof(struct free_nid)); 2064 sizeof(struct free_nid));
1983 if (!free_nid_slab) { 2065 if (!free_nid_slab)
1984 kmem_cache_destroy(nat_entry_slab); 2066 goto destory_nat_entry;
1985 return -ENOMEM; 2067
1986 } 2068 nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
2069 sizeof(struct nat_entry_set));
2070 if (!nat_entry_set_slab)
2071 goto destory_free_nid;
1987 return 0; 2072 return 0;
2073
2074destory_free_nid:
2075 kmem_cache_destroy(free_nid_slab);
2076destory_nat_entry:
2077 kmem_cache_destroy(nat_entry_slab);
2078fail:
2079 return -ENOMEM;
1988} 2080}
1989 2081
1990void destroy_node_manager_caches(void) 2082void destroy_node_manager_caches(void)
1991{ 2083{
2084 kmem_cache_destroy(nat_entry_set_slab);
1992 kmem_cache_destroy(free_nid_slab); 2085 kmem_cache_destroy(free_nid_slab);
1993 kmem_cache_destroy(nat_entry_slab); 2086 kmem_cache_destroy(nat_entry_slab);
1994} 2087}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 7281112cd1c8..8a116a407599 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -89,6 +89,13 @@ enum mem_type {
89 DIRTY_DENTS /* indicates dirty dentry pages */ 89 DIRTY_DENTS /* indicates dirty dentry pages */
90}; 90};
91 91
92struct nat_entry_set {
93 struct list_head set_list; /* link with all nat sets */
94 struct list_head entry_list; /* link with dirty nat entries */
95 nid_t start_nid; /* start nid of nats in set */
96 unsigned int entry_cnt; /* the # of nat entries in set */
97};
98
92/* 99/*
93 * For free nid mangement 100 * For free nid mangement
94 */ 101 */
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index a112368a4a86..fe1c6d921ba2 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -300,6 +300,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
300 struct node_info ni; 300 struct node_info ni;
301 int err = 0, recovered = 0; 301 int err = 0, recovered = 0;
302 302
303 recover_inline_xattr(inode, page);
304
303 if (recover_inline_data(inode, page)) 305 if (recover_inline_data(inode, page))
304 goto out; 306 goto out;
305 307
@@ -434,7 +436,9 @@ next:
434 436
435int recover_fsync_data(struct f2fs_sb_info *sbi) 437int recover_fsync_data(struct f2fs_sb_info *sbi)
436{ 438{
439 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
437 struct list_head inode_list; 440 struct list_head inode_list;
441 block_t blkaddr;
438 int err; 442 int err;
439 bool need_writecp = false; 443 bool need_writecp = false;
440 444
@@ -447,6 +451,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
447 451
448 /* step #1: find fsynced inode numbers */ 452 /* step #1: find fsynced inode numbers */
449 sbi->por_doing = true; 453 sbi->por_doing = true;
454
455 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
456
450 err = find_fsync_dnodes(sbi, &inode_list); 457 err = find_fsync_dnodes(sbi, &inode_list);
451 if (err) 458 if (err)
452 goto out; 459 goto out;
@@ -462,8 +469,21 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
462out: 469out:
463 destroy_fsync_dnodes(&inode_list); 470 destroy_fsync_dnodes(&inode_list);
464 kmem_cache_destroy(fsync_entry_slab); 471 kmem_cache_destroy(fsync_entry_slab);
472
473 if (err) {
474 truncate_inode_pages_final(NODE_MAPPING(sbi));
475 truncate_inode_pages_final(META_MAPPING(sbi));
476 }
477
465 sbi->por_doing = false; 478 sbi->por_doing = false;
466 if (!err && need_writecp) 479 if (err) {
480 discard_next_dnode(sbi, blkaddr);
481
482 /* Flush all the NAT/SIT pages */
483 while (get_pages(sbi, F2FS_DIRTY_META))
484 sync_meta_pages(sbi, META, LONG_MAX);
485 } else if (need_writecp) {
467 write_checkpoint(sbi, false); 486 write_checkpoint(sbi, false);
487 }
468 return err; 488 return err;
469} 489}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index d04613df710a..0dfeebae2a50 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -239,6 +239,12 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
239 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; 239 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
240 struct flush_cmd cmd; 240 struct flush_cmd cmd;
241 241
242 trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
243 test_opt(sbi, FLUSH_MERGE));
244
245 if (test_opt(sbi, NOBARRIER))
246 return 0;
247
242 if (!test_opt(sbi, FLUSH_MERGE)) 248 if (!test_opt(sbi, FLUSH_MERGE))
243 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); 249 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
244 250
@@ -272,13 +278,13 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
272 return -ENOMEM; 278 return -ENOMEM;
273 spin_lock_init(&fcc->issue_lock); 279 spin_lock_init(&fcc->issue_lock);
274 init_waitqueue_head(&fcc->flush_wait_queue); 280 init_waitqueue_head(&fcc->flush_wait_queue);
275 sbi->sm_info->cmd_control_info = fcc; 281 SM_I(sbi)->cmd_control_info = fcc;
276 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, 282 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
277 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); 283 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
278 if (IS_ERR(fcc->f2fs_issue_flush)) { 284 if (IS_ERR(fcc->f2fs_issue_flush)) {
279 err = PTR_ERR(fcc->f2fs_issue_flush); 285 err = PTR_ERR(fcc->f2fs_issue_flush);
280 kfree(fcc); 286 kfree(fcc);
281 sbi->sm_info->cmd_control_info = NULL; 287 SM_I(sbi)->cmd_control_info = NULL;
282 return err; 288 return err;
283 } 289 }
284 290
@@ -287,13 +293,12 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
287 293
288void destroy_flush_cmd_control(struct f2fs_sb_info *sbi) 294void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
289{ 295{
290 struct flush_cmd_control *fcc = 296 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
291 sbi->sm_info->cmd_control_info;
292 297
293 if (fcc && fcc->f2fs_issue_flush) 298 if (fcc && fcc->f2fs_issue_flush)
294 kthread_stop(fcc->f2fs_issue_flush); 299 kthread_stop(fcc->f2fs_issue_flush);
295 kfree(fcc); 300 kfree(fcc);
296 sbi->sm_info->cmd_control_info = NULL; 301 SM_I(sbi)->cmd_control_info = NULL;
297} 302}
298 303
299static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, 304static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
@@ -377,11 +382,8 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
377 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); 382 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
378} 383}
379 384
380void discard_next_dnode(struct f2fs_sb_info *sbi) 385void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
381{ 386{
382 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
383 block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
384
385 if (f2fs_issue_discard(sbi, blkaddr, 1)) { 387 if (f2fs_issue_discard(sbi, blkaddr, 1)) {
386 struct page *page = grab_meta_page(sbi, blkaddr); 388 struct page *page = grab_meta_page(sbi, blkaddr);
387 /* zero-filled page */ 389 /* zero-filled page */
@@ -437,17 +439,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi,
437static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) 439static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
438{ 440{
439 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 441 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
440 unsigned int segno = -1; 442 unsigned int segno;
441 unsigned int total_segs = TOTAL_SEGS(sbi); 443 unsigned int total_segs = TOTAL_SEGS(sbi);
442 444
443 mutex_lock(&dirty_i->seglist_lock); 445 mutex_lock(&dirty_i->seglist_lock);
444 while (1) { 446 for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], total_segs)
445 segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
446 segno + 1);
447 if (segno >= total_segs)
448 break;
449 __set_test_and_free(sbi, segno); 447 __set_test_and_free(sbi, segno);
450 }
451 mutex_unlock(&dirty_i->seglist_lock); 448 mutex_unlock(&dirty_i->seglist_lock);
452} 449}
453 450
@@ -974,14 +971,12 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
974{ 971{
975 struct sit_info *sit_i = SIT_I(sbi); 972 struct sit_info *sit_i = SIT_I(sbi);
976 struct curseg_info *curseg; 973 struct curseg_info *curseg;
977 unsigned int old_cursegno;
978 974
979 curseg = CURSEG_I(sbi, type); 975 curseg = CURSEG_I(sbi, type);
980 976
981 mutex_lock(&curseg->curseg_mutex); 977 mutex_lock(&curseg->curseg_mutex);
982 978
983 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 979 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
984 old_cursegno = curseg->segno;
985 980
986 /* 981 /*
987 * __add_sum_entry should be resided under the curseg_mutex 982 * __add_sum_entry should be resided under the curseg_mutex
@@ -1002,7 +997,6 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1002 * since SSR needs latest valid block information. 997 * since SSR needs latest valid block information.
1003 */ 998 */
1004 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); 999 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
1005 locate_dirty_segment(sbi, old_cursegno);
1006 1000
1007 mutex_unlock(&sit_i->sentry_lock); 1001 mutex_unlock(&sit_i->sentry_lock);
1008 1002
@@ -1532,7 +1526,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
1532 struct page *page = NULL; 1526 struct page *page = NULL;
1533 struct f2fs_sit_block *raw_sit = NULL; 1527 struct f2fs_sit_block *raw_sit = NULL;
1534 unsigned int start = 0, end = 0; 1528 unsigned int start = 0, end = 0;
1535 unsigned int segno = -1; 1529 unsigned int segno;
1536 bool flushed; 1530 bool flushed;
1537 1531
1538 mutex_lock(&curseg->curseg_mutex); 1532 mutex_lock(&curseg->curseg_mutex);
@@ -1544,7 +1538,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
1544 */ 1538 */
1545 flushed = flush_sits_in_journal(sbi); 1539 flushed = flush_sits_in_journal(sbi);
1546 1540
1547 while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) { 1541 for_each_set_bit(segno, bitmap, nsegs) {
1548 struct seg_entry *se = get_seg_entry(sbi, segno); 1542 struct seg_entry *se = get_seg_entry(sbi, segno);
1549 int sit_offset, offset; 1543 int sit_offset, offset;
1550 1544
@@ -1703,7 +1697,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
1703 struct curseg_info *array; 1697 struct curseg_info *array;
1704 int i; 1698 int i;
1705 1699
1706 array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); 1700 array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
1707 if (!array) 1701 if (!array)
1708 return -ENOMEM; 1702 return -ENOMEM;
1709 1703
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 7091204680f4..55973f7b0330 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -347,8 +347,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
347 if (test_and_clear_bit(segno, free_i->free_segmap)) { 347 if (test_and_clear_bit(segno, free_i->free_segmap)) {
348 free_i->free_segments++; 348 free_i->free_segments++;
349 349
350 next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), 350 next = find_next_bit(free_i->free_segmap,
351 start_segno); 351 start_segno + sbi->segs_per_sec, start_segno);
352 if (next >= start_segno + sbi->segs_per_sec) { 352 if (next >= start_segno + sbi->segs_per_sec) {
353 if (test_and_clear_bit(secno, free_i->free_secmap)) 353 if (test_and_clear_bit(secno, free_i->free_secmap))
354 free_i->free_sections++; 354 free_i->free_sections++;
@@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)
486 if (S_ISDIR(inode->i_mode)) 486 if (S_ISDIR(inode->i_mode))
487 return false; 487 return false;
488 488
489 /* this is only set during fdatasync */
490 if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
491 return true;
492
489 switch (SM_I(sbi)->ipu_policy) { 493 switch (SM_I(sbi)->ipu_policy) {
490 case F2FS_IPU_FORCE: 494 case F2FS_IPU_FORCE:
491 return true; 495 return true;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8f96d9372ade..657582fc7601 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -52,6 +52,7 @@ enum {
52 Opt_inline_xattr, 52 Opt_inline_xattr,
53 Opt_inline_data, 53 Opt_inline_data,
54 Opt_flush_merge, 54 Opt_flush_merge,
55 Opt_nobarrier,
55 Opt_err, 56 Opt_err,
56}; 57};
57 58
@@ -69,6 +70,7 @@ static match_table_t f2fs_tokens = {
69 {Opt_inline_xattr, "inline_xattr"}, 70 {Opt_inline_xattr, "inline_xattr"},
70 {Opt_inline_data, "inline_data"}, 71 {Opt_inline_data, "inline_data"},
71 {Opt_flush_merge, "flush_merge"}, 72 {Opt_flush_merge, "flush_merge"},
73 {Opt_nobarrier, "nobarrier"},
72 {Opt_err, NULL}, 74 {Opt_err, NULL},
73}; 75};
74 76
@@ -339,6 +341,9 @@ static int parse_options(struct super_block *sb, char *options)
339 case Opt_flush_merge: 341 case Opt_flush_merge:
340 set_opt(sbi, FLUSH_MERGE); 342 set_opt(sbi, FLUSH_MERGE);
341 break; 343 break;
344 case Opt_nobarrier:
345 set_opt(sbi, NOBARRIER);
346 break;
342 default: 347 default:
343 f2fs_msg(sb, KERN_ERR, 348 f2fs_msg(sb, KERN_ERR,
344 "Unrecognized mount option \"%s\" or missing value", 349 "Unrecognized mount option \"%s\" or missing value",
@@ -544,6 +549,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
544 seq_puts(seq, ",inline_data"); 549 seq_puts(seq, ",inline_data");
545 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) 550 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
546 seq_puts(seq, ",flush_merge"); 551 seq_puts(seq, ",flush_merge");
552 if (test_opt(sbi, NOBARRIER))
553 seq_puts(seq, ",nobarrier");
547 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 554 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
548 555
549 return 0; 556 return 0;
@@ -615,7 +622,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
615 * Previous and new state of filesystem is RO, 622 * Previous and new state of filesystem is RO,
616 * so skip checking GC and FLUSH_MERGE conditions. 623 * so skip checking GC and FLUSH_MERGE conditions.
617 */ 624 */
618 if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) 625 if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
619 goto skip; 626 goto skip;
620 627
621 /* 628 /*
@@ -642,8 +649,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
642 */ 649 */
643 if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { 650 if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
644 destroy_flush_cmd_control(sbi); 651 destroy_flush_cmd_control(sbi);
645 } else if (test_opt(sbi, FLUSH_MERGE) && 652 } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) {
646 !sbi->sm_info->cmd_control_info) {
647 err = create_flush_cmd_control(sbi); 653 err = create_flush_cmd_control(sbi);
648 if (err) 654 if (err)
649 goto restore_gc; 655 goto restore_gc;
@@ -947,7 +953,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
947 mutex_init(&sbi->gc_mutex); 953 mutex_init(&sbi->gc_mutex);
948 mutex_init(&sbi->writepages); 954 mutex_init(&sbi->writepages);
949 mutex_init(&sbi->cp_mutex); 955 mutex_init(&sbi->cp_mutex);
950 mutex_init(&sbi->node_write); 956 init_rwsem(&sbi->node_write);
951 sbi->por_doing = false; 957 sbi->por_doing = false;
952 spin_lock_init(&sbi->stat_lock); 958 spin_lock_init(&sbi->stat_lock);
953 959
@@ -997,7 +1003,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
997 INIT_LIST_HEAD(&sbi->dir_inode_list); 1003 INIT_LIST_HEAD(&sbi->dir_inode_list);
998 spin_lock_init(&sbi->dir_inode_lock); 1004 spin_lock_init(&sbi->dir_inode_lock);
999 1005
1000 init_orphan_info(sbi); 1006 init_ino_entry_info(sbi);
1001 1007
1002 /* setup f2fs internal modules */ 1008 /* setup f2fs internal modules */
1003 err = build_segment_manager(sbi); 1009 err = build_segment_manager(sbi);
@@ -1034,8 +1040,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1034 goto free_node_inode; 1040 goto free_node_inode;
1035 } 1041 }
1036 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 1042 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
1043 iput(root);
1037 err = -EINVAL; 1044 err = -EINVAL;
1038 goto free_root_inode; 1045 goto free_node_inode;
1039 } 1046 }
1040 1047
1041 sb->s_root = d_make_root(root); /* allocate root dentry */ 1048 sb->s_root = d_make_root(root); /* allocate root dentry */
@@ -1082,7 +1089,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1082 * If filesystem is not mounted as read-only then 1089 * If filesystem is not mounted as read-only then
1083 * do start the gc_thread. 1090 * do start the gc_thread.
1084 */ 1091 */
1085 if (!(sb->s_flags & MS_RDONLY)) { 1092 if (!f2fs_readonly(sb)) {
1086 /* After POR, we can run background GC thread.*/ 1093 /* After POR, we can run background GC thread.*/
1087 err = start_gc_thread(sbi); 1094 err = start_gc_thread(sbi);
1088 if (err) 1095 if (err)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index be568b7311d6..ef9bef118342 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -342,7 +342,8 @@ static void __inode_wait_for_writeback(struct inode *inode)
342 wqh = bit_waitqueue(&inode->i_state, __I_SYNC); 342 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
343 while (inode->i_state & I_SYNC) { 343 while (inode->i_state & I_SYNC) {
344 spin_unlock(&inode->i_lock); 344 spin_unlock(&inode->i_lock);
345 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); 345 __wait_on_bit(wqh, &wq, bit_wait,
346 TASK_UNINTERRUPTIBLE);
346 spin_lock(&inode->i_lock); 347 spin_lock(&inode->i_lock);
347 } 348 }
348} 349}
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index aec01be91b0a..89acec742e0b 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -160,7 +160,7 @@ void __fscache_enable_cookie(struct fscache_cookie *cookie,
160 _enter("%p", cookie); 160 _enter("%p", cookie);
161 161
162 wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK, 162 wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
163 fscache_wait_bit, TASK_UNINTERRUPTIBLE); 163 TASK_UNINTERRUPTIBLE);
164 164
165 if (test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags)) 165 if (test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
166 goto out_unlock; 166 goto out_unlock;
@@ -255,7 +255,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
255 if (!fscache_defer_lookup) { 255 if (!fscache_defer_lookup) {
256 _debug("non-deferred lookup %p", &cookie->flags); 256 _debug("non-deferred lookup %p", &cookie->flags);
257 wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP, 257 wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
258 fscache_wait_bit, TASK_UNINTERRUPTIBLE); 258 TASK_UNINTERRUPTIBLE);
259 _debug("complete"); 259 _debug("complete");
260 if (test_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags)) 260 if (test_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags))
261 goto unavailable; 261 goto unavailable;
@@ -463,7 +463,6 @@ void __fscache_wait_on_invalidate(struct fscache_cookie *cookie)
463 _enter("%p", cookie); 463 _enter("%p", cookie);
464 464
465 wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING, 465 wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING,
466 fscache_wait_bit_interruptible,
467 TASK_UNINTERRUPTIBLE); 466 TASK_UNINTERRUPTIBLE);
468 467
469 _leave(""); 468 _leave("");
@@ -525,7 +524,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
525 } 524 }
526 525
527 wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK, 526 wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
528 fscache_wait_bit, TASK_UNINTERRUPTIBLE); 527 TASK_UNINTERRUPTIBLE);
529 if (!test_and_clear_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags)) 528 if (!test_and_clear_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
530 goto out_unlock_enable; 529 goto out_unlock_enable;
531 530
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index bc6c08fcfddd..7872a62ef30c 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -97,8 +97,6 @@ static inline bool fscache_object_congested(void)
97 return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq); 97 return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);
98} 98}
99 99
100extern int fscache_wait_bit(void *);
101extern int fscache_wait_bit_interruptible(void *);
102extern int fscache_wait_atomic_t(atomic_t *); 100extern int fscache_wait_atomic_t(atomic_t *);
103 101
104/* 102/*
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 63f868e869b9..a31b83c5cbd9 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -197,24 +197,6 @@ static void __exit fscache_exit(void)
197module_exit(fscache_exit); 197module_exit(fscache_exit);
198 198
199/* 199/*
200 * wait_on_bit() sleep function for uninterruptible waiting
201 */
202int fscache_wait_bit(void *flags)
203{
204 schedule();
205 return 0;
206}
207
208/*
209 * wait_on_bit() sleep function for interruptible waiting
210 */
211int fscache_wait_bit_interruptible(void *flags)
212{
213 schedule();
214 return signal_pending(current);
215}
216
217/*
218 * wait_on_atomic_t() sleep function for uninterruptible waiting 200 * wait_on_atomic_t() sleep function for uninterruptible waiting
219 */ 201 */
220int fscache_wait_atomic_t(atomic_t *p) 202int fscache_wait_atomic_t(atomic_t *p)
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index ed70714503fa..85332b9d19d1 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -298,7 +298,6 @@ int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)
298 298
299 jif = jiffies; 299 jif = jiffies;
300 if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP, 300 if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
301 fscache_wait_bit_interruptible,
302 TASK_INTERRUPTIBLE) != 0) { 301 TASK_INTERRUPTIBLE) != 0) {
303 fscache_stat(&fscache_n_retrievals_intr); 302 fscache_stat(&fscache_n_retrievals_intr);
304 _leave(" = -ERESTARTSYS"); 303 _leave(" = -ERESTARTSYS");
@@ -342,7 +341,6 @@ int fscache_wait_for_operation_activation(struct fscache_object *object,
342 if (stat_op_waits) 341 if (stat_op_waits)
343 fscache_stat(stat_op_waits); 342 fscache_stat(stat_op_waits);
344 if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING, 343 if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
345 fscache_wait_bit_interruptible,
346 TASK_INTERRUPTIBLE) != 0) { 344 TASK_INTERRUPTIBLE) != 0) {
347 ret = fscache_cancel_op(op, do_cancel); 345 ret = fscache_cancel_op(op, do_cancel);
348 if (ret == 0) 346 if (ret == 0)
@@ -351,7 +349,7 @@ int fscache_wait_for_operation_activation(struct fscache_object *object,
351 /* it's been removed from the pending queue by another party, 349 /* it's been removed from the pending queue by another party,
352 * so we should get to run shortly */ 350 * so we should get to run shortly */
353 wait_on_bit(&op->flags, FSCACHE_OP_WAITING, 351 wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
354 fscache_wait_bit, TASK_UNINTERRUPTIBLE); 352 TASK_UNINTERRUPTIBLE);
355 } 353 }
356 _debug("<<< GO"); 354 _debug("<<< GO");
357 355
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 098f97bdcf1b..ca887314aba9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -643,9 +643,8 @@ struct fuse_copy_state {
643 unsigned long seglen; 643 unsigned long seglen;
644 unsigned long addr; 644 unsigned long addr;
645 struct page *pg; 645 struct page *pg;
646 void *mapaddr;
647 void *buf;
648 unsigned len; 646 unsigned len;
647 unsigned offset;
649 unsigned move_pages:1; 648 unsigned move_pages:1;
650}; 649};
651 650
@@ -666,23 +665,17 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
666 if (cs->currbuf) { 665 if (cs->currbuf) {
667 struct pipe_buffer *buf = cs->currbuf; 666 struct pipe_buffer *buf = cs->currbuf;
668 667
669 if (!cs->write) { 668 if (cs->write)
670 kunmap_atomic(cs->mapaddr);
671 } else {
672 kunmap_atomic(cs->mapaddr);
673 buf->len = PAGE_SIZE - cs->len; 669 buf->len = PAGE_SIZE - cs->len;
674 }
675 cs->currbuf = NULL; 670 cs->currbuf = NULL;
676 cs->mapaddr = NULL; 671 } else if (cs->pg) {
677 } else if (cs->mapaddr) {
678 kunmap_atomic(cs->mapaddr);
679 if (cs->write) { 672 if (cs->write) {
680 flush_dcache_page(cs->pg); 673 flush_dcache_page(cs->pg);
681 set_page_dirty_lock(cs->pg); 674 set_page_dirty_lock(cs->pg);
682 } 675 }
683 put_page(cs->pg); 676 put_page(cs->pg);
684 cs->mapaddr = NULL;
685 } 677 }
678 cs->pg = NULL;
686} 679}
687 680
688/* 681/*
@@ -691,7 +684,7 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
691 */ 684 */
692static int fuse_copy_fill(struct fuse_copy_state *cs) 685static int fuse_copy_fill(struct fuse_copy_state *cs)
693{ 686{
694 unsigned long offset; 687 struct page *page;
695 int err; 688 int err;
696 689
697 unlock_request(cs->fc, cs->req); 690 unlock_request(cs->fc, cs->req);
@@ -706,14 +699,12 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
706 699
707 BUG_ON(!cs->nr_segs); 700 BUG_ON(!cs->nr_segs);
708 cs->currbuf = buf; 701 cs->currbuf = buf;
709 cs->mapaddr = kmap_atomic(buf->page); 702 cs->pg = buf->page;
703 cs->offset = buf->offset;
710 cs->len = buf->len; 704 cs->len = buf->len;
711 cs->buf = cs->mapaddr + buf->offset;
712 cs->pipebufs++; 705 cs->pipebufs++;
713 cs->nr_segs--; 706 cs->nr_segs--;
714 } else { 707 } else {
715 struct page *page;
716
717 if (cs->nr_segs == cs->pipe->buffers) 708 if (cs->nr_segs == cs->pipe->buffers)
718 return -EIO; 709 return -EIO;
719 710
@@ -726,8 +717,8 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
726 buf->len = 0; 717 buf->len = 0;
727 718
728 cs->currbuf = buf; 719 cs->currbuf = buf;
729 cs->mapaddr = kmap_atomic(page); 720 cs->pg = page;
730 cs->buf = cs->mapaddr; 721 cs->offset = 0;
731 cs->len = PAGE_SIZE; 722 cs->len = PAGE_SIZE;
732 cs->pipebufs++; 723 cs->pipebufs++;
733 cs->nr_segs++; 724 cs->nr_segs++;
@@ -740,14 +731,13 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
740 cs->iov++; 731 cs->iov++;
741 cs->nr_segs--; 732 cs->nr_segs--;
742 } 733 }
743 err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg); 734 err = get_user_pages_fast(cs->addr, 1, cs->write, &page);
744 if (err < 0) 735 if (err < 0)
745 return err; 736 return err;
746 BUG_ON(err != 1); 737 BUG_ON(err != 1);
747 offset = cs->addr % PAGE_SIZE; 738 cs->pg = page;
748 cs->mapaddr = kmap_atomic(cs->pg); 739 cs->offset = cs->addr % PAGE_SIZE;
749 cs->buf = cs->mapaddr + offset; 740 cs->len = min(PAGE_SIZE - cs->offset, cs->seglen);
750 cs->len = min(PAGE_SIZE - offset, cs->seglen);
751 cs->seglen -= cs->len; 741 cs->seglen -= cs->len;
752 cs->addr += cs->len; 742 cs->addr += cs->len;
753 } 743 }
@@ -760,15 +750,20 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
760{ 750{
761 unsigned ncpy = min(*size, cs->len); 751 unsigned ncpy = min(*size, cs->len);
762 if (val) { 752 if (val) {
753 void *pgaddr = kmap_atomic(cs->pg);
754 void *buf = pgaddr + cs->offset;
755
763 if (cs->write) 756 if (cs->write)
764 memcpy(cs->buf, *val, ncpy); 757 memcpy(buf, *val, ncpy);
765 else 758 else
766 memcpy(*val, cs->buf, ncpy); 759 memcpy(*val, buf, ncpy);
760
761 kunmap_atomic(pgaddr);
767 *val += ncpy; 762 *val += ncpy;
768 } 763 }
769 *size -= ncpy; 764 *size -= ncpy;
770 cs->len -= ncpy; 765 cs->len -= ncpy;
771 cs->buf += ncpy; 766 cs->offset += ncpy;
772 return ncpy; 767 return ncpy;
773} 768}
774 769
@@ -874,8 +869,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
874out_fallback_unlock: 869out_fallback_unlock:
875 unlock_page(newpage); 870 unlock_page(newpage);
876out_fallback: 871out_fallback:
877 cs->mapaddr = kmap_atomic(buf->page); 872 cs->pg = buf->page;
878 cs->buf = cs->mapaddr + buf->offset; 873 cs->offset = buf->offset;
879 874
880 err = lock_request(cs->fc, cs->req); 875 err = lock_request(cs->fc, cs->req);
881 if (err) 876 if (err)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 42198359fa1b..0c6048247a34 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -198,7 +198,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
198 inode = ACCESS_ONCE(entry->d_inode); 198 inode = ACCESS_ONCE(entry->d_inode);
199 if (inode && is_bad_inode(inode)) 199 if (inode && is_bad_inode(inode))
200 goto invalid; 200 goto invalid;
201 else if (fuse_dentry_time(entry) < get_jiffies_64()) { 201 else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
202 (flags & LOOKUP_REVAL)) {
202 int err; 203 int err;
203 struct fuse_entry_out outarg; 204 struct fuse_entry_out outarg;
204 struct fuse_req *req; 205 struct fuse_req *req;
@@ -814,13 +815,6 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
814 return err; 815 return err;
815} 816}
816 817
817static int fuse_rename(struct inode *olddir, struct dentry *oldent,
818 struct inode *newdir, struct dentry *newent)
819{
820 return fuse_rename_common(olddir, oldent, newdir, newent, 0,
821 FUSE_RENAME, sizeof(struct fuse_rename_in));
822}
823
824static int fuse_rename2(struct inode *olddir, struct dentry *oldent, 818static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
825 struct inode *newdir, struct dentry *newent, 819 struct inode *newdir, struct dentry *newent,
826 unsigned int flags) 820 unsigned int flags)
@@ -831,17 +825,30 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
831 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) 825 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
832 return -EINVAL; 826 return -EINVAL;
833 827
834 if (fc->no_rename2 || fc->minor < 23) 828 if (flags) {
835 return -EINVAL; 829 if (fc->no_rename2 || fc->minor < 23)
830 return -EINVAL;
836 831
837 err = fuse_rename_common(olddir, oldent, newdir, newent, flags, 832 err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
838 FUSE_RENAME2, sizeof(struct fuse_rename2_in)); 833 FUSE_RENAME2,
839 if (err == -ENOSYS) { 834 sizeof(struct fuse_rename2_in));
840 fc->no_rename2 = 1; 835 if (err == -ENOSYS) {
841 err = -EINVAL; 836 fc->no_rename2 = 1;
837 err = -EINVAL;
838 }
839 } else {
840 err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
841 FUSE_RENAME,
842 sizeof(struct fuse_rename_in));
842 } 843 }
844
843 return err; 845 return err;
846}
844 847
848static int fuse_rename(struct inode *olddir, struct dentry *oldent,
849 struct inode *newdir, struct dentry *newent)
850{
851 return fuse_rename2(olddir, oldent, newdir, newent, 0);
845} 852}
846 853
847static int fuse_link(struct dentry *entry, struct inode *newdir, 854static int fuse_link(struct dentry *entry, struct inode *newdir,
@@ -985,7 +992,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
985 int err; 992 int err;
986 bool r; 993 bool r;
987 994
988 if (fi->i_time < get_jiffies_64()) { 995 if (time_before64(fi->i_time, get_jiffies_64())) {
989 r = true; 996 r = true;
990 err = fuse_do_getattr(inode, stat, file); 997 err = fuse_do_getattr(inode, stat, file);
991 } else { 998 } else {
@@ -1171,7 +1178,7 @@ static int fuse_permission(struct inode *inode, int mask)
1171 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { 1178 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1172 struct fuse_inode *fi = get_fuse_inode(inode); 1179 struct fuse_inode *fi = get_fuse_inode(inode);
1173 1180
1174 if (fi->i_time < get_jiffies_64()) { 1181 if (time_before64(fi->i_time, get_jiffies_64())) {
1175 refreshed = true; 1182 refreshed = true;
1176 1183
1177 err = fuse_perm_getattr(inode, mask); 1184 err = fuse_perm_getattr(inode, mask);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6e16dad13e9b..40ac2628ddcf 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1687,7 +1687,7 @@ static int fuse_writepage_locked(struct page *page)
1687 error = -EIO; 1687 error = -EIO;
1688 req->ff = fuse_write_file_get(fc, fi); 1688 req->ff = fuse_write_file_get(fc, fi);
1689 if (!req->ff) 1689 if (!req->ff)
1690 goto err_free; 1690 goto err_nofile;
1691 1691
1692 fuse_write_fill(req, req->ff, page_offset(page), 0); 1692 fuse_write_fill(req, req->ff, page_offset(page), 0);
1693 1693
@@ -1715,6 +1715,8 @@ static int fuse_writepage_locked(struct page *page)
1715 1715
1716 return 0; 1716 return 0;
1717 1717
1718err_nofile:
1719 __free_page(tmp_page);
1718err_free: 1720err_free:
1719 fuse_request_free(req); 1721 fuse_request_free(req);
1720err: 1722err:
@@ -1955,8 +1957,8 @@ static int fuse_writepages(struct address_space *mapping,
1955 data.ff = NULL; 1957 data.ff = NULL;
1956 1958
1957 err = -ENOMEM; 1959 err = -ENOMEM;
1958 data.orig_pages = kzalloc(sizeof(struct page *) * 1960 data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ,
1959 FUSE_MAX_PAGES_PER_REQ, 1961 sizeof(struct page *),
1960 GFP_NOFS); 1962 GFP_NOFS);
1961 if (!data.orig_pages) 1963 if (!data.orig_pages)
1962 goto out; 1964 goto out;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 754dcf23de8a..03246cd9d47a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -478,6 +478,17 @@ static const match_table_t tokens = {
478 {OPT_ERR, NULL} 478 {OPT_ERR, NULL}
479}; 479};
480 480
481static int fuse_match_uint(substring_t *s, unsigned int *res)
482{
483 int err = -ENOMEM;
484 char *buf = match_strdup(s);
485 if (buf) {
486 err = kstrtouint(buf, 10, res);
487 kfree(buf);
488 }
489 return err;
490}
491
481static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) 492static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
482{ 493{
483 char *p; 494 char *p;
@@ -488,6 +499,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
488 while ((p = strsep(&opt, ",")) != NULL) { 499 while ((p = strsep(&opt, ",")) != NULL) {
489 int token; 500 int token;
490 int value; 501 int value;
502 unsigned uv;
491 substring_t args[MAX_OPT_ARGS]; 503 substring_t args[MAX_OPT_ARGS];
492 if (!*p) 504 if (!*p)
493 continue; 505 continue;
@@ -511,18 +523,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
511 break; 523 break;
512 524
513 case OPT_USER_ID: 525 case OPT_USER_ID:
514 if (match_int(&args[0], &value)) 526 if (fuse_match_uint(&args[0], &uv))
515 return 0; 527 return 0;
516 d->user_id = make_kuid(current_user_ns(), value); 528 d->user_id = make_kuid(current_user_ns(), uv);
517 if (!uid_valid(d->user_id)) 529 if (!uid_valid(d->user_id))
518 return 0; 530 return 0;
519 d->user_id_present = 1; 531 d->user_id_present = 1;
520 break; 532 break;
521 533
522 case OPT_GROUP_ID: 534 case OPT_GROUP_ID:
523 if (match_int(&args[0], &value)) 535 if (fuse_match_uint(&args[0], &uv))
524 return 0; 536 return 0;
525 d->group_id = make_kgid(current_user_ns(), value); 537 d->group_id = make_kgid(current_user_ns(), uv);
526 if (!gid_valid(d->group_id)) 538 if (!gid_valid(d->group_id))
527 return 0; 539 return 0;
528 d->group_id_present = 1; 540 d->group_id_present = 1;
@@ -895,9 +907,6 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
895 fc->writeback_cache = 1; 907 fc->writeback_cache = 1;
896 if (arg->time_gran && arg->time_gran <= 1000000000) 908 if (arg->time_gran && arg->time_gran <= 1000000000)
897 fc->sb->s_time_gran = arg->time_gran; 909 fc->sb->s_time_gran = arg->time_gran;
898 else
899 fc->sb->s_time_gran = 1000000000;
900
901 } else { 910 } else {
902 ra_pages = fc->max_read / PAGE_CACHE_SIZE; 911 ra_pages = fc->max_read / PAGE_CACHE_SIZE;
903 fc->no_lock = 1; 912 fc->no_lock = 1;
@@ -926,7 +935,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
926 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | 935 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
927 FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | 936 FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
928 FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | 937 FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
929 FUSE_WRITEBACK_CACHE; 938 FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
930 req->in.h.opcode = FUSE_INIT; 939 req->in.h.opcode = FUSE_INIT;
931 req->in.numargs = 1; 940 req->in.numargs = 1;
932 req->in.args[0].size = sizeof(*arg); 941 req->in.args[0].size = sizeof(*arg);
@@ -1006,7 +1015,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
1006 1015
1007 sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION); 1016 sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION);
1008 1017
1009 if (!parse_fuse_opt((char *) data, &d, is_bdev)) 1018 if (!parse_fuse_opt(data, &d, is_bdev))
1010 goto err; 1019 goto err;
1011 1020
1012 if (is_bdev) { 1021 if (is_bdev) {
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4fc3a3046174..26b3f952e6b1 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -981,7 +981,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
981 int error = 0; 981 int error = 0;
982 982
983 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; 983 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
984 flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; 984 flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT;
985 985
986 mutex_lock(&fp->f_fl_mutex); 986 mutex_lock(&fp->f_fl_mutex);
987 987
@@ -991,7 +991,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
991 goto out; 991 goto out;
992 flock_lock_file_wait(file, 992 flock_lock_file_wait(file,
993 &(struct file_lock){.fl_type = F_UNLCK}); 993 &(struct file_lock){.fl_type = F_UNLCK});
994 gfs2_glock_dq_wait(fl_gh); 994 gfs2_glock_dq(fl_gh);
995 gfs2_holder_reinit(state, flags, fl_gh); 995 gfs2_holder_reinit(state, flags, fl_gh);
996 } else { 996 } else {
997 error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr, 997 error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c355f7320e44..7f513b1ceb2c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -731,14 +731,14 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
731 cachep = gfs2_glock_aspace_cachep; 731 cachep = gfs2_glock_aspace_cachep;
732 else 732 else
733 cachep = gfs2_glock_cachep; 733 cachep = gfs2_glock_cachep;
734 gl = kmem_cache_alloc(cachep, GFP_KERNEL); 734 gl = kmem_cache_alloc(cachep, GFP_NOFS);
735 if (!gl) 735 if (!gl)
736 return -ENOMEM; 736 return -ENOMEM;
737 737
738 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); 738 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
739 739
740 if (glops->go_flags & GLOF_LVB) { 740 if (glops->go_flags & GLOF_LVB) {
741 gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL); 741 gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_NOFS);
742 if (!gl->gl_lksb.sb_lvbptr) { 742 if (!gl->gl_lksb.sb_lvbptr) {
743 kmem_cache_free(cachep, gl); 743 kmem_cache_free(cachep, gl);
744 return -ENOMEM; 744 return -ENOMEM;
@@ -856,27 +856,6 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
856} 856}
857 857
858/** 858/**
859 * gfs2_glock_holder_wait
860 * @word: unused
861 *
862 * This function and gfs2_glock_demote_wait both show up in the WCHAN
863 * field. Thus I've separated these otherwise identical functions in
864 * order to be more informative to the user.
865 */
866
867static int gfs2_glock_holder_wait(void *word)
868{
869 schedule();
870 return 0;
871}
872
873static int gfs2_glock_demote_wait(void *word)
874{
875 schedule();
876 return 0;
877}
878
879/**
880 * gfs2_glock_wait - wait on a glock acquisition 859 * gfs2_glock_wait - wait on a glock acquisition
881 * @gh: the glock holder 860 * @gh: the glock holder
882 * 861 *
@@ -888,7 +867,7 @@ int gfs2_glock_wait(struct gfs2_holder *gh)
888 unsigned long time1 = jiffies; 867 unsigned long time1 = jiffies;
889 868
890 might_sleep(); 869 might_sleep();
891 wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE); 870 wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
892 if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */ 871 if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */
893 /* Lengthen the minimum hold time. */ 872 /* Lengthen the minimum hold time. */
894 gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + 873 gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
@@ -1128,7 +1107,7 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh)
1128 struct gfs2_glock *gl = gh->gh_gl; 1107 struct gfs2_glock *gl = gh->gh_gl;
1129 gfs2_glock_dq(gh); 1108 gfs2_glock_dq(gh);
1130 might_sleep(); 1109 might_sleep();
1131 wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); 1110 wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
1132} 1111}
1133 1112
1134/** 1113/**
@@ -1404,12 +1383,16 @@ __acquires(&lru_lock)
1404 gl = list_entry(list->next, struct gfs2_glock, gl_lru); 1383 gl = list_entry(list->next, struct gfs2_glock, gl_lru);
1405 list_del_init(&gl->gl_lru); 1384 list_del_init(&gl->gl_lru);
1406 if (!spin_trylock(&gl->gl_spin)) { 1385 if (!spin_trylock(&gl->gl_spin)) {
1386add_back_to_lru:
1407 list_add(&gl->gl_lru, &lru_list); 1387 list_add(&gl->gl_lru, &lru_list);
1408 atomic_inc(&lru_count); 1388 atomic_inc(&lru_count);
1409 continue; 1389 continue;
1410 } 1390 }
1391 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
1392 spin_unlock(&gl->gl_spin);
1393 goto add_back_to_lru;
1394 }
1411 clear_bit(GLF_LRU, &gl->gl_flags); 1395 clear_bit(GLF_LRU, &gl->gl_flags);
1412 spin_unlock(&lru_lock);
1413 gl->gl_lockref.count++; 1396 gl->gl_lockref.count++;
1414 if (demote_ok(gl)) 1397 if (demote_ok(gl))
1415 handle_callback(gl, LM_ST_UNLOCKED, 0, false); 1398 handle_callback(gl, LM_ST_UNLOCKED, 0, false);
@@ -1417,7 +1400,7 @@ __acquires(&lru_lock)
1417 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 1400 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1418 gl->gl_lockref.count--; 1401 gl->gl_lockref.count--;
1419 spin_unlock(&gl->gl_spin); 1402 spin_unlock(&gl->gl_spin);
1420 spin_lock(&lru_lock); 1403 cond_resched_lock(&lru_lock);
1421 } 1404 }
1422} 1405}
1423 1406
@@ -1442,7 +1425,7 @@ static long gfs2_scan_glock_lru(int nr)
1442 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); 1425 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1443 1426
1444 /* Test for being demotable */ 1427 /* Test for being demotable */
1445 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 1428 if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
1446 list_move(&gl->gl_lru, &dispose); 1429 list_move(&gl->gl_lru, &dispose);
1447 atomic_dec(&lru_count); 1430 atomic_dec(&lru_count);
1448 freed++; 1431 freed++;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index fc1100781bbc..2ffc67dce87f 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -234,8 +234,8 @@ static void inode_go_sync(struct gfs2_glock *gl)
234 * inode_go_inval - prepare a inode glock to be released 234 * inode_go_inval - prepare a inode glock to be released
235 * @gl: the glock 235 * @gl: the glock
236 * @flags: 236 * @flags:
237 * 237 *
238 * Normally we invlidate everything, but if we are moving into 238 * Normally we invalidate everything, but if we are moving into
239 * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we 239 * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we
240 * can keep hold of the metadata, since it won't have changed. 240 * can keep hold of the metadata, since it won't have changed.
241 * 241 *
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 91f274de1246..641383a9c1bb 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -936,12 +936,6 @@ fail:
936 return error; 936 return error;
937} 937}
938 938
939static int dlm_recovery_wait(void *word)
940{
941 schedule();
942 return 0;
943}
944
945static int control_first_done(struct gfs2_sbd *sdp) 939static int control_first_done(struct gfs2_sbd *sdp)
946{ 940{
947 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 941 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -976,7 +970,7 @@ restart:
976 fs_info(sdp, "control_first_done wait gen %u\n", start_gen); 970 fs_info(sdp, "control_first_done wait gen %u\n", start_gen);
977 971
978 wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY, 972 wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY,
979 dlm_recovery_wait, TASK_UNINTERRUPTIBLE); 973 TASK_UNINTERRUPTIBLE);
980 goto restart; 974 goto restart;
981 } 975 }
982 976
@@ -1036,8 +1030,8 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
1036 1030
1037 new_size = old_size + RECOVER_SIZE_INC; 1031 new_size = old_size + RECOVER_SIZE_INC;
1038 1032
1039 submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); 1033 submit = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
1040 result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); 1034 result = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
1041 if (!submit || !result) { 1035 if (!submit || !result) {
1042 kfree(submit); 1036 kfree(submit);
1043 kfree(result); 1037 kfree(result);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index bc564c0d6d16..d3eae244076e 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1024,20 +1024,13 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp)
1024 lm->lm_unmount(sdp); 1024 lm->lm_unmount(sdp);
1025} 1025}
1026 1026
1027static int gfs2_journalid_wait(void *word)
1028{
1029 if (signal_pending(current))
1030 return -EINTR;
1031 schedule();
1032 return 0;
1033}
1034
1035static int wait_on_journal(struct gfs2_sbd *sdp) 1027static int wait_on_journal(struct gfs2_sbd *sdp)
1036{ 1028{
1037 if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) 1029 if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
1038 return 0; 1030 return 0;
1039 1031
1040 return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, gfs2_journalid_wait, TASK_INTERRUPTIBLE); 1032 return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, TASK_INTERRUPTIBLE)
1033 ? -EINTR : 0;
1041} 1034}
1042 1035
1043void gfs2_online_uevent(struct gfs2_sbd *sdp) 1036void gfs2_online_uevent(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 94555d4c5698..573bd3b758fa 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -591,12 +591,6 @@ done:
591 wake_up_bit(&jd->jd_flags, JDF_RECOVERY); 591 wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
592} 592}
593 593
594static int gfs2_recovery_wait(void *word)
595{
596 schedule();
597 return 0;
598}
599
600int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait) 594int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
601{ 595{
602 int rv; 596 int rv;
@@ -609,7 +603,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
609 BUG_ON(!rv); 603 BUG_ON(!rv);
610 604
611 if (wait) 605 if (wait)
612 wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, 606 wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
613 TASK_UNINTERRUPTIBLE); 607 TASK_UNINTERRUPTIBLE);
614 608
615 return wait ? jd->jd_recover_error : 0; 609 return wait ? jd->jd_recover_error : 0;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index db629d1bd1bd..f4cb9c0d6bbd 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -337,7 +337,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le
337 337
338/** 338/**
339 * gfs2_free_extlen - Return extent length of free blocks 339 * gfs2_free_extlen - Return extent length of free blocks
340 * @rbm: Starting position 340 * @rrbm: Starting position
341 * @len: Max length to check 341 * @len: Max length to check
342 * 342 *
343 * Starting at the block specified by the rbm, see how many free blocks 343 * Starting at the block specified by the rbm, see how many free blocks
@@ -2522,7 +2522,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
2522 2522
2523/** 2523/**
2524 * gfs2_rlist_free - free a resource group list 2524 * gfs2_rlist_free - free a resource group list
2525 * @list: the list of resource groups 2525 * @rlist: the list of resource groups
2526 * 2526 *
2527 */ 2527 */
2528 2528
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 1319b5c4ec68..2607ff13d486 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -864,12 +864,6 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
864 return error; 864 return error;
865} 865}
866 866
867static int gfs2_umount_recovery_wait(void *word)
868{
869 schedule();
870 return 0;
871}
872
873/** 867/**
874 * gfs2_put_super - Unmount the filesystem 868 * gfs2_put_super - Unmount the filesystem
875 * @sb: The VFS superblock 869 * @sb: The VFS superblock
@@ -894,7 +888,7 @@ restart:
894 continue; 888 continue;
895 spin_unlock(&sdp->sd_jindex_spin); 889 spin_unlock(&sdp->sd_jindex_spin);
896 wait_on_bit(&jd->jd_flags, JDF_RECOVERY, 890 wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
897 gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE); 891 TASK_UNINTERRUPTIBLE);
898 goto restart; 892 goto restart;
899 } 893 }
900 spin_unlock(&sdp->sd_jindex_spin); 894 spin_unlock(&sdp->sd_jindex_spin);
diff --git a/fs/inode.c b/fs/inode.c
index 6eecb7ff0b9a..5938f3928944 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1695,13 +1695,6 @@ int inode_needs_sync(struct inode *inode)
1695} 1695}
1696EXPORT_SYMBOL(inode_needs_sync); 1696EXPORT_SYMBOL(inode_needs_sync);
1697 1697
1698int inode_wait(void *word)
1699{
1700 schedule();
1701 return 0;
1702}
1703EXPORT_SYMBOL(inode_wait);
1704
1705/* 1698/*
1706 * If we try to find an inode in the inode hash while it is being 1699 * If we try to find an inode in the inode hash while it is being
1707 * deleted, we have to wait until the filesystem completes its 1700 * deleted, we have to wait until the filesystem completes its
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 6f0f590cc5a3..5f09370c90a8 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -763,12 +763,6 @@ static void warn_dirty_buffer(struct buffer_head *bh)
763 bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); 763 bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
764} 764}
765 765
766static int sleep_on_shadow_bh(void *word)
767{
768 io_schedule();
769 return 0;
770}
771
772/* 766/*
773 * If the buffer is already part of the current transaction, then there 767 * If the buffer is already part of the current transaction, then there
774 * is nothing we need to do. If it is already part of a prior 768 * is nothing we need to do. If it is already part of a prior
@@ -906,8 +900,8 @@ repeat:
906 if (buffer_shadow(bh)) { 900 if (buffer_shadow(bh)) {
907 JBUFFER_TRACE(jh, "on shadow: sleep"); 901 JBUFFER_TRACE(jh, "on shadow: sleep");
908 jbd_unlock_bh_state(bh); 902 jbd_unlock_bh_state(bh);
909 wait_on_bit(&bh->b_state, BH_Shadow, 903 wait_on_bit_io(&bh->b_state, BH_Shadow,
910 sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE); 904 TASK_UNINTERRUPTIBLE);
911 goto repeat; 905 goto repeat;
912 } 906 }
913 907
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index d895b4b7b661..4429d6d9217f 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -896,7 +896,7 @@ const struct file_operations kernfs_file_fops = {
896 * @ops: kernfs operations for the file 896 * @ops: kernfs operations for the file
897 * @priv: private data for the file 897 * @priv: private data for the file
898 * @ns: optional namespace tag of the file 898 * @ns: optional namespace tag of the file
899 * @static_name: don't copy file name 899 * @name_is_static: don't copy file name
900 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep 900 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
901 * 901 *
902 * Returns the created node on success, ERR_PTR() value on error. 902 * Returns the created node on success, ERR_PTR() value on error.
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 1812f026960c..daa8e7514eae 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -306,11 +306,9 @@ static struct nsm_handle *nsm_lookup_priv(const struct nsm_private *priv)
306static void nsm_init_private(struct nsm_handle *nsm) 306static void nsm_init_private(struct nsm_handle *nsm)
307{ 307{
308 u64 *p = (u64 *)&nsm->sm_priv.data; 308 u64 *p = (u64 *)&nsm->sm_priv.data;
309 struct timespec ts;
310 s64 ns; 309 s64 ns;
311 310
312 ktime_get_ts(&ts); 311 ns = ktime_get_ns();
313 ns = timespec_to_ns(&ts);
314 put_unaligned(ns, p); 312 put_unaligned(ns, p);
315 put_unaligned((unsigned long)nsm, p + 1); 313 put_unaligned((unsigned long)nsm, p + 1);
316} 314}
diff --git a/fs/locks.c b/fs/locks.c
index 717fbc404e6b..a6f54802d277 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -325,7 +325,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock,
325 return -ENOMEM; 325 return -ENOMEM;
326 326
327 fl->fl_file = filp; 327 fl->fl_file = filp;
328 fl->fl_owner = (fl_owner_t)filp; 328 fl->fl_owner = filp;
329 fl->fl_pid = current->tgid; 329 fl->fl_pid = current->tgid;
330 fl->fl_flags = FL_FLOCK; 330 fl->fl_flags = FL_FLOCK;
331 fl->fl_type = type; 331 fl->fl_type = type;
@@ -431,7 +431,7 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl)
431 if (assign_type(fl, type) != 0) 431 if (assign_type(fl, type) != 0)
432 return -EINVAL; 432 return -EINVAL;
433 433
434 fl->fl_owner = (fl_owner_t)current->files; 434 fl->fl_owner = current->files;
435 fl->fl_pid = current->tgid; 435 fl->fl_pid = current->tgid;
436 436
437 fl->fl_file = filp; 437 fl->fl_file = filp;
@@ -1155,7 +1155,6 @@ EXPORT_SYMBOL(posix_lock_file_wait);
1155int locks_mandatory_locked(struct file *file) 1155int locks_mandatory_locked(struct file *file)
1156{ 1156{
1157 struct inode *inode = file_inode(file); 1157 struct inode *inode = file_inode(file);
1158 fl_owner_t owner = current->files;
1159 struct file_lock *fl; 1158 struct file_lock *fl;
1160 1159
1161 /* 1160 /*
@@ -1165,7 +1164,8 @@ int locks_mandatory_locked(struct file *file)
1165 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 1164 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
1166 if (!IS_POSIX(fl)) 1165 if (!IS_POSIX(fl))
1167 continue; 1166 continue;
1168 if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file) 1167 if (fl->fl_owner != current->files &&
1168 fl->fl_owner != file)
1169 break; 1169 break;
1170 } 1170 }
1171 spin_unlock(&inode->i_lock); 1171 spin_unlock(&inode->i_lock);
@@ -1205,7 +1205,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
1205 1205
1206 for (;;) { 1206 for (;;) {
1207 if (filp) { 1207 if (filp) {
1208 fl.fl_owner = (fl_owner_t)filp; 1208 fl.fl_owner = filp;
1209 fl.fl_flags &= ~FL_SLEEP; 1209 fl.fl_flags &= ~FL_SLEEP;
1210 error = __posix_lock_file(inode, &fl, NULL); 1210 error = __posix_lock_file(inode, &fl, NULL);
1211 if (!error) 1211 if (!error)
@@ -1948,7 +1948,7 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
1948 1948
1949 cmd = F_GETLK; 1949 cmd = F_GETLK;
1950 file_lock.fl_flags |= FL_OFDLCK; 1950 file_lock.fl_flags |= FL_OFDLCK;
1951 file_lock.fl_owner = (fl_owner_t)filp; 1951 file_lock.fl_owner = filp;
1952 } 1952 }
1953 1953
1954 error = vfs_test_lock(filp, &file_lock); 1954 error = vfs_test_lock(filp, &file_lock);
@@ -2103,7 +2103,7 @@ again:
2103 2103
2104 cmd = F_SETLK; 2104 cmd = F_SETLK;
2105 file_lock->fl_flags |= FL_OFDLCK; 2105 file_lock->fl_flags |= FL_OFDLCK;
2106 file_lock->fl_owner = (fl_owner_t)filp; 2106 file_lock->fl_owner = filp;
2107 break; 2107 break;
2108 case F_OFD_SETLKW: 2108 case F_OFD_SETLKW:
2109 error = -EINVAL; 2109 error = -EINVAL;
@@ -2112,7 +2112,7 @@ again:
2112 2112
2113 cmd = F_SETLKW; 2113 cmd = F_SETLKW;
2114 file_lock->fl_flags |= FL_OFDLCK; 2114 file_lock->fl_flags |= FL_OFDLCK;
2115 file_lock->fl_owner = (fl_owner_t)filp; 2115 file_lock->fl_owner = filp;
2116 /* Fallthrough */ 2116 /* Fallthrough */
2117 case F_SETLKW: 2117 case F_SETLKW:
2118 file_lock->fl_flags |= FL_SLEEP; 2118 file_lock->fl_flags |= FL_SLEEP;
@@ -2170,7 +2170,7 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
2170 2170
2171 cmd = F_GETLK64; 2171 cmd = F_GETLK64;
2172 file_lock.fl_flags |= FL_OFDLCK; 2172 file_lock.fl_flags |= FL_OFDLCK;
2173 file_lock.fl_owner = (fl_owner_t)filp; 2173 file_lock.fl_owner = filp;
2174 } 2174 }
2175 2175
2176 error = vfs_test_lock(filp, &file_lock); 2176 error = vfs_test_lock(filp, &file_lock);
@@ -2242,7 +2242,7 @@ again:
2242 2242
2243 cmd = F_SETLK64; 2243 cmd = F_SETLK64;
2244 file_lock->fl_flags |= FL_OFDLCK; 2244 file_lock->fl_flags |= FL_OFDLCK;
2245 file_lock->fl_owner = (fl_owner_t)filp; 2245 file_lock->fl_owner = filp;
2246 break; 2246 break;
2247 case F_OFD_SETLKW: 2247 case F_OFD_SETLKW:
2248 error = -EINVAL; 2248 error = -EINVAL;
@@ -2251,7 +2251,7 @@ again:
2251 2251
2252 cmd = F_SETLKW64; 2252 cmd = F_SETLKW64;
2253 file_lock->fl_flags |= FL_OFDLCK; 2253 file_lock->fl_flags |= FL_OFDLCK;
2254 file_lock->fl_owner = (fl_owner_t)filp; 2254 file_lock->fl_owner = filp;
2255 /* Fallthrough */ 2255 /* Fallthrough */
2256 case F_SETLKW64: 2256 case F_SETLKW64:
2257 file_lock->fl_flags |= FL_SLEEP; 2257 file_lock->fl_flags |= FL_SLEEP;
@@ -2324,11 +2324,11 @@ void locks_remove_file(struct file *filp)
2324 if (!inode->i_flock) 2324 if (!inode->i_flock)
2325 return; 2325 return;
2326 2326
2327 locks_remove_posix(filp, (fl_owner_t)filp); 2327 locks_remove_posix(filp, filp);
2328 2328
2329 if (filp->f_op->flock) { 2329 if (filp->f_op->flock) {
2330 struct file_lock fl = { 2330 struct file_lock fl = {
2331 .fl_owner = (fl_owner_t)filp, 2331 .fl_owner = filp,
2332 .fl_pid = current->tgid, 2332 .fl_pid = current->tgid,
2333 .fl_file = filp, 2333 .fl_file = filp,
2334 .fl_flags = FL_FLOCK, 2334 .fl_flags = FL_FLOCK,
diff --git a/fs/namei.c b/fs/namei.c
index 985c6f368485..9eb787e5c167 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2256,9 +2256,10 @@ done:
2256 goto out; 2256 goto out;
2257 } 2257 }
2258 path->dentry = dentry; 2258 path->dentry = dentry;
2259 path->mnt = mntget(nd->path.mnt); 2259 path->mnt = nd->path.mnt;
2260 if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW)) 2260 if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW))
2261 return 1; 2261 return 1;
2262 mntget(path->mnt);
2262 follow_mount(path); 2263 follow_mount(path);
2263 error = 0; 2264 error = 0;
2264out: 2265out:
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8f98138cbc43..f11b9eed0de1 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -756,7 +756,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
756 spin_unlock(&dreq->lock); 756 spin_unlock(&dreq->lock);
757 757
758 while (!list_empty(&hdr->pages)) { 758 while (!list_empty(&hdr->pages)) {
759 bool do_destroy = true;
760 759
761 req = nfs_list_entry(hdr->pages.next); 760 req = nfs_list_entry(hdr->pages.next);
762 nfs_list_remove_request(req); 761 nfs_list_remove_request(req);
@@ -765,7 +764,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
765 case NFS_IOHDR_NEED_COMMIT: 764 case NFS_IOHDR_NEED_COMMIT:
766 kref_get(&req->wb_kref); 765 kref_get(&req->wb_kref);
767 nfs_mark_request_commit(req, hdr->lseg, &cinfo); 766 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
768 do_destroy = false;
769 } 767 }
770 nfs_unlock_and_release_request(req); 768 nfs_unlock_and_release_request(req);
771 } 769 }
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 4042ff58fe3f..524dd80d1898 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -361,8 +361,8 @@ start:
361 * Prevent starvation issues if someone is doing a consistency 361 * Prevent starvation issues if someone is doing a consistency
362 * sync-to-disk 362 * sync-to-disk
363 */ 363 */
364 ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING, 364 ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
365 nfs_wait_bit_killable, TASK_KILLABLE); 365 nfs_wait_bit_killable, TASK_KILLABLE);
366 if (ret) 366 if (ret)
367 return ret; 367 return ret;
368 368
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 44bf0140a4c7..e2a0361e24c6 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -783,8 +783,8 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
783static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) 783static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
784{ 784{
785 might_sleep(); 785 might_sleep();
786 wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, 786 wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING,
787 nfs_wait_bit_killable, TASK_KILLABLE); 787 nfs_wait_bit_killable, TASK_KILLABLE);
788} 788}
789 789
790static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) 790static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 567983d2c0eb..7dd55b745c4d 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -174,7 +174,9 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
174 174
175static struct key_type key_type_id_resolver = { 175static struct key_type key_type_id_resolver = {
176 .name = "id_resolver", 176 .name = "id_resolver",
177 .instantiate = user_instantiate, 177 .preparse = user_preparse,
178 .free_preparse = user_free_preparse,
179 .instantiate = generic_key_instantiate,
178 .match = user_match, 180 .match = user_match,
179 .revoke = user_revoke, 181 .revoke = user_revoke,
180 .destroy = user_destroy, 182 .destroy = user_destroy,
@@ -282,6 +284,8 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
282 desc, "", 0, idmap); 284 desc, "", 0, idmap);
283 mutex_unlock(&idmap->idmap_mutex); 285 mutex_unlock(&idmap->idmap_mutex);
284 } 286 }
287 if (!IS_ERR(rkey))
288 set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags);
285 289
286 kfree(desc); 290 kfree(desc);
287 return rkey; 291 return rkey;
@@ -394,7 +398,9 @@ static const struct rpc_pipe_ops idmap_upcall_ops = {
394 398
395static struct key_type key_type_id_resolver_legacy = { 399static struct key_type key_type_id_resolver_legacy = {
396 .name = "id_legacy", 400 .name = "id_legacy",
397 .instantiate = user_instantiate, 401 .preparse = user_preparse,
402 .free_preparse = user_free_preparse,
403 .instantiate = generic_key_instantiate,
398 .match = user_match, 404 .match = user_match,
399 .revoke = user_revoke, 405 .revoke = user_revoke,
400 .destroy = user_destroy, 406 .destroy = user_destroy,
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9927913c97c2..abd37a380535 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -75,7 +75,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
75 * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks 75 * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
76 * @word: long word containing the bit lock 76 * @word: long word containing the bit lock
77 */ 77 */
78int nfs_wait_bit_killable(void *word) 78int nfs_wait_bit_killable(struct wait_bit_key *key)
79{ 79{
80 if (fatal_signal_pending(current)) 80 if (fatal_signal_pending(current))
81 return -ERESTARTSYS; 81 return -ERESTARTSYS;
@@ -1074,8 +1074,8 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
1074 * the bit lock here if it looks like we're going to be doing that. 1074 * the bit lock here if it looks like we're going to be doing that.
1075 */ 1075 */
1076 for (;;) { 1076 for (;;) {
1077 ret = wait_on_bit(bitlock, NFS_INO_INVALIDATING, 1077 ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING,
1078 nfs_wait_bit_killable, TASK_KILLABLE); 1078 nfs_wait_bit_killable, TASK_KILLABLE);
1079 if (ret) 1079 if (ret)
1080 goto out; 1080 goto out;
1081 spin_lock(&inode->i_lock); 1081 spin_lock(&inode->i_lock);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 82ddbf46660e..617f36611d4a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -244,6 +244,7 @@ void nfs_pgio_data_release(struct nfs_pgio_data *);
244int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); 244int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
245int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, 245int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
246 const struct rpc_call_ops *, int, int); 246 const struct rpc_call_ops *, int, int);
247void nfs_free_request(struct nfs_page *req);
247 248
248static inline void nfs_iocounter_init(struct nfs_io_counter *c) 249static inline void nfs_iocounter_init(struct nfs_io_counter *c)
249{ 250{
@@ -347,7 +348,7 @@ extern int nfs_drop_inode(struct inode *);
347extern void nfs_clear_inode(struct inode *); 348extern void nfs_clear_inode(struct inode *);
348extern void nfs_evict_inode(struct inode *); 349extern void nfs_evict_inode(struct inode *);
349void nfs_zap_acl_cache(struct inode *inode); 350void nfs_zap_acl_cache(struct inode *inode);
350extern int nfs_wait_bit_killable(void *word); 351extern int nfs_wait_bit_killable(struct wait_bit_key *key);
351 352
352/* super.c */ 353/* super.c */
353extern const struct super_operations nfs_sops; 354extern const struct super_operations nfs_sops;
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 871d6eda8dba..8f854dde4150 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -247,3 +247,46 @@ const struct xattr_handler *nfs3_xattr_handlers[] = {
247 &posix_acl_default_xattr_handler, 247 &posix_acl_default_xattr_handler,
248 NULL, 248 NULL,
249}; 249};
250
251static int
252nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
253 size_t size, ssize_t *result)
254{
255 struct posix_acl *acl;
256 char *p = data + *result;
257
258 acl = get_acl(inode, type);
259 if (!acl)
260 return 0;
261
262 posix_acl_release(acl);
263
264 *result += strlen(name);
265 *result += 1;
266 if (!size)
267 return 0;
268 if (*result > size)
269 return -ERANGE;
270
271 strcpy(p, name);
272 return 0;
273}
274
275ssize_t
276nfs3_listxattr(struct dentry *dentry, char *data, size_t size)
277{
278 struct inode *inode = dentry->d_inode;
279 ssize_t result = 0;
280 int error;
281
282 error = nfs3_list_one_acl(inode, ACL_TYPE_ACCESS,
283 POSIX_ACL_XATTR_ACCESS, data, size, &result);
284 if (error)
285 return error;
286
287 error = nfs3_list_one_acl(inode, ACL_TYPE_DEFAULT,
288 POSIX_ACL_XATTR_DEFAULT, data, size, &result);
289 if (error)
290 return error;
291 return result;
292}
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index e7daa42bbc86..f0afa291fd58 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -885,7 +885,7 @@ static const struct inode_operations nfs3_dir_inode_operations = {
885 .getattr = nfs_getattr, 885 .getattr = nfs_getattr,
886 .setattr = nfs_setattr, 886 .setattr = nfs_setattr,
887#ifdef CONFIG_NFS_V3_ACL 887#ifdef CONFIG_NFS_V3_ACL
888 .listxattr = generic_listxattr, 888 .listxattr = nfs3_listxattr,
889 .getxattr = generic_getxattr, 889 .getxattr = generic_getxattr,
890 .setxattr = generic_setxattr, 890 .setxattr = generic_setxattr,
891 .removexattr = generic_removexattr, 891 .removexattr = generic_removexattr,
@@ -899,7 +899,7 @@ static const struct inode_operations nfs3_file_inode_operations = {
899 .getattr = nfs_getattr, 899 .getattr = nfs_getattr,
900 .setattr = nfs_setattr, 900 .setattr = nfs_setattr,
901#ifdef CONFIG_NFS_V3_ACL 901#ifdef CONFIG_NFS_V3_ACL
902 .listxattr = generic_listxattr, 902 .listxattr = nfs3_listxattr,
903 .getxattr = generic_getxattr, 903 .getxattr = generic_getxattr,
904 .setxattr = generic_setxattr, 904 .setxattr = generic_setxattr,
905 .removexattr = generic_removexattr, 905 .removexattr = generic_removexattr,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 848f6853c59e..42f121182167 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1251,8 +1251,8 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp)
1251 might_sleep(); 1251 might_sleep();
1252 1252
1253 atomic_inc(&clp->cl_count); 1253 atomic_inc(&clp->cl_count);
1254 res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, 1254 res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
1255 nfs_wait_bit_killable, TASK_KILLABLE); 1255 nfs_wait_bit_killable, TASK_KILLABLE);
1256 if (res) 1256 if (res)
1257 goto out; 1257 goto out;
1258 if (clp->cl_cons_state < 0) 1258 if (clp->cl_cons_state < 0)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index b6ee3a6ee96d..0be5050638f7 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -29,8 +29,6 @@
29static struct kmem_cache *nfs_page_cachep; 29static struct kmem_cache *nfs_page_cachep;
30static const struct rpc_call_ops nfs_pgio_common_ops; 30static const struct rpc_call_ops nfs_pgio_common_ops;
31 31
32static void nfs_free_request(struct nfs_page *);
33
34static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) 32static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
35{ 33{
36 p->npages = pagecount; 34 p->npages = pagecount;
@@ -117,7 +115,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c)
117 set_bit(NFS_IO_INPROGRESS, &c->flags); 115 set_bit(NFS_IO_INPROGRESS, &c->flags);
118 if (atomic_read(&c->io_count) == 0) 116 if (atomic_read(&c->io_count) == 0)
119 break; 117 break;
120 ret = nfs_wait_bit_killable(&c->flags); 118 ret = nfs_wait_bit_killable(&q.key);
121 } while (atomic_read(&c->io_count) != 0); 119 } while (atomic_read(&c->io_count) != 0);
122 finish_wait(wq, &q.wait); 120 finish_wait(wq, &q.wait);
123 return ret; 121 return ret;
@@ -138,12 +136,6 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
138 return __nfs_iocounter_wait(c); 136 return __nfs_iocounter_wait(c);
139} 137}
140 138
141static int nfs_wait_bit_uninterruptible(void *word)
142{
143 io_schedule();
144 return 0;
145}
146
147/* 139/*
148 * nfs_page_group_lock - lock the head of the page group 140 * nfs_page_group_lock - lock the head of the page group
149 * @req - request in group that is to be locked 141 * @req - request in group that is to be locked
@@ -158,7 +150,6 @@ nfs_page_group_lock(struct nfs_page *req)
158 WARN_ON_ONCE(head != head->wb_head); 150 WARN_ON_ONCE(head != head->wb_head);
159 151
160 wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, 152 wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
161 nfs_wait_bit_uninterruptible,
162 TASK_UNINTERRUPTIBLE); 153 TASK_UNINTERRUPTIBLE);
163} 154}
164 155
@@ -239,20 +230,28 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
239 WARN_ON_ONCE(prev == req); 230 WARN_ON_ONCE(prev == req);
240 231
241 if (!prev) { 232 if (!prev) {
233 /* a head request */
242 req->wb_head = req; 234 req->wb_head = req;
243 req->wb_this_page = req; 235 req->wb_this_page = req;
244 } else { 236 } else {
237 /* a subrequest */
245 WARN_ON_ONCE(prev->wb_this_page != prev->wb_head); 238 WARN_ON_ONCE(prev->wb_this_page != prev->wb_head);
246 WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags)); 239 WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags));
247 req->wb_head = prev->wb_head; 240 req->wb_head = prev->wb_head;
248 req->wb_this_page = prev->wb_this_page; 241 req->wb_this_page = prev->wb_this_page;
249 prev->wb_this_page = req; 242 prev->wb_this_page = req;
250 243
244 /* All subrequests take a ref on the head request until
245 * nfs_page_group_destroy is called */
246 kref_get(&req->wb_head->wb_kref);
247
251 /* grab extra ref if head request has extra ref from 248 /* grab extra ref if head request has extra ref from
252 * the write/commit path to handle handoff between write 249 * the write/commit path to handle handoff between write
253 * and commit lists */ 250 * and commit lists */
254 if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) 251 if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) {
252 set_bit(PG_INODE_REF, &req->wb_flags);
255 kref_get(&req->wb_kref); 253 kref_get(&req->wb_kref);
254 }
256 } 255 }
257} 256}
258 257
@@ -269,6 +268,10 @@ nfs_page_group_destroy(struct kref *kref)
269 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); 268 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
270 struct nfs_page *tmp, *next; 269 struct nfs_page *tmp, *next;
271 270
271 /* subrequests must release the ref on the head request */
272 if (req->wb_head != req)
273 nfs_release_request(req->wb_head);
274
272 if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN)) 275 if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
273 return; 276 return;
274 277
@@ -394,7 +397,7 @@ static void nfs_clear_request(struct nfs_page *req)
394 * 397 *
395 * Note: Should never be called with the spinlock held! 398 * Note: Should never be called with the spinlock held!
396 */ 399 */
397static void nfs_free_request(struct nfs_page *req) 400void nfs_free_request(struct nfs_page *req)
398{ 401{
399 WARN_ON_ONCE(req->wb_this_page != req); 402 WARN_ON_ONCE(req->wb_this_page != req);
400 403
@@ -425,9 +428,8 @@ void nfs_release_request(struct nfs_page *req)
425int 428int
426nfs_wait_on_request(struct nfs_page *req) 429nfs_wait_on_request(struct nfs_page *req)
427{ 430{
428 return wait_on_bit(&req->wb_flags, PG_BUSY, 431 return wait_on_bit_io(&req->wb_flags, PG_BUSY,
429 nfs_wait_bit_uninterruptible, 432 TASK_UNINTERRUPTIBLE);
430 TASK_UNINTERRUPTIBLE);
431} 433}
432 434
433/* 435/*
@@ -925,7 +927,6 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
925 nfs_pageio_doio(desc); 927 nfs_pageio_doio(desc);
926 if (desc->pg_error < 0) 928 if (desc->pg_error < 0)
927 return 0; 929 return 0;
928 desc->pg_moreio = 0;
929 if (desc->pg_recoalesce) 930 if (desc->pg_recoalesce)
930 return 0; 931 return 0;
931 /* retry add_request for this subreq */ 932 /* retry add_request for this subreq */
@@ -972,6 +973,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
972 desc->pg_count = 0; 973 desc->pg_count = 0;
973 desc->pg_base = 0; 974 desc->pg_base = 0;
974 desc->pg_recoalesce = 0; 975 desc->pg_recoalesce = 0;
976 desc->pg_moreio = 0;
975 977
976 while (!list_empty(&head)) { 978 while (!list_empty(&head)) {
977 struct nfs_page *req; 979 struct nfs_page *req;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6fdcd233d6f7..a8914b335617 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1885,7 +1885,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
1885 if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) { 1885 if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
1886 if (!sync) 1886 if (!sync)
1887 goto out; 1887 goto out;
1888 status = wait_on_bit_lock(&nfsi->flags, 1888 status = wait_on_bit_lock_action(&nfsi->flags,
1889 NFS_INO_LAYOUTCOMMITTING, 1889 NFS_INO_LAYOUTCOMMITTING,
1890 nfs_wait_bit_killable, 1890 nfs_wait_bit_killable,
1891 TASK_KILLABLE); 1891 TASK_KILLABLE);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 98ff061ccaf3..962c9ee758be 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_commit_ops;
46static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; 46static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
47static const struct nfs_commit_completion_ops nfs_commit_completion_ops; 47static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
48static const struct nfs_rw_ops nfs_rw_write_ops; 48static const struct nfs_rw_ops nfs_rw_write_ops;
49static void nfs_clear_request_commit(struct nfs_page *req);
49 50
50static struct kmem_cache *nfs_wdata_cachep; 51static struct kmem_cache *nfs_wdata_cachep;
51static mempool_t *nfs_wdata_mempool; 52static mempool_t *nfs_wdata_mempool;
@@ -91,8 +92,15 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
91 set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); 92 set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
92} 93}
93 94
95/*
96 * nfs_page_find_head_request_locked - find head request associated with @page
97 *
98 * must be called while holding the inode lock.
99 *
100 * returns matching head request with reference held, or NULL if not found.
101 */
94static struct nfs_page * 102static struct nfs_page *
95nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page) 103nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
96{ 104{
97 struct nfs_page *req = NULL; 105 struct nfs_page *req = NULL;
98 106
@@ -104,25 +112,33 @@ nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
104 /* Linearly search the commit list for the correct req */ 112 /* Linearly search the commit list for the correct req */
105 list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) { 113 list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
106 if (freq->wb_page == page) { 114 if (freq->wb_page == page) {
107 req = freq; 115 req = freq->wb_head;
108 break; 116 break;
109 } 117 }
110 } 118 }
111 } 119 }
112 120
113 if (req) 121 if (req) {
122 WARN_ON_ONCE(req->wb_head != req);
123
114 kref_get(&req->wb_kref); 124 kref_get(&req->wb_kref);
125 }
115 126
116 return req; 127 return req;
117} 128}
118 129
119static struct nfs_page *nfs_page_find_request(struct page *page) 130/*
131 * nfs_page_find_head_request - find head request associated with @page
132 *
133 * returns matching head request with reference held, or NULL if not found.
134 */
135static struct nfs_page *nfs_page_find_head_request(struct page *page)
120{ 136{
121 struct inode *inode = page_file_mapping(page)->host; 137 struct inode *inode = page_file_mapping(page)->host;
122 struct nfs_page *req = NULL; 138 struct nfs_page *req = NULL;
123 139
124 spin_lock(&inode->i_lock); 140 spin_lock(&inode->i_lock);
125 req = nfs_page_find_request_locked(NFS_I(inode), page); 141 req = nfs_page_find_head_request_locked(NFS_I(inode), page);
126 spin_unlock(&inode->i_lock); 142 spin_unlock(&inode->i_lock);
127 return req; 143 return req;
128} 144}
@@ -274,36 +290,246 @@ static void nfs_end_page_writeback(struct nfs_page *req)
274 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 290 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
275} 291}
276 292
277static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock) 293
294/* nfs_page_group_clear_bits
295 * @req - an nfs request
296 * clears all page group related bits from @req
297 */
298static void
299nfs_page_group_clear_bits(struct nfs_page *req)
300{
301 clear_bit(PG_TEARDOWN, &req->wb_flags);
302 clear_bit(PG_UNLOCKPAGE, &req->wb_flags);
303 clear_bit(PG_UPTODATE, &req->wb_flags);
304 clear_bit(PG_WB_END, &req->wb_flags);
305 clear_bit(PG_REMOVE, &req->wb_flags);
306}
307
308
309/*
310 * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req
311 *
312 * this is a helper function for nfs_lock_and_join_requests
313 *
314 * @inode - inode associated with request page group, must be holding inode lock
315 * @head - head request of page group, must be holding head lock
316 * @req - request that couldn't lock and needs to wait on the req bit lock
317 * @nonblock - if true, don't actually wait
318 *
319 * NOTE: this must be called holding page_group bit lock and inode spin lock
320 * and BOTH will be released before returning.
321 *
322 * returns 0 on success, < 0 on error.
323 */
324static int
325nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
326 struct nfs_page *req, bool nonblock)
327 __releases(&inode->i_lock)
328{
329 struct nfs_page *tmp;
330 int ret;
331
332 /* relinquish all the locks successfully grabbed this run */
333 for (tmp = head ; tmp != req; tmp = tmp->wb_this_page)
334 nfs_unlock_request(tmp);
335
336 WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
337
338 /* grab a ref on the request that will be waited on */
339 kref_get(&req->wb_kref);
340
341 nfs_page_group_unlock(head);
342 spin_unlock(&inode->i_lock);
343
344 /* release ref from nfs_page_find_head_request_locked */
345 nfs_release_request(head);
346
347 if (!nonblock)
348 ret = nfs_wait_on_request(req);
349 else
350 ret = -EAGAIN;
351 nfs_release_request(req);
352
353 return ret;
354}
355
356/*
357 * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
358 *
359 * @destroy_list - request list (using wb_this_page) terminated by @old_head
360 * @old_head - the old head of the list
361 *
362 * All subrequests must be locked and removed from all lists, so at this point
363 * they are only "active" in this function, and possibly in nfs_wait_on_request
364 * with a reference held by some other context.
365 */
366static void
367nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
368 struct nfs_page *old_head)
369{
370 while (destroy_list) {
371 struct nfs_page *subreq = destroy_list;
372
373 destroy_list = (subreq->wb_this_page == old_head) ?
374 NULL : subreq->wb_this_page;
375
376 WARN_ON_ONCE(old_head != subreq->wb_head);
377
378 /* make sure old group is not used */
379 subreq->wb_head = subreq;
380 subreq->wb_this_page = subreq;
381
382 nfs_clear_request_commit(subreq);
383
384 /* subreq is now totally disconnected from page group or any
385 * write / commit lists. last chance to wake any waiters */
386 nfs_unlock_request(subreq);
387
388 if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {
389 /* release ref on old head request */
390 nfs_release_request(old_head);
391
392 nfs_page_group_clear_bits(subreq);
393
394 /* release the PG_INODE_REF reference */
395 if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
396 nfs_release_request(subreq);
397 else
398 WARN_ON_ONCE(1);
399 } else {
400 WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
401 /* zombie requests have already released the last
402 * reference and were waiting on the rest of the
403 * group to complete. Since it's no longer part of a
404 * group, simply free the request */
405 nfs_page_group_clear_bits(subreq);
406 nfs_free_request(subreq);
407 }
408 }
409}
410
411/*
412 * nfs_lock_and_join_requests - join all subreqs to the head req and return
413 * a locked reference, cancelling any pending
414 * operations for this page.
415 *
416 * @page - the page used to lookup the "page group" of nfs_page structures
417 * @nonblock - if true, don't block waiting for request locks
418 *
419 * This function joins all sub requests to the head request by first
420 * locking all requests in the group, cancelling any pending operations
421 * and finally updating the head request to cover the whole range covered by
422 * the (former) group. All subrequests are removed from any write or commit
423 * lists, unlinked from the group and destroyed.
424 *
425 * Returns a locked, referenced pointer to the head request - which after
426 * this call is guaranteed to be the only request associated with the page.
427 * Returns NULL if no requests are found for @page, or a ERR_PTR if an
428 * error was encountered.
429 */
430static struct nfs_page *
431nfs_lock_and_join_requests(struct page *page, bool nonblock)
278{ 432{
279 struct inode *inode = page_file_mapping(page)->host; 433 struct inode *inode = page_file_mapping(page)->host;
280 struct nfs_page *req; 434 struct nfs_page *head, *subreq;
435 struct nfs_page *destroy_list = NULL;
436 unsigned int total_bytes;
281 int ret; 437 int ret;
282 438
439try_again:
440 total_bytes = 0;
441
442 WARN_ON_ONCE(destroy_list);
443
283 spin_lock(&inode->i_lock); 444 spin_lock(&inode->i_lock);
284 for (;;) { 445
285 req = nfs_page_find_request_locked(NFS_I(inode), page); 446 /*
286 if (req == NULL) 447 * A reference is taken only on the head request which acts as a
287 break; 448 * reference to the whole page group - the group will not be destroyed
288 if (nfs_lock_request(req)) 449 * until the head reference is released.
289 break; 450 */
290 /* Note: If we hold the page lock, as is the case in nfs_writepage, 451 head = nfs_page_find_head_request_locked(NFS_I(inode), page);
291 * then the call to nfs_lock_request() will always 452
292 * succeed provided that someone hasn't already marked the 453 if (!head) {
293 * request as dirty (in which case we don't care).
294 */
295 spin_unlock(&inode->i_lock); 454 spin_unlock(&inode->i_lock);
296 if (!nonblock) 455 return NULL;
297 ret = nfs_wait_on_request(req); 456 }
298 else 457
299 ret = -EAGAIN; 458 /* lock each request in the page group */
300 nfs_release_request(req); 459 nfs_page_group_lock(head);
301 if (ret != 0) 460 subreq = head;
461 do {
462 /*
463 * Subrequests are always contiguous, non overlapping
464 * and in order. If not, it's a programming error.
465 */
466 WARN_ON_ONCE(subreq->wb_offset !=
467 (head->wb_offset + total_bytes));
468
469 /* keep track of how many bytes this group covers */
470 total_bytes += subreq->wb_bytes;
471
472 if (!nfs_lock_request(subreq)) {
473 /* releases page group bit lock and
474 * inode spin lock and all references */
475 ret = nfs_unroll_locks_and_wait(inode, head,
476 subreq, nonblock);
477
478 if (ret == 0)
479 goto try_again;
480
302 return ERR_PTR(ret); 481 return ERR_PTR(ret);
303 spin_lock(&inode->i_lock); 482 }
483
484 subreq = subreq->wb_this_page;
485 } while (subreq != head);
486
487 /* Now that all requests are locked, make sure they aren't on any list.
488 * Commit list removal accounting is done after locks are dropped */
489 subreq = head;
490 do {
491 nfs_list_remove_request(subreq);
492 subreq = subreq->wb_this_page;
493 } while (subreq != head);
494
495 /* unlink subrequests from head, destroy them later */
496 if (head->wb_this_page != head) {
497 /* destroy list will be terminated by head */
498 destroy_list = head->wb_this_page;
499 head->wb_this_page = head;
500
501 /* change head request to cover whole range that
502 * the former page group covered */
503 head->wb_bytes = total_bytes;
304 } 504 }
505
506 /*
507 * prepare head request to be added to new pgio descriptor
508 */
509 nfs_page_group_clear_bits(head);
510
511 /*
512 * some part of the group was still on the inode list - otherwise
513 * the group wouldn't be involved in async write.
514 * grab a reference for the head request, iff it needs one.
515 */
516 if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
517 kref_get(&head->wb_kref);
518
519 nfs_page_group_unlock(head);
520
521 /* drop lock to clear_request_commit the head req and clean up
522 * requests on destroy list */
305 spin_unlock(&inode->i_lock); 523 spin_unlock(&inode->i_lock);
306 return req; 524
525 nfs_destroy_unlinked_subrequests(destroy_list, head);
526
527 /* clean up commit list state */
528 nfs_clear_request_commit(head);
529
530 /* still holds ref on head from nfs_page_find_head_request_locked
531 * and still has lock on head from lock loop */
532 return head;
307} 533}
308 534
309/* 535/*
@@ -316,7 +542,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
316 struct nfs_page *req; 542 struct nfs_page *req;
317 int ret = 0; 543 int ret = 0;
318 544
319 req = nfs_find_and_lock_request(page, nonblock); 545 req = nfs_lock_and_join_requests(page, nonblock);
320 if (!req) 546 if (!req)
321 goto out; 547 goto out;
322 ret = PTR_ERR(req); 548 ret = PTR_ERR(req);
@@ -397,7 +623,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
397 int err; 623 int err;
398 624
399 /* Stop dirtying of new pages while we sync */ 625 /* Stop dirtying of new pages while we sync */
400 err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING, 626 err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING,
401 nfs_wait_bit_killable, TASK_KILLABLE); 627 nfs_wait_bit_killable, TASK_KILLABLE);
402 if (err) 628 if (err)
403 goto out_err; 629 goto out_err;
@@ -448,7 +674,9 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
448 set_page_private(req->wb_page, (unsigned long)req); 674 set_page_private(req->wb_page, (unsigned long)req);
449 } 675 }
450 nfsi->npages++; 676 nfsi->npages++;
451 set_bit(PG_INODE_REF, &req->wb_flags); 677 /* this a head request for a page group - mark it as having an
678 * extra reference so sub groups can follow suit */
679 WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
452 kref_get(&req->wb_kref); 680 kref_get(&req->wb_kref);
453 spin_unlock(&inode->i_lock); 681 spin_unlock(&inode->i_lock);
454} 682}
@@ -474,7 +702,9 @@ static void nfs_inode_remove_request(struct nfs_page *req)
474 nfsi->npages--; 702 nfsi->npages--;
475 spin_unlock(&inode->i_lock); 703 spin_unlock(&inode->i_lock);
476 } 704 }
477 nfs_release_request(req); 705
706 if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
707 nfs_release_request(req);
478} 708}
479 709
480static void 710static void
@@ -638,7 +868,6 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
638{ 868{
639 struct nfs_commit_info cinfo; 869 struct nfs_commit_info cinfo;
640 unsigned long bytes = 0; 870 unsigned long bytes = 0;
641 bool do_destroy;
642 871
643 if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) 872 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
644 goto out; 873 goto out;
@@ -668,7 +897,6 @@ remove_req:
668next: 897next:
669 nfs_unlock_request(req); 898 nfs_unlock_request(req);
670 nfs_end_page_writeback(req); 899 nfs_end_page_writeback(req);
671 do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
672 nfs_release_request(req); 900 nfs_release_request(req);
673 } 901 }
674out: 902out:
@@ -769,7 +997,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
769 spin_lock(&inode->i_lock); 997 spin_lock(&inode->i_lock);
770 998
771 for (;;) { 999 for (;;) {
772 req = nfs_page_find_request_locked(NFS_I(inode), page); 1000 req = nfs_page_find_head_request_locked(NFS_I(inode), page);
773 if (req == NULL) 1001 if (req == NULL)
774 goto out_unlock; 1002 goto out_unlock;
775 1003
@@ -877,7 +1105,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
877 * dropped page. 1105 * dropped page.
878 */ 1106 */
879 do { 1107 do {
880 req = nfs_page_find_request(page); 1108 req = nfs_page_find_head_request(page);
881 if (req == NULL) 1109 if (req == NULL)
882 return 0; 1110 return 0;
883 l_ctx = req->wb_lock_context; 1111 l_ctx = req->wb_lock_context;
@@ -1475,7 +1703,7 @@ int nfs_commit_inode(struct inode *inode, int how)
1475 return error; 1703 return error;
1476 if (!may_wait) 1704 if (!may_wait)
1477 goto out_mark_dirty; 1705 goto out_mark_dirty;
1478 error = wait_on_bit(&NFS_I(inode)->flags, 1706 error = wait_on_bit_action(&NFS_I(inode)->flags,
1479 NFS_INO_COMMIT, 1707 NFS_INO_COMMIT,
1480 nfs_wait_bit_killable, 1708 nfs_wait_bit_killable,
1481 TASK_KILLABLE); 1709 TASK_KILLABLE);
@@ -1569,27 +1797,28 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1569 struct nfs_page *req; 1797 struct nfs_page *req;
1570 int ret = 0; 1798 int ret = 0;
1571 1799
1572 for (;;) { 1800 wait_on_page_writeback(page);
1573 wait_on_page_writeback(page); 1801
1574 req = nfs_page_find_request(page); 1802 /* blocking call to cancel all requests and join to a single (head)
1575 if (req == NULL) 1803 * request */
1576 break; 1804 req = nfs_lock_and_join_requests(page, false);
1577 if (nfs_lock_request(req)) { 1805
1578 nfs_clear_request_commit(req); 1806 if (IS_ERR(req)) {
1579 nfs_inode_remove_request(req); 1807 ret = PTR_ERR(req);
1580 /* 1808 } else if (req) {
1581 * In case nfs_inode_remove_request has marked the 1809 /* all requests from this page have been cancelled by
1582 * page as being dirty 1810 * nfs_lock_and_join_requests, so just remove the head
1583 */ 1811 * request from the inode / page_private pointer and
1584 cancel_dirty_page(page, PAGE_CACHE_SIZE); 1812 * release it */
1585 nfs_unlock_and_release_request(req); 1813 nfs_inode_remove_request(req);
1586 break; 1814 /*
1587 } 1815 * In case nfs_inode_remove_request has marked the
1588 ret = nfs_wait_on_request(req); 1816 * page as being dirty
1589 nfs_release_request(req); 1817 */
1590 if (ret < 0) 1818 cancel_dirty_page(page, PAGE_CACHE_SIZE);
1591 break; 1819 nfs_unlock_and_release_request(req);
1592 } 1820 }
1821
1593 return ret; 1822 return ret;
1594} 1823}
1595 1824
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b56b1cc02718..944275c8f56d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2879,6 +2879,7 @@ again:
2879 * return the conflicting open: 2879 * return the conflicting open:
2880 */ 2880 */
2881 if (conf->len) { 2881 if (conf->len) {
2882 kfree(conf->data);
2882 conf->len = 0; 2883 conf->len = 0;
2883 conf->data = NULL; 2884 conf->data = NULL;
2884 goto again; 2885 goto again;
@@ -2891,6 +2892,7 @@ again:
2891 if (conf->len) { 2892 if (conf->len) {
2892 p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8); 2893 p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8);
2893 p = xdr_encode_opaque(p, conf->data, conf->len); 2894 p = xdr_encode_opaque(p, conf->data, conf->len);
2895 kfree(conf->data);
2894 } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ 2896 } else { /* non - nfsv4 lock in conflict, no clientid nor owner */
2895 p = xdr_encode_hyper(p, (u64)0); /* clientid */ 2897 p = xdr_encode_hyper(p, (u64)0); /* clientid */
2896 *p++ = cpu_to_be32(0); /* length of owner name */ 2898 *p++ = cpu_to_be32(0); /* length of owner name */
@@ -2907,7 +2909,7 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo
2907 nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid); 2909 nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid);
2908 else if (nfserr == nfserr_denied) 2910 else if (nfserr == nfserr_denied)
2909 nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied); 2911 nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied);
2910 kfree(lock->lk_denied.ld_owner.data); 2912
2911 return nfserr; 2913 return nfserr;
2912} 2914}
2913 2915
diff --git a/fs/open.c b/fs/open.c
index 36662d036237..d6fd3acde134 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -263,11 +263,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
263 return -EPERM; 263 return -EPERM;
264 264
265 /* 265 /*
266 * We can not allow to do any fallocate operation on an active 266 * We cannot allow any fallocate operation on an active swapfile
267 * swapfile
268 */ 267 */
269 if (IS_SWAPFILE(inode)) 268 if (IS_SWAPFILE(inode))
270 ret = -ETXTBSY; 269 return -ETXTBSY;
271 270
272 /* 271 /*
273 * Revalidate the write permissions, in case security policy has 272 * Revalidate the write permissions, in case security policy has
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 64db2bceac59..cd3653e4f35c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -297,15 +297,11 @@ static void render_cap_t(struct seq_file *m, const char *header,
297 seq_puts(m, header); 297 seq_puts(m, header);
298 CAP_FOR_EACH_U32(__capi) { 298 CAP_FOR_EACH_U32(__capi) {
299 seq_printf(m, "%08x", 299 seq_printf(m, "%08x",
300 a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]); 300 a->cap[CAP_LAST_U32 - __capi]);
301 } 301 }
302 seq_putc(m, '\n'); 302 seq_putc(m, '\n');
303} 303}
304 304
305/* Remove non-existent capabilities */
306#define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \
307 CAP_TO_MASK(CAP_LAST_CAP + 1) - 1)
308
309static inline void task_cap(struct seq_file *m, struct task_struct *p) 305static inline void task_cap(struct seq_file *m, struct task_struct *p)
310{ 306{
311 const struct cred *cred; 307 const struct cred *cred;
@@ -319,11 +315,6 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
319 cap_bset = cred->cap_bset; 315 cap_bset = cred->cap_bset;
320 rcu_read_unlock(); 316 rcu_read_unlock();
321 317
322 NORM_CAPS(cap_inheritable);
323 NORM_CAPS(cap_permitted);
324 NORM_CAPS(cap_effective);
325 NORM_CAPS(cap_bset);
326
327 render_cap_t(m, "CapInh:\t", &cap_inheritable); 318 render_cap_t(m, "CapInh:\t", &cap_inheritable);
328 render_cap_t(m, "CapPrm:\t", &cap_permitted); 319 render_cap_t(m, "CapPrm:\t", &cap_permitted);
329 render_cap_t(m, "CapEff:\t", &cap_effective); 320 render_cap_t(m, "CapEff:\t", &cap_effective);
@@ -473,13 +464,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
473 priority = task_prio(task); 464 priority = task_prio(task);
474 nice = task_nice(task); 465 nice = task_nice(task);
475 466
476 /* Temporary variable needed for gcc-2.96 */
477 /* convert timespec -> nsec*/
478 start_time =
479 (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC
480 + task->real_start_time.tv_nsec;
481 /* convert nsec -> ticks */ 467 /* convert nsec -> ticks */
482 start_time = nsec_to_clock_t(start_time); 468 start_time = nsec_to_clock_t(task->real_start_time);
483 469
484 seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state); 470 seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
485 seq_put_decimal_ll(m, ' ', ppid); 471 seq_put_decimal_ll(m, ' ', ppid);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 9cd5f63715c0..7f30bdc57d13 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -702,6 +702,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
702 struct dquot *dquot; 702 struct dquot *dquot;
703 unsigned long freed = 0; 703 unsigned long freed = 0;
704 704
705 spin_lock(&dq_list_lock);
705 head = free_dquots.prev; 706 head = free_dquots.prev;
706 while (head != &free_dquots && sc->nr_to_scan) { 707 while (head != &free_dquots && sc->nr_to_scan) {
707 dquot = list_entry(head, struct dquot, dq_free); 708 dquot = list_entry(head, struct dquot, dq_free);
@@ -713,6 +714,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
713 freed++; 714 freed++;
714 head = free_dquots.prev; 715 head = free_dquots.prev;
715 } 716 }
717 spin_unlock(&dq_list_lock);
716 return freed; 718 return freed;
717} 719}
718 720
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 0013142c0475..80c350216ea8 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -35,8 +35,9 @@ struct timerfd_ctx {
35 ktime_t moffs; 35 ktime_t moffs;
36 wait_queue_head_t wqh; 36 wait_queue_head_t wqh;
37 u64 ticks; 37 u64 ticks;
38 int expired;
39 int clockid; 38 int clockid;
39 short unsigned expired;
40 short unsigned settime_flags; /* to show in fdinfo */
40 struct rcu_head rcu; 41 struct rcu_head rcu;
41 struct list_head clist; 42 struct list_head clist;
42 bool might_cancel; 43 bool might_cancel;
@@ -92,7 +93,7 @@ static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm,
92 */ 93 */
93void timerfd_clock_was_set(void) 94void timerfd_clock_was_set(void)
94{ 95{
95 ktime_t moffs = ktime_get_monotonic_offset(); 96 ktime_t moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
96 struct timerfd_ctx *ctx; 97 struct timerfd_ctx *ctx;
97 unsigned long flags; 98 unsigned long flags;
98 99
@@ -125,7 +126,7 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx)
125{ 126{
126 if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) 127 if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
127 return false; 128 return false;
128 ctx->moffs = ktime_get_monotonic_offset(); 129 ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
129 return true; 130 return true;
130} 131}
131 132
@@ -196,6 +197,8 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
196 if (timerfd_canceled(ctx)) 197 if (timerfd_canceled(ctx))
197 return -ECANCELED; 198 return -ECANCELED;
198 } 199 }
200
201 ctx->settime_flags = flags & TFD_SETTIME_FLAGS;
199 return 0; 202 return 0;
200} 203}
201 204
@@ -284,11 +287,77 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
284 return res; 287 return res;
285} 288}
286 289
290#ifdef CONFIG_PROC_FS
291static int timerfd_show(struct seq_file *m, struct file *file)
292{
293 struct timerfd_ctx *ctx = file->private_data;
294 struct itimerspec t;
295
296 spin_lock_irq(&ctx->wqh.lock);
297 t.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
298 t.it_interval = ktime_to_timespec(ctx->tintv);
299 spin_unlock_irq(&ctx->wqh.lock);
300
301 return seq_printf(m,
302 "clockid: %d\n"
303 "ticks: %llu\n"
304 "settime flags: 0%o\n"
305 "it_value: (%llu, %llu)\n"
306 "it_interval: (%llu, %llu)\n",
307 ctx->clockid, (unsigned long long)ctx->ticks,
308 ctx->settime_flags,
309 (unsigned long long)t.it_value.tv_sec,
310 (unsigned long long)t.it_value.tv_nsec,
311 (unsigned long long)t.it_interval.tv_sec,
312 (unsigned long long)t.it_interval.tv_nsec);
313}
314#else
315#define timerfd_show NULL
316#endif
317
318#ifdef CONFIG_CHECKPOINT_RESTORE
319static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
320{
321 struct timerfd_ctx *ctx = file->private_data;
322 int ret = 0;
323
324 switch (cmd) {
325 case TFD_IOC_SET_TICKS: {
326 u64 ticks;
327
328 if (copy_from_user(&ticks, (u64 __user *)arg, sizeof(ticks)))
329 return -EFAULT;
330 if (!ticks)
331 return -EINVAL;
332
333 spin_lock_irq(&ctx->wqh.lock);
334 if (!timerfd_canceled(ctx)) {
335 ctx->ticks = ticks;
336 if (ticks)
337 wake_up_locked(&ctx->wqh);
338 } else
339 ret = -ECANCELED;
340 spin_unlock_irq(&ctx->wqh.lock);
341 break;
342 }
343 default:
344 ret = -ENOTTY;
345 break;
346 }
347
348 return ret;
349}
350#else
351#define timerfd_ioctl NULL
352#endif
353
287static const struct file_operations timerfd_fops = { 354static const struct file_operations timerfd_fops = {
288 .release = timerfd_release, 355 .release = timerfd_release,
289 .poll = timerfd_poll, 356 .poll = timerfd_poll,
290 .read = timerfd_read, 357 .read = timerfd_read,
291 .llseek = noop_llseek, 358 .llseek = noop_llseek,
359 .show_fdinfo = timerfd_show,
360 .unlocked_ioctl = timerfd_ioctl,
292}; 361};
293 362
294static int timerfd_fget(int fd, struct fd *p) 363static int timerfd_fget(int fd, struct fd *p)
@@ -336,7 +405,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
336 else 405 else
337 hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS); 406 hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS);
338 407
339 ctx->moffs = ktime_get_monotonic_offset(); 408 ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
340 409
341 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 410 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
342 O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); 411 O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
diff --git a/fs/xattr.c b/fs/xattr.c
index 3377dff18404..c69e6d43a0d2 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -843,7 +843,7 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
843 843
844 /* wrap around? */ 844 /* wrap around? */
845 len = sizeof(*new_xattr) + size; 845 len = sizeof(*new_xattr) + size;
846 if (len <= sizeof(*new_xattr)) 846 if (len < sizeof(*new_xattr))
847 return NULL; 847 return NULL;
848 848
849 new_xattr = kmalloc(len, GFP_KERNEL); 849 new_xattr = kmalloc(len, GFP_KERNEL);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 96175df211b1..75c3fe5f3d9d 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4298,8 +4298,8 @@ xfs_bmapi_delay(
4298} 4298}
4299 4299
4300 4300
4301int 4301static int
4302__xfs_bmapi_allocate( 4302xfs_bmapi_allocate(
4303 struct xfs_bmalloca *bma) 4303 struct xfs_bmalloca *bma)
4304{ 4304{
4305 struct xfs_mount *mp = bma->ip->i_mount; 4305 struct xfs_mount *mp = bma->ip->i_mount;
@@ -4578,9 +4578,6 @@ xfs_bmapi_write(
4578 bma.flist = flist; 4578 bma.flist = flist;
4579 bma.firstblock = firstblock; 4579 bma.firstblock = firstblock;
4580 4580
4581 if (flags & XFS_BMAPI_STACK_SWITCH)
4582 bma.stack_switch = 1;
4583
4584 while (bno < end && n < *nmap) { 4581 while (bno < end && n < *nmap) {
4585 inhole = eof || bma.got.br_startoff > bno; 4582 inhole = eof || bma.got.br_startoff > bno;
4586 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); 4583 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 38ba36e9b2f0..b879ca56a64c 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -77,7 +77,6 @@ typedef struct xfs_bmap_free
77 * from written to unwritten, otherwise convert from unwritten to written. 77 * from written to unwritten, otherwise convert from unwritten to written.
78 */ 78 */
79#define XFS_BMAPI_CONVERT 0x040 79#define XFS_BMAPI_CONVERT 0x040
80#define XFS_BMAPI_STACK_SWITCH 0x080
81 80
82#define XFS_BMAPI_FLAGS \ 81#define XFS_BMAPI_FLAGS \
83 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ 82 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
@@ -86,8 +85,7 @@ typedef struct xfs_bmap_free
86 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ 85 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \
87 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ 86 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
88 { XFS_BMAPI_CONTIG, "CONTIG" }, \ 87 { XFS_BMAPI_CONTIG, "CONTIG" }, \
89 { XFS_BMAPI_CONVERT, "CONVERT" }, \ 88 { XFS_BMAPI_CONVERT, "CONVERT" }
90 { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
91 89
92 90
93static inline int xfs_bmapi_aflag(int w) 91static inline int xfs_bmapi_aflag(int w)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 703b3ec1796c..64731ef3324d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -249,59 +249,6 @@ xfs_bmap_rtalloc(
249} 249}
250 250
251/* 251/*
252 * Stack switching interfaces for allocation
253 */
254static void
255xfs_bmapi_allocate_worker(
256 struct work_struct *work)
257{
258 struct xfs_bmalloca *args = container_of(work,
259 struct xfs_bmalloca, work);
260 unsigned long pflags;
261 unsigned long new_pflags = PF_FSTRANS;
262
263 /*
264 * we are in a transaction context here, but may also be doing work
265 * in kswapd context, and hence we may need to inherit that state
266 * temporarily to ensure that we don't block waiting for memory reclaim
267 * in any way.
268 */
269 if (args->kswapd)
270 new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
271
272 current_set_flags_nested(&pflags, new_pflags);
273
274 args->result = __xfs_bmapi_allocate(args);
275 complete(args->done);
276
277 current_restore_flags_nested(&pflags, new_pflags);
278}
279
280/*
281 * Some allocation requests often come in with little stack to work on. Push
282 * them off to a worker thread so there is lots of stack to use. Otherwise just
283 * call directly to avoid the context switch overhead here.
284 */
285int
286xfs_bmapi_allocate(
287 struct xfs_bmalloca *args)
288{
289 DECLARE_COMPLETION_ONSTACK(done);
290
291 if (!args->stack_switch)
292 return __xfs_bmapi_allocate(args);
293
294
295 args->done = &done;
296 args->kswapd = current_is_kswapd();
297 INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
298 queue_work(xfs_alloc_wq, &args->work);
299 wait_for_completion(&done);
300 destroy_work_on_stack(&args->work);
301 return args->result;
302}
303
304/*
305 * Check if the endoff is outside the last extent. If so the caller will grow 252 * Check if the endoff is outside the last extent. If so the caller will grow
306 * the allocation to a stripe unit boundary. All offsets are considered outside 253 * the allocation to a stripe unit boundary. All offsets are considered outside
307 * the end of file for an empty fork, so 1 is returned in *eof in that case. 254 * the end of file for an empty fork, so 1 is returned in *eof in that case.
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 075f72232a64..2fdb72d2c908 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -55,8 +55,6 @@ struct xfs_bmalloca {
55 bool userdata;/* set if is user data */ 55 bool userdata;/* set if is user data */
56 bool aeof; /* allocated space at eof */ 56 bool aeof; /* allocated space at eof */
57 bool conv; /* overwriting unwritten extents */ 57 bool conv; /* overwriting unwritten extents */
58 bool stack_switch;
59 bool kswapd; /* allocation in kswapd context */
60 int flags; 58 int flags;
61 struct completion *done; 59 struct completion *done;
62 struct work_struct work; 60 struct work_struct work;
@@ -66,8 +64,6 @@ struct xfs_bmalloca {
66int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, 64int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
67 int *committed); 65 int *committed);
68int xfs_bmap_rtalloc(struct xfs_bmalloca *ap); 66int xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
69int xfs_bmapi_allocate(struct xfs_bmalloca *args);
70int __xfs_bmapi_allocate(struct xfs_bmalloca *args);
71int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, 67int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
72 int whichfork, int *eof); 68 int whichfork, int *eof);
73int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip, 69int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index bf810c6baf2b..cf893bc1e373 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -33,6 +33,7 @@
33#include "xfs_error.h" 33#include "xfs_error.h"
34#include "xfs_trace.h" 34#include "xfs_trace.h"
35#include "xfs_cksum.h" 35#include "xfs_cksum.h"
36#include "xfs_alloc.h"
36 37
37/* 38/*
38 * Cursor allocation zone. 39 * Cursor allocation zone.
@@ -2323,7 +2324,7 @@ error1:
2323 * record (to be inserted into parent). 2324 * record (to be inserted into parent).
2324 */ 2325 */
2325STATIC int /* error */ 2326STATIC int /* error */
2326xfs_btree_split( 2327__xfs_btree_split(
2327 struct xfs_btree_cur *cur, 2328 struct xfs_btree_cur *cur,
2328 int level, 2329 int level,
2329 union xfs_btree_ptr *ptrp, 2330 union xfs_btree_ptr *ptrp,
@@ -2503,6 +2504,85 @@ error0:
2503 return error; 2504 return error;
2504} 2505}
2505 2506
2507struct xfs_btree_split_args { /* marshals xfs_btree_split() arguments to a worker */
2508 struct xfs_btree_cur *cur; /* forwarded untouched to __xfs_btree_split */
2509 int level; /* forwarded untouched to __xfs_btree_split */
2510 union xfs_btree_ptr *ptrp; /* forwarded untouched to __xfs_btree_split */
2511 union xfs_btree_key *key; /* forwarded untouched to __xfs_btree_split */
2512 struct xfs_btree_cur **curp; /* forwarded untouched to __xfs_btree_split */
2513 int *stat; /* success/failure */
2514 int result; /* error code returned by __xfs_btree_split */
2515 bool kswapd; /* allocation in kswapd context */
2516 struct completion *done; /* completed once the worker has filled in result */
2517 struct work_struct work; /* queued on xfs_alloc_wq; lives on the waiter's stack */
2518};
2519
2520/*
2521 * Stack switching interfaces for allocation
2522 */
2523static void
2524xfs_btree_split_worker(
2525 struct work_struct *work) /* embedded in a struct xfs_btree_split_args */
2526{
2527 struct xfs_btree_split_args *args = container_of(work,
2528 struct xfs_btree_split_args, work);
2529 unsigned long pflags;
2530 unsigned long new_pflags = PF_FSTRANS;
2531
2532 /*
2533 * we are in a transaction context here, but may also be doing work
2534 * in kswapd context, and hence we may need to inherit that state
2535 * temporarily to ensure that we don't block waiting for memory reclaim
2536 * in any way.
2537 */
2538 if (args->kswapd)
2539 new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
2540
2541 current_set_flags_nested(&pflags, new_pflags);
2542
2543 args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
2544 args->key, args->curp, args->stat);
2545 complete(args->done); /* args lives on the waiter's stack: do not touch it after this */
2546
2547 current_restore_flags_nested(&pflags, new_pflags);
2548}
2549
2550/*
2551 * BMBT split requests often come in with little stack to work on. Push
2552 * them off to a worker thread so there is lots of stack to use. For the other
2553 * btree types, just call directly to avoid the context switch overhead here.
2554 */
2555STATIC int /* error */
2556xfs_btree_split(
2557 struct xfs_btree_cur *cur,
2558 int level,
2559 union xfs_btree_ptr *ptrp,
2560 union xfs_btree_key *key,
2561 struct xfs_btree_cur **curp,
2562 int *stat) /* success/failure */
2563{
2564 struct xfs_btree_split_args args;
2565 DECLARE_COMPLETION_ONSTACK(done);
2566
2567 if (cur->bc_btnum != XFS_BTNUM_BMAP) /* only BMBT splits are pushed off to a worker */
2568 return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
2569
2570 args.cur = cur;
2571 args.level = level;
2572 args.ptrp = ptrp;
2573 args.key = key;
2574 args.curp = curp;
2575 args.stat = stat;
2576 args.done = &done;
2577 args.kswapd = current_is_kswapd(); /* worker inherits our reclaim context via this flag */
2578 INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
2579 queue_work(xfs_alloc_wq, &args.work);
2580 wait_for_completion(&done); /* args/done are on-stack: must not return before the worker signals */
2581 destroy_work_on_stack(&args.work);
2582 return args.result; /* error code filled in by the worker */
2583}
2584
2585
2506/* 2586/*
2507 * Copy the old inode root contents into a real block and make the 2587 * Copy the old inode root contents into a real block and make the
2508 * broot point to it. 2588 * broot point to it.
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 6c5eb4c551e3..6d3ec2b6ee29 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -749,8 +749,7 @@ xfs_iomap_write_allocate(
749 * pointer that the caller gave to us. 749 * pointer that the caller gave to us.
750 */ 750 */
751 error = xfs_bmapi_write(tp, ip, map_start_fsb, 751 error = xfs_bmapi_write(tp, ip, map_start_fsb,
752 count_fsb, 752 count_fsb, 0,
753 XFS_BMAPI_STACK_SWITCH,
754 &first_block, 1, 753 &first_block, 1,
755 imap, &nimaps, &free_list); 754 imap, &nimaps, &free_list);
756 if (error) 755 if (error)
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
index c3453b11f563..7703fa6770ff 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c
@@ -483,10 +483,16 @@ xfs_sb_quota_to_disk(
483 } 483 }
484 484
485 /* 485 /*
486 * GQUOTINO and PQUOTINO cannot be used together in versions of
487 * superblock that do not have pquotino. from->sb_flags tells us which
488 * quota is active and should be copied to disk. If neither are active,
489 * make sure we write NULLFSINO to the sb_gquotino field as a quota
490 * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature
491 * bit is set.
492 *
493 * Note that we don't need to handle the sb_uquotino or sb_pquotino here
494 * as they do not require any translation. Hence the main sb field loop
495 * will write them appropriately from the in-core superblock.
496 */
491 if ((*fields & XFS_SB_GQUOTINO) && 497 if ((*fields & XFS_SB_GQUOTINO) &&
492 (from->sb_qflags & XFS_GQUOTA_ACCT)) 498 (from->sb_qflags & XFS_GQUOTA_ACCT))
@@ -494,6 +500,17 @@ xfs_sb_quota_to_disk(
494 else if ((*fields & XFS_SB_PQUOTINO) && 500 else if ((*fields & XFS_SB_PQUOTINO) &&
495 (from->sb_qflags & XFS_PQUOTA_ACCT)) 501 (from->sb_qflags & XFS_PQUOTA_ACCT))
496 to->sb_gquotino = cpu_to_be64(from->sb_pquotino); 502 to->sb_gquotino = cpu_to_be64(from->sb_pquotino);
503 else {
504 /*
505 * We can't rely on just the fields being logged to tell us
506 * that it is safe to write NULLFSINO - we should only do that
507 * if quotas are not actually enabled. Hence only write
508 * NULLFSINO if both in-core quota inodes are NULL.
509 */
510 if (from->sb_gquotino == NULLFSINO &&
511 from->sb_pquotino == NULLFSINO)
512 to->sb_gquotino = cpu_to_be64(NULLFSINO);
513 }
497 514
498 *fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO); 515 *fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO);
499} 516}