Diffstat (limited to 'fs')
-rw-r--r--  fs/aio.c                   |   6
-rw-r--r--  fs/autofs4/inode.c         |   2
-rw-r--r--  fs/btrfs/compression.c     |   2
-rw-r--r--  fs/btrfs/dev-replace.c     |   5
-rw-r--r--  fs/btrfs/disk-io.c         |   5
-rw-r--r--  fs/btrfs/extent-tree.c     |   5
-rw-r--r--  fs/btrfs/ioctl.c           |  37
-rw-r--r--  fs/btrfs/print-tree.c      |   9
-rw-r--r--  fs/btrfs/raid56.c          |   5
-rw-r--r--  fs/btrfs/super.c           |   7
-rw-r--r--  fs/btrfs/sysfs.c           |  32
-rw-r--r--  fs/btrfs/sysfs.h           |   4
-rw-r--r--  fs/btrfs/transaction.c     |  12
-rw-r--r--  fs/btrfs/volumes.c         |  30
-rw-r--r--  fs/btrfs/zlib.c            |   2
-rw-r--r--  fs/cifs/cifs_unicode.c     |   7
-rw-r--r--  fs/cifs/cifsfs.c           |  17
-rw-r--r--  fs/cifs/link.c             |   2
-rw-r--r--  fs/ext4/balloc.c           |  16
-rw-r--r--  fs/ext4/extents_status.c   |   4
-rw-r--r--  fs/ext4/ialloc.c           |  37
-rw-r--r--  fs/ext4/indirect.c         |  24
-rw-r--r--  fs/ext4/mballoc.c          |  12
-rw-r--r--  fs/ext4/super.c            |  60
-rw-r--r--  fs/f2fs/data.c             |  23
-rw-r--r--  fs/f2fs/dir.c              |   2
-rw-r--r--  fs/f2fs/f2fs.h             |   6
-rw-r--r--  fs/f2fs/file.c             |  12
-rw-r--r--  fs/f2fs/inode.c            |   1
-rw-r--r--  fs/f2fs/namei.c            |  13
-rw-r--r--  fs/f2fs/node.c             |   2
-rw-r--r--  fs/f2fs/segment.c          |   5
-rw-r--r--  fs/f2fs/super.c            |   4
-rw-r--r--  fs/jbd2/transaction.c      |   5
-rw-r--r--  fs/kernfs/file.c           |  69
-rw-r--r--  fs/kernfs/mount.c          |  30
-rw-r--r--  fs/mbcache.c               |   3
-rw-r--r--  fs/nfs/inode.c             |  76
-rw-r--r--  fs/nfs/nfs4_fs.h           |   2
-rw-r--r--  fs/nfs/nfs4namespace.c     | 102
-rw-r--r--  fs/nfs/nfs4proc.c          |   2
-rw-r--r--  fs/nfs/write.c             |   4
-rw-r--r--  fs/nfsd/nfs4proc.c         |   9
-rw-r--r--  fs/nfsd/nfs4xdr.c          |  17
-rw-r--r--  fs/ocfs2/dlm/dlmcommon.h   |   4
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c   |  57
-rw-r--r--  fs/ocfs2/dlm/dlmrecovery.c |   3
-rw-r--r--  fs/ocfs2/dlm/dlmthread.c   |  13
-rw-r--r--  fs/ocfs2/dlm/dlmunlock.c   |  18
-rw-r--r--  fs/ocfs2/namei.c           | 145
-rw-r--r--  fs/ocfs2/ocfs2_trace.h     |   2
-rw-r--r--  fs/ocfs2/refcounttree.c    |   8
-rw-r--r--  fs/ocfs2/super.c           |   8
-rw-r--r--  fs/proc/stat.c             |  22
-rw-r--r--  fs/seq_file.c              |   3
55 files changed, 732 insertions(+), 307 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 4f078c054b41..955947ef3e02 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1021,6 +1021,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 
 	/* everything turned out well, dispose of the aiocb. */
 	kiocb_free(iocb);
+	put_reqs_available(ctx, 1);
 
 	/*
 	 * We have to order our ring_info tail store above and test
@@ -1062,6 +1063,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
 	if (head == tail)
 		goto out;
 
+	head %= ctx->nr_events;
+	tail %= ctx->nr_events;
+
 	while (ret < nr) {
 		long avail;
 		struct io_event *ev;
@@ -1100,8 +1104,6 @@ static long aio_read_events_ring(struct kioctx *ctx,
 	flush_dcache_page(ctx->ring_pages[0]);
 
 	pr_debug("%li h%u t%u\n", ret, head, tail);
-
-	put_reqs_available(ctx, ret);
 out:
 	mutex_unlock(&ctx->ring_lock);
 
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index d7bd395ab586..1c55388ae633 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -210,7 +210,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	int pipefd;
 	struct autofs_sb_info *sbi;
 	struct autofs_info *ino;
-	int pgrp;
+	int pgrp = 0;
 	bool pgrp_set = false;
 	int ret = -EINVAL;
 
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 92371c414228..1daea0b47187 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -821,7 +821,7 @@ static void free_workspace(int type, struct list_head *workspace)
 
 	spin_lock(workspace_lock);
 	if (*num_workspace < num_online_cpus()) {
-		list_add_tail(workspace, idle_workspace);
+		list_add(workspace, idle_workspace);
 		(*num_workspace)++;
 		spin_unlock(workspace_lock);
 		goto wake;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 2af6e66fe788..eea26e1b2fda 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -36,6 +36,7 @@
 #include "check-integrity.h"
 #include "rcu-string.h"
 #include "dev-replace.h"
+#include "sysfs.h"
 
 static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 				       int scrub_ret);
@@ -562,6 +563,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	fs_info->fs_devices->latest_bdev = tgt_device->bdev;
 	list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
 
+	/* replace the sysfs entry */
+	btrfs_kobj_rm_device(fs_info, src_device);
+	btrfs_kobj_add_device(fs_info, tgt_device);
+
 	btrfs_rm_dev_replace_blocked(fs_info);
 
 	btrfs_rm_dev_replace_srcdev(fs_info, src_device);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8bb4aa19898f..08e65e9cf2aa 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -369,7 +369,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 out:
 	unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
 			     &cached_state, GFP_NOFS);
-	btrfs_tree_read_unlock_blocking(eb);
+	if (need_lock)
+		btrfs_tree_read_unlock_blocking(eb);
 	return ret;
 }
 
@@ -2904,7 +2905,9 @@ retry_root_backup:
 	if (ret)
 		goto fail_qgroup;
 
+	mutex_lock(&fs_info->cleaner_mutex);
 	ret = btrfs_recover_relocation(tree_root);
+	mutex_unlock(&fs_info->cleaner_mutex);
 	if (ret < 0) {
 		printk(KERN_WARNING
 		       "BTRFS: failed to recover relocation\n");
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 99c253918208..813537f362f9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5678,7 +5678,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
 	struct btrfs_caching_control *next;
 	struct btrfs_caching_control *caching_ctl;
 	struct btrfs_block_group_cache *cache;
-	struct btrfs_space_info *space_info;
 
 	down_write(&fs_info->commit_root_sem);
 
@@ -5701,9 +5700,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
 
 	up_write(&fs_info->commit_root_sem);
 
-	list_for_each_entry_rcu(space_info, &fs_info->space_info, list)
-		percpu_counter_set(&space_info->total_bytes_pinned, 0);
-
 	update_global_block_rsv(fs_info);
 }
 
@@ -5741,6 +5737,7 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
 		spin_lock(&cache->lock);
 		cache->pinned -= len;
 		space_info->bytes_pinned -= len;
+		percpu_counter_add(&space_info->total_bytes_pinned, -len);
 		if (cache->ro) {
 			space_info->bytes_readonly += len;
 			readonly = true;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0d321c23069a..47aceb494d1d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -136,19 +136,22 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
 void btrfs_update_iflags(struct inode *inode)
 {
 	struct btrfs_inode *ip = BTRFS_I(inode);
-
-	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+	unsigned int new_fl = 0;
 
 	if (ip->flags & BTRFS_INODE_SYNC)
-		inode->i_flags |= S_SYNC;
+		new_fl |= S_SYNC;
 	if (ip->flags & BTRFS_INODE_IMMUTABLE)
-		inode->i_flags |= S_IMMUTABLE;
+		new_fl |= S_IMMUTABLE;
 	if (ip->flags & BTRFS_INODE_APPEND)
-		inode->i_flags |= S_APPEND;
+		new_fl |= S_APPEND;
 	if (ip->flags & BTRFS_INODE_NOATIME)
-		inode->i_flags |= S_NOATIME;
+		new_fl |= S_NOATIME;
 	if (ip->flags & BTRFS_INODE_DIRSYNC)
-		inode->i_flags |= S_DIRSYNC;
+		new_fl |= S_DIRSYNC;
+
+	set_mask_bits(&inode->i_flags,
+		      S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
+		      new_fl);
 }
 
 /*
@@ -3139,7 +3142,6 @@ out:
 static void clone_update_extent_map(struct inode *inode,
 				    const struct btrfs_trans_handle *trans,
 				    const struct btrfs_path *path,
-				    struct btrfs_file_extent_item *fi,
 				    const u64 hole_offset,
 				    const u64 hole_len)
 {
@@ -3154,7 +3156,11 @@ static void clone_update_extent_map(struct inode *inode,
 		return;
 	}
 
-	if (fi) {
+	if (path) {
+		struct btrfs_file_extent_item *fi;
+
+		fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				    struct btrfs_file_extent_item);
 		btrfs_extent_item_to_extent_map(inode, path, fi, false, em);
 		em->generation = -1;
 		if (btrfs_file_extent_type(path->nodes[0], fi) ==
@@ -3508,18 +3514,15 @@ process_slot:
 					   btrfs_item_ptr_offset(leaf, slot),
 					   size);
 			inode_add_bytes(inode, datal);
-			extent = btrfs_item_ptr(leaf, slot,
-					struct btrfs_file_extent_item);
 		}
 
 		/* If we have an implicit hole (NO_HOLES feature). */
 		if (drop_start < new_key.offset)
 			clone_update_extent_map(inode, trans,
-					path, NULL, drop_start,
+					NULL, drop_start,
 					new_key.offset - drop_start);
 
-		clone_update_extent_map(inode, trans, path,
-				extent, 0, 0);
+		clone_update_extent_map(inode, trans, path, 0, 0);
 
 		btrfs_mark_buffer_dirty(leaf);
 		btrfs_release_path(path);
@@ -3562,12 +3565,10 @@ process_slot:
 			btrfs_end_transaction(trans, root);
 			goto out;
 		}
+		clone_update_extent_map(inode, trans, NULL, last_dest_end,
+					destoff + len - last_dest_end);
 		ret = clone_finish_inode_update(trans, inode, destoff + len,
 						destoff, olen);
-		if (ret)
-			goto out;
-		clone_update_extent_map(inode, trans, path, NULL, last_dest_end,
-					destoff + len - last_dest_end);
 	}
 
 out:
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 6efd70d3b64f..9626b4ad3b9a 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -54,7 +54,7 @@ static void print_extent_data_ref(struct extent_buffer *eb,
 	       btrfs_extent_data_ref_count(eb, ref));
 }
 
-static void print_extent_item(struct extent_buffer *eb, int slot)
+static void print_extent_item(struct extent_buffer *eb, int slot, int type)
 {
 	struct btrfs_extent_item *ei;
 	struct btrfs_extent_inline_ref *iref;
@@ -63,7 +63,6 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
 	struct btrfs_disk_key key;
 	unsigned long end;
 	unsigned long ptr;
-	int type;
 	u32 item_size = btrfs_item_size_nr(eb, slot);
 	u64 flags;
 	u64 offset;
@@ -88,7 +87,8 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
 	       btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei),
 	       flags);
 
-	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+	if ((type == BTRFS_EXTENT_ITEM_KEY) &&
+	    flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
 		struct btrfs_tree_block_info *info;
 		info = (struct btrfs_tree_block_info *)(ei + 1);
 		btrfs_tree_block_key(eb, info, &key);
@@ -223,7 +223,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
 			       btrfs_disk_root_refs(l, ri));
 			break;
 		case BTRFS_EXTENT_ITEM_KEY:
-			print_extent_item(l, i);
+		case BTRFS_METADATA_ITEM_KEY:
+			print_extent_item(l, i, type);
 			break;
 		case BTRFS_TREE_BLOCK_REF_KEY:
 			printk(KERN_INFO "\t\ttree block backref\n");
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 4055291a523e..4a88f073fdd7 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1956,9 +1956,10 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
 	 * pages are going to be uptodate.
 	 */
 	for (stripe = 0; stripe < bbio->num_stripes; stripe++) {
-		if (rbio->faila == stripe ||
-		    rbio->failb == stripe)
+		if (rbio->faila == stripe || rbio->failb == stripe) {
+			atomic_inc(&rbio->bbio->error);
 			continue;
+		}
 
 		for (pagenr = 0; pagenr < nr_pages; pagenr++) {
 			struct page *p;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4662d92a4b73..8e16bca69c56 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -522,9 +522,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 		case Opt_ssd_spread:
 			btrfs_set_and_info(root, SSD_SPREAD,
 					   "use spread ssd allocation scheme");
+			btrfs_set_opt(info->mount_opt, SSD);
 			break;
 		case Opt_nossd:
-			btrfs_clear_and_info(root, NOSSD,
+			btrfs_set_and_info(root, NOSSD,
 					     "not using ssd allocation scheme");
 			btrfs_clear_opt(info->mount_opt, SSD);
 			break;
@@ -1467,7 +1468,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 			goto restore;
 
 		/* recover relocation */
+		mutex_lock(&fs_info->cleaner_mutex);
 		ret = btrfs_recover_relocation(root);
+		mutex_unlock(&fs_info->cleaner_mutex);
 		if (ret)
 			goto restore;
 
@@ -1808,6 +1811,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
 	list_for_each_entry(dev, head, dev_list) {
 		if (dev->missing)
 			continue;
+		if (!dev->name)
+			continue;
 		if (!first_dev || dev->devid < first_dev->devid)
 			first_dev = dev;
 	}
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index df39458f1487..78699364f537 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -605,14 +605,37 @@ static void init_feature_attrs(void)
 	}
 }
 
-static int add_device_membership(struct btrfs_fs_info *fs_info)
+int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
+			 struct btrfs_device *one_device)
+{
+	struct hd_struct *disk;
+	struct kobject *disk_kobj;
+
+	if (!fs_info->device_dir_kobj)
+		return -EINVAL;
+
+	if (one_device) {
+		disk = one_device->bdev->bd_part;
+		disk_kobj = &part_to_dev(disk)->kobj;
+
+		sysfs_remove_link(fs_info->device_dir_kobj,
+				  disk_kobj->name);
+	}
+
+	return 0;
+}
+
+int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
+			  struct btrfs_device *one_device)
 {
 	int error = 0;
 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
 	struct btrfs_device *dev;
 
-	fs_info->device_dir_kobj = kobject_create_and_add("devices",
+	if (!fs_info->device_dir_kobj)
+		fs_info->device_dir_kobj = kobject_create_and_add("devices",
						&fs_info->super_kobj);
+
 	if (!fs_info->device_dir_kobj)
 		return -ENOMEM;
 
@@ -623,6 +646,9 @@ static int add_device_membership(struct btrfs_fs_info *fs_info)
 		if (!dev->bdev)
 			continue;
 
+		if (one_device && one_device != dev)
+			continue;
+
 		disk = dev->bdev->bd_part;
 		disk_kobj = &part_to_dev(disk)->kobj;
 
@@ -666,7 +692,7 @@ int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info)
 	if (error)
 		goto failure;
 
-	error = add_device_membership(fs_info);
+	error = btrfs_kobj_add_device(fs_info, NULL);
 	if (error)
 		goto failure;
 
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 9ab576318a84..ac46df37504c 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -66,4 +66,8 @@ char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
 extern const char * const btrfs_feature_set_names[3];
 extern struct kobj_type space_info_ktype;
 extern struct kobj_type btrfs_raid_ktype;
+int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
+		struct btrfs_device *one_device);
+int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
+		struct btrfs_device *one_device);
 #endif /* _BTRFS_SYSFS_H_ */
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 511839c04f11..5f379affdf23 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -386,11 +386,13 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
 	bool reloc_reserved = false;
 	int ret;
 
+	/* Send isn't supposed to start transactions. */
+	ASSERT(current->journal_info != (void *)BTRFS_SEND_TRANS_STUB);
+
 	if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
 		return ERR_PTR(-EROFS);
 
-	if (current->journal_info &&
-	    current->journal_info != (void *)BTRFS_SEND_TRANS_STUB) {
+	if (current->journal_info) {
 		WARN_ON(type & TRANS_EXTWRITERS);
 		h = current->journal_info;
 		h->use_count++;
@@ -491,6 +493,7 @@ again:
 	smp_mb();
 	if (cur_trans->state >= TRANS_STATE_BLOCKED &&
 	    may_wait_transaction(root, type)) {
+		current->journal_info = h;
 		btrfs_commit_transaction(h, root);
 		goto again;
 	}
@@ -1615,11 +1618,6 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
 	int ret;
 
 	ret = btrfs_run_delayed_items(trans, root);
-	/*
-	 * running the delayed items may have added new refs. account
-	 * them now so that they hinder processing of more delayed refs
-	 * as little as possible.
-	 */
 	if (ret)
 		return ret;
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c83b24251e53..6104676857f5 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -40,6 +40,7 @@
 #include "rcu-string.h"
 #include "math.h"
 #include "dev-replace.h"
+#include "sysfs.h"
 
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root,
@@ -554,12 +555,14 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
 		 * This is ok to do without rcu read locked because we hold the
 		 * uuid mutex so nothing we touch in here is going to disappear.
 		 */
-		name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
-		if (!name) {
-			kfree(device);
-			goto error;
+		if (orig_dev->name) {
+			name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
+			if (!name) {
+				kfree(device);
+				goto error;
+			}
+			rcu_assign_pointer(device->name, name);
 		}
-		rcu_assign_pointer(device->name, name);
 
 		list_add(&device->dev_list, &fs_devices->devices);
 		device->fs_devices = fs_devices;
@@ -1680,6 +1683,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 	if (device->bdev)
 		device->fs_devices->open_devices--;
 
+	/* remove sysfs entry */
+	btrfs_kobj_rm_device(root->fs_info, device);
+
 	call_rcu(&device->rcu, free_device);
 
 	num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
@@ -2143,9 +2149,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	total_bytes = btrfs_super_num_devices(root->fs_info->super_copy);
 	btrfs_set_super_num_devices(root->fs_info->super_copy,
 				    total_bytes + 1);
+
+	/* add sysfs device entry */
+	btrfs_kobj_add_device(root->fs_info, device);
+
 	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
 	if (seeding_dev) {
+		char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
 		ret = init_first_rw_device(trans, root, device);
 		if (ret) {
 			btrfs_abort_transaction(trans, root, ret);
@@ -2156,6 +2167,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 			btrfs_abort_transaction(trans, root, ret);
 			goto error_trans;
 		}
+
+		/* Sprouting would change fsid of the mounted root,
+		 * so rename the fsid on the sysfs
+		 */
+		snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
+						root->fs_info->fsid);
+		if (kobject_rename(&root->fs_info->super_kobj, fsid_buf))
+			goto error_trans;
 	} else {
 		ret = btrfs_add_device(trans, root, device);
 		if (ret) {
@@ -2205,6 +2224,7 @@ error_trans:
 	unlock_chunks(root);
 	btrfs_end_transaction(trans, root);
 	rcu_string_free(device->name);
+	btrfs_kobj_rm_device(root->fs_info, device);
 	kfree(device);
 error:
 	blkdev_put(bdev, FMODE_EXCL);
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 4f196314c0c1..b67d8fc81277 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -136,7 +136,7 @@ static int zlib_compress_pages(struct list_head *ws,
 		if (workspace->def_strm.total_in > 8192 &&
 		    workspace->def_strm.total_in <
 		    workspace->def_strm.total_out) {
-			ret = -EIO;
+			ret = -E2BIG;
 			goto out;
 		}
 		/* we need another page for writing out. Test this
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 0227b45ef00a..15e9505aa35f 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -290,7 +290,8 @@ int
 cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
 		 const struct nls_table *cp, int mapChars)
 {
-	int i, j, charlen;
+	int i, charlen;
+	int j = 0;
 	char src_char;
 	__le16 dst_char;
 	wchar_t tmp;
@@ -298,12 +299,11 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
 	if (!mapChars)
 		return cifs_strtoUTF16(target, source, PATH_MAX, cp);
 
-	for (i = 0, j = 0; i < srclen; j++) {
+	for (i = 0; i < srclen; j++) {
 		src_char = source[i];
 		charlen = 1;
 		switch (src_char) {
 		case 0:
-			put_unaligned(0, &target[j]);
 			goto ctoUTF16_out;
 		case ':':
 			dst_char = cpu_to_le16(UNI_COLON);
@@ -350,6 +350,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
 	}
 
 ctoUTF16_out:
+	put_unaligned(0, &target[j]); /* Null terminate target unicode string */
 	return j;
 }
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 2c90d07c0b3a..888398067420 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -725,6 +725,19 @@ out_nls:
 	goto out;
 }
 
+static ssize_t
+cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	ssize_t rc;
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	rc = cifs_revalidate_mapping(inode);
+	if (rc)
+		return rc;
+
+	return generic_file_read_iter(iocb, iter);
+}
+
 static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
@@ -881,7 +894,7 @@ const struct inode_operations cifs_symlink_inode_ops = {
 const struct file_operations cifs_file_ops = {
 	.read = new_sync_read,
 	.write = new_sync_write,
-	.read_iter = generic_file_read_iter,
+	.read_iter = cifs_loose_read_iter,
 	.write_iter = cifs_file_write_iter,
 	.open = cifs_open,
 	.release = cifs_close,
@@ -939,7 +952,7 @@ const struct file_operations cifs_file_direct_ops = {
 const struct file_operations cifs_file_nobrl_ops = {
 	.read = new_sync_read,
 	.write = new_sync_write,
-	.read_iter = generic_file_read_iter,
+	.read_iter = cifs_loose_read_iter,
 	.write_iter = cifs_file_write_iter,
 	.open = cifs_open,
 	.release = cifs_close,
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 264ece71bdb2..68559fd557fb 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -374,7 +374,7 @@ cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
 	oparms.cifs_sb = cifs_sb;
 	oparms.desired_access = GENERIC_WRITE;
 	oparms.create_options = create_options;
-	oparms.disposition = FILE_OPEN;
+	oparms.disposition = FILE_CREATE;
 	oparms.path = path;
 	oparms.fid = &fid;
 	oparms.reconnect = false;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 0762d143e252..fca382037ddd 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -194,7 +194,16 @@ static void ext4_init_block_bitmap(struct super_block *sb,
 	if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
 		ext4_error(sb, "Checksum bad for group %u", block_group);
 		grp = ext4_get_group_info(sb, block_group);
+		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+			percpu_counter_sub(&sbi->s_freeclusters_counter,
+					   grp->bb_free);
 		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+		if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+			int count;
+			count = ext4_free_inodes_count(sb, gdp);
+			percpu_counter_sub(&sbi->s_freeinodes_counter,
+					   count);
+		}
 		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
 		return;
 	}
@@ -359,6 +368,7 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
 {
 	ext4_fsblk_t blk;
 	struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	if (buffer_verified(bh))
 		return;
@@ -369,6 +379,9 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
 		ext4_unlock_group(sb, block_group);
 		ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
 			   block_group, blk);
+		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+			percpu_counter_sub(&sbi->s_freeclusters_counter,
+					   grp->bb_free);
 		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
 		return;
 	}
@@ -376,6 +389,9 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
 			desc, bh))) {
 		ext4_unlock_group(sb, block_group);
 		ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
+		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+			percpu_counter_sub(&sbi->s_freeclusters_counter,
+					   grp->bb_free);
 		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
 		return;
 	}
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 3f5c188953a4..0b7e28e7eaa4 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -966,10 +966,10 @@ retry:
 			continue;
 		}
 
-		if (ei->i_es_lru_nr == 0 || ei == locked_ei)
+		if (ei->i_es_lru_nr == 0 || ei == locked_ei ||
+		    !write_trylock(&ei->i_es_lock))
 			continue;
 
-		write_lock(&ei->i_es_lock);
 		shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
 		if (ei->i_es_lru_nr == 0)
 			list_del_init(&ei->i_es_lru);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 0ee59a6644e2..5b87fc36aab8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -71,6 +71,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
 			       struct ext4_group_desc *gdp)
 {
 	struct ext4_group_info *grp;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	J_ASSERT_BH(bh, buffer_locked(bh));
 
 	/* If checksum is bad mark all blocks and inodes use to prevent
@@ -78,7 +79,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
 	if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
 		ext4_error(sb, "Checksum bad for group %u", block_group);
 		grp = ext4_get_group_info(sb, block_group);
+		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+			percpu_counter_sub(&sbi->s_freeclusters_counter,
+					   grp->bb_free);
 		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+		if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+			int count;
+			count = ext4_free_inodes_count(sb, gdp);
+			percpu_counter_sub(&sbi->s_freeinodes_counter,
+					   count);
+		}
 		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
 		return 0;
 	}
@@ -116,6 +126,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 	struct buffer_head *bh = NULL;
 	ext4_fsblk_t bitmap_blk;
 	struct ext4_group_info *grp;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	desc = ext4_get_group_desc(sb, block_group, NULL);
 	if (!desc)
@@ -185,6 +196,12 @@ verify:
 		ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
 			   "inode_bitmap = %llu", block_group, bitmap_blk);
 		grp = ext4_get_group_info(sb, block_group);
+		if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+			int count;
+			count = ext4_free_inodes_count(sb, desc);
+			percpu_counter_sub(&sbi->s_freeinodes_counter,
+					   count);
+		}
 		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
 		return NULL;
 	}
@@ -321,6 +338,12 @@ out:
 			fatal = err;
 	} else {
 		ext4_error(sb, "bit already cleared for inode %lu", ino);
+		if (gdp && !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+			int count;
+			count = ext4_free_inodes_count(sb, gdp);
+			percpu_counter_sub(&sbi->s_freeinodes_counter,
+					   count);
+		}
 		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
 	}
 
@@ -851,6 +874,13 @@ got:
 		goto out;
 	}
 
+	BUFFER_TRACE(group_desc_bh, "get_write_access");
+	err = ext4_journal_get_write_access(handle, group_desc_bh);
+	if (err) {
+		ext4_std_error(sb, err);
+		goto out;
+	}
+
 	/* We may have to initialize the block bitmap if it isn't already */
 	if (ext4_has_group_desc_csum(sb) &&
 	    gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
@@ -887,13 +917,6 @@ got:
 		}
 	}
 
-	BUFFER_TRACE(group_desc_bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, group_desc_bh);
-	if (err) {
-		ext4_std_error(sb, err);
-		goto out;
-	}
-
 	/* Update the relevant bg descriptor fields */
 	if (ext4_has_group_desc_csum(sb)) {
 		int free;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 8a57e9fcd1b9..fd69da194826 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -389,7 +389,13 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 	return 0;
 failed:
 	for (; i >= 0; i--) {
-		if (i != indirect_blks && branch[i].bh)
+		/*
+		 * We want to ext4_forget() only freshly allocated indirect
+		 * blocks. Buffer for new_blocks[i-1] is at branch[i].bh and
+		 * buffer at branch[0].bh is indirect block / inode already
+		 * existing before ext4_alloc_branch() was called.
+		 */
+		if (i > 0 && i != indirect_blks && branch[i].bh)
 			ext4_forget(handle, 1, inode, branch[i].bh,
 				    branch[i].bh->b_blocknr);
 		ext4_free_blocks(handle, inode, NULL, new_blocks[i],
@@ -1310,16 +1316,24 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode,
 		blk = *i_data;
 		if (level > 0) {
 			ext4_lblk_t first2;
+			ext4_lblk_t count2;
+
 			bh = sb_bread(inode->i_sb, le32_to_cpu(blk));
 			if (!bh) {
 				EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk),
 						       "Read failure");
 				return -EIO;
 			}
-			first2 = (first > offset) ? first - offset : 0;
+			if (first > offset) {
+				first2 = first - offset;
+				count2 = count;
+			} else {
+				first2 = 0;
+				count2 = count - (offset - first);
+			}
 			ret = free_hole_blocks(handle, inode, bh,
 					       (__le32 *)bh->b_data, level - 1,
-					       first2, count - offset,
+					       first2, count2,
 					       inode->i_sb->s_blocksize >> 2);
 			if (ret) {
 				brelse(bh);
@@ -1329,8 +1343,8 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode,
 		if (level == 0 ||
 		    (bh && all_zeroes((__le32 *)bh->b_data,
 				      (__le32 *)bh->b_data + addr_per_block))) {
-			ext4_free_data(handle, inode, parent_bh, &blk, &blk+1);
-			*i_data = 0;
+			ext4_free_data(handle, inode, parent_bh,
+				       i_data, i_data + 1);
 		}
 		brelse(bh);
 		bh = NULL;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 59e31622cc6e..2dcb936be90e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -722,6 +722,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
 				void *buddy, void *bitmap, ext4_group_t group)
 {
 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
 	ext4_grpblk_t i = 0;
 	ext4_grpblk_t first;
@@ -751,14 +752,17 @@ void ext4_mb_generate_buddy(struct super_block *sb,
 
 	if (free != grp->bb_free) {
 		ext4_grp_locked_error(sb, group, 0, 0,
-				      "%u clusters in bitmap, %u in gd; "
-				      "block bitmap corrupt.",
+				      "block bitmap and bg descriptor "
+				      "inconsistent: %u vs %u free clusters",
 				      free, grp->bb_free);
 		/*
 		 * If we intend to continue, we consider group descriptor
 		 * corrupt and update bb_free using bitmap value
 		 */
 		grp->bb_free = free;
+		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+			percpu_counter_sub(&sbi->s_freeclusters_counter,
+					   grp->bb_free);
 		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
 	}
 	mb_set_largest_free_order(sb, grp);
@@ -1431,6 +1435,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
 	right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
 
 	if (unlikely(block != -1)) {
+		struct ext4_sb_info *sbi = EXT4_SB(sb);
 		ext4_fsblk_t blocknr;
 
 		blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
@@ -1441,6 +1446,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
 				      "freeing already freed block "
 				      "(bit %u); block bitmap corrupt.",
 				      block);
+		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))
+			percpu_counter_sub(&sbi->s_freeclusters_counter,
+					   e4b->bd_info->bb_free);
 		/* Mark the block group as corrupt. */
 		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
 			&e4b->bd_info->bb_state);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b9b9aabfb4d2..6df7bc611dbd 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1525,8 +1525,6 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
 			arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
 		sbi->s_commit_interval = HZ * arg;
 	} else if (token == Opt_max_batch_time) {
-		if (arg == 0)
-			arg = EXT4_DEF_MAX_BATCH_TIME;
 		sbi->s_max_batch_time = arg;
 	} else if (token == Opt_min_batch_time) {
 		sbi->s_min_batch_time = arg;
@@ -2809,10 +2807,11 @@ static void print_daily_error_info(unsigned long arg)
 	es = sbi->s_es;
 
 	if (es->s_error_count)
-		ext4_msg(sb, KERN_NOTICE, "error count: %u",
+		/* fsck newer than v1.41.13 is needed to clean this condition. */
+		ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
 			 le32_to_cpu(es->s_error_count));
 	if (es->s_first_error_time) {
-		printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
+		printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
 		       sb->s_id, le32_to_cpu(es->s_first_error_time),
 		       (int) sizeof(es->s_first_error_func),
 		       es->s_first_error_func,
@@ -2826,7 +2825,7 @@ static void print_daily_error_info(unsigned long arg)
 		printk("\n");
 	}
 	if (es->s_last_error_time) {
-		printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
+		printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
 		       sb->s_id, le32_to_cpu(es->s_last_error_time),
 		       (int) sizeof(es->s_last_error_func),
 		       es->s_last_error_func,
@@ -3880,38 +3879,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			goto failed_mount2;
 		}
 	}
-
-	/*
-	 * set up enough so that it can read an inode,
-	 * and create new inode for buddy allocator
-	 */
-	sbi->s_gdb_count = db_count;
-	if (!test_opt(sb, NOLOAD) &&
-	    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
-		sb->s_op = &ext4_sops;
-	else
-		sb->s_op = &ext4_nojournal_sops;
-
-	ext4_ext_init(sb);
-	err = ext4_mb_init(sb);
-	if (err) {
-		ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
-			 err);
-		goto failed_mount2;
-	}
-
 	if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
 		ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
-		goto failed_mount2a;
+		goto failed_mount2;
 	}
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
 		if (!ext4_fill_flex_info(sb)) {
 			ext4_msg(sb, KERN_ERR,
 				 "unable to initialize "
 				 "flex_bg meta info!");
-			goto failed_mount2a;
+			goto failed_mount2;
 		}
 
+	sbi->s_gdb_count = db_count;
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 	spin_lock_init(&sbi->s_next_gen_lock);
 
@@ -3946,6 +3926,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_stripe = ext4_get_stripe_size(sbi);
 	sbi->s_extent_max_zeroout_kb = 32;
 
+	/*
+	 * set up enough so that it can read an inode
+	 */
+	if (!test_opt(sb, NOLOAD) &&
+	    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
+		sb->s_op = &ext4_sops;
+	else
+		sb->s_op = &ext4_nojournal_sops;
 	sb->s_export_op = &ext4_export_ops;
 	sb->s_xattr = ext4_xattr_handlers;
 #ifdef CONFIG_QUOTA
@@ -4135,13 +4123,21 @@ no_journal:
 	if (err) {
 		ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
 			 "reserved pool", ext4_calculate_resv_clusters(sb));
-		goto failed_mount5;
+		goto failed_mount4a;
 	}
 
 	err = ext4_setup_system_zone(sb);
 	if (err) {
 		ext4_msg(sb, KERN_ERR, "failed to initialize system "
 			 "zone (%d)", err);
+		goto failed_mount4a;
+	}
+
+	ext4_ext_init(sb);
+	err = ext4_mb_init(sb);
+	if (err) {
+		ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
+			 err);
 		goto failed_mount5;
 	}
 
@@ -4218,8 +4214,11 @@ failed_mount8:
 failed_mount7:
 	ext4_unregister_li_request(sb);
 failed_mount6:
-	ext4_release_system_zone(sb);
+	ext4_mb_release(sb);
 failed_mount5:
+	ext4_ext_release(sb);
+	ext4_release_system_zone(sb);
+failed_mount4a:
 	dput(sb->s_root);
 	sb->s_root = NULL;
 failed_mount4:
@@ -4243,14 +4242,11 @@ failed_mount3:
 	percpu_counter_destroy(&sbi->s_extent_cache_cnt);
 	if (sbi->s_mmp_tsk)
 		kthread_stop(sbi->s_mmp_tsk);
-failed_mount2a:
-	ext4_mb_release(sb);
 failed_mount2:
 	for (i = 0; i < db_count; i++)
 		brelse(sbi->s_group_desc[i]);
 	ext4_kvfree(sbi->s_group_desc);
 failed_mount:
-	ext4_ext_release(sb);
 	if (sbi->s_chksum_driver)
 		crypto_free_shash(sbi->s_chksum_driver);
 	if (sbi->s_proc) {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 0924521306b4..f8cf619edb5f 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -608,8 +608,8 @@ static int __allocate_data_block(struct dnode_of_data *dn)
  * b. do not use extent cache for better performance
  * c. give the block addresses to blockdev
  */
-static int get_data_block(struct inode *inode, sector_t iblock,
-			struct buffer_head *bh_result, int create)
+static int __get_data_block(struct inode *inode, sector_t iblock,
+			struct buffer_head *bh_result, int create, bool fiemap)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	unsigned int blkbits = inode->i_sb->s_blocksize_bits;
@@ -637,7 +637,7 @@ static int get_data_block(struct inode *inode, sector_t iblock,
 			err = 0;
 		goto unlock_out;
 	}
-	if (dn.data_blkaddr == NEW_ADDR)
+	if (dn.data_blkaddr == NEW_ADDR && !fiemap)
 		goto put_out;
 
 	if (dn.data_blkaddr != NULL_ADDR) {
@@ -671,7 +671,7 @@ get_next:
 			err = 0;
 		goto unlock_out;
 	}
-	if (dn.data_blkaddr == NEW_ADDR)
+	if (dn.data_blkaddr == NEW_ADDR && !fiemap)
 		goto put_out;
 
 	end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
@@ -708,10 +708,23 @@ out:
 	return err;
 }
 
+static int get_data_block(struct inode *inode, sector_t iblock,
+			struct buffer_head *bh_result, int create)
+{
+	return __get_data_block(inode, iblock, bh_result, create, false);
+}
+
+static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
+			struct buffer_head *bh_result, int create)
+{
+	return __get_data_block(inode, iblock, bh_result, create, true);
+}
+
 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		u64 start, u64 len)
 {
-	return generic_block_fiemap(inode, fieinfo, start, len, get_data_block);
+	return generic_block_fiemap(inode, fieinfo,
+				start, len, get_data_block_fiemap);
 }
 
 static int f2fs_read_data_page(struct file *file, struct page *page)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 966acb039e3b..a4addd72ebbd 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -376,11 +376,11 @@ static struct page *init_inode_metadata(struct inode *inode,
 
 put_error:
 	f2fs_put_page(page, 1);
+error:
 	/* once the failed inode becomes a bad inode, i_mode is S_IFREG */
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_blocks(inode, 0);
 	remove_dirty_dir_inode(inode);
-error:
 	remove_inode_page(inode);
 	return ERR_PTR(err);
 }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e51c732b0dd9..58df97e174d0 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -342,9 +342,6 @@ struct f2fs_sm_info {
 	struct dirty_seglist_info *dirty_info;	/* dirty segment information */
 	struct curseg_info *curseg_array;	/* active segment information */
 
-	struct list_head wblist_head;	/* list of under-writeback pages */
-	spinlock_t wblist_lock;		/* lock for checkpoint */
-
 	block_t seg0_blkaddr;		/* block address of 0'th segment */
 	block_t main_blkaddr;		/* start block address of main area */
 	block_t ssa_blkaddr;		/* start block address of SSA area */
@@ -644,7 +641,8 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
  */
 static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
 {
-	WARN_ON((nid >= NM_I(sbi)->max_nid));
+	if (unlikely(nid < F2FS_ROOT_INO(sbi)))
+		return -EINVAL;
 	if (unlikely(nid >= NM_I(sbi)->max_nid))
 		return -EINVAL;
 	return 0;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index c58e33075719..7d8b96275092 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -659,16 +659,19 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
 	off_start = offset & (PAGE_CACHE_SIZE - 1);
 	off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
 
+	f2fs_lock_op(sbi);
+
 	for (index = pg_start; index <= pg_end; index++) {
 		struct dnode_of_data dn;
 
-		f2fs_lock_op(sbi);
+		if (index == pg_end && !off_end)
+			goto noalloc;
+
 		set_new_dnode(&dn, inode, NULL, NULL, 0);
 		ret = f2fs_reserve_block(&dn, index);
-		f2fs_unlock_op(sbi);
 		if (ret)
 			break;
-
+noalloc:
 		if (pg_start == pg_end)
 			new_size = offset + len;
 		else if (index == pg_start && off_start)
@@ -683,8 +686,9 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
 		i_size_read(inode) < new_size) {
 		i_size_write(inode, new_size);
 		mark_inode_dirty(inode);
-		f2fs_write_inode(inode, NULL);
+		update_inode_page(inode);
 	}
+	f2fs_unlock_op(sbi);
 
 	return ret;
 }
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index adc622c6bdce..2cf6962f6cc8 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -78,6 +78,7 @@ static int do_read_inode(struct inode *inode)
78 if (check_nid_range(sbi, inode->i_ino)) { 78 if (check_nid_range(sbi, inode->i_ino)) {
79 f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu", 79 f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu",
80 (unsigned long) inode->i_ino); 80 (unsigned long) inode->i_ino);
81 WARN_ON(1);
81 return -EINVAL; 82 return -EINVAL;
82 } 83 }
83 84
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 9138c32aa698..a6bdddc33ce2 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -417,9 +417,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
417 } 417 }
418 418
419 f2fs_set_link(new_dir, new_entry, new_page, old_inode); 419 f2fs_set_link(new_dir, new_entry, new_page, old_inode);
420 down_write(&F2FS_I(old_inode)->i_sem);
421 F2FS_I(old_inode)->i_pino = new_dir->i_ino;
422 up_write(&F2FS_I(old_inode)->i_sem);
423 420
424 new_inode->i_ctime = CURRENT_TIME; 421 new_inode->i_ctime = CURRENT_TIME;
425 down_write(&F2FS_I(new_inode)->i_sem); 422 down_write(&F2FS_I(new_inode)->i_sem);
@@ -448,6 +445,10 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
448 } 445 }
449 } 446 }
450 447
448 down_write(&F2FS_I(old_inode)->i_sem);
449 file_lost_pino(old_inode);
450 up_write(&F2FS_I(old_inode)->i_sem);
451
451 old_inode->i_ctime = CURRENT_TIME; 452 old_inode->i_ctime = CURRENT_TIME;
452 mark_inode_dirty(old_inode); 453 mark_inode_dirty(old_inode);
453 454
@@ -457,9 +458,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
457 if (old_dir != new_dir) { 458 if (old_dir != new_dir) {
458 f2fs_set_link(old_inode, old_dir_entry, 459 f2fs_set_link(old_inode, old_dir_entry,
459 old_dir_page, new_dir); 460 old_dir_page, new_dir);
460 down_write(&F2FS_I(old_inode)->i_sem);
461 F2FS_I(old_inode)->i_pino = new_dir->i_ino;
462 up_write(&F2FS_I(old_inode)->i_sem);
463 update_inode_page(old_inode); 461 update_inode_page(old_inode);
464 } else { 462 } else {
465 kunmap(old_dir_page); 463 kunmap(old_dir_page);
@@ -474,7 +472,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
474 return 0; 472 return 0;
475 473
476put_out_dir: 474put_out_dir:
477 f2fs_put_page(new_page, 1); 475 kunmap(new_page);
476 f2fs_put_page(new_page, 0);
478out_dir: 477out_dir:
479 if (old_dir_entry) { 478 if (old_dir_entry) {
480 kunmap(old_dir_page); 479 kunmap(old_dir_page);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 9dfb9a042fd2..4b697ccc9b0c 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -42,6 +42,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
42 mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12; 42 mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
43 res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); 43 res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
44 } else if (type == DIRTY_DENTS) { 44 } else if (type == DIRTY_DENTS) {
45 if (sbi->sb->s_bdi->dirty_exceeded)
46 return false;
45 mem_size = get_pages(sbi, F2FS_DIRTY_DENTS); 47 mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
46 res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1); 48 res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
47 } 49 }
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f25f0e07e26f..d04613df710a 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -272,14 +272,15 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
272 return -ENOMEM; 272 return -ENOMEM;
273 spin_lock_init(&fcc->issue_lock); 273 spin_lock_init(&fcc->issue_lock);
274 init_waitqueue_head(&fcc->flush_wait_queue); 274 init_waitqueue_head(&fcc->flush_wait_queue);
275 sbi->sm_info->cmd_control_info = fcc;
275 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, 276 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
276 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); 277 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
277 if (IS_ERR(fcc->f2fs_issue_flush)) { 278 if (IS_ERR(fcc->f2fs_issue_flush)) {
278 err = PTR_ERR(fcc->f2fs_issue_flush); 279 err = PTR_ERR(fcc->f2fs_issue_flush);
279 kfree(fcc); 280 kfree(fcc);
281 sbi->sm_info->cmd_control_info = NULL;
280 return err; 282 return err;
281 } 283 }
282 sbi->sm_info->cmd_control_info = fcc;
283 284
284 return err; 285 return err;
285} 286}
@@ -1885,8 +1886,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1885 1886
1886 /* init sm info */ 1887 /* init sm info */
1887 sbi->sm_info = sm_info; 1888 sbi->sm_info = sm_info;
1888 INIT_LIST_HEAD(&sm_info->wblist_head);
1889 spin_lock_init(&sm_info->wblist_lock);
1890 sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); 1889 sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
1891 sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); 1890 sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
1892 sm_info->segment_count = le32_to_cpu(raw_super->segment_count); 1891 sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index b2b18637cb9e..8f96d9372ade 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -689,9 +689,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
689 struct f2fs_sb_info *sbi = F2FS_SB(sb); 689 struct f2fs_sb_info *sbi = F2FS_SB(sb);
690 struct inode *inode; 690 struct inode *inode;
691 691
692 if (unlikely(ino < F2FS_ROOT_INO(sbi))) 692 if (check_nid_range(sbi, ino))
693 return ERR_PTR(-ESTALE);
694 if (unlikely(ino >= NM_I(sbi)->max_nid))
695 return ERR_PTR(-ESTALE); 693 return ERR_PTR(-ESTALE);
696 694
697 /* 695 /*
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 38cfcf5f6fce..6f0f590cc5a3 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1588,9 +1588,12 @@ int jbd2_journal_stop(handle_t *handle)
1588 * to perform a synchronous write. We do this to detect the 1588 * to perform a synchronous write. We do this to detect the
1589 * case where a single process is doing a stream of sync 1589 * case where a single process is doing a stream of sync
1590 * writes. No point in waiting for joiners in that case. 1590 * writes. No point in waiting for joiners in that case.
1591 *
1592 * Setting max_batch_time to 0 disables this completely.
1591 */ 1593 */
1592 pid = current->pid; 1594 pid = current->pid;
1593 if (handle->h_sync && journal->j_last_sync_writer != pid) { 1595 if (handle->h_sync && journal->j_last_sync_writer != pid &&
1596 journal->j_max_batch_time) {
1594 u64 commit_time, trans_time; 1597 u64 commit_time, trans_time;
1595 1598
1596 journal->j_last_sync_writer = pid; 1599 journal->j_last_sync_writer = pid;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e3d37f607f97..d895b4b7b661 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -39,6 +39,19 @@ struct kernfs_open_node {
39 struct list_head files; /* goes through kernfs_open_file.list */ 39 struct list_head files; /* goes through kernfs_open_file.list */
40}; 40};
41 41
42/*
43 * kernfs_notify() may be called from any context and bounces notifications
44 * through a work item. To minimize space overhead in kernfs_node, the
45 * pending queue is implemented as a singly linked list of kernfs_nodes.
46 * The list is terminated with the self pointer so that whether a
47 * kernfs_node is on the list or not can be determined by testing the next
48 * pointer for NULL.
49 */
50#define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list)
51
52static DEFINE_SPINLOCK(kernfs_notify_lock);
53static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
54
42static struct kernfs_open_file *kernfs_of(struct file *file) 55static struct kernfs_open_file *kernfs_of(struct file *file)
43{ 56{
44 return ((struct seq_file *)file->private_data)->private; 57 return ((struct seq_file *)file->private_data)->private;
@@ -783,24 +796,25 @@ static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait)
783 return DEFAULT_POLLMASK|POLLERR|POLLPRI; 796 return DEFAULT_POLLMASK|POLLERR|POLLPRI;
784} 797}
785 798
786/** 799static void kernfs_notify_workfn(struct work_struct *work)
787 * kernfs_notify - notify a kernfs file
788 * @kn: file to notify
789 *
790 * Notify @kn such that poll(2) on @kn wakes up.
791 */
792void kernfs_notify(struct kernfs_node *kn)
793{ 800{
794 struct kernfs_root *root = kernfs_root(kn); 801 struct kernfs_node *kn;
795 struct kernfs_open_node *on; 802 struct kernfs_open_node *on;
796 struct kernfs_super_info *info; 803 struct kernfs_super_info *info;
797 unsigned long flags; 804repeat:
798 805 /* pop one off the notify_list */
799 if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) 806 spin_lock_irq(&kernfs_notify_lock);
807 kn = kernfs_notify_list;
808 if (kn == KERNFS_NOTIFY_EOL) {
809 spin_unlock_irq(&kernfs_notify_lock);
800 return; 810 return;
811 }
812 kernfs_notify_list = kn->attr.notify_next;
813 kn->attr.notify_next = NULL;
814 spin_unlock_irq(&kernfs_notify_lock);
801 815
802 /* kick poll */ 816 /* kick poll */
803 spin_lock_irqsave(&kernfs_open_node_lock, flags); 817 spin_lock_irq(&kernfs_open_node_lock);
804 818
805 on = kn->attr.open; 819 on = kn->attr.open;
806 if (on) { 820 if (on) {
@@ -808,12 +822,12 @@ void kernfs_notify(struct kernfs_node *kn)
808 wake_up_interruptible(&on->poll); 822 wake_up_interruptible(&on->poll);
809 } 823 }
810 824
811 spin_unlock_irqrestore(&kernfs_open_node_lock, flags); 825 spin_unlock_irq(&kernfs_open_node_lock);
812 826
813 /* kick fsnotify */ 827 /* kick fsnotify */
814 mutex_lock(&kernfs_mutex); 828 mutex_lock(&kernfs_mutex);
815 829
816 list_for_each_entry(info, &root->supers, node) { 830 list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
817 struct inode *inode; 831 struct inode *inode;
818 struct dentry *dentry; 832 struct dentry *dentry;
819 833
@@ -833,6 +847,33 @@ void kernfs_notify(struct kernfs_node *kn)
833 } 847 }
834 848
835 mutex_unlock(&kernfs_mutex); 849 mutex_unlock(&kernfs_mutex);
850 kernfs_put(kn);
851 goto repeat;
852}
853
854/**
855 * kernfs_notify - notify a kernfs file
856 * @kn: file to notify
857 *
 858 * Notify @kn such that poll(2) on @kn wakes up. May be called from any
859 * context.
860 */
861void kernfs_notify(struct kernfs_node *kn)
862{
863 static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
864 unsigned long flags;
865
866 if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
867 return;
868
869 spin_lock_irqsave(&kernfs_notify_lock, flags);
870 if (!kn->attr.notify_next) {
871 kernfs_get(kn);
872 kn->attr.notify_next = kernfs_notify_list;
873 kernfs_notify_list = kn;
874 schedule_work(&kernfs_notify_work);
875 }
876 spin_unlock_irqrestore(&kernfs_notify_lock, flags);
836} 877}
837EXPORT_SYMBOL_GPL(kernfs_notify); 878EXPORT_SYMBOL_GPL(kernfs_notify);
838 879
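
The kernfs_notify() rework above queues nodes on a singly linked pending list whose tail sentinel is the address of the list head itself, so "not queued" can be tested as a NULL ->next pointer. Below is a minimal standalone sketch of that idiom, not the kernfs code itself: the names (struct node, pending_list, PENDING_EOL, example_*) are invented, and the spinlock, refcounting and work-item plumbing of the real patch are omitted.

	#include <stddef.h>

	struct node {
		struct node *next;	/* NULL means "not queued" */
	};

	#define PENDING_EOL ((struct node *)&pending_list)
	static struct node *pending_list = PENDING_EOL;	/* empty list */

	/* Queue a node at most once; returns 1 if it was newly queued. */
	static int example_queue(struct node *n)
	{
		if (n->next)			/* already on the list */
			return 0;
		n->next = pending_list;		/* never NULL: EOL or another node */
		pending_list = n;
		return 1;
	}

	/* Pop one node, or return NULL when the list is empty. */
	static struct node *example_pop(void)
	{
		struct node *n = pending_list;

		if (n == PENDING_EOL)
			return NULL;
		pending_list = n->next;
		n->next = NULL;			/* mark "not queued" again */
		return n;
	}

In the patch itself, the queue side additionally takes a kernfs_get() reference and schedules kernfs_notify_work, and the drain side runs from that work item under kernfs_notify_lock, as the hunk above shows.
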
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index d171b98a6cdd..f973ae9b05f1 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -211,6 +211,36 @@ void kernfs_kill_sb(struct super_block *sb)
211 kernfs_put(root_kn); 211 kernfs_put(root_kn);
212} 212}
213 213
214/**
 215 * kernfs_pin_sb - try to pin the superblock associated with a kernfs_root
 216 * @root: the kernfs_root in question
217 * @ns: the namespace tag
218 *
 219 * Pin the superblock so it won't be destroyed in subsequent
 220 * operations. This can be used to block ->kill_sb(), which may be useful
 221 * for kernfs users that dynamically manage superblocks.
 222 *
 223 * Returns NULL if there's no superblock associated with this kernfs_root,
 224 * or ERR_PTR(-EINVAL) if the superblock is being freed.
225 */
226struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns)
227{
228 struct kernfs_super_info *info;
229 struct super_block *sb = NULL;
230
231 mutex_lock(&kernfs_mutex);
232 list_for_each_entry(info, &root->supers, node) {
233 if (info->ns == ns) {
234 sb = info->sb;
235 if (!atomic_inc_not_zero(&info->sb->s_active))
236 sb = ERR_PTR(-EINVAL);
237 break;
238 }
239 }
240 mutex_unlock(&kernfs_mutex);
241 return sb;
242}
243
214void __init kernfs_init(void) 244void __init kernfs_init(void)
215{ 245{
216 kernfs_node_cache = kmem_cache_create("kernfs_node_cache", 246 kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
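
The new kernfs_pin_sb() above takes an s_active reference with atomic_inc_not_zero(), so the caller has to drop that reference when it is done with the superblock. A hypothetical caller could look like the sketch below; the helper do_something_with() is invented, and pairing the pin with deactivate_super() is an assumption about intended usage rather than something this patch defines.

	#include <linux/err.h>
	#include <linux/fs.h>
	#include <linux/kernfs.h>

	/* placeholder for whatever the caller actually needs the sb for */
	static void do_something_with(struct super_block *sb)
	{
	}

	/* Hypothetical user of kernfs_pin_sb(). */
	static void example_poke_super(struct kernfs_root *root)
	{
		struct super_block *sb;

		sb = kernfs_pin_sb(root, NULL);	/* NULL namespace tag */
		if (IS_ERR_OR_NULL(sb))
			return;	/* no superblock yet, or it is already being freed */

		do_something_with(sb);	/* ->kill_sb() cannot finish meanwhile */

		deactivate_super(sb);	/* drop the pinned s_active reference */
	}
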
diff --git a/fs/mbcache.c b/fs/mbcache.c
index bf166e388f0d..187477ded6b3 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -73,6 +73,7 @@
73#include <linux/mbcache.h> 73#include <linux/mbcache.h>
74#include <linux/init.h> 74#include <linux/init.h>
75#include <linux/blockgroup_lock.h> 75#include <linux/blockgroup_lock.h>
76#include <linux/log2.h>
76 77
77#ifdef MB_CACHE_DEBUG 78#ifdef MB_CACHE_DEBUG
78# define mb_debug(f...) do { \ 79# define mb_debug(f...) do { \
@@ -93,7 +94,7 @@
93 94
94#define MB_CACHE_WRITER ((unsigned short)~0U >> 1) 95#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
95 96
96#define MB_CACHE_ENTRY_LOCK_BITS __builtin_log2(NR_BG_LOCKS) 97#define MB_CACHE_ENTRY_LOCK_BITS ilog2(NR_BG_LOCKS)
97#define MB_CACHE_ENTRY_LOCK_INDEX(ce) \ 98#define MB_CACHE_ENTRY_LOCK_INDEX(ce) \
98 (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS)) 99 (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS))
99 100
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c496f8a74639..9927913c97c2 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -147,6 +147,17 @@ int nfs_sync_mapping(struct address_space *mapping)
147 return ret; 147 return ret;
148} 148}
149 149
150static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
151{
152 struct nfs_inode *nfsi = NFS_I(inode);
153
154 if (inode->i_mapping->nrpages == 0)
155 flags &= ~NFS_INO_INVALID_DATA;
156 nfsi->cache_validity |= flags;
157 if (flags & NFS_INO_INVALID_DATA)
158 nfs_fscache_invalidate(inode);
159}
160
150/* 161/*
151 * Invalidate the local caches 162 * Invalidate the local caches
152 */ 163 */
@@ -162,17 +173,16 @@ static void nfs_zap_caches_locked(struct inode *inode)
162 173
163 memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); 174 memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
164 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { 175 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
165 nfs_fscache_invalidate(inode); 176 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
166 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
167 | NFS_INO_INVALID_DATA 177 | NFS_INO_INVALID_DATA
168 | NFS_INO_INVALID_ACCESS 178 | NFS_INO_INVALID_ACCESS
169 | NFS_INO_INVALID_ACL 179 | NFS_INO_INVALID_ACL
170 | NFS_INO_REVAL_PAGECACHE; 180 | NFS_INO_REVAL_PAGECACHE);
171 } else 181 } else
172 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 182 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
173 | NFS_INO_INVALID_ACCESS 183 | NFS_INO_INVALID_ACCESS
174 | NFS_INO_INVALID_ACL 184 | NFS_INO_INVALID_ACL
175 | NFS_INO_REVAL_PAGECACHE; 185 | NFS_INO_REVAL_PAGECACHE);
176 nfs_zap_label_cache_locked(nfsi); 186 nfs_zap_label_cache_locked(nfsi);
177} 187}
178 188
@@ -187,8 +197,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)
187{ 197{
188 if (mapping->nrpages != 0) { 198 if (mapping->nrpages != 0) {
189 spin_lock(&inode->i_lock); 199 spin_lock(&inode->i_lock);
190 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; 200 nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
191 nfs_fscache_invalidate(inode);
192 spin_unlock(&inode->i_lock); 201 spin_unlock(&inode->i_lock);
193 } 202 }
194} 203}
@@ -209,7 +218,7 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache);
209void nfs_invalidate_atime(struct inode *inode) 218void nfs_invalidate_atime(struct inode *inode)
210{ 219{
211 spin_lock(&inode->i_lock); 220 spin_lock(&inode->i_lock);
212 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; 221 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
213 spin_unlock(&inode->i_lock); 222 spin_unlock(&inode->i_lock);
214} 223}
215EXPORT_SYMBOL_GPL(nfs_invalidate_atime); 224EXPORT_SYMBOL_GPL(nfs_invalidate_atime);
@@ -369,7 +378,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
369 inode->i_mode = fattr->mode; 378 inode->i_mode = fattr->mode;
370 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 379 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
371 && nfs_server_capable(inode, NFS_CAP_MODE)) 380 && nfs_server_capable(inode, NFS_CAP_MODE))
372 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 381 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
373 /* Why so? Because we want revalidate for devices/FIFOs, and 382 /* Why so? Because we want revalidate for devices/FIFOs, and
374 * that's precisely what we have in nfs_file_inode_operations. 383 * that's precisely what we have in nfs_file_inode_operations.
375 */ 384 */
@@ -415,36 +424,36 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
415 if (fattr->valid & NFS_ATTR_FATTR_ATIME) 424 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
416 inode->i_atime = fattr->atime; 425 inode->i_atime = fattr->atime;
417 else if (nfs_server_capable(inode, NFS_CAP_ATIME)) 426 else if (nfs_server_capable(inode, NFS_CAP_ATIME))
418 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 427 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
419 if (fattr->valid & NFS_ATTR_FATTR_MTIME) 428 if (fattr->valid & NFS_ATTR_FATTR_MTIME)
420 inode->i_mtime = fattr->mtime; 429 inode->i_mtime = fattr->mtime;
421 else if (nfs_server_capable(inode, NFS_CAP_MTIME)) 430 else if (nfs_server_capable(inode, NFS_CAP_MTIME))
422 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 431 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
423 if (fattr->valid & NFS_ATTR_FATTR_CTIME) 432 if (fattr->valid & NFS_ATTR_FATTR_CTIME)
424 inode->i_ctime = fattr->ctime; 433 inode->i_ctime = fattr->ctime;
425 else if (nfs_server_capable(inode, NFS_CAP_CTIME)) 434 else if (nfs_server_capable(inode, NFS_CAP_CTIME))
426 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 435 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
427 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) 436 if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
428 inode->i_version = fattr->change_attr; 437 inode->i_version = fattr->change_attr;
429 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) 438 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
430 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 439 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
431 if (fattr->valid & NFS_ATTR_FATTR_SIZE) 440 if (fattr->valid & NFS_ATTR_FATTR_SIZE)
432 inode->i_size = nfs_size_to_loff_t(fattr->size); 441 inode->i_size = nfs_size_to_loff_t(fattr->size);
433 else 442 else
434 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 443 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
435 | NFS_INO_REVAL_PAGECACHE; 444 | NFS_INO_REVAL_PAGECACHE);
436 if (fattr->valid & NFS_ATTR_FATTR_NLINK) 445 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
437 set_nlink(inode, fattr->nlink); 446 set_nlink(inode, fattr->nlink);
438 else if (nfs_server_capable(inode, NFS_CAP_NLINK)) 447 else if (nfs_server_capable(inode, NFS_CAP_NLINK))
439 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 448 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
440 if (fattr->valid & NFS_ATTR_FATTR_OWNER) 449 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
441 inode->i_uid = fattr->uid; 450 inode->i_uid = fattr->uid;
442 else if (nfs_server_capable(inode, NFS_CAP_OWNER)) 451 else if (nfs_server_capable(inode, NFS_CAP_OWNER))
443 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 452 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
444 if (fattr->valid & NFS_ATTR_FATTR_GROUP) 453 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
445 inode->i_gid = fattr->gid; 454 inode->i_gid = fattr->gid;
446 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) 455 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
447 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 456 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
448 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) 457 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
449 inode->i_blocks = fattr->du.nfs2.blocks; 458 inode->i_blocks = fattr->du.nfs2.blocks;
450 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { 459 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -550,6 +559,9 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
550 559
551 spin_lock(&inode->i_lock); 560 spin_lock(&inode->i_lock);
552 i_size_write(inode, offset); 561 i_size_write(inode, offset);
562 /* Optimisation */
563 if (offset == 0)
564 NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA;
553 spin_unlock(&inode->i_lock); 565 spin_unlock(&inode->i_lock);
554 566
555 truncate_pagecache(inode, offset); 567 truncate_pagecache(inode, offset);
@@ -578,7 +590,8 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
578 inode->i_uid = attr->ia_uid; 590 inode->i_uid = attr->ia_uid;
579 if ((attr->ia_valid & ATTR_GID) != 0) 591 if ((attr->ia_valid & ATTR_GID) != 0)
580 inode->i_gid = attr->ia_gid; 592 inode->i_gid = attr->ia_gid;
581 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 593 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
594 | NFS_INO_INVALID_ACL);
582 spin_unlock(&inode->i_lock); 595 spin_unlock(&inode->i_lock);
583 } 596 }
584 if ((attr->ia_valid & ATTR_SIZE) != 0) { 597 if ((attr->ia_valid & ATTR_SIZE) != 0) {
@@ -1101,7 +1114,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
1101 && inode->i_version == fattr->pre_change_attr) { 1114 && inode->i_version == fattr->pre_change_attr) {
1102 inode->i_version = fattr->change_attr; 1115 inode->i_version = fattr->change_attr;
1103 if (S_ISDIR(inode->i_mode)) 1116 if (S_ISDIR(inode->i_mode))
1104 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 1117 nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
1105 ret |= NFS_INO_INVALID_ATTR; 1118 ret |= NFS_INO_INVALID_ATTR;
1106 } 1119 }
1107 /* If we have atomic WCC data, we may update some attributes */ 1120 /* If we have atomic WCC data, we may update some attributes */
@@ -1117,7 +1130,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
1117 && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { 1130 && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
1118 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); 1131 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1119 if (S_ISDIR(inode->i_mode)) 1132 if (S_ISDIR(inode->i_mode))
1120 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 1133 nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
1121 ret |= NFS_INO_INVALID_ATTR; 1134 ret |= NFS_INO_INVALID_ATTR;
1122 } 1135 }
1123 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) 1136 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
@@ -1128,9 +1141,6 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
1128 ret |= NFS_INO_INVALID_ATTR; 1141 ret |= NFS_INO_INVALID_ATTR;
1129 } 1142 }
1130 1143
1131 if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
1132 nfs_fscache_invalidate(inode);
1133
1134 return ret; 1144 return ret;
1135} 1145}
1136 1146
@@ -1189,7 +1199,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
1189 invalid |= NFS_INO_INVALID_ATIME; 1199 invalid |= NFS_INO_INVALID_ATIME;
1190 1200
1191 if (invalid != 0) 1201 if (invalid != 0)
1192 nfsi->cache_validity |= invalid; 1202 nfs_set_cache_invalid(inode, invalid);
1193 1203
1194 nfsi->read_cache_jiffies = fattr->time_start; 1204 nfsi->read_cache_jiffies = fattr->time_start;
1195 return 0; 1205 return 0;
@@ -1402,13 +1412,11 @@ EXPORT_SYMBOL_GPL(nfs_refresh_inode);
1402 1412
1403static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) 1413static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
1404{ 1414{
1405 struct nfs_inode *nfsi = NFS_I(inode); 1415 unsigned long invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
1406 1416
1407 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 1417 if (S_ISDIR(inode->i_mode))
1408 if (S_ISDIR(inode->i_mode)) { 1418 invalid |= NFS_INO_INVALID_DATA;
1409 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 1419 nfs_set_cache_invalid(inode, invalid);
1410 nfs_fscache_invalidate(inode);
1411 }
1412 if ((fattr->valid & NFS_ATTR_FATTR) == 0) 1420 if ((fattr->valid & NFS_ATTR_FATTR) == 0)
1413 return 0; 1421 return 0;
1414 return nfs_refresh_inode_locked(inode, fattr); 1422 return nfs_refresh_inode_locked(inode, fattr);
@@ -1601,6 +1609,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1601 if ((nfsi->npages == 0) || new_isize > cur_isize) { 1609 if ((nfsi->npages == 0) || new_isize > cur_isize) {
1602 i_size_write(inode, new_isize); 1610 i_size_write(inode, new_isize);
1603 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; 1611 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1612 invalid &= ~NFS_INO_REVAL_PAGECACHE;
1604 } 1613 }
1605 dprintk("NFS: isize change on server for file %s/%ld " 1614 dprintk("NFS: isize change on server for file %s/%ld "
1606 "(%Ld to %Ld)\n", 1615 "(%Ld to %Ld)\n",
@@ -1702,10 +1711,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1702 invalid &= ~NFS_INO_INVALID_DATA; 1711 invalid &= ~NFS_INO_INVALID_DATA;
1703 if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) || 1712 if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) ||
1704 (save_cache_validity & NFS_INO_REVAL_FORCED)) 1713 (save_cache_validity & NFS_INO_REVAL_FORCED))
1705 nfsi->cache_validity |= invalid; 1714 nfs_set_cache_invalid(inode, invalid);
1706
1707 if (invalid & NFS_INO_INVALID_DATA)
1708 nfs_fscache_invalidate(inode);
1709 1715
1710 return 0; 1716 return 0;
1711 out_err: 1717 out_err:
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index f63cb87cd730..ba2affa51941 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -230,7 +230,7 @@ int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
230extern struct file_system_type nfs4_fs_type; 230extern struct file_system_type nfs4_fs_type;
231 231
232/* nfs4namespace.c */ 232/* nfs4namespace.c */
233struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); 233struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, struct qstr *);
234struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, 234struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
235 struct nfs_fh *, struct nfs_fattr *); 235 struct nfs_fh *, struct nfs_fattr *);
236int nfs4_replace_transport(struct nfs_server *server, 236int nfs4_replace_transport(struct nfs_server *server,
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 3d5dbf80d46a..3d83cb1fdc70 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -139,16 +139,22 @@ static size_t nfs_parse_server_name(char *string, size_t len,
139 * @server: NFS server struct 139 * @server: NFS server struct
140 * @flavors: List of security tuples returned by SECINFO procedure 140 * @flavors: List of security tuples returned by SECINFO procedure
141 * 141 *
142 * Return the pseudoflavor of the first security mechanism in 142 * Return an rpc client that uses the first security mechanism in
143 * "flavors" that is locally supported. Return RPC_AUTH_UNIX if 143 * "flavors" that is locally supported. The "flavors" array
144 * no matching flavor is found in the array. The "flavors" array
145 * is searched in the order returned from the server, per RFC 3530 144 * is searched in the order returned from the server, per RFC 3530
146 * recommendation. 145 * recommendation and each flavor is checked for membership in the
146 * sec= mount option list if it exists.
147 *
148 * Return -EPERM if no matching flavor is found in the array.
149 *
150 * Please call rpc_shutdown_client() when you are done with this rpc client.
151 *
147 */ 152 */
148static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server, 153static struct rpc_clnt *nfs_find_best_sec(struct rpc_clnt *clnt,
154 struct nfs_server *server,
149 struct nfs4_secinfo_flavors *flavors) 155 struct nfs4_secinfo_flavors *flavors)
150{ 156{
151 rpc_authflavor_t pseudoflavor; 157 rpc_authflavor_t pflavor;
152 struct nfs4_secinfo4 *secinfo; 158 struct nfs4_secinfo4 *secinfo;
153 unsigned int i; 159 unsigned int i;
154 160
@@ -159,62 +165,73 @@ static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server,
159 case RPC_AUTH_NULL: 165 case RPC_AUTH_NULL:
160 case RPC_AUTH_UNIX: 166 case RPC_AUTH_UNIX:
161 case RPC_AUTH_GSS: 167 case RPC_AUTH_GSS:
162 pseudoflavor = rpcauth_get_pseudoflavor(secinfo->flavor, 168 pflavor = rpcauth_get_pseudoflavor(secinfo->flavor,
163 &secinfo->flavor_info); 169 &secinfo->flavor_info);
164 /* make sure pseudoflavor matches sec= mount opt */ 170 /* does the pseudoflavor match a sec= mount opt? */
165 if (pseudoflavor != RPC_AUTH_MAXFLAVOR && 171 if (pflavor != RPC_AUTH_MAXFLAVOR &&
166 nfs_auth_info_match(&server->auth_info, 172 nfs_auth_info_match(&server->auth_info, pflavor)) {
167 pseudoflavor)) 173 struct rpc_clnt *new;
168 return pseudoflavor; 174 struct rpc_cred *cred;
169 break; 175
176 /* Cloning creates an rpc_auth for the flavor */
177 new = rpc_clone_client_set_auth(clnt, pflavor);
178 if (IS_ERR(new))
179 continue;
180 /**
181 * Check that the user actually can use the
182 * flavor. This is mostly for RPC_AUTH_GSS
183 * where cr_init obtains a gss context
184 */
185 cred = rpcauth_lookupcred(new->cl_auth, 0);
186 if (IS_ERR(cred)) {
187 rpc_shutdown_client(new);
188 continue;
189 }
190 put_rpccred(cred);
191 return new;
192 }
170 } 193 }
171 } 194 }
172 195 return ERR_PTR(-EPERM);
173 /* if there were any sec= options then nothing matched */
174 if (server->auth_info.flavor_len > 0)
175 return -EPERM;
176
177 return RPC_AUTH_UNIX;
178} 196}
179 197
180static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name) 198/**
199 * nfs4_negotiate_security - in response to an NFS4ERR_WRONGSEC on lookup,
200 * return an rpc_clnt that uses the best available security flavor with
201 * respect to the secinfo flavor list and the sec= mount options.
202 *
203 * @clnt: RPC client to clone
204 * @inode: directory inode
205 * @name: lookup name
206 *
207 * Please call rpc_shutdown_client() when you are done with this rpc client.
208 */
209struct rpc_clnt *
210nfs4_negotiate_security(struct rpc_clnt *clnt, struct inode *inode,
211 struct qstr *name)
181{ 212{
182 struct page *page; 213 struct page *page;
183 struct nfs4_secinfo_flavors *flavors; 214 struct nfs4_secinfo_flavors *flavors;
184 rpc_authflavor_t flavor; 215 struct rpc_clnt *new;
185 int err; 216 int err;
186 217
187 page = alloc_page(GFP_KERNEL); 218 page = alloc_page(GFP_KERNEL);
188 if (!page) 219 if (!page)
189 return -ENOMEM; 220 return ERR_PTR(-ENOMEM);
221
190 flavors = page_address(page); 222 flavors = page_address(page);
191 223
192 err = nfs4_proc_secinfo(inode, name, flavors); 224 err = nfs4_proc_secinfo(inode, name, flavors);
193 if (err < 0) { 225 if (err < 0) {
194 flavor = err; 226 new = ERR_PTR(err);
195 goto out; 227 goto out;
196 } 228 }
197 229
198 flavor = nfs_find_best_sec(NFS_SERVER(inode), flavors); 230 new = nfs_find_best_sec(clnt, NFS_SERVER(inode), flavors);
199 231
200out: 232out:
201 put_page(page); 233 put_page(page);
202 return flavor; 234 return new;
203}
204
205/*
206 * Please call rpc_shutdown_client() when you are done with this client.
207 */
208struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode,
209 struct qstr *name)
210{
211 rpc_authflavor_t flavor;
212
213 flavor = nfs4_negotiate_security(inode, name);
214 if ((int)flavor < 0)
215 return ERR_PTR((int)flavor);
216
217 return rpc_clone_client_set_auth(clnt, flavor);
218} 235}
219 236
220static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, 237static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
@@ -397,11 +414,6 @@ struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
397 414
398 if (client->cl_auth->au_flavor != flavor) 415 if (client->cl_auth->au_flavor != flavor)
399 flavor = client->cl_auth->au_flavor; 416 flavor = client->cl_auth->au_flavor;
400 else {
401 rpc_authflavor_t new = nfs4_negotiate_security(dir, name);
402 if ((int)new >= 0)
403 flavor = new;
404 }
405 mnt = nfs_do_submount(dentry, fh, fattr, flavor); 417 mnt = nfs_do_submount(dentry, fh, fattr, flavor);
406out: 418out:
407 rpc_shutdown_client(client); 419 rpc_shutdown_client(client);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 285ad5334018..4bf3d97cc5a0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3247,7 +3247,7 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
3247 err = -EPERM; 3247 err = -EPERM;
3248 if (client != *clnt) 3248 if (client != *clnt)
3249 goto out; 3249 goto out;
3250 client = nfs4_create_sec_client(client, dir, name); 3250 client = nfs4_negotiate_security(client, dir, name);
3251 if (IS_ERR(client)) 3251 if (IS_ERR(client))
3252 return PTR_ERR(client); 3252 return PTR_ERR(client);
3253 3253
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3ee5af4e738e..98ff061ccaf3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -934,12 +934,14 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
934 934
935 if (nfs_have_delegated_attributes(inode)) 935 if (nfs_have_delegated_attributes(inode))
936 goto out; 936 goto out;
937 if (nfsi->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE)) 937 if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
938 return false; 938 return false;
939 smp_rmb(); 939 smp_rmb();
940 if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags)) 940 if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
941 return false; 941 return false;
942out: 942out:
943 if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
944 return false;
943 return PageUptodate(page) != 0; 945 return PageUptodate(page) != 0;
944} 946}
945 947
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 6851b003f2a4..8f029db5d271 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -617,15 +617,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
617 617
618 switch (create->cr_type) { 618 switch (create->cr_type) {
619 case NF4LNK: 619 case NF4LNK:
620 /* ugh! we have to null-terminate the linktext, or
621 * vfs_symlink() will choke. it is always safe to
622 * null-terminate by brute force, since at worst we
623 * will overwrite the first byte of the create namelen
624 * in the XDR buffer, which has already been extracted
625 * during XDR decode.
626 */
627 create->cr_linkname[create->cr_linklen] = 0;
628
629 status = nfsd_symlink(rqstp, &cstate->current_fh, 620 status = nfsd_symlink(rqstp, &cstate->current_fh,
630 create->cr_name, create->cr_namelen, 621 create->cr_name, create->cr_namelen,
631 create->cr_linkname, create->cr_linklen, 622 create->cr_linkname, create->cr_linklen,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 83baf2bfe9e9..b56b1cc02718 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -600,7 +600,18 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
600 READ_BUF(4); 600 READ_BUF(4);
601 create->cr_linklen = be32_to_cpup(p++); 601 create->cr_linklen = be32_to_cpup(p++);
602 READ_BUF(create->cr_linklen); 602 READ_BUF(create->cr_linklen);
603 SAVEMEM(create->cr_linkname, create->cr_linklen); 603 /*
604 * The VFS will want a null-terminated string, and
605 * null-terminating in place isn't safe since this might
606 * end on a page boundary:
607 */
608 create->cr_linkname =
609 kmalloc(create->cr_linklen + 1, GFP_KERNEL);
610 if (!create->cr_linkname)
611 return nfserr_jukebox;
612 memcpy(create->cr_linkname, p, create->cr_linklen);
613 create->cr_linkname[create->cr_linklen] = '\0';
614 defer_free(argp, kfree, create->cr_linkname);
604 break; 615 break;
605 case NF4BLK: 616 case NF4BLK:
606 case NF4CHR: 617 case NF4CHR:
@@ -2630,7 +2641,7 @@ nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
2630{ 2641{
2631 __be32 *p; 2642 __be32 *p;
2632 2643
2633 p = xdr_reserve_space(xdr, 6); 2644 p = xdr_reserve_space(xdr, 20);
2634 if (!p) 2645 if (!p)
2635 return NULL; 2646 return NULL;
2636 *p++ = htonl(2); 2647 *p++ = htonl(2);
@@ -3267,7 +3278,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
3267 3278
3268 wire_count = htonl(maxcount); 3279 wire_count = htonl(maxcount);
3269 write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4); 3280 write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4);
3270 xdr_truncate_encode(xdr, length_offset + 4 + maxcount); 3281 xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4));
3271 if (maxcount & 3) 3282 if (maxcount & 3)
3272 write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, 3283 write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount,
3273 &zero, 4 - (maxcount&3)); 3284 &zero, 4 - (maxcount&3));
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index a106b3f2b22a..fae17c640df3 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -331,6 +331,7 @@ struct dlm_lock_resource
331 u16 state; 331 u16 state;
332 char lvb[DLM_LVB_LEN]; 332 char lvb[DLM_LVB_LEN];
333 unsigned int inflight_locks; 333 unsigned int inflight_locks;
334 unsigned int inflight_assert_workers;
334 unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; 335 unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
335}; 336};
336 337
@@ -910,6 +911,9 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
910void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, 911void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
911 struct dlm_lock_resource *res); 912 struct dlm_lock_resource *res);
912 913
914void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
915 struct dlm_lock_resource *res);
916
913void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 917void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
914void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 918void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
915void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 919void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 3087a21d32f9..82abf0cc9a12 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -581,6 +581,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
581 atomic_set(&res->asts_reserved, 0); 581 atomic_set(&res->asts_reserved, 0);
582 res->migration_pending = 0; 582 res->migration_pending = 0;
583 res->inflight_locks = 0; 583 res->inflight_locks = 0;
584 res->inflight_assert_workers = 0;
584 585
585 res->dlm = dlm; 586 res->dlm = dlm;
586 587
@@ -683,6 +684,43 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
683 wake_up(&res->wq); 684 wake_up(&res->wq);
684} 685}
685 686
687void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
688 struct dlm_lock_resource *res)
689{
690 assert_spin_locked(&res->spinlock);
691 res->inflight_assert_workers++;
692 mlog(0, "%s:%.*s: inflight assert worker++: now %u\n",
693 dlm->name, res->lockname.len, res->lockname.name,
694 res->inflight_assert_workers);
695}
696
697static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
698 struct dlm_lock_resource *res)
699{
700 spin_lock(&res->spinlock);
701 __dlm_lockres_grab_inflight_worker(dlm, res);
702 spin_unlock(&res->spinlock);
703}
704
705static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
706 struct dlm_lock_resource *res)
707{
708 assert_spin_locked(&res->spinlock);
709 BUG_ON(res->inflight_assert_workers == 0);
710 res->inflight_assert_workers--;
711 mlog(0, "%s:%.*s: inflight assert worker--: now %u\n",
712 dlm->name, res->lockname.len, res->lockname.name,
713 res->inflight_assert_workers);
714}
715
716static void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
717 struct dlm_lock_resource *res)
718{
719 spin_lock(&res->spinlock);
720 __dlm_lockres_drop_inflight_worker(dlm, res);
721 spin_unlock(&res->spinlock);
722}
723
686/* 724/*
687 * lookup a lock resource by name. 725 * lookup a lock resource by name.
688 * may already exist in the hashtable. 726 * may already exist in the hashtable.
@@ -1603,7 +1641,8 @@ send_response:
1603 mlog(ML_ERROR, "failed to dispatch assert master work\n"); 1641 mlog(ML_ERROR, "failed to dispatch assert master work\n");
1604 response = DLM_MASTER_RESP_ERROR; 1642 response = DLM_MASTER_RESP_ERROR;
1605 dlm_lockres_put(res); 1643 dlm_lockres_put(res);
1606 } 1644 } else
1645 dlm_lockres_grab_inflight_worker(dlm, res);
1607 } else { 1646 } else {
1608 if (res) 1647 if (res)
1609 dlm_lockres_put(res); 1648 dlm_lockres_put(res);
@@ -2118,6 +2157,8 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
2118 dlm_lockres_release_ast(dlm, res); 2157 dlm_lockres_release_ast(dlm, res);
2119 2158
2120put: 2159put:
2160 dlm_lockres_drop_inflight_worker(dlm, res);
2161
2121 dlm_lockres_put(res); 2162 dlm_lockres_put(res);
2122 2163
2123 mlog(0, "finished with dlm_assert_master_worker\n"); 2164 mlog(0, "finished with dlm_assert_master_worker\n");
@@ -3088,11 +3129,15 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
3088 /* remove it so that only one mle will be found */ 3129 /* remove it so that only one mle will be found */
3089 __dlm_unlink_mle(dlm, tmp); 3130 __dlm_unlink_mle(dlm, tmp);
3090 __dlm_mle_detach_hb_events(dlm, tmp); 3131 __dlm_mle_detach_hb_events(dlm, tmp);
3091 ret = DLM_MIGRATE_RESPONSE_MASTERY_REF; 3132 if (tmp->type == DLM_MLE_MASTER) {
3092 mlog(0, "%s:%.*s: master=%u, newmaster=%u, " 3133 ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
3093 "telling master to get ref for cleared out mle " 3134 mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
3094 "during migration\n", dlm->name, namelen, name, 3135 "telling master to get ref "
3095 master, new_master); 3136 "for cleared out mle during "
3137 "migration\n", dlm->name,
3138 namelen, name, master,
3139 new_master);
3140 }
3096 } 3141 }
3097 spin_unlock(&tmp->spinlock); 3142 spin_unlock(&tmp->spinlock);
3098 } 3143 }
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 5de019437ea5..45067faf5695 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1708,7 +1708,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
1708 mlog_errno(-ENOMEM); 1708 mlog_errno(-ENOMEM);
1709 /* retry!? */ 1709 /* retry!? */
1710 BUG(); 1710 BUG();
1711 } 1711 } else
1712 __dlm_lockres_grab_inflight_worker(dlm, res);
1712 } else /* put.. incase we are not the master */ 1713 } else /* put.. incase we are not the master */
1713 dlm_lockres_put(res); 1714 dlm_lockres_put(res);
1714 spin_unlock(&res->spinlock); 1715 spin_unlock(&res->spinlock);
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 9db869de829d..69aac6f088ad 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -259,12 +259,15 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
259 * refs on it. */ 259 * refs on it. */
260 unused = __dlm_lockres_unused(lockres); 260 unused = __dlm_lockres_unused(lockres);
261 if (!unused || 261 if (!unused ||
262 (lockres->state & DLM_LOCK_RES_MIGRATING)) { 262 (lockres->state & DLM_LOCK_RES_MIGRATING) ||
263 (lockres->inflight_assert_workers != 0)) {
263 mlog(0, "%s: res %.*s is in use or being remastered, " 264 mlog(0, "%s: res %.*s is in use or being remastered, "
264 "used %d, state %d\n", dlm->name, 265 "used %d, state %d, assert master workers %u\n",
265 lockres->lockname.len, lockres->lockname.name, 266 dlm->name, lockres->lockname.len,
266 !unused, lockres->state); 267 lockres->lockname.name,
267 list_move_tail(&dlm->purge_list, &lockres->purge); 268 !unused, lockres->state,
269 lockres->inflight_assert_workers);
270 list_move_tail(&lockres->purge, &dlm->purge_list);
268 spin_unlock(&lockres->spinlock); 271 spin_unlock(&lockres->spinlock);
269 continue; 272 continue;
270 } 273 }
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 5698b52cf5c9..2e3c9dbab68c 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -191,7 +191,9 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
191 DLM_UNLOCK_CLEAR_CONVERT_TYPE); 191 DLM_UNLOCK_CLEAR_CONVERT_TYPE);
192 } else if (status == DLM_RECOVERING || 192 } else if (status == DLM_RECOVERING ||
193 status == DLM_MIGRATING || 193 status == DLM_MIGRATING ||
194 status == DLM_FORWARD) { 194 status == DLM_FORWARD ||
195 status == DLM_NOLOCKMGR
196 ) {
195 /* must clear the actions because this unlock 197 /* must clear the actions because this unlock
196 * is about to be retried. cannot free or do 198 * is about to be retried. cannot free or do
197 * any list manipulation. */ 199 * any list manipulation. */
@@ -200,7 +202,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
200 res->lockname.name, 202 res->lockname.name,
201 status==DLM_RECOVERING?"recovering": 203 status==DLM_RECOVERING?"recovering":
202 (status==DLM_MIGRATING?"migrating": 204 (status==DLM_MIGRATING?"migrating":
203 "forward")); 205 (status == DLM_FORWARD ? "forward" :
206 "nolockmanager")));
204 actions = 0; 207 actions = 0;
205 } 208 }
206 if (flags & LKM_CANCEL) 209 if (flags & LKM_CANCEL)
@@ -364,7 +367,10 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
364 * updated state to the recovery master. this thread 367 * updated state to the recovery master. this thread
365 * just needs to finish out the operation and call 368 * just needs to finish out the operation and call
366 * the unlockast. */ 369 * the unlockast. */
367 ret = DLM_NORMAL; 370 if (dlm_is_node_dead(dlm, owner))
371 ret = DLM_NORMAL;
372 else
373 ret = DLM_NOLOCKMGR;
368 } else { 374 } else {
369 /* something bad. this will BUG in ocfs2 */ 375 /* something bad. this will BUG in ocfs2 */
370 ret = dlm_err_to_dlm_status(tmpret); 376 ret = dlm_err_to_dlm_status(tmpret);
@@ -638,7 +644,9 @@ retry:
638 644
639 if (status == DLM_RECOVERING || 645 if (status == DLM_RECOVERING ||
640 status == DLM_MIGRATING || 646 status == DLM_MIGRATING ||
641 status == DLM_FORWARD) { 647 status == DLM_FORWARD ||
648 status == DLM_NOLOCKMGR) {
649
642 /* We want to go away for a tiny bit to allow recovery 650 /* We want to go away for a tiny bit to allow recovery
643 * / migration to complete on this resource. I don't 651 * / migration to complete on this resource. I don't
644 * know of any wait queue we could sleep on as this 652 * know of any wait queue we could sleep on as this
@@ -650,7 +658,7 @@ retry:
650 msleep(50); 658 msleep(50);
651 659
652 mlog(0, "retrying unlock due to pending recovery/" 660 mlog(0, "retrying unlock due to pending recovery/"
653 "migration/in-progress\n"); 661 "migration/in-progress/reconnect\n");
654 goto retry; 662 goto retry;
655 } 663 }
656 664
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 2060fc398445..8add6f1030d7 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -205,6 +205,21 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
205 return inode; 205 return inode;
206} 206}
207 207
208static void ocfs2_cleanup_add_entry_failure(struct ocfs2_super *osb,
209 struct dentry *dentry, struct inode *inode)
210{
211 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
212
213 ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
214 ocfs2_lock_res_free(&dl->dl_lockres);
215 BUG_ON(dl->dl_count != 1);
216 spin_lock(&dentry_attach_lock);
217 dentry->d_fsdata = NULL;
218 spin_unlock(&dentry_attach_lock);
219 kfree(dl);
220 iput(inode);
221}
222
208static int ocfs2_mknod(struct inode *dir, 223static int ocfs2_mknod(struct inode *dir,
209 struct dentry *dentry, 224 struct dentry *dentry,
210 umode_t mode, 225 umode_t mode,
@@ -231,6 +246,7 @@ static int ocfs2_mknod(struct inode *dir,
231 sigset_t oldset; 246 sigset_t oldset;
232 int did_block_signals = 0; 247 int did_block_signals = 0;
233 struct posix_acl *default_acl = NULL, *acl = NULL; 248 struct posix_acl *default_acl = NULL, *acl = NULL;
249 struct ocfs2_dentry_lock *dl = NULL;
234 250
235 trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name, 251 trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name,
236 (unsigned long long)OCFS2_I(dir)->ip_blkno, 252 (unsigned long long)OCFS2_I(dir)->ip_blkno,
@@ -423,6 +439,8 @@ static int ocfs2_mknod(struct inode *dir,
423 goto leave; 439 goto leave;
424 } 440 }
425 441
442 dl = dentry->d_fsdata;
443
426 status = ocfs2_add_entry(handle, dentry, inode, 444 status = ocfs2_add_entry(handle, dentry, inode,
427 OCFS2_I(inode)->ip_blkno, parent_fe_bh, 445 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
428 &lookup); 446 &lookup);
@@ -469,6 +487,9 @@ leave:
469 * ocfs2_delete_inode will mutex_lock again. 487 * ocfs2_delete_inode will mutex_lock again.
470 */ 488 */
471 if ((status < 0) && inode) { 489 if ((status < 0) && inode) {
490 if (dl)
491 ocfs2_cleanup_add_entry_failure(osb, dentry, inode);
492
472 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR; 493 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
473 clear_nlink(inode); 494 clear_nlink(inode);
474 iput(inode); 495 iput(inode);
@@ -991,6 +1012,65 @@ leave:
991 return status; 1012 return status;
992} 1013}
993 1014
1015static int ocfs2_check_if_ancestor(struct ocfs2_super *osb,
1016 u64 src_inode_no, u64 dest_inode_no)
1017{
1018 int ret = 0, i = 0;
1019 u64 parent_inode_no = 0;
1020 u64 child_inode_no = src_inode_no;
1021 struct inode *child_inode;
1022
1023#define MAX_LOOKUP_TIMES 32
1024 while (1) {
1025 child_inode = ocfs2_iget(osb, child_inode_no, 0, 0);
1026 if (IS_ERR(child_inode)) {
1027 ret = PTR_ERR(child_inode);
1028 break;
1029 }
1030
1031 ret = ocfs2_inode_lock(child_inode, NULL, 0);
1032 if (ret < 0) {
1033 iput(child_inode);
1034 if (ret != -ENOENT)
1035 mlog_errno(ret);
1036 break;
1037 }
1038
1039 ret = ocfs2_lookup_ino_from_name(child_inode, "..", 2,
1040 &parent_inode_no);
1041 ocfs2_inode_unlock(child_inode, 0);
1042 iput(child_inode);
1043 if (ret < 0) {
1044 ret = -ENOENT;
1045 break;
1046 }
1047
1048 if (parent_inode_no == dest_inode_no) {
1049 ret = 1;
1050 break;
1051 }
1052
1053 if (parent_inode_no == osb->root_inode->i_ino) {
1054 ret = 0;
1055 break;
1056 }
1057
1058 child_inode_no = parent_inode_no;
1059
1060 if (++i >= MAX_LOOKUP_TIMES) {
1061 mlog(ML_NOTICE, "max lookup times reached, filesystem "
1062 "may have nested directories, "
1063 "src inode: %llu, dest inode: %llu.\n",
1064 (unsigned long long)src_inode_no,
1065 (unsigned long long)dest_inode_no);
1066 ret = 0;
1067 break;
1068 }
1069 }
1070
1071 return ret;
1072}
1073
994/* 1074/*
995 * The only place this should be used is rename! 1075 * The only place this should be used is rename!
996 * if they have the same id, then the 1st one is the only one locked. 1076 * if they have the same id, then the 1st one is the only one locked.
@@ -1002,6 +1082,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
1002 struct inode *inode2) 1082 struct inode *inode2)
1003{ 1083{
1004 int status; 1084 int status;
1085 int inode1_is_ancestor, inode2_is_ancestor;
1005 struct ocfs2_inode_info *oi1 = OCFS2_I(inode1); 1086 struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
1006 struct ocfs2_inode_info *oi2 = OCFS2_I(inode2); 1087 struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
1007 struct buffer_head **tmpbh; 1088 struct buffer_head **tmpbh;
@@ -1015,9 +1096,26 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
1015 if (*bh2) 1096 if (*bh2)
1016 *bh2 = NULL; 1097 *bh2 = NULL;
1017 1098
1018 /* we always want to lock the one with the lower lockid first. */ 1099 /* we always want to lock the one with the lower lockid first.
1100 * and if they are nested, we lock ancestor first */
1019 if (oi1->ip_blkno != oi2->ip_blkno) { 1101 if (oi1->ip_blkno != oi2->ip_blkno) {
1020 if (oi1->ip_blkno < oi2->ip_blkno) { 1102 inode1_is_ancestor = ocfs2_check_if_ancestor(osb, oi2->ip_blkno,
1103 oi1->ip_blkno);
1104 if (inode1_is_ancestor < 0) {
1105 status = inode1_is_ancestor;
1106 goto bail;
1107 }
1108
1109 inode2_is_ancestor = ocfs2_check_if_ancestor(osb, oi1->ip_blkno,
1110 oi2->ip_blkno);
1111 if (inode2_is_ancestor < 0) {
1112 status = inode2_is_ancestor;
1113 goto bail;
1114 }
1115
1116 if ((inode1_is_ancestor == 1) ||
1117 (oi1->ip_blkno < oi2->ip_blkno &&
1118 inode2_is_ancestor == 0)) {
1021 /* switch id1 and id2 around */ 1119 /* switch id1 and id2 around */
1022 tmpbh = bh2; 1120 tmpbh = bh2;
1023 bh2 = bh1; 1121 bh2 = bh1;
@@ -1098,6 +1196,7 @@ static int ocfs2_rename(struct inode *old_dir,
1098 struct ocfs2_dir_lookup_result old_entry_lookup = { NULL, }; 1196 struct ocfs2_dir_lookup_result old_entry_lookup = { NULL, };
1099 struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; 1197 struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
1100 struct ocfs2_dir_lookup_result target_insert = { NULL, }; 1198 struct ocfs2_dir_lookup_result target_insert = { NULL, };
1199 bool should_add_orphan = false;
1101 1200
1102 /* At some point it might be nice to break this function up a 1201 /* At some point it might be nice to break this function up a
1103 * bit. */ 1202 * bit. */
@@ -1134,6 +1233,21 @@ static int ocfs2_rename(struct inode *old_dir,
1134 goto bail; 1233 goto bail;
1135 } 1234 }
1136 rename_lock = 1; 1235 rename_lock = 1;
1236
1237 /* here we cannot guarantee the inodes haven't just been
1238 * changed, so check if they are nested again */
1239 status = ocfs2_check_if_ancestor(osb, new_dir->i_ino,
1240 old_inode->i_ino);
1241 if (status < 0) {
1242 mlog_errno(status);
1243 goto bail;
1244 } else if (status == 1) {
1245 status = -EPERM;
1246 trace_ocfs2_rename_not_permitted(
1247 (unsigned long long)old_inode->i_ino,
1248 (unsigned long long)new_dir->i_ino);
1249 goto bail;
1250 }
1137 } 1251 }
1138 1252
1139 /* if old and new are the same, this'll just do one lock. */ 1253 /* if old and new are the same, this'll just do one lock. */
@@ -1304,6 +1418,7 @@ static int ocfs2_rename(struct inode *old_dir,
1304 mlog_errno(status); 1418 mlog_errno(status);
1305 goto bail; 1419 goto bail;
1306 } 1420 }
1421 should_add_orphan = true;
1307 } 1422 }
1308 } else { 1423 } else {
1309 BUG_ON(new_dentry->d_parent->d_inode != new_dir); 1424 BUG_ON(new_dentry->d_parent->d_inode != new_dir);
@@ -1348,17 +1463,6 @@ static int ocfs2_rename(struct inode *old_dir,
1348 goto bail; 1463 goto bail;
1349 } 1464 }
1350 1465
1351 if (S_ISDIR(new_inode->i_mode) ||
1352 (ocfs2_read_links_count(newfe) == 1)) {
1353 status = ocfs2_orphan_add(osb, handle, new_inode,
1354 newfe_bh, orphan_name,
1355 &orphan_insert, orphan_dir);
1356 if (status < 0) {
1357 mlog_errno(status);
1358 goto bail;
1359 }
1360 }
1361
1362 /* change the dirent to point to the correct inode */ 1466 /* change the dirent to point to the correct inode */
1363 status = ocfs2_update_entry(new_dir, handle, &target_lookup_res, 1467 status = ocfs2_update_entry(new_dir, handle, &target_lookup_res,
1364 old_inode); 1468 old_inode);
@@ -1373,6 +1477,15 @@ static int ocfs2_rename(struct inode *old_dir,
1373 else 1477 else
1374 ocfs2_add_links_count(newfe, -1); 1478 ocfs2_add_links_count(newfe, -1);
1375 ocfs2_journal_dirty(handle, newfe_bh); 1479 ocfs2_journal_dirty(handle, newfe_bh);
1480 if (should_add_orphan) {
1481 status = ocfs2_orphan_add(osb, handle, new_inode,
1482 newfe_bh, orphan_name,
1483 &orphan_insert, orphan_dir);
1484 if (status < 0) {
1485 mlog_errno(status);
1486 goto bail;
1487 }
1488 }
1376 } else { 1489 } else {
1377 /* if the name was not found in new_dir, add it now */ 1490 /* if the name was not found in new_dir, add it now */
1378 status = ocfs2_add_entry(handle, new_dentry, old_inode, 1491 status = ocfs2_add_entry(handle, new_dentry, old_inode,
@@ -1642,6 +1755,7 @@ static int ocfs2_symlink(struct inode *dir,
1642 struct ocfs2_dir_lookup_result lookup = { NULL, }; 1755 struct ocfs2_dir_lookup_result lookup = { NULL, };
1643 sigset_t oldset; 1756 sigset_t oldset;
1644 int did_block_signals = 0; 1757 int did_block_signals = 0;
1758 struct ocfs2_dentry_lock *dl = NULL;
1645 1759
1646 trace_ocfs2_symlink_begin(dir, dentry, symname, 1760 trace_ocfs2_symlink_begin(dir, dentry, symname,
1647 dentry->d_name.len, dentry->d_name.name); 1761 dentry->d_name.len, dentry->d_name.name);
@@ -1830,6 +1944,8 @@ static int ocfs2_symlink(struct inode *dir,
1830 goto bail; 1944 goto bail;
1831 } 1945 }
1832 1946
1947 dl = dentry->d_fsdata;
1948
1833 status = ocfs2_add_entry(handle, dentry, inode, 1949 status = ocfs2_add_entry(handle, dentry, inode,
1834 le64_to_cpu(fe->i_blkno), parent_fe_bh, 1950 le64_to_cpu(fe->i_blkno), parent_fe_bh,
1835 &lookup); 1951 &lookup);
@@ -1864,6 +1980,9 @@ bail:
1864 if (xattr_ac) 1980 if (xattr_ac)
1865 ocfs2_free_alloc_context(xattr_ac); 1981 ocfs2_free_alloc_context(xattr_ac);
1866 if ((status < 0) && inode) { 1982 if ((status < 0) && inode) {
1983 if (dl)
1984 ocfs2_cleanup_add_entry_failure(osb, dentry, inode);
1985
1867 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR; 1986 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
1868 clear_nlink(inode); 1987 clear_nlink(inode);
1869 iput(inode); 1988 iput(inode);
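The symlink change is a cleanup-on-failure fix: the dentry lock pointer is captured in dl right before ocfs2_add_entry(), and the bail path calls ocfs2_cleanup_add_entry_failure() only when that state actually exists, so the error path undoes exactly as much as was set up. A compact sketch of that idiom with hypothetical attach_lock()/detach_lock()/add_entry() helpers:

#include <stdio.h>
#include <stdlib.h>

struct dentry { void *fsdata; };

/* hypothetical helpers: attach/detach per-dentry bookkeeping */
static void *attach_lock(struct dentry *d) { d->fsdata = malloc(16); return d->fsdata; }
static void detach_lock(struct dentry *d)  { free(d->fsdata); d->fsdata = NULL; }

/* pretend the final step can fail */
static int add_entry(struct dentry *d, int simulate_error)
{
        (void)d;
        return simulate_error ? -1 : 0;
}

static int toy_symlink(struct dentry *dentry, int simulate_error)
{
        void *dl = NULL;
        int status;

        /* ... earlier setup steps ... */

        dl = attach_lock(dentry);
        if (!dl)
                return -1;

        status = add_entry(dentry, simulate_error);
        if (status < 0)
                goto bail;

        return 0;

bail:
        /* only undo the steps that actually completed */
        if (dl)
                detach_lock(dentry);
        return status;
}

int main(void)
{
        struct dentry d = { NULL };

        printf("ok path:   %d (fsdata=%p)\n", toy_symlink(&d, 0), d.fsdata);
        detach_lock(&d);                     /* normal-path teardown */
        printf("fail path: %d (fsdata=%p)\n", toy_symlink(&d, 1), d.fsdata);
        return 0;
}
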
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index 1b60c62aa9d6..6cb019b7c6a8 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -2292,6 +2292,8 @@ TRACE_EVENT(ocfs2_rename,
2292 __entry->new_len, __get_str(new_name)) 2292 __entry->new_len, __get_str(new_name))
2293); 2293);
2294 2294
2295DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_rename_not_permitted);
2296
2295TRACE_EVENT(ocfs2_rename_target_exists, 2297TRACE_EVENT(ocfs2_rename_target_exists,
2296 TP_PROTO(int new_len, const char *new_name), 2298 TP_PROTO(int new_len, const char *new_name),
2297 TP_ARGS(new_len, new_name), 2299 TP_ARGS(new_len, new_name),
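DEFINE_OCFS2_ULL_ULL_EVENT() stamps out the tracepoint used by the -EPERM path in the rename hunk above from a shared two-u64 event class. The sketch below only illustrates that stamp-out-a-family-of-helpers idea in plain C, with printf standing in for the real TRACE_EVENT machinery:

#include <stdio.h>

/*
 * One macro generates a family of identically shaped trace helpers that
 * differ only in name; printf is a stand-in for the tracepoint plumbing.
 */
#define DEFINE_ULL_ULL_EVENT(name)                                            \
        static void trace_##name(unsigned long long a, unsigned long long b)  \
        {                                                                     \
                printf(#name ": %llu %llu\n", a, b);                          \
        }

DEFINE_ULL_ULL_EVENT(ocfs2_rename_not_permitted)

int main(void)
{
        trace_ocfs2_rename_not_permitted(12345ULL, 67890ULL);
        return 0;
}
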
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 714e53b9cc66..636aab69ead5 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4288,9 +4288,16 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
4288 goto out; 4288 goto out;
4289 } 4289 }
4290 4290
4291 error = ocfs2_rw_lock(inode, 1);
4292 if (error) {
4293 mlog_errno(error);
4294 goto out;
4295 }
4296
4291 error = ocfs2_inode_lock(inode, &old_bh, 1); 4297 error = ocfs2_inode_lock(inode, &old_bh, 1);
4292 if (error) { 4298 if (error) {
4293 mlog_errno(error); 4299 mlog_errno(error);
4300 ocfs2_rw_unlock(inode, 1);
4294 goto out; 4301 goto out;
4295 } 4302 }
4296 4303
@@ -4302,6 +4309,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
4302 up_write(&OCFS2_I(inode)->ip_xattr_sem); 4309 up_write(&OCFS2_I(inode)->ip_xattr_sem);
4303 4310
4304 ocfs2_inode_unlock(inode, 1); 4311 ocfs2_inode_unlock(inode, 1);
4312 ocfs2_rw_unlock(inode, 1);
4305 brelse(old_bh); 4313 brelse(old_bh);
4306 4314
4307 if (error) { 4315 if (error) {
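The reflink fix nests the existing cluster inode lock inside a newly taken rw lock, which imposes two rules on every path through ocfs2_reflink(): release in the reverse order of acquisition, and if the inner lock cannot be taken, drop the outer one before bailing out. A minimal userspace sketch of that discipline with two pthread mutexes (names are illustrative only):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t rw_lock    = PTHREAD_MUTEX_INITIALIZER;  /* outer */
static pthread_mutex_t inode_lock = PTHREAD_MUTEX_INITIALIZER;  /* inner */

/* pretend the inner acquisition can fail, like ocfs2_inode_lock() */
static int take_inner(int simulate_error)
{
        if (simulate_error)
                return -1;
        pthread_mutex_lock(&inode_lock);
        return 0;
}

static int toy_reflink(int simulate_error)
{
        int err;

        pthread_mutex_lock(&rw_lock);              /* outer first */

        err = take_inner(simulate_error);
        if (err) {
                /* inner failed: drop the outer lock before bailing */
                pthread_mutex_unlock(&rw_lock);
                return err;
        }

        /* ... copy extents / xattrs while both locks are held ... */

        pthread_mutex_unlock(&inode_lock);         /* release in reverse order */
        pthread_mutex_unlock(&rw_lock);
        return 0;
}

int main(void)
{
        printf("happy path: %d\n", toy_reflink(0));
        printf("error path: %d\n", toy_reflink(1));
        return 0;
}
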
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index c7a89cea5c5d..ddb662b32447 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1925,15 +1925,11 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1925 1925
1926 ocfs2_shutdown_local_alloc(osb); 1926 ocfs2_shutdown_local_alloc(osb);
1927 1927
1928 ocfs2_truncate_log_shutdown(osb);
1929
1928 /* This will disable recovery and flush any recovery work. */ 1930 /* This will disable recovery and flush any recovery work. */
1929 ocfs2_recovery_exit(osb); 1931 ocfs2_recovery_exit(osb);
1930 1932
1931 /*
1932 * During dismount, when it recovers another node it will call
1933 * ocfs2_recover_orphans and queue delayed work osb_truncate_log_wq.
1934 */
1935 ocfs2_truncate_log_shutdown(osb);
1936
1937 ocfs2_journal_shutdown(osb); 1933 ocfs2_journal_shutdown(osb);
1938 1934
1939 ocfs2_sync_blockdev(sb); 1935 ocfs2_sync_blockdev(sb);
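Moving ocfs2_truncate_log_shutdown() below ocfs2_recovery_exit() follows the usual teardown rule: stop whatever can still queue work before shutting down the thing it queues onto, since recovering another node could otherwise requeue osb_truncate_log_wq after the truncate log was already gone. A small pthread sketch of that ordering, with a producer thread standing in for recovery and a flag-guarded counter standing in for the delayed work:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static bool q_alive = false;                 /* "truncate log" still usable? */
static int  q_pending = 0;
static atomic_bool producer_running = true;  /* "recovery" still allowed to queue */

static void queue_work(void)
{
        pthread_mutex_lock(&q_lock);
        if (q_alive)                         /* never touch a dead queue */
                q_pending++;
        pthread_mutex_unlock(&q_lock);
}

static void *recovery_thread(void *arg)
{
        (void)arg;
        while (atomic_load(&producer_running)) {
                queue_work();
                usleep(1000);
        }
        return NULL;
}

int main(void)
{
        pthread_t t;

        q_alive = true;
        pthread_create(&t, NULL, recovery_thread, NULL);
        usleep(10000);

        /* 1. stop the producer first (recovery_exit) ... */
        atomic_store(&producer_running, false);
        pthread_join(t, NULL);

        /* 2. ... only then tear down what it was feeding (truncate log) */
        pthread_mutex_lock(&q_lock);
        q_alive = false;
        printf("drained %d queued items, no late requeue possible\n", q_pending);
        q_pending = 0;
        pthread_mutex_unlock(&q_lock);
        return 0;
}
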
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 9d231e9e5f0e..bf2d03f8fd3e 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -184,29 +184,11 @@ static int show_stat(struct seq_file *p, void *v)
184 184
185static int stat_open(struct inode *inode, struct file *file) 185static int stat_open(struct inode *inode, struct file *file)
186{ 186{
187 size_t size = 1024 + 128 * num_possible_cpus(); 187 size_t size = 1024 + 128 * num_online_cpus();
188 char *buf;
189 struct seq_file *m;
190 int res;
191 188
192 /* minimum size to display an interrupt count : 2 bytes */ 189 /* minimum size to display an interrupt count : 2 bytes */
193 size += 2 * nr_irqs; 190 size += 2 * nr_irqs;
194 191 return single_open_size(file, show_stat, NULL, size);
195 /* don't ask for more than the kmalloc() max size */
196 if (size > KMALLOC_MAX_SIZE)
197 size = KMALLOC_MAX_SIZE;
198 buf = kmalloc(size, GFP_KERNEL);
199 if (!buf)
200 return -ENOMEM;
201
202 res = single_open(file, show_stat, NULL);
203 if (!res) {
204 m = file->private_data;
205 m->buf = buf;
206 m->size = ksize(buf);
207 } else
208 kfree(buf);
209 return res;
210} 192}
211 193
212static const struct file_operations proc_stat_operations = { 194static const struct file_operations proc_stat_operations = {
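stat_open() now keeps only the size heuristic (roughly 1 KiB of fixed overhead, 128 bytes per online CPU and 2 bytes per interrupt) and hands the allocation to single_open_size(), which in this same series gains a vmalloc fallback. A trivial userspace sketch of the same arithmetic; NR_IRQS_GUESS is a made-up placeholder because nr_irqs has no portable userspace equivalent:

#include <stdio.h>
#include <unistd.h>

#define NR_IRQS_GUESS 256       /* placeholder for the kernel's nr_irqs */

int main(void)
{
        long cpus = sysconf(_SC_NPROCESSORS_ONLN);
        size_t size;

        if (cpus < 1)
                cpus = 1;

        /* same shape as stat_open(): base + per-CPU lines + per-IRQ counts */
        size = 1024 + 128 * (size_t)cpus;
        size += 2 * NR_IRQS_GUESS;

        printf("estimated /proc/stat buffer: %zu bytes for %ld CPUs\n",
               size, cpus);
        return 0;
}
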
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 1d641bb108d2..3857b720cb1b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -8,8 +8,10 @@
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/export.h> 9#include <linux/export.h>
10#include <linux/seq_file.h> 10#include <linux/seq_file.h>
11#include <linux/vmalloc.h>
11#include <linux/slab.h> 12#include <linux/slab.h>
12#include <linux/cred.h> 13#include <linux/cred.h>
14#include <linux/mm.h>
13 15
14#include <asm/uaccess.h> 16#include <asm/uaccess.h>
15#include <asm/page.h> 17#include <asm/page.h>
@@ -30,6 +32,16 @@ static void seq_set_overflow(struct seq_file *m)
30 m->count = m->size; 32 m->count = m->size;
31} 33}
32 34
35static void *seq_buf_alloc(unsigned long size)
36{
37 void *buf;
38
39 buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
40 if (!buf && size > PAGE_SIZE)
41 buf = vmalloc(size);
42 return buf;
43}
44
33/** 45/**
34 * seq_open - initialize sequential file 46 * seq_open - initialize sequential file
35 * @file: file we initialize 47 * @file: file we initialize
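seq_buf_alloc() tries the slab allocator quietly (__GFP_NOWARN suppresses the allocation-failure warning) and falls back to vmalloc() only when a large, physically contiguous buffer cannot be had; the matching frees later in the patch become kvfree(), which accepts either kind of pointer. Below is a minimal userspace sketch of the same two-tier idea, with malloc()/mmap() standing in for kmalloc()/vmalloc(); the small header recording which allocator was used is an artifact of the sketch, since the kernel's kvfree() can tell the two apart on its own:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

#define SMALL_LIMIT 4096UL              /* stand-in for PAGE_SIZE */

struct buf_hdr { size_t size; int mmapped; };

static void *buf_alloc(size_t size)
{
        struct buf_hdr *h = malloc(sizeof(*h) + size);

        if (h) {
                h->mmapped = 0;
        } else if (size > SMALL_LIMIT) {
                /* fall back to page-granular memory for big buffers */
                h = mmap(NULL, sizeof(*h) + size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
                if (h == MAP_FAILED)
                        return NULL;
                h->mmapped = 1;
        } else {
                return NULL;
        }
        h->size = size;
        return h + 1;
}

static void buf_free(void *p)
{
        struct buf_hdr *h;

        if (!p)
                return;
        h = (struct buf_hdr *)p - 1;
        if (h->mmapped)
                munmap(h, sizeof(*h) + h->size);
        else
                free(h);
}

int main(void)
{
        char *buf = buf_alloc(2 * SMALL_LIMIT);

        if (buf) {
                strcpy(buf, "allocated");
                puts(buf);
                buf_free(buf);
        }
        return 0;
}
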
@@ -96,7 +108,7 @@ static int traverse(struct seq_file *m, loff_t offset)
96 return 0; 108 return 0;
97 } 109 }
98 if (!m->buf) { 110 if (!m->buf) {
99 m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); 111 m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
100 if (!m->buf) 112 if (!m->buf)
101 return -ENOMEM; 113 return -ENOMEM;
102 } 114 }
@@ -135,9 +147,9 @@ static int traverse(struct seq_file *m, loff_t offset)
135 147
136Eoverflow: 148Eoverflow:
137 m->op->stop(m, p); 149 m->op->stop(m, p);
138 kfree(m->buf); 150 kvfree(m->buf);
139 m->count = 0; 151 m->count = 0;
140 m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); 152 m->buf = seq_buf_alloc(m->size <<= 1);
141 return !m->buf ? -ENOMEM : -EAGAIN; 153 return !m->buf ? -ENOMEM : -EAGAIN;
142} 154}
143 155
@@ -192,7 +204,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
192 204
193 /* grab buffer if we didn't have one */ 205 /* grab buffer if we didn't have one */
194 if (!m->buf) { 206 if (!m->buf) {
195 m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); 207 m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
196 if (!m->buf) 208 if (!m->buf)
197 goto Enomem; 209 goto Enomem;
198 } 210 }
@@ -232,9 +244,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
232 if (m->count < m->size) 244 if (m->count < m->size)
233 goto Fill; 245 goto Fill;
234 m->op->stop(m, p); 246 m->op->stop(m, p);
235 kfree(m->buf); 247 kvfree(m->buf);
236 m->count = 0; 248 m->count = 0;
237 m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); 249 m->buf = seq_buf_alloc(m->size <<= 1);
238 if (!m->buf) 250 if (!m->buf)
239 goto Enomem; 251 goto Enomem;
240 m->version = 0; 252 m->version = 0;
@@ -350,7 +362,7 @@ EXPORT_SYMBOL(seq_lseek);
350int seq_release(struct inode *inode, struct file *file) 362int seq_release(struct inode *inode, struct file *file)
351{ 363{
352 struct seq_file *m = file->private_data; 364 struct seq_file *m = file->private_data;
353 kfree(m->buf); 365 kvfree(m->buf);
354 kfree(m); 366 kfree(m);
355 return 0; 367 return 0;
356} 368}
@@ -605,13 +617,13 @@ EXPORT_SYMBOL(single_open);
605int single_open_size(struct file *file, int (*show)(struct seq_file *, void *), 617int single_open_size(struct file *file, int (*show)(struct seq_file *, void *),
606 void *data, size_t size) 618 void *data, size_t size)
607{ 619{
608 char *buf = kmalloc(size, GFP_KERNEL); 620 char *buf = seq_buf_alloc(size);
609 int ret; 621 int ret;
610 if (!buf) 622 if (!buf)
611 return -ENOMEM; 623 return -ENOMEM;
612 ret = single_open(file, show, data); 624 ret = single_open(file, show, data);
613 if (ret) { 625 if (ret) {
614 kfree(buf); 626 kvfree(buf);
615 return ret; 627 return ret;
616 } 628 }
617 ((struct seq_file *)file->private_data)->buf = buf; 629 ((struct seq_file *)file->private_data)->buf = buf;